2024-01-04 03:38:15 +01:00
|
|
|
package index
|
|
|
|
|
|
|
|
import (
|
|
|
|
"errors"
|
|
|
|
"github.com/blevesearch/bleve/v2"
|
|
|
|
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
|
|
|
|
"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
|
|
|
|
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
|
|
|
|
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
|
|
|
|
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
|
|
|
|
"github.com/blevesearch/bleve/v2/search/query"
|
2024-09-09 11:44:22 +02:00
|
|
|
"github.com/rs/zerolog/log"
|
2024-01-04 03:38:15 +01:00
|
|
|
"github.com/thomiceli/opengist/internal/config"
|
|
|
|
"strconv"
|
2024-09-09 11:44:22 +02:00
|
|
|
"sync/atomic"
|
2024-01-04 03:38:15 +01:00
|
|
|
)
|
|
|
|
|
2024-09-09 11:44:22 +02:00
|
|
|
var atomicIndexer atomic.Pointer[Indexer]
|
|
|
|
|
|
|
|
type Indexer struct {
|
|
|
|
Index bleve.Index
|
|
|
|
}
|
2024-01-04 03:38:15 +01:00
|
|
|
|
|
|
|
func Enabled() bool {
|
|
|
|
return config.C.IndexEnabled
|
|
|
|
}
|
|
|
|
|
2024-09-09 11:44:22 +02:00
|
|
|
func Init(indexFilename string) {
|
|
|
|
atomicIndexer.Store(&Indexer{Index: nil})
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
bleveIndex, err := open(indexFilename)
|
|
|
|
if err != nil {
|
|
|
|
log.Error().Err(err).Msg("Failed to open index")
|
|
|
|
(*atomicIndexer.Load()).close()
|
|
|
|
}
|
|
|
|
atomicIndexer.Store(&Indexer{Index: bleveIndex})
|
|
|
|
log.Info().Msg("Indexer initialized")
|
|
|
|
}()
|
|
|
|
}
|
|
|
|
|
|
|
|
func open(indexFilename string) (bleve.Index, error) {
|
|
|
|
bleveIndex, err := bleve.Open(indexFilename)
|
2024-01-04 03:38:15 +01:00
|
|
|
if err == nil {
|
2024-09-09 11:44:22 +02:00
|
|
|
return bleveIndex, nil
|
2024-01-04 03:38:15 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if !errors.Is(err, bleve.ErrorIndexPathDoesNotExist) {
|
2024-09-09 11:44:22 +02:00
|
|
|
return nil, err
|
2024-01-04 03:38:15 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
docMapping := bleve.NewDocumentMapping()
|
|
|
|
docMapping.AddFieldMappingsAt("GistID", bleve.NewNumericFieldMapping())
|
|
|
|
docMapping.AddFieldMappingsAt("Content", bleve.NewTextFieldMapping())
|
|
|
|
|
|
|
|
mapping := bleve.NewIndexMapping()
|
|
|
|
|
|
|
|
if err = mapping.AddCustomTokenFilter("unicodeNormalize", map[string]any{
|
|
|
|
"type": unicodenorm.Name,
|
|
|
|
"form": unicodenorm.NFC,
|
|
|
|
}); err != nil {
|
2024-09-09 11:44:22 +02:00
|
|
|
return nil, err
|
2024-01-04 03:38:15 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if err = mapping.AddCustomAnalyzer("gistAnalyser", map[string]interface{}{
|
|
|
|
"type": custom.Name,
|
|
|
|
"char_filters": []string{},
|
|
|
|
"tokenizer": unicode.Name,
|
|
|
|
"token_filters": []string{"unicodeNormalize", camelcase.Name, lowercase.Name},
|
|
|
|
}); err != nil {
|
2024-09-09 11:44:22 +02:00
|
|
|
return nil, err
|
2024-01-04 03:38:15 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
docMapping.DefaultAnalyzer = "gistAnalyser"
|
|
|
|
|
2024-09-09 11:44:22 +02:00
|
|
|
return bleve.New(indexFilename, mapping)
|
|
|
|
}
|
|
|
|
|
|
|
|
func Close() {
|
|
|
|
(*atomicIndexer.Load()).close()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (i *Indexer) close() {
|
|
|
|
if i == nil || i.Index == nil {
|
|
|
|
return
|
|
|
|
}
|
2024-01-04 03:38:15 +01:00
|
|
|
|
2024-09-09 11:44:22 +02:00
|
|
|
err := i.Index.Close()
|
|
|
|
if err != nil {
|
|
|
|
log.Error().Err(err).Msg("Failed to close bleve index")
|
|
|
|
}
|
|
|
|
log.Info().Msg("Indexer closed")
|
|
|
|
atomicIndexer.Store(&Indexer{Index: nil})
|
2024-01-04 03:38:15 +01:00
|
|
|
}
|
|
|
|
|
2024-09-09 11:44:22 +02:00
|
|
|
func checkForIndexer() error {
|
|
|
|
if (*atomicIndexer.Load()).Index == nil {
|
|
|
|
return errors.New("indexer is not initialized")
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
2024-01-04 03:38:15 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func AddInIndex(gist *Gist) error {
|
|
|
|
if !Enabled() {
|
|
|
|
return nil
|
|
|
|
}
|
2024-09-09 11:44:22 +02:00
|
|
|
if err := checkForIndexer(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2024-01-04 03:38:15 +01:00
|
|
|
|
|
|
|
if gist == nil {
|
|
|
|
return errors.New("failed to add nil gist to index")
|
|
|
|
}
|
2024-09-09 11:44:22 +02:00
|
|
|
return (*atomicIndexer.Load()).Index.Index(strconv.Itoa(int(gist.GistID)), gist)
|
2024-01-04 03:38:15 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func RemoveFromIndex(gistID uint) error {
|
|
|
|
if !Enabled() {
|
|
|
|
return nil
|
|
|
|
}
|
2024-09-09 11:44:22 +02:00
|
|
|
if err := checkForIndexer(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2024-01-04 03:38:15 +01:00
|
|
|
|
2024-09-09 11:44:22 +02:00
|
|
|
return (*atomicIndexer.Load()).Index.Delete(strconv.Itoa(int(gistID)))
|
2024-01-04 03:38:15 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func SearchGists(queryStr string, queryMetadata SearchGistMetadata, gistsIds []uint, page int) ([]uint, uint64, map[string]int, error) {
|
|
|
|
if !Enabled() {
|
|
|
|
return nil, 0, nil, nil
|
|
|
|
}
|
2024-09-09 11:44:22 +02:00
|
|
|
if err := checkForIndexer(); err != nil {
|
|
|
|
return nil, 0, nil, err
|
|
|
|
}
|
2024-01-04 03:38:15 +01:00
|
|
|
|
|
|
|
var err error
|
|
|
|
var indexerQuery query.Query
|
|
|
|
if queryStr != "" {
|
|
|
|
contentQuery := bleve.NewMatchPhraseQuery(queryStr)
|
|
|
|
contentQuery.FieldVal = "Content"
|
|
|
|
indexerQuery = contentQuery
|
|
|
|
} else {
|
|
|
|
contentQuery := bleve.NewMatchAllQuery()
|
|
|
|
indexerQuery = contentQuery
|
|
|
|
}
|
|
|
|
|
2024-09-07 14:36:16 +02:00
|
|
|
repoQueries := make([]query.Query, 0, len(gistsIds))
|
|
|
|
|
|
|
|
truee := true
|
|
|
|
for _, id := range gistsIds {
|
|
|
|
f := float64(id)
|
|
|
|
qq := bleve.NewNumericRangeInclusiveQuery(&f, &f, &truee, &truee)
|
|
|
|
qq.SetField("GistID")
|
|
|
|
repoQueries = append(repoQueries, qq)
|
2024-01-04 03:38:15 +01:00
|
|
|
}
|
|
|
|
|
2024-09-07 14:36:16 +02:00
|
|
|
indexerQuery = bleve.NewConjunctionQuery(bleve.NewDisjunctionQuery(repoQueries...), indexerQuery)
|
|
|
|
|
2024-01-04 03:38:15 +01:00
|
|
|
addQuery := func(field, value string) {
|
|
|
|
if value != "" && value != "." {
|
|
|
|
q := bleve.NewMatchPhraseQuery(value)
|
|
|
|
q.FieldVal = field
|
|
|
|
indexerQuery = bleve.NewConjunctionQuery(indexerQuery, q)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
addQuery("Username", queryMetadata.Username)
|
|
|
|
addQuery("Title", queryMetadata.Title)
|
|
|
|
addQuery("Extensions", "."+queryMetadata.Extension)
|
|
|
|
addQuery("Filenames", queryMetadata.Filename)
|
|
|
|
addQuery("Languages", queryMetadata.Language)
|
|
|
|
|
|
|
|
languageFacet := bleve.NewFacetRequest("Languages", 10)
|
|
|
|
|
|
|
|
perPage := 10
|
|
|
|
offset := (page - 1) * perPage
|
|
|
|
|
|
|
|
s := bleve.NewSearchRequestOptions(indexerQuery, perPage, offset, false)
|
|
|
|
s.AddFacet("languageFacet", languageFacet)
|
|
|
|
s.Fields = []string{"GistID"}
|
|
|
|
s.IncludeLocations = false
|
|
|
|
|
2024-09-09 11:44:22 +02:00
|
|
|
results, err := (*atomicIndexer.Load()).Index.Search(s)
|
2024-01-04 03:38:15 +01:00
|
|
|
if err != nil {
|
|
|
|
return nil, 0, nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
gistIds := make([]uint, 0, len(results.Hits))
|
|
|
|
for _, hit := range results.Hits {
|
|
|
|
gistIds = append(gistIds, uint(hit.Fields["GistID"].(float64)))
|
|
|
|
}
|
|
|
|
|
|
|
|
languageCounts := make(map[string]int)
|
|
|
|
if facets, found := results.Facets["languageFacet"]; found {
|
|
|
|
for _, term := range facets.Terms.Terms() {
|
|
|
|
languageCounts[term.Term] = term.Count
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return gistIds, results.Total, languageCounts, nil
|
|
|
|
}
|