0
Fork 0
mirror of https://github.com/thomiceli/opengist.git synced 2025-03-12 02:21:45 -05:00
opengist/internal/index/bleve.go

164 lines
4.4 KiB
Go
Raw Permalink Normal View History

2024-01-04 03:38:15 +01:00
package index
import (
"errors"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/search/query"
2024-09-09 11:44:22 +02:00
"github.com/rs/zerolog/log"
2024-01-04 03:38:15 +01:00
"strconv"
)
2025-02-23 21:03:51 +01:00
type BleveIndexer struct {
index bleve.Index
path string
2024-09-09 11:44:22 +02:00
}
2024-01-04 03:38:15 +01:00
2025-02-23 21:03:51 +01:00
func NewBleveIndexer(path string) *BleveIndexer {
return &BleveIndexer{path: path}
2024-01-04 03:38:15 +01:00
}
2025-02-23 21:03:51 +01:00
func (i *BleveIndexer) Init() {
2024-09-09 11:44:22 +02:00
go func() {
2025-02-23 21:03:51 +01:00
bleveIndex, err := i.open()
2024-09-09 11:44:22 +02:00
if err != nil {
2025-02-23 21:03:51 +01:00
log.Error().Err(err).Msg("Failed to open Bleve index")
i.Close()
2024-09-09 11:44:22 +02:00
}
2025-02-23 21:03:51 +01:00
i.index = bleveIndex
log.Info().Msg("Bleve indexer initialized")
2024-09-09 11:44:22 +02:00
}()
}
2025-02-23 21:03:51 +01:00
func (i *BleveIndexer) open() (bleve.Index, error) {
bleveIndex, err := bleve.Open(i.path)
2024-01-04 03:38:15 +01:00
if err == nil {
2024-09-09 11:44:22 +02:00
return bleveIndex, nil
2024-01-04 03:38:15 +01:00
}
if !errors.Is(err, bleve.ErrorIndexPathDoesNotExist) {
2024-09-09 11:44:22 +02:00
return nil, err
2024-01-04 03:38:15 +01:00
}
docMapping := bleve.NewDocumentMapping()
docMapping.AddFieldMappingsAt("GistID", bleve.NewNumericFieldMapping())
docMapping.AddFieldMappingsAt("Content", bleve.NewTextFieldMapping())
mapping := bleve.NewIndexMapping()
if err = mapping.AddCustomTokenFilter("unicodeNormalize", map[string]any{
"type": unicodenorm.Name,
"form": unicodenorm.NFC,
}); err != nil {
2024-09-09 11:44:22 +02:00
return nil, err
2024-01-04 03:38:15 +01:00
}
if err = mapping.AddCustomAnalyzer("gistAnalyser", map[string]interface{}{
"type": custom.Name,
"char_filters": []string{},
"tokenizer": unicode.Name,
"token_filters": []string{"unicodeNormalize", camelcase.Name, lowercase.Name},
}); err != nil {
2024-09-09 11:44:22 +02:00
return nil, err
2024-01-04 03:38:15 +01:00
}
docMapping.DefaultAnalyzer = "gistAnalyser"
2025-02-23 21:03:51 +01:00
return bleve.New(i.path, mapping)
2024-09-09 11:44:22 +02:00
}
2025-02-23 21:03:51 +01:00
func (i *BleveIndexer) Close() {
if i == nil || i.index == nil {
2024-09-09 11:44:22 +02:00
return
}
2024-01-04 03:38:15 +01:00
2025-02-23 21:03:51 +01:00
err := i.index.Close()
2024-09-09 11:44:22 +02:00
if err != nil {
2025-02-23 21:03:51 +01:00
log.Error().Err(err).Msg("Failed to close Bleve index")
2024-09-09 11:44:22 +02:00
}
2025-02-23 21:03:51 +01:00
log.Info().Msg("Bleve indexer closed")
2024-01-04 03:38:15 +01:00
}
2025-02-23 21:03:51 +01:00
func (i *BleveIndexer) Add(gist *Gist) error {
2024-01-04 03:38:15 +01:00
if gist == nil {
return errors.New("failed to add nil gist to index")
}
2025-02-23 21:03:51 +01:00
return (*atomicIndexer.Load()).(*BleveIndexer).index.Index(strconv.Itoa(int(gist.GistID)), gist)
2024-01-04 03:38:15 +01:00
}
2025-02-23 21:03:51 +01:00
func (i *BleveIndexer) Remove(gistID uint) error {
return (*atomicIndexer.Load()).(*BleveIndexer).index.Delete(strconv.Itoa(int(gistID)))
2024-01-04 03:38:15 +01:00
}
2025-03-11 09:04:24 +01:00
func (i *BleveIndexer) Search(queryStr string, queryMetadata SearchGistMetadata, userId uint, page int) ([]uint, uint64, map[string]int, error) {
2024-01-04 03:38:15 +01:00
var err error
var indexerQuery query.Query
if queryStr != "" {
contentQuery := bleve.NewMatchPhraseQuery(queryStr)
contentQuery.FieldVal = "Content"
indexerQuery = contentQuery
} else {
contentQuery := bleve.NewMatchAllQuery()
indexerQuery = contentQuery
}
2025-03-11 09:04:24 +01:00
privateQuery := bleve.NewBoolFieldQuery(false)
privateQuery.SetField("Private")
2024-09-07 14:36:16 +02:00
2025-03-11 09:04:24 +01:00
userIdMatch := float64(userId)
2024-09-07 14:36:16 +02:00
truee := true
2025-03-11 09:04:24 +01:00
userIdQuery := bleve.NewNumericRangeInclusiveQuery(&userIdMatch, &userIdMatch, &truee, &truee)
userIdQuery.SetField("UserID")
2024-01-04 03:38:15 +01:00
2025-03-11 09:04:24 +01:00
accessQuery := bleve.NewDisjunctionQuery(privateQuery, userIdQuery)
indexerQuery = bleve.NewConjunctionQuery(accessQuery, indexerQuery)
2024-09-07 14:36:16 +02:00
2024-01-04 03:38:15 +01:00
addQuery := func(field, value string) {
if value != "" && value != "." {
q := bleve.NewMatchPhraseQuery(value)
q.FieldVal = field
indexerQuery = bleve.NewConjunctionQuery(indexerQuery, q)
}
}
addQuery("Username", queryMetadata.Username)
addQuery("Title", queryMetadata.Title)
addQuery("Extensions", "."+queryMetadata.Extension)
addQuery("Filenames", queryMetadata.Filename)
addQuery("Languages", queryMetadata.Language)
2025-01-24 14:39:42 +01:00
addQuery("Topics", queryMetadata.Topic)
2024-01-04 03:38:15 +01:00
languageFacet := bleve.NewFacetRequest("Languages", 10)
perPage := 10
offset := (page - 1) * perPage
s := bleve.NewSearchRequestOptions(indexerQuery, perPage+1, offset, false)
2024-01-04 03:38:15 +01:00
s.AddFacet("languageFacet", languageFacet)
s.Fields = []string{"GistID"}
s.IncludeLocations = false
2025-02-23 21:03:51 +01:00
results, err := (*atomicIndexer.Load()).(*BleveIndexer).index.Search(s)
2024-01-04 03:38:15 +01:00
if err != nil {
return nil, 0, nil, err
}
gistIds := make([]uint, 0, len(results.Hits))
for _, hit := range results.Hits {
gistIds = append(gistIds, uint(hit.Fields["GistID"].(float64)))
}
languageCounts := make(map[string]int)
if facets, found := results.Facets["languageFacet"]; found {
for _, term := range facets.Terms.Terms() {
languageCounts[term.Term] = term.Count
}
}
return gistIds, results.Total, languageCounts, nil
}