0
Fork 0
mirror of https://codeberg.org/forgejo/forgejo.git synced 2024-12-29 02:44:08 -05:00
forgejo/modules/indexer/issues/internal/model.go
6543 38c3cc4eb7
Patch in exact search for meilisearch (#29671)
meilisearch does not have an search option to contorl fuzzynes per query
right now:
 - https://github.com/meilisearch/meilisearch/issues/1192
 - https://github.com/orgs/meilisearch/discussions/377
 - https://github.com/meilisearch/meilisearch/discussions/1096

so we have to create a workaround by post-filter the search result in
gitea until this is addressed.

For future works I added an option in backend only atm, to enable
fuzzynes for issue indexer too.
And also refactored the code so the fuzzy option is equal in logic to
code indexer

---
*Sponsored by Kithara Software GmbH*
Conflicts:
	routers/web/repo/search.go
	trivial context confict s/isMatch/isFuzzy/
2024-03-11 23:37:00 +07:00

150 lines
6.2 KiB
Go

// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package internal
import (
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/optional"
"code.gitea.io/gitea/modules/timeutil"
)
// IndexerData data stored in the issue indexer
type IndexerData struct {
ID int64 `json:"id"`
RepoID int64 `json:"repo_id"`
IsPublic bool `json:"is_public"` // If the repo is public
// Fields used for keyword searching
Title string `json:"title"`
Content string `json:"content"`
Comments []string `json:"comments"`
// Fields used for filtering
IsPull bool `json:"is_pull"`
IsClosed bool `json:"is_closed"`
LabelIDs []int64 `json:"label_ids"`
NoLabel bool `json:"no_label"` // True if LabelIDs is empty
MilestoneID int64 `json:"milestone_id"`
ProjectID int64 `json:"project_id"`
ProjectBoardID int64 `json:"project_board_id"`
PosterID int64 `json:"poster_id"`
AssigneeID int64 `json:"assignee_id"`
MentionIDs []int64 `json:"mention_ids"`
ReviewedIDs []int64 `json:"reviewed_ids"`
ReviewRequestedIDs []int64 `json:"review_requested_ids"`
SubscriberIDs []int64 `json:"subscriber_ids"`
UpdatedUnix timeutil.TimeStamp `json:"updated_unix"`
// Fields used for sorting
// UpdatedUnix is both used for filtering and sorting.
// ID is used for sorting too, to make the sorting stable.
CreatedUnix timeutil.TimeStamp `json:"created_unix"`
DeadlineUnix timeutil.TimeStamp `json:"deadline_unix"`
CommentCount int64 `json:"comment_count"`
}
// Match represents on search result
type Match struct {
ID int64 `json:"id"`
Score float64 `json:"score"`
}
// SearchResult represents search results
type SearchResult struct {
Total int64
Hits []Match
}
// SearchOptions represents search options.
//
// It has a slightly different design from database query options.
// In database query options, a field is never a pointer, so it could be confusing when it's zero value:
// Do you want to find data with a field value of 0, or do you not specify the field in the options?
// To avoid this confusion, db introduced db.NoConditionID(-1).
// So zero value means the field is not specified in the search options, and db.NoConditionID means "== 0" or "id NOT IN (SELECT id FROM ...)"
// It's still not ideal, it trapped developers many times.
// And sometimes -1 could be a valid value, like issue ID, negative numbers indicate exclusion.
// Since db.NoConditionID is for "db" (the package name is db), it makes sense not to use it in the indexer:
// Why do bleve/elasticsearch/meilisearch indexers need to know about db.NoConditionID?
// So in SearchOptions, we use pointer for fields which could be not specified,
// and always use the value to filter if it's not nil, even if it's zero or negative.
// It can handle almost all cases, if there is an exception, we can add a new field, like NoLabelOnly.
// Unfortunately, we still use db for the indexer and have to convert between db.NoConditionID and nil for legacy reasons.
type SearchOptions struct {
Keyword string // keyword to search
IsFuzzyKeyword bool // if false the levenshtein distance is 0
RepoIDs []int64 // repository IDs which the issues belong to
AllPublic bool // if include all public repositories
IsPull optional.Option[bool] // if the issues is a pull request
IsClosed optional.Option[bool] // if the issues is closed
IncludedLabelIDs []int64 // labels the issues have
ExcludedLabelIDs []int64 // labels the issues don't have
IncludedAnyLabelIDs []int64 // labels the issues have at least one. It will be ignored if IncludedLabelIDs is not empty. It's an uncommon filter, but it has been supported accidentally by issues.IssuesOptions.IncludedLabelNames.
NoLabelOnly bool // if the issues have no label, if true, IncludedLabelIDs and ExcludedLabelIDs, IncludedAnyLabelIDs will be ignored
MilestoneIDs []int64 // milestones the issues have
ProjectID *int64 // project the issues belong to
ProjectBoardID *int64 // project board the issues belong to
PosterID *int64 // poster of the issues
AssigneeID *int64 // assignee of the issues, zero means no assignee
MentionID *int64 // mentioned user of the issues
ReviewedID *int64 // reviewer of the issues
ReviewRequestedID *int64 // requested reviewer of the issues
SubscriberID *int64 // subscriber of the issues
UpdatedAfterUnix *int64
UpdatedBeforeUnix *int64
db.Paginator
SortBy SortBy // sort by field
}
// Copy returns a copy of the options.
// Be careful, it's not a deep copy, so `SearchOptions.RepoIDs = {...}` is OK while `SearchOptions.RepoIDs[0] = ...` is not.
func (o *SearchOptions) Copy(edit ...func(options *SearchOptions)) *SearchOptions {
if o == nil {
return nil
}
v := *o
for _, e := range edit {
e(&v)
}
return &v
}
type SortBy string
const (
SortByCreatedDesc SortBy = "-created_unix"
SortByUpdatedDesc SortBy = "-updated_unix"
SortByCommentsDesc SortBy = "-comment_count"
SortByDeadlineDesc SortBy = "-deadline_unix"
SortByCreatedAsc SortBy = "created_unix"
SortByUpdatedAsc SortBy = "updated_unix"
SortByCommentsAsc SortBy = "comment_count"
SortByDeadlineAsc SortBy = "deadline_unix"
// Unsupported sort types which are supported by issues.IssuesOptions.SortType:
//
// - "priorityrepo":
// It's impossible to support it in the indexer.
// It is based on the specified repository in the request, so we cannot add static field to the indexer.
// If we do something like that query the issues in the specified repository first then append other issues,
// it will break the pagination.
//
// - "project-column-sorting":
// Although it's possible to support it by adding project.ProjectIssue.Sorting to the indexer,
// but what if the issue belongs to multiple projects?
// Since it's unsupported to search issues with keyword in project page, we don't need to support it.
)