mirror of
https://github.com/TryGhost/Ghost.git
synced 2025-03-04 02:01:58 -05:00
🌐 Improved sodo-search for CJK (#21148)
no ref. According to the flexsearch documentation (https://github.com/nextapps-de/flexsearch?tab=readme-ov-file#cjk-word-break-chinese-japanese-korean), searching CJK text requires passing in a custom encode function to get better search results. This enhancement for CJK will only take effect when the Ghost site locale is set to one of `zh`, `zh-Hans`, `zh-Hant`, `ja`, `ko`. Co-authored-by: Cathy Sarisky <42299862+cathysarisky@users.noreply.github.com>
This commit is contained in:
parent
3bbe8c8c7a
commit
300eba49ca
1 changed files with 19 additions and 3 deletions
|
@ -15,7 +15,8 @@ export default class SearchIndex {
|
|||
id: 'id',
|
||||
index: ['title', 'excerpt'],
|
||||
store: true
|
||||
}
|
||||
},
|
||||
...this.#getEncodeOptions()
|
||||
});
|
||||
this.authorsIndex = new Flexsearch.Document({
|
||||
tokenize: 'forward',
|
||||
|
@ -23,7 +24,8 @@ export default class SearchIndex {
|
|||
id: 'id',
|
||||
index: ['name'],
|
||||
store: true
|
||||
}
|
||||
},
|
||||
...this.#getEncodeOptions()
|
||||
});
|
||||
this.tagsIndex = new Flexsearch.Document({
|
||||
tokenize: 'forward',
|
||||
|
@ -31,7 +33,8 @@ export default class SearchIndex {
|
|||
id: 'id',
|
||||
index: ['name'],
|
||||
store: true
|
||||
}
|
||||
},
|
||||
...this.#getEncodeOptions()
|
||||
});
|
||||
|
||||
this.init = this.init.bind(this);
|
||||
|
@ -133,4 +136,17 @@ export default class SearchIndex {
|
|||
tags: this.#normalizeSearchResult(tags)
|
||||
};
|
||||
}
|
||||
|
||||
#getEncodeOptions() {
|
||||
const regex = new RegExp(
|
||||
`[\u{4E00}-\u{9FFF}\u{3040}-\u{309F}\u{30A0}-\u{30FF}\u{AC00}-\u{D7A3}\u{3400}-\u{4DBF}\u{20000}-\u{2A6DF}\u{2A700}-\u{2B73F}\u{2B740}-\u{2B81F}\u{2B820}-\u{2CEAF}\u{2CEB0}-\u{2EBEF}\u{30000}-\u{3134F}\u{31350}-\u{323AF}\u{2EBF0}-\u{2EE5F}\u{F900}-\u{FAFF}\u{2F800}-\u{2FA1F}]|[0-9A-Za-zа-я\u00C0-\u017F\u0400-\u04FF\u0600-\u06FF\u0980-\u09FF\u1E00-\u1EFF]+`,
|
||||
'mug'
|
||||
);
|
||||
|
||||
return {
|
||||
encode: (str) => {
|
||||
return ('' + str).toLowerCase().match(regex) ?? [];
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue