0
Fork 0
mirror of https://codeberg.org/forgejo/forgejo.git synced 2025-03-18 09:31:28 -05:00

fix: more permissive markup commit hash detection ()

This allows many more variants of commit hashes to be detected and interpreted as link if they are enclosed by up to two different non-word/non-digit characters. I also had in mind RTL languages, where the question mark and similar symbols are on the left of the commit hash.

Resolves 

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/6784
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Reviewed-by: Gusted <gusted@noreply.codeberg.org>
Co-authored-by: Robert Wolff <mahlzahn@posteo.de>
Co-committed-by: Robert Wolff <mahlzahn@posteo.de>
This commit is contained in:
Robert Wolff 2025-02-05 09:04:19 +00:00 committed by Gusted
parent 499497c959
commit 519169ee7b
3 changed files with 17 additions and 5 deletions

View file

@ -1,4 +1,5 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// Copyright 2025 The Forgejo Authors.
// SPDX-License-Identifier: MIT
package markup
@ -48,13 +49,13 @@ var (
// hashCurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae
// Although SHA1 hashes are 40 chars long, SHA256 are 64, the regex matches the hash from 7 to 64 chars in length
// so that abbreviated hash links can be used as well. This matches git and GitHub usability.
hashCurrentPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-f]{7,64})(?:\s|$|\)|\]|[.,:](\s|$))`)
hashCurrentPattern = regexp.MustCompile(`(?:^|\s)[^\w\d]{0,2}([0-9a-f]{7,64})[^\w\d]{0,2}(?:\s|$)`)
// shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
// anySHA1Pattern splits url containing SHA into parts
anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~_%.a-zA-Z0-9/]+)?(\?[-+~_%\.a-zA-Z0-9=&]+)?(#[-+~_%.a-zA-Z0-9]+)?`)
// anyHashPattern splits url containing SHA into parts
anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(/[-+~_%.a-zA-Z0-9/]+)?(\?[-+~_%\.a-zA-Z0-9=&]+)?(#[-+~_%.a-zA-Z0-9]+)?`)
// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash"
comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`)
@ -1174,7 +1175,7 @@ func emojiProcessor(ctx *RenderContext, node *html.Node) {
}
}
// hashCurrentPatternProcessor renders SHA1 strings to corresponding links that
// hashCurrentPatternProcessor renders SHA1/SHA256 strings to corresponding links that
// are assumed to be in the same repository.
func hashCurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" || ctx.Metas["repoPath"] == "" {

View file

@ -1,4 +1,5 @@
// Copyright 2018 The Gitea Authors. All rights reserved.
// Copyright 2025 The Forgejo Authors.
// SPDX-License-Identifier: MIT
package markup
@ -391,7 +392,7 @@ func TestRender_FullIssueURLs(t *testing.T) {
`<a href="http://localhost:3000/testOrg/testOrgRepo/pulls/2/commits" class="ref-issue">testOrg/testOrgRepo#2/commits</a>`)
}
func TestRegExp_sha1CurrentPattern(t *testing.T) {
func TestRegExp_hashCurrentPattern(t *testing.T) {
trueTestCases := []string{
"d8a994ef243349f321568f9e36d5c3f444b99cae",
"abcdefabcdefabcdefabcdefabcdefabcdefabcd",
@ -399,6 +400,13 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) {
"[abcdefabcdefabcdefabcdefabcdefabcdefabcd]",
"abcdefabcdefabcdefabcdefabcdefabcdefabcd.",
"abcdefabcdefabcdefabcdefabcdefabcdefabcd:",
"d8a994ef243349f321568f9e36d5c3f444b99cae12424fa123391042fbae2319",
"abcdefd?",
"abcdefd!",
"!abcd3ef",
":abcd3ef",
".abcd3ef",
" (abcd3ef). ",
}
falseTestCases := []string{
"test",
@ -406,6 +414,8 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) {
"e59ff077-2d03-4e6b-964d-63fbaea81f",
"abcdefghijklmnopqrstuvwxyzabcdefghijklmn",
"abcdefghijklmnopqrstuvwxyzabcdefghijklmO",
"commit/abcdefd",
"abcd3ef...defabcd",
}
for _, testCase := range trueTestCases {

View file

@ -1,4 +1,5 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// Copyright 2025 The Forgejo Authors.
// SPDX-License-Identifier: MIT
package markup_test