0
Fork 0
mirror of https://codeberg.org/forgejo/forgejo.git synced 2025-01-13 18:10:55 -05:00
forgejo/routers/web/repo/blame.go
Gusted 5b3a82d621
[FEAT] Enable ambiguous character detection in configured contexts
- The ambiguous character detection is an important security feature to
combat against sourcebase attacks (https://trojansource.codes/).
- However there are a few problems with the feature as it stands
today (i) it's apparantly an big performance hitter, it's twice as slow
as syntax highlighting (ii) it contains false positives, because it's
reporting valid problems but not valid within the context of a
programming language (ambiguous charachters in code comments being a
prime example) that can lead to security issues (iii) charachters from
certain languages always being marked as ambiguous. It's a lot of effort
to fix the aforementioned issues.
- Therefore, make it configurable in which context the ambiguous
character detection should be run, this avoids running detection in all
contexts such as file views, but still enable it in commits and pull
requests diffs where it matters the most. Ideally this also becomes an
per-repository setting, but the code architecture doesn't allow for a
clean implementation of that.
- Adds unit test.
- Adds integration tests to ensure that the contexts and instance-wide
is respected (and that ambigious charachter detection actually work in
different places).
- Ref: https://codeberg.org/forgejo/forgejo/pulls/2395#issuecomment-1575547
- Ref: https://codeberg.org/forgejo/forgejo/issues/564
2024-02-23 13:12:17 +01:00

292 lines
7.8 KiB
Go

// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package repo
import (
"fmt"
gotemplate "html/template"
"net/http"
"net/url"
"strings"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/highlight"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/templates"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"
files_service "code.gitea.io/gitea/services/repository/files"
)
type blameRow struct {
RowNumber int
Avatar gotemplate.HTML
RepoLink string
PartSha string
PreviousSha string
PreviousShaURL string
IsFirstCommit bool
CommitURL string
CommitMessage string
CommitSince gotemplate.HTML
Code gotemplate.HTML
EscapeStatus *charset.EscapeStatus
}
// RefBlame render blame page
func RefBlame(ctx *context.Context) {
if ctx.Repo.TreePath == "" {
ctx.NotFound("No file specified", nil)
return
}
paths := make([]string, 0, 5)
treeNames := strings.Split(ctx.Repo.TreePath, "/")
for i := range treeNames {
paths = append(paths, strings.Join(treeNames[:i+1], "/"))
}
// Get current entry user currently looking at.
entry, err := ctx.Repo.Commit.GetTreeEntryByPath(ctx.Repo.TreePath)
if err != nil {
HandleGitError(ctx, "Repo.Commit.GetTreeEntryByPath", err)
return
}
blob := entry.Blob()
ctx.Data["PageIsViewCode"] = true
ctx.Data["IsBlame"] = true
ctx.Data["BranchLink"] = ctx.Repo.RepoLink + "/src/" + ctx.Repo.BranchNameSubURL()
ctx.Data["RawFileLink"] = ctx.Repo.RepoLink + "/raw/" + ctx.Repo.BranchNameSubURL() + "/" + util.PathEscapeSegments(ctx.Repo.TreePath)
ctx.Data["Paths"] = paths
ctx.Data["TreeNames"] = treeNames
ctx.Data["FileSize"] = blob.Size()
ctx.Data["FileName"] = blob.Name()
ctx.Data["NumLinesSet"] = true
ctx.Data["NumLines"], err = blob.GetBlobLineCount()
if err != nil {
ctx.ServerError("GetBlobLineCount", err)
return
}
result, err := performBlame(ctx, ctx.Repo.Commit, ctx.Repo.TreePath, ctx.FormBool("bypass-blame-ignore"))
if err != nil {
ctx.ServerError("performBlame", err)
return
}
ctx.Data["UsesIgnoreRevs"] = result.UsesIgnoreRevs
ctx.Data["FaultyIgnoreRevsFile"] = result.FaultyIgnoreRevsFile
commitNames := processBlameParts(ctx, result.Parts)
if ctx.Written() {
return
}
renderBlame(ctx, result.Parts, commitNames)
ctx.HTML(http.StatusOK, tplRepoHome)
}
type blameResult struct {
Parts []*git.BlamePart
UsesIgnoreRevs bool
FaultyIgnoreRevsFile bool
}
func performBlame(ctx *context.Context, commit *git.Commit, file string, bypassBlameIgnore bool) (*blameResult, error) {
repoPath := ctx.Repo.Repository.RepoPath()
objectFormat, err := ctx.Repo.GitRepo.GetObjectFormat()
if err != nil {
return nil, err
}
blameReader, err := git.CreateBlameReader(ctx, objectFormat, repoPath, commit, file, bypassBlameIgnore)
if err != nil {
return nil, err
}
r := &blameResult{}
if err := fillBlameResult(blameReader, r); err != nil {
_ = blameReader.Close()
return nil, err
}
err = blameReader.Close()
if err != nil {
if len(r.Parts) == 0 && r.UsesIgnoreRevs {
// try again without ignored revs
blameReader, err = git.CreateBlameReader(ctx, objectFormat, repoPath, commit, file, true)
if err != nil {
return nil, err
}
r := &blameResult{
FaultyIgnoreRevsFile: true,
}
if err := fillBlameResult(blameReader, r); err != nil {
_ = blameReader.Close()
return nil, err
}
return r, blameReader.Close()
}
return nil, err
}
return r, nil
}
func fillBlameResult(br *git.BlameReader, r *blameResult) error {
r.UsesIgnoreRevs = br.UsesIgnoreRevs()
previousHelper := make(map[string]*git.BlamePart)
r.Parts = make([]*git.BlamePart, 0, 5)
for {
blamePart, err := br.NextPart()
if err != nil {
return fmt.Errorf("BlameReader.NextPart failed: %w", err)
}
if blamePart == nil {
break
}
if prev, ok := previousHelper[blamePart.Sha]; ok {
if blamePart.PreviousSha == "" {
blamePart.PreviousSha = prev.PreviousSha
blamePart.PreviousPath = prev.PreviousPath
}
} else {
previousHelper[blamePart.Sha] = blamePart
}
r.Parts = append(r.Parts, blamePart)
}
return nil
}
func processBlameParts(ctx *context.Context, blameParts []*git.BlamePart) map[string]*user_model.UserCommit {
// store commit data by SHA to look up avatar info etc
commitNames := make(map[string]*user_model.UserCommit)
// and as blameParts can reference the same commits multiple
// times, we cache the lookup work locally
commits := make([]*git.Commit, 0, len(blameParts))
commitCache := map[string]*git.Commit{}
commitCache[ctx.Repo.Commit.ID.String()] = ctx.Repo.Commit
for _, part := range blameParts {
sha := part.Sha
if _, ok := commitNames[sha]; ok {
continue
}
// find the blamePart commit, to look up parent & email address for avatars
commit, ok := commitCache[sha]
var err error
if !ok {
commit, err = ctx.Repo.GitRepo.GetCommit(sha)
if err != nil {
if git.IsErrNotExist(err) {
ctx.NotFound("Repo.GitRepo.GetCommit", err)
} else {
ctx.ServerError("Repo.GitRepo.GetCommit", err)
}
return nil
}
commitCache[sha] = commit
}
commits = append(commits, commit)
}
// populate commit email addresses to later look up avatars.
for _, c := range user_model.ValidateCommitsWithEmails(ctx, commits) {
commitNames[c.ID.String()] = c
}
return commitNames
}
func renderBlame(ctx *context.Context, blameParts []*git.BlamePart, commitNames map[string]*user_model.UserCommit) {
repoLink := ctx.Repo.RepoLink
language, err := files_service.TryGetContentLanguage(ctx.Repo.GitRepo, ctx.Repo.CommitID, ctx.Repo.TreePath)
if err != nil {
log.Error("Unable to get file language for %-v:%s. Error: %v", ctx.Repo.Repository, ctx.Repo.TreePath, err)
}
lines := make([]string, 0)
rows := make([]*blameRow, 0)
escapeStatus := &charset.EscapeStatus{}
var lexerName string
avatarUtils := templates.NewAvatarUtils(ctx)
i := 0
commitCnt := 0
for _, part := range blameParts {
for index, line := range part.Lines {
i++
lines = append(lines, line)
br := &blameRow{
RowNumber: i,
}
commit := commitNames[part.Sha]
if index == 0 {
// Count commit number
commitCnt++
// User avatar image
commitSince := timeutil.TimeSinceUnix(timeutil.TimeStamp(commit.Author.When.Unix()), ctx.Locale)
var avatar string
if commit.User != nil {
avatar = string(avatarUtils.Avatar(commit.User, 18, "gt-mr-3"))
} else {
avatar = string(avatarUtils.AvatarByEmail(commit.Author.Email, commit.Author.Name, 18, "gt-mr-3"))
}
br.Avatar = gotemplate.HTML(avatar)
br.RepoLink = repoLink
br.PartSha = part.Sha
br.PreviousSha = part.PreviousSha
br.PreviousShaURL = fmt.Sprintf("%s/blame/commit/%s/%s", repoLink, url.PathEscape(part.PreviousSha), util.PathEscapeSegments(part.PreviousPath))
br.CommitURL = fmt.Sprintf("%s/commit/%s", repoLink, url.PathEscape(part.Sha))
br.CommitMessage = commit.CommitMessage
br.CommitSince = commitSince
}
if i != len(lines)-1 {
line += "\n"
}
fileName := fmt.Sprintf("%v", ctx.Data["FileName"])
line, lexerNameForLine := highlight.Code(fileName, language, line)
// set lexer name to the first detected lexer. this is certainly suboptimal and
// we should instead highlight the whole file at once
if lexerName == "" {
lexerName = lexerNameForLine
}
br.EscapeStatus, br.Code = charset.EscapeControlHTML(line, ctx.Locale, charset.FileviewContext)
rows = append(rows, br)
escapeStatus = escapeStatus.Or(br.EscapeStatus)
}
}
ctx.Data["EscapeStatus"] = escapeStatus
ctx.Data["BlameRows"] = rows
ctx.Data["CommitCnt"] = commitCnt
ctx.Data["LexerName"] = lexerName
}