0
Fork 0
mirror of https://codeberg.org/forgejo/forgejo.git synced 2025-01-12 09:30:35 -05:00
forgejo/modules/git/repo_attribute.go
Gergely Nagy ae0635fd61
Correctly support linguist-documentation=false
If a documentation file is marked with a `linguist-documentation=false`
attribute, include it in language stats.

However, make sure that we do *not* include documentation languages as
fallback.

Added a new test case to exercise the formerly buggy behaviour.

Problem discovered while reviewing @KN4CK3R's tests from gitea#29267.

Signed-off-by: Gergely Nagy <forgejo@gergo.csillger.hu>
2024-02-26 14:18:33 +01:00

338 lines
8 KiB
Go

// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
import (
"bytes"
"context"
"fmt"
"io"
"os"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/optional"
)
// CheckAttributeOpts represents the possible options to CheckAttribute
type CheckAttributeOpts struct {
CachedOnly bool
AllAttributes bool
Attributes []string
Filenames []string
IndexFile string
WorkTree string
}
// CheckAttribute return the Blame object of file
func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[string]string, error) {
env := []string{}
if len(opts.IndexFile) > 0 {
env = append(env, "GIT_INDEX_FILE="+opts.IndexFile)
}
if len(opts.WorkTree) > 0 {
env = append(env, "GIT_WORK_TREE="+opts.WorkTree)
}
if len(env) > 0 {
env = append(os.Environ(), env...)
}
stdOut := new(bytes.Buffer)
stdErr := new(bytes.Buffer)
cmd := NewCommand(repo.Ctx, "check-attr", "-z")
if opts.AllAttributes {
cmd.AddArguments("-a")
} else {
for _, attribute := range opts.Attributes {
if attribute != "" {
cmd.AddDynamicArguments(attribute)
}
}
}
if opts.CachedOnly {
cmd.AddArguments("--cached")
}
cmd.AddDashesAndList(opts.Filenames...)
if err := cmd.Run(&RunOpts{
Env: env,
Dir: repo.Path,
Stdout: stdOut,
Stderr: stdErr,
}); err != nil {
return nil, fmt.Errorf("failed to run check-attr: %w\n%s\n%s", err, stdOut.String(), stdErr.String())
}
// FIXME: This is incorrect on versions < 1.8.5
fields := bytes.Split(stdOut.Bytes(), []byte{'\000'})
if len(fields)%3 != 1 {
return nil, fmt.Errorf("wrong number of fields in return from check-attr")
}
name2attribute2info := make(map[string]map[string]string)
for i := 0; i < (len(fields) / 3); i++ {
filename := string(fields[3*i])
attribute := string(fields[3*i+1])
info := string(fields[3*i+2])
attribute2info := name2attribute2info[filename]
if attribute2info == nil {
attribute2info = make(map[string]string)
}
attribute2info[attribute] = info
name2attribute2info[filename] = attribute2info
}
return name2attribute2info, nil
}
// CheckAttributeReader provides a reader for check-attribute content that can be long running
type CheckAttributeReader struct {
// params
Attributes []string
Repo *Repository
IndexFile string
WorkTree string
stdinReader io.ReadCloser
stdinWriter *os.File
stdOut attributeWriter
cmd *Command
env []string
ctx context.Context
cancel context.CancelFunc
}
// Init initializes the CheckAttributeReader
func (c *CheckAttributeReader) Init(ctx context.Context) error {
if len(c.Attributes) == 0 {
lw := new(nulSeparatedAttributeWriter)
lw.attributes = make(chan attributeTriple)
lw.closed = make(chan struct{})
c.stdOut = lw
c.stdOut.Close()
return fmt.Errorf("no provided Attributes to check")
}
c.ctx, c.cancel = context.WithCancel(ctx)
c.cmd = NewCommand(c.ctx, "check-attr", "--stdin", "-z")
if len(c.IndexFile) > 0 {
c.cmd.AddArguments("--cached")
c.env = append(c.env, "GIT_INDEX_FILE="+c.IndexFile)
}
if len(c.WorkTree) > 0 {
c.env = append(c.env, "GIT_WORK_TREE="+c.WorkTree)
}
// Version 2.43.1 has a bug where the behavior of `GIT_FLUSH` is flipped.
// Ref: https://lore.kernel.org/git/CABn0oJvg3M_kBW-u=j3QhKnO=6QOzk-YFTgonYw_UvFS1NTX4g@mail.gmail.com
if InvertedGitFlushEnv {
c.env = append(c.env, "GIT_FLUSH=0")
} else {
c.env = append(c.env, "GIT_FLUSH=1")
}
c.cmd.AddDynamicArguments(c.Attributes...)
var err error
c.stdinReader, c.stdinWriter, err = os.Pipe()
if err != nil {
c.cancel()
return err
}
lw := new(nulSeparatedAttributeWriter)
lw.attributes = make(chan attributeTriple, 5)
lw.closed = make(chan struct{})
c.stdOut = lw
return nil
}
// Run run cmd
func (c *CheckAttributeReader) Run() error {
defer func() {
_ = c.stdinReader.Close()
_ = c.stdOut.Close()
}()
stdErr := new(bytes.Buffer)
err := c.cmd.Run(&RunOpts{
Env: c.env,
Dir: c.Repo.Path,
Stdin: c.stdinReader,
Stdout: c.stdOut,
Stderr: stdErr,
})
if err != nil && // If there is an error we need to return but:
c.ctx.Err() != err && // 1. Ignore the context error if the context is cancelled or exceeds the deadline (RunWithContext could return c.ctx.Err() which is Canceled or DeadlineExceeded)
err.Error() != "signal: killed" { // 2. We should not pass up errors due to the program being killed
return fmt.Errorf("failed to run attr-check. Error: %w\nStderr: %s", err, stdErr.String())
}
return nil
}
// CheckPath check attr for given path
func (c *CheckAttributeReader) CheckPath(path string) (rs map[string]string, err error) {
defer func() {
if err != nil && err != c.ctx.Err() {
log.Error("Unexpected error when checking path %s in %s. Error: %v", path, c.Repo.Path, err)
}
}()
select {
case <-c.ctx.Done():
return nil, c.ctx.Err()
default:
}
if _, err = c.stdinWriter.Write([]byte(path + "\x00")); err != nil {
defer c.Close()
return nil, err
}
rs = make(map[string]string)
for range c.Attributes {
select {
case attr, ok := <-c.stdOut.ReadAttribute():
if !ok {
return nil, c.ctx.Err()
}
rs[attr.Attribute] = attr.Value
case <-c.ctx.Done():
return nil, c.ctx.Err()
}
}
return rs, nil
}
// Close close pip after use
func (c *CheckAttributeReader) Close() error {
c.cancel()
err := c.stdinWriter.Close()
return err
}
type attributeWriter interface {
io.WriteCloser
ReadAttribute() <-chan attributeTriple
}
type attributeTriple struct {
Filename string
Attribute string
Value string
}
type nulSeparatedAttributeWriter struct {
tmp []byte
attributes chan attributeTriple
closed chan struct{}
working attributeTriple
pos int
}
func (wr *nulSeparatedAttributeWriter) Write(p []byte) (n int, err error) {
l, read := len(p), 0
nulIdx := bytes.IndexByte(p, '\x00')
for nulIdx >= 0 {
wr.tmp = append(wr.tmp, p[:nulIdx]...)
switch wr.pos {
case 0:
wr.working = attributeTriple{
Filename: string(wr.tmp),
}
case 1:
wr.working.Attribute = string(wr.tmp)
case 2:
wr.working.Value = string(wr.tmp)
}
wr.tmp = wr.tmp[:0]
wr.pos++
if wr.pos > 2 {
wr.attributes <- wr.working
wr.pos = 0
}
read += nulIdx + 1
if l > read {
p = p[nulIdx+1:]
nulIdx = bytes.IndexByte(p, '\x00')
} else {
return l, nil
}
}
wr.tmp = append(wr.tmp, p...)
return len(p), nil
}
func (wr *nulSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple {
return wr.attributes
}
func (wr *nulSeparatedAttributeWriter) Close() error {
select {
case <-wr.closed:
return nil
default:
}
close(wr.attributes)
close(wr.closed)
return nil
}
// Create a check attribute reader for the current repository and provided commit ID
func (repo *Repository) CheckAttributeReader(commitID string) (*CheckAttributeReader, context.CancelFunc) {
indexFilename, worktree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
if err != nil {
return nil, func() {}
}
checker := &CheckAttributeReader{
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language", "linguist-documentation", "linguist-detectable"},
Repo: repo,
IndexFile: indexFilename,
WorkTree: worktree,
}
ctx, cancel := context.WithCancel(repo.Ctx)
if err := checker.Init(ctx); err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
} else {
go func() {
err := checker.Run()
if err != nil && err != ctx.Err() {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
}
cancel()
}()
}
deferable := func() {
_ = checker.Close()
cancel()
deleteTemporaryFile()
}
return checker, deferable
}
// true if "set"/"true", false if "unset"/"false", none otherwise
func attributeToBool(attr map[string]string, name string) optional.Option[bool] {
if value, has := attr[name]; has && value != "unspecified" {
switch value {
case "set", "true":
return optional.Some(true)
case "unset", "false":
return optional.Some(false)
}
}
return optional.None[bool]()
}