0
Fork 0
mirror of https://codeberg.org/forgejo/forgejo.git synced 2025-01-20 21:32:45 -05:00
forgejo/modules/csv/csv.go
Richard Mahn 98f7013756
Prevent NPE in CSV diff rendering when column removed (#17018)
Fixes #16837 if a column is deleted.

We were clobbering the columns that were added by looping through the aline (base) and then when bline (head) was looped through, it clobbered what was in the "cells" array that is show in the diff, and then left a nil cell because nothing was shifted.

This fix properly shifts the cells, and properly puts the b cell either at its location or after, according to what the aline placed in the cells.

This includes test, adding a new test function since adding/removing cells works best with three columns, not two, which results in 4 columns of the resulting cells because it has a deleted column and an added column. If you try this locally, you can try those cases and others, such as adding a column.

There was no need to do anything special for the rows when `aline == 0 || bline == 0` so that was removed. This allows the same code to be used for removed or added lines, with the bcell text always being the RightCell, acell text being the LeftCell.

I still added the patch zeripath gave at https://github.com/go-gitea/gitea/issues/16837#issuecomment-913007382 so that just in case for some reason a cell is nil (which shouldn't happen now) it doesn't throw a 500 error, so the user can at least view the raw diff.

Also fixes in the [view.go](https://github.com/go-gitea/gitea/pull/17018/files#diff-43a7f4747c7ba8bff888c9be11affaafd595fd55d27f3333840eb19df9fad393L521) file how if a CSV file is empty (either created empty or if you edit it and remove all contents) it throws a huge 500 error when you then save it (when you view the file). Since we allow creating, saving and pushing empty files, we shouldn't throw an error on an empty CSV file, but just show its empty contents. This doesn't happen if it is a Markdown file or other type of file that is empty.
EDIT: Now handled in the markup/csv renderer code
2021-10-20 20:10:03 +01:00

111 lines
2.8 KiB
Go

// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package csv
import (
"bytes"
stdcsv "encoding/csv"
"errors"
"io"
"regexp"
"strings"
"code.gitea.io/gitea/modules/translation"
"code.gitea.io/gitea/modules/util"
)
var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`)
// CreateReader creates a csv.Reader with the given delimiter.
func CreateReader(input io.Reader, delimiter rune) *stdcsv.Reader {
rd := stdcsv.NewReader(input)
rd.Comma = delimiter
rd.TrimLeadingSpace = true
return rd
}
// CreateReaderAndGuessDelimiter tries to guess the field delimiter from the content and creates a csv.Reader.
func CreateReaderAndGuessDelimiter(rd io.Reader) (*stdcsv.Reader, error) {
var data = make([]byte, 1e4)
size, err := rd.Read(data)
if err != nil {
if err == io.EOF {
return CreateReader(bytes.NewReader([]byte{}), rune(',')), nil
}
return nil, err
}
delimiter := guessDelimiter(data[:size])
var newInput io.Reader
if size < 1e4 {
newInput = bytes.NewReader(data[:size])
} else {
newInput = io.MultiReader(bytes.NewReader(data), rd)
}
return CreateReader(newInput, delimiter), nil
}
// guessDelimiter scores the input CSV data against delimiters, and returns the best match.
// Reads at most 10k bytes & 10 lines.
func guessDelimiter(data []byte) rune {
maxLines := 10
maxBytes := util.Min(len(data), 1e4)
text := string(data[:maxBytes])
text = quoteRegexp.ReplaceAllLiteralString(text, "")
lines := strings.SplitN(text, "\n", maxLines+1)
lines = lines[:util.Min(maxLines, len(lines))]
delimiters := []rune{',', ';', '\t', '|', '@'}
bestDelim := delimiters[0]
bestScore := 0.0
for _, delim := range delimiters {
score := scoreDelimiter(lines, delim)
if score > bestScore {
bestScore = score
bestDelim = delim
}
}
return bestDelim
}
// scoreDelimiter uses a count & regularity metric to evaluate a delimiter against lines of CSV.
func scoreDelimiter(lines []string, delim rune) float64 {
countTotal := 0
countLineMax := 0
linesNotEqual := 0
for _, line := range lines {
if len(line) == 0 {
continue
}
countLine := strings.Count(line, string(delim))
countTotal += countLine
if countLine != countLineMax {
if countLineMax != 0 {
linesNotEqual++
}
countLineMax = util.Max(countLine, countLineMax)
}
}
return float64(countTotal) * (1 - float64(linesNotEqual)/float64(len(lines)))
}
// FormatError converts csv errors into readable messages.
func FormatError(err error, locale translation.Locale) (string, error) {
var perr *stdcsv.ParseError
if errors.As(err, &perr) {
if perr.Err == stdcsv.ErrFieldCount {
return locale.Tr("repo.error.csv.invalid_field_count", perr.Line), nil
}
return locale.Tr("repo.error.csv.unexpected", perr.Line, perr.Column), nil
}
return "", err
}