// opengist/internal/git/output_parser.go

package git
import (
"bufio"
"bytes"
"encoding/csv"
"fmt"
"io"
"regexp"
"strings"
)
// File describes one file of a gist as seen through git output.
type File struct {
	// Filename is the path of the file. parseLog takes it from the "b/" side
	// of a "diff --git" header line.
	Filename string
	// OldFilename is the "a/" side path of a "diff --git" header line. parseLog
	// only sets it when it differs from Filename; otherwise it stays empty.
	OldFilename string
	// Content is the text attached to the file. parseLog stores the diff
	// hunks here (every line from the first "@" line on, each followed by
	// "\n"); ParseCsv reads it as CSV text.
	Content string
	// Truncated reports that the content was cut or dropped because it
	// exceeded a size limit. parseLog clears Content when it sets this flag.
	Truncated bool
	// IsCreated is set by parseLog when a diff header line starts with "new".
	IsCreated bool
	// IsDeleted is set by parseLog when a diff header line starts with "deleted".
	IsDeleted bool
}
// CsvFile is a File whose content has been parsed as CSV by ParseCsv.
type CsvFile struct {
	// File is the file the CSV records were read from; its fields are
	// promoted onto CsvFile.
	File

	// Header is the first CSV record.
	Header []string
	// Rows holds every record that follows the header.
	Rows [][]string
}
type Commit struct {
2023-03-20 12:30:25 +00:00
Hash string
AuthorName string
AuthorEmail string
Timestamp string
Changed string
Files []File
2023-03-18 22:18:20 +00:00
}
// truncateCommandOutput reads out and returns its content as a string, capped
// at maxBytes bytes.
//
// A negative maxBytes disables the cap: everything is read and the result is
// never reported as truncated. Otherwise at most maxBytes bytes are kept, and
// the output is reported as truncated only when out holds more data than
// that. When truncation happens, everything from the last newline on is
// dropped so the result does not end in a partial line.
//
// It returns the (possibly shortened) text, whether it was truncated, and any
// error met while reading.
func truncateCommandOutput(out io.Reader, maxBytes int64) (string, bool, error) {
	if maxBytes < 0 {
		buf, err := io.ReadAll(out)
		if err != nil {
			return "", false, err
		}
		return string(buf), false, nil
	}

	buf, err := io.ReadAll(io.LimitReader(out, maxBytes))
	if err != nil {
		return "", false, err
	}
	if int64(len(buf)) < maxBytes {
		// The reader ran dry before the cap was reached.
		return string(buf), false, nil
	}

	// The cap was reached exactly. Probe for one more byte to tell "the output
	// is exactly maxBytes long" apart from "more output was cut off".
	var probe [1]byte
	if _, err := io.ReadFull(out, probe[:]); err != nil {
		if err == io.EOF {
			return string(buf), false, nil
		}
		return "", false, err
	}

	// Remove the last line, which is most likely incomplete. A newline at
	// index 0 is left alone so the buffer is not reduced to nothing.
	if lastNewline := bytes.LastIndexByte(buf, '\n'); lastNewline > 0 {
		buf = buf[:lastNewline]
	}
	return string(buf), true, nil
}
func parseLog(out io.Reader) []*Commit {
scanner := bufio.NewScanner(out)
var commits []*Commit
var currentCommit *Commit
var currentFile *File
var isContent bool
2023-03-18 22:53:05 +00:00
var bytesRead = 0
2023-03-20 13:05:56 +00:00
scanNext := true
for {
if scanNext && !scanner.Scan() {
break
}
scanNext = true
2023-03-18 22:18:20 +00:00
// new commit found
currentFile = nil
currentCommit = &Commit{Hash: string(scanner.Bytes()[2:]), Files: []File{}}
scanner.Scan()
2023-03-20 12:30:25 +00:00
currentCommit.AuthorName = string(scanner.Bytes()[2:])
scanner.Scan()
currentCommit.AuthorEmail = string(scanner.Bytes()[2:])
2023-03-18 22:18:20 +00:00
scanner.Scan()
currentCommit.Timestamp = string(scanner.Bytes()[2:])
scanner.Scan()
2023-03-20 13:05:56 +00:00
// if there is no shortstat, it means that the commit is empty, we add it and move onto the next one
if scanner.Bytes()[0] != ' ' {
commits = append(commits, currentCommit)
// avoid scanning the next line, as we already did it
scanNext = false
continue
}
2023-03-18 22:18:20 +00:00
changed := scanner.Bytes()[1:]
changed = bytes.ReplaceAll(changed, []byte("(+)"), []byte(""))
changed = bytes.ReplaceAll(changed, []byte("(-)"), []byte(""))
currentCommit.Changed = string(changed)
// twice because --shortstat adds a new line
scanner.Scan()
scanner.Scan()
// commit header parsed
// files changes inside the commit
for {
line := scanner.Bytes()
// end of content of file
if len(line) == 0 {
isContent = false
if currentFile != nil {
currentCommit.Files = append(currentCommit.Files, *currentFile)
}
break
}
// new file found
if bytes.HasPrefix(line, []byte("diff --git")) {
// current file is finished, we can add it to the commit
if currentFile != nil {
currentCommit.Files = append(currentCommit.Files, *currentFile)
}
// create a new file
isContent = false
2023-03-18 22:53:05 +00:00
bytesRead = 0
2023-03-18 22:18:20 +00:00
currentFile = &File{}
filenameRegex := regexp.MustCompile(`^diff --git a/(.+) b/(.+)$`)
matches := filenameRegex.FindStringSubmatch(string(line))
if len(matches) == 3 {
currentFile.Filename = matches[2]
if matches[1] != matches[2] {
currentFile.OldFilename = matches[1]
}
}
scanner.Scan()
continue
}
if bytes.HasPrefix(line, []byte("new")) {
currentFile.IsCreated = true
}
if bytes.HasPrefix(line, []byte("deleted")) {
currentFile.IsDeleted = true
}
// file content found
if line[0] == '@' {
isContent = true
}
if isContent {
currentFile.Content += string(line) + "\n"
2023-03-18 22:53:05 +00:00
bytesRead += len(line)
if bytesRead > 2<<18 {
currentFile.Truncated = true
currentFile.Content = ""
isContent = false
}
2023-03-18 22:18:20 +00:00
}
scanner.Scan()
}
2023-03-20 13:05:56 +00:00
commits = append(commits, currentCommit)
2023-03-18 22:18:20 +00:00
}
return commits
}
// ParseCsv parses file.Content as CSV and returns the file together with its
// header (the first record) and the remaining rows.
//
// It returns an error when the content is not valid CSV, when it contains no
// records at all (for example an empty file), or when a row does not have the
// same number of columns as the header.
func ParseCsv(file *File) (*CsvFile, error) {
	reader := csv.NewReader(strings.NewReader(file.Content))
	records, err := reader.ReadAll()
	if err != nil {
		return nil, err
	}

	// Without any record there is no header row to index.
	if len(records) == 0 {
		return nil, fmt.Errorf("CSV file is empty")
	}

	header := records[0]
	numColumns := len(header)
	for i := 1; i < len(records); i++ {
		if len(records[i]) != numColumns {
			return nil, fmt.Errorf("CSV file has invalid row at index %d", i)
		}
	}

	return &CsvFile{
		File:   *file,
		Header: header,
		Rows:   records[1:],
	}, nil
}