From b3a856a05ea42feb68e06cfc46aa2a4354352324 Mon Sep 17 00:00:00 2001
From: Thomas Miceli <27960254+thomiceli@users.noreply.github.com>
Date: Sat, 30 Dec 2023 23:46:14 +0100
Subject: [PATCH] Optimize reading gist files content (#186)

---
 internal/db/gist.go          |  20 +++---
 internal/git/commands.go     | 142 +++++++++++++++++++++++++++++++++++
 internal/render/highlight.go |   2 +-
 internal/web/gist.go         |   2 +-
 public/embed.scss            |   2 +-
 public/style.css             |   2 +-
 6 files changed, 156 insertions(+), 14 deletions(-)

diff --git a/internal/db/gist.go b/internal/db/gist.go
index a3b4946..136beee 100644
--- a/internal/db/gist.go
+++ b/internal/db/gist.go
@@ -311,24 +311,24 @@ func (gist *Gist) DeleteRepository() error {
 }
 
 func (gist *Gist) Files(revision string, truncate bool) ([]*git.File, error) {
-	var files []*git.File
-	filesStr, err := git.GetFilesOfRepository(gist.User.Username, gist.Uuid, revision)
+	filesCat, err := git.CatFileBatch(gist.User.Username, gist.Uuid, revision, truncate)
 	if err != nil {
 		// if the revision or the file do not exist
-
 		if exiterr, ok := err.(*exec.ExitError); ok && exiterr.ExitCode() == 128 {
 			return nil, &git.RevisionNotFoundError{}
 		}
-
 		return nil, err
 	}
 
-	for _, fileStr := range filesStr {
-		file, err := gist.File(revision, fileStr, truncate)
-		if err != nil {
-			return nil, err
-		}
-		files = append(files, file)
+	var files []*git.File
+	for _, fileCat := range filesCat {
+		files = append(files, &git.File{
+			Filename:  fileCat.Name,
+			Size:      fileCat.Size,
+			HumanSize: humanize.IBytes(fileCat.Size),
+			Content:   fileCat.Content,
+			Truncated: fileCat.Truncated,
+		})
 	}
 	return files, err
 }
diff --git a/internal/git/commands.go b/internal/git/commands.go
index 02f96ab..b176eb2 100644
--- a/internal/git/commands.go
+++ b/internal/git/commands.go
@@ -1,9 +1,11 @@
 package git
 
 import (
+	"bufio"
 	"bytes"
 	"context"
 	"fmt"
+	"io"
 	"os"
 	"os/exec"
 	"path"
@@ -124,6 +126,146 @@ func GetFilesOfRepository(user string, gist string, revision string) ([]string,
 	return slice[:len(slice)-1], nil
 }
 
+// catFileBatch describes one blob of a revision as listed by "git ls-tree -l",
+// together with the content read back through "git cat-file --batch".
+type catFileBatch struct {
+	Name, Hash, Content string
+	Size                uint64
+	Truncated           bool
+}
+
+// CatFileBatch lists the entries of revision in the repository of the given
+// user and gist, then reads every blob through a single "git cat-file --batch"
+// process. When truncate is true, at most truncateLimit bytes of each blob are
+// kept and Truncated is set on the entries that were shortened.
+func CatFileBatch(user string, gist string, revision string, truncate bool) ([]*catFileBatch, error) {
+	repositoryPath := RepositoryPath(user, gist)
+
+	lsTreeCmd := exec.Command("git", "ls-tree", "-l", revision)
+	lsTreeCmd.Dir = repositoryPath
+	lsTreeOutput, err := lsTreeCmd.Output()
+	if err != nil {
+		return nil, err
+	}
+
+	fileMap := make([]*catFileBatch, 0)
+
+	lines := strings.Split(string(lsTreeOutput), "\n")
+	for _, line := range lines {
+		// Each entry is "<mode> <type> <hash> <size>\t<name>"; splitting on the
+		// TAB keeps runs of spaces inside the file name intact.
+		entry := strings.SplitN(line, "\t", 2)
+		if len(entry) != 2 {
+			continue // Skip lines without a file name
+		}
+
+		fields := strings.Fields(entry[0])
+		if len(fields) < 4 {
+			continue // Skip lines that don't have enough fields
+		}
+
+		hash := fields[2]
+		size, err := strconv.ParseUint(fields[3], 10, 64)
+		if err != nil {
+			continue // Skip lines with invalid size field
+		}
+
+		fileMap = append(fileMap, &catFileBatch{
+			Hash: hash,
+			Size: size,
+			Name: entry[1],
+		})
+	}
+
+	catFileCmd := exec.Command("git", "cat-file", "--batch")
+	catFileCmd.Dir = repositoryPath
+	stdin, err := catFileCmd.StdinPipe()
+	if err != nil {
+		return nil, err
+	}
+	stdout, err := catFileCmd.StdoutPipe()
+	if err != nil {
+		return nil, err
+	}
+	if err = catFileCmd.Start(); err != nil {
+		return nil, err
+	}
+
+	// Kill and reap the child on every early return below so the process and
+	// its pipes are never leaked; errors are ignored as we are already failing
+	waited := false
+	defer func() {
+		if !waited {
+			_ = stdin.Close()
+			_ = catFileCmd.Process.Kill()
+			_ = catFileCmd.Wait()
+		}
+	}()
+
+	reader := bufio.NewReader(stdout)
+
+	for _, file := range fileMap {
+		_, err = stdin.Write([]byte(file.Hash + "\n"))
+		if err != nil {
+			return nil, err
+		}
+
+		header, err := reader.ReadString('\n')
+		if err != nil {
+			return nil, err
+		}
+
+		// A regular reply is "<hash> <type> <size>"; a shorter one, such as
+		// "<hash> missing", carries no size and is followed by no content
+		parts := strings.Fields(header)
+		if len(parts) < 3 {
+			continue // Not a valid header, skip this entry
+		}
+
+		size, err := strconv.ParseUint(parts[2], 10, 64)
+		if err != nil {
+			return nil, err
+		}
+
+		sizeToRead := size
+		if truncate && sizeToRead > truncateLimit {
+			sizeToRead = truncateLimit
+		}
+
+		// Read exactly size bytes from header, or the max allowed if truncated
+		content := make([]byte, sizeToRead)
+		if _, err = io.ReadFull(reader, content); err != nil {
+			return nil, err
+		}
+
+		file.Content = string(content)
+
+		if truncate && size > truncateLimit {
+			// skip other bytes if truncated
+			if _, err = reader.Discard(int(size - truncateLimit)); err != nil {
+				return nil, err
+			}
+			file.Truncated = true
+		}
+
+		// Read the blank line following the content
+		if _, err := reader.ReadByte(); err != nil {
+			return nil, err
+		}
+	}
+
+	if err = stdin.Close(); err != nil {
+		return nil, err
+	}
+
+	waited = true
+	if err = catFileCmd.Wait(); err != nil {
+		return nil, err
+	}
+
+	return fileMap, nil
+}
+
 func GetFileContent(user string, gist string, revision string, filename string, truncate bool) (string, bool, error) {
 	repositoryPath := RepositoryPath(user, gist)
 
diff --git a/internal/render/highlight.go b/internal/render/highlight.go
index 01d0a6b..bcaeea4 100644
--- a/internal/render/highlight.go
+++ b/internal/render/highlight.go
@@ -39,7 +39,7 @@ func HighlightFile(file *git.File) (RenderedFile, error) {
 
 	formatter := html.New(html.WithClasses(true), html.PreventSurroundingPre(true))
 
-	iterator, err := lexer.Tokenise(nil, file.Content)
+	iterator, err := lexer.Tokenise(nil, file.Content+"\n")
 	if err != nil {
 		return rendered, err
 	}
diff --git a/internal/web/gist.go b/internal/web/gist.go
index 88b8695..cc9e278 100644
--- a/internal/web/gist.go
+++ b/internal/web/gist.go
@@ -704,7 +704,7 @@ func downloadZip(ctx echo.Context) error {
 	gist := getData(ctx, "gist").(*db.Gist)
 	revision := ctx.Param("revision")
 
-	files, err := gist.Files(revision, true)
+	files, err := gist.Files(revision, false)
 	if err != nil {
 		return errorRes(500, "Error fetching files from repository", err)
 	}
diff --git a/public/embed.scss b/public/embed.scss
index 584beda..fc1d354 100644
--- a/public/embed.scss
+++ b/public/embed.scss
@@ -107,6 +107,6 @@ dl.dl-config dd {
     @apply overflow-auto whitespace-pre;
 }
 
-.chroma.preview.markdown code {
+.chroma.preview.markdown pre code {
     @apply p-4;
 }
diff --git a/public/style.css b/public/style.css
index c0e3455..4763dbe 100644
--- a/public/style.css
+++ b/public/style.css
@@ -167,7 +167,7 @@ dl.dl-config dd {
     @apply overflow-auto whitespace-pre !important;
 }
 
-.chroma.preview.markdown code {
+.chroma.preview.markdown pre code {
     @apply p-4 !important;
 }
 