From f6bd13fa06a548386e2bf731c4493c4541ecf0c0 Mon Sep 17 00:00:00 2001 From: Zack Date: Tue, 28 May 2024 16:08:31 -0700 Subject: [PATCH 1/2] show hash progress on large files --- src/croc/croc.go | 2 +- src/utils/utils.go | 57 ++++++++++++++++++++++++++++++++++------- src/utils/utils_test.go | 12 ++++----- 3 files changed, 55 insertions(+), 16 deletions(-) diff --git a/src/croc/croc.go b/src/croc/croc.go index 7c2f19330..f3d7e788a 100644 --- a/src/croc/croc.go +++ b/src/croc/croc.go @@ -506,7 +506,7 @@ func (c *Client) sendCollectFiles(filesInfo []FileInfo) (err error) { c.Options.HashAlgorithm = "xxhash" } - c.FilesToTransfer[i].Hash, err = utils.HashFile(fullPath, c.Options.HashAlgorithm) + c.FilesToTransfer[i].Hash, err = utils.HashFile(fullPath, c.Options.HashAlgorithm, fileInfo.Size > 1e7) log.Debugf("hashed %s to %x using %s", fullPath, c.FilesToTransfer[i].Hash, c.Options.HashAlgorithm) totalFilesSize += fileInfo.Size if err != nil { diff --git a/src/utils/utils.go b/src/utils/utils.go index b97e99cf5..989322ff2 100644 --- a/src/utils/utils.go +++ b/src/utils/utils.go @@ -26,6 +26,7 @@ import ( "github.com/cespare/xxhash" "github.com/kalafut/imohash" "github.com/schollz/mnemonicode" + "github.com/schollz/progressbar/v3" ) const NbPinNumbers = 4 @@ -77,7 +78,11 @@ func GetInput(prompt string) string { // HashFile returns the hash of a file or, in case of a symlink, the // SHA256 hash of its target. Takes an argument to specify the algorithm to use. -func HashFile(fname string, algorithm string) (hash256 []byte, err error) { +func HashFile(fname string, algorithm string, showProgress ...bool) (hash256 []byte, err error) { + doShowProgress := false + if len(showProgress) > 0 { + doShowProgress = showProgress[0] + } var fstats os.FileInfo fstats, err = os.Lstat(fname) if err != nil { @@ -95,16 +100,16 @@ func HashFile(fname string, algorithm string) (hash256 []byte, err error) { case "imohash": return IMOHashFile(fname) case "md5": - return MD5HashFile(fname) + return MD5HashFile(fname, doShowProgress) case "xxhash": - return XXHashFile(fname) + return XXHashFile(fname, doShowProgress) } err = fmt.Errorf("unspecified algorithm") return } // MD5HashFile returns MD5 hash -func MD5HashFile(fname string) (hash256 []byte, err error) { +func MD5HashFile(fname string, doShowProgress bool) (hash256 []byte, err error) { f, err := os.Open(fname) if err != nil { return @@ -112,8 +117,25 @@ func MD5HashFile(fname string) (hash256 []byte, err error) { defer f.Close() h := md5.New() - if _, err = io.Copy(h, f); err != nil { - return + if doShowProgress { + stat, _ := f.Stat() + fnameShort := path.Base(fname) + if len(fnameShort) > 20 { + fnameShort = fnameShort[:20] + "..." + } + bar := progressbar.NewOptions64(stat.Size(), + progressbar.OptionSetWriter(os.Stderr), + progressbar.OptionShowBytes(true), + progressbar.OptionSetDescription(fmt.Sprintf("Hashing %s", fnameShort)), + progressbar.OptionClearOnFinish(), + ) + if _, err = io.Copy(io.MultiWriter(h, bar), f); err != nil { + return + } + } else { + if _, err = io.Copy(h, f); err != nil { + return + } } hash256 = h.Sum(nil) @@ -137,7 +159,7 @@ func IMOHashFileFull(fname string) (hash []byte, err error) { } // XXHashFile returns the xxhash of a file -func XXHashFile(fname string) (hash256 []byte, err error) { +func XXHashFile(fname string, doShowProgress bool) (hash256 []byte, err error) { f, err := os.Open(fname) if err != nil { return @@ -145,8 +167,25 @@ func XXHashFile(fname string) (hash256 []byte, err error) { defer f.Close() h := xxhash.New() - if _, err = io.Copy(h, f); err != nil { - return + if doShowProgress { + stat, _ := f.Stat() + fnameShort := path.Base(fname) + if len(fnameShort) > 20 { + fnameShort = fnameShort[:20] + "..." + } + bar := progressbar.NewOptions64(stat.Size(), + progressbar.OptionSetWriter(os.Stderr), + progressbar.OptionShowBytes(true), + progressbar.OptionSetDescription(fmt.Sprintf("Hashing %s", fnameShort)), + progressbar.OptionClearOnFinish(), + ) + if _, err = io.Copy(io.MultiWriter(h, bar), f); err != nil { + return + } + } else { + if _, err = io.Copy(h, f); err != nil { + return + } } hash256 = h.Sum(nil) diff --git a/src/utils/utils_test.go b/src/utils/utils_test.go index ac8b62558..df74d10ae 100644 --- a/src/utils/utils_test.go +++ b/src/utils/utils_test.go @@ -24,7 +24,7 @@ func BenchmarkMD5(b *testing.B) { bigFile() b.ResetTimer() for i := 0; i < b.N; i++ { - MD5HashFile("bigfile.test") + MD5HashFile("bigfile.test", false) } } @@ -32,7 +32,7 @@ func BenchmarkXXHash(b *testing.B) { bigFile() b.ResetTimer() for i := 0; i < b.N; i++ { - XXHashFile("bigfile.test") + XXHashFile("bigfile.test", false) } } @@ -78,10 +78,10 @@ func TestExists(t *testing.T) { func TestMD5HashFile(t *testing.T) { bigFile() defer os.Remove("bigfile.test") - b, err := MD5HashFile("bigfile.test") + b, err := MD5HashFile("bigfile.test", false) assert.Nil(t, err) assert.Equal(t, "8304ff018e02baad0e3555bade29a405", fmt.Sprintf("%x", b)) - _, err = MD5HashFile("bigfile.test.nofile") + _, err = MD5HashFile("bigfile.test.nofile", false) assert.NotNil(t, err) } @@ -96,10 +96,10 @@ func TestIMOHashFile(t *testing.T) { func TestXXHashFile(t *testing.T) { bigFile() defer os.Remove("bigfile.test") - b, err := XXHashFile("bigfile.test") + b, err := XXHashFile("bigfile.test", false) assert.Nil(t, err) assert.Equal(t, "4918740eb5ccb6f7", fmt.Sprintf("%x", b)) - _, err = XXHashFile("nofile") + _, err = XXHashFile("nofile", false) assert.NotNil(t, err) } From c0c3370d9bb6520c59cdd384573e2f6c62813df8 Mon Sep 17 00:00:00 2001 From: Zack Date: Tue, 28 May 2024 16:19:38 -0700 Subject: [PATCH 2/2] add highway hash --- go.mod | 2 ++ go.sum | 3 +++ src/utils/utils.go | 44 +++++++++++++++++++++++++++++++++++++++++ src/utils/utils_test.go | 18 +++++++++++++++++ 4 files changed, 67 insertions(+) diff --git a/go.mod b/go.mod index 1f5a85c76..60d30cc71 100644 --- a/go.mod +++ b/go.mod @@ -20,6 +20,8 @@ require ( golang.org/x/time v0.5.0 ) +require github.com/minio/highwayhash v1.0.2 + require ( github.com/OneOfOne/xxhash v1.2.8 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect diff --git a/go.sum b/go.sum index ab3a907df..208a89683 100644 --- a/go.sum +++ b/go.sum @@ -29,6 +29,8 @@ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/magisterquis/connectproxy v0.0.0-20200725203833-3582e84f0c9b h1:xZ59n7Frzh8CwyfAapUZLSg+gXH5m63YEaFCMpDHhpI= github.com/magisterquis/connectproxy v0.0.0-20200725203833-3582e84f0c9b/go.mod h1:uDd4sYVYsqcxAB8j+Q7uhL6IJCs/r1kxib1HV4bgOMg= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/minio/highwayhash v1.0.2 h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA/g= +github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -86,6 +88,7 @@ golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190130150945-aca44879d564/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/src/utils/utils.go b/src/utils/utils.go index 989322ff2..acb016c23 100644 --- a/src/utils/utils.go +++ b/src/utils/utils.go @@ -25,6 +25,7 @@ import ( "github.com/cespare/xxhash" "github.com/kalafut/imohash" + "github.com/minio/highwayhash" "github.com/schollz/mnemonicode" "github.com/schollz/progressbar/v3" ) @@ -103,11 +104,54 @@ func HashFile(fname string, algorithm string, showProgress ...bool) (hash256 []b return MD5HashFile(fname, doShowProgress) case "xxhash": return XXHashFile(fname, doShowProgress) + case "highway": + return HighwayHashFile(fname, doShowProgress) } err = fmt.Errorf("unspecified algorithm") return } +// HighwayHashFile returns highwayhash of a file +func HighwayHashFile(fname string, doShowProgress bool) (hashHighway []byte, err error) { + f, err := os.Open(fname) + if err != nil { + return + } + defer f.Close() + key, err := hex.DecodeString("1553c5383fb0b86578c3310da665b4f6e0521acf22eb58a99532ffed02a6b115") + if err != nil { + return + } + h, err := highwayhash.New(key) + if err != nil { + err = fmt.Errorf("could not create highwayhash: %s", err.Error()) + return + } + if doShowProgress { + stat, _ := f.Stat() + fnameShort := path.Base(fname) + if len(fnameShort) > 20 { + fnameShort = fnameShort[:20] + "..." + } + bar := progressbar.NewOptions64(stat.Size(), + progressbar.OptionSetWriter(os.Stderr), + progressbar.OptionShowBytes(true), + progressbar.OptionSetDescription(fmt.Sprintf("Hashing %s", fnameShort)), + progressbar.OptionClearOnFinish(), + ) + if _, err = io.Copy(io.MultiWriter(h, bar), f); err != nil { + return + } + } else { + if _, err = io.Copy(h, f); err != nil { + return + } + } + + hashHighway = h.Sum(nil) + return +} + // MD5HashFile returns MD5 hash func MD5HashFile(fname string, doShowProgress bool) (hash256 []byte, err error) { f, err := os.Open(fname) diff --git a/src/utils/utils_test.go b/src/utils/utils_test.go index df74d10ae..39c91b98e 100644 --- a/src/utils/utils_test.go +++ b/src/utils/utils_test.go @@ -44,6 +44,14 @@ func BenchmarkImoHash(b *testing.B) { } } +func BenchmarkHighwayHash(b *testing.B) { + bigFile() + b.ResetTimer() + for i := 0; i < b.N; i++ { + HighwayHashFile("bigfile.test", false) + } +} + func BenchmarkImoHashFull(b *testing.B) { bigFile() b.ResetTimer() @@ -85,6 +93,16 @@ func TestMD5HashFile(t *testing.T) { assert.NotNil(t, err) } +func TestHighwayHashFile(t *testing.T) { + bigFile() + defer os.Remove("bigfile.test") + b, err := HighwayHashFile("bigfile.test", false) + assert.Nil(t, err) + assert.Equal(t, "3c32999529323ed66a67aeac5720c7bf1301dcc5dca87d8d46595e85ff990329", fmt.Sprintf("%x", b)) + _, err = HighwayHashFile("bigfile.test.nofile", false) + assert.NotNil(t, err) +} + func TestIMOHashFile(t *testing.T) { bigFile() defer os.Remove("bigfile.test")