diff --git a/.github/dependabot.yml b/.github/dependabot.yml index a2a66d0..41811b8 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -8,8 +8,8 @@ updates: - package-ecosystem: "gomod" # See documentation for possible values directory: "/" # Location of package manifests schedule: - interval: "weekly" + interval: "daily" - package-ecosystem: "github-actions" # See documentation for possible values directory: "/" # Location of package manifests schedule: - interval: "weekly" + interval: "daily" diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 296cc72..62da53d 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -27,7 +27,7 @@ jobs: run: go test -v ./... - name: Upload - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v2 with: name: plattentests-go path: ./plattentests-go diff --git a/go.mod b/go.mod index 03a31c0..8c101e2 100644 --- a/go.mod +++ b/go.mod @@ -11,13 +11,13 @@ require ( github.com/google/go-cmp v0.5.9 // indirect github.com/google/uuid v1.3.0 // indirect github.com/kelseyhightower/envconfig v1.4.0 - github.com/lithammer/fuzzysearch v1.1.5 github.com/mattn/go-ieproxy v0.0.9 // indirect github.com/mattn/go-isatty v0.0.17 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/texttheater/golang-levenshtein/levenshtein v0.0.0-20200805054039-cae8b0eaed6c github.com/ugorji/go/codec v1.2.8 // indirect github.com/zmb3/spotify/v2 v2.3.1 - golang.org/x/crypto v0.5.0 // indirect + golang.org/x/crypto v0.4.0 // indirect golang.org/x/oauth2 v0.4.0 golang.org/x/text v0.6.0 ) diff --git a/go.sum b/go.sum index f4ce89e..c9bea8c 100644 --- a/go.sum +++ b/go.sum @@ -164,8 +164,6 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/leodido/go-urn v1.2.1 h1:BqpAaACuzVSgi/VLzGZIobT2z4v53pjosyNd9Yv6n/w= github.com/leodido/go-urn 
v1.2.1/go.mod h1:zt4jvISO2HfUBqxjfIshjdMTYS56ZS/qv49ictyFfxY= -github.com/lithammer/fuzzysearch v1.1.5 h1:Ag7aKU08wp0R9QCfF4GoGST9HbmAIeLP7xwMrOBEp1c= -github.com/lithammer/fuzzysearch v1.1.5/go.mod h1:1R1LRNk7yKid1BaQkmuLQaHruxcC4HmAH30Dh61Ih1Q= github.com/mattn/go-ieproxy v0.0.1/go.mod h1:pYabZ6IHcRpFh7vIaLfK7rdcWgFEb3SFJ6/gNWuh88E= github.com/mattn/go-ieproxy v0.0.9 h1:RvVbLiMv/Hbjf1gRaC2AQyzwbdVhdId7D2vPnXIml4k= github.com/mattn/go-ieproxy v0.0.9/go.mod h1:eF30/rfdQUO9EnzNIZQr0r9HiLMlZNCpJkHbmMuOAE0= @@ -200,6 +198,8 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/texttheater/golang-levenshtein/levenshtein v0.0.0-20200805054039-cae8b0eaed6c h1:HelZ2kAFadG0La9d+4htN4HzQ68Bm2iM9qKMSMES6xg= +github.com/texttheater/golang-levenshtein/levenshtein v0.0.0-20200805054039-cae8b0eaed6c/go.mod h1:JlzghshsemAMDGZLytTFY8C1JQxQPhnatWqNwUXjggo= github.com/ugorji/go v1.2.7 h1:qYhyWUUd6WbiM+C6JZAUkIJt/1WrjzNHY9+KCIjVqTo= github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M= github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY= @@ -225,8 +225,8 @@ golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20201016220609-9e8e0b390897/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.5.0 h1:U/0M97KRkSFvyD/3FSmdP5W5swImpNgle/EHFhOsQPE= -golang.org/x/crypto v0.5.0/go.mod h1:NK/OQwhpMQP3MwtdjgLlYHnH9ebylxKWv3e0fK+mkQU= 
+golang.org/x/crypto v0.4.0 h1:UVQgzMY87xqpKNgb+kDsll2Igd33HszWHFLmpaRMq/8= +golang.org/x/crypto v0.4.0/go.mod h1:3quD/ATkf6oY+rnes5c3ExXTbLc8mueNue5/DoinL80= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -292,6 +292,7 @@ golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220630215102-69896b714898/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= golang.org/x/net v0.4.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= golang.org/x/net v0.5.0 h1:GyT4nK/YDHSqa1c4753ouYCDajOYKTja9Xb/OHtgvSw= golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= diff --git a/main.go b/main.go index 0c99a8e..618ecf4 100644 --- a/main.go +++ b/main.go @@ -22,7 +22,7 @@ import ( "github.com/kelseyhightower/envconfig" - "github.com/lithammer/fuzzysearch/fuzzy" + "github.com/texttheater/golang-levenshtein/levenshtein" ) const MAX_SEARCH_RESULTS = 3 @@ -175,10 +175,8 @@ func searchSong(client spotify.Client, track string, record crawler.Record) spot } // handle track results only if tracks are available if results.Tracks != nil && results.Tracks.Tracks != nil && len(results.Tracks.Tracks) > 0 { - allTrackNames := []string{} for i, item := range results.Tracks.Tracks { log.Printf(" found item: %s - %s (%s)", item.Artists[0].Name, item.Name, item.Album.Name) - allTrackNames = append(allTrackNames, item.Name) // only get MAX_SEARCH_RESULTS 
results
 			if i >= MAX_SEARCH_RESULTS-1 {
 				break
@@ -186,18 +184,25 @@ func searchSong(client spotify.Client, track string, record crawler.Record) spot
 		}
 
 		item := results.Tracks.Tracks[0]
-		// TODO: do some fuzzy search
-		ranking := fuzzy.RankFind(searchTerm, allTrackNames)
-		log.Printf("%d %d %+v", len(allTrackNames), len(ranking), ranking)
+		bandnameFromSearch := strings.ToLower(item.Artists[0].Name)
+		bandnameFromPlattentests := strings.ToLower(record.Band)
+		distance := levenshtein.DistanceForStrings([]rune(bandnameFromSearch), []rune(bandnameFromPlattentests), levenshtein.DefaultOptions)
 
-		if strings.EqualFold(item.Artists[0].Name, record.Band) {
-			log.Printf(" using item: %s - %s (%s)", item.Artists[0].Name, item.Name, item.Album.Name)
-			return item.ID
-		} else {
-			log.Printf(" not adding item %s - %s (%s) since artists don't match (%s != %s)", item.Artists[0].Name, item.Name, item.Album.Name, record.Band, item.Artists[0].Name)
+		log.Println(" Levenshtein distance between", bandnameFromSearch, "and", bandnameFromPlattentests, ":", distance)
+		threshold := 0.8
+
+		// Normalize by rune count, not byte length: the distance above is computed over runes, so len(string) would overstate similarity for non-ASCII names.
+		calculatedThreshold := 1 - float64(distance)/float64(max(len([]rune(bandnameFromSearch)), len([]rune(bandnameFromPlattentests))))
+		if (calculatedThreshold) < threshold {
+			log.Println(" Levenshtein distance too large")
+			//s := strconv.FormatFloat(calculatedThreshold, 'g', 5, 32)
+			log.Println(" distance is ", distance, " and percenate is ", calculatedThreshold)
+			log.Printf(" not adding item %s - %s (%s) since artists don't match (%s != %s)", bandnameFromSearch, item.Name, item.Album.Name, bandnameFromPlattentests, bandnameFromSearch)
 			return ""
+		} else {
+			log.Printf(" using item: %s - %s (%s)", bandnameFromSearch, item.Name, item.Album.Name)
+			return item.ID
 		}
-
 	}
 
 	if record.Recordname == "" {
@@ -256,3 +261,11 @@ func get_port() string {
 	}
 	return port
 }
+
+// max returns the larger of x and y.
+func max(x, y int) int {
+	if x > y {
+		return x
+	}
+	return y
+}