359 lines
7.8 KiB
Go
359 lines
7.8 KiB
Go
package match
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"math"
|
||
"regexp"
|
||
"sort"
|
||
"strings"
|
||
"sync"
|
||
|
||
"navimigrate/internal/model"
|
||
"navimigrate/internal/navidrome"
|
||
)
|
||
|
||
type Searcher interface {
|
||
SearchTracks(ctx context.Context, query string, limit int) ([]navidrome.Track, error)
|
||
}
|
||
|
||
type Matcher struct {
|
||
searcher Searcher
|
||
threshold float64
|
||
cacheMu sync.RWMutex
|
||
cache map[string][]navidrome.Track
|
||
}
|
||
|
||
func NewMatcher(searcher Searcher, threshold float64) *Matcher {
|
||
if threshold < 0 {
|
||
threshold = 45
|
||
}
|
||
return &Matcher{
|
||
searcher: searcher,
|
||
threshold: threshold,
|
||
cache: map[string][]navidrome.Track{},
|
||
}
|
||
}
|
||
|
||
func (m *Matcher) MatchTrack(ctx context.Context, src model.Track) model.MatchedTrack {
|
||
queries := m.buildQueries(src)
|
||
if len(queries) == 0 {
|
||
return model.MatchedTrack{Source: src, Matched: false, Reason: "no usable metadata"}
|
||
}
|
||
|
||
type scored struct {
|
||
track navidrome.Track
|
||
score float64
|
||
query string
|
||
}
|
||
|
||
best := scored{score: -999}
|
||
seen := map[string]struct{}{}
|
||
for _, q := range queries {
|
||
candidates, err := m.searchCached(ctx, q)
|
||
if err != nil {
|
||
continue
|
||
}
|
||
for _, c := range candidates {
|
||
if _, ok := seen[c.ID]; ok {
|
||
continue
|
||
}
|
||
seen[c.ID] = struct{}{}
|
||
score := scoreCandidate(src, c)
|
||
if score > best.score {
|
||
best = scored{track: c, score: score, query: q}
|
||
}
|
||
}
|
||
}
|
||
|
||
if best.track.ID == "" {
|
||
return model.MatchedTrack{Source: src, Matched: false, Reason: "no candidates"}
|
||
}
|
||
|
||
if best.score >= m.threshold {
|
||
return model.MatchedTrack{
|
||
Source: src,
|
||
TargetID: best.track.ID,
|
||
Score: best.score,
|
||
Query: best.query,
|
||
Matched: true,
|
||
}
|
||
}
|
||
|
||
reason := fmt.Sprintf("best score %.1f below threshold %.1f", best.score, m.threshold)
|
||
return model.MatchedTrack{
|
||
Source: src,
|
||
TargetID: best.track.ID,
|
||
Score: best.score,
|
||
Query: best.query,
|
||
Matched: false,
|
||
Reason: reason,
|
||
}
|
||
}
|
||
|
||
func (m *Matcher) searchCached(ctx context.Context, q string) ([]navidrome.Track, error) {
|
||
q = strings.TrimSpace(q)
|
||
if q == "" {
|
||
return nil, nil
|
||
}
|
||
|
||
m.cacheMu.RLock()
|
||
if v, ok := m.cache[q]; ok {
|
||
m.cacheMu.RUnlock()
|
||
return v, nil
|
||
}
|
||
m.cacheMu.RUnlock()
|
||
|
||
res, err := m.searcher.SearchTracks(ctx, q, 20)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
m.cacheMu.Lock()
|
||
m.cache[q] = res
|
||
m.cacheMu.Unlock()
|
||
|
||
return res, nil
|
||
}
|
||
|
||
func (m *Matcher) buildQueries(src model.Track) []string {
|
||
title := strings.TrimSpace(src.Title)
|
||
if title == "" {
|
||
return nil
|
||
}
|
||
artist := ""
|
||
if len(src.Artists) > 0 {
|
||
artist = src.Artists[0]
|
||
}
|
||
latinTitle := strings.TrimSpace(transliterateToLatin(title))
|
||
latinArtist := strings.TrimSpace(transliterateToLatin(artist))
|
||
|
||
queries := []string{}
|
||
if src.ISRC != "" {
|
||
queries = append(queries, src.ISRC)
|
||
}
|
||
queries = append(queries, strings.TrimSpace(title+" "+artist))
|
||
if latinTitle != "" {
|
||
queries = append(queries, strings.TrimSpace(latinTitle+" "+latinArtist))
|
||
}
|
||
|
||
cleanTitle := cleanTitle(title)
|
||
if cleanTitle != title {
|
||
queries = append(queries, strings.TrimSpace(cleanTitle+" "+artist))
|
||
latinClean := strings.TrimSpace(transliterateToLatin(cleanTitle))
|
||
if latinClean != "" {
|
||
queries = append(queries, strings.TrimSpace(latinClean+" "+latinArtist))
|
||
}
|
||
}
|
||
|
||
queries = append(queries, title)
|
||
if latinTitle != "" {
|
||
queries = append(queries, latinTitle)
|
||
}
|
||
|
||
uniq := map[string]struct{}{}
|
||
out := make([]string, 0, len(queries))
|
||
for _, q := range queries {
|
||
q = strings.TrimSpace(q)
|
||
if q == "" {
|
||
continue
|
||
}
|
||
if _, ok := uniq[q]; ok {
|
||
continue
|
||
}
|
||
uniq[q] = struct{}{}
|
||
out = append(out, q)
|
||
}
|
||
return out
|
||
}
|
||
|
||
func scoreCandidate(src model.Track, dst navidrome.Track) float64 {
|
||
score := 0.0
|
||
|
||
if src.ISRC != "" && hasISRC(dst.ISRCs, src.ISRC) {
|
||
score += 60
|
||
}
|
||
|
||
score += 25 * similarity(normalize(src.Title), normalize(dst.Title))
|
||
|
||
primaryArtist := ""
|
||
if len(src.Artists) > 0 {
|
||
primaryArtist = src.Artists[0]
|
||
}
|
||
if primaryArtist != "" {
|
||
score += 20 * similarity(normalize(primaryArtist), normalize(dst.Artist))
|
||
}
|
||
|
||
if src.DurationMS > 0 && dst.Duration > 0 {
|
||
delta := math.Abs(float64(src.DurationMS/1000 - dst.Duration))
|
||
switch {
|
||
case delta <= 2:
|
||
score += 10
|
||
case delta <= 5:
|
||
score += 7
|
||
case delta <= 10:
|
||
score += 4
|
||
case delta > 25:
|
||
score -= 6
|
||
}
|
||
}
|
||
|
||
nt := normalize(src.Title)
|
||
dt := normalize(dst.Title)
|
||
if !strings.Contains(nt, "live") && strings.Contains(dt, "live") {
|
||
score -= 8
|
||
}
|
||
if !strings.Contains(nt, "remix") && strings.Contains(dt, "remix") {
|
||
score -= 6
|
||
}
|
||
if strings.Contains(dt, "karaoke") {
|
||
score -= 12
|
||
}
|
||
|
||
return score
|
||
}
|
||
|
||
// hasISRC reports whether wanted appears in candidates.
//
// Codes are compared case-insensitively after removing hyphens and ASCII
// whitespace, so the hyphenated presentation form ("US-RC1-76-07839") and the
// compact form ("USRC17607839") of the same ISRC are treated as equal. A
// wanted code that is empty after this clean-up never matches.
func hasISRC(candidates []string, wanted string) bool {
	// canonical strips the separators that are not part of the code itself.
	canonical := func(code string) string {
		return strings.Map(func(r rune) rune {
			switch r {
			case '-', ' ', '\t', '\n', '\r':
				return -1 // drop the rune
			}
			return r
		}, strings.TrimSpace(code))
	}

	wanted = canonical(wanted)
	if wanted == "" {
		return false
	}
	for _, c := range candidates {
		if strings.EqualFold(canonical(c), wanted) {
			return true
		}
	}
	return false
}
|
||
|
||
// nonAlphaNum matches every run of characters other than lowercase ASCII
// letters and digits. Uppercase letters are matched too, so input has to be
// lowercased first (normalize does this before applying the pattern).
var nonAlphaNum = regexp.MustCompile(`[^a-z0-9]+`)
|
||
|
||
func normalize(s string) string {
|
||
s = transliterateToLatin(s)
|
||
s = strings.ToLower(strings.TrimSpace(s))
|
||
s = strings.ReplaceAll(s, "&", " and ")
|
||
s = nonAlphaNum.ReplaceAllString(s, " ")
|
||
tokens := strings.Fields(s)
|
||
return strings.Join(tokens, " ")
|
||
}
|
||
|
||
// cyrillicToLatin maps Cyrillic letters to a lowercase Latin transliteration.
// Both the lowercase and the uppercase form of a letter map to the same
// lowercase output. The hard and soft signs map to the empty string, i.e. they
// are dropped. The last four rows are letters used in Ukrainian.
var cyrillicToLatin = map[rune]string{
	'а': "a", 'А': "a",
	'б': "b", 'Б': "b",
	'в': "v", 'В': "v",
	'г': "g", 'Г': "g",
	'д': "d", 'Д': "d",
	'е': "e", 'Е': "e",
	'ё': "e", 'Ё': "e",
	'ж': "zh", 'Ж': "zh",
	'з': "z", 'З': "z",
	'и': "i", 'И': "i",
	'й': "i", 'Й': "i",
	'к': "k", 'К': "k",
	'л': "l", 'Л': "l",
	'м': "m", 'М': "m",
	'н': "n", 'Н': "n",
	'о': "o", 'О': "o",
	'п': "p", 'П': "p",
	'р': "r", 'Р': "r",
	'с': "s", 'С': "s",
	'т': "t", 'Т': "t",
	'у': "u", 'У': "u",
	'ф': "f", 'Ф': "f",
	'х': "h", 'Х': "h",
	'ц': "ts", 'Ц': "ts",
	'ч': "ch", 'Ч': "ch",
	'ш': "sh", 'Ш': "sh",
	'щ': "shch", 'Щ': "shch",
	'ъ': "", 'Ъ': "",
	'ы': "y", 'Ы': "y",
	'ь': "", 'Ь': "",
	'э': "e", 'Э': "e",
	'ю': "yu", 'Ю': "yu",
	'я': "ya", 'Я': "ya",
	'і': "i", 'І': "i",
	'ї': "yi", 'Ї': "yi",
	'є': "ye", 'Є': "ye",
	'ґ': "g", 'Ґ': "g",
}
|
||
|
||
func transliterateToLatin(s string) string {
|
||
if s == "" {
|
||
return s
|
||
}
|
||
b := strings.Builder{}
|
||
b.Grow(len(s) + 8)
|
||
for _, r := range s {
|
||
if v, ok := cyrillicToLatin[r]; ok {
|
||
b.WriteString(v)
|
||
continue
|
||
}
|
||
b.WriteRune(r)
|
||
}
|
||
return b.String()
|
||
}
|
||
|
||
// cleanupRe matches, case-insensitively, the edition/version decorations that
// cleanTitle strips from a title. It has two alternatives:
//
//   - a parenthesised group, together with any whitespace before it, whose
//     content contains one of remaster, remastered, live, mono, stereo,
//     version, deluxe, explicit, clean or bonus. The keyword is matched
//     anywhere inside the parentheses, including inside a longer word;
//   - a dash (optionally surrounded by whitespace) immediately followed by
//     remaster, remastered, live, version, edit or radio edit, together with
//     everything after it up to the end of the line.
var cleanupRe = regexp.MustCompile(`(?i)\s*\(([^)]*(remaster|remastered|live|mono|stereo|version|deluxe|explicit|clean|bonus)[^)]*)\)|\s*-\s*(remaster(ed)?|live|version|edit|radio edit).*`)
|
||
|
||
func cleanTitle(s string) string {
|
||
clean := cleanupRe.ReplaceAllString(s, "")
|
||
clean = strings.TrimSpace(clean)
|
||
if clean == "" {
|
||
return s
|
||
}
|
||
return clean
|
||
}
|
||
|
||
func similarity(a, b string) float64 {
|
||
if a == "" || b == "" {
|
||
return 0
|
||
}
|
||
if a == b {
|
||
return 1
|
||
}
|
||
ta := tokenSet(a)
|
||
tb := tokenSet(b)
|
||
if len(ta) == 0 || len(tb) == 0 {
|
||
return 0
|
||
}
|
||
|
||
inter := 0
|
||
for t := range ta {
|
||
if _, ok := tb[t]; ok {
|
||
inter++
|
||
}
|
||
}
|
||
if inter == 0 {
|
||
return 0
|
||
}
|
||
|
||
jaccard := float64(inter) / float64(len(ta)+len(tb)-inter)
|
||
lev := levenshteinRatio(a, b)
|
||
return (jaccard * 0.6) + (lev * 0.4)
|
||
}
|
||
|
||
// tokenSet splits s on whitespace and returns the distinct tokens as a set.
// The returned map is never nil; it is empty when s has no tokens.
func tokenSet(s string) map[string]struct{} {
	words := strings.Fields(s)
	unique := make(map[string]struct{}, len(words))
	for i := range words {
		unique[words[i]] = struct{}{}
	}
	return unique
}
|
||
|
||
func levenshteinRatio(a, b string) float64 {
|
||
ar := []rune(a)
|
||
br := []rune(b)
|
||
if len(ar) == 0 || len(br) == 0 {
|
||
return 0
|
||
}
|
||
d := levenshtein(ar, br)
|
||
maxLen := len(ar)
|
||
if len(br) > maxLen {
|
||
maxLen = len(br)
|
||
}
|
||
return 1 - float64(d)/float64(maxLen)
|
||
}
|
||
|
||
// levenshtein returns the edit distance between a and b: the minimum number of
// single-rune insertions, deletions and substitutions needed to turn one into
// the other.
//
// It keeps a single row of the dynamic-programming table, so memory use is
// proportional to len(b). The per-cell minimum is computed inline rather than
// through a helper that allocates, because this loop runs len(a)*len(b) times.
func levenshtein(a, b []rune) int {
	// row[j] is the distance between the first i runes of a and the first j
	// runes of b; it starts out as the i == 0 row.
	row := make([]int, len(b)+1)
	for j := range row {
		row[j] = j
	}

	for i := 1; i <= len(a); i++ {
		// diag carries the value of the previous row at column j-1.
		diag := row[0]
		row[0] = i
		for j := 1; j <= len(b); j++ {
			above := row[j]

			// Substitution (or a free match when the runes are equal).
			best := diag
			if a[i-1] != b[j-1] {
				best++
			}
			// Deletion from a.
			if d := above + 1; d < best {
				best = d
			}
			// Insertion into a.
			if d := row[j-1] + 1; d < best {
				best = d
			}

			row[j] = best
			diag = above
		}
	}
	return row[len(b)]
}
|
||
|
||
// min3 returns the smallest of a, b and c.
func min3(a, b, c int) int {
	candidates := sort.IntSlice{a, b, c}
	candidates.Sort()
	return candidates[0]
}
|