// Package match picks, for a track coming from another source, the Navidrome
// track that best corresponds to it. Candidates are obtained through text
// search and ranked with a heuristic score built from ISRC, title, artist and
// duration.
package match

import (
	"context"
	"fmt"
	"math"
	"regexp"
	"strings"
	"sync"

	"navimigrate/internal/model"
	"navimigrate/internal/navidrome"
)

const (
	// defaultThreshold replaces a negative threshold passed to NewMatcher.
	defaultThreshold = 45
	// searchLimit is the limit passed to Searcher.SearchTracks for every query.
	searchLimit = 20
)

// Searcher is the search backend a Matcher queries for candidate tracks.
type Searcher interface {
	SearchTracks(ctx context.Context, query string, limit int) ([]navidrome.Track, error)
}

// Matcher matches source tracks against a Searcher and memoizes search results
// per query string. Access to the cache is guarded by cacheMu; entries are
// never evicted.
type Matcher struct {
	searcher  Searcher
	threshold float64

	cacheMu sync.RWMutex
	cache   map[string][]navidrome.Track
}

// NewMatcher returns a Matcher that accepts a candidate when its score is at
// least threshold. A negative threshold is replaced by the default of 45.
func NewMatcher(searcher Searcher, threshold float64) *Matcher {
	if threshold < 0 {
		threshold = defaultThreshold
	}
	return &Matcher{
		searcher:  searcher,
		threshold: threshold,
		cache:     map[string][]navidrome.Track{},
	}
}

// MatchTrack searches for src using every query produced by buildQueries,
// scores each distinct candidate once (deduplicated by ID) and returns the
// highest-scoring one.
//
// The result has Matched set only when the best score reaches the threshold.
// When it does not, the best candidate is still reported together with a
// Reason. Failed searches do not abort the match: remaining queries are still
// tried, and the first error is included in Reason if no candidate was found.
func (m *Matcher) MatchTrack(ctx context.Context, src model.Track) model.MatchedTrack {
	queries := m.buildQueries(src)
	if len(queries) == 0 {
		return model.MatchedTrack{Source: src, Matched: false, Reason: "no usable metadata"}
	}

	type scored struct {
		track navidrome.Track
		score float64
		query string
	}

	// Start below any score scoreCandidate can produce so the first candidate
	// always becomes the current best.
	best := scored{score: -999}
	seen := make(map[string]struct{})
	var searchErr error

	for _, q := range queries {
		candidates, err := m.searchCached(ctx, q)
		if err != nil {
			// Best effort: one failing query must not prevent the others from
			// running, but remember the failure so it can be reported.
			if searchErr == nil {
				searchErr = err
			}
			continue
		}
		for _, c := range candidates {
			if _, ok := seen[c.ID]; ok {
				continue
			}
			seen[c.ID] = struct{}{}
			if score := scoreCandidate(src, c); score > best.score {
				best = scored{track: c, score: score, query: q}
			}
		}
	}

	if best.track.ID == "" {
		reason := "no candidates"
		if searchErr != nil {
			reason = fmt.Sprintf("no candidates: search failed: %v", searchErr)
		}
		return model.MatchedTrack{Source: src, Matched: false, Reason: reason}
	}

	if best.score >= m.threshold {
		return model.MatchedTrack{
			Source:   src,
			TargetID: best.track.ID,
			Score:    best.score,
			Query:    best.query,
			Matched:  true,
		}
	}

	reason := fmt.Sprintf("best score %.1f below threshold %.1f", best.score, m.threshold)
	return model.MatchedTrack{
		Source:   src,
		TargetID: best.track.ID,
		Score:    best.score,
		Query:    best.query,
		Matched:  false,
		Reason:   reason,
	}
}

// searchCached returns the search results for q, consulting the cache first.
// A blank query yields (nil, nil) without calling the searcher. Only
// successful searches are stored, so a failed query is retried on the next
// call. The returned slice is the cached one and must not be modified.
func (m *Matcher) searchCached(ctx context.Context, q string) ([]navidrome.Track, error) {
	q = strings.TrimSpace(q)
	if q == "" {
		return nil, nil
	}

	m.cacheMu.RLock()
	cached, ok := m.cache[q]
	m.cacheMu.RUnlock()
	if ok {
		return cached, nil
	}

	// The lock is not held during the search, so concurrent callers may issue
	// the same query more than once; the last result stored wins.
	res, err := m.searcher.SearchTracks(ctx, q, searchLimit)
	if err != nil {
		return nil, err
	}

	m.cacheMu.Lock()
	m.cache[q] = res
	m.cacheMu.Unlock()
	return res, nil
}

// buildQueries returns the distinct, non-empty search strings to try for src,
// in order: the ISRC (when present), "title artist", its transliteration, the
// same two for the cleaned title (when cleaning changed it), then the bare
// title and its transliteration. It returns nil when src has no title.
func (m *Matcher) buildQueries(src model.Track) []string {
	title := strings.TrimSpace(src.Title)
	if title == "" {
		return nil
	}

	artist := ""
	if len(src.Artists) > 0 {
		artist = src.Artists[0]
	}
	latinTitle := strings.TrimSpace(transliterateToLatin(title))
	latinArtist := strings.TrimSpace(transliterateToLatin(artist))

	queries := make([]string, 0, 7)
	if src.ISRC != "" {
		queries = append(queries, src.ISRC)
	}
	queries = append(queries, strings.TrimSpace(title+" "+artist))
	if latinTitle != "" {
		queries = append(queries, strings.TrimSpace(latinTitle+" "+latinArtist))
	}

	if cleaned := cleanTitle(title); cleaned != title {
		queries = append(queries, strings.TrimSpace(cleaned+" "+artist))
		if latinCleaned := strings.TrimSpace(transliterateToLatin(cleaned)); latinCleaned != "" {
			queries = append(queries, strings.TrimSpace(latinCleaned+" "+latinArtist))
		}
	}

	queries = append(queries, title)
	if latinTitle != "" {
		queries = append(queries, latinTitle)
	}

	// Drop blanks and duplicates while keeping the first occurrence's position.
	seen := make(map[string]struct{}, len(queries))
	out := make([]string, 0, len(queries))
	for _, q := range queries {
		q = strings.TrimSpace(q)
		if q == "" {
			continue
		}
		if _, dup := seen[q]; dup {
			continue
		}
		seen[q] = struct{}{}
		out = append(out, q)
	}
	return out
}

// scoreCandidate rates how well dst corresponds to src. Higher is better.
//
// Contributions: +60 for a matching ISRC, up to +25 for title similarity, up
// to +20 for similarity between the first source artist and dst.Artist, and
// between +10 and -6 depending on the duration difference. Candidates that
// look like a live, remix or karaoke version are penalized (-8, -6, -12) when
// the source title does not carry the same marker.
func scoreCandidate(src model.Track, dst navidrome.Track) float64 {
	score := 0.0

	if src.ISRC != "" && hasISRC(dst.ISRCs, src.ISRC) {
		score += 60
	}

	srcTitle := normalize(src.Title)
	dstTitle := normalize(dst.Title)
	score += 25 * similarity(srcTitle, dstTitle)

	if len(src.Artists) > 0 && src.Artists[0] != "" {
		score += 20 * similarity(normalize(src.Artists[0]), normalize(dst.Artist))
	}

	if src.DurationMS > 0 && dst.Duration > 0 {
		// Difference in seconds; differences in (10, 25] neither add nor subtract.
		delta := math.Abs(float64(src.DurationMS/1000 - dst.Duration))
		switch {
		case delta <= 2:
			score += 10
		case delta <= 5:
			score += 7
		case delta <= 10:
			score += 4
		case delta > 25:
			score -= 6
		}
	}

	// "live" is matched as a whole word so that titles such as "Alive" are not
	// mistaken for live recordings.
	if !hasWord(srcTitle, "live") && hasWord(dstTitle, "live") {
		score -= 8
	}
	// Substring match on purpose: also covers "remixed" and "remixes".
	if !strings.Contains(srcTitle, "remix") && strings.Contains(dstTitle, "remix") {
		score -= 6
	}
	if !strings.Contains(srcTitle, "karaoke") && strings.Contains(dstTitle, "karaoke") {
		score -= 12
	}

	return score
}

// hasWord reports whether word is one of the whitespace-separated tokens of
// normalized.
func hasWord(normalized, word string) bool {
	for _, tok := range strings.Fields(normalized) {
		if tok == word {
			return true
		}
	}
	return false
}

// hasISRC reports whether wanted equals one of candidates, ignoring case and
// surrounding whitespace. A blank wanted never matches.
func hasISRC(candidates []string, wanted string) bool {
	wanted = strings.TrimSpace(wanted)
	if wanted == "" {
		return false
	}
	for _, c := range candidates {
		if strings.EqualFold(strings.TrimSpace(c), wanted) {
			return true
		}
	}
	return false
}

// nonAlphaNum matches runs of characters that are neither letters nor digits
// in any script.
var nonAlphaNum = regexp.MustCompile(`[^\p{L}\p{N}]+`)

// normalize reduces s to a canonical form for comparison: Cyrillic is
// transliterated, the text is lower-cased, "&" becomes "and", every run of
// non-letter/non-digit characters becomes a single space, and leading and
// trailing spaces are removed.
func normalize(s string) string {
	s = transliterateToLatin(s)
	s = strings.ToLower(strings.TrimSpace(s))
	s = strings.ReplaceAll(s, "&", " and ")
	s = nonAlphaNum.ReplaceAllString(s, " ")
	return strings.Join(strings.Fields(s), " ")
}

// cyrillicToLatin maps Cyrillic letters (both cases) to lower-case Latin
// replacements. Hard and soft signs map to the empty string.
var cyrillicToLatin = map[rune]string{
	'а': "a", 'б': "b", 'в': "v", 'г': "g", 'д': "d", 'е': "e", 'ё': "e",
	'ж': "zh", 'з': "z", 'и': "i", 'й': "i", 'к': "k", 'л': "l", 'м': "m",
	'н': "n", 'о': "o", 'п': "p", 'р': "r", 'с': "s", 'т': "t", 'у': "u",
	'ф': "f", 'х': "h", 'ц': "ts", 'ч': "ch", 'ш': "sh", 'щ': "shch",
	'ъ': "", 'ы': "y", 'ь': "", 'э': "e", 'ю': "yu", 'я': "ya",
	'і': "i", 'ї': "yi", 'є': "ye", 'ґ': "g",
	'А': "a", 'Б': "b", 'В': "v", 'Г': "g", 'Д': "d", 'Е': "e", 'Ё': "e",
	'Ж': "zh", 'З': "z", 'И': "i", 'Й': "i", 'К': "k", 'Л': "l", 'М': "m",
	'Н': "n", 'О': "o", 'П': "p", 'Р': "r", 'С': "s", 'Т': "t", 'У': "u",
	'Ф': "f", 'Х': "h", 'Ц': "ts", 'Ч': "ch", 'Ш': "sh", 'Щ': "shch",
	'Ъ': "", 'Ы': "y", 'Ь': "", 'Э': "e", 'Ю': "yu", 'Я': "ya",
	'І': "i", 'Ї': "yi", 'Є': "ye", 'Ґ': "g",
}

// transliterateToLatin replaces every rune found in cyrillicToLatin with its
// Latin counterpart and copies all other runes unchanged.
func transliterateToLatin(s string) string {
	if s == "" {
		return s
	}
	var b strings.Builder
	b.Grow(len(s) + 8) // headroom for multi-letter replacements such as "shch"
	for _, r := range s {
		if latin, ok := cyrillicToLatin[r]; ok {
			b.WriteString(latin)
			continue
		}
		b.WriteRune(r)
	}
	return b.String()
}

// cleanupRe matches edition markers: a parenthesized group containing one of
// the listed keywords, or a " - <keyword>..." suffix running to the end.
var cleanupRe = regexp.MustCompile(`(?i)\s*\(([^)]*(remaster|remastered|live|mono|stereo|version|deluxe|explicit|clean|bonus)[^)]*)\)|\s*-\s*(remaster(ed)?|live|version|edit|radio edit).*`)

// cleanTitle removes the edition markers matched by cleanupRe from s. If
// nothing would remain, s is returned unchanged.
func cleanTitle(s string) string {
	clean := strings.TrimSpace(cleanupRe.ReplaceAllString(s, ""))
	if clean == "" {
		return s
	}
	return clean
}

// similarity returns a value in [0, 1] describing how alike a and b are:
// 0.6 times the Jaccard index of their whitespace-separated token sets plus
// 0.4 times their Levenshtein ratio. It returns 0 when either string is empty
// or when they share no token, and 1 when they are equal.
func similarity(a, b string) float64 {
	if a == "" || b == "" {
		return 0
	}
	if a == b {
		return 1
	}

	ta := tokenSet(a)
	tb := tokenSet(b)
	if len(ta) == 0 || len(tb) == 0 {
		return 0
	}

	inter := 0
	for t := range ta {
		if _, ok := tb[t]; ok {
			inter++
		}
	}
	if inter == 0 {
		return 0
	}

	jaccard := float64(inter) / float64(len(ta)+len(tb)-inter)
	return jaccard*0.6 + levenshteinRatio(a, b)*0.4
}

// tokenSet returns the set of whitespace-separated tokens of s.
func tokenSet(s string) map[string]struct{} {
	parts := strings.Fields(s)
	set := make(map[string]struct{}, len(parts))
	for _, p := range parts {
		set[p] = struct{}{}
	}
	return set
}

// levenshteinRatio returns 1 - distance/maxLen, where distance is the
// rune-wise Levenshtein distance between a and b and maxLen is the rune length
// of the longer string. It returns 0 when either string is empty.
func levenshteinRatio(a, b string) float64 {
	ar := []rune(a)
	br := []rune(b)
	if len(ar) == 0 || len(br) == 0 {
		return 0
	}
	maxLen := len(ar)
	if len(br) > maxLen {
		maxLen = len(br)
	}
	return 1 - float64(levenshtein(ar, br))/float64(maxLen)
}

// levenshtein returns the edit distance between a and b (insertions,
// deletions and substitutions each cost 1), keeping a single row of the
// dynamic-programming table.
func levenshtein(a, b []rune) int {
	dp := make([]int, len(b)+1)
	for j := range dp {
		dp[j] = j
	}
	for i := 1; i <= len(a); i++ {
		// prev holds the diagonal cell, i.e. the previous row's dp[j-1].
		prev := dp[0]
		dp[0] = i
		for j := 1; j <= len(b); j++ {
			above := dp[j]
			cost := 0
			if a[i-1] != b[j-1] {
				cost = 1
			}
			dp[j] = min3(above+1, dp[j-1]+1, prev+cost)
			prev = above
		}
	}
	return dp[len(b)]
}

// min3 returns the smallest of a, b and c.
func min3(a, b, c int) int {
	m := a
	if b < m {
		m = b
	}
	if c < m {
		m = c
	}
	return m
}