diff --git a/cmd/rip/main.go b/cmd/rip/main.go index d68a6d3..b0a6108 100644 --- a/cmd/rip/main.go +++ b/cmd/rip/main.go @@ -1422,9 +1422,9 @@ type resolvedLastFMTrack struct { } var ( - lastFMTitleTagsRe = regexp.MustCompile(`]*\btitle=(?:"([^"]+)"|'([^']+)')`) lastFMTotalTracksRe = regexp.MustCompile(`data-playlisting-entry-count="(\d+)"`) - lastFMPlaylistTitleRe = regexp.MustCompile(`

([^<]+)

`) + lastFMPlaylistTitleRe = regexp.MustCompile(`]*class="[^"]*playlisting-playlist-header-title[^"]*"[^>]*>([^<]+)`) lastFMMirrorTitleRe = regexp.MustCompile(`^Title:\s*(.+?)\s+\|`) lastFMMirrorLinkTextRe = regexp.MustCompile(`\[([^\]]+)\]\(`) errLastFMInvalidSource = "unsupported source" @@ -1662,7 +1662,11 @@ func isValidLastFMPlaylistURL(raw string) bool { return false } h := strings.ToLower(strings.TrimPrefix(strings.TrimSpace(u.Host), "www.")) - return h == "last.fm" || strings.HasSuffix(h, ".last.fm") + if h != "last.fm" && !strings.HasSuffix(h, ".last.fm") { + return false + } + p := strings.ToLower(strings.TrimSpace(u.Path)) + return strings.Contains(p, "/playlists/") } func fetchLastFMPlaylist(ctx context.Context, verifySSL bool, playlistURL string) (string, []lastFMTrack, error) { @@ -1827,8 +1831,10 @@ func extractLastFMTitleArtistPairs(page string) []lastFMTrack { titles := lastFMTitleTagsRe.FindAllStringSubmatch(page, -1) out := make([]lastFMTrack, 0, len(titles)/2) for i := 0; i+1 < len(titles); i += 2 { - title := html.UnescapeString(strings.TrimSpace(titles[i][1])) - artist := html.UnescapeString(strings.TrimSpace(titles[i+1][1])) + titleRaw := strings.TrimSpace(firstNonEmpty(titles[i][1], titles[i][2])) + artistRaw := strings.TrimSpace(firstNonEmpty(titles[i+1][1], titles[i+1][2])) + title := html.UnescapeString(titleRaw) + artist := html.UnescapeString(artistRaw) if title == "" || artist == "" { continue } @@ -1837,6 +1843,15 @@ func extractLastFMTitleArtistPairs(page string) []lastFMTrack { return out } +func firstNonEmpty(items ...string) string { + for _, item := range items { + if strings.TrimSpace(item) != "" { + return strings.TrimSpace(item) + } + } + return "" +} + func extractLastFMTracksFromMirrorMarkdown(md string) (string, []lastFMTrack) { lines := strings.Split(strings.ReplaceAll(md, "\r\n", "\n"), "\n") title := "" @@ -2316,6 +2331,18 @@ func promptSearchInteractive(defaultLimit int) (string, string, searchOptions, e func normalizeSearchResults(source, mediaType string, pages []map[string]any) []searchResult { results := make([]searchResult, 0) + seen := map[string]struct{}{} + appendUnique := func(r searchResult) { + if strings.TrimSpace(r.ID) == "" || strings.TrimSpace(r.Title) == "" { + return + } + key := r.ID + if _, ok := seen[key]; ok { + return + } + seen[key] = struct{}{} + results = append(results, r) + } for _, page := range pages { switch source { case "qobuz": @@ -2351,9 +2378,7 @@ func normalizeSearchResults(source, mediaType string, pages []map[string]any) [] trackCount = searchInt(itm["track_count"]) } explicit := searchBool(itm["parental_warning"]) - if id != "" && title != "" { - results = append(results, searchResult{ID: id, Title: title, Artist: artist, Album: album, TrackCount: trackCount, Explicit: explicit}) - } + appendUnique(searchResult{ID: id, Title: title, Artist: artist, Album: album, TrackCount: trackCount, Explicit: explicit}) } case "tidal": items, ok := page["items"].([]any) @@ -2387,9 +2412,7 @@ func normalizeSearchResults(source, mediaType string, pages []map[string]any) [] trackCount = searchInt(itm["tracks_count"]) } explicit := searchBool(itm["explicit"]) - if id != "" && title != "" { - results = append(results, searchResult{ID: id, Title: title, Artist: artist, Album: album, TrackCount: trackCount, Explicit: explicit}) - } + appendUnique(searchResult{ID: id, Title: title, Artist: artist, Album: album, TrackCount: trackCount, Explicit: explicit}) } case "deezer": key := mediaType + "s" @@ -2415,9 +2438,7 @@ func normalizeSearchResults(source, mediaType string, pages []map[string]any) [] album := nestedSearchString(itm, "album", "title") trackCount := searchInt(itm["nb_tracks"]) explicit := searchBool(itm["explicit_lyrics"]) - if id != "" && title != "" { - results = append(results, searchResult{ID: id, Title: title, Artist: artist, Album: album, TrackCount: trackCount, Explicit: explicit}) - } + appendUnique(searchResult{ID: id, Title: title, Artist: artist, Album: album, TrackCount: trackCount, Explicit: explicit}) } case "soundcloud": items, ok := page["items"].([]any) @@ -2433,9 +2454,7 @@ func normalizeSearchResults(source, mediaType string, pages []map[string]any) [] title := asString(itm["title"]) artist := nestedSearchString(itm, "artist", "name") trackCount := searchInt(itm["tracks_count"]) - if id != "" && title != "" { - results = append(results, searchResult{ID: id, Title: title, Artist: artist, TrackCount: trackCount}) - } + appendUnique(searchResult{ID: id, Title: title, Artist: artist, TrackCount: trackCount}) } } } diff --git a/cmd/rip/main_test.go b/cmd/rip/main_test.go index d297600..2e15266 100644 --- a/cmd/rip/main_test.go +++ b/cmd/rip/main_test.go @@ -114,6 +114,9 @@ func TestIsValidLastFMPlaylistURL(t *testing.T) { if isValidLastFMPlaylistURL("https://example.com/user/x/playlists/123") { t.Fatalf("expected non-last.fm host to be invalid") } + if isValidLastFMPlaylistURL("https://www.last.fm/user/x/library") { + t.Fatalf("expected non-playlist last.fm url to be invalid") + } } func TestExtractLastFMPlaylistInfoAndPairs(t *testing.T) { @@ -144,6 +147,30 @@ func TestExtractLastFMPlaylistInfoAndPairs(t *testing.T) { } } +func TestExtractLastFMPlaylistInfoFlexibleClass(t *testing.T) { + html := `

Road & Rain

+
` + title, total, err := extractLastFMPlaylistInfo(html) + if err != nil { + t.Fatalf("extractLastFMPlaylistInfo() error = %v", err) + } + if title != "Road & Rain" || total != 1 { + t.Fatalf("unexpected parsed values: title=%q total=%d", title, total) + } +} + +func TestExtractLastFMTitleArtistPairsSingleQuotes(t *testing.T) { + html := ` +` + pairs := extractLastFMTitleArtistPairs(html) + if len(pairs) != 1 { + t.Fatalf("pairs len = %d, want 1", len(pairs)) + } + if pairs[0].Title != "Dreams" || pairs[0].Artist != "Fleetwood Mac" { + t.Fatalf("unexpected pair: %+v", pairs[0]) + } +} + func TestParseGlobalArgsNoDBBeforeCommand(t *testing.T) { opts, err := parseGlobalArgs([]string{"-ndb", "url", "https://play.qobuz.com/album/0004228000522"}) if err != nil { @@ -266,6 +293,26 @@ func TestWriteSearchResultsToFileCreatesParentDirectory(t *testing.T) { } } +func TestNormalizeSearchResultsDedupesByID(t *testing.T) { + pages := []map[string]any{ + {"tracks": map[string]any{"items": []any{ + map[string]any{"id": "1", "title": "Dreams", "artist": map[string]any{"name": "Fleetwood Mac"}}, + map[string]any{"id": "1", "title": "Dreams", "artist": map[string]any{"name": "Fleetwood Mac"}}, + }}}, + {"tracks": map[string]any{"items": []any{ + map[string]any{"id": "2", "title": "Go Your Own Way", "artist": map[string]any{"name": "Fleetwood Mac"}}, + map[string]any{"id": "1", "title": "Dreams", "artist": map[string]any{"name": "Fleetwood Mac"}}, + }}}, + } + results := normalizeSearchResults("qobuz", "track", pages) + if len(results) != 2 { + t.Fatalf("len(results)=%d want 2", len(results)) + } + if results[0].ID != "1" || results[1].ID != "2" { + t.Fatalf("unexpected IDs order: %+v", results) + } +} + func TestErrorWithActionableHintForSSL(t *testing.T) { err := errors.New("x509: certificate signed by unknown authority") msg := errorWithActionableHint(err, globalOptions{}) diff --git a/internal/urlparse/parse.go b/internal/urlparse/parse.go index a47ee81..daf556e 100644 --- a/internal/urlparse/parse.go +++ b/internal/urlparse/parse.go @@ -50,7 +50,7 @@ func Parse(raw string) *ParsedURL { case isDeezerHost(host): return parseDeezer(raw, parts) case isSoundcloudHost(host): - return parseSoundcloud(raw, parts) + return parseSoundcloud(raw, host, parts) default: return nil } @@ -85,6 +85,13 @@ func parseTidal(raw string, parts []string) *ParsedURL { return nil } + if isLangToken(parts[0]) { + parts = parts[1:] + } + if len(parts) < 2 { + return nil + } + if parts[0] == "browse" { parts = parts[1:] } @@ -128,14 +135,20 @@ func parseDeezer(raw string, parts []string) *ParsedURL { return &ParsedURL{OriginalURL: raw, Source: "deezer", MediaType: mediaType, ID: id, Kind: KindGeneric} } -func parseSoundcloud(raw string, parts []string) *ParsedURL { +func parseSoundcloud(raw, host string, parts []string) *ParsedURL { if len(parts) < 1 { return nil } + if host == "on.soundcloud.com" { + return &ParsedURL{OriginalURL: raw, Source: "soundcloud", MediaType: "track", ID: raw, Kind: KindSoundcloud} + } + mediaType := "track" if len(parts) >= 3 && parts[1] == "sets" { mediaType = "playlist" + } else if len(parts) < 2 || parts[1] == "sets" { + return nil } return &ParsedURL{OriginalURL: raw, Source: "soundcloud", MediaType: mediaType, ID: raw, Kind: KindSoundcloud} @@ -169,7 +182,7 @@ func isTidalHost(host string) bool { } func isDeezerHost(host string) bool { - return host == "deezer.com" + return host == "deezer.com" || strings.HasSuffix(host, ".deezer.com") } func isSoundcloudHost(host string) bool { diff --git a/internal/urlparse/parse_test.go b/internal/urlparse/parse_test.go index c625236..cac7fc4 100644 --- a/internal/urlparse/parse_test.go +++ b/internal/urlparse/parse_test.go @@ -28,13 +28,19 @@ func TestQobuzAlbumURL(t *testing.T) { } func TestTidalTrackURL(t *testing.T) { - url := "https://tidal.com/browse/track/3083287" - result := Parse(url) - if result == nil { - t.Fatalf("expected parsed url") + inputs := []string{ + "https://tidal.com/browse/track/3083287", + "https://tidal.com/us/browse/track/3083287", + "https://tidal.com/us/track/3083287", } - if result.Source != "tidal" || result.MediaType != "track" || result.ID != "3083287" { - t.Fatalf("unexpected parse result: %+v", result) + for _, url := range inputs { + result := Parse(url) + if result == nil { + t.Fatalf("expected parsed url for %q", url) + } + if result.Source != "tidal" || result.MediaType != "track" || result.ID != "3083287" { + t.Fatalf("unexpected parse result for %q: %+v", url, result) + } } } @@ -93,6 +99,7 @@ func TestURLWithLanguageCode(t *testing.T) { "https://www.qobuz.com/gb-en/album/name/id123456", "https://www.deezer.com/en/track/4195713", "https://www.deezer.com/fr/track/4195713", + "https://m.deezer.com/en/track/4195713", } for _, input := range inputs { if result := Parse(input); result == nil { @@ -101,6 +108,17 @@ func TestURLWithLanguageCode(t *testing.T) { } } +func TestDeezerMobileHostURL(t *testing.T) { + url := "https://m.deezer.com/track/4195713" + result := Parse(url) + if result == nil { + t.Fatalf("expected parsed url") + } + if result.Source != "deezer" || result.MediaType != "track" || result.ID != "4195713" { + t.Fatalf("unexpected parse result: %+v", result) + } +} + func TestSoundcloudURL(t *testing.T) { inputs := []string{ "https://soundcloud.com/artist-name/track-name", @@ -118,3 +136,15 @@ func TestSoundcloudURL(t *testing.T) { } } } + +func TestSoundcloudProfileURLIsNotTrack(t *testing.T) { + if result := Parse("https://soundcloud.com/artist-name"); result != nil { + t.Fatalf("expected nil for profile url, got %+v", result) + } +} + +func TestSoundcloudSetsRootWithoutPlaylistSlugInvalid(t *testing.T) { + if result := Parse("https://soundcloud.com/artist-name/sets"); result != nil { + t.Fatalf("expected nil for sets root url, got %+v", result) + } +}