mirror of
https://git.sr.ht/~joren/streamrip-go
synced 2026-06-17 15:05:39 +02:00
tighten lastfm parsing and locale url handling
This commit is contained in:
@@ -1422,9 +1422,9 @@ type resolvedLastFMTrack struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
lastFMTitleTagsRe = regexp.MustCompile(`<a\s+href="[^"]+"\s+title="([^"]+)"`)
|
lastFMTitleTagsRe = regexp.MustCompile(`<a\b[^>]*\btitle=(?:"([^"]+)"|'([^']+)')`)
|
||||||
lastFMTotalTracksRe = regexp.MustCompile(`data-playlisting-entry-count="(\d+)"`)
|
lastFMTotalTracksRe = regexp.MustCompile(`data-playlisting-entry-count="(\d+)"`)
|
||||||
lastFMPlaylistTitleRe = regexp.MustCompile(`<h1 class="playlisting-playlist-header-title">([^<]+)</h1>`)
|
lastFMPlaylistTitleRe = regexp.MustCompile(`<h1[^>]*class="[^"]*playlisting-playlist-header-title[^"]*"[^>]*>([^<]+)</h1>`)
|
||||||
lastFMMirrorTitleRe = regexp.MustCompile(`^Title:\s*(.+?)\s+\|`)
|
lastFMMirrorTitleRe = regexp.MustCompile(`^Title:\s*(.+?)\s+\|`)
|
||||||
lastFMMirrorLinkTextRe = regexp.MustCompile(`\[([^\]]+)\]\(`)
|
lastFMMirrorLinkTextRe = regexp.MustCompile(`\[([^\]]+)\]\(`)
|
||||||
errLastFMInvalidSource = "unsupported source"
|
errLastFMInvalidSource = "unsupported source"
|
||||||
@@ -1662,7 +1662,11 @@ func isValidLastFMPlaylistURL(raw string) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
h := strings.ToLower(strings.TrimPrefix(strings.TrimSpace(u.Host), "www."))
|
h := strings.ToLower(strings.TrimPrefix(strings.TrimSpace(u.Host), "www."))
|
||||||
return h == "last.fm" || strings.HasSuffix(h, ".last.fm")
|
if h != "last.fm" && !strings.HasSuffix(h, ".last.fm") {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
p := strings.ToLower(strings.TrimSpace(u.Path))
|
||||||
|
return strings.Contains(p, "/playlists/")
|
||||||
}
|
}
|
||||||
|
|
||||||
func fetchLastFMPlaylist(ctx context.Context, verifySSL bool, playlistURL string) (string, []lastFMTrack, error) {
|
func fetchLastFMPlaylist(ctx context.Context, verifySSL bool, playlistURL string) (string, []lastFMTrack, error) {
|
||||||
@@ -1827,8 +1831,10 @@ func extractLastFMTitleArtistPairs(page string) []lastFMTrack {
|
|||||||
titles := lastFMTitleTagsRe.FindAllStringSubmatch(page, -1)
|
titles := lastFMTitleTagsRe.FindAllStringSubmatch(page, -1)
|
||||||
out := make([]lastFMTrack, 0, len(titles)/2)
|
out := make([]lastFMTrack, 0, len(titles)/2)
|
||||||
for i := 0; i+1 < len(titles); i += 2 {
|
for i := 0; i+1 < len(titles); i += 2 {
|
||||||
title := html.UnescapeString(strings.TrimSpace(titles[i][1]))
|
titleRaw := strings.TrimSpace(firstNonEmpty(titles[i][1], titles[i][2]))
|
||||||
artist := html.UnescapeString(strings.TrimSpace(titles[i+1][1]))
|
artistRaw := strings.TrimSpace(firstNonEmpty(titles[i+1][1], titles[i+1][2]))
|
||||||
|
title := html.UnescapeString(titleRaw)
|
||||||
|
artist := html.UnescapeString(artistRaw)
|
||||||
if title == "" || artist == "" {
|
if title == "" || artist == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -1837,6 +1843,15 @@ func extractLastFMTitleArtistPairs(page string) []lastFMTrack {
|
|||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func firstNonEmpty(items ...string) string {
|
||||||
|
for _, item := range items {
|
||||||
|
if strings.TrimSpace(item) != "" {
|
||||||
|
return strings.TrimSpace(item)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
func extractLastFMTracksFromMirrorMarkdown(md string) (string, []lastFMTrack) {
|
func extractLastFMTracksFromMirrorMarkdown(md string) (string, []lastFMTrack) {
|
||||||
lines := strings.Split(strings.ReplaceAll(md, "\r\n", "\n"), "\n")
|
lines := strings.Split(strings.ReplaceAll(md, "\r\n", "\n"), "\n")
|
||||||
title := ""
|
title := ""
|
||||||
@@ -2316,6 +2331,18 @@ func promptSearchInteractive(defaultLimit int) (string, string, searchOptions, e
|
|||||||
|
|
||||||
func normalizeSearchResults(source, mediaType string, pages []map[string]any) []searchResult {
|
func normalizeSearchResults(source, mediaType string, pages []map[string]any) []searchResult {
|
||||||
results := make([]searchResult, 0)
|
results := make([]searchResult, 0)
|
||||||
|
seen := map[string]struct{}{}
|
||||||
|
appendUnique := func(r searchResult) {
|
||||||
|
if strings.TrimSpace(r.ID) == "" || strings.TrimSpace(r.Title) == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
key := r.ID
|
||||||
|
if _, ok := seen[key]; ok {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
seen[key] = struct{}{}
|
||||||
|
results = append(results, r)
|
||||||
|
}
|
||||||
for _, page := range pages {
|
for _, page := range pages {
|
||||||
switch source {
|
switch source {
|
||||||
case "qobuz":
|
case "qobuz":
|
||||||
@@ -2351,9 +2378,7 @@ func normalizeSearchResults(source, mediaType string, pages []map[string]any) []
|
|||||||
trackCount = searchInt(itm["track_count"])
|
trackCount = searchInt(itm["track_count"])
|
||||||
}
|
}
|
||||||
explicit := searchBool(itm["parental_warning"])
|
explicit := searchBool(itm["parental_warning"])
|
||||||
if id != "" && title != "" {
|
appendUnique(searchResult{ID: id, Title: title, Artist: artist, Album: album, TrackCount: trackCount, Explicit: explicit})
|
||||||
results = append(results, searchResult{ID: id, Title: title, Artist: artist, Album: album, TrackCount: trackCount, Explicit: explicit})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
case "tidal":
|
case "tidal":
|
||||||
items, ok := page["items"].([]any)
|
items, ok := page["items"].([]any)
|
||||||
@@ -2387,9 +2412,7 @@ func normalizeSearchResults(source, mediaType string, pages []map[string]any) []
|
|||||||
trackCount = searchInt(itm["tracks_count"])
|
trackCount = searchInt(itm["tracks_count"])
|
||||||
}
|
}
|
||||||
explicit := searchBool(itm["explicit"])
|
explicit := searchBool(itm["explicit"])
|
||||||
if id != "" && title != "" {
|
appendUnique(searchResult{ID: id, Title: title, Artist: artist, Album: album, TrackCount: trackCount, Explicit: explicit})
|
||||||
results = append(results, searchResult{ID: id, Title: title, Artist: artist, Album: album, TrackCount: trackCount, Explicit: explicit})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
case "deezer":
|
case "deezer":
|
||||||
key := mediaType + "s"
|
key := mediaType + "s"
|
||||||
@@ -2415,9 +2438,7 @@ func normalizeSearchResults(source, mediaType string, pages []map[string]any) []
|
|||||||
album := nestedSearchString(itm, "album", "title")
|
album := nestedSearchString(itm, "album", "title")
|
||||||
trackCount := searchInt(itm["nb_tracks"])
|
trackCount := searchInt(itm["nb_tracks"])
|
||||||
explicit := searchBool(itm["explicit_lyrics"])
|
explicit := searchBool(itm["explicit_lyrics"])
|
||||||
if id != "" && title != "" {
|
appendUnique(searchResult{ID: id, Title: title, Artist: artist, Album: album, TrackCount: trackCount, Explicit: explicit})
|
||||||
results = append(results, searchResult{ID: id, Title: title, Artist: artist, Album: album, TrackCount: trackCount, Explicit: explicit})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
case "soundcloud":
|
case "soundcloud":
|
||||||
items, ok := page["items"].([]any)
|
items, ok := page["items"].([]any)
|
||||||
@@ -2433,9 +2454,7 @@ func normalizeSearchResults(source, mediaType string, pages []map[string]any) []
|
|||||||
title := asString(itm["title"])
|
title := asString(itm["title"])
|
||||||
artist := nestedSearchString(itm, "artist", "name")
|
artist := nestedSearchString(itm, "artist", "name")
|
||||||
trackCount := searchInt(itm["tracks_count"])
|
trackCount := searchInt(itm["tracks_count"])
|
||||||
if id != "" && title != "" {
|
appendUnique(searchResult{ID: id, Title: title, Artist: artist, TrackCount: trackCount})
|
||||||
results = append(results, searchResult{ID: id, Title: title, Artist: artist, TrackCount: trackCount})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -114,6 +114,9 @@ func TestIsValidLastFMPlaylistURL(t *testing.T) {
|
|||||||
if isValidLastFMPlaylistURL("https://example.com/user/x/playlists/123") {
|
if isValidLastFMPlaylistURL("https://example.com/user/x/playlists/123") {
|
||||||
t.Fatalf("expected non-last.fm host to be invalid")
|
t.Fatalf("expected non-last.fm host to be invalid")
|
||||||
}
|
}
|
||||||
|
if isValidLastFMPlaylistURL("https://www.last.fm/user/x/library") {
|
||||||
|
t.Fatalf("expected non-playlist last.fm url to be invalid")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestExtractLastFMPlaylistInfoAndPairs(t *testing.T) {
|
func TestExtractLastFMPlaylistInfoAndPairs(t *testing.T) {
|
||||||
@@ -144,6 +147,30 @@ func TestExtractLastFMPlaylistInfoAndPairs(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestExtractLastFMPlaylistInfoFlexibleClass(t *testing.T) {
|
||||||
|
html := `<h1 id="x" class="foo playlisting-playlist-header-title bar">Road & Rain</h1>
|
||||||
|
<div data-playlisting-entry-count="1"></div>`
|
||||||
|
title, total, err := extractLastFMPlaylistInfo(html)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("extractLastFMPlaylistInfo() error = %v", err)
|
||||||
|
}
|
||||||
|
if title != "Road & Rain" || total != 1 {
|
||||||
|
t.Fatalf("unexpected parsed values: title=%q total=%d", title, total)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExtractLastFMTitleArtistPairsSingleQuotes(t *testing.T) {
|
||||||
|
html := `<a href='/music/a' title='Dreams'></a>
|
||||||
|
<a href='/music/b' title='Fleetwood Mac'></a>`
|
||||||
|
pairs := extractLastFMTitleArtistPairs(html)
|
||||||
|
if len(pairs) != 1 {
|
||||||
|
t.Fatalf("pairs len = %d, want 1", len(pairs))
|
||||||
|
}
|
||||||
|
if pairs[0].Title != "Dreams" || pairs[0].Artist != "Fleetwood Mac" {
|
||||||
|
t.Fatalf("unexpected pair: %+v", pairs[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseGlobalArgsNoDBBeforeCommand(t *testing.T) {
|
func TestParseGlobalArgsNoDBBeforeCommand(t *testing.T) {
|
||||||
opts, err := parseGlobalArgs([]string{"-ndb", "url", "https://play.qobuz.com/album/0004228000522"})
|
opts, err := parseGlobalArgs([]string{"-ndb", "url", "https://play.qobuz.com/album/0004228000522"})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -266,6 +293,26 @@ func TestWriteSearchResultsToFileCreatesParentDirectory(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNormalizeSearchResultsDedupesByID(t *testing.T) {
|
||||||
|
pages := []map[string]any{
|
||||||
|
{"tracks": map[string]any{"items": []any{
|
||||||
|
map[string]any{"id": "1", "title": "Dreams", "artist": map[string]any{"name": "Fleetwood Mac"}},
|
||||||
|
map[string]any{"id": "1", "title": "Dreams", "artist": map[string]any{"name": "Fleetwood Mac"}},
|
||||||
|
}}},
|
||||||
|
{"tracks": map[string]any{"items": []any{
|
||||||
|
map[string]any{"id": "2", "title": "Go Your Own Way", "artist": map[string]any{"name": "Fleetwood Mac"}},
|
||||||
|
map[string]any{"id": "1", "title": "Dreams", "artist": map[string]any{"name": "Fleetwood Mac"}},
|
||||||
|
}}},
|
||||||
|
}
|
||||||
|
results := normalizeSearchResults("qobuz", "track", pages)
|
||||||
|
if len(results) != 2 {
|
||||||
|
t.Fatalf("len(results)=%d want 2", len(results))
|
||||||
|
}
|
||||||
|
if results[0].ID != "1" || results[1].ID != "2" {
|
||||||
|
t.Fatalf("unexpected IDs order: %+v", results)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestErrorWithActionableHintForSSL(t *testing.T) {
|
func TestErrorWithActionableHintForSSL(t *testing.T) {
|
||||||
err := errors.New("x509: certificate signed by unknown authority")
|
err := errors.New("x509: certificate signed by unknown authority")
|
||||||
msg := errorWithActionableHint(err, globalOptions{})
|
msg := errorWithActionableHint(err, globalOptions{})
|
||||||
|
|||||||
@@ -50,7 +50,7 @@ func Parse(raw string) *ParsedURL {
|
|||||||
case isDeezerHost(host):
|
case isDeezerHost(host):
|
||||||
return parseDeezer(raw, parts)
|
return parseDeezer(raw, parts)
|
||||||
case isSoundcloudHost(host):
|
case isSoundcloudHost(host):
|
||||||
return parseSoundcloud(raw, parts)
|
return parseSoundcloud(raw, host, parts)
|
||||||
default:
|
default:
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -85,6 +85,13 @@ func parseTidal(raw string, parts []string) *ParsedURL {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if isLangToken(parts[0]) {
|
||||||
|
parts = parts[1:]
|
||||||
|
}
|
||||||
|
if len(parts) < 2 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
if parts[0] == "browse" {
|
if parts[0] == "browse" {
|
||||||
parts = parts[1:]
|
parts = parts[1:]
|
||||||
}
|
}
|
||||||
@@ -128,14 +135,20 @@ func parseDeezer(raw string, parts []string) *ParsedURL {
|
|||||||
return &ParsedURL{OriginalURL: raw, Source: "deezer", MediaType: mediaType, ID: id, Kind: KindGeneric}
|
return &ParsedURL{OriginalURL: raw, Source: "deezer", MediaType: mediaType, ID: id, Kind: KindGeneric}
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseSoundcloud(raw string, parts []string) *ParsedURL {
|
func parseSoundcloud(raw, host string, parts []string) *ParsedURL {
|
||||||
if len(parts) < 1 {
|
if len(parts) < 1 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if host == "on.soundcloud.com" {
|
||||||
|
return &ParsedURL{OriginalURL: raw, Source: "soundcloud", MediaType: "track", ID: raw, Kind: KindSoundcloud}
|
||||||
|
}
|
||||||
|
|
||||||
mediaType := "track"
|
mediaType := "track"
|
||||||
if len(parts) >= 3 && parts[1] == "sets" {
|
if len(parts) >= 3 && parts[1] == "sets" {
|
||||||
mediaType = "playlist"
|
mediaType = "playlist"
|
||||||
|
} else if len(parts) < 2 || parts[1] == "sets" {
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return &ParsedURL{OriginalURL: raw, Source: "soundcloud", MediaType: mediaType, ID: raw, Kind: KindSoundcloud}
|
return &ParsedURL{OriginalURL: raw, Source: "soundcloud", MediaType: mediaType, ID: raw, Kind: KindSoundcloud}
|
||||||
@@ -169,7 +182,7 @@ func isTidalHost(host string) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func isDeezerHost(host string) bool {
|
func isDeezerHost(host string) bool {
|
||||||
return host == "deezer.com"
|
return host == "deezer.com" || strings.HasSuffix(host, ".deezer.com")
|
||||||
}
|
}
|
||||||
|
|
||||||
func isSoundcloudHost(host string) bool {
|
func isSoundcloudHost(host string) bool {
|
||||||
|
|||||||
@@ -28,13 +28,19 @@ func TestQobuzAlbumURL(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestTidalTrackURL(t *testing.T) {
|
func TestTidalTrackURL(t *testing.T) {
|
||||||
url := "https://tidal.com/browse/track/3083287"
|
inputs := []string{
|
||||||
result := Parse(url)
|
"https://tidal.com/browse/track/3083287",
|
||||||
if result == nil {
|
"https://tidal.com/us/browse/track/3083287",
|
||||||
t.Fatalf("expected parsed url")
|
"https://tidal.com/us/track/3083287",
|
||||||
}
|
}
|
||||||
if result.Source != "tidal" || result.MediaType != "track" || result.ID != "3083287" {
|
for _, url := range inputs {
|
||||||
t.Fatalf("unexpected parse result: %+v", result)
|
result := Parse(url)
|
||||||
|
if result == nil {
|
||||||
|
t.Fatalf("expected parsed url for %q", url)
|
||||||
|
}
|
||||||
|
if result.Source != "tidal" || result.MediaType != "track" || result.ID != "3083287" {
|
||||||
|
t.Fatalf("unexpected parse result for %q: %+v", url, result)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -93,6 +99,7 @@ func TestURLWithLanguageCode(t *testing.T) {
|
|||||||
"https://www.qobuz.com/gb-en/album/name/id123456",
|
"https://www.qobuz.com/gb-en/album/name/id123456",
|
||||||
"https://www.deezer.com/en/track/4195713",
|
"https://www.deezer.com/en/track/4195713",
|
||||||
"https://www.deezer.com/fr/track/4195713",
|
"https://www.deezer.com/fr/track/4195713",
|
||||||
|
"https://m.deezer.com/en/track/4195713",
|
||||||
}
|
}
|
||||||
for _, input := range inputs {
|
for _, input := range inputs {
|
||||||
if result := Parse(input); result == nil {
|
if result := Parse(input); result == nil {
|
||||||
@@ -101,6 +108,17 @@ func TestURLWithLanguageCode(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDeezerMobileHostURL(t *testing.T) {
|
||||||
|
url := "https://m.deezer.com/track/4195713"
|
||||||
|
result := Parse(url)
|
||||||
|
if result == nil {
|
||||||
|
t.Fatalf("expected parsed url")
|
||||||
|
}
|
||||||
|
if result.Source != "deezer" || result.MediaType != "track" || result.ID != "4195713" {
|
||||||
|
t.Fatalf("unexpected parse result: %+v", result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestSoundcloudURL(t *testing.T) {
|
func TestSoundcloudURL(t *testing.T) {
|
||||||
inputs := []string{
|
inputs := []string{
|
||||||
"https://soundcloud.com/artist-name/track-name",
|
"https://soundcloud.com/artist-name/track-name",
|
||||||
@@ -118,3 +136,15 @@ func TestSoundcloudURL(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSoundcloudProfileURLIsNotTrack(t *testing.T) {
|
||||||
|
if result := Parse("https://soundcloud.com/artist-name"); result != nil {
|
||||||
|
t.Fatalf("expected nil for profile url, got %+v", result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSoundcloudSetsRootWithoutPlaylistSlugInvalid(t *testing.T) {
|
||||||
|
if result := Parse("https://soundcloud.com/artist-name/sets"); result != nil {
|
||||||
|
t.Fatalf("expected nil for sets root url, got %+v", result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user