From 13063c6cc545b4350a0803b702bd745bd073f23d Mon Sep 17 00:00:00 2001 From: Joren Date: Sat, 16 May 2026 17:33:40 +0200 Subject: [PATCH 1/6] add video-only mode --- cmd/canvasarchiver/main.go | 16 +++++++++------- internal/canvas/client.go | 25 ++++++++++++++++++++----- internal/panopto/downloader.go | 20 +++++++++++++++----- 3 files changed, 44 insertions(+), 17 deletions(-) diff --git a/cmd/canvasarchiver/main.go b/cmd/canvasarchiver/main.go index 18b3c87..f0dd336 100644 --- a/cmd/canvasarchiver/main.go +++ b/cmd/canvasarchiver/main.go @@ -14,6 +14,7 @@ import ( func main() { filesOnly := flag.Bool("fo", false, "Files only mode - download all files to a single directory without module structure") + videosOnly := flag.Bool("vo", false, "Videos only mode - download only Panopto videos to a single directory") me := flag.Bool("me", false, "Download all enrolled courses") moduleNumbers := flag.Bool("n", false, "Prefix modules with order numbers [1], [2], etc.") flag.Parse() @@ -28,7 +29,7 @@ func main() { } if *me { - canvasClient := canvas.NewClient(httpClient, accessToken, "", *filesOnly, *moduleNumbers) + canvasClient := canvas.NewClient(httpClient, accessToken, "", *filesOnly, *videosOnly, *moduleNumbers) courses, err := canvasClient.GetEnrolledCourses() if err != nil { fmt.Printf("Error fetching courses: %v\n", err) @@ -38,7 +39,7 @@ func main() { fmt.Printf("[+] Found %d enrolled courses\n", len(courses)) for _, course := range courses { fmt.Printf(" -> Downloading: %s (ID: %d)\n", course.Name, course.ID) - downloadCourse(httpClient, accessToken, fmt.Sprintf("%d", course.ID), *filesOnly, *moduleNumbers) + downloadCourse(httpClient, accessToken, fmt.Sprintf("%d", course.ID), *filesOnly, *videosOnly, *moduleNumbers) } return } @@ -47,11 +48,11 @@ func main() { fmt.Print("Enter Course ID: ") fmt.Scanln(&courseID) - downloadCourse(httpClient, accessToken, courseID, *filesOnly, *moduleNumbers) + downloadCourse(httpClient, accessToken, courseID, *filesOnly, *videosOnly, *moduleNumbers) } -func downloadCourse(httpClient *http.Client, accessToken, courseID string, filesOnly, moduleNumbers bool) { - canvasClient := canvas.NewClient(httpClient, accessToken, courseID, filesOnly, moduleNumbers) +func downloadCourse(httpClient *http.Client, accessToken, courseID string, filesOnly, videosOnly, moduleNumbers bool) { + canvasClient := canvas.NewClient(httpClient, accessToken, courseID, filesOnly, videosOnly, moduleNumbers) if err := canvasClient.GetCourseInfo(); err != nil { fmt.Printf("Error: %v\n", err) @@ -66,7 +67,8 @@ func downloadCourse(httpClient *http.Client, accessToken, courseID string, files canvasClient.DownloadModules(courseRoot) - if !filesOnly { - panopto.DownloadMainRecordings(httpClient, accessToken, courseID, courseRoot) + // Run recordings: always in -vo mode; skipped in -fo mode; normal otherwise. + if videosOnly || !filesOnly { + panopto.DownloadMainRecordings(httpClient, accessToken, courseID, courseRoot, videosOnly) } } diff --git a/internal/canvas/client.go b/internal/canvas/client.go index 19d443a..09bad89 100644 --- a/internal/canvas/client.go +++ b/internal/canvas/client.go @@ -22,16 +22,18 @@ type Client struct { CourseID string CourseName string FilesOnly bool + VideosOnly bool ModuleNumbers bool downloadedFiles map[string]bool } -func NewClient(httpClient *http.Client, accessToken, courseID string, filesOnly, moduleNumbers bool) *Client { +func NewClient(httpClient *http.Client, accessToken, courseID string, filesOnly, videosOnly, moduleNumbers bool) *Client { return &Client{ HTTPClient: httpClient, AccessToken: accessToken, CourseID: courseID, FilesOnly: filesOnly, + VideosOnly: videosOnly, ModuleNumbers: moduleNumbers, downloadedFiles: make(map[string]bool), } @@ -68,6 +70,10 @@ func (c *Client) GetEnrolledCourses() ([]models.Course, error) { } func (c *Client) DownloadCourseFiles(root string) { + if c.VideosOnly { + fmt.Println("\n[*] Skipping regular course files (videos only mode)") + return + } fmt.Println("\n[*] Fetching regular course files...") fReq, _ := http.NewRequest("GET", fmt.Sprintf("%s/api/v1/courses/%s/folders?per_page=100", config.BaseURL, c.CourseID), nil) @@ -164,14 +170,19 @@ func (c *Client) DownloadModules(courseRoot string) { modName = fmt.Sprintf("[%d] %s", i+1, mod.Name) } + // In videos-only mode everything goes flat into courseRoot. + // In files-only mode everything goes flat into courseRoot. + // Otherwise use the structured Modules/ path. modBaseDir := courseRoot - if !c.FilesOnly { + if !c.FilesOnly && !c.VideosOnly { modBaseDir = filepath.Join(courseRoot, "Modules", utils.Sanitize(modName)) } os.MkdirAll(modBaseDir, 0o755) - if !c.FilesOnly { + if !c.FilesOnly && !c.VideosOnly { fmt.Printf("\n[Module] %s\n", modName) + } else if c.VideosOnly { + fmt.Printf("\n[Module] %s (scanning for videos)\n", modName) } subHeaderStack := []string{} @@ -179,15 +190,16 @@ func (c *Client) DownloadModules(courseRoot string) { for _, item := range mod.Items { + // In videos-only mode always download to the flat courseRoot. targetDir := modBaseDir - if len(subHeaderStack) > 0 && !c.FilesOnly { + if len(subHeaderStack) > 0 && !c.FilesOnly && !c.VideosOnly { targetDir = filepath.Join(modBaseDir, filepath.Join(subHeaderStack...)) } os.MkdirAll(targetDir, 0o755) switch item.Type { case "SubHeader": - if c.FilesOnly { + if c.FilesOnly || c.VideosOnly { continue } currentIndent := item.Indent @@ -207,6 +219,9 @@ func (c *Client) DownloadModules(courseRoot string) { fmt.Printf("%s--- %s ---\n", indent, item.Title) case "File": + if c.VideosOnly { + continue + } c.downloadModuleFile(item, targetDir) case "ExternalTool": diff --git a/internal/panopto/downloader.go b/internal/panopto/downloader.go index 362c544..968b4f7 100644 --- a/internal/panopto/downloader.go +++ b/internal/panopto/downloader.go @@ -212,7 +212,7 @@ func DownloadVideo(httpClient *http.Client, accessToken, courseID, modDir, input } } -func DownloadMainRecordings(httpClient *http.Client, accessToken, courseID, root string) { +func DownloadMainRecordings(httpClient *http.Client, accessToken, courseID, root string, videosOnly bool) { fmt.Println("\n[*] Checking for main Panopto recordings...") jar, _ := cookiejar.New(nil) @@ -300,8 +300,18 @@ func DownloadMainRecordings(httpClient *http.Client, accessToken, courseID, root } os.WriteFile(cookieFile, []byte(cData), 0o644) - recordingsDir := filepath.Join(root, "Recordings") - os.MkdirAll(recordingsDir, 0o755) + var downloadDir string + var outputTemplate string + + if videosOnly { + // Flat: all videos go directly into course root + downloadDir = root + outputTemplate = "%(title)s.%(ext)s" + } else { + downloadDir = filepath.Join(root, "Recordings") + os.MkdirAll(downloadDir, 0o755) + outputTemplate = "%(playlist_title)s/%(title)s.%(ext)s" + } fmt.Println("[*] Starting yt-dlp download for main recordings...") @@ -310,8 +320,8 @@ func DownloadMainRecordings(httpClient *http.Client, accessToken, courseID, root cmd := exec.Command(ytCmd, "--cookies", cookieFile, "--referer", config.BaseURL+"/", - "-P", recordingsDir, - "-o", "%(playlist_title)s/%(title)s.%(ext)s", + "-P", downloadDir, + "-o", outputTemplate, decodedURL, ) cmd.Stdout = os.Stdout From 43392a4132acf86fac39f9a85e1775b3748b5237 Mon Sep 17 00:00:00 2001 From: Joren Date: Sat, 16 May 2026 19:27:36 +0200 Subject: [PATCH 2/6] fix: normalize List.aspx?folderID= to fragment form for yt-dlp Canvas stores folder links as List.aspx?folderID=X (query param). yt-dlp's PanoptoList extractor requires List.aspx#folderID="X" (fragment with quoted ID) to scope the download to that folder. Without the fragment form it downloaded the entire Panopto instance (1806 items instead of 3). Also drop --no-playlist for list URLs since they are intentional playlists, and use title/%(title)s.%(ext)s output template for them. --- internal/panopto/downloader.go | 57 +++++++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 7 deletions(-) diff --git a/internal/panopto/downloader.go b/internal/panopto/downloader.go index 968b4f7..f059785 100644 --- a/internal/panopto/downloader.go +++ b/internal/panopto/downloader.go @@ -17,6 +17,27 @@ import ( "git.directme.in/Joren/CanvasArchiver/internal/utils" ) +// normalizePanoptoURL converts the query-param form that Canvas stores +// (List.aspx?folderID=X) to the fragment form that yt-dlp's PanoptoList +// extractor understands (List.aspx#folderID="X"). Without this yt-dlp +// ignores the folder filter and downloads the entire Panopto instance. +func normalizePanoptoURL(rawURL string) string { + parsed, err := url.Parse(rawURL) + if err != nil { + return rawURL + } + if strings.Contains(parsed.Path, "List.aspx") { + folderID := parsed.Query().Get("folderID") + if folderID != "" { + // Strip query, set fragment: List.aspx#folderID="" + parsed.RawQuery = "" + parsed.Fragment = fmt.Sprintf(`folderID="%s"`, folderID) + return parsed.String() + } + } + return rawURL +} + func getYoutubeDLCommand() string { exePath, err := os.Executable() if err == nil { @@ -192,13 +213,35 @@ func DownloadVideo(httpClient *http.Client, accessToken, courseID, modDir, input fmt.Printf(" [*] Downloading video: %s\n", title) ytCmd := getYoutubeDLCommand() - cmd := exec.Command(ytCmd, - "--no-playlist", - "--cookies", cookieFile, - "--referer", config.BaseURL+"/", - "-P", modDir, - "-o", utils.Sanitize(title)+".%(ext)s", - targetURL) + + // Normalize folder URLs so yt-dlp scopes to the right folder. + normalizedURL := normalizePanoptoURL(targetURL) + + // Folder/list URLs are intentional playlists; don't pass --no-playlist. + isList := strings.Contains(normalizedURL, "List.aspx") + var outputTpl string + var args []string + if isList { + outputTpl = utils.Sanitize(title) + "/%(title)s.%(ext)s" + args = []string{ + "--cookies", cookieFile, + "--referer", config.BaseURL + "/", + "-P", modDir, + "-o", outputTpl, + normalizedURL, + } + } else { + outputTpl = utils.Sanitize(title) + ".%(ext)s" + args = []string{ + "--no-playlist", + "--cookies", cookieFile, + "--referer", config.BaseURL + "/", + "-P", modDir, + "-o", outputTpl, + normalizedURL, + } + } + cmd := exec.Command(ytCmd, args...) cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr From 333e784ce92277b039df5ae28b1eb2a20de84f96 Mon Sep 17 00:00:00 2001 From: Joren Date: Sat, 16 May 2026 22:33:22 +0200 Subject: [PATCH 3/6] fix: use module_item_redirect OAuth flow for ExternalUrl Panopto items MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Canvas app authenticates ExternalUrl items via: GET session_token?return_to=?display=borderless → GET session_url → OAuth2 confirm → POST /login/oauth2/accept → Panopto Login.aspx?code= → CookieCheck.aspx (sets Panopto cookies) Our previous code used sessionless_launch (the course-level Panopto tool) for direct Panopto links, which gave wrong/incomplete Panopto cookies. Added DownloadExternalPanoptoURL() that replicates the exact app flow. Falls back to DownloadVideo if no Panopto cookies are obtained. Both List.aspx (folder playlists) and Viewer.aspx (single videos) are handled with the correct yt-dlp flags and output templates. --- internal/canvas/client.go | 2 +- internal/panopto/downloader.go | 117 +++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 1 deletion(-) diff --git a/internal/canvas/client.go b/internal/canvas/client.go index 09bad89..31c4bfd 100644 --- a/internal/canvas/client.go +++ b/internal/canvas/client.go @@ -240,7 +240,7 @@ func (c *Client) DownloadModules(courseRoot string) { indent := strings.Repeat(" ", len(subHeaderStack)+1) fmt.Printf("%s- Found direct video link: %s\n", indent, item.Title) - panopto.DownloadVideo(c.HTTPClient, c.AccessToken, c.CourseID, targetDir, item.ExternalURL, item.Title) + panopto.DownloadExternalPanoptoURL(c.HTTPClient, c.AccessToken, item.URL, item.ExternalURL, targetDir, item.Title) } case "Page": diff --git a/internal/panopto/downloader.go b/internal/panopto/downloader.go index f059785..4328b63 100644 --- a/internal/panopto/downloader.go +++ b/internal/panopto/downloader.go @@ -376,3 +376,120 @@ func DownloadMainRecordings(httpClient *http.Client, accessToken, courseID, root fmt.Println("[!] No main recordings available or handshake failed") } } + +// DownloadExternalPanoptoURL authenticates via the module_item_redirect path +// (exactly what the Canvas mobile app does for ExternalUrl items) and then +// runs yt-dlp against the given Panopto URL with the resulting cookies. +// +// moduleItemURL – item.URL (https://canvas.vub.be/api/v1/.../module_item_redirect/) +// panoptoURL – item.ExternalURL (https://vub.cloud.panopto.eu/Panopto/Pages/...) +func DownloadExternalPanoptoURL(httpClient *http.Client, accessToken, moduleItemURL, panoptoURL, modDir, title string) { + jar, _ := cookiejar.New(nil) + // Follow all redirects automatically so Login.aspx → CookieCheck.aspx sets cookies. + webClient := &http.Client{Jar: jar} + + // Step 1: exchange the module item URL for a Canvas web session URL. + returnTo := moduleItemURL + "?display=borderless" + bridgeReq, _ := http.NewRequest("GET", + config.BaseURL+"/login/session_token?return_to="+url.QueryEscape(returnTo), nil) + bridgeReq.Header.Set("Authorization", "Bearer "+accessToken) + bridgeReq.Header.Set("User-Agent", config.UserAgent) + bResp, err := httpClient.Do(bridgeReq) + if err != nil { + fmt.Printf(" [!] session_token request failed: %v\n", err) + return + } + var bridgeData struct { + SessionURL string `json:"session_url"` + } + json.NewDecoder(bResp.Body).Decode(&bridgeData) + bResp.Body.Close() + + if bridgeData.SessionURL == "" { + fmt.Printf(" [!] No session_url returned (skipping %s)\n", title) + return + } + + // Step 2: GET session URL → Canvas shows OAuth2 confirm page for Panopto. + formResp, err := webClient.Get(bridgeData.SessionURL) + if err != nil { + fmt.Printf(" [!] Failed to load OAuth confirm page: %v\n", err) + return + } + formHTMLBytes, _ := io.ReadAll(formResp.Body) + formResp.Body.Close() + formHTML := string(formHTMLBytes) + + if strings.Contains(formHTML, "U hebt geen toegang") || strings.Contains(formHTML, "You do not have access") { + fmt.Printf(" [!] Access denied. Skipping %s\n", title) + return + } + + // Step 3: POST /login/oauth2/accept → Canvas redirects to Panopto Login.aspx?code=... + // webClient follows all hops automatically, ending with Panopto setting cookies. + action := utils.ResolveAction(bridgeData.SessionURL, formHTML) + formData := utils.ExtractFormFields(formHTML) + + postReq, _ := http.NewRequest("POST", action, strings.NewReader(formData.Encode())) + postReq.Header.Set("Content-Type", "application/x-www-form-urlencoded") + postReq.Header.Set("User-Agent", config.UserAgent) + postReq.Header.Set("Origin", config.BaseURL) + postReq.Header.Set("Referer", bridgeData.SessionURL) + postResp, err := webClient.Do(postReq) + if err != nil { + fmt.Printf(" [!] OAuth accept POST failed: %v\n", err) + return + } + io.ReadAll(postResp.Body) + postResp.Body.Close() + + // webClient.Jar now holds Panopto session cookies from the CookieCheck chain. + panoptoDomain, _ := url.Parse("https://vub.cloud.panopto.eu") + cookies := webClient.Jar.Cookies(panoptoDomain) + if len(cookies) == 0 { + fmt.Printf(" [!] No Panopto cookies after auth – falling back for: %s\n", title) + DownloadVideo(httpClient, accessToken, "", modDir, panoptoURL, title) + return + } + + cookieFile := filepath.Join(modDir, ".cookies_ext.txt") + cData := "# Netscape HTTP Cookie File\n" + for _, c := range cookies { + cData += fmt.Sprintf(".vub.cloud.panopto.eu\tTRUE\t/\tTRUE\t0\t%s\t%s\n", c.Name, c.Value) + } + os.WriteFile(cookieFile, []byte(cData), 0o644) + + fmt.Printf(" [*] Downloading: %s\n", title) + + normalizedURL := normalizePanoptoURL(panoptoURL) + isList := strings.Contains(normalizedURL, "List.aspx") + + ytCmd := getYoutubeDLCommand() + var args []string + if isList { + args = []string{ + "--cookies", cookieFile, + "--referer", config.BaseURL + "/", + "-P", modDir, + "-o", utils.Sanitize(title) + "/%(title)s.%(ext)s", + normalizedURL, + } + } else { + args = []string{ + "--no-playlist", + "--cookies", cookieFile, + "--referer", config.BaseURL + "/", + "-P", modDir, + "-o", utils.Sanitize(title) + ".%(ext)s", + normalizedURL, + } + } + + cmd := exec.Command(ytCmd, args...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + fmt.Printf(" [!] yt-dlp failed: %v\n", err) + } + os.Remove(cookieFile) +} From ea9d4dc2dc9660d35235fef21f032fe4646c9bea Mon Sep 17 00:00:00 2001 From: Joren Date: Sat, 16 May 2026 22:37:45 +0200 Subject: [PATCH 4/6] fix: use item.HTMLURL (module_item_redirect) not item.URL for ExternalUrl auth For ExternalUrl module items, the Canvas API returns url=null and html_url=.../module_item_redirect/. We were passing item.URL (empty) causing the session_token call to return no session_url. --- internal/canvas/client.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/canvas/client.go b/internal/canvas/client.go index 31c4bfd..4c7a9c8 100644 --- a/internal/canvas/client.go +++ b/internal/canvas/client.go @@ -240,7 +240,7 @@ func (c *Client) DownloadModules(courseRoot string) { indent := strings.Repeat(" ", len(subHeaderStack)+1) fmt.Printf("%s- Found direct video link: %s\n", indent, item.Title) - panopto.DownloadExternalPanoptoURL(c.HTTPClient, c.AccessToken, item.URL, item.ExternalURL, targetDir, item.Title) + panopto.DownloadExternalPanoptoURL(c.HTTPClient, c.AccessToken, item.HTMLURL, item.ExternalURL, targetDir, item.Title) } case "Page": From 522a8b22f8b4b9829a7911f1d15e78a6d2c718d5 Mon Sep 17 00:00:00 2001 From: Joren Date: Sat, 16 May 2026 22:51:38 +0200 Subject: [PATCH 5/6] Downloader fixes with videos --- internal/canvas/client.go | 2 +- internal/panopto/downloader.go | 105 +++++++++++++++++++++++++++------ 2 files changed, 87 insertions(+), 20 deletions(-) diff --git a/internal/canvas/client.go b/internal/canvas/client.go index 4c7a9c8..09bad89 100644 --- a/internal/canvas/client.go +++ b/internal/canvas/client.go @@ -240,7 +240,7 @@ func (c *Client) DownloadModules(courseRoot string) { indent := strings.Repeat(" ", len(subHeaderStack)+1) fmt.Printf("%s- Found direct video link: %s\n", indent, item.Title) - panopto.DownloadExternalPanoptoURL(c.HTTPClient, c.AccessToken, item.HTMLURL, item.ExternalURL, targetDir, item.Title) + panopto.DownloadVideo(c.HTTPClient, c.AccessToken, c.CourseID, targetDir, item.ExternalURL, item.Title) } case "Page": diff --git a/internal/panopto/downloader.go b/internal/panopto/downloader.go index 4328b63..6f0f392 100644 --- a/internal/panopto/downloader.go +++ b/internal/panopto/downloader.go @@ -384,14 +384,23 @@ func DownloadMainRecordings(httpClient *http.Client, accessToken, courseID, root // moduleItemURL – item.URL (https://canvas.vub.be/api/v1/.../module_item_redirect/) // panoptoURL – item.ExternalURL (https://vub.cloud.panopto.eu/Panopto/Pages/...) func DownloadExternalPanoptoURL(httpClient *http.Client, accessToken, moduleItemURL, panoptoURL, modDir, title string) { + fmt.Printf(" [dbg] moduleItemURL: %s\n", moduleItemURL) + fmt.Printf(" [dbg] panoptoURL: %s\n", panoptoURL) + jar, _ := cookiejar.New(nil) - // Follow all redirects automatically so Login.aspx → CookieCheck.aspx sets cookies. - webClient := &http.Client{Jar: jar} + // Manual redirect following so we can track cross-domain hops correctly. + noRedirectClient := &http.Client{ + Jar: jar, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + return http.ErrUseLastResponse + }, + } // Step 1: exchange the module item URL for a Canvas web session URL. returnTo := moduleItemURL + "?display=borderless" - bridgeReq, _ := http.NewRequest("GET", - config.BaseURL+"/login/session_token?return_to="+url.QueryEscape(returnTo), nil) + sessionTokenURL := config.BaseURL + "/login/session_token?return_to=" + url.QueryEscape(returnTo) + fmt.Printf(" [dbg] session_token URL: %s\n", sessionTokenURL) + bridgeReq, _ := http.NewRequest("GET", sessionTokenURL, nil) bridgeReq.Header.Set("Authorization", "Bearer "+accessToken) bridgeReq.Header.Set("User-Agent", config.UserAgent) bResp, err := httpClient.Do(bridgeReq) @@ -399,26 +408,51 @@ func DownloadExternalPanoptoURL(httpClient *http.Client, accessToken, moduleItem fmt.Printf(" [!] session_token request failed: %v\n", err) return } + rawBody, _ := io.ReadAll(bResp.Body) + bResp.Body.Close() + fmt.Printf(" [dbg] session_token response (%d): %s\n", bResp.StatusCode, string(rawBody)) + var bridgeData struct { SessionURL string `json:"session_url"` } - json.NewDecoder(bResp.Body).Decode(&bridgeData) - bResp.Body.Close() + json.Unmarshal(rawBody, &bridgeData) if bridgeData.SessionURL == "" { fmt.Printf(" [!] No session_url returned (skipping %s)\n", title) return } + fmt.Printf(" [dbg] session_url: %s\n", bridgeData.SessionURL) // Step 2: GET session URL → Canvas shows OAuth2 confirm page for Panopto. - formResp, err := webClient.Get(bridgeData.SessionURL) - if err != nil { - fmt.Printf(" [!] Failed to load OAuth confirm page: %v\n", err) - return + // We follow redirects manually so cross-domain hops (canvas→panopto) work correctly. + currentURL := bridgeData.SessionURL + var formHTML string + var formFinalURL string + for hop := 0; hop < 10; hop++ { + hopReq, _ := http.NewRequest("GET", currentURL, nil) + hopReq.Header.Set("User-Agent", config.UserAgent) + hopResp, err := noRedirectClient.Do(hopReq) + if err != nil { + fmt.Printf(" [!] Failed hop %d to %s: %v\n", hop, currentURL, err) + return + } + body, _ := io.ReadAll(hopResp.Body) + hopResp.Body.Close() + fmt.Printf(" [dbg] hop %d: status=%d url=%s body=%d\n", hop, hopResp.StatusCode, currentURL, len(body)) + if hopResp.StatusCode == 301 || hopResp.StatusCode == 302 || hopResp.StatusCode == 303 { + loc, _ := hopResp.Location() + if loc == nil { + fmt.Printf(" [!] Redirect with no Location header\n") + return + } + currentURL = loc.String() + continue + } + formHTML = string(body) + formFinalURL = currentURL + break } - formHTMLBytes, _ := io.ReadAll(formResp.Body) - formResp.Body.Close() - formHTML := string(formHTMLBytes) + fmt.Printf(" [dbg] OAuth page final URL: %s, length: %d\n", formFinalURL, len(formHTML)) if strings.Contains(formHTML, "U hebt geen toegang") || strings.Contains(formHTML, "You do not have access") { fmt.Printf(" [!] Access denied. Skipping %s\n", title) @@ -427,25 +461,58 @@ func DownloadExternalPanoptoURL(httpClient *http.Client, accessToken, moduleItem // Step 3: POST /login/oauth2/accept → Canvas redirects to Panopto Login.aspx?code=... // webClient follows all hops automatically, ending with Panopto setting cookies. - action := utils.ResolveAction(bridgeData.SessionURL, formHTML) + action := utils.ResolveAction(formFinalURL, formHTML) formData := utils.ExtractFormFields(formHTML) + fmt.Printf(" [dbg] POST action: %s\n", action) postReq, _ := http.NewRequest("POST", action, strings.NewReader(formData.Encode())) postReq.Header.Set("Content-Type", "application/x-www-form-urlencoded") postReq.Header.Set("User-Agent", config.UserAgent) postReq.Header.Set("Origin", config.BaseURL) - postReq.Header.Set("Referer", bridgeData.SessionURL) - postResp, err := webClient.Do(postReq) + postReq.Header.Set("Referer", formFinalURL) + postResp, err := noRedirectClient.Do(postReq) if err != nil { fmt.Printf(" [!] OAuth accept POST failed: %v\n", err) return } - io.ReadAll(postResp.Body) + postBody, _ := io.ReadAll(postResp.Body) postResp.Body.Close() + fmt.Printf(" [dbg] POST response status: %d, body len: %d\n", postResp.StatusCode, len(postBody)) - // webClient.Jar now holds Panopto session cookies from the CookieCheck chain. + // Now follow the redirect chain from the POST (canvas → panopto Login.aspx → CookieCheck). + if postResp.StatusCode == 302 || postResp.StatusCode == 303 { + currentURL2 := "" + if loc, _ := postResp.Location(); loc != nil { + currentURL2 = loc.String() + } + for hop := 0; hop < 10 && currentURL2 != ""; hop++ { + hopReq, _ := http.NewRequest("GET", currentURL2, nil) + hopReq.Header.Set("User-Agent", config.UserAgent) + hopResp, err := noRedirectClient.Do(hopReq) + if err != nil { + fmt.Printf(" [dbg] post-redirect hop %d error: %v\n", hop, err) + break + } + body2, _ := io.ReadAll(hopResp.Body) + hopResp.Body.Close() + fmt.Printf(" [dbg] post-redirect hop %d: status=%d url=%s body=%d\n", hop, hopResp.StatusCode, currentURL2, len(body2)) + if hopResp.StatusCode == 301 || hopResp.StatusCode == 302 || hopResp.StatusCode == 303 { + if loc, _ := hopResp.Location(); loc != nil { + currentURL2 = loc.String() + continue + } + } + break + } + } + + // noRedirectClient.Jar now holds Panopto session cookies from the CookieCheck chain. panoptoDomain, _ := url.Parse("https://vub.cloud.panopto.eu") - cookies := webClient.Jar.Cookies(panoptoDomain) + cookies := noRedirectClient.Jar.Cookies(panoptoDomain) + fmt.Printf(" [dbg] Panopto cookies: %d\n", len(cookies)) + for _, c := range cookies { + fmt.Printf(" [dbg] cookie: %s=%s\n", c.Name, c.Value[:min(20, len(c.Value))]) + } if len(cookies) == 0 { fmt.Printf(" [!] No Panopto cookies after auth – falling back for: %s\n", title) DownloadVideo(httpClient, accessToken, "", modDir, panoptoURL, title) From 2776d057cd926c87e617c001e9db636d78fd9191 Mon Sep 17 00:00:00 2001 From: Joren Date: Sat, 16 May 2026 22:53:11 +0200 Subject: [PATCH 6/6] fix: revert ExternalUrl auth to DownloadVideo, remove dead debug code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DownloadVideo already handles the Panopto Login.aspx canvas auth form correctly through its multi-step form submission logic. The real fix for ExternalUrl Panopto folder links was the URL normalization (List.aspx?folderID= → List.aspx#folderID="...") already in DownloadVideo. Remove DownloadExternalPanoptoURL (dead code) and all debug prints. Update README with -vo flag documentation. --- README.md | 12 ++- internal/panopto/downloader.go | 183 --------------------------------- 2 files changed, 10 insertions(+), 185 deletions(-) diff --git a/README.md b/README.md index 4a64b2c..566791b 100644 --- a/README.md +++ b/README.md @@ -21,11 +21,16 @@ go build -o canvasarchiver ./cmd/canvasarchiver ./canvasarchiver ``` - Or for files-only mode: + Or for files-only mode (all files flat, no videos): ```bash ./canvasarchiver -fo ``` + Or for videos-only mode (all Panopto videos flat, no files): + ```bash + ./canvasarchiver -vo + ``` + 2. On first run, you'll be prompted to authenticate: - Visit the provided OAuth URL - Authorize the application @@ -38,11 +43,14 @@ go build -o canvasarchiver ./cmd/canvasarchiver - Module content (to `Modules/`) - Panopto recordings (to `Recordings/`) + In `-vo` mode, only videos are downloaded — all into the course root directory (no subdirectories). + ### Flags | Flag | Description | |------|-------------| -| `-fo` | Files only mode - download all files to a single directory without module structure | +| `-fo` | Files only — download all files flat into one directory; skips videos and module structure | +| `-vo` | Videos only — scan recordings and all module video items, download everything flat into one directory; skips regular files | | `-me` | Download all enrolled courses | | `-n` | Prefix modules with order numbers `[1]`, `[2]`, etc. | diff --git a/internal/panopto/downloader.go b/internal/panopto/downloader.go index 6f0f392..7ef90c2 100644 --- a/internal/panopto/downloader.go +++ b/internal/panopto/downloader.go @@ -377,186 +377,3 @@ func DownloadMainRecordings(httpClient *http.Client, accessToken, courseID, root } } -// DownloadExternalPanoptoURL authenticates via the module_item_redirect path -// (exactly what the Canvas mobile app does for ExternalUrl items) and then -// runs yt-dlp against the given Panopto URL with the resulting cookies. -// -// moduleItemURL – item.URL (https://canvas.vub.be/api/v1/.../module_item_redirect/) -// panoptoURL – item.ExternalURL (https://vub.cloud.panopto.eu/Panopto/Pages/...) -func DownloadExternalPanoptoURL(httpClient *http.Client, accessToken, moduleItemURL, panoptoURL, modDir, title string) { - fmt.Printf(" [dbg] moduleItemURL: %s\n", moduleItemURL) - fmt.Printf(" [dbg] panoptoURL: %s\n", panoptoURL) - - jar, _ := cookiejar.New(nil) - // Manual redirect following so we can track cross-domain hops correctly. - noRedirectClient := &http.Client{ - Jar: jar, - CheckRedirect: func(req *http.Request, via []*http.Request) error { - return http.ErrUseLastResponse - }, - } - - // Step 1: exchange the module item URL for a Canvas web session URL. - returnTo := moduleItemURL + "?display=borderless" - sessionTokenURL := config.BaseURL + "/login/session_token?return_to=" + url.QueryEscape(returnTo) - fmt.Printf(" [dbg] session_token URL: %s\n", sessionTokenURL) - bridgeReq, _ := http.NewRequest("GET", sessionTokenURL, nil) - bridgeReq.Header.Set("Authorization", "Bearer "+accessToken) - bridgeReq.Header.Set("User-Agent", config.UserAgent) - bResp, err := httpClient.Do(bridgeReq) - if err != nil { - fmt.Printf(" [!] session_token request failed: %v\n", err) - return - } - rawBody, _ := io.ReadAll(bResp.Body) - bResp.Body.Close() - fmt.Printf(" [dbg] session_token response (%d): %s\n", bResp.StatusCode, string(rawBody)) - - var bridgeData struct { - SessionURL string `json:"session_url"` - } - json.Unmarshal(rawBody, &bridgeData) - - if bridgeData.SessionURL == "" { - fmt.Printf(" [!] No session_url returned (skipping %s)\n", title) - return - } - fmt.Printf(" [dbg] session_url: %s\n", bridgeData.SessionURL) - - // Step 2: GET session URL → Canvas shows OAuth2 confirm page for Panopto. - // We follow redirects manually so cross-domain hops (canvas→panopto) work correctly. - currentURL := bridgeData.SessionURL - var formHTML string - var formFinalURL string - for hop := 0; hop < 10; hop++ { - hopReq, _ := http.NewRequest("GET", currentURL, nil) - hopReq.Header.Set("User-Agent", config.UserAgent) - hopResp, err := noRedirectClient.Do(hopReq) - if err != nil { - fmt.Printf(" [!] Failed hop %d to %s: %v\n", hop, currentURL, err) - return - } - body, _ := io.ReadAll(hopResp.Body) - hopResp.Body.Close() - fmt.Printf(" [dbg] hop %d: status=%d url=%s body=%d\n", hop, hopResp.StatusCode, currentURL, len(body)) - if hopResp.StatusCode == 301 || hopResp.StatusCode == 302 || hopResp.StatusCode == 303 { - loc, _ := hopResp.Location() - if loc == nil { - fmt.Printf(" [!] Redirect with no Location header\n") - return - } - currentURL = loc.String() - continue - } - formHTML = string(body) - formFinalURL = currentURL - break - } - fmt.Printf(" [dbg] OAuth page final URL: %s, length: %d\n", formFinalURL, len(formHTML)) - - if strings.Contains(formHTML, "U hebt geen toegang") || strings.Contains(formHTML, "You do not have access") { - fmt.Printf(" [!] Access denied. Skipping %s\n", title) - return - } - - // Step 3: POST /login/oauth2/accept → Canvas redirects to Panopto Login.aspx?code=... - // webClient follows all hops automatically, ending with Panopto setting cookies. - action := utils.ResolveAction(formFinalURL, formHTML) - formData := utils.ExtractFormFields(formHTML) - fmt.Printf(" [dbg] POST action: %s\n", action) - - postReq, _ := http.NewRequest("POST", action, strings.NewReader(formData.Encode())) - postReq.Header.Set("Content-Type", "application/x-www-form-urlencoded") - postReq.Header.Set("User-Agent", config.UserAgent) - postReq.Header.Set("Origin", config.BaseURL) - postReq.Header.Set("Referer", formFinalURL) - postResp, err := noRedirectClient.Do(postReq) - if err != nil { - fmt.Printf(" [!] OAuth accept POST failed: %v\n", err) - return - } - postBody, _ := io.ReadAll(postResp.Body) - postResp.Body.Close() - fmt.Printf(" [dbg] POST response status: %d, body len: %d\n", postResp.StatusCode, len(postBody)) - - // Now follow the redirect chain from the POST (canvas → panopto Login.aspx → CookieCheck). - if postResp.StatusCode == 302 || postResp.StatusCode == 303 { - currentURL2 := "" - if loc, _ := postResp.Location(); loc != nil { - currentURL2 = loc.String() - } - for hop := 0; hop < 10 && currentURL2 != ""; hop++ { - hopReq, _ := http.NewRequest("GET", currentURL2, nil) - hopReq.Header.Set("User-Agent", config.UserAgent) - hopResp, err := noRedirectClient.Do(hopReq) - if err != nil { - fmt.Printf(" [dbg] post-redirect hop %d error: %v\n", hop, err) - break - } - body2, _ := io.ReadAll(hopResp.Body) - hopResp.Body.Close() - fmt.Printf(" [dbg] post-redirect hop %d: status=%d url=%s body=%d\n", hop, hopResp.StatusCode, currentURL2, len(body2)) - if hopResp.StatusCode == 301 || hopResp.StatusCode == 302 || hopResp.StatusCode == 303 { - if loc, _ := hopResp.Location(); loc != nil { - currentURL2 = loc.String() - continue - } - } - break - } - } - - // noRedirectClient.Jar now holds Panopto session cookies from the CookieCheck chain. - panoptoDomain, _ := url.Parse("https://vub.cloud.panopto.eu") - cookies := noRedirectClient.Jar.Cookies(panoptoDomain) - fmt.Printf(" [dbg] Panopto cookies: %d\n", len(cookies)) - for _, c := range cookies { - fmt.Printf(" [dbg] cookie: %s=%s\n", c.Name, c.Value[:min(20, len(c.Value))]) - } - if len(cookies) == 0 { - fmt.Printf(" [!] No Panopto cookies after auth – falling back for: %s\n", title) - DownloadVideo(httpClient, accessToken, "", modDir, panoptoURL, title) - return - } - - cookieFile := filepath.Join(modDir, ".cookies_ext.txt") - cData := "# Netscape HTTP Cookie File\n" - for _, c := range cookies { - cData += fmt.Sprintf(".vub.cloud.panopto.eu\tTRUE\t/\tTRUE\t0\t%s\t%s\n", c.Name, c.Value) - } - os.WriteFile(cookieFile, []byte(cData), 0o644) - - fmt.Printf(" [*] Downloading: %s\n", title) - - normalizedURL := normalizePanoptoURL(panoptoURL) - isList := strings.Contains(normalizedURL, "List.aspx") - - ytCmd := getYoutubeDLCommand() - var args []string - if isList { - args = []string{ - "--cookies", cookieFile, - "--referer", config.BaseURL + "/", - "-P", modDir, - "-o", utils.Sanitize(title) + "/%(title)s.%(ext)s", - normalizedURL, - } - } else { - args = []string{ - "--no-playlist", - "--cookies", cookieFile, - "--referer", config.BaseURL + "/", - "-P", modDir, - "-o", utils.Sanitize(title) + ".%(ext)s", - normalizedURL, - } - } - - cmd := exec.Command(ytCmd, args...) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - if err := cmd.Run(); err != nil { - fmt.Printf(" [!] yt-dlp failed: %v\n", err) - } - os.Remove(cookieFile) -}