Files
CanvasArchiver/internal/panopto/downloader.go
2026-05-16 22:51:38 +02:00

563 lines
18 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package panopto
import (
"encoding/json"
"fmt"
"io"
"net/http"
"net/http/cookiejar"
"net/url"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
"git.directme.in/Joren/CanvasArchiver/internal/config"
"git.directme.in/Joren/CanvasArchiver/internal/utils"
)
// normalizePanoptoURL rewrites a Panopto folder link from the query-string
// form that Canvas stores (List.aspx?folderID=X) into the fragment form
// (List.aspx#folderID="X") that yt-dlp's PanoptoList extractor reads. Per the
// original author's note, yt-dlp otherwise ignores the folder filter and
// downloads the entire Panopto instance.
//
// The whole query string is dropped when the rewrite happens. Any URL that
// fails to parse, whose path does not contain "List.aspx", or that carries no
// folderID parameter is returned unchanged.
func normalizePanoptoURL(rawURL string) string {
	u, parseErr := url.Parse(rawURL)
	if parseErr != nil {
		return rawURL
	}
	if !strings.Contains(u.Path, "List.aspx") {
		return rawURL
	}

	id := u.Query().Get("folderID")
	if id == "" {
		return rawURL
	}

	// Replace the query with the fragment: List.aspx#folderID="<id>".
	u.RawQuery = ""
	u.Fragment = fmt.Sprintf(`folderID="%s"`, id)
	return u.String()
}
// getYoutubeDLCommand picks the yt-dlp binary to execute.
//
// When the running executable's path can be determined, a "yt-dlp.exe" next
// to that executable wins, followed by a "yt-dlp.exe" in the current working
// directory (returned as an absolute path). In every other case the bare
// command name "yt-dlp" is returned, leaving resolution to PATH.
func getYoutubeDLCommand() string {
	const (
		fallback = "yt-dlp"
		bundled  = "yt-dlp.exe"
	)

	self, err := os.Executable()
	if err != nil {
		return fallback
	}

	// Prefer a copy shipped alongside the running binary.
	sibling := filepath.Join(filepath.Dir(self), bundled)
	if _, statErr := os.Stat(sibling); statErr == nil {
		return sibling
	}

	// Otherwise look in the current working directory.
	if _, statErr := os.Stat(bundled); statErr != nil {
		return fallback
	}
	resolved, _ := filepath.Abs(bundled)
	return resolved
}
// DownloadVideo runs the Canvas-to-Panopto launch handshake and, when it
// succeeds, invokes yt-dlp with the cookies collected during the handshake.
//
// inputURL is used as the launch URL when it contains "/api/v1/". Otherwise
// it is treated as a direct link: the course-level sessionless launch for
// courseID is used to obtain cookies and inputURL itself is passed to yt-dlp.
// Output is written below modDir and named after the sanitized title; folder
// (List.aspx) URLs are downloaded as playlists into a sub-directory.
//
// Nothing is returned. Every failure is reported on stdout and the video is
// skipped. The temporary cookie file is removed before returning.
func DownloadVideo(httpClient *http.Client, accessToken, courseID, modDir, inputURL, title string) {
	jar, err := cookiejar.New(nil)
	if err != nil {
		fmt.Printf(" [!] Failed to create cookie jar: %v\n", err)
		return
	}
	// Redirects are not followed automatically so that 302/303 responses and
	// their Location headers can be inspected below.
	panoptoClient := &http.Client{
		Jar: jar,
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			return http.ErrUseLastResponse
		},
	}

	isDirectLink := !strings.Contains(inputURL, "/api/v1/")
	launchURL := inputURL
	if isDirectLink {
		launchURL = fmt.Sprintf("%s/api/v1/courses/%s/external_tools/sessionless_launch?id=%s&launch_type=course_navigation",
			config.BaseURL, courseID, config.PanoptoID)
	}

	// Ask Canvas for the LTI launch URL.
	req, err := http.NewRequest("GET", launchURL, nil)
	if err != nil {
		fmt.Printf(" [!] Failed to get launch URL: %v\n", err)
		return
	}
	req.Header.Set("Authorization", "Bearer "+accessToken)
	req.Header.Set("User-Agent", config.UserAgent)
	resp, err := httpClient.Do(req)
	if err != nil {
		fmt.Printf(" [!] Failed to get launch URL: %v\n", err)
		return
	}
	var launchData struct {
		URL string `json:"url"`
	}
	// A decode failure leaves URL empty, which the check below reports.
	_ = json.NewDecoder(resp.Body).Decode(&launchData)
	resp.Body.Close()
	if launchData.URL == "" {
		fmt.Printf(" [!] No launch URL found (Video skipped)\n")
		return
	}

	// Exchange the API token for a web session URL that returns to the launch.
	bridgeReq, err := http.NewRequest("GET",
		config.BaseURL+"/login/session_token?return_to="+url.QueryEscape(launchData.URL), nil)
	if err != nil {
		fmt.Printf(" [!] Failed to build session_token request: %v\n", err)
		return
	}
	bridgeReq.Header.Set("Authorization", "Bearer "+accessToken)
	bridgeReq.Header.Set("User-Agent", config.UserAgent)
	bResp, err := httpClient.Do(bridgeReq)
	if err != nil {
		fmt.Printf(" [!] session_token request failed: %v\n", err)
		return
	}
	var bridgeData struct {
		SessionURL string `json:"session_url"`
	}
	// A decode failure leaves SessionURL empty, which the check below reports.
	_ = json.NewDecoder(bResp.Body).Decode(&bridgeData)
	bResp.Body.Close()
	if bridgeData.SessionURL == "" {
		fmt.Printf(" [!] No session_url returned (Video skipped)\n")
		return
	}

	// Fetch the launch form page.
	formResp, err := httpClient.Get(bridgeData.SessionURL)
	if err != nil {
		fmt.Printf(" [!] Failed to open session URL: %v\n", err)
		return
	}
	formHTMLBytes, err := io.ReadAll(formResp.Body)
	formResp.Body.Close()
	if err != nil {
		fmt.Printf(" [!] Failed to read launch form: %v\n", err)
		return
	}
	formHTML := string(formHTMLBytes)
	if strings.Contains(formHTML, "U hebt geen toegang") || strings.Contains(formHTML, "You do not have access") {
		fmt.Printf(" [!] Access denied by Panopto (U hebt geen toegang). Skipping.\n")
		return
	}

	// Submit the launch form with the cookie-collecting client.
	action := utils.ResolveAction(bridgeData.SessionURL, formHTML)
	formData := utils.ExtractFormFields(formHTML)
	pReq, err := http.NewRequest("POST", action, strings.NewReader(formData.Encode()))
	if err != nil {
		fmt.Printf(" [!] Panopto POST failed: %v\n", err)
		return
	}
	pReq.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	pReq.Header.Set("User-Agent", config.UserAgent)
	pReq.Header.Set("Origin", config.BaseURL)
	pReq.Header.Set("Referer", config.BaseURL+"/")
	pResp, err := panoptoClient.Do(pReq)
	if err != nil {
		fmt.Printf(" [!] Panopto POST failed: %v\n", err)
		return
	}
	pHTMLBytes, err := io.ReadAll(pResp.Body)
	pResp.Body.Close()
	if err != nil {
		fmt.Printf(" [!] Failed to read Panopto response: %v\n", err)
		return
	}
	pHTML := string(pHTMLBytes)

	redirectRegex := regexp.MustCompile(`window\.location\.replace\('([^']+)'\)`)
	// The captured target uses \xNN escapes; turning them into %NN lets
	// QueryUnescape decode them. An undecodable target yields "" and is
	// reported as a failed handshake further down.
	decodeTarget := func(escaped string) string {
		decoded, _ := url.QueryUnescape(strings.ReplaceAll(escaped, `\x`, "%"))
		return decoded
	}

	var finalURL string
	if match := redirectRegex.FindStringSubmatch(pHTML); len(match) > 1 {
		finalURL = decodeTarget(match[1])
	} else {
		// No inline redirect yet: re-submit the returned form with the
		// checkedLtiPostMessage event fields set.
		action2 := utils.ResolveAction(action, pHTML)
		validData := utils.ExtractFormFields(pHTML)
		validData.Set("__EVENTTARGET", "checkedLtiPostMessage")
		validData.Set("__EVENTARGUMENT", "")
		validData.Set("checkedLtiPostMessage", "true")
		validData.Set("ltiPostMessage", "")
		finalReq, err := http.NewRequest("POST", action2, strings.NewReader(validData.Encode()))
		if err != nil {
			fmt.Printf(" [!] Panopto follow-up POST failed: %v\n", err)
			return
		}
		finalReq.Header.Set("Content-Type", "application/x-www-form-urlencoded")
		finalReq.Header.Set("User-Agent", config.UserAgent)
		finalReq.Header.Set("Origin", "https://vub.cloud.panopto.eu")
		finalReq.Header.Set("Referer", action)
		// A transport error here leaves finalURL empty and is reported as a
		// failed handshake below.
		if finalResp, err := panoptoClient.Do(finalReq); err == nil {
			finalHTMLBytes, _ := io.ReadAll(finalResp.Body)
			finalResp.Body.Close()
			if finalResp.StatusCode == http.StatusFound || finalResp.StatusCode == http.StatusSeeOther {
				if loc, _ := finalResp.Location(); loc != nil {
					finalURL = loc.String()
				}
			} else if finalMatch := redirectRegex.FindStringSubmatch(string(finalHTMLBytes)); len(finalMatch) > 1 {
				finalURL = decodeTarget(finalMatch[1])
			}
		}
	}

	// This is for making sure yt-dlp does not auto-start downloading all
	// videos when access to a hyperlink is denied.
	if finalURL == "" || strings.Contains(finalURL, "NonFatalError") {
		fmt.Printf(" [!] Handshake failed for: %s\n", title)
		return
	}

	targetURL := finalURL
	if isDirectLink {
		targetURL = inputURL
		checkReq, err := http.NewRequest("GET", targetURL, nil)
		if err != nil {
			fmt.Printf(" [!] Invalid video URL %q: %v\n", targetURL, err)
			return
		}
		checkReq.Header.Set("User-Agent", config.UserAgent)
		// A transport error skips the probe; yt-dlp is still attempted.
		if checkResp, err := panoptoClient.Do(checkReq); err == nil {
			checkResp.Body.Close()
			if checkResp.StatusCode == http.StatusFound || checkResp.StatusCode == http.StatusSeeOther {
				loc, _ := checkResp.Location()
				if loc != nil && (strings.Contains(loc.String(), "Login.aspx") || strings.Contains(loc.String(), "Auth")) {
					fmt.Printf(" [!] Video inaccessible (redirects to Login). Skipping to prevent mass download.\n")
					return
				}
			}
		}
	}

	// Hand the collected cookies to yt-dlp in Netscape cookie-file format.
	cookieFile := filepath.Join(modDir, ".cookies_temp.txt")
	var cData strings.Builder
	cData.WriteString("# Netscape HTTP Cookie File\n")
	panoptoDomain, _ := url.Parse("https://vub.cloud.panopto.eu") // constant URL, cannot fail
	for _, cookie := range panoptoClient.Jar.Cookies(panoptoDomain) {
		fmt.Fprintf(&cData, ".vub.cloud.panopto.eu\tTRUE\t/\tTRUE\t0\t%s\t%s\n", cookie.Name, cookie.Value)
	}
	// The file holds live session cookies, so keep it owner-only.
	if err := os.WriteFile(cookieFile, []byte(cData.String()), 0o600); err != nil {
		fmt.Printf(" [!] Failed to write cookie file: %v\n", err)
		return
	}
	defer os.Remove(cookieFile)

	fmt.Printf(" [*] Downloading video: %s\n", title)
	ytCmd := getYoutubeDLCommand()
	// Normalize folder URLs so yt-dlp scopes to the right folder.
	normalizedURL := normalizePanoptoURL(targetURL)

	// Folder/list URLs are intentional playlists; don't pass --no-playlist.
	var args []string
	var outputTpl string
	if strings.Contains(normalizedURL, "List.aspx") {
		outputTpl = utils.Sanitize(title) + "/%(title)s.%(ext)s"
	} else {
		outputTpl = utils.Sanitize(title) + ".%(ext)s"
		args = append(args, "--no-playlist")
	}
	args = append(args,
		"--cookies", cookieFile,
		"--referer", config.BaseURL+"/",
		"-P", modDir,
		"-o", outputTpl,
		normalizedURL,
	)

	cmd := exec.Command(ytCmd, args...)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		fmt.Printf(" [!] yt-dlp failed: %v\n", err)
	}
}
// DownloadMainRecordings downloads the recordings reachable through the
// course-level Panopto launch for courseID.
//
// It performs the Canvas sessionless launch, submits the resulting forms with
// a cookie-collecting client, extracts the Panopto URL from the
// window.location.replace(...) redirect in the response, and runs yt-dlp on
// that URL. When videosOnly is true the files go directly into root;
// otherwise they go into root/Recordings/<playlist title>/.
//
// Nothing is returned. Every failure is reported on stdout. The temporary
// cookie file is removed before returning.
func DownloadMainRecordings(httpClient *http.Client, accessToken, courseID, root string, videosOnly bool) {
	fmt.Println("\n[*] Checking for main Panopto recordings...")

	jar, err := cookiejar.New(nil)
	if err != nil {
		fmt.Printf("[!] Failed to create cookie jar: %v\n", err)
		return
	}
	// Unlike the per-video flow, this client follows redirects automatically.
	mainClient := &http.Client{Jar: jar}

	// Ask Canvas for the LTI launch URL.
	req, err := http.NewRequest("GET",
		fmt.Sprintf("%s/api/v1/courses/%s/external_tools/sessionless_launch?id=%s&launch_type=course_navigation",
			config.BaseURL, courseID, config.PanoptoID), nil)
	if err != nil {
		fmt.Printf("[!] Failed to build launch request: %v\n", err)
		return
	}
	req.Header.Set("Authorization", "Bearer "+accessToken)
	req.Header.Set("User-Agent", config.UserAgent)
	resp, err := httpClient.Do(req)
	if err != nil {
		fmt.Printf("[!] Failed to get launch URL: %v\n", err)
		return
	}
	var launchData struct {
		URL string `json:"url"`
	}
	// A decode failure leaves URL empty, which the check below reports.
	_ = json.NewDecoder(resp.Body).Decode(&launchData)
	resp.Body.Close()
	if launchData.URL == "" {
		fmt.Println("[!] No main Panopto recordings found")
		return
	}

	// Exchange the API token for a web session URL that returns to the launch.
	bridgeReq, err := http.NewRequest("GET",
		config.BaseURL+"/login/session_token?return_to="+url.QueryEscape(launchData.URL), nil)
	if err != nil {
		fmt.Printf("[!] Failed to build session_token request: %v\n", err)
		return
	}
	bridgeReq.Header.Set("Authorization", "Bearer "+accessToken)
	bridgeReq.Header.Set("User-Agent", config.UserAgent)
	bResp, err := httpClient.Do(bridgeReq)
	if err != nil {
		fmt.Printf("[!] session_token request failed: %v\n", err)
		return
	}
	var bridgeData struct {
		SessionURL string `json:"session_url"`
	}
	// A decode failure leaves SessionURL empty, which the check below reports.
	_ = json.NewDecoder(bResp.Body).Decode(&bridgeData)
	bResp.Body.Close()
	if bridgeData.SessionURL == "" {
		fmt.Println("[!] No session_url returned for main recordings")
		return
	}

	// Fetch and submit the launch form.
	formResp, err := mainClient.Get(bridgeData.SessionURL)
	if err != nil {
		fmt.Printf("[!] Failed to open session URL: %v\n", err)
		return
	}
	formHTML, err := io.ReadAll(formResp.Body)
	formResp.Body.Close()
	if err != nil {
		fmt.Printf("[!] Failed to read launch form: %v\n", err)
		return
	}
	action := utils.ResolveAction(bridgeData.SessionURL, string(formHTML))
	formData := utils.ExtractFormFields(string(formHTML))
	pResp, err := mainClient.PostForm(action, formData)
	if err != nil {
		fmt.Printf("[!] Panopto POST failed: %v\n", err)
		return
	}
	pHTML, err := io.ReadAll(pResp.Body)
	pResp.Body.Close()
	if err != nil {
		fmt.Printf("[!] Failed to read Panopto response: %v\n", err)
		return
	}

	redirectRegex := regexp.MustCompile(`window\.location\.replace\('([^']+)'\)`)
	var finalURL string
	if finalMatch := redirectRegex.FindStringSubmatch(string(pHTML)); len(finalMatch) > 1 {
		finalURL = finalMatch[1]
	} else {
		// No inline redirect yet: re-submit the returned form with the
		// checkedLtiPostMessage event fields set.
		// NOTE(review): this posts to the first form's action rather than
		// re-resolving it from the returned page, as DownloadVideo does —
		// confirm that is intended.
		validData := utils.ExtractFormFields(string(pHTML))
		validData.Set("__EVENTTARGET", "checkedLtiPostMessage")
		validData.Set("__EVENTARGUMENT", "")
		validData.Set("checkedLtiPostMessage", "true")
		validData.Set("ltiPostMessage", "")
		finalResp, err := mainClient.PostForm(action, validData)
		if err != nil {
			fmt.Printf("[!] Panopto follow-up POST failed: %v\n", err)
			return
		}
		finalHTMLBytes, err := io.ReadAll(finalResp.Body)
		finalResp.Body.Close()
		if err != nil {
			fmt.Printf("[!] Failed to read Panopto follow-up response: %v\n", err)
			return
		}
		if finalMatch := redirectRegex.FindStringSubmatch(string(finalHTMLBytes)); len(finalMatch) > 1 {
			finalURL = finalMatch[1]
		}
	}
	if finalURL == "" {
		fmt.Println("[!] No main recordings available or handshake failed")
		return
	}

	// The captured target uses \xNN escapes; turning them into %NN lets
	// QueryUnescape decode them. Fall back to the undecoded form on error.
	cleanURL := strings.ReplaceAll(finalURL, `\x`, "%")
	decodedURL, err := url.QueryUnescape(cleanURL)
	if err != nil {
		decodedURL = cleanURL
	}
	fmt.Printf("[+] Panopto URL Found: %s\n", decodedURL)

	var downloadDir string
	var outputTemplate string
	if videosOnly {
		// Flat: all videos go directly into course root.
		downloadDir = root
		outputTemplate = "%(title)s.%(ext)s"
	} else {
		downloadDir = filepath.Join(root, "Recordings")
		if err := os.MkdirAll(downloadDir, 0o755); err != nil {
			fmt.Printf("[!] Failed to create %s: %v\n", downloadDir, err)
			return
		}
		outputTemplate = "%(playlist_title)s/%(title)s.%(ext)s"
	}

	// Hand the collected cookies to yt-dlp in Netscape cookie-file format.
	cookieFile := filepath.Join(root, ".cookies_main.txt")
	var cData strings.Builder
	cData.WriteString("# Netscape HTTP Cookie File\n")
	panoptoURL, _ := url.Parse("https://vub.cloud.panopto.eu") // constant URL, cannot fail
	for _, cookie := range mainClient.Jar.Cookies(panoptoURL) {
		fmt.Fprintf(&cData, ".vub.cloud.panopto.eu\tTRUE\t/\tTRUE\t0\t%s\t%s\n", cookie.Name, cookie.Value)
	}
	// The file holds live session cookies, so keep it owner-only.
	if err := os.WriteFile(cookieFile, []byte(cData.String()), 0o600); err != nil {
		fmt.Printf("[!] Failed to write cookie file: %v\n", err)
		return
	}
	defer os.Remove(cookieFile)

	fmt.Println("[*] Starting yt-dlp download for main recordings...")
	cmd := exec.Command(getYoutubeDLCommand(),
		"--cookies", cookieFile,
		"--referer", config.BaseURL+"/",
		"-P", downloadDir,
		"-o", outputTemplate,
		decodedURL,
	)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		fmt.Printf("[!] yt-dlp failed: %v\n", err)
	}
}
// DownloadExternalPanoptoURL authenticates via the module_item_redirect path
// (per the original author, what the Canvas mobile app does for ExternalUrl
// items) and then runs yt-dlp against the given Panopto URL with the
// resulting cookies.
//
//   - moduleItemURL: item.URL (https://canvas.vub.be/api/v1/.../module_item_redirect/<id>)
//   - panoptoURL:    item.ExternalURL (https://vub.cloud.panopto.eu/Panopto/Pages/...)
//
// Redirects are followed manually, at most 10 hops per chain, with a single
// cookie jar shared across hosts. If the flow ends without any Panopto
// cookies the function falls back to DownloadVideo. Output is written below
// modDir and named after the sanitized title.
//
// Nothing is returned. Failures are reported on stdout. The temporary cookie
// file is removed before returning.
func DownloadExternalPanoptoURL(httpClient *http.Client, accessToken, moduleItemURL, panoptoURL, modDir, title string) {
	const maxHops = 10

	fmt.Printf(" [dbg] moduleItemURL: %s\n", moduleItemURL)
	fmt.Printf(" [dbg] panoptoURL: %s\n", panoptoURL)

	jar, err := cookiejar.New(nil)
	if err != nil {
		fmt.Printf(" [!] Failed to create cookie jar: %v\n", err)
		return
	}
	// Manual redirect following so we can track cross-domain hops correctly.
	noRedirectClient := &http.Client{
		Jar: jar,
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			return http.ErrUseLastResponse
		},
	}
	// Status codes that continue a GET redirect chain.
	isRedirect := func(status int) bool {
		switch status {
		case http.StatusMovedPermanently, http.StatusFound, http.StatusSeeOther,
			http.StatusTemporaryRedirect, http.StatusPermanentRedirect:
			return true
		}
		return false
	}

	// Step 1: exchange the module item URL for a Canvas web session URL.
	// Use "&" when the module item URL already carries a query string.
	sep := "?"
	if strings.Contains(moduleItemURL, "?") {
		sep = "&"
	}
	returnTo := moduleItemURL + sep + "display=borderless"
	sessionTokenURL := config.BaseURL + "/login/session_token?return_to=" + url.QueryEscape(returnTo)
	fmt.Printf(" [dbg] session_token URL: %s\n", sessionTokenURL)
	bridgeReq, err := http.NewRequest("GET", sessionTokenURL, nil)
	if err != nil {
		fmt.Printf(" [!] session_token request failed: %v\n", err)
		return
	}
	bridgeReq.Header.Set("Authorization", "Bearer "+accessToken)
	bridgeReq.Header.Set("User-Agent", config.UserAgent)
	bResp, err := httpClient.Do(bridgeReq)
	if err != nil {
		fmt.Printf(" [!] session_token request failed: %v\n", err)
		return
	}
	// A short read only affects the debug output and the decode below.
	rawBody, _ := io.ReadAll(bResp.Body)
	bResp.Body.Close()
	fmt.Printf(" [dbg] session_token response (%d): %s\n", bResp.StatusCode, string(rawBody))
	var bridgeData struct {
		SessionURL string `json:"session_url"`
	}
	// A decode failure leaves SessionURL empty, which the check below reports.
	_ = json.Unmarshal(rawBody, &bridgeData)
	if bridgeData.SessionURL == "" {
		fmt.Printf(" [!] No session_url returned (skipping %s)\n", title)
		return
	}
	fmt.Printf(" [dbg] session_url: %s\n", bridgeData.SessionURL)

	// Step 2: GET the session URL and follow redirects by hand until a
	// non-redirect page is reached; that page is expected to be the Canvas
	// OAuth2 confirm form for Panopto.
	currentURL := bridgeData.SessionURL
	var formHTML string
	var formFinalURL string
	reachedForm := false
	for hop := 0; hop < maxHops; hop++ {
		hopReq, err := http.NewRequest("GET", currentURL, nil)
		if err != nil {
			fmt.Printf(" [!] Failed hop %d to %s: %v\n", hop, currentURL, err)
			return
		}
		hopReq.Header.Set("User-Agent", config.UserAgent)
		hopResp, err := noRedirectClient.Do(hopReq)
		if err != nil {
			fmt.Printf(" [!] Failed hop %d to %s: %v\n", hop, currentURL, err)
			return
		}
		body, _ := io.ReadAll(hopResp.Body)
		hopResp.Body.Close()
		fmt.Printf(" [dbg] hop %d: status=%d url=%s body=%d\n", hop, hopResp.StatusCode, currentURL, len(body))
		if isRedirect(hopResp.StatusCode) {
			loc, _ := hopResp.Location()
			if loc == nil {
				fmt.Printf(" [!] Redirect with no Location header\n")
				return
			}
			currentURL = loc.String()
			continue
		}
		formHTML = string(body)
		formFinalURL = currentURL
		reachedForm = true
		break
	}
	if !reachedForm {
		// Hop budget exhausted: there is no page to submit.
		fmt.Printf(" [!] Too many redirects (%d) while opening session. Skipping %s\n", maxHops, title)
		return
	}
	fmt.Printf(" [dbg] OAuth page final URL: %s, length: %d\n", formFinalURL, len(formHTML))
	if strings.Contains(formHTML, "U hebt geen toegang") || strings.Contains(formHTML, "You do not have access") {
		fmt.Printf(" [!] Access denied. Skipping %s\n", title)
		return
	}

	// Step 3: submit the form found on that page. Canvas is expected to answer
	// with a redirect towards Panopto's Login.aspx?code=...; the chain is
	// followed by hand below so Panopto's cookies land in the shared jar.
	action := utils.ResolveAction(formFinalURL, formHTML)
	formData := utils.ExtractFormFields(formHTML)
	fmt.Printf(" [dbg] POST action: %s\n", action)
	postReq, err := http.NewRequest("POST", action, strings.NewReader(formData.Encode()))
	if err != nil {
		fmt.Printf(" [!] OAuth accept POST failed: %v\n", err)
		return
	}
	postReq.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	postReq.Header.Set("User-Agent", config.UserAgent)
	postReq.Header.Set("Origin", config.BaseURL)
	postReq.Header.Set("Referer", formFinalURL)
	postResp, err := noRedirectClient.Do(postReq)
	if err != nil {
		fmt.Printf(" [!] OAuth accept POST failed: %v\n", err)
		return
	}
	postBody, _ := io.ReadAll(postResp.Body)
	postResp.Body.Close()
	fmt.Printf(" [dbg] POST response status: %d, body len: %d\n", postResp.StatusCode, len(postBody))

	// Follow the redirect chain from the POST. This part is best-effort: an
	// error stops the chain and the cookie check below decides what happens.
	if postResp.StatusCode == http.StatusFound || postResp.StatusCode == http.StatusSeeOther {
		nextURL := ""
		if loc, _ := postResp.Location(); loc != nil {
			nextURL = loc.String()
		}
		for hop := 0; hop < maxHops && nextURL != ""; hop++ {
			hopReq, err := http.NewRequest("GET", nextURL, nil)
			if err != nil {
				fmt.Printf(" [dbg] post-redirect hop %d error: %v\n", hop, err)
				break
			}
			hopReq.Header.Set("User-Agent", config.UserAgent)
			hopResp, err := noRedirectClient.Do(hopReq)
			if err != nil {
				fmt.Printf(" [dbg] post-redirect hop %d error: %v\n", hop, err)
				break
			}
			body2, _ := io.ReadAll(hopResp.Body)
			hopResp.Body.Close()
			fmt.Printf(" [dbg] post-redirect hop %d: status=%d url=%s body=%d\n", hop, hopResp.StatusCode, nextURL, len(body2))
			if !isRedirect(hopResp.StatusCode) {
				break
			}
			loc, _ := hopResp.Location()
			if loc == nil {
				break
			}
			nextURL = loc.String()
		}
	}

	// The jar now holds whatever cookies Panopto set during the chain.
	panoptoDomain, _ := url.Parse("https://vub.cloud.panopto.eu") // constant URL, cannot fail
	cookies := noRedirectClient.Jar.Cookies(panoptoDomain)
	fmt.Printf(" [dbg] Panopto cookies: %d\n", len(cookies))
	for _, c := range cookies {
		fmt.Printf(" [dbg] cookie: %s=%s\n", c.Name, c.Value[:min(20, len(c.Value))])
	}
	if len(cookies) == 0 {
		fmt.Printf(" [!] No Panopto cookies after auth falling back for: %s\n", title)
		// NOTE(review): the fallback passes an empty courseID, so DownloadVideo
		// builds its launch URL without a course ID — confirm this works.
		DownloadVideo(httpClient, accessToken, "", modDir, panoptoURL, title)
		return
	}

	// Hand the collected cookies to yt-dlp in Netscape cookie-file format.
	cookieFile := filepath.Join(modDir, ".cookies_ext.txt")
	var cData strings.Builder
	cData.WriteString("# Netscape HTTP Cookie File\n")
	for _, c := range cookies {
		fmt.Fprintf(&cData, ".vub.cloud.panopto.eu\tTRUE\t/\tTRUE\t0\t%s\t%s\n", c.Name, c.Value)
	}
	// The file holds live session cookies, so keep it owner-only.
	if err := os.WriteFile(cookieFile, []byte(cData.String()), 0o600); err != nil {
		fmt.Printf(" [!] Failed to write cookie file: %v\n", err)
		return
	}
	defer os.Remove(cookieFile)

	fmt.Printf(" [*] Downloading: %s\n", title)
	normalizedURL := normalizePanoptoURL(panoptoURL)
	ytCmd := getYoutubeDLCommand()

	// Folder/list URLs are intentional playlists; don't pass --no-playlist.
	var args []string
	var outputTpl string
	if strings.Contains(normalizedURL, "List.aspx") {
		outputTpl = utils.Sanitize(title) + "/%(title)s.%(ext)s"
	} else {
		outputTpl = utils.Sanitize(title) + ".%(ext)s"
		args = append(args, "--no-playlist")
	}
	args = append(args,
		"--cookies", cookieFile,
		"--referer", config.BaseURL+"/",
		"-P", modDir,
		"-o", outputTpl,
		normalizedURL,
	)

	cmd := exec.Command(ytCmd, args...)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		fmt.Printf(" [!] yt-dlp failed: %v\n", err)
	}
}