// Command main scans a directory tree for FLAC files whose names differ only
// by a trailing " (n)" counter (e.g. "Song.flac" and "Song (1).flac"), uses the
// external `mediainfo` tool to compare their quality, and keeps the best copy
// of each group while deleting (or, in dry-run mode, only reporting) the rest.
package main

import (
	"bufio"
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"time"
)

// FileQuality describes one candidate file of a duplicate group together with
// the quality and release metadata used to pick the copy to keep.
type FileQuality struct {
	Path        string
	Bitrate     int
	Bitdepth    int
	Size        int64
	ReleaseID   string // MusicBrainz release ID or other unique identifier
	ReleaseYear string // Release year from metadata
	CatalogNum  string // Catalog number
	ReleaseInfo string // Human-readable release info for logging
}

// ProcessResult is the outcome of processing one duplicate group.
type ProcessResult struct {
	BaseFile       string
	SpaceRecovered int64
	Processed      bool
}

var (
	logFile     *os.File
	armed       bool
	dryRun      bool
	forceMode   bool // Force processing even if releases appear different
	verbose     bool // Control console output
	concurrency int  // Number of concurrent goroutines
	logWriter   *bufio.Writer
	version     = "1.2.0"
	dateFormat  = "2006-01-02 15:04:05"
	logMutex    sync.Mutex

	// dupePattern matches paths ending in " (n).flac" (case-insensitive);
	// submatch 1 is the path without the counter and extension.
	dupePattern = regexp.MustCompile(`(?i)(.+)( \(\d+\))\.flac$`)
	// yearPattern finds the first run of four digits in a date string.
	yearPattern = regexp.MustCompile(`\d{4}`)
)

func init() {
	flag.BoolVar(&armed, "armed", false, "Enable actual file operations (rename/delete)")
	flag.BoolVar(&dryRun, "dry-run", false, "Simulate operations without making changes")
	flag.BoolVar(&forceMode, "force", false, "Force processing even if releases appear different")
	flag.BoolVar(&verbose, "verbose", false, "Show detailed logs on console")
	flag.IntVar(&concurrency, "concurrency", 4, "Number of concurrent file groups to process")

	flag.Usage = func() {
		// Send the whole usage text to the flag package's output so that it
		// is not split between stderr and stdout.
		out := flag.CommandLine.Output()
		fmt.Fprintf(out, "FLAC Duplicate Cleaner v%s\n", version)
		fmt.Fprintf(out, "Usage: %s [options] [directory]\n", os.Args[0])
		flag.PrintDefaults()
		fmt.Fprintln(out, "\nBy default, runs in dry-run mode showing what would be done")
		fmt.Fprintln(out, "Specify -armed to actually perform operations")
		fmt.Fprintln(out, "Use -force to process files even if they appear to be from different releases")
		fmt.Fprintln(out, "Add -verbose to show detailed logs on console")
	}
}

// initLogging opens (creating or appending to) logFilename and routes the
// standard logger through a buffered writer on it. With -verbose the logger
// additionally writes to stdout.
func initLogging(logFilename string) error {
	var err error
	logFile, err = os.OpenFile(logFilename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		return fmt.Errorf("failed to open log file: %w", err)
	}
	logWriter = bufio.NewWriter(logFile)

	// Set log output based on verbose flag
	if verbose {
		log.SetOutput(io.MultiWriter(os.Stdout, logWriter))
	} else {
		log.SetOutput(logWriter)
	}
	return nil
}

// closeLogging flushes the buffered log and closes the log file. Failures are
// reported on stderr because the log itself can no longer be trusted.
func closeLogging() {
	logMutex.Lock()
	defer logMutex.Unlock()

	if err := logWriter.Flush(); err != nil {
		fmt.Fprintf(os.Stderr, "flushing log: %v\n", err)
	}
	if err := logFile.Close(); err != nil {
		fmt.Fprintf(os.Stderr, "closing log file: %v\n", err)
	}
}

// writeLog writes one line through the standard logger while holding logMutex.
func writeLog(message string) {
	logMutex.Lock()
	defer logMutex.Unlock()
	log.Println(message)
}

// printFinalStats logs message and makes sure it reaches the console exactly
// once: in verbose mode the logger already writes to stdout, otherwise the
// message is printed explicitly.
func printFinalStats(message string) {
	if !verbose {
		fmt.Println(message)
	}
	writeLog(message)
}

// fatalLog records a fatal message in the log and, when the logger is not
// already echoing to the console, prints it to stderr as well.
func fatalLog(message string) {
	writeLog(message)
	if !verbose {
		fmt.Fprintln(os.Stderr, message)
	}
}

// getCurrentTime returns the current local time formatted with dateFormat.
func getCurrentTime() string {
	return time.Now().Format(dateFormat)
}

// mediainfoQuery runs `mediainfo --Output=<template> <filePath>` and returns
// its whitespace-trimmed standard output.
func mediainfoQuery(template, filePath string) (string, error) {
	out, err := exec.Command("mediainfo", "--Output="+template, filePath).Output()
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(string(out)), nil
}

// extractYear reduces a date string to a year: the first run of four digits
// if there is one, otherwise the first four bytes (or the whole string when
// it is shorter than that).
func extractYear(date string) string {
	if y := yearPattern.FindString(date); y != "" {
		return y
	}
	if len(date) >= 4 {
		return date[:4]
	}
	return date
}

// getMediaInfo queries mediainfo for filePath and returns, in order: bitrate,
// bit depth, release ID, release year and catalog number.
//
// Only a failure to run the bitrate or bit-depth query is returned as an
// error. Values that do not parse as integers become 0, and the release
// metadata fields are best-effort: a failed query leaves the field empty.
func getMediaInfo(filePath string) (int, int, string, string, string, error) {
	// Get bitrate
	bitrateStr, err := mediainfoQuery("Audio;%BitRate%", filePath)
	if err != nil {
		return 0, 0, "", "", "", fmt.Errorf("mediainfo bitrate failed: %w", err)
	}
	bitrate, err := strconv.Atoi(bitrateStr)
	if err != nil {
		bitrate = 0
	}

	// Get bitdepth
	bitdepthStr, err := mediainfoQuery("Audio;%BitDepth%", filePath)
	if err != nil {
		return 0, 0, "", "", "", fmt.Errorf("mediainfo bitdepth failed: %w", err)
	}
	bitdepth, err := strconv.Atoi(bitdepthStr)
	if err != nil {
		bitdepth = 0
	}

	// Release metadata is optional; mediainfoQuery returns "" on error, which
	// is exactly the "unknown" value wanted here.
	releaseID, _ := mediainfoQuery("General;%MUSICBRAINZ_RELEASETRACKID%", filePath)

	// Get release year, falling back to the recorded date when the released
	// date query succeeded but produced nothing.
	year, err := mediainfoQuery("General;%Released_Date%", filePath)
	if err == nil && year == "" {
		if recorded, recErr := mediainfoQuery("General;%Recorded_Date%", filePath); recErr == nil {
			year = recorded
		}
	}
	year = extractYear(year)

	// Get catalog number (best-effort, see above).
	catalogNum, _ := mediainfoQuery("General;%CATALOGNUMBER%", filePath)

	return bitrate, bitdepth, releaseID, year, catalogNum, nil
}

// getReleaseInfo returns a one-line summary of release metadata for logging.
// It is purely informational, so a mediainfo failure yields an empty string.
func getReleaseInfo(filePath string) string {
	info, _ := mediainfoQuery("General;Album: %Album%, Released: %Released_Date%, Recorded: %Recorded_Date%, Label: %Label%, CatalogNum: %CATALOGNUMBER%", filePath)
	return info
}

// groupKey returns the key under which path is grouped with its duplicates:
// the path with any trailing " (n)" counter removed and the extension
// normalized to ".flac", so "a/Song.FLAC" and "a/Song (2).flac" share a key.
func groupKey(path string) string {
	if m := dupePattern.FindStringSubmatch(path); m != nil {
		return m[1] + ".flac"
	}
	if ext := filepath.Ext(path); strings.EqualFold(ext, ".flac") {
		return path[:len(path)-len(ext)] + ".flac"
	}
	return path + ".flac"
}

// findDuplicateFiles walks rootDir and groups every FLAC file by groupKey.
// Only groups containing more than one file are returned. Each FileQuality
// has just Path and Size populated.
func findDuplicateFiles(rootDir string) (map[string][]FileQuality, error) {
	fileGroups := make(map[string][]FileQuality)

	// filepath.Walk invokes the callback sequentially, so the map needs no lock.
	err := filepath.Walk(rootDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return fmt.Errorf("access error %q: %w", path, err)
		}
		if info.IsDir() || !strings.EqualFold(filepath.Ext(path), ".flac") {
			return nil
		}

		fileInfo, err := os.Stat(path)
		if err != nil {
			return fmt.Errorf("stat failed for %q: %w", path, err)
		}
		key := groupKey(path)
		fileGroups[key] = append(fileGroups[key], FileQuality{Path: path, Size: fileInfo.Size()})
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("error walking directory: %w", err)
	}

	// Filter out groups that don't have duplicates
	for key, group := range fileGroups {
		if len(group) <= 1 {
			delete(fileGroups, key)
		}
	}
	return fileGroups, nil
}

// bytesToHumanReadable formats a byte count using binary (1024-based) units,
// e.g. 1536 -> "1.5 KiB". Values below 1024 are printed as plain bytes.
func bytesToHumanReadable(bytes int64) string {
	const unit = 1024
	if bytes < unit {
		return fmt.Sprintf("%d B", bytes)
	}
	div, exp := int64(unit), 0
	for n := bytes / unit; n >= unit; n /= unit {
		div *= unit
		exp++
	}
	return fmt.Sprintf("%.1f %ciB", float64(bytes)/float64(div), "KMGTPE"[exp])
}

// releaseKeyFor returns the key used to decide whether two files belong to
// the same release: the release ID when present, otherwise "<year>-<catalog>".
func releaseKeyFor(f FileQuality) string {
	if f.ReleaseID != "" {
		return f.ReleaseID
	}
	// Fallback if no MusicBrainz ID
	return fmt.Sprintf("%s-%s", f.ReleaseYear, f.CatalogNum)
}

// selectBestFile returns the file with the highest bitrate, breaking ties by
// bit depth and then by size. The search starts from the zero FileQuality, so
// the result has an empty Path when no file beats an all-zero candidate
// (including when files is empty).
func selectBestFile(files []FileQuality) FileQuality {
	var best FileQuality
	for _, f := range files {
		switch {
		case f.Bitrate != best.Bitrate:
			if f.Bitrate > best.Bitrate {
				best = f
			}
		case f.Bitdepth != best.Bitdepth:
			if f.Bitdepth > best.Bitdepth {
				best = f
			}
		case f.Size > best.Size:
			best = f
		}
	}
	return best
}

// targetPathFor returns the path the kept file should end up at.
//
// A file that already carries the un-numbered name stays where it is. A
// numbered duplicate is renamed to baseFile, except when the group spans
// several releases: then the release year (or, failing that, the catalog
// number) is appended as " [id]" to avoid name conflicts between releases.
func targetPathFor(baseFile string, best FileQuality, multiRelease bool) string {
	if !dupePattern.MatchString(best.Path) {
		return best.Path
	}
	if !multiRelease {
		return baseFile
	}

	// Use year or catalog number as identifier
	identifier := best.ReleaseYear
	if identifier == "" {
		identifier = best.CatalogNum
	}
	if identifier == "" {
		return baseFile
	}
	ext := filepath.Ext(baseFile)
	return fmt.Sprintf("%s [%s]%s", strings.TrimSuffix(baseFile, ext), identifier, ext)
}

// cleanReleaseGroup keeps the best file of one release and removes the others,
// returning the bytes recovered and whether any file was (or would be) deleted.
//
// Losers are deleted before the keeper is renamed, and they are identified by
// the keeper's original path. The rename is skipped when the target still
// exists, so it can never clobber a file that was not explicitly removed.
func cleanReleaseGroup(baseFile string, releaseFiles []FileQuality, multiRelease bool) (int64, bool) {
	live := armed && !dryRun

	best := selectBestFile(releaseFiles)
	if best.Path == "" {
		writeLog(" - No valid files found in this release group")
		return 0, false
	}
	writeLog(fmt.Sprintf(" -> Keeping: %s (Bitrate: %d, Bitdepth: %d, Size: %s)", best.Path, best.Bitrate, best.Bitdepth, bytesToHumanReadable(best.Size)))

	var spaceRecovered int64
	processed := false
	removed := make(map[string]bool, len(releaseFiles))

	// Delete other files in this release group
	for _, file := range releaseFiles {
		if file.Path == best.Path {
			continue
		}
		action := "Would delete"
		if live {
			action = "Deleting"
			if err := os.Remove(file.Path); err != nil {
				writeLog(fmt.Sprintf(" ! Delete failed for %s: %v", file.Path, err))
				continue
			}
		}
		removed[file.Path] = true
		writeLog(fmt.Sprintf(" * %s: %s (Recovering %s)", action, file.Path, bytesToHumanReadable(file.Size)))
		spaceRecovered += file.Size
		processed = true
	}

	// Rename best file to target name if needed
	target := targetPathFor(baseFile, best, multiRelease)
	if best.Path == target {
		return spaceRecovered, processed
	}
	if _, err := os.Lstat(target); err == nil && !removed[target] {
		// Something we did not remove occupies the target; leave the keeper
		// under its current name rather than overwrite it.
		writeLog(fmt.Sprintf(" ! Rename skipped, target already exists: %s", target))
		return spaceRecovered, processed
	}
	action := "Would rename"
	if live {
		action = "Renaming"
		if err := os.Rename(best.Path, target); err != nil {
			writeLog(fmt.Sprintf(" ! Rename failed: %v", err))
			return spaceRecovered, processed
		}
	}
	writeLog(fmt.Sprintf(" * %s: %s -> %s", action, best.Path, target))
	return spaceRecovered, processed
}

// processFileGroup handles one duplicate group keyed by baseFile.
//
// It fills in quality and release metadata for every file (files that
// mediainfo cannot read are logged and left untouched), groups the files by
// release, and — unless the group spans several releases and -force is not
// set — keeps the best file of each release and deletes the rest. File
// operations are only performed when armed and not in dry-run mode.
//
// It returns the number of bytes recovered (or recoverable in dry-run mode)
// and whether at least one file was (or would be) deleted.
func processFileGroup(baseFile string, files []FileQuality) (int64, bool) {
	writeLog(fmt.Sprintf("Processing group for: %s", baseFile))

	// Get quality info for all files
	releaseMap := make(map[string][]FileQuality)
	for i := range files {
		f := &files[i]
		bitrate, bitdepth, releaseID, year, catalogNum, err := getMediaInfo(f.Path)
		if err != nil {
			writeLog(fmt.Sprintf(" - Error getting media info for %s: %v", f.Path, err))
			continue
		}
		f.Bitrate = bitrate
		f.Bitdepth = bitdepth
		f.ReleaseID = releaseID
		f.ReleaseYear = year
		f.CatalogNum = catalogNum
		f.ReleaseInfo = getReleaseInfo(f.Path)

		key := releaseKeyFor(*f)
		releaseMap[key] = append(releaseMap[key], *f)

		writeLog(fmt.Sprintf(" - Found: %s (Bitrate: %d, Bitdepth: %d, Size: %s, Year: %s, CatalogNum: %s)", f.Path, f.Bitrate, f.Bitdepth, bytesToHumanReadable(f.Size), f.ReleaseYear, f.CatalogNum))
		writeLog(fmt.Sprintf(" Release info: %s", f.ReleaseInfo))
	}

	multiRelease := len(releaseMap) > 1

	// Check if files are from different releases
	if multiRelease && !forceMode {
		writeLog(" ! Files appear to be from different releases. Skipping group (use -force to override).")
		for releaseKey, releaseFiles := range releaseMap {
			writeLog(fmt.Sprintf(" Release group: %s", releaseKey))
			for _, file := range releaseFiles {
				writeLog(fmt.Sprintf(" - %s", file.Path))
			}
		}
		return 0, false
	}

	// Process each release group
	var spaceRecovered int64
	processed := false
	for releaseKey, releaseFiles := range releaseMap {
		if len(releaseFiles) <= 1 {
			// Skip if only one file for this release
			continue
		}
		writeLog(fmt.Sprintf(" Processing release group: %s", releaseKey))
		space, done := cleanReleaseGroup(baseFile, releaseFiles, multiRelease)
		spaceRecovered += space
		processed = processed || done
	}

	writeLog(fmt.Sprintf(" Total potentially recoverable space: %s", bytesToHumanReadable(spaceRecovered)))
	return spaceRecovered, processed
}

// run is the real program entry point. It returns the process exit code so
// that deferred cleanup (flushing the log) happens before main calls os.Exit.
func run() int {
	flag.Parse()

	// Default to dry-run mode unless armed is explicitly set
	if !armed {
		dryRun = true
	}
	// A zero-capacity semaphore would block forever and a negative capacity
	// panics, so always allow at least one worker.
	if concurrency < 1 {
		concurrency = 1
	}

	// Get working directory
	dir := "."
	if flag.NArg() > 0 {
		dir = flag.Arg(0)
	}

	// Initialize logging
	if err := initLogging("dupe_cleanup.log"); err != nil {
		log.Fatalf("Error initializing logging: %v", err)
	}
	defer closeLogging()

	writeLog("===============================")
	writeLog(fmt.Sprintf("FLAC Duplicate Cleaner v%s", version))
	writeLog(fmt.Sprintf("Started at %s", getCurrentTime()))
	writeLog(fmt.Sprintf("Processing directory: %s", dir))
	if dryRun {
		writeLog("DRY-RUN MODE: No files will be renamed or deleted.")
	} else {
		writeLog("ARMED MODE: Files will be renamed and deleted!")
	}
	if forceMode {
		writeLog("FORCE MODE: Will process even if releases appear different!")
	}
	writeLog(fmt.Sprintf("CONCURRENCY: Processing %d file groups simultaneously", concurrency))
	writeLog("===============================")

	// Check mediainfo is available
	if _, err := exec.LookPath("mediainfo"); err != nil {
		fatalLog("Error: 'mediainfo' command not found. Please install MediaInfo package.")
		return 1
	}

	fileGroups, err := findDuplicateFiles(dir)
	if err != nil {
		fatalLog(fmt.Sprintf("Error finding duplicate files: %v", err))
		return 1
	}

	if len(fileGroups) == 0 {
		writeLog("No duplicate FLAC files found.")
	} else {
		writeLog(fmt.Sprintf("Found %d groups of duplicate files", len(fileGroups)))
		writeLog("")
	}

	// Process file groups concurrently with limited goroutines. The results
	// channel can hold every result, so workers never block on sending.
	var wg sync.WaitGroup
	results := make(chan ProcessResult, len(fileGroups))
	semaphore := make(chan struct{}, concurrency)

	for baseFile, group := range fileGroups {
		semaphore <- struct{}{} // Acquire before spawning to bound live goroutines
		wg.Add(1)
		go func(bf string, g []FileQuality) {
			defer wg.Done()
			defer func() { <-semaphore }() // Release semaphore

			spaceRecovered, processed := processFileGroup(bf, g)
			results <- ProcessResult{
				BaseFile:       bf,
				SpaceRecovered: spaceRecovered,
				Processed:      processed,
			}
		}(baseFile, group)
	}
	wg.Wait()
	close(results)

	// Process results
	var totalRecoverableSpace int64
	processedGroups := 0
	for result := range results {
		if result.Processed {
			processedGroups++
			totalRecoverableSpace += result.SpaceRecovered
		}
	}

	// Final stats - these always reach the console regardless of verbose setting
	finalTime := getCurrentTime()
	printFinalStats(fmt.Sprintf("Processed %d of %d file groups", processedGroups, len(fileGroups)))
	printFinalStats(fmt.Sprintf("Total recoverable space: %s", bytesToHumanReadable(totalRecoverableSpace)))
	printFinalStats(fmt.Sprintf("Cleanup completed at %s", finalTime))
	printFinalStats("===============================")
	return 0
}

func main() {
	os.Exit(run())
}