489 lines
14 KiB
Go
489 lines
14 KiB
Go
package main
|
|
|
|
import (
|
|
"bufio"
|
|
"flag"
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// FileQuality describes one FLAC file on disk together with the audio
// properties and release tags used to decide which duplicate to keep.
type FileQuality struct {
	// Path is the file's location as produced by the directory walk.
	Path string

	// Bitrate and Bitdepth hold the audio stream values reported by
	// mediainfo; each stays 0 when the reported text is not an integer.
	Bitrate, Bitdepth int

	// Size is the file size in bytes.
	Size int64

	// ReleaseID is the MusicBrainz identifier read from the file's tags,
	// or empty when the tag is absent.
	ReleaseID string

	// ReleaseYear is the release year taken from the file's metadata.
	ReleaseYear string

	// CatalogNum is the catalog number tag.
	CatalogNum string

	// ReleaseInfo is a human-readable release summary used only for logging.
	ReleaseInfo string
}
|
|
|
|
// ProcessResult is the outcome of handling one group of duplicate files,
// as sent back from a worker goroutine to main.
type ProcessResult struct {
	// BaseFile is the group key: the path of the un-numbered file.
	BaseFile string

	// SpaceRecovered is the number of bytes freed (or that would be freed
	// in dry-run mode) for this group.
	SpaceRecovered int64

	// Processed is true when at least one file in the group was, or would
	// have been, deleted.
	Processed bool
}
|
|
|
|
// Command-line options; each is bound to a flag in init.
var (
	// armed enables actual file operations (rename/delete).
	armed bool
	// dryRun simulates operations without making changes.
	dryRun bool
	// forceMode processes groups even if releases appear different.
	forceMode bool
	// verbose mirrors detailed logs to the console.
	verbose bool
	// concurrency is the number of file groups processed at once.
	concurrency int
)

// Logging state shared by initLogging, writeLog and closeLogging.
var (
	// logFile is the open log file handle.
	logFile *os.File
	// logWriter buffers writes to logFile.
	logWriter *bufio.Writer
	// logMutex serializes access to the logger.
	logMutex sync.Mutex
)

// Fixed program strings.
var (
	// version is the program version shown in the usage text and log banner.
	version = "1.2.0"
	// dateFormat is the time layout used for timestamps in the log.
	dateFormat = "2006-01-02 15:04:05"
)
|
|
|
|
func init() {
|
|
flag.BoolVar(&armed, "armed", false, "Enable actual file operations (rename/delete)")
|
|
flag.BoolVar(&dryRun, "dry-run", false, "Simulate operations without making changes")
|
|
flag.BoolVar(&forceMode, "force", false, "Force processing even if releases appear different")
|
|
flag.BoolVar(&verbose, "verbose", false, "Show detailed logs on console")
|
|
flag.IntVar(&concurrency, "concurrency", 4, "Number of concurrent file groups to process")
|
|
flag.Usage = func() {
|
|
fmt.Fprintf(flag.CommandLine.Output(), "FLAC Duplicate Cleaner v%s\n", version)
|
|
fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [options] [directory]\n", os.Args[0])
|
|
flag.PrintDefaults()
|
|
fmt.Println("\nBy default, runs in dry-run mode showing what would be done")
|
|
fmt.Println("Specify -armed to actually perform operations")
|
|
fmt.Println("Use -force to process files even if they appear to be from different releases")
|
|
fmt.Println("Add -verbose to show detailed logs on console")
|
|
}
|
|
}
|
|
|
|
func initLogging(logFilename string) error {
|
|
var err error
|
|
logFile, err = os.OpenFile(logFilename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to open log file: %w", err)
|
|
}
|
|
logWriter = bufio.NewWriter(logFile)
|
|
|
|
// Set log output based on verbose flag
|
|
if verbose {
|
|
log.SetOutput(io.MultiWriter(os.Stdout, logWriter))
|
|
} else {
|
|
log.SetOutput(logWriter)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func closeLogging() {
|
|
logMutex.Lock()
|
|
defer logMutex.Unlock()
|
|
logWriter.Flush()
|
|
logFile.Close()
|
|
}
|
|
|
|
func writeLog(message string) {
|
|
logMutex.Lock()
|
|
defer logMutex.Unlock()
|
|
log.Println(message)
|
|
}
|
|
|
|
func printFinalStats(message string) {
|
|
// Always print final stats to console regardless of verbose setting
|
|
fmt.Println(message)
|
|
log.Println(message)
|
|
}
|
|
|
|
func getCurrentTime() string {
|
|
return time.Now().Format(dateFormat)
|
|
}
|
|
|
|
// getMediaInfo runs the external mediainfo tool against filePath, one
// invocation per field, and returns in order: bitrate, bit depth, release
// ID, release year and catalog number.
//
// A failure of the bitrate or bit-depth invocation is returned as an error.
// Bitrate or bit-depth text that is not an integer yields 0. The remaining
// fields are best-effort and are left empty when their invocation fails.
func getMediaInfo(filePath string) (int, int, string, string, string, error) {
	// query runs mediainfo with a single output template and returns the
	// whitespace-trimmed result.
	query := func(template string) (string, error) {
		raw, err := exec.Command("mediainfo", template, filePath).Output()
		if err != nil {
			return "", err
		}
		return strings.TrimSpace(string(raw)), nil
	}

	// number converts text to an int, mapping anything unparsable to 0.
	number := func(text string) int {
		value, err := strconv.Atoi(text)
		if err != nil {
			return 0
		}
		return value
	}

	bitrateText, err := query("--Output=Audio;%BitRate%")
	if err != nil {
		return 0, 0, "", "", "", fmt.Errorf("mediainfo bitrate failed: %w", err)
	}

	bitdepthText, err := query("--Output=Audio;%BitDepth%")
	if err != nil {
		return 0, 0, "", "", "", fmt.Errorf("mediainfo bitdepth failed: %w", err)
	}

	// Tag lookups are optional: an error simply leaves the value empty.
	releaseID, _ := query("--Output=General;%MUSICBRAINZ_RELEASETRACKID%")

	year, yearErr := query("--Output=General;%Released_Date%")
	if yearErr == nil {
		// No released date: fall back to the recorded date.
		if year == "" {
			if recorded, recErr := query("--Output=General;%Recorded_Date%"); recErr == nil {
				year = recorded
			}
		}
		// Keep only the leading four characters when given a full date.
		if len(year) >= 4 {
			year = year[:4]
		}
	}

	// Optional as well; empty when the lookup fails.
	catalogNum, _ := query("--Output=General;%CATALOGNUMBER%")

	return number(bitrateText), number(bitdepthText), releaseID, year, catalogNum, nil
}
|
|
|
|
// getReleaseInfo asks mediainfo for a one-line summary of filePath's release
// tags (album, released/recorded dates, label, catalog number) for logging.
// The command's error is deliberately ignored: whatever output was captured
// is returned, trimmed of surrounding whitespace.
func getReleaseInfo(filePath string) string {
	const template = "--Output=General;Album: %Album%, Released: %Released_Date%, Recorded: %Recorded_Date%, Label: %Label%, CatalogNum: %CATALOGNUMBER%"

	// Best-effort: the summary is only used in log lines.
	raw, _ := exec.Command("mediainfo", template, filePath).Output()
	summary := string(raw)
	return strings.TrimSpace(summary)
}
|
|
|
|
func findDuplicateFiles(rootDir string) (map[string][]FileQuality, error) {
|
|
dupePattern := regexp.MustCompile(`(?i)(.+)( \(\d+\))\.flac$`)
|
|
fileGroups := make(map[string][]FileQuality)
|
|
var filesMutex sync.Mutex
|
|
|
|
err := filepath.Walk(rootDir, func(path string, info os.FileInfo, err error) error {
|
|
if err != nil {
|
|
return fmt.Errorf("access error %q: %w", path, err)
|
|
}
|
|
|
|
if info.IsDir() {
|
|
return nil
|
|
}
|
|
|
|
baseName := strings.ToLower(filepath.Base(path))
|
|
if !strings.HasSuffix(baseName, ".flac") {
|
|
return nil
|
|
}
|
|
|
|
// Check if file matches the duplicate pattern
|
|
matches := dupePattern.FindStringSubmatch(path)
|
|
filesMutex.Lock()
|
|
defer filesMutex.Unlock()
|
|
|
|
if len(matches) > 1 {
|
|
baseFile := matches[1] + ".flac"
|
|
fileInfo, err := os.Stat(path)
|
|
if err != nil {
|
|
return fmt.Errorf("stat failed for %q: %w", path, err)
|
|
}
|
|
fileGroups[baseFile] = append(fileGroups[baseFile], FileQuality{Path: path, Size: fileInfo.Size()})
|
|
} else {
|
|
// Include the original file in the group
|
|
baseFile := strings.TrimSuffix(path, ".flac") + ".flac"
|
|
fileInfo, err := os.Stat(path)
|
|
if err != nil {
|
|
return fmt.Errorf("stat failed for %q: %w", path, err)
|
|
}
|
|
fileGroups[baseFile] = append(fileGroups[baseFile], FileQuality{Path: path, Size: fileInfo.Size()})
|
|
}
|
|
|
|
return nil
|
|
})
|
|
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error walking directory: %w", err)
|
|
}
|
|
|
|
// Filter out groups that don't have duplicates
|
|
for key, group := range fileGroups {
|
|
if len(group) <= 1 {
|
|
delete(fileGroups, key)
|
|
}
|
|
}
|
|
|
|
return fileGroups, nil
|
|
}
|
|
|
|
// bytesToHumanReadable formats a byte count using binary units.
// Values below 1024 are printed as whole bytes ("512 B"); larger values are
// scaled to KiB, MiB, ... EiB and printed with one decimal place.
func bytesToHumanReadable(bytes int64) string {
	const (
		unit     = 1024
		prefixes = "KMGTPE"
	)

	if bytes < unit {
		return fmt.Sprintf("%d B", bytes)
	}

	// Count how many further times the value can be divided by 1024 after
	// the first division; that selects the unit prefix and the divisor.
	scale := int64(unit)
	idx := 0
	remaining := bytes / unit
	for remaining >= unit {
		remaining /= unit
		scale *= unit
		idx++
	}

	scaled := float64(bytes) / float64(scale)
	return fmt.Sprintf("%.1f %ciB", scaled, prefixes[idx])
}
|
|
|
|
func processFileGroup(baseFile string, files []FileQuality) (int64, bool) {
|
|
writeLog(fmt.Sprintf("Processing group for: %s", baseFile))
|
|
processed := false
|
|
var spaceRecovered int64 = 0
|
|
|
|
// Get quality info for all files
|
|
var bestFile FileQuality
|
|
var totalSize int64
|
|
releaseMap := make(map[string][]FileQuality)
|
|
|
|
for i := range files {
|
|
bitrate, bitdepth, releaseID, year, catalogNum, err := getMediaInfo(files[i].Path)
|
|
if err != nil {
|
|
writeLog(fmt.Sprintf(" - Error getting media info for %s: %v", files[i].Path, err))
|
|
continue
|
|
}
|
|
files[i].Bitrate = bitrate
|
|
files[i].Bitdepth = bitdepth
|
|
files[i].ReleaseID = releaseID
|
|
files[i].ReleaseYear = year
|
|
files[i].CatalogNum = catalogNum
|
|
files[i].ReleaseInfo = getReleaseInfo(files[i].Path)
|
|
totalSize += files[i].Size
|
|
|
|
// Create a unique key for grouping by release
|
|
releaseKey := releaseID
|
|
if releaseKey == "" {
|
|
// Fallback if no MusicBrainz ID
|
|
releaseKey = fmt.Sprintf("%s-%s", year, catalogNum)
|
|
}
|
|
|
|
releaseMap[releaseKey] = append(releaseMap[releaseKey], files[i])
|
|
|
|
writeLog(fmt.Sprintf(" - Found: %s (Bitrate: %d, Bitdepth: %d, Size: %s, Year: %s, CatalogNum: %s)",
|
|
files[i].Path, files[i].Bitrate, files[i].Bitdepth,
|
|
bytesToHumanReadable(files[i].Size), files[i].ReleaseYear, files[i].CatalogNum))
|
|
writeLog(fmt.Sprintf(" Release info: %s", files[i].ReleaseInfo))
|
|
}
|
|
|
|
// Check if files are from different releases
|
|
if len(releaseMap) > 1 && !forceMode {
|
|
writeLog(" ! Files appear to be from different releases. Skipping group (use -force to override).")
|
|
for releaseKey, releaseFiles := range releaseMap {
|
|
writeLog(fmt.Sprintf(" Release group: %s", releaseKey))
|
|
for _, file := range releaseFiles {
|
|
writeLog(fmt.Sprintf(" - %s", file.Path))
|
|
}
|
|
}
|
|
return 0, false
|
|
}
|
|
|
|
// Process each release group
|
|
for releaseKey, releaseFiles := range releaseMap {
|
|
if len(releaseFiles) <= 1 {
|
|
// Skip if only one file for this release
|
|
continue
|
|
}
|
|
|
|
writeLog(fmt.Sprintf(" Processing release group: %s", releaseKey))
|
|
bestFile = FileQuality{}
|
|
|
|
// Find best file for this release
|
|
for _, file := range releaseFiles {
|
|
// Determine the best file using quality then size as tiebreaker
|
|
if file.Bitrate > bestFile.Bitrate ||
|
|
(file.Bitrate == bestFile.Bitrate && file.Bitdepth > bestFile.Bitdepth) ||
|
|
(file.Bitrate == bestFile.Bitrate && file.Bitdepth == bestFile.Bitdepth && file.Size > bestFile.Size) {
|
|
bestFile = file
|
|
}
|
|
}
|
|
|
|
if bestFile.Path == "" {
|
|
writeLog(" - No valid files found in this release group")
|
|
continue
|
|
}
|
|
|
|
writeLog(fmt.Sprintf(" -> Keeping: %s (Bitrate: %d, Bitdepth: %d, Size: %s)",
|
|
bestFile.Path, bestFile.Bitrate, bestFile.Bitdepth, bytesToHumanReadable(bestFile.Size)))
|
|
|
|
// If this is the primary file (without (n) in name), make sure the best quality version is it
|
|
isBaseFile := (bestFile.Path == baseFile ||
|
|
strings.TrimSuffix(bestFile.Path, ".flac") == strings.TrimSuffix(baseFile, ".flac"))
|
|
|
|
// Generate target filename
|
|
targetFilename := baseFile
|
|
if !isBaseFile && len(releaseMap) > 1 {
|
|
// For multiple releases, append year/identifier to filename to avoid conflicts
|
|
ext := filepath.Ext(baseFile)
|
|
baseName := strings.TrimSuffix(baseFile, ext)
|
|
|
|
// Use year or catalog number as identifier
|
|
identifier := bestFile.ReleaseYear
|
|
if identifier == "" && bestFile.CatalogNum != "" {
|
|
identifier = bestFile.CatalogNum
|
|
}
|
|
|
|
if identifier != "" {
|
|
targetFilename = fmt.Sprintf("%s [%s]%s", baseName, identifier, ext)
|
|
}
|
|
}
|
|
|
|
// Rename best file to target name if needed
|
|
if bestFile.Path != targetFilename {
|
|
action := "Would rename"
|
|
if armed && !dryRun {
|
|
action = "Renaming"
|
|
err := os.Rename(bestFile.Path, targetFilename)
|
|
if err != nil {
|
|
writeLog(fmt.Sprintf(" ! Rename failed: %v", err))
|
|
continue
|
|
}
|
|
}
|
|
writeLog(fmt.Sprintf(" * %s: %s -> %s", action, bestFile.Path, targetFilename))
|
|
bestFile.Path = targetFilename
|
|
}
|
|
|
|
// Delete other files in this release group
|
|
for _, file := range releaseFiles {
|
|
if file.Path != bestFile.Path {
|
|
action := "Would delete"
|
|
if armed && !dryRun {
|
|
action = "Deleting"
|
|
err := os.Remove(file.Path)
|
|
if err != nil {
|
|
writeLog(fmt.Sprintf(" ! Delete failed for %s: %v", file.Path, err))
|
|
continue
|
|
}
|
|
}
|
|
writeLog(fmt.Sprintf(" * %s: %s (Recovering %s)", action, file.Path, bytesToHumanReadable(file.Size)))
|
|
spaceRecovered += file.Size
|
|
processed = true
|
|
}
|
|
}
|
|
}
|
|
|
|
writeLog(fmt.Sprintf(" Total potentially recoverable space: %s", bytesToHumanReadable(spaceRecovered)))
|
|
return spaceRecovered, processed
|
|
}
|
|
|
|
func main() {
|
|
flag.Parse()
|
|
|
|
// Default to dry-run mode unless armed is explicitly set
|
|
if !armed {
|
|
dryRun = true
|
|
}
|
|
|
|
// Get working directory
|
|
dir := "."
|
|
if flag.NArg() > 0 {
|
|
dir = flag.Arg(0)
|
|
}
|
|
|
|
// Initialize logging
|
|
err := initLogging("dupe_cleanup.log")
|
|
if err != nil {
|
|
log.Fatalf("Error initializing logging: %v", err)
|
|
}
|
|
defer closeLogging()
|
|
|
|
writeLog("===============================")
|
|
writeLog(fmt.Sprintf("FLAC Duplicate Cleaner v%s", version))
|
|
writeLog(fmt.Sprintf("Started at %s", getCurrentTime()))
|
|
writeLog(fmt.Sprintf("Processing directory: %s", dir))
|
|
if dryRun {
|
|
writeLog("DRY-RUN MODE: No files will be renamed or deleted.")
|
|
} else {
|
|
writeLog("ARMED MODE: Files will be renamed and deleted!")
|
|
}
|
|
if forceMode {
|
|
writeLog("FORCE MODE: Will process even if releases appear different!")
|
|
}
|
|
writeLog(fmt.Sprintf("CONCURRENCY: Processing %d file groups simultaneously", concurrency))
|
|
writeLog("===============================")
|
|
|
|
// Check mediainfo is available
|
|
_, err = exec.LookPath("mediainfo")
|
|
if err != nil {
|
|
writeLog("Error: 'mediainfo' command not found. Please install MediaInfo package.")
|
|
os.Exit(1)
|
|
}
|
|
|
|
fileGroups, err := findDuplicateFiles(dir)
|
|
if err != nil {
|
|
writeLog(fmt.Sprintf("Error finding duplicate files: %v", err))
|
|
os.Exit(1)
|
|
}
|
|
|
|
if len(fileGroups) == 0 {
|
|
writeLog("No duplicate FLAC files found.")
|
|
} else {
|
|
writeLog(fmt.Sprintf("Found %d groups of duplicate files", len(fileGroups)))
|
|
writeLog("")
|
|
}
|
|
|
|
// Process file groups concurrently with limited goroutines
|
|
var wg sync.WaitGroup
|
|
results := make(chan ProcessResult, len(fileGroups))
|
|
semaphore := make(chan struct{}, concurrency)
|
|
|
|
for baseFile, group := range fileGroups {
|
|
wg.Add(1)
|
|
go func(bf string, g []FileQuality) {
|
|
defer wg.Done()
|
|
semaphore <- struct{}{} // Acquire semaphore
|
|
defer func() { <-semaphore }() // Release semaphore
|
|
|
|
spaceRecovered, processed := processFileGroup(bf, g)
|
|
results <- ProcessResult{
|
|
BaseFile: bf,
|
|
SpaceRecovered: spaceRecovered,
|
|
Processed: processed,
|
|
}
|
|
}(baseFile, group)
|
|
}
|
|
|
|
// Close results channel once all goroutines complete
|
|
go func() {
|
|
wg.Wait()
|
|
close(results)
|
|
}()
|
|
|
|
// Process results
|
|
var totalRecoverableSpace int64
|
|
processedGroups := 0
|
|
|
|
for result := range results {
|
|
if result.Processed {
|
|
processedGroups++
|
|
totalRecoverableSpace += result.SpaceRecovered
|
|
}
|
|
}
|
|
|
|
// Final stats - these always print to console regardless of verbose setting
|
|
finalTime := getCurrentTime()
|
|
printFinalStats(fmt.Sprintf("Processed %d of %d file groups", processedGroups, len(fileGroups)))
|
|
printFinalStats(fmt.Sprintf("Total recoverable space: %s", bytesToHumanReadable(totalRecoverableSpace)))
|
|
printFinalStats(fmt.Sprintf("Cleanup completed at %s", finalTime))
|
|
printFinalStats("===============================")
|
|
}
|