Compare commits

...

2 Commits

Author SHA1 Message Date
c160a0a24a
Verbose 2025-04-02 21:52:46 +02:00
b1eed73dec
multi release 2025-04-02 21:45:23 +02:00

312
main.go
View File

@ -12,34 +12,54 @@ import (
"regexp"
"strconv"
"strings"
"sync"
"time"
)
type FileQuality struct {
Path string
Bitrate int
Bitdepth int
Size int64
Path string
Bitrate int
Bitdepth int
Size int64
ReleaseID string // MusicBrainz release ID or other unique identifier
ReleaseYear string // Release year from metadata
CatalogNum string // Catalog number
ReleaseInfo string // Human-readable release info for logging
}
// ProcessResult carries the outcome of handling one duplicate group from a
// worker goroutine back to main over the results channel.
type ProcessResult struct {
// BaseFile is the base file path of the group (the key of the group map
// the worker was started for).
BaseFile string
// SpaceRecovered is the byte count reported by processFileGroup: the summed
// sizes of the files it deleted, or would delete when not armed.
SpaceRecovered int64
// Processed is true when processFileGroup deleted (or would delete) at least
// one file; it is false when the group was skipped.
Processed bool
}
var (
logFile *os.File
armed bool
dryRun bool
logWriter *bufio.Writer
version = "1.0.0"
dateFormat = "2006-01-02 15:04:05"
logFile *os.File
armed bool
dryRun bool
forceMode bool // Force processing even if releases appear different
verbose bool // Control console output
concurrency int // Number of concurrent goroutines
logWriter *bufio.Writer
version = "1.2.0"
dateFormat = "2006-01-02 15:04:05"
logMutex sync.Mutex
)
// init registers the command-line flags on the default flag set and installs
// a custom usage message. It does not parse the command line itself.
func init() {
	flag.BoolVar(&armed, "armed", false, "Enable actual file operations (rename/delete)")
	flag.BoolVar(&dryRun, "dry-run", false, "Simulate operations without making changes")
	flag.BoolVar(&forceMode, "force", false, "Force processing even if releases appear different")
	flag.BoolVar(&verbose, "verbose", false, "Show detailed logs on console")
	flag.IntVar(&concurrency, "concurrency", 4, "Number of concurrent file groups to process")

	flag.Usage = func() {
		// Write the complete help text to the flag package's output stream.
		// flag.PrintDefaults already writes there, so sending the trailing
		// hints to stdout would split the help across two streams.
		out := flag.CommandLine.Output()
		fmt.Fprintf(out, "FLAC Duplicate Cleaner v%s\n", version)
		fmt.Fprintf(out, "Usage: %s [options] [directory]\n", os.Args[0])
		flag.PrintDefaults()
		fmt.Fprintln(out, "\nBy default, runs in dry-run mode showing what would be done")
		fmt.Fprintln(out, "Specify -armed to actually perform operations")
		fmt.Fprintln(out, "Use -force to process files even if they appear to be from different releases")
		fmt.Fprintln(out, "Add -verbose to show detailed logs on console")
	}
}
@ -50,16 +70,33 @@ func initLogging(logFilename string) error {
return fmt.Errorf("failed to open log file: %w", err)
}
logWriter = bufio.NewWriter(logFile)
log.SetOutput(io.MultiWriter(os.Stdout, logWriter))
// Set log output based on verbose flag
if verbose {
log.SetOutput(io.MultiWriter(os.Stdout, logWriter))
} else {
log.SetOutput(logWriter)
}
return nil
}
// closeLogging flushes buffered log output and closes the log file.
//
// It holds logMutex for the duration so it cannot interleave with writeLog.
// It is safe to call when logging was never set up: a nil writer or file is
// skipped instead of being dereferenced. Flush and close failures cannot be
// returned (the function has no result), so they are reported on stderr
// rather than being dropped silently.
func closeLogging() {
	logMutex.Lock()
	defer logMutex.Unlock()

	if logWriter != nil {
		if err := logWriter.Flush(); err != nil {
			fmt.Fprintf(os.Stderr, "warning: flushing log: %v\n", err)
		}
	}
	if logFile != nil {
		if err := logFile.Close(); err != nil {
			fmt.Fprintf(os.Stderr, "warning: closing log file: %v\n", err)
		}
	}
}
// writeLog emits one line through the standard logger while holding
// logMutex, so it cannot run concurrently with closeLogging or with another
// writeLog call.
func writeLog(message string) {
	logMutex.Lock()
	defer logMutex.Unlock()

	log.Default().Println(message)
}
// printFinalStats writes message as one line to standard output and as one
// line to the standard logger, so the summary reaches the console even when
// the logger is not attached to it.
//
// NOTE(review): when the logger output also includes stdout (the verbose
// setup in initLogging), the line shows up twice on the console. This call
// also goes to the logger without taking logMutex, unlike writeLog.
func printFinalStats(message string) {
	line := message + "\n"
	fmt.Print(line)
	log.Print(line)
}
@ -67,12 +104,12 @@ func getCurrentTime() string {
return time.Now().Format(dateFormat)
}
func getMediaInfo(filePath string) (int, int, error) {
func getMediaInfo(filePath string) (int, int, string, string, string, error) {
// Get bitrate
bitrateCmd := exec.Command("mediainfo", "--Output=Audio;%BitRate%", filePath)
bitrateOut, err := bitrateCmd.Output()
if err != nil {
return 0, 0, fmt.Errorf("mediainfo bitrate failed: %w", err)
return 0, 0, "", "", "", fmt.Errorf("mediainfo bitrate failed: %w", err)
}
bitrateStr := strings.TrimSpace(string(bitrateOut))
bitrate, err := strconv.Atoi(bitrateStr)
@ -84,7 +121,7 @@ func getMediaInfo(filePath string) (int, int, error) {
bitdepthCmd := exec.Command("mediainfo", "--Output=Audio;%BitDepth%", filePath)
bitdepthOut, err := bitdepthCmd.Output()
if err != nil {
return 0, 0, fmt.Errorf("mediainfo bitdepth failed: %w", err)
return 0, 0, "", "", "", fmt.Errorf("mediainfo bitdepth failed: %w", err)
}
bitdepthStr := strings.TrimSpace(string(bitdepthOut))
bitdepth, err := strconv.Atoi(bitdepthStr)
@ -92,12 +129,56 @@ func getMediaInfo(filePath string) (int, int, error) {
bitdepth = 0
}
return bitrate, bitdepth, nil
// Get MusicBrainz release ID
releaseIDCmd := exec.Command("mediainfo", "--Output=General;%MUSICBRAINZ_RELEASETRACKID%", filePath)
releaseIDOut, err := releaseIDCmd.Output()
var releaseID string
if err == nil {
releaseID = strings.TrimSpace(string(releaseIDOut))
}
// Get release year
yearCmd := exec.Command("mediainfo", "--Output=General;%Released_Date%", filePath)
yearOut, err := yearCmd.Output()
var year string
if err == nil {
year = strings.TrimSpace(string(yearOut))
// If no released date, try recorded date
if year == "" {
yearCmd = exec.Command("mediainfo", "--Output=General;%Recorded_Date%", filePath)
yearOut, err := yearCmd.Output()
if err == nil {
year = strings.TrimSpace(string(yearOut))
}
}
// Extract just the year if it's a full date
if len(year) >= 4 {
year = year[0:4]
}
}
// Get catalog number
catalogCmd := exec.Command("mediainfo", "--Output=General;%CATALOGNUMBER%", filePath)
catalogOut, err := catalogCmd.Output()
var catalogNum string
if err == nil {
catalogNum = strings.TrimSpace(string(catalogOut))
}
return bitrate, bitdepth, releaseID, year, catalogNum, nil
}
// getReleaseInfo asks the mediainfo command-line tool for a one-line,
// human-readable summary of filePath's release metadata (album, released and
// recorded dates, label, catalog number) and returns it with surrounding
// whitespace removed. The summary is only used for logging.
func getReleaseInfo(filePath string) string {
	const summaryTemplate = "--Output=General;Album: %Album%, Released: %Released_Date%, Recorded: %Recorded_Date%, Label: %Label%, CatalogNum: %CATALOGNUMBER%"

	// Best effort: the error is deliberately ignored, and whatever mediainfo
	// printed (possibly nothing) is returned.
	raw, _ := exec.Command("mediainfo", summaryTemplate, filePath).Output()
	return strings.TrimSpace(string(raw))
}
func findDuplicateFiles(rootDir string) (map[string][]FileQuality, error) {
dupePattern := regexp.MustCompile(`(?i)(.+)( \(\d+\))\.flac$`)
fileGroups := make(map[string][]FileQuality)
var filesMutex sync.Mutex
err := filepath.Walk(rootDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
@ -115,6 +196,9 @@ func findDuplicateFiles(rootDir string) (map[string][]FileQuality, error) {
// Check if file matches the duplicate pattern
matches := dupePattern.FindStringSubmatch(path)
filesMutex.Lock()
defer filesMutex.Unlock()
if len(matches) > 1 {
baseFile := matches[1] + ".flac"
fileInfo, err := os.Stat(path)
@ -162,74 +246,143 @@ func bytesToHumanReadable(bytes int64) string {
return fmt.Sprintf("%.1f %ciB", float64(bytes)/float64(div), "KMGTPE"[exp])
}
func processFileGroup(baseFile string, files []FileQuality) {
func processFileGroup(baseFile string, files []FileQuality) (int64, bool) {
writeLog(fmt.Sprintf("Processing group for: %s", baseFile))
processed := false
var spaceRecovered int64 = 0
// Get quality info for all files
var bestFile FileQuality
var totalSize int64
releaseMap := make(map[string][]FileQuality)
for i := range files {
bitrate, bitdepth, err := getMediaInfo(files[i].Path)
bitrate, bitdepth, releaseID, year, catalogNum, err := getMediaInfo(files[i].Path)
if err != nil {
writeLog(fmt.Sprintf(" - Error getting media info for %s: %v", files[i].Path, err))
continue
}
files[i].Bitrate = bitrate
files[i].Bitdepth = bitdepth
files[i].ReleaseID = releaseID
files[i].ReleaseYear = year
files[i].CatalogNum = catalogNum
files[i].ReleaseInfo = getReleaseInfo(files[i].Path)
totalSize += files[i].Size
writeLog(fmt.Sprintf(" - Found: %s (Bitrate: %d, Bitdepth: %d, Size: %d bytes)",
files[i].Path, files[i].Bitrate, files[i].Bitdepth, files[i].Size))
// Determine the best file using quality then size as tiebreaker
if files[i].Bitrate > bestFile.Bitrate ||
(files[i].Bitrate == bestFile.Bitrate && files[i].Bitdepth > bestFile.Bitdepth) ||
(files[i].Bitrate == bestFile.Bitrate && files[i].Bitdepth == bestFile.Bitdepth && files[i].Size > bestFile.Size) {
bestFile = files[i]
// Create a unique key for grouping by release
releaseKey := releaseID
if releaseKey == "" {
// Fallback if no MusicBrainz ID
releaseKey = fmt.Sprintf("%s-%s", year, catalogNum)
}
releaseMap[releaseKey] = append(releaseMap[releaseKey], files[i])
writeLog(fmt.Sprintf(" - Found: %s (Bitrate: %d, Bitdepth: %d, Size: %s, Year: %s, CatalogNum: %s)",
files[i].Path, files[i].Bitrate, files[i].Bitdepth,
bytesToHumanReadable(files[i].Size), files[i].ReleaseYear, files[i].CatalogNum))
writeLog(fmt.Sprintf(" Release info: %s", files[i].ReleaseInfo))
}
if bestFile.Path == "" {
writeLog(" - No valid files found in group")
return
}
writeLog(fmt.Sprintf(" -> Keeping: %s (Bitrate: %d, Bitdepth: %d, Size: %d bytes)",
bestFile.Path, bestFile.Bitrate, bestFile.Bitdepth, bestFile.Size))
// Rename best file to original name if it's not already
if bestFile.Path != baseFile {
action := "Would rename"
if armed && !dryRun {
action = "Renaming"
err := os.Rename(bestFile.Path, baseFile)
if err != nil {
writeLog(fmt.Sprintf(" ! Rename failed: %v", err))
return
// Check if files are from different releases
if len(releaseMap) > 1 && !forceMode {
writeLog(" ! Files appear to be from different releases. Skipping group (use -force to override).")
for releaseKey, releaseFiles := range releaseMap {
writeLog(fmt.Sprintf(" Release group: %s", releaseKey))
for _, file := range releaseFiles {
writeLog(fmt.Sprintf(" - %s", file.Path))
}
}
writeLog(fmt.Sprintf(" * %s: %s -> %s", action, bestFile.Path, baseFile))
bestFile.Path = baseFile
return 0, false
}
// Delete other files
for _, file := range files {
if file.Path != bestFile.Path {
action := "Would delete"
// Process each release group
for releaseKey, releaseFiles := range releaseMap {
if len(releaseFiles) <= 1 {
// Skip if only one file for this release
continue
}
writeLog(fmt.Sprintf(" Processing release group: %s", releaseKey))
bestFile = FileQuality{}
// Find best file for this release
for _, file := range releaseFiles {
// Determine the best file using quality then size as tiebreaker
if file.Bitrate > bestFile.Bitrate ||
(file.Bitrate == bestFile.Bitrate && file.Bitdepth > bestFile.Bitdepth) ||
(file.Bitrate == bestFile.Bitrate && file.Bitdepth == bestFile.Bitdepth && file.Size > bestFile.Size) {
bestFile = file
}
}
if bestFile.Path == "" {
writeLog(" - No valid files found in this release group")
continue
}
writeLog(fmt.Sprintf(" -> Keeping: %s (Bitrate: %d, Bitdepth: %d, Size: %s)",
bestFile.Path, bestFile.Bitrate, bestFile.Bitdepth, bytesToHumanReadable(bestFile.Size)))
// If this is the primary file (without (n) in name), make sure the best quality version is it
isBaseFile := (bestFile.Path == baseFile ||
strings.TrimSuffix(bestFile.Path, ".flac") == strings.TrimSuffix(baseFile, ".flac"))
// Generate target filename
targetFilename := baseFile
if !isBaseFile && len(releaseMap) > 1 {
// For multiple releases, append year/identifier to filename to avoid conflicts
ext := filepath.Ext(baseFile)
baseName := strings.TrimSuffix(baseFile, ext)
// Use year or catalog number as identifier
identifier := bestFile.ReleaseYear
if identifier == "" && bestFile.CatalogNum != "" {
identifier = bestFile.CatalogNum
}
if identifier != "" {
targetFilename = fmt.Sprintf("%s [%s]%s", baseName, identifier, ext)
}
}
// Rename best file to target name if needed
if bestFile.Path != targetFilename {
action := "Would rename"
if armed && !dryRun {
action = "Deleting"
err := os.Remove(file.Path)
action = "Renaming"
err := os.Rename(bestFile.Path, targetFilename)
if err != nil {
writeLog(fmt.Sprintf(" ! Delete failed for %s: %v", file.Path, err))
writeLog(fmt.Sprintf(" ! Rename failed: %v", err))
continue
}
}
writeLog(fmt.Sprintf(" * %s: %s (Recovering %s)", action, file.Path, bytesToHumanReadable(file.Size)))
writeLog(fmt.Sprintf(" * %s: %s -> %s", action, bestFile.Path, targetFilename))
bestFile.Path = targetFilename
}
// Delete other files in this release group
for _, file := range releaseFiles {
if file.Path != bestFile.Path {
action := "Would delete"
if armed && !dryRun {
action = "Deleting"
err := os.Remove(file.Path)
if err != nil {
writeLog(fmt.Sprintf(" ! Delete failed for %s: %v", file.Path, err))
continue
}
}
writeLog(fmt.Sprintf(" * %s: %s (Recovering %s)", action, file.Path, bytesToHumanReadable(file.Size)))
spaceRecovered += file.Size
processed = true
}
}
}
writeLog(fmt.Sprintf(" Total recoverable space: %s", bytesToHumanReadable(totalSize-bestFile.Size)))
writeLog(fmt.Sprintf(" Total potentially recoverable space: %s", bytesToHumanReadable(spaceRecovered)))
return spaceRecovered, processed
}
func main() {
@ -262,6 +415,10 @@ func main() {
} else {
writeLog("ARMED MODE: Files will be renamed and deleted!")
}
if forceMode {
writeLog("FORCE MODE: Will process even if releases appear different!")
}
writeLog(fmt.Sprintf("CONCURRENCY: Processing %d file groups simultaneously", concurrency))
writeLog("===============================")
// Check mediainfo is available
@ -284,19 +441,48 @@ func main() {
writeLog("")
}
var totalRecoverableSpace int64
// Process file groups concurrently with limited goroutines
var wg sync.WaitGroup
results := make(chan ProcessResult, len(fileGroups))
semaphore := make(chan struct{}, concurrency)
for baseFile, group := range fileGroups {
processFileGroup(baseFile, group)
for _, file := range group {
if file.Path != baseFile {
totalRecoverableSpace += file.Size
wg.Add(1)
go func(bf string, g []FileQuality) {
defer wg.Done()
semaphore <- struct{}{} // Acquire semaphore
defer func() { <-semaphore }() // Release semaphore
spaceRecovered, processed := processFileGroup(bf, g)
results <- ProcessResult{
BaseFile: bf,
SpaceRecovered: spaceRecovered,
Processed: processed,
}
}
writeLog("")
}(baseFile, group)
}
writeLog(fmt.Sprintf("Total recoverable space: %s", bytesToHumanReadable(totalRecoverableSpace)))
writeLog(fmt.Sprintf("Cleanup completed at %s", getCurrentTime()))
writeLog("===============================")
// Close results channel once all goroutines complete
go func() {
wg.Wait()
close(results)
}()
// Process results
var totalRecoverableSpace int64
processedGroups := 0
for result := range results {
if result.Processed {
processedGroups++
totalRecoverableSpace += result.SpaceRecovered
}
}
// Final stats - these always print to console regardless of verbose setting
finalTime := getCurrentTime()
printFinalStats(fmt.Sprintf("Processed %d of %d file groups", processedGroups, len(fileGroups)))
printFinalStats(fmt.Sprintf("Total recoverable space: %s", bytesToHumanReadable(totalRecoverableSpace)))
printFinalStats(fmt.Sprintf("Cleanup completed at %s", finalTime))
printFinalStats("===============================")
}