diff --git a/Stream-Mapparr/fuzzy_matcher.py b/Stream-Mapparr/fuzzy_matcher.py index 44b3b22..ede3fa9 100644 --- a/Stream-Mapparr/fuzzy_matcher.py +++ b/Stream-Mapparr/fuzzy_matcher.py @@ -14,18 +14,38 @@ from glob import glob LOGGER = logging.getLogger("plugins.fuzzy_matcher") # Hardcoded regex patterns to ignore during fuzzy matching +# Note: All patterns are applied with the re.IGNORECASE flag in normalize_name() HARDCODED_IGNORE_PATTERNS = [ + # Bracketed quality tags: [4K], [UHD], [FHD], [HD], [SD], [Unknown], [Unk], [Slow], [Dead] r'\[(4K|UHD|FHD|HD|SD|Unknown|Unk|Slow|Dead)\]', r'\[(?:4k|uhd|fhd|hd|sd|unknown|unk|slow|dead)\]', + + # Single letter tags in parentheses: (A), (B), (C), etc. r'\([A-Z]\)', + + # Regional: whitespace followed by "East" in any letter case (" East", " east", " EAST") r'\s[Ee][Aa][Ss][Tt]', - r'\s(?:UHD|FHD|SD|HD|FD)\s', - r'\s(?:UHD|FHD|SD|HD|FD)$', - r'\b(?:UHD|FHD|SD|HD|FD):?\s', - r'\s\(CX\)', - r'\s\((UHD|FHD|SD|HD|FD|Backup)\)', - r'\bUSA?:\s', - r'\bUS\s', + + # Unbracketed quality tags in middle: " 4K ", " UHD ", " FHD ", " HD ", " SD ", etc. + r'\s(?:4K|UHD|FHD|HD|SD|Unknown|Unk|Slow|Dead|FD)\s', + + # Unbracketed quality tags at end: " 4K", " UHD", " FHD", " HD", " SD", etc. + r'\s(?:4K|UHD|FHD|HD|SD|Unknown|Unk|Slow|Dead|FD)$', + + # Word boundary quality tags with optional colon: "4K:", "UHD:", "FHD:", "HD:", etc. + r'\b(?:4K|UHD|FHD|HD|SD|Unknown|Unk|Slow|Dead|FD):?\s', + + # Special tags + r'\s\(CX\)', # Cinemax tag + + # Parenthesized quality tags: (4K), (UHD), (FHD), (HD), (SD), (Unknown), (Unk), (Slow), (Dead), (FD), (Backup) + r'\s\((4K|UHD|FHD|HD|SD|Unknown|Unk|Slow|Dead|FD|Backup)\)', + + # Geographic prefixes + r'\bUSA?:\s', # "US:" or "USA:" + r'\bUS\s', # "US " at word boundary + + # Backup tags: "(backup)" or "(Backup)" r'\([bB]ackup\)', ]