From 4e794879e59e9baafad89f3e73fea8c7cc7a8635 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 9 Nov 2025 18:01:25 +0000 Subject: [PATCH] Refactor fuzzy matcher patterns for granular control Changes: 1. Categorize HARDCODED_IGNORE_PATTERNS into 4 distinct lists: - QUALITY_PATTERNS: Quality tags ([4K], HD, (SD), etc.) - REGIONAL_PATTERNS: Regional indicators (East) - GEOGRAPHIC_PATTERNS: Geographic prefixes (US:, USA:) - MISC_PATTERNS: Miscellaneous patterns ((CX), (Backup), single-letter tags) 2. Update normalize_name function signature: - Remove remove_quality_tags parameter - Add ignore_quality, ignore_regional, ignore_geographic, ignore_misc (all default to True) - All category flags default to True, so every pattern category is still applied by default; callers that passed remove_quality_tags explicitly must drop that argument (plugin.py is updated accordingly in this patch) 3. Implement dynamic pattern application: - Build patterns_to_apply list based on category flags - Apply only selected pattern categories 4. Improve user_ignored_tags handling: - Tags with brackets/parentheses: literal match - Simple word tags: use word boundaries (\b) to avoid partial matches - Fixes issue where "East" tag would incorrectly match "east" in "Feast" 5. Update version to 25.313.1157 (Julian date: Nov 9, 2025 11:57 AM) This refactoring enables future UI controls for granular pattern filtering while keeping the default behavior of applying all pattern categories. 
--- Stream-Mapparr/fuzzy_matcher.py | 79 ++++++++++++++++++++++++--------- Stream-Mapparr/plugin.py | 2 +- 2 files changed, 59 insertions(+), 22 deletions(-) diff --git a/Stream-Mapparr/fuzzy_matcher.py b/Stream-Mapparr/fuzzy_matcher.py index a00cf85..cd0d195 100644 --- a/Stream-Mapparr/fuzzy_matcher.py +++ b/Stream-Mapparr/fuzzy_matcher.py @@ -11,24 +11,20 @@ import logging from glob import glob # Version: YY.DDD.HHMM (Julian date format: Year.DayOfYear.Time) -__version__ = "25.310.1806" +__version__ = "25.313.1157" # Setup logging LOGGER = logging.getLogger("plugins.fuzzy_matcher") -# Hardcoded regex patterns to ignore during fuzzy matching +# Categorized regex patterns for granular control during fuzzy matching # Note: All patterns are applied with re.IGNORECASE flag in normalize_name() -HARDCODED_IGNORE_PATTERNS = [ + +# Quality-related patterns: [4K], HD, (SD), etc. +QUALITY_PATTERNS = [ # Bracketed quality tags: [4K], [UHD], [FHD], [HD], [SD], [Unknown], [Unk], [Slow], [Dead] r'\[(4K|UHD|FHD|HD|SD|Unknown|Unk|Slow|Dead)\]', r'\[(?:4k|uhd|fhd|hd|sd|unknown|unk|slow|dead)\]', - # Single letter tags in parentheses: (A), (B), (C), etc. - r'\([A-Z]\)', - - # Regional: " East" or " east" - r'\s[Ee][Aa][Ss][Tt]', - # Unbracketed quality tags in middle: " 4K ", " UHD ", " FHD ", " HD ", " SD ", etc. r'\s(?:4K|UHD|FHD|HD|SD|Unknown|Unk|Slow|Dead|FD)\s', @@ -38,15 +34,30 @@ HARDCODED_IGNORE_PATTERNS = [ # Word boundary quality tags with optional colon: "4K:", "UHD:", "FHD:", "HD:", etc. r'\b(?:4K|UHD|FHD|HD|SD|Unknown|Unk|Slow|Dead|FD):?\s', - # Special tags - r'\s\(CX\)', # Cinemax tag - # Parenthesized quality tags: (4K), (UHD), (FHD), (HD), (SD), (Unknown), (Unk), (Slow), (Dead), (Backup) r'\s\((4K|UHD|FHD|HD|SD|Unknown|Unk|Slow|Dead|FD|Backup)\)', +] +# Regional indicator patterns: East, West, etc. +REGIONAL_PATTERNS = [ + # Regional: " East" or " east" + r'\s[Ee][Aa][Ss][Tt]', +] + +# Geographic prefix patterns: US:, USA:, etc. 
+GEOGRAPHIC_PATTERNS = [ # Geographic prefixes r'\bUSA?:\s', # "US:" or "USA:" r'\bUS\s', # "US " at word boundary +] + +# Miscellaneous patterns: (CX), (Backup), single-letter tags, etc. +MISC_PATTERNS = [ + # Single letter tags in parentheses: (A), (B), (C), etc. + r'\([A-Z]\)', + + # Special tags + r'\s\(CX\)', # Cinemax tag # Backup tags r'\([bB]ackup\)', @@ -181,14 +192,18 @@ class FuzzyMatcher: callsign = re.sub(r'-(?:TV|CD|LP|DT|LD)$', '', callsign) return callsign - def normalize_name(self, name, user_ignored_tags=None, remove_quality_tags=True, remove_cinemax=False): + def normalize_name(self, name, user_ignored_tags=None, ignore_quality=True, ignore_regional=True, + ignore_geographic=True, ignore_misc=True, remove_cinemax=False): """ Normalize channel or stream name for matching by removing tags, prefixes, and other noise. Args: name: Name to normalize user_ignored_tags: Additional user-configured tags to ignore (list of strings) - remove_quality_tags: If True, remove hardcoded quality patterns (for matching only, not display) + ignore_quality: If True, remove quality-related patterns (e.g., [4K], HD, (SD)) + ignore_regional: If True, remove regional indicator patterns (e.g., East) + ignore_geographic: If True, remove geographic prefix patterns (e.g., US:, USA) + ignore_misc: If True, remove miscellaneous patterns (e.g., (CX), (Backup), single-letter tags) remove_cinemax: If True, remove "Cinemax" prefix (useful when channel name contains "max") Returns: @@ -204,15 +219,37 @@ class FuzzyMatcher: if remove_cinemax: name = re.sub(r'\bCinemax\b\s*', '', name, flags=re.IGNORECASE) - # Apply hardcoded ignore patterns only if remove_quality_tags is True - if remove_quality_tags: - for pattern in HARDCODED_IGNORE_PATTERNS: - name = re.sub(pattern, '', name, flags=re.IGNORECASE) + # Build list of patterns to apply based on category flags + patterns_to_apply = [] - # Apply user-configured ignored tags + if ignore_quality: + 
patterns_to_apply.extend(QUALITY_PATTERNS) + + if ignore_regional: + patterns_to_apply.extend(REGIONAL_PATTERNS) + + if ignore_geographic: + patterns_to_apply.extend(GEOGRAPHIC_PATTERNS) + + if ignore_misc: + patterns_to_apply.extend(MISC_PATTERNS) + + # Apply selected hardcoded patterns + for pattern in patterns_to_apply: + name = re.sub(pattern, '', name, flags=re.IGNORECASE) + + # Apply user-configured ignored tags with improved handling for tag in user_ignored_tags: - escaped_tag = re.escape(tag) - name = re.sub(escaped_tag, '', name, flags=re.IGNORECASE) + # Check if tag contains brackets or parentheses - if so, match literally + if '[' in tag or ']' in tag or '(' in tag or ')' in tag: + # Literal match for bracketed/parenthesized tags + escaped_tag = re.escape(tag) + name = re.sub(escaped_tag, '', name, flags=re.IGNORECASE) + else: + # Word boundary match for simple word tags to avoid partial matches + # e.g., "East" won't match the "east" in "Feast" + escaped_tag = re.escape(tag) + name = re.sub(r'\b' + escaped_tag + r'\b', '', name, flags=re.IGNORECASE) # Remove callsigns in parentheses name = re.sub(r'\([KW][A-Z]{3}(?:-(?:TV|CD|LP|DT|LD))?\)', '', name, flags=re.IGNORECASE) diff --git a/Stream-Mapparr/plugin.py b/Stream-Mapparr/plugin.py index 14249e3..373ed84 100644 --- a/Stream-Mapparr/plugin.py +++ b/Stream-Mapparr/plugin.py @@ -365,7 +365,7 @@ class Plugin: """ if self.fuzzy_matcher: # Use fuzzy matcher's normalization - return self.fuzzy_matcher.normalize_name(name, ignore_tags, remove_quality_tags=True, remove_cinemax=remove_cinemax) + return self.fuzzy_matcher.normalize_name(name, ignore_tags, remove_cinemax=remove_cinemax) # Fallback to basic cleaning if ignore_tags is None: