Refactor fuzzy matcher patterns for granular control

Changes: 1. Categorize HARDCODED_IGNORE_PATTERNS into 4 distinct lists: - QUALITY_PATTERNS: Quality tags ([4K], HD, (SD), etc.) - REGIONAL_PATTERNS: Regional indicators (East) - GEOGRAPHIC_PATTERNS: Geographic prefixes (US:, USA:) - MISC_PATTERNS: Miscellaneous patterns ((CX), (Backup), single-letter tags) 2. Update normalize_name function signature: - Remove remove_quality_tags parameter - Add ignore_quality, ignore_regional, ignore_geographic, ignore_misc (all default to True) - Maintains backward compatibility with default True values 3. Implement dynamic pattern application: - Build patterns_to_apply list based on category flags - Apply only selected pattern categories 4. Improve user_ignored_tags handling: - Tags with brackets/parentheses: literal match - Simple word tags: use word boundaries (\b) to avoid partial matches - Fixes issue where "East" tag would incorrectly match "east" in "Feast" 5. Update version to 25.313.1157 (Julian date: Nov 9, 2025 11:57 AM) This refactoring enables future UI controls for granular pattern filtering while maintaining full backward compatibility.
2025-11-09 18:01:25 +00:00
parent 2b60dc0eb2
commit 4e794879e5
2 changed files with 59 additions and 22 deletions
--- a/Stream-Mapparr/fuzzy_matcher.py
+++ b/Stream-Mapparr/fuzzy_matcher.py
@@ -11,24 +11,20 @@ import logging
 from glob import glob

 # Version: YY.DDD.HHMM (Julian date format: Year.DayOfYear.Time)
-__version__ = "25.310.1806"
+__version__ = "25.313.1157"

 # Setup logging
 LOGGER = logging.getLogger("plugins.fuzzy_matcher")

-# Hardcoded regex patterns to ignore during fuzzy matching
+# Categorized regex patterns for granular control during fuzzy matching
 # Note: All patterns are applied with re.IGNORECASE flag in normalize_name()
-HARDCODED_IGNORE_PATTERNS = [
+
+# Quality-related patterns: [4K], HD, (SD), etc.
+QUALITY_PATTERNS = [
    # Bracketed quality tags: [4K], [UHD], [FHD], [HD], [SD], [Unknown], [Unk], [Slow], [Dead]
    r'\[(4K|UHD|FHD|HD|SD|Unknown|Unk|Slow|Dead)\]',
    r'\[(?:4k|uhd|fhd|hd|sd|unknown|unk|slow|dead)\]',

-    # Single letter tags in parentheses: (A), (B), (C), etc.
-    r'\([A-Z]\)',
-
-    # Regional: " East" or " east"
-    r'\s[Ee][Aa][Ss][Tt]',
-
    # Unbracketed quality tags in middle: " 4K ", " UHD ", " FHD ", " HD ", " SD ", etc.
    r'\s(?:4K|UHD|FHD|HD|SD|Unknown|Unk|Slow|Dead|FD)\s',

@@ -38,15 +34,30 @@ HARDCODED_IGNORE_PATTERNS = [
    # Word boundary quality tags with optional colon: "4K:", "UHD:", "FHD:", "HD:", etc.
    r'\b(?:4K|UHD|FHD|HD|SD|Unknown|Unk|Slow|Dead|FD):?\s',

-    # Special tags
-    r'\s\(CX\)',  # Cinemax tag
-
    # Parenthesized quality tags: (4K), (UHD), (FHD), (HD), (SD), (Unknown), (Unk), (Slow), (Dead), (Backup)
    r'\s\((4K|UHD|FHD|HD|SD|Unknown|Unk|Slow|Dead|FD|Backup)\)',
+]

+# Regional indicator patterns: East, West, etc.
+REGIONAL_PATTERNS = [
+    # Regional: " East" or " east"
+    r'\s[Ee][Aa][Ss][Tt]',
+]
+
+# Geographic prefix patterns: US:, USA:, etc.
+GEOGRAPHIC_PATTERNS = [
    # Geographic prefixes
    r'\bUSA?:\s',  # "US:" or "USA:"
    r'\bUS\s',     # "US " at word boundary
+]
+
+# Miscellaneous patterns: (CX), (Backup), single-letter tags, etc.
+MISC_PATTERNS = [
+    # Single letter tags in parentheses: (A), (B), (C), etc.
+    r'\([A-Z]\)',
+
+    # Special tags
+    r'\s\(CX\)',  # Cinemax tag

    # Backup tags
    r'\([bB]ackup\)',
@@ -181,14 +192,18 @@ class FuzzyMatcher:
            callsign = re.sub(r'-(?:TV|CD|LP|DT|LD)$', '', callsign)
        return callsign
    
-    def normalize_name(self, name, user_ignored_tags=None, remove_quality_tags=True, remove_cinemax=False):
+    def normalize_name(self, name, user_ignored_tags=None, ignore_quality=True, ignore_regional=True,
+                       ignore_geographic=True, ignore_misc=True, remove_cinemax=False):
        """
        Normalize channel or stream name for matching by removing tags, prefixes, and other noise.

        Args:
            name: Name to normalize
            user_ignored_tags: Additional user-configured tags to ignore (list of strings)
-            remove_quality_tags: If True, remove hardcoded quality patterns (for matching only, not display)
+            ignore_quality: If True, remove quality-related patterns (e.g., [4K], HD, (SD))
+            ignore_regional: If True, remove regional indicator patterns (e.g., East)
+            ignore_geographic: If True, remove geographic prefix patterns (e.g., US:, USA)
+            ignore_misc: If True, remove miscellaneous patterns (e.g., (CX), (Backup), single-letter tags)
            remove_cinemax: If True, remove "Cinemax" prefix (useful when channel name contains "max")

        Returns:
@@ -204,15 +219,37 @@ class FuzzyMatcher:
        if remove_cinemax:
            name = re.sub(r'\bCinemax\b\s*', '', name, flags=re.IGNORECASE)

-        # Apply hardcoded ignore patterns only if remove_quality_tags is True
-        if remove_quality_tags:
-            for pattern in HARDCODED_IGNORE_PATTERNS:
-                name = re.sub(pattern, '', name, flags=re.IGNORECASE)
+        # Build list of patterns to apply based on category flags
+        patterns_to_apply = []

-        # Apply user-configured ignored tags
+        if ignore_quality:
+            patterns_to_apply.extend(QUALITY_PATTERNS)
+
+        if ignore_regional:
+            patterns_to_apply.extend(REGIONAL_PATTERNS)
+
+        if ignore_geographic:
+            patterns_to_apply.extend(GEOGRAPHIC_PATTERNS)
+
+        if ignore_misc:
+            patterns_to_apply.extend(MISC_PATTERNS)
+
+        # Apply selected hardcoded patterns
+        for pattern in patterns_to_apply:
+            name = re.sub(pattern, '', name, flags=re.IGNORECASE)
+
+        # Apply user-configured ignored tags with improved handling
        for tag in user_ignored_tags:
-            escaped_tag = re.escape(tag)
-            name = re.sub(escaped_tag, '', name, flags=re.IGNORECASE)
+            # Check if tag contains brackets or parentheses - if so, match literally
+            if '[' in tag or ']' in tag or '(' in tag or ')' in tag:
+                # Literal match for bracketed/parenthesized tags
+                escaped_tag = re.escape(tag)
+                name = re.sub(escaped_tag, '', name, flags=re.IGNORECASE)
+            else:
+                # Word boundary match for simple word tags to avoid partial matches
+                # e.g., "East" won't match the "east" in "Feast"
+                escaped_tag = re.escape(tag)
+                name = re.sub(r'\b' + escaped_tag + r'\b', '', name, flags=re.IGNORECASE)
        
        # Remove callsigns in parentheses
        name = re.sub(r'\([KW][A-Z]{3}(?:-(?:TV|CD|LP|DT|LD))?\)', '', name, flags=re.IGNORECASE)
--- a/Stream-Mapparr/plugin.py
+++ b/Stream-Mapparr/plugin.py
@@ -365,7 +365,7 @@ class Plugin:
        """
        if self.fuzzy_matcher:
            # Use fuzzy matcher's normalization
-            return self.fuzzy_matcher.normalize_name(name, ignore_tags, remove_quality_tags=True, remove_cinemax=remove_cinemax)
+            return self.fuzzy_matcher.normalize_name(name, ignore_tags, remove_cinemax=remove_cinemax)
        
        # Fallback to basic cleaning
        if ignore_tags is None: