From 2414407291f2a339570b18c04fd6632434f3ac66 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 11 Nov 2025 01:07:59 +0000
Subject: [PATCH] Fix country prefix handling to not affect other plugins

Changed approach from modifying global GEOGRAPHIC_PATTERNS (which affects
all plugins using fuzzy_matcher.py) to adding a new optional parameter.

Changes:
- Reverted GEOGRAPHIC_PATTERNS to original US-only patterns
- Added new remove_country_prefix parameter to normalize_name() (default: False)
- Updated _clean_channel_name() to use remove_country_prefix=True by default
- Implemented smart prefix detection that avoids removing quality tags (HD, SD, UHD, FHD)
- Added fallback country prefix removal in basic cleaning code
- Updated README to clarify country code prefix handling approach

This ensures backward compatibility for other plugins while enabling
multi-country support for Stream-Mapparr.
---
 README.md                       |  5 +++--
 Stream-Mapparr/fuzzy_matcher.py | 28 ++++++++++++++++++++--------
 Stream-Mapparr/plugin.py        | 23 ++++++++++++++++++-----
 3 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 3dc5219..8b2a725 100644
--- a/README.md
+++ b/README.md
@@ -215,9 +215,10 @@ The legacy format is still supported and uses a direct array:
    * Example: `UK_channels.json`
 
 4. **Country Code Prefix Handling**
-   * Stream names may be prefixed with country codes (e.g., `CA: CBC`, `UK BBC One`)
+   * Stream names may be prefixed with country codes (e.g., `CA: CBC`, `UK BBC One`, `USA News`)
    * The plugin automatically removes these prefixes during matching
-   * Supported formats: `CC:`, `CC `, `CCC:`, or `CCC ` (where C = letter)
+   * Supported formats: `CC:` or `CC ` (2-letter codes), `CCC:` or `CCC ` (3-letter codes)
+   * Smart detection avoids removing quality tags like HD, SD, UHD, FHD
 
 ### Tips for Better Matching
 
diff --git a/Stream-Mapparr/fuzzy_matcher.py b/Stream-Mapparr/fuzzy_matcher.py
index 506e14e..ea7ab30 100644
--- a/Stream-Mapparr/fuzzy_matcher.py
+++ b/Stream-Mapparr/fuzzy_matcher.py
@@ -44,14 +44,11 @@ REGIONAL_PATTERNS = [
     r'\s[Ee][Aa][Ss][Tt]',
 ]
 
-# Geographic prefix patterns: US:, USA:, CA:, UK:, etc.
+# Geographic prefix patterns: US:, USA:, etc.
 GEOGRAPHIC_PATTERNS = [
-    # Geographic prefixes at start with colon: "US:", "CA:", "UK:", etc. (any 2-3 letter code followed by colon)
-    r'^[A-Z]{2,3}:\s*',
-    # Geographic prefixes at start with space: "US ", "CA ", "UK ", etc. (any 2-3 letter code followed by space)
-    r'^[A-Z]{2,3}\s+',
-    # Legacy USA pattern for backward compatibility
-    r'\bUSA?:\s',
+    # Geographic prefixes
+    r'\bUSA?:\s',  # "US:" or "USA:"
+    r'\bUS\s',     # "US " at word boundary
 ]
 
 # Miscellaneous patterns: (CX), (Backup), single-letter tags, etc.
@@ -196,7 +193,7 @@ class FuzzyMatcher:
         return callsign
     
     def normalize_name(self, name, user_ignored_tags=None, ignore_quality=True, ignore_regional=True,
-                       ignore_geographic=True, ignore_misc=True, remove_cinemax=False):
+                       ignore_geographic=True, ignore_misc=True, remove_cinemax=False, remove_country_prefix=False):
         """
         Normalize channel or stream name for matching by removing tags, prefixes, and other noise.
 
@@ -208,6 +205,7 @@ class FuzzyMatcher:
             ignore_geographic: If True, remove geographic prefix patterns (e.g., US:, USA)
             ignore_misc: If True, remove miscellaneous patterns (e.g., (CX), (Backup), single-letter tags)
             remove_cinemax: If True, remove "Cinemax" prefix (useful when channel name contains "max")
+            remove_country_prefix: If True, remove country code prefixes (e.g., CA:, UK , DE: ) from start of name
 
         Returns:
             Normalized name
@@ -218,6 +216,20 @@ class FuzzyMatcher:
         # Remove leading parenthetical prefixes like (SP2), (D1), etc.
         name = re.sub(r'^\([^\)]+\)\s*', '', name)
 
+        # Remove country code prefix if requested (e.g., "CA:", "UK ", "USA: ")
+        # This handles multi-country databases where streams may be prefixed with country codes
+        if remove_country_prefix:
+            # Known quality tags that should NOT be removed (to avoid false positives)
+            quality_tags = {'HD', 'SD', 'FD', 'UHD', 'FHD'}
+
+            # Check for 2-3 letter prefix with colon or space at start
+            prefix_match = re.match(r'^([A-Z]{2,3})[:|\s]\s*', name)
+            if prefix_match:
+                prefix = prefix_match.group(1).upper()
+                # Only remove if it's NOT a quality tag
+                if prefix not in quality_tags:
+                    name = name[len(prefix_match.group(0)):]
+
         # Remove "Cinemax" prefix if requested (for channels containing "max")
         if remove_cinemax:
             name = re.sub(r'\bCinemax\b\s*', '', name, flags=re.IGNORECASE)
diff --git a/Stream-Mapparr/plugin.py b/Stream-Mapparr/plugin.py
index 4d3f3e1..89fb149 100644
--- a/Stream-Mapparr/plugin.py
+++ b/Stream-Mapparr/plugin.py
@@ -550,7 +550,7 @@ class Plugin:
         return tags
 
     def _clean_channel_name(self, name, ignore_tags=None, ignore_quality=True, ignore_regional=True,
-                            ignore_geographic=True, ignore_misc=True, remove_cinemax=False):
+                            ignore_geographic=True, ignore_misc=True, remove_cinemax=False, remove_country_prefix=True):
         """
         Remove brackets and their contents from channel name for matching, and remove ignore tags.
         Uses fuzzy matcher's normalization if available, otherwise falls back to basic cleaning.
@@ -563,6 +563,7 @@ class Plugin:
             ignore_geographic: If True, remove geographic prefix patterns (e.g., US:, USA)
             ignore_misc: If True, remove miscellaneous patterns (e.g., (CX), (Backup), single-letter tags)
             remove_cinemax: If True, remove "Cinemax" prefix (for streams when channel contains "max")
+            remove_country_prefix: If True, remove country code prefixes (e.g., CA:, UK ) from start of name
         """
         if self.fuzzy_matcher:
             # Use fuzzy matcher's normalization
@@ -572,22 +573,34 @@ class Plugin:
                 ignore_regional=ignore_regional,
                 ignore_geographic=ignore_geographic,
                 ignore_misc=ignore_misc,
-                remove_cinemax=remove_cinemax
+                remove_cinemax=remove_cinemax,
+                remove_country_prefix=remove_country_prefix
             )
         
         # Fallback to basic cleaning
         if ignore_tags is None:
             ignore_tags = []
-        
+
+        cleaned = name
+
+        # Remove country code prefix if requested
+        if remove_country_prefix:
+            quality_tags = {'HD', 'SD', 'FD', 'UHD', 'FHD'}
+            prefix_match = re.match(r'^([A-Z]{2,3})[:|\s]\s*', cleaned)
+            if prefix_match:
+                prefix = prefix_match.group(1).upper()
+                if prefix not in quality_tags:
+                    cleaned = cleaned[len(prefix_match.group(0)):]
+
         # Remove anything in square brackets or parentheses at the end
-        cleaned = re.sub(r'\s*[\[\(][^\[\]\(\)]*[\]\)]\s*$', '', name)
+        cleaned = re.sub(r'\s*[\[\(][^\[\]\(\)]*[\]\)]\s*$', '', cleaned)
         # Keep removing until no more brackets at the end
         while True:
             new_cleaned = re.sub(r'\s*[\[\(][^\[\]\(\)]*[\]\)]\s*$', '', cleaned)
             if new_cleaned == cleaned:
                 break
             cleaned = new_cleaned
-        
+
         # Remove ignore tags
         for tag in ignore_tags:
             # If tag has brackets/parentheses, match exactly