From 2414407291f2a339570b18c04fd6632434f3ac66 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 11 Nov 2025 01:07:59 +0000 Subject: [PATCH] Fix country prefix handling to not affect other plugins Changed approach from modifying global GEOGRAPHIC_PATTERNS (which affects all plugins using fuzzy_matcher.py) to adding a new optional parameter. Changes: - Reverted GEOGRAPHIC_PATTERNS to original US-only patterns - Added new remove_country_prefix parameter to normalize_name() (default: False) - Updated _clean_channel_name() to use remove_country_prefix=True by default - Implemented smart prefix detection that avoids removing quality tags (HD, SD, UHD, FHD) - Added fallback country prefix removal in basic cleaning code - Updated README to clarify country code prefix handling approach This ensures backward compatibility for other plugins while enabling multi-country support for Stream-Mapparr. --- README.md | 5 +++-- Stream-Mapparr/fuzzy_matcher.py | 28 ++++++++++++++++++++-------- Stream-Mapparr/plugin.py | 23 ++++++++++++++++++----- 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 3dc5219..8b2a725 100644 --- a/README.md +++ b/README.md @@ -215,9 +215,10 @@ The legacy format is still supported and uses a direct array: * Example: `UK_channels.json` 4. **Country Code Prefix Handling** - * Stream names may be prefixed with country codes (e.g., `CA: CBC`, `UK BBC One`) + * Stream names may be prefixed with country codes (e.g., `CA: CBC`, `UK BBC One`, `USA News`) * The plugin automatically removes these prefixes during matching - * Supported formats: `CC:`, `CC `, `CCC:`, or `CCC ` (where C = letter) + * Supported formats: `CC:` or `CC ` (2-letter codes), `CCC:` or `CCC ` (3-letter codes) + * Smart detection avoids removing quality tags like HD, SD, UHD, FHD ### Tips for Better Matching diff --git a/Stream-Mapparr/fuzzy_matcher.py b/Stream-Mapparr/fuzzy_matcher.py index 506e14e..ea7ab30 100644 --- a/Stream-Mapparr/fuzzy_matcher.py +++ b/Stream-Mapparr/fuzzy_matcher.py @@ -44,14 +44,11 @@ REGIONAL_PATTERNS = [ r'\s[Ee][Aa][Ss][Tt]', ] -# Geographic prefix patterns: US:, USA:, CA:, UK:, etc. +# Geographic prefix patterns: US:, USA:, etc. GEOGRAPHIC_PATTERNS = [ - # Geographic prefixes at start with colon: "US:", "CA:", "UK:", etc. (any 2-3 letter code followed by colon) - r'^[A-Z]{2,3}:\s*', - # Geographic prefixes at start with space: "US ", "CA ", "UK ", etc. (any 2-3 letter code followed by space) - r'^[A-Z]{2,3}\s+', - # Legacy USA pattern for backward compatibility - r'\bUSA?:\s', + # Geographic prefixes + r'\bUSA?:\s', # "US:" or "USA:" + r'\bUS\s', # "US " at word boundary ] # Miscellaneous patterns: (CX), (Backup), single-letter tags, etc. @@ -196,7 +193,7 @@ class FuzzyMatcher: return callsign def normalize_name(self, name, user_ignored_tags=None, ignore_quality=True, ignore_regional=True, - ignore_geographic=True, ignore_misc=True, remove_cinemax=False): + ignore_geographic=True, ignore_misc=True, remove_cinemax=False, remove_country_prefix=False): """ Normalize channel or stream name for matching by removing tags, prefixes, and other noise. @@ -208,6 +205,7 @@ class FuzzyMatcher: ignore_geographic: If True, remove geographic prefix patterns (e.g., US:, USA) ignore_misc: If True, remove miscellaneous patterns (e.g., (CX), (Backup), single-letter tags) remove_cinemax: If True, remove "Cinemax" prefix (useful when channel name contains "max") + remove_country_prefix: If True, remove country code prefixes (e.g., CA:, UK , DE: ) from start of name Returns: Normalized name @@ -218,6 +216,20 @@ class FuzzyMatcher: # Remove leading parenthetical prefixes like (SP2), (D1), etc. name = re.sub(r'^\([^\)]+\)\s*', '', name) + # Remove country code prefix if requested (e.g., "CA:", "UK ", "USA: ") + # This handles multi-country databases where streams may be prefixed with country codes + if remove_country_prefix: + # Known quality tags that should NOT be removed (to avoid false positives) + quality_tags = {'HD', 'SD', 'FD', 'UHD', 'FHD'} + + # Check for 2-3 letter prefix with colon or space at start + prefix_match = re.match(r'^([A-Z]{2,3})[:|\s]\s*', name) + if prefix_match: + prefix = prefix_match.group(1).upper() + # Only remove if it's NOT a quality tag + if prefix not in quality_tags: + name = name[len(prefix_match.group(0)):] + # Remove "Cinemax" prefix if requested (for channels containing "max") if remove_cinemax: name = re.sub(r'\bCinemax\b\s*', '', name, flags=re.IGNORECASE) diff --git a/Stream-Mapparr/plugin.py b/Stream-Mapparr/plugin.py index 4d3f3e1..89fb149 100644 --- a/Stream-Mapparr/plugin.py +++ b/Stream-Mapparr/plugin.py @@ -550,7 +550,7 @@ class Plugin: return tags def _clean_channel_name(self, name, ignore_tags=None, ignore_quality=True, ignore_regional=True, - ignore_geographic=True, ignore_misc=True, remove_cinemax=False): + ignore_geographic=True, ignore_misc=True, remove_cinemax=False, remove_country_prefix=True): """ Remove brackets and their contents from channel name for matching, and remove ignore tags. Uses fuzzy matcher's normalization if available, otherwise falls back to basic cleaning. @@ -563,6 +563,7 @@ class Plugin: ignore_geographic: If True, remove geographic prefix patterns (e.g., US:, USA) ignore_misc: If True, remove miscellaneous patterns (e.g., (CX), (Backup), single-letter tags) remove_cinemax: If True, remove "Cinemax" prefix (for streams when channel contains "max") + remove_country_prefix: If True, remove country code prefixes (e.g., CA:, UK ) from start of name """ if self.fuzzy_matcher: # Use fuzzy matcher's normalization @@ -572,22 +573,34 @@ class Plugin: ignore_regional=ignore_regional, ignore_geographic=ignore_geographic, ignore_misc=ignore_misc, - remove_cinemax=remove_cinemax + remove_cinemax=remove_cinemax, + remove_country_prefix=remove_country_prefix ) # Fallback to basic cleaning if ignore_tags is None: ignore_tags = [] - + + cleaned = name + + # Remove country code prefix if requested + if remove_country_prefix: + quality_tags = {'HD', 'SD', 'FD', 'UHD', 'FHD'} + prefix_match = re.match(r'^([A-Z]{2,3})[:|\s]\s*', cleaned) + if prefix_match: + prefix = prefix_match.group(1).upper() + if prefix not in quality_tags: + cleaned = cleaned[len(prefix_match.group(0)):] + # Remove anything in square brackets or parentheses at the end - cleaned = re.sub(r'\s*[\[\(][^\[\]\(\)]*[\]\)]\s*$', '', name) + cleaned = re.sub(r'\s*[\[\(][^\[\]\(\)]*[\]\)]\s*$', '', cleaned) # Keep removing until no more brackets at the end while True: new_cleaned = re.sub(r'\s*[\[\(][^\[\]\(\)]*[\]\)]\s*$', '', cleaned) if new_cleaned == cleaned: break cleaned = new_cleaned - + # Remove ignore tags for tag in ignore_tags: # If tag has brackets/parentheses, match exactly