Fix country prefix handling to not affect other plugins

Changed approach: instead of modifying the global GEOGRAPHIC_PATTERNS (which
affects all plugins using fuzzy_matcher.py), country prefix removal is now an
opt-in parameter (remove_country_prefix) on normalize_name().

Changes:
- Reverted GEOGRAPHIC_PATTERNS to original US-only patterns
- Added new remove_country_prefix parameter to normalize_name() (default: False)
- Updated _clean_channel_name() to use remove_country_prefix=True by default
- Implemented smart prefix detection that avoids removing quality tags (HD, SD, FD, UHD, FHD)
- Added the same country prefix removal to the basic cleaning fallback in _clean_channel_name() (used when no fuzzy matcher is available)
- Updated README to clarify country code prefix handling approach

This ensures backward compatibility for other plugins while enabling
multi-country support for Stream-Mapparr.
This commit is contained in:
Claude
2025-11-11 01:07:59 +00:00
parent f1d7b1472e
commit 2414407291
3 changed files with 41 additions and 15 deletions

View File

@@ -215,9 +215,10 @@ The legacy format is still supported and uses a direct array:
* Example: `UK_channels.json` * Example: `UK_channels.json`
4. **Country Code Prefix Handling** 4. **Country Code Prefix Handling**
* Stream names may be prefixed with country codes (e.g., `CA: CBC`, `UK BBC One`) * Stream names may be prefixed with country codes (e.g., `CA: CBC`, `UK BBC One`, `USA News`)
* The plugin automatically removes these prefixes during matching * The plugin automatically removes these prefixes during matching
* Supported formats: `CC:`, `CC `, `CCC:`, or `CCC ` (where C = letter) * Supported formats: `CC:` or `CC ` (2-letter codes), `CCC:` or `CCC ` (3-letter codes)
* Smart detection avoids removing quality tags like HD, SD, UHD, FHD
### Tips for Better Matching ### Tips for Better Matching

View File

@@ -44,14 +44,11 @@ REGIONAL_PATTERNS = [
r'\s[Ee][Aa][Ss][Tt]', r'\s[Ee][Aa][Ss][Tt]',
] ]
# Geographic prefix patterns: US:, USA:, CA:, UK:, etc. # Geographic prefix patterns: US:, USA:, etc.
GEOGRAPHIC_PATTERNS = [ GEOGRAPHIC_PATTERNS = [
# Geographic prefixes at start with colon: "US:", "CA:", "UK:", etc. (any 2-3 letter code followed by colon) # Geographic prefixes
r'^[A-Z]{2,3}:\s*', r'\bUSA?:\s', # "US:" or "USA:"
# Geographic prefixes at start with space: "US ", "CA ", "UK ", etc. (any 2-3 letter code followed by space) r'\bUS\s', # "US " at word boundary
r'^[A-Z]{2,3}\s+',
# Legacy USA pattern for backward compatibility
r'\bUSA?:\s',
] ]
# Miscellaneous patterns: (CX), (Backup), single-letter tags, etc. # Miscellaneous patterns: (CX), (Backup), single-letter tags, etc.
@@ -196,7 +193,7 @@ class FuzzyMatcher:
return callsign return callsign
def normalize_name(self, name, user_ignored_tags=None, ignore_quality=True, ignore_regional=True, def normalize_name(self, name, user_ignored_tags=None, ignore_quality=True, ignore_regional=True,
ignore_geographic=True, ignore_misc=True, remove_cinemax=False): ignore_geographic=True, ignore_misc=True, remove_cinemax=False, remove_country_prefix=False):
""" """
Normalize channel or stream name for matching by removing tags, prefixes, and other noise. Normalize channel or stream name for matching by removing tags, prefixes, and other noise.
@@ -208,6 +205,7 @@ class FuzzyMatcher:
ignore_geographic: If True, remove geographic prefix patterns (e.g., US:, USA) ignore_geographic: If True, remove geographic prefix patterns (e.g., US:, USA)
ignore_misc: If True, remove miscellaneous patterns (e.g., (CX), (Backup), single-letter tags) ignore_misc: If True, remove miscellaneous patterns (e.g., (CX), (Backup), single-letter tags)
remove_cinemax: If True, remove "Cinemax" prefix (useful when channel name contains "max") remove_cinemax: If True, remove "Cinemax" prefix (useful when channel name contains "max")
remove_country_prefix: If True, remove country code prefixes (e.g., CA:, UK , DE: ) from start of name
Returns: Returns:
Normalized name Normalized name
@@ -218,6 +216,20 @@ class FuzzyMatcher:
# Remove leading parenthetical prefixes like (SP2), (D1), etc. # Remove leading parenthetical prefixes like (SP2), (D1), etc.
name = re.sub(r'^\([^\)]+\)\s*', '', name) name = re.sub(r'^\([^\)]+\)\s*', '', name)
# Remove country code prefix if requested (e.g., "CA:", "UK ", "USA: ")
# This handles multi-country databases where streams may be prefixed with country codes
if remove_country_prefix:
# Known quality tags that should NOT be removed (to avoid false positives)
quality_tags = {'HD', 'SD', 'FD', 'UHD', 'FHD'}
# Check for 2-3 letter prefix with colon or space at start
prefix_match = re.match(r'^([A-Z]{2,3})[:|\s]\s*', name)
if prefix_match:
prefix = prefix_match.group(1).upper()
# Only remove if it's NOT a quality tag
if prefix not in quality_tags:
name = name[len(prefix_match.group(0)):]
# Remove "Cinemax" prefix if requested (for channels containing "max") # Remove "Cinemax" prefix if requested (for channels containing "max")
if remove_cinemax: if remove_cinemax:
name = re.sub(r'\bCinemax\b\s*', '', name, flags=re.IGNORECASE) name = re.sub(r'\bCinemax\b\s*', '', name, flags=re.IGNORECASE)

View File

@@ -550,7 +550,7 @@ class Plugin:
return tags return tags
def _clean_channel_name(self, name, ignore_tags=None, ignore_quality=True, ignore_regional=True, def _clean_channel_name(self, name, ignore_tags=None, ignore_quality=True, ignore_regional=True,
ignore_geographic=True, ignore_misc=True, remove_cinemax=False): ignore_geographic=True, ignore_misc=True, remove_cinemax=False, remove_country_prefix=True):
""" """
Remove brackets and their contents from channel name for matching, and remove ignore tags. Remove brackets and their contents from channel name for matching, and remove ignore tags.
Uses fuzzy matcher's normalization if available, otherwise falls back to basic cleaning. Uses fuzzy matcher's normalization if available, otherwise falls back to basic cleaning.
@@ -563,6 +563,7 @@ class Plugin:
ignore_geographic: If True, remove geographic prefix patterns (e.g., US:, USA) ignore_geographic: If True, remove geographic prefix patterns (e.g., US:, USA)
ignore_misc: If True, remove miscellaneous patterns (e.g., (CX), (Backup), single-letter tags) ignore_misc: If True, remove miscellaneous patterns (e.g., (CX), (Backup), single-letter tags)
remove_cinemax: If True, remove "Cinemax" prefix (for streams when channel contains "max") remove_cinemax: If True, remove "Cinemax" prefix (for streams when channel contains "max")
remove_country_prefix: If True, remove country code prefixes (e.g., CA:, UK ) from start of name
""" """
if self.fuzzy_matcher: if self.fuzzy_matcher:
# Use fuzzy matcher's normalization # Use fuzzy matcher's normalization
@@ -572,22 +573,34 @@ class Plugin:
ignore_regional=ignore_regional, ignore_regional=ignore_regional,
ignore_geographic=ignore_geographic, ignore_geographic=ignore_geographic,
ignore_misc=ignore_misc, ignore_misc=ignore_misc,
remove_cinemax=remove_cinemax remove_cinemax=remove_cinemax,
remove_country_prefix=remove_country_prefix
) )
# Fallback to basic cleaning # Fallback to basic cleaning
if ignore_tags is None: if ignore_tags is None:
ignore_tags = [] ignore_tags = []
cleaned = name
# Remove country code prefix if requested
if remove_country_prefix:
quality_tags = {'HD', 'SD', 'FD', 'UHD', 'FHD'}
prefix_match = re.match(r'^([A-Z]{2,3})[:|\s]\s*', cleaned)
if prefix_match:
prefix = prefix_match.group(1).upper()
if prefix not in quality_tags:
cleaned = cleaned[len(prefix_match.group(0)):]
# Remove anything in square brackets or parentheses at the end # Remove anything in square brackets or parentheses at the end
cleaned = re.sub(r'\s*[\[\(][^\[\]\(\)]*[\]\)]\s*$', '', name) cleaned = re.sub(r'\s*[\[\(][^\[\]\(\)]*[\]\)]\s*$', '', cleaned)
# Keep removing until no more brackets at the end # Keep removing until no more brackets at the end
while True: while True:
new_cleaned = re.sub(r'\s*[\[\(][^\[\]\(\)]*[\]\)]\s*$', '', cleaned) new_cleaned = re.sub(r'\s*[\[\(][^\[\]\(\)]*[\]\)]\s*$', '', cleaned)
if new_cleaned == cleaned: if new_cleaned == cleaned:
break break
cleaned = new_cleaned cleaned = new_cleaned
# Remove ignore tags # Remove ignore tags
for tag in ignore_tags: for tag in ignore_tags:
# If tag has brackets/parentheses, match exactly # If tag has brackets/parentheses, match exactly