Fix country prefix handling to not affect other plugins
Changed the approach from modifying the global GEOGRAPHIC_PATTERNS (which affects every plugin that uses fuzzy_matcher.py) to adding a new optional parameter. Changes: (1) reverted GEOGRAPHIC_PATTERNS to the original US-only patterns; (2) added a new remove_country_prefix parameter to normalize_name() (default: False); (3) updated _clean_channel_name() to use remove_country_prefix=True by default; (4) implemented smart prefix detection that avoids removing quality tags (HD, SD, UHD, FHD); (5) added fallback country prefix removal in the basic cleaning code; (6) updated the README to clarify the country code prefix handling approach. This ensures backward compatibility for other plugins while enabling multi-country support for Stream-Mapparr.
This commit is contained in:
@@ -215,9 +215,10 @@ The legacy format is still supported and uses a direct array:
|
|||||||
* Example: `UK_channels.json`
|
* Example: `UK_channels.json`
|
||||||
|
|
||||||
4. **Country Code Prefix Handling**
|
4. **Country Code Prefix Handling**
|
||||||
* Stream names may be prefixed with country codes (e.g., `CA: CBC`, `UK BBC One`)
|
* Stream names may be prefixed with country codes (e.g., `CA: CBC`, `UK BBC One`, `USA News`)
|
||||||
* The plugin automatically removes these prefixes during matching
|
* The plugin automatically removes these prefixes during matching
|
||||||
* Supported formats: `CC:`, `CC `, `CCC:`, or `CCC ` (where C = letter)
|
* Supported formats: `CC:` or `CC ` (2-letter codes), `CCC:` or `CCC ` (3-letter codes), where C is an uppercase letter
|
||||||
|
* Smart detection avoids removing quality tags such as HD, SD, FD, UHD, and FHD, which would otherwise be mistaken for country codes
|
||||||
|
|
||||||
### Tips for Better Matching
|
### Tips for Better Matching
|
||||||
|
|
||||||
|
|||||||
@@ -44,14 +44,11 @@ REGIONAL_PATTERNS = [
|
|||||||
r'\s[Ee][Aa][Ss][Tt]',
|
r'\s[Ee][Aa][Ss][Tt]',
|
||||||
]
|
]
|
||||||
|
|
||||||
# Geographic prefix patterns: US:, USA:, CA:, UK:, etc.
|
# Geographic prefix patterns: US:, USA:, etc.
|
||||||
GEOGRAPHIC_PATTERNS = [
|
GEOGRAPHIC_PATTERNS = [
|
||||||
# Geographic prefixes at start with colon: "US:", "CA:", "UK:", etc. (any 2-3 letter code followed by colon)
|
# Geographic prefixes
|
||||||
r'^[A-Z]{2,3}:\s*',
|
r'\bUSA?:\s', # "US:" or "USA:"
|
||||||
# Geographic prefixes at start with space: "US ", "CA ", "UK ", etc. (any 2-3 letter code followed by space)
|
r'\bUS\s', # "US " at word boundary
|
||||||
r'^[A-Z]{2,3}\s+',
|
|
||||||
# Legacy USA pattern for backward compatibility
|
|
||||||
r'\bUSA?:\s',
|
|
||||||
]
|
]
|
||||||
|
|
||||||
# Miscellaneous patterns: (CX), (Backup), single-letter tags, etc.
|
# Miscellaneous patterns: (CX), (Backup), single-letter tags, etc.
|
||||||
@@ -196,7 +193,7 @@ class FuzzyMatcher:
|
|||||||
return callsign
|
return callsign
|
||||||
|
|
||||||
def normalize_name(self, name, user_ignored_tags=None, ignore_quality=True, ignore_regional=True,
|
def normalize_name(self, name, user_ignored_tags=None, ignore_quality=True, ignore_regional=True,
|
||||||
ignore_geographic=True, ignore_misc=True, remove_cinemax=False):
|
ignore_geographic=True, ignore_misc=True, remove_cinemax=False, remove_country_prefix=False):
|
||||||
"""
|
"""
|
||||||
Normalize channel or stream name for matching by removing tags, prefixes, and other noise.
|
Normalize channel or stream name for matching by removing tags, prefixes, and other noise.
|
||||||
|
|
||||||
@@ -208,6 +205,7 @@ class FuzzyMatcher:
|
|||||||
ignore_geographic: If True, remove geographic prefix patterns (e.g., US:, USA)
|
ignore_geographic: If True, remove geographic prefix patterns (e.g., US:, USA)
|
||||||
ignore_misc: If True, remove miscellaneous patterns (e.g., (CX), (Backup), single-letter tags)
|
ignore_misc: If True, remove miscellaneous patterns (e.g., (CX), (Backup), single-letter tags)
|
||||||
remove_cinemax: If True, remove "Cinemax" prefix (useful when channel name contains "max")
|
remove_cinemax: If True, remove "Cinemax" prefix (useful when channel name contains "max")
|
||||||
|
remove_country_prefix: If True, remove a leading 2-3 letter uppercase country code prefix (e.g., "CA:", "UK ", "DE: ") from the start of the name; quality tags such as HD or SD are kept
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Normalized name
|
Normalized name
|
||||||
@@ -218,6 +216,20 @@ class FuzzyMatcher:
|
|||||||
# Remove leading parenthetical prefixes like (SP2), (D1), etc.
|
# Remove leading parenthetical prefixes like (SP2), (D1), etc.
|
||||||
name = re.sub(r'^\([^\)]+\)\s*', '', name)
|
name = re.sub(r'^\([^\)]+\)\s*', '', name)
|
||||||
|
|
||||||
|
# Remove country code prefix if requested (e.g., "CA:", "UK ", "USA: ")
|
||||||
|
# This handles multi-country databases where streams may be prefixed with country codes
|
||||||
|
if remove_country_prefix:
|
||||||
|
# Known quality tags that should NOT be removed (to avoid false positives)
|
||||||
|
quality_tags = {'HD', 'SD', 'FD', 'UHD', 'FHD'}
|
||||||
|
|
||||||
|
# Check for 2-3 letter prefix with colon or space at start
|
||||||
|
prefix_match = re.match(r'^([A-Z]{2,3})[:|\s]\s*', name)
|
||||||
|
if prefix_match:
|
||||||
|
prefix = prefix_match.group(1).upper()
|
||||||
|
# Only remove if it's NOT a quality tag
|
||||||
|
if prefix not in quality_tags:
|
||||||
|
name = name[len(prefix_match.group(0)):]
|
||||||
|
|
||||||
# Remove "Cinemax" prefix if requested (for channels containing "max")
|
# Remove "Cinemax" prefix if requested (for channels containing "max")
|
||||||
if remove_cinemax:
|
if remove_cinemax:
|
||||||
name = re.sub(r'\bCinemax\b\s*', '', name, flags=re.IGNORECASE)
|
name = re.sub(r'\bCinemax\b\s*', '', name, flags=re.IGNORECASE)
|
||||||
|
|||||||
@@ -550,7 +550,7 @@ class Plugin:
|
|||||||
return tags
|
return tags
|
||||||
|
|
||||||
def _clean_channel_name(self, name, ignore_tags=None, ignore_quality=True, ignore_regional=True,
|
def _clean_channel_name(self, name, ignore_tags=None, ignore_quality=True, ignore_regional=True,
|
||||||
ignore_geographic=True, ignore_misc=True, remove_cinemax=False):
|
ignore_geographic=True, ignore_misc=True, remove_cinemax=False, remove_country_prefix=True):
|
||||||
"""
|
"""
|
||||||
Remove brackets and their contents from channel name for matching, and remove ignore tags.
|
Remove brackets and their contents from channel name for matching, and remove ignore tags.
|
||||||
Uses fuzzy matcher's normalization if available, otherwise falls back to basic cleaning.
|
Uses fuzzy matcher's normalization if available, otherwise falls back to basic cleaning.
|
||||||
@@ -563,6 +563,7 @@ class Plugin:
|
|||||||
ignore_geographic: If True, remove geographic prefix patterns (e.g., US:, USA)
|
ignore_geographic: If True, remove geographic prefix patterns (e.g., US:, USA)
|
||||||
ignore_misc: If True, remove miscellaneous patterns (e.g., (CX), (Backup), single-letter tags)
|
ignore_misc: If True, remove miscellaneous patterns (e.g., (CX), (Backup), single-letter tags)
|
||||||
remove_cinemax: If True, remove "Cinemax" prefix (for streams when channel contains "max")
|
remove_cinemax: If True, remove "Cinemax" prefix (for streams when channel contains "max")
|
||||||
|
remove_country_prefix: If True, remove country code prefixes (e.g., CA:, UK ) from start of name
|
||||||
"""
|
"""
|
||||||
if self.fuzzy_matcher:
|
if self.fuzzy_matcher:
|
||||||
# Use fuzzy matcher's normalization
|
# Use fuzzy matcher's normalization
|
||||||
@@ -572,15 +573,27 @@ class Plugin:
|
|||||||
ignore_regional=ignore_regional,
|
ignore_regional=ignore_regional,
|
||||||
ignore_geographic=ignore_geographic,
|
ignore_geographic=ignore_geographic,
|
||||||
ignore_misc=ignore_misc,
|
ignore_misc=ignore_misc,
|
||||||
remove_cinemax=remove_cinemax
|
remove_cinemax=remove_cinemax,
|
||||||
|
remove_country_prefix=remove_country_prefix
|
||||||
)
|
)
|
||||||
|
|
||||||
# Fallback to basic cleaning
|
# Fallback to basic cleaning
|
||||||
if ignore_tags is None:
|
if ignore_tags is None:
|
||||||
ignore_tags = []
|
ignore_tags = []
|
||||||
|
|
||||||
|
cleaned = name
|
||||||
|
|
||||||
|
# Remove country code prefix if requested
|
||||||
|
if remove_country_prefix:
|
||||||
|
quality_tags = {'HD', 'SD', 'FD', 'UHD', 'FHD'}
|
||||||
|
prefix_match = re.match(r'^([A-Z]{2,3})[:|\s]\s*', cleaned)
|
||||||
|
if prefix_match:
|
||||||
|
prefix = prefix_match.group(1).upper()
|
||||||
|
if prefix not in quality_tags:
|
||||||
|
cleaned = cleaned[len(prefix_match.group(0)):]
|
||||||
|
|
||||||
# Remove anything in square brackets or parentheses at the end
|
# Remove anything in square brackets or parentheses at the end
|
||||||
cleaned = re.sub(r'\s*[\[\(][^\[\]\(\)]*[\]\)]\s*$', '', name)
|
cleaned = re.sub(r'\s*[\[\(][^\[\]\(\)]*[\]\)]\s*$', '', cleaned)
|
||||||
# Keep removing until no more brackets at the end
|
# Keep removing until no more brackets at the end
|
||||||
while True:
|
while True:
|
||||||
new_cleaned = re.sub(r'\s*[\[\(][^\[\]\(\)]*[\]\)]\s*$', '', cleaned)
|
new_cleaned = re.sub(r'\s*[\[\(][^\[\]\(\)]*[\]\)]\s*$', '', cleaned)
|
||||||
|
|||||||
Reference in New Issue
Block a user