Add Cinemax handling for channels containing 'max'

When a channel name contains "max" (case-insensitive), the plugin
now strips "Cinemax" from stream names (not from the channel name itself)
during matching. This allows channels like "5StarMax" to properly match
streams such as:
- US: Cinemax 5Starmax
- US 5STARMAX (East) (H)
- US: 5 STARMAX

Changes:
- Add remove_cinemax parameter to normalize_name() in fuzzy_matcher.py
- Add remove_cinemax parameter to _clean_channel_name() in plugin.py
- Detect whether the channel name contains "max" in _match_streams_to_channel()
- Pass remove_cinemax=True to every stream-name cleaning call when it does
- Update fuzzy_match() and find_best_match() to support Cinemax removal

This improves matching accuracy for Cinemax channels whose stream names
include the network name ("Cinemax") while their channel names do not.
This commit is contained in:
Claude
2025-11-06 13:26:15 +00:00
parent 03b8bfc31e
commit 05860d3a2f
2 changed files with 83 additions and 62 deletions

View File

@@ -158,7 +158,7 @@ class FuzzyMatcher:
callsign = re.sub(r'-(?:TV|CD|LP|DT|LD)$', '', callsign) callsign = re.sub(r'-(?:TV|CD|LP|DT|LD)$', '', callsign)
return callsign return callsign
def normalize_name(self, name, user_ignored_tags=None, remove_quality_tags=True): def normalize_name(self, name, user_ignored_tags=None, remove_quality_tags=True, remove_cinemax=False):
""" """
Normalize channel or stream name for matching by removing tags, prefixes, and other noise. Normalize channel or stream name for matching by removing tags, prefixes, and other noise.
@@ -166,6 +166,7 @@ class FuzzyMatcher:
name: Name to normalize name: Name to normalize
user_ignored_tags: Additional user-configured tags to ignore (list of strings) user_ignored_tags: Additional user-configured tags to ignore (list of strings)
remove_quality_tags: If True, remove hardcoded quality patterns (for matching only, not display) remove_quality_tags: If True, remove hardcoded quality patterns (for matching only, not display)
remove_cinemax: If True, remove "Cinemax" prefix (useful when channel name contains "max")
Returns: Returns:
Normalized name Normalized name
@@ -176,6 +177,10 @@ class FuzzyMatcher:
# Remove leading parenthetical prefixes like (SP2), (D1), etc. # Remove leading parenthetical prefixes like (SP2), (D1), etc.
name = re.sub(r'^\([^\)]+\)\s*', '', name) name = re.sub(r'^\([^\)]+\)\s*', '', name)
# Remove "Cinemax" prefix if requested (for channels containing "max")
if remove_cinemax:
name = re.sub(r'\bCinemax\s+', '', name, flags=re.IGNORECASE)
# Apply hardcoded ignore patterns only if remove_quality_tags is True # Apply hardcoded ignore patterns only if remove_quality_tags is True
if remove_quality_tags: if remove_quality_tags:
for pattern in HARDCODED_IGNORE_PATTERNS: for pattern in HARDCODED_IGNORE_PATTERNS:
@@ -315,7 +320,7 @@ class FuzzyMatcher:
tokens = sorted([token for token in cleaned_s.split() if token]) tokens = sorted([token for token in cleaned_s.split() if token])
return " ".join(tokens) return " ".join(tokens)
def find_best_match(self, query_name, candidate_names, user_ignored_tags=None): def find_best_match(self, query_name, candidate_names, user_ignored_tags=None, remove_cinemax=False):
""" """
Find the best fuzzy match for a name among a list of candidate names. Find the best fuzzy match for a name among a list of candidate names.
@@ -323,6 +328,7 @@ class FuzzyMatcher:
query_name: Name to match query_name: Name to match
candidate_names: List of candidate names to match against candidate_names: List of candidate names to match against
user_ignored_tags: User-configured tags to ignore user_ignored_tags: User-configured tags to ignore
remove_cinemax: If True, remove "Cinemax" from candidate names
Returns: Returns:
Tuple of (matched_name, score) or (None, 0) if no match found Tuple of (matched_name, score) or (None, 0) if no match found
@@ -333,7 +339,7 @@ class FuzzyMatcher:
if user_ignored_tags is None: if user_ignored_tags is None:
user_ignored_tags = [] user_ignored_tags = []
# Normalize the query # Normalize the query (channel name - don't remove Cinemax from it)
normalized_query = self.normalize_name(query_name, user_ignored_tags) normalized_query = self.normalize_name(query_name, user_ignored_tags)
if not normalized_query: if not normalized_query:
@@ -346,7 +352,9 @@ class FuzzyMatcher:
best_match = None best_match = None
for candidate in candidate_names: for candidate in candidate_names:
processed_candidate = self.process_string_for_matching(candidate) # Normalize candidate (stream name) with Cinemax removal if requested
candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax)
processed_candidate = self.process_string_for_matching(candidate_normalized)
score = self.calculate_similarity(processed_query, processed_candidate) score = self.calculate_similarity(processed_query, processed_candidate)
if score > best_score: if score > best_score:
@@ -361,15 +369,16 @@ class FuzzyMatcher:
return None, 0 return None, 0
def fuzzy_match(self, query_name, candidate_names, user_ignored_tags=None): def fuzzy_match(self, query_name, candidate_names, user_ignored_tags=None, remove_cinemax=False):
""" """
Generic fuzzy matching function that can match any name against a list of candidates. Generic fuzzy matching function that can match any name against a list of candidates.
This is the main entry point for fuzzy matching. This is the main entry point for fuzzy matching.
Args: Args:
query_name: Name to match query_name: Name to match (channel name)
candidate_names: List of candidate names to match against candidate_names: List of candidate names to match against (stream names)
user_ignored_tags: User-configured tags to ignore user_ignored_tags: User-configured tags to ignore
remove_cinemax: If True, remove "Cinemax" from candidate names (for channels with "max")
Returns: Returns:
Tuple of (matched_name, score, match_type) or (None, 0, None) if no match found Tuple of (matched_name, score, match_type) or (None, 0, None) if no match found
@@ -380,7 +389,7 @@ class FuzzyMatcher:
if user_ignored_tags is None: if user_ignored_tags is None:
user_ignored_tags = [] user_ignored_tags = []
# Normalize for matching # Normalize query (channel name - don't remove Cinemax from it)
normalized_query = self.normalize_name(query_name, user_ignored_tags) normalized_query = self.normalize_name(query_name, user_ignored_tags)
if not normalized_query: if not normalized_query:
@@ -395,7 +404,8 @@ class FuzzyMatcher:
normalized_query_nospace = re.sub(r'[\s&\-]+', '', normalized_query_lower) normalized_query_nospace = re.sub(r'[\s&\-]+', '', normalized_query_lower)
for candidate in candidate_names: for candidate in candidate_names:
candidate_normalized = self.normalize_name(candidate, user_ignored_tags) # Normalize candidate (stream name) with Cinemax removal if requested
candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax)
candidate_lower = candidate_normalized.lower() candidate_lower = candidate_normalized.lower()
candidate_nospace = re.sub(r'[\s&\-]+', '', candidate_lower) candidate_nospace = re.sub(r'[\s&\-]+', '', candidate_lower)
@@ -415,7 +425,8 @@ class FuzzyMatcher:
# Stage 2: Substring matching # Stage 2: Substring matching
for candidate in candidate_names: for candidate in candidate_names:
candidate_normalized = self.normalize_name(candidate, user_ignored_tags) # Normalize candidate (stream name) with Cinemax removal if requested
candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax)
candidate_lower = candidate_normalized.lower() candidate_lower = candidate_normalized.lower()
# Check if one is a substring of the other # Check if one is a substring of the other
@@ -431,7 +442,7 @@ class FuzzyMatcher:
return best_match, int(best_ratio * 100), match_type return best_match, int(best_ratio * 100), match_type
# Stage 3: Fuzzy matching with token sorting # Stage 3: Fuzzy matching with token sorting
fuzzy_match, score = self.find_best_match(query_name, candidate_names, user_ignored_tags) fuzzy_match, score = self.find_best_match(query_name, candidate_names, user_ignored_tags, remove_cinemax=remove_cinemax)
if fuzzy_match: if fuzzy_match:
return fuzzy_match, score, f"fuzzy ({score})" return fuzzy_match, score, f"fuzzy ({score})"

View File

@@ -351,14 +351,19 @@ class Plugin:
logger.warning(f"[Stream-Mapparr] Could not trigger frontend refresh: {e}") logger.warning(f"[Stream-Mapparr] Could not trigger frontend refresh: {e}")
return False return False
def _clean_channel_name(self, name, ignore_tags=None): def _clean_channel_name(self, name, ignore_tags=None, remove_cinemax=False):
""" """
Remove brackets and their contents from channel name for matching, and remove ignore tags. Remove brackets and their contents from channel name for matching, and remove ignore tags.
Uses fuzzy matcher's normalization if available, otherwise falls back to basic cleaning. Uses fuzzy matcher's normalization if available, otherwise falls back to basic cleaning.
Args:
name: Channel or stream name to clean
ignore_tags: List of tags to ignore
remove_cinemax: If True, remove "Cinemax" prefix (for streams when channel contains "max")
""" """
if self.fuzzy_matcher: if self.fuzzy_matcher:
# Use fuzzy matcher's normalization # Use fuzzy matcher's normalization
return self.fuzzy_matcher.normalize_name(name, ignore_tags, remove_quality_tags=True) return self.fuzzy_matcher.normalize_name(name, ignore_tags, remove_quality_tags=True, remove_cinemax=remove_cinemax)
# Fallback to basic cleaning # Fallback to basic cleaning
if ignore_tags is None: if ignore_tags is None:
@@ -494,6 +499,9 @@ class Plugin:
# Get channel info from JSON # Get channel info from JSON
channel_info = self._get_channel_info_from_json(channel_name, channels_data, logger) channel_info = self._get_channel_info_from_json(channel_name, channels_data, logger)
# Check if channel name contains "max" (case insensitive) - used for Cinemax handling
channel_has_max = 'max' in channel_name.lower()
cleaned_channel_name = self._clean_channel_name(channel_name, ignore_tags) cleaned_channel_name = self._clean_channel_name(channel_name, ignore_tags)
if "24/7" in channel_name.lower(): if "24/7" in channel_name.lower():
@@ -521,7 +529,7 @@ class Plugin:
sorted_streams = self._sort_streams_by_quality(matching_streams) sorted_streams = self._sort_streams_by_quality(matching_streams)
logger.info(f"[Stream-Mapparr] Sorted {len(sorted_streams)} streams by quality (callsign matching)") logger.info(f"[Stream-Mapparr] Sorted {len(sorted_streams)} streams by quality (callsign matching)")
cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags) for s in sorted_streams] cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags, remove_cinemax=channel_has_max) for s in sorted_streams]
match_reason = "Callsign match" match_reason = "Callsign match"
return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason
@@ -537,19 +545,21 @@ class Plugin:
stream_names = [stream['name'] for stream in all_streams] stream_names = [stream['name'] for stream in all_streams]
# Use fuzzy matcher to find best match # Use fuzzy matcher to find best match
# Pass remove_cinemax flag if channel contains "max"
matched_stream_name, score, match_type = self.fuzzy_matcher.fuzzy_match( matched_stream_name, score, match_type = self.fuzzy_matcher.fuzzy_match(
channel_name, channel_name,
stream_names, stream_names,
ignore_tags ignore_tags,
remove_cinemax=channel_has_max
) )
if matched_stream_name: if matched_stream_name:
# Find all streams that match this name (different qualities) # Find all streams that match this name (different qualities)
matching_streams = [] matching_streams = []
cleaned_matched = self._clean_channel_name(matched_stream_name, ignore_tags) cleaned_matched = self._clean_channel_name(matched_stream_name, ignore_tags, remove_cinemax=channel_has_max)
for stream in all_streams: for stream in all_streams:
cleaned_stream = self._clean_channel_name(stream['name'], ignore_tags) cleaned_stream = self._clean_channel_name(stream['name'], ignore_tags, remove_cinemax=channel_has_max)
if cleaned_stream.lower() == cleaned_matched.lower(): if cleaned_stream.lower() == cleaned_matched.lower():
matching_streams.append(stream) matching_streams.append(stream)
@@ -558,7 +568,7 @@ class Plugin:
sorted_streams = self._sort_streams_by_quality(matching_streams) sorted_streams = self._sort_streams_by_quality(matching_streams)
logger.info(f"[Stream-Mapparr] Found {len(sorted_streams)} streams via fuzzy match (score: {score}, type: {match_type})") logger.info(f"[Stream-Mapparr] Found {len(sorted_streams)} streams via fuzzy match (score: {score}, type: {match_type})")
cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags) for s in sorted_streams] cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags, remove_cinemax=channel_has_max) for s in sorted_streams]
match_reason = f"Fuzzy match ({match_type}, score: {score})" match_reason = f"Fuzzy match ({match_type}, score: {score})"
return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason
@@ -582,7 +592,7 @@ class Plugin:
# Look for streams that match this channel name exactly # Look for streams that match this channel name exactly
for stream in all_streams: for stream in all_streams:
cleaned_stream_name = self._clean_channel_name(stream['name'], ignore_tags) cleaned_stream_name = self._clean_channel_name(stream['name'], ignore_tags, remove_cinemax=channel_has_max)
if cleaned_stream_name.lower() == cleaned_channel_name.lower(): if cleaned_stream_name.lower() == cleaned_channel_name.lower():
matching_streams.append(stream) matching_streams.append(stream)
@@ -591,14 +601,14 @@ class Plugin:
sorted_streams = self._sort_streams_by_quality(matching_streams) sorted_streams = self._sort_streams_by_quality(matching_streams)
logger.info(f"[Stream-Mapparr] Found {len(sorted_streams)} streams matching exact channel name") logger.info(f"[Stream-Mapparr] Found {len(sorted_streams)} streams matching exact channel name")
cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags) for s in sorted_streams] cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags, remove_cinemax=channel_has_max) for s in sorted_streams]
match_reason = "Exact match (channels.json)" match_reason = "Exact match (channels.json)"
return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason
# Fallback to basic substring matching # Fallback to basic substring matching
for stream in all_streams: for stream in all_streams:
cleaned_stream_name = self._clean_channel_name(stream['name'], ignore_tags) cleaned_stream_name = self._clean_channel_name(stream['name'], ignore_tags, remove_cinemax=channel_has_max)
# Simple case-insensitive substring matching # Simple case-insensitive substring matching
if cleaned_channel_name.lower() in cleaned_stream_name.lower() or cleaned_stream_name.lower() in cleaned_channel_name.lower(): if cleaned_channel_name.lower() in cleaned_stream_name.lower() or cleaned_stream_name.lower() in cleaned_channel_name.lower():
@@ -608,7 +618,7 @@ class Plugin:
sorted_streams = self._sort_streams_by_quality(matching_streams) sorted_streams = self._sort_streams_by_quality(matching_streams)
logger.info(f"[Stream-Mapparr] Found {len(sorted_streams)} streams matching via basic substring match") logger.info(f"[Stream-Mapparr] Found {len(sorted_streams)} streams matching via basic substring match")
cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags) for s in sorted_streams] cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags, remove_cinemax=channel_has_max) for s in sorted_streams]
match_reason = "Basic substring match" match_reason = "Basic substring match"
return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason