Add Cinemax handling for channels containing 'max'

When a channel name contains "max" (case-insensitive), the plugin
now strips "Cinemax" from stream names before matching. This allows
channels like "5StarMax" to match streams such as:
- US: Cinemax 5Starmax
- US 5STARMAX (East) (H)
- US: 5 STARMAX

Changes:
- Add remove_cinemax parameter to normalize_name() in fuzzy_matcher.py
- Add remove_cinemax parameter to _clean_channel_name() in plugin.py
- Detect if channel name contains "max" in _match_streams_to_channel()
- Pass remove_cinemax=channel_has_max to every stream-name cleaning call
- Update fuzzy_match() and find_best_match() to support Cinemax removal

This improves matching accuracy for Cinemax channels whose stream names
include the network name but whose channel names do not.
This commit is contained in:
Claude
2025-11-06 13:26:15 +00:00
parent 03b8bfc31e
commit 05860d3a2f
2 changed files with 83 additions and 62 deletions

View File

@@ -158,29 +158,34 @@ class FuzzyMatcher:
callsign = re.sub(r'-(?:TV|CD|LP|DT|LD)$', '', callsign) callsign = re.sub(r'-(?:TV|CD|LP|DT|LD)$', '', callsign)
return callsign return callsign
def normalize_name(self, name, user_ignored_tags=None, remove_quality_tags=True): def normalize_name(self, name, user_ignored_tags=None, remove_quality_tags=True, remove_cinemax=False):
""" """
Normalize channel or stream name for matching by removing tags, prefixes, and other noise. Normalize channel or stream name for matching by removing tags, prefixes, and other noise.
Args: Args:
name: Name to normalize name: Name to normalize
user_ignored_tags: Additional user-configured tags to ignore (list of strings) user_ignored_tags: Additional user-configured tags to ignore (list of strings)
remove_quality_tags: If True, remove hardcoded quality patterns (for matching only, not display) remove_quality_tags: If True, remove hardcoded quality patterns (for matching only, not display)
remove_cinemax: If True, remove "Cinemax" prefix (useful when channel name contains "max")
Returns: Returns:
Normalized name Normalized name
""" """
if user_ignored_tags is None: if user_ignored_tags is None:
user_ignored_tags = [] user_ignored_tags = []
# Remove leading parenthetical prefixes like (SP2), (D1), etc. # Remove leading parenthetical prefixes like (SP2), (D1), etc.
name = re.sub(r'^\([^\)]+\)\s*', '', name) name = re.sub(r'^\([^\)]+\)\s*', '', name)
# Remove "Cinemax" prefix if requested (for channels containing "max")
if remove_cinemax:
name = re.sub(r'\bCinemax\s+', '', name, flags=re.IGNORECASE)
# Apply hardcoded ignore patterns only if remove_quality_tags is True # Apply hardcoded ignore patterns only if remove_quality_tags is True
if remove_quality_tags: if remove_quality_tags:
for pattern in HARDCODED_IGNORE_PATTERNS: for pattern in HARDCODED_IGNORE_PATTERNS:
name = re.sub(pattern, '', name, flags=re.IGNORECASE) name = re.sub(pattern, '', name, flags=re.IGNORECASE)
# Apply user-configured ignored tags # Apply user-configured ignored tags
for tag in user_ignored_tags: for tag in user_ignored_tags:
escaped_tag = re.escape(tag) escaped_tag = re.escape(tag)
@@ -315,25 +320,26 @@ class FuzzyMatcher:
tokens = sorted([token for token in cleaned_s.split() if token]) tokens = sorted([token for token in cleaned_s.split() if token])
return " ".join(tokens) return " ".join(tokens)
def find_best_match(self, query_name, candidate_names, user_ignored_tags=None): def find_best_match(self, query_name, candidate_names, user_ignored_tags=None, remove_cinemax=False):
""" """
Find the best fuzzy match for a name among a list of candidate names. Find the best fuzzy match for a name among a list of candidate names.
Args: Args:
query_name: Name to match query_name: Name to match
candidate_names: List of candidate names to match against candidate_names: List of candidate names to match against
user_ignored_tags: User-configured tags to ignore user_ignored_tags: User-configured tags to ignore
remove_cinemax: If True, remove "Cinemax" from candidate names
Returns: Returns:
Tuple of (matched_name, score) or (None, 0) if no match found Tuple of (matched_name, score) or (None, 0) if no match found
""" """
if not candidate_names: if not candidate_names:
return None, 0 return None, 0
if user_ignored_tags is None: if user_ignored_tags is None:
user_ignored_tags = [] user_ignored_tags = []
# Normalize the query # Normalize the query (channel name - don't remove Cinemax from it)
normalized_query = self.normalize_name(query_name, user_ignored_tags) normalized_query = self.normalize_name(query_name, user_ignored_tags)
if not normalized_query: if not normalized_query:
@@ -341,12 +347,14 @@ class FuzzyMatcher:
# Process query for token-sort matching # Process query for token-sort matching
processed_query = self.process_string_for_matching(normalized_query) processed_query = self.process_string_for_matching(normalized_query)
best_score = -1.0 best_score = -1.0
best_match = None best_match = None
for candidate in candidate_names: for candidate in candidate_names:
processed_candidate = self.process_string_for_matching(candidate) # Normalize candidate (stream name) with Cinemax removal if requested
candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax)
processed_candidate = self.process_string_for_matching(candidate_normalized)
score = self.calculate_similarity(processed_query, processed_candidate) score = self.calculate_similarity(processed_query, processed_candidate)
if score > best_score: if score > best_score:
@@ -361,26 +369,27 @@ class FuzzyMatcher:
return None, 0 return None, 0
def fuzzy_match(self, query_name, candidate_names, user_ignored_tags=None): def fuzzy_match(self, query_name, candidate_names, user_ignored_tags=None, remove_cinemax=False):
""" """
Generic fuzzy matching function that can match any name against a list of candidates. Generic fuzzy matching function that can match any name against a list of candidates.
This is the main entry point for fuzzy matching. This is the main entry point for fuzzy matching.
Args: Args:
query_name: Name to match query_name: Name to match (channel name)
candidate_names: List of candidate names to match against candidate_names: List of candidate names to match against (stream names)
user_ignored_tags: User-configured tags to ignore user_ignored_tags: User-configured tags to ignore
remove_cinemax: If True, remove "Cinemax" from candidate names (for channels with "max")
Returns: Returns:
Tuple of (matched_name, score, match_type) or (None, 0, None) if no match found Tuple of (matched_name, score, match_type) or (None, 0, None) if no match found
""" """
if not candidate_names: if not candidate_names:
return None, 0, None return None, 0, None
if user_ignored_tags is None: if user_ignored_tags is None:
user_ignored_tags = [] user_ignored_tags = []
# Normalize for matching # Normalize query (channel name - don't remove Cinemax from it)
normalized_query = self.normalize_name(query_name, user_ignored_tags) normalized_query = self.normalize_name(query_name, user_ignored_tags)
if not normalized_query: if not normalized_query:
@@ -393,31 +402,33 @@ class FuzzyMatcher:
# Stage 1: Exact match (after normalization) # Stage 1: Exact match (after normalization)
normalized_query_lower = normalized_query.lower() normalized_query_lower = normalized_query.lower()
normalized_query_nospace = re.sub(r'[\s&\-]+', '', normalized_query_lower) normalized_query_nospace = re.sub(r'[\s&\-]+', '', normalized_query_lower)
for candidate in candidate_names: for candidate in candidate_names:
candidate_normalized = self.normalize_name(candidate, user_ignored_tags) # Normalize candidate (stream name) with Cinemax removal if requested
candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax)
candidate_lower = candidate_normalized.lower() candidate_lower = candidate_normalized.lower()
candidate_nospace = re.sub(r'[\s&\-]+', '', candidate_lower) candidate_nospace = re.sub(r'[\s&\-]+', '', candidate_lower)
# Exact match # Exact match
if normalized_query_nospace == candidate_nospace: if normalized_query_nospace == candidate_nospace:
return candidate, 100, "exact" return candidate, 100, "exact"
# Very high similarity (97%+) # Very high similarity (97%+)
ratio = self.calculate_similarity(normalized_query_lower, candidate_lower) ratio = self.calculate_similarity(normalized_query_lower, candidate_lower)
if ratio >= 0.97 and ratio > best_ratio: if ratio >= 0.97 and ratio > best_ratio:
best_match = candidate best_match = candidate
best_ratio = ratio best_ratio = ratio
match_type = "exact" match_type = "exact"
if best_match: if best_match:
return best_match, int(best_ratio * 100), match_type return best_match, int(best_ratio * 100), match_type
# Stage 2: Substring matching # Stage 2: Substring matching
for candidate in candidate_names: for candidate in candidate_names:
candidate_normalized = self.normalize_name(candidate, user_ignored_tags) # Normalize candidate (stream name) with Cinemax removal if requested
candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax)
candidate_lower = candidate_normalized.lower() candidate_lower = candidate_normalized.lower()
# Check if one is a substring of the other # Check if one is a substring of the other
if normalized_query_lower in candidate_lower or candidate_lower in normalized_query_lower: if normalized_query_lower in candidate_lower or candidate_lower in normalized_query_lower:
# Calculate similarity score # Calculate similarity score
@@ -426,12 +437,12 @@ class FuzzyMatcher:
best_match = candidate best_match = candidate
best_ratio = ratio best_ratio = ratio
match_type = "substring" match_type = "substring"
if best_match and int(best_ratio * 100) >= self.match_threshold: if best_match and int(best_ratio * 100) >= self.match_threshold:
return best_match, int(best_ratio * 100), match_type return best_match, int(best_ratio * 100), match_type
# Stage 3: Fuzzy matching with token sorting # Stage 3: Fuzzy matching with token sorting
fuzzy_match, score = self.find_best_match(query_name, candidate_names, user_ignored_tags) fuzzy_match, score = self.find_best_match(query_name, candidate_names, user_ignored_tags, remove_cinemax=remove_cinemax)
if fuzzy_match: if fuzzy_match:
return fuzzy_match, score, f"fuzzy ({score})" return fuzzy_match, score, f"fuzzy ({score})"

View File

@@ -351,14 +351,19 @@ class Plugin:
logger.warning(f"[Stream-Mapparr] Could not trigger frontend refresh: {e}") logger.warning(f"[Stream-Mapparr] Could not trigger frontend refresh: {e}")
return False return False
def _clean_channel_name(self, name, ignore_tags=None): def _clean_channel_name(self, name, ignore_tags=None, remove_cinemax=False):
""" """
Remove brackets and their contents from channel name for matching, and remove ignore tags. Remove brackets and their contents from channel name for matching, and remove ignore tags.
Uses fuzzy matcher's normalization if available, otherwise falls back to basic cleaning. Uses fuzzy matcher's normalization if available, otherwise falls back to basic cleaning.
Args:
name: Channel or stream name to clean
ignore_tags: List of tags to ignore
remove_cinemax: If True, remove "Cinemax" prefix (for streams when channel contains "max")
""" """
if self.fuzzy_matcher: if self.fuzzy_matcher:
# Use fuzzy matcher's normalization # Use fuzzy matcher's normalization
return self.fuzzy_matcher.normalize_name(name, ignore_tags, remove_quality_tags=True) return self.fuzzy_matcher.normalize_name(name, ignore_tags, remove_quality_tags=True, remove_cinemax=remove_cinemax)
# Fallback to basic cleaning # Fallback to basic cleaning
if ignore_tags is None: if ignore_tags is None:
@@ -488,12 +493,15 @@ class Plugin:
ignore_tags = [] ignore_tags = []
if channels_data is None: if channels_data is None:
channels_data = [] channels_data = []
channel_name = channel['name'] channel_name = channel['name']
# Get channel info from JSON # Get channel info from JSON
channel_info = self._get_channel_info_from_json(channel_name, channels_data, logger) channel_info = self._get_channel_info_from_json(channel_name, channels_data, logger)
# Check if channel name contains "max" (case insensitive) - used for Cinemax handling
channel_has_max = 'max' in channel_name.lower()
cleaned_channel_name = self._clean_channel_name(channel_name, ignore_tags) cleaned_channel_name = self._clean_channel_name(channel_name, ignore_tags)
if "24/7" in channel_name.lower(): if "24/7" in channel_name.lower():
@@ -520,10 +528,10 @@ class Plugin:
if matching_streams: if matching_streams:
sorted_streams = self._sort_streams_by_quality(matching_streams) sorted_streams = self._sort_streams_by_quality(matching_streams)
logger.info(f"[Stream-Mapparr] Sorted {len(sorted_streams)} streams by quality (callsign matching)") logger.info(f"[Stream-Mapparr] Sorted {len(sorted_streams)} streams by quality (callsign matching)")
cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags) for s in sorted_streams] cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags, remove_cinemax=channel_has_max) for s in sorted_streams]
match_reason = "Callsign match" match_reason = "Callsign match"
return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason
else: else:
logger.info(f"[Stream-Mapparr] No callsign matches found for {callsign}") logger.info(f"[Stream-Mapparr] No callsign matches found for {callsign}")
@@ -532,33 +540,35 @@ class Plugin:
# Use fuzzy matching if available # Use fuzzy matching if available
if self.fuzzy_matcher: if self.fuzzy_matcher:
logger.info(f"[Stream-Mapparr] Using fuzzy matcher for channel: {channel_name}") logger.info(f"[Stream-Mapparr] Using fuzzy matcher for channel: {channel_name}")
# Get all stream names # Get all stream names
stream_names = [stream['name'] for stream in all_streams] stream_names = [stream['name'] for stream in all_streams]
# Use fuzzy matcher to find best match # Use fuzzy matcher to find best match
# Pass remove_cinemax flag if channel contains "max"
matched_stream_name, score, match_type = self.fuzzy_matcher.fuzzy_match( matched_stream_name, score, match_type = self.fuzzy_matcher.fuzzy_match(
channel_name, channel_name,
stream_names, stream_names,
ignore_tags ignore_tags,
remove_cinemax=channel_has_max
) )
if matched_stream_name: if matched_stream_name:
# Find all streams that match this name (different qualities) # Find all streams that match this name (different qualities)
matching_streams = [] matching_streams = []
cleaned_matched = self._clean_channel_name(matched_stream_name, ignore_tags) cleaned_matched = self._clean_channel_name(matched_stream_name, ignore_tags, remove_cinemax=channel_has_max)
for stream in all_streams: for stream in all_streams:
cleaned_stream = self._clean_channel_name(stream['name'], ignore_tags) cleaned_stream = self._clean_channel_name(stream['name'], ignore_tags, remove_cinemax=channel_has_max)
if cleaned_stream.lower() == cleaned_matched.lower(): if cleaned_stream.lower() == cleaned_matched.lower():
matching_streams.append(stream) matching_streams.append(stream)
if matching_streams: if matching_streams:
sorted_streams = self._sort_streams_by_quality(matching_streams) sorted_streams = self._sort_streams_by_quality(matching_streams)
logger.info(f"[Stream-Mapparr] Found {len(sorted_streams)} streams via fuzzy match (score: {score}, type: {match_type})") logger.info(f"[Stream-Mapparr] Found {len(sorted_streams)} streams via fuzzy match (score: {score}, type: {match_type})")
cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags) for s in sorted_streams] cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags, remove_cinemax=channel_has_max) for s in sorted_streams]
match_reason = f"Fuzzy match ({match_type}, score: {score})" match_reason = f"Fuzzy match ({match_type}, score: {score})"
return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason
@@ -582,33 +592,33 @@ class Plugin:
# Look for streams that match this channel name exactly # Look for streams that match this channel name exactly
for stream in all_streams: for stream in all_streams:
cleaned_stream_name = self._clean_channel_name(stream['name'], ignore_tags) cleaned_stream_name = self._clean_channel_name(stream['name'], ignore_tags, remove_cinemax=channel_has_max)
if cleaned_stream_name.lower() == cleaned_channel_name.lower(): if cleaned_stream_name.lower() == cleaned_channel_name.lower():
matching_streams.append(stream) matching_streams.append(stream)
if matching_streams: if matching_streams:
sorted_streams = self._sort_streams_by_quality(matching_streams) sorted_streams = self._sort_streams_by_quality(matching_streams)
logger.info(f"[Stream-Mapparr] Found {len(sorted_streams)} streams matching exact channel name") logger.info(f"[Stream-Mapparr] Found {len(sorted_streams)} streams matching exact channel name")
cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags) for s in sorted_streams] cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags, remove_cinemax=channel_has_max) for s in sorted_streams]
match_reason = "Exact match (channels.json)" match_reason = "Exact match (channels.json)"
return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason
# Fallback to basic substring matching # Fallback to basic substring matching
for stream in all_streams: for stream in all_streams:
cleaned_stream_name = self._clean_channel_name(stream['name'], ignore_tags) cleaned_stream_name = self._clean_channel_name(stream['name'], ignore_tags, remove_cinemax=channel_has_max)
# Simple case-insensitive substring matching # Simple case-insensitive substring matching
if cleaned_channel_name.lower() in cleaned_stream_name.lower() or cleaned_stream_name.lower() in cleaned_channel_name.lower(): if cleaned_channel_name.lower() in cleaned_stream_name.lower() or cleaned_stream_name.lower() in cleaned_channel_name.lower():
matching_streams.append(stream) matching_streams.append(stream)
if matching_streams: if matching_streams:
sorted_streams = self._sort_streams_by_quality(matching_streams) sorted_streams = self._sort_streams_by_quality(matching_streams)
logger.info(f"[Stream-Mapparr] Found {len(sorted_streams)} streams matching via basic substring match") logger.info(f"[Stream-Mapparr] Found {len(sorted_streams)} streams matching via basic substring match")
cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags) for s in sorted_streams] cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags, remove_cinemax=channel_has_max) for s in sorted_streams]
match_reason = "Basic substring match" match_reason = "Basic substring match"
return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason