diff --git a/Stream-Mapparr/fuzzy_matcher.py b/Stream-Mapparr/fuzzy_matcher.py index 27b49bd..b365393 100644 --- a/Stream-Mapparr/fuzzy_matcher.py +++ b/Stream-Mapparr/fuzzy_matcher.py @@ -158,29 +158,34 @@ class FuzzyMatcher: callsign = re.sub(r'-(?:TV|CD|LP|DT|LD)$', '', callsign) return callsign - def normalize_name(self, name, user_ignored_tags=None, remove_quality_tags=True): + def normalize_name(self, name, user_ignored_tags=None, remove_quality_tags=True, remove_cinemax=False): """ Normalize channel or stream name for matching by removing tags, prefixes, and other noise. - + Args: name: Name to normalize user_ignored_tags: Additional user-configured tags to ignore (list of strings) remove_quality_tags: If True, remove hardcoded quality patterns (for matching only, not display) - + remove_cinemax: If True, remove "Cinemax" prefix (useful when channel name contains "max") + Returns: Normalized name """ if user_ignored_tags is None: user_ignored_tags = [] - + # Remove leading parenthetical prefixes like (SP2), (D1), etc. name = re.sub(r'^\([^\)]+\)\s*', '', name) - + + # Remove "Cinemax" prefix if requested (for channels containing "max") + if remove_cinemax: + name = re.sub(r'\bCinemax\s+', '', name, flags=re.IGNORECASE) + # Apply hardcoded ignore patterns only if remove_quality_tags is True if remove_quality_tags: for pattern in HARDCODED_IGNORE_PATTERNS: name = re.sub(pattern, '', name, flags=re.IGNORECASE) - + # Apply user-configured ignored tags for tag in user_ignored_tags: escaped_tag = re.escape(tag) @@ -315,25 +320,26 @@ class FuzzyMatcher: tokens = sorted([token for token in cleaned_s.split() if token]) return " ".join(tokens) - def find_best_match(self, query_name, candidate_names, user_ignored_tags=None): + def find_best_match(self, query_name, candidate_names, user_ignored_tags=None, remove_cinemax=False): """ Find the best fuzzy match for a name among a list of candidate names. - + Args: query_name: Name to match candidate_names: List of candidate names to match against user_ignored_tags: User-configured tags to ignore - + remove_cinemax: If True, remove "Cinemax" from candidate names + Returns: Tuple of (matched_name, score) or (None, 0) if no match found """ if not candidate_names: return None, 0 - + if user_ignored_tags is None: user_ignored_tags = [] - - # Normalize the query + + # Normalize the query (channel name - don't remove Cinemax from it) normalized_query = self.normalize_name(query_name, user_ignored_tags) if not normalized_query: @@ -341,12 +347,14 @@ class FuzzyMatcher: # Process query for token-sort matching processed_query = self.process_string_for_matching(normalized_query) - + best_score = -1.0 best_match = None - + for candidate in candidate_names: - processed_candidate = self.process_string_for_matching(candidate) + # Normalize candidate (stream name) with Cinemax removal if requested + candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax) + processed_candidate = self.process_string_for_matching(candidate_normalized) score = self.calculate_similarity(processed_query, processed_candidate) if score > best_score: @@ -361,26 +369,27 @@ class FuzzyMatcher: return None, 0 - def fuzzy_match(self, query_name, candidate_names, user_ignored_tags=None): + def fuzzy_match(self, query_name, candidate_names, user_ignored_tags=None, remove_cinemax=False): """ Generic fuzzy matching function that can match any name against a list of candidates. This is the main entry point for fuzzy matching. - + Args: - query_name: Name to match - candidate_names: List of candidate names to match against + query_name: Name to match (channel name) + candidate_names: List of candidate names to match against (stream names) user_ignored_tags: User-configured tags to ignore - + remove_cinemax: If True, remove "Cinemax" from candidate names (for channels with "max") + Returns: Tuple of (matched_name, score, match_type) or (None, 0, None) if no match found """ if not candidate_names: return None, 0, None - + if user_ignored_tags is None: user_ignored_tags = [] - - # Normalize for matching + + # Normalize query (channel name - don't remove Cinemax from it) normalized_query = self.normalize_name(query_name, user_ignored_tags) if not normalized_query: @@ -393,31 +402,33 @@ class FuzzyMatcher: # Stage 1: Exact match (after normalization) normalized_query_lower = normalized_query.lower() normalized_query_nospace = re.sub(r'[\s&\-]+', '', normalized_query_lower) - + for candidate in candidate_names: - candidate_normalized = self.normalize_name(candidate, user_ignored_tags) + # Normalize candidate (stream name) with Cinemax removal if requested + candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax) candidate_lower = candidate_normalized.lower() candidate_nospace = re.sub(r'[\s&\-]+', '', candidate_lower) - + # Exact match if normalized_query_nospace == candidate_nospace: return candidate, 100, "exact" - + # Very high similarity (97%+) ratio = self.calculate_similarity(normalized_query_lower, candidate_lower) if ratio >= 0.97 and ratio > best_ratio: best_match = candidate best_ratio = ratio match_type = "exact" - + if best_match: return best_match, int(best_ratio * 100), match_type - + # Stage 2: Substring matching for candidate in candidate_names: - candidate_normalized = self.normalize_name(candidate, user_ignored_tags) + # Normalize candidate (stream name) with Cinemax removal if requested + candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax) candidate_lower = candidate_normalized.lower() - + # Check if one is a substring of the other if normalized_query_lower in candidate_lower or candidate_lower in normalized_query_lower: # Calculate similarity score @@ -426,12 +437,12 @@ class FuzzyMatcher: best_match = candidate best_ratio = ratio match_type = "substring" - + if best_match and int(best_ratio * 100) >= self.match_threshold: return best_match, int(best_ratio * 100), match_type - + # Stage 3: Fuzzy matching with token sorting - fuzzy_match, score = self.find_best_match(query_name, candidate_names, user_ignored_tags) + fuzzy_match, score = self.find_best_match(query_name, candidate_names, user_ignored_tags, remove_cinemax=remove_cinemax) if fuzzy_match: return fuzzy_match, score, f"fuzzy ({score})" diff --git a/Stream-Mapparr/plugin.py b/Stream-Mapparr/plugin.py index 9d84aa0..f335e9b 100644 --- a/Stream-Mapparr/plugin.py +++ b/Stream-Mapparr/plugin.py @@ -351,14 +351,19 @@ class Plugin: logger.warning(f"[Stream-Mapparr] Could not trigger frontend refresh: {e}") return False - def _clean_channel_name(self, name, ignore_tags=None): + def _clean_channel_name(self, name, ignore_tags=None, remove_cinemax=False): """ Remove brackets and their contents from channel name for matching, and remove ignore tags. Uses fuzzy matcher's normalization if available, otherwise falls back to basic cleaning. + + Args: + name: Channel or stream name to clean + ignore_tags: List of tags to ignore + remove_cinemax: If True, remove "Cinemax" prefix (for streams when channel contains "max") """ if self.fuzzy_matcher: # Use fuzzy matcher's normalization - return self.fuzzy_matcher.normalize_name(name, ignore_tags, remove_quality_tags=True) + return self.fuzzy_matcher.normalize_name(name, ignore_tags, remove_quality_tags=True, remove_cinemax=remove_cinemax) # Fallback to basic cleaning if ignore_tags is None: @@ -488,12 +493,15 @@ class Plugin: ignore_tags = [] if channels_data is None: channels_data = [] - + channel_name = channel['name'] - + # Get channel info from JSON channel_info = self._get_channel_info_from_json(channel_name, channels_data, logger) - + + # Check if channel name contains "max" (case insensitive) - used for Cinemax handling + channel_has_max = 'max' in channel_name.lower() + cleaned_channel_name = self._clean_channel_name(channel_name, ignore_tags) if "24/7" in channel_name.lower(): @@ -520,10 +528,10 @@ class Plugin: if matching_streams: sorted_streams = self._sort_streams_by_quality(matching_streams) logger.info(f"[Stream-Mapparr] Sorted {len(sorted_streams)} streams by quality (callsign matching)") - - cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags) for s in sorted_streams] + + cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags, remove_cinemax=channel_has_max) for s in sorted_streams] match_reason = "Callsign match" - + return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason else: logger.info(f"[Stream-Mapparr] No callsign matches found for {callsign}") @@ -532,33 +540,35 @@ class Plugin: # Use fuzzy matching if available if self.fuzzy_matcher: logger.info(f"[Stream-Mapparr] Using fuzzy matcher for channel: {channel_name}") - + # Get all stream names stream_names = [stream['name'] for stream in all_streams] - + # Use fuzzy matcher to find best match + # Pass remove_cinemax flag if channel contains "max" matched_stream_name, score, match_type = self.fuzzy_matcher.fuzzy_match( channel_name, stream_names, - ignore_tags + ignore_tags, + remove_cinemax=channel_has_max ) if matched_stream_name: # Find all streams that match this name (different qualities) matching_streams = [] - cleaned_matched = self._clean_channel_name(matched_stream_name, ignore_tags) - + cleaned_matched = self._clean_channel_name(matched_stream_name, ignore_tags, remove_cinemax=channel_has_max) + for stream in all_streams: - cleaned_stream = self._clean_channel_name(stream['name'], ignore_tags) - + cleaned_stream = self._clean_channel_name(stream['name'], ignore_tags, remove_cinemax=channel_has_max) + if cleaned_stream.lower() == cleaned_matched.lower(): matching_streams.append(stream) - + if matching_streams: sorted_streams = self._sort_streams_by_quality(matching_streams) logger.info(f"[Stream-Mapparr] Found {len(sorted_streams)} streams via fuzzy match (score: {score}, type: {match_type})") - - cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags) for s in sorted_streams] + + cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags, remove_cinemax=channel_has_max) for s in sorted_streams] match_reason = f"Fuzzy match ({match_type}, score: {score})" return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason @@ -582,33 +592,33 @@ class Plugin: # Look for streams that match this channel name exactly for stream in all_streams: - cleaned_stream_name = self._clean_channel_name(stream['name'], ignore_tags) - + cleaned_stream_name = self._clean_channel_name(stream['name'], ignore_tags, remove_cinemax=channel_has_max) + if cleaned_stream_name.lower() == cleaned_channel_name.lower(): matching_streams.append(stream) - + if matching_streams: sorted_streams = self._sort_streams_by_quality(matching_streams) logger.info(f"[Stream-Mapparr] Found {len(sorted_streams)} streams matching exact channel name") - - cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags) for s in sorted_streams] + + cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags, remove_cinemax=channel_has_max) for s in sorted_streams] match_reason = "Exact match (channels.json)" return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason # Fallback to basic substring matching for stream in all_streams: - cleaned_stream_name = self._clean_channel_name(stream['name'], ignore_tags) - + cleaned_stream_name = self._clean_channel_name(stream['name'], ignore_tags, remove_cinemax=channel_has_max) + # Simple case-insensitive substring matching if cleaned_channel_name.lower() in cleaned_stream_name.lower() or cleaned_stream_name.lower() in cleaned_channel_name.lower(): matching_streams.append(stream) - + if matching_streams: sorted_streams = self._sort_streams_by_quality(matching_streams) logger.info(f"[Stream-Mapparr] Found {len(sorted_streams)} streams matching via basic substring match") - - cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags) for s in sorted_streams] + + cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags, remove_cinemax=channel_has_max) for s in sorted_streams] match_reason = "Basic substring match" return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason