Add Cinemax handling for channels containing 'max'

When a channel name contains "max" (case-insensitive), the plugin now removes "Cinemax" from stream names during matching. This allows channels like "5StarMax" to properly match streams like:

- US: Cinemax 5Starmax
- US 5STARMAX (East) (H)
- US: 5 STARMAX

Changes:

- Add remove_cinemax parameter to normalize_name() in fuzzy_matcher.py
- Add remove_cinemax parameter to _clean_channel_name() in plugin.py
- Detect if channel name contains "max" in _match_streams_to_channel()
- Pass remove_cinemax=True to all stream name cleaning when applicable
- Update fuzzy_match() and find_best_match() to support Cinemax removal

This improves matching accuracy for Cinemax channels that include the network name in stream names but not in channel names.
2025-11-06 13:26:15 +00:00
parent 03b8bfc31e
commit 05860d3a2f
2 changed files with 83 additions and 62 deletions
--- a/Stream-Mapparr/fuzzy_matcher.py
+++ b/Stream-Mapparr/fuzzy_matcher.py
@@ -158,29 +158,34 @@ class FuzzyMatcher:
            callsign = re.sub(r'-(?:TV|CD|LP|DT|LD)$', '', callsign)
        return callsign
    
-    def normalize_name(self, name, user_ignored_tags=None, remove_quality_tags=True):
+    def normalize_name(self, name, user_ignored_tags=None, remove_quality_tags=True, remove_cinemax=False):
        """
        Normalize channel or stream name for matching by removing tags, prefixes, and other noise.
-        
+
        Args:
            name: Name to normalize
            user_ignored_tags: Additional user-configured tags to ignore (list of strings)
            remove_quality_tags: If True, remove hardcoded quality patterns (for matching only, not display)
-        
+            remove_cinemax: If True, remove "Cinemax" prefix (useful when channel name contains "max")
+
        Returns:
            Normalized name
        """
        if user_ignored_tags is None:
            user_ignored_tags = []
-        
+
        # Remove leading parenthetical prefixes like (SP2), (D1), etc.
        name = re.sub(r'^\([^\)]+\)\s*', '', name)
-        
+
+        # Remove "Cinemax" prefix if requested (for channels containing "max")
+        if remove_cinemax:
+            name = re.sub(r'\bCinemax\s+', '', name, flags=re.IGNORECASE)
+
        # Apply hardcoded ignore patterns only if remove_quality_tags is True
        if remove_quality_tags:
            for pattern in HARDCODED_IGNORE_PATTERNS:
                name = re.sub(pattern, '', name, flags=re.IGNORECASE)
-        
+
        # Apply user-configured ignored tags
        for tag in user_ignored_tags:
            escaped_tag = re.escape(tag)
@@ -315,25 +320,26 @@ class FuzzyMatcher:
        tokens = sorted([token for token in cleaned_s.split() if token])
        return " ".join(tokens)
    
-    def find_best_match(self, query_name, candidate_names, user_ignored_tags=None):
+    def find_best_match(self, query_name, candidate_names, user_ignored_tags=None, remove_cinemax=False):
        """
        Find the best fuzzy match for a name among a list of candidate names.
-        
+
        Args:
            query_name: Name to match
            candidate_names: List of candidate names to match against
            user_ignored_tags: User-configured tags to ignore
-        
+            remove_cinemax: If True, remove "Cinemax" from candidate names
+
        Returns:
            Tuple of (matched_name, score) or (None, 0) if no match found
        """
        if not candidate_names:
            return None, 0
-        
+
        if user_ignored_tags is None:
            user_ignored_tags = []
-        
-        # Normalize the query
+
+        # Normalize the query (channel name - don't remove Cinemax from it)
        normalized_query = self.normalize_name(query_name, user_ignored_tags)
        
        if not normalized_query:
@@ -341,12 +347,14 @@ class FuzzyMatcher:
        
        # Process query for token-sort matching
        processed_query = self.process_string_for_matching(normalized_query)
-        
+
        best_score = -1.0
        best_match = None
-        
+
        for candidate in candidate_names:
-            processed_candidate = self.process_string_for_matching(candidate)
+            # Normalize candidate (stream name) with Cinemax removal if requested
+            candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax)
+            processed_candidate = self.process_string_for_matching(candidate_normalized)
            score = self.calculate_similarity(processed_query, processed_candidate)
            
            if score > best_score:
@@ -361,26 +369,27 @@ class FuzzyMatcher:
        
        return None, 0
    
-    def fuzzy_match(self, query_name, candidate_names, user_ignored_tags=None):
+    def fuzzy_match(self, query_name, candidate_names, user_ignored_tags=None, remove_cinemax=False):
        """
        Generic fuzzy matching function that can match any name against a list of candidates.
        This is the main entry point for fuzzy matching.
-        
+
        Args:
-            query_name: Name to match
-            candidate_names: List of candidate names to match against
+            query_name: Name to match (channel name)
+            candidate_names: List of candidate names to match against (stream names)
            user_ignored_tags: User-configured tags to ignore
-        
+            remove_cinemax: If True, remove "Cinemax" from candidate names (for channels with "max")
+
        Returns:
            Tuple of (matched_name, score, match_type) or (None, 0, None) if no match found
        """
        if not candidate_names:
            return None, 0, None
-        
+
        if user_ignored_tags is None:
            user_ignored_tags = []
-        
-        # Normalize for matching
+
+        # Normalize query (channel name - don't remove Cinemax from it)
        normalized_query = self.normalize_name(query_name, user_ignored_tags)
        
        if not normalized_query:
@@ -393,31 +402,33 @@ class FuzzyMatcher:
        # Stage 1: Exact match (after normalization)
        normalized_query_lower = normalized_query.lower()
        normalized_query_nospace = re.sub(r'[\s&\-]+', '', normalized_query_lower)
-        
+
        for candidate in candidate_names:
-            candidate_normalized = self.normalize_name(candidate, user_ignored_tags)
+            # Normalize candidate (stream name) with Cinemax removal if requested
+            candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax)
            candidate_lower = candidate_normalized.lower()
            candidate_nospace = re.sub(r'[\s&\-]+', '', candidate_lower)
-            
+
            # Exact match
            if normalized_query_nospace == candidate_nospace:
                return candidate, 100, "exact"
-            
+
            # Very high similarity (97%+)
            ratio = self.calculate_similarity(normalized_query_lower, candidate_lower)
            if ratio >= 0.97 and ratio > best_ratio:
                best_match = candidate
                best_ratio = ratio
                match_type = "exact"
-        
+
        if best_match:
            return best_match, int(best_ratio * 100), match_type
-        
+
        # Stage 2: Substring matching
        for candidate in candidate_names:
-            candidate_normalized = self.normalize_name(candidate, user_ignored_tags)
+            # Normalize candidate (stream name) with Cinemax removal if requested
+            candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax)
            candidate_lower = candidate_normalized.lower()
-            
+
            # Check if one is a substring of the other
            if normalized_query_lower in candidate_lower or candidate_lower in normalized_query_lower:
                # Calculate similarity score
@@ -426,12 +437,12 @@ class FuzzyMatcher:
                    best_match = candidate
                    best_ratio = ratio
                    match_type = "substring"
-        
+
        if best_match and int(best_ratio * 100) >= self.match_threshold:
            return best_match, int(best_ratio * 100), match_type
-        
+
        # Stage 3: Fuzzy matching with token sorting
-        fuzzy_match, score = self.find_best_match(query_name, candidate_names, user_ignored_tags)
+        fuzzy_match, score = self.find_best_match(query_name, candidate_names, user_ignored_tags, remove_cinemax=remove_cinemax)
        if fuzzy_match:
            return fuzzy_match, score, f"fuzzy ({score})"
        
--- a/Stream-Mapparr/plugin.py
+++ b/Stream-Mapparr/plugin.py
@@ -351,14 +351,19 @@ class Plugin:
            logger.warning(f"[Stream-Mapparr] Could not trigger frontend refresh: {e}")
        return False

-    def _clean_channel_name(self, name, ignore_tags=None):
+    def _clean_channel_name(self, name, ignore_tags=None, remove_cinemax=False):
        """
        Remove brackets and their contents from channel name for matching, and remove ignore tags.
        Uses fuzzy matcher's normalization if available, otherwise falls back to basic cleaning.
+
+        Args:
+            name: Channel or stream name to clean
+            ignore_tags: List of tags to ignore
+            remove_cinemax: If True, remove "Cinemax" prefix (for streams when channel contains "max")
        """
        if self.fuzzy_matcher:
            # Use fuzzy matcher's normalization
-            return self.fuzzy_matcher.normalize_name(name, ignore_tags, remove_quality_tags=True)
+            return self.fuzzy_matcher.normalize_name(name, ignore_tags, remove_quality_tags=True, remove_cinemax=remove_cinemax)
        
        # Fallback to basic cleaning
        if ignore_tags is None:
@@ -488,12 +493,15 @@ class Plugin:
            ignore_tags = []
        if channels_data is None:
            channels_data = []
-        
+
        channel_name = channel['name']
-        
+
        # Get channel info from JSON
        channel_info = self._get_channel_info_from_json(channel_name, channels_data, logger)
-        
+
+        # Check if channel name contains "max" (case insensitive) - used for Cinemax handling
+        channel_has_max = 'max' in channel_name.lower()
+
        cleaned_channel_name = self._clean_channel_name(channel_name, ignore_tags)
        
        if "24/7" in channel_name.lower():
@@ -520,10 +528,10 @@ class Plugin:
            if matching_streams:
                sorted_streams = self._sort_streams_by_quality(matching_streams)
                logger.info(f"[Stream-Mapparr]   Sorted {len(sorted_streams)} streams by quality (callsign matching)")
-                
-                cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags) for s in sorted_streams]
+
+                cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags, remove_cinemax=channel_has_max) for s in sorted_streams]
                match_reason = "Callsign match"
-                
+
                return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason
            else:
                logger.info(f"[Stream-Mapparr]   No callsign matches found for {callsign}")
@@ -532,33 +540,35 @@ class Plugin:
        # Use fuzzy matching if available
        if self.fuzzy_matcher:
            logger.info(f"[Stream-Mapparr] Using fuzzy matcher for channel: {channel_name}")
-            
+
            # Get all stream names
            stream_names = [stream['name'] for stream in all_streams]
-            
+
            # Use fuzzy matcher to find best match
+            # Pass remove_cinemax flag if channel contains "max"
            matched_stream_name, score, match_type = self.fuzzy_matcher.fuzzy_match(
                channel_name,
                stream_names,
-                ignore_tags
+                ignore_tags,
+                remove_cinemax=channel_has_max
            )
            
            if matched_stream_name:
                # Find all streams that match this name (different qualities)
                matching_streams = []
-                cleaned_matched = self._clean_channel_name(matched_stream_name, ignore_tags)
-                
+                cleaned_matched = self._clean_channel_name(matched_stream_name, ignore_tags, remove_cinemax=channel_has_max)
+
                for stream in all_streams:
-                    cleaned_stream = self._clean_channel_name(stream['name'], ignore_tags)
-                    
+                    cleaned_stream = self._clean_channel_name(stream['name'], ignore_tags, remove_cinemax=channel_has_max)
+
                    if cleaned_stream.lower() == cleaned_matched.lower():
                        matching_streams.append(stream)
-                
+
                if matching_streams:
                    sorted_streams = self._sort_streams_by_quality(matching_streams)
                    logger.info(f"[Stream-Mapparr]   Found {len(sorted_streams)} streams via fuzzy match (score: {score}, type: {match_type})")
-                    
-                    cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags) for s in sorted_streams]
+
+                    cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags, remove_cinemax=channel_has_max) for s in sorted_streams]
                    match_reason = f"Fuzzy match ({match_type}, score: {score})"
                    
                    return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason
@@ -582,33 +592,33 @@ class Plugin:
            
            # Look for streams that match this channel name exactly
            for stream in all_streams:
-                cleaned_stream_name = self._clean_channel_name(stream['name'], ignore_tags)
-                
+                cleaned_stream_name = self._clean_channel_name(stream['name'], ignore_tags, remove_cinemax=channel_has_max)
+
                if cleaned_stream_name.lower() == cleaned_channel_name.lower():
                    matching_streams.append(stream)
-            
+
            if matching_streams:
                sorted_streams = self._sort_streams_by_quality(matching_streams)
                logger.info(f"[Stream-Mapparr]   Found {len(sorted_streams)} streams matching exact channel name")
-                
-                cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags) for s in sorted_streams]
+
+                cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags, remove_cinemax=channel_has_max) for s in sorted_streams]
                match_reason = "Exact match (channels.json)"
                
                return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason
        
        # Fallback to basic substring matching
        for stream in all_streams:
-            cleaned_stream_name = self._clean_channel_name(stream['name'], ignore_tags)
-            
+            cleaned_stream_name = self._clean_channel_name(stream['name'], ignore_tags, remove_cinemax=channel_has_max)
+
            # Simple case-insensitive substring matching
            if cleaned_channel_name.lower() in cleaned_stream_name.lower() or cleaned_stream_name.lower() in cleaned_channel_name.lower():
                matching_streams.append(stream)
-        
+
        if matching_streams:
            sorted_streams = self._sort_streams_by_quality(matching_streams)
            logger.info(f"[Stream-Mapparr]   Found {len(sorted_streams)} streams matching via basic substring match")
-            
-            cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags) for s in sorted_streams]
+
+            cleaned_stream_names = [self._clean_channel_name(s['name'], ignore_tags, remove_cinemax=channel_has_max) for s in sorted_streams]
            match_reason = "Basic substring match"
            
            return sorted_streams, cleaned_channel_name, cleaned_stream_names, match_reason