diff --git a/Stream-Mapparr/fuzzy_matcher.py b/Stream-Mapparr/fuzzy_matcher.py index bfefaee..1e6578d 100644 --- a/Stream-Mapparr/fuzzy_matcher.py +++ b/Stream-Mapparr/fuzzy_matcher.py @@ -11,7 +11,7 @@ import logging from glob import glob # Version: YY.DDD.HHMM (Julian date format: Year.DayOfYear.Time) -__version__ = "25.314.1907" +__version__ = "25.317.1200" # Setup logging LOGGER = logging.getLogger("plugins.fuzzy_matcher") @@ -213,6 +213,9 @@ class FuzzyMatcher: if user_ignored_tags is None: user_ignored_tags = [] + # Store original for logging + original_name = name + # Remove leading parenthetical prefixes like (SP2), (D1), etc. name = re.sub(r'^\([^\)]+\)\s*', '', name) @@ -223,7 +226,8 @@ class FuzzyMatcher: quality_tags = {'HD', 'SD', 'FD', 'UHD', 'FHD'} # Check for 2-3 letter prefix with colon or space at start - prefix_match = re.match(r'^([A-Z]{2,3})[:|\s]\s*', name) + # Fixed regex: [:\s] instead of [:|\s] (inside a character class, | is a literal pipe rather than alternation, so it was removed; \s is unchanged) + prefix_match = re.match(r'^([A-Z]{2,3})[:\s]\s*', name) if prefix_match: prefix = prefix_match.group(1).upper() # Only remove if it's NOT a quality tag @@ -280,7 +284,11 @@ class FuzzyMatcher: # Clean up whitespace name = re.sub(r'\s+', ' ', name).strip() - + + # Log warning if normalization resulted in empty string (indicates overly aggressive stripping) + if not name: + self.logger.warning(f"normalize_name returned empty string for input: '{original_name}' (original input was stripped too aggressively)") + return name def extract_tags(self, name, user_ignored_tags=None): @@ -346,15 +354,17 @@ class FuzzyMatcher: def calculate_similarity(self, str1, str2): """ Calculate Levenshtein distance-based similarity ratio between two strings. 
- + Returns: Similarity ratio between 0.0 and 1.0 """ if len(str1) < len(str2): str1, str2 = str2, str1 - - if len(str2) == 0: - return 1.0 if len(str1) == 0 else 0.0 + + # Empty strings should not match anything (including other empty strings) + # This prevents false positives when normalization strips everything + if len(str2) == 0 or len(str1) == 0: + return 0.0 previous_row = list(range(len(str2) + 1)) @@ -429,9 +439,14 @@ class FuzzyMatcher: for candidate in candidate_names: # Normalize candidate (stream name) with Cinemax removal if requested candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax) + + # Skip candidates that normalize to empty or very short strings + if not candidate_normalized or len(candidate_normalized) < 2: + continue + processed_candidate = self.process_string_for_matching(candidate_normalized) score = self.calculate_similarity(processed_query, processed_candidate) - + if score > best_score: best_score = score best_match = candidate @@ -481,6 +496,12 @@ class FuzzyMatcher: for candidate in candidate_names: # Normalize candidate (stream name) with Cinemax removal if requested candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax) + + # Skip candidates that normalize to empty or very short strings (< 2 chars) + # This prevents false positives where multiple streams all normalize to "" + if not candidate_normalized or len(candidate_normalized) < 2: + continue + candidate_lower = candidate_normalized.lower() candidate_nospace = re.sub(r'[\s&\-]+', '', candidate_lower) @@ -502,6 +523,11 @@ class FuzzyMatcher: for candidate in candidate_names: # Normalize candidate (stream name) with Cinemax removal if requested candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax) + + # Skip candidates that normalize to empty or very short strings + if not candidate_normalized or len(candidate_normalized) < 2: + 
continue + candidate_lower = candidate_normalized.lower() # Check if one is a substring of the other diff --git a/Stream-Mapparr/plugin.py b/Stream-Mapparr/plugin.py index 04aee28..f787506 100644 --- a/Stream-Mapparr/plugin.py +++ b/Stream-Mapparr/plugin.py @@ -33,7 +33,7 @@ class Plugin: """Dispatcharr Stream-Mapparr Plugin""" name = "Stream-Mapparr" - version = "0.5.0" + version = "0.5.1" description = "🎯 Automatically add matching streams to channels based on name similarity and quality precedence with enhanced fuzzy matching" @property @@ -149,13 +149,6 @@ class Plugin: }, ] - # Add channel database section header - static_fields.append({ - "id": "channel_databases_header", - "type": "info", - "label": "📚 Channel Databases", - }) - # Dynamically add channel database enable/disable fields try: databases = self._get_channel_databases() @@ -753,7 +746,8 @@ class Plugin: # Remove country code prefix if requested if remove_country_prefix: quality_tags = {'HD', 'SD', 'FD', 'UHD', 'FHD'} - prefix_match = re.match(r'^([A-Z]{2,3})[:|\s]\s*', cleaned) + # Fixed regex: [:\s] instead of [:|\s] (inside a character class, | is a literal pipe rather than alternation, so it was removed; \s is unchanged) + prefix_match = re.match(r'^([A-Z]{2,3})[:\s]\s*', cleaned) if prefix_match: prefix = prefix_match.group(1).upper() if prefix not in quality_tags: @@ -1025,6 +1019,12 @@ class Plugin: ignore_geographic, ignore_misc, remove_cinemax=channel_has_max ) + # Skip if either cleaned name is empty or too short (prevents false positives) + if not cleaned_stream or len(cleaned_stream) < 2: + continue + if not cleaned_matched or len(cleaned_matched) < 2: + continue + if cleaned_stream.lower() == cleaned_matched.lower(): matching_streams.append(stream) @@ -1064,6 +1064,12 @@ class Plugin: ignore_geographic, ignore_misc, remove_cinemax=channel_has_max ) + # Skip if either cleaned name is empty or too short (prevents false positives) + if not cleaned_stream_name or len(cleaned_stream_name) < 2: + continue + if not cleaned_channel_name or 
len(cleaned_channel_name) < 2: + continue + if cleaned_stream_name.lower() == cleaned_channel_name.lower(): matching_streams.append(stream) @@ -1086,6 +1092,12 @@ class Plugin: ignore_geographic, ignore_misc, remove_cinemax=channel_has_max ) + # Skip if either cleaned name is empty or too short (prevents false positives) + if not cleaned_stream_name or len(cleaned_stream_name) < 2: + continue + if not cleaned_channel_name or len(cleaned_channel_name) < 2: + continue + # Simple case-insensitive substring matching if cleaned_channel_name.lower() in cleaned_stream_name.lower() or cleaned_stream_name.lower() in cleaned_channel_name.lower(): matching_streams.append(stream) diff --git a/test_fuzzy_matcher_fix.py b/test_fuzzy_matcher_fix.py new file mode 100644 index 0000000..e75cbb4 --- /dev/null +++ b/test_fuzzy_matcher_fix.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python3 +""" +Test script to verify the fuzzy matcher bug fix. +Tests that streams which normalize to empty strings don't produce false positive matches. 
+""" + +import sys +import os + +# Add the Stream-Mapparr directory to the path +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'Stream-Mapparr')) + +from fuzzy_matcher import FuzzyMatcher + +def test_empty_string_normalization(): + """Test that empty normalized strings don't cause false positive matches.""" + print("=" * 80) + print("Test 1: Empty String Normalization") + print("=" * 80) + + # Create a fuzzy matcher + matcher = FuzzyMatcher(plugin_dir=None, match_threshold=85) + + # Test case 1: Stream names that could normalize to empty strings + test_streams = [ + "BR TNT SD", + "BR MTV SD", + "BR GNT SD", + "BR TLC SD", + "BR BIS SD" + ] + + # Configure user_ignored_tags that would strip channel names + user_ignored_tags = ["TNT", "MTV", "GNT", "TLC", "BIS", "SD", "HD"] + + # Test normalizing these streams + print("\nNormalizing streams with aggressive tags:") + for stream in test_streams: + normalized = matcher.normalize_name(stream, user_ignored_tags, remove_country_prefix=True) + print(f" '{stream}' -> '{normalized}' (len={len(normalized)})") + + print("\n" + "-" * 80) + + # Test matching "GNT" channel against these streams + channel_name = "GNT" + print(f"\nAttempting to match channel '{channel_name}' against test streams...") + + matched_name, score, match_type = matcher.fuzzy_match( + channel_name, + test_streams, + user_ignored_tags, + remove_cinemax=False + ) + + if matched_name: + print(f"✓ Match found: '{matched_name}' with score {score} (type: {match_type})") + # Should only match "BR GNT SD" + if matched_name == "BR GNT SD": + print("✓ PASS: Matched the correct stream!") + else: + print(f"✗ FAIL: Matched wrong stream! 
Expected 'BR GNT SD', got '{matched_name}'") + return False + else: + print(f"✗ No match found (score: {score})") + print(" This could be acceptable if all streams normalize to empty strings") + + print() + return True + + +def test_empty_string_similarity(): + """Test that empty strings don't match each other with 100% score.""" + print("=" * 80) + print("Test 2: Empty String Similarity") + print("=" * 80) + + matcher = FuzzyMatcher(plugin_dir=None, match_threshold=85) + + # Test empty string comparison + score1 = matcher.calculate_similarity("", "") + print(f"\nSimilarity('', '') = {score1}") + + if score1 == 0.0: + print("✓ PASS: Empty strings return 0.0 similarity (no false positive match)") + else: + print(f"✗ FAIL: Empty strings return {score1} similarity (should be 0.0)") + return False + + # Test empty vs non-empty + score2 = matcher.calculate_similarity("", "test") + print(f"Similarity('', 'test') = {score2}") + + if score2 == 0.0: + print("✓ PASS: Empty string vs non-empty returns 0.0") + else: + print(f"✗ FAIL: Empty vs non-empty returns {score2} (should be 0.0)") + return False + + print() + return True + + +def test_valid_matches_still_work(): + """Test that legitimate matches still work after the fix.""" + print("=" * 80) + print("Test 3: Valid Matches Still Work") + print("=" * 80) + + matcher = FuzzyMatcher(plugin_dir=None, match_threshold=85) + + test_cases = [ + { + "channel": "CNN", + "streams": ["CNN HD", "CNN SD", "Fox News HD"], + "expected": "CNN HD", + "user_tags": [] + }, + { + "channel": "HBO", + "streams": ["HBO East HD", "HBO West SD", "Showtime HD"], + "expected": "HBO East HD", + "user_tags": [] + }, + { + "channel": "ESPN", + "streams": ["ESPN HD", "ESPN2 HD", "Fox Sports HD"], + "expected": "ESPN HD", + "user_tags": [] + } + ] + + all_passed = True + + for i, test in enumerate(test_cases, 1): + channel = test["channel"] + streams = test["streams"] + expected = test["expected"] + user_tags = test["user_tags"] + + print(f"\nTest case 
{i}: Matching '{channel}' against {streams}") + + matched_name, score, match_type = matcher.fuzzy_match( + channel, + streams, + user_tags, + remove_cinemax=False + ) + + if matched_name == expected: + print(f"✓ PASS: Matched '{matched_name}' (score: {score}, type: {match_type})") + else: + print(f"✗ FAIL: Expected '{expected}', got '{matched_name}'") + all_passed = False + + print() + return all_passed + + +def main(): + """Run all tests.""" + print("\n" + "=" * 80) + print("FUZZY MATCHER BUG FIX VERIFICATION") + print("=" * 80 + "\n") + + results = [] + + # Run tests + results.append(("Empty String Similarity", test_empty_string_similarity())) + results.append(("Empty String Normalization", test_empty_string_normalization())) + results.append(("Valid Matches Still Work", test_valid_matches_still_work())) + + # Print summary + print("=" * 80) + print("TEST SUMMARY") + print("=" * 80) + + for test_name, passed in results: + status = "✓ PASS" if passed else "✗ FAIL" + print(f"{status}: {test_name}") + + all_passed = all(passed for _, passed in results) + + print("\n" + "=" * 80) + if all_passed: + print("✓ ALL TESTS PASSED!") + else: + print("✗ SOME TESTS FAILED") + print("=" * 80 + "\n") + + return 0 if all_passed else 1 + + +if __name__ == "__main__": + sys.exit(main())