Root cause: Stream names were being normalized to empty strings by aggressive pattern stripping, causing multiple unrelated channels (GNT, MTV, TLC, BIS, TNT, etc.) to incorrectly match each other with 100% similarity scores.

Changes made:
1. Fixed calculate_similarity() to return 0.0 for empty-string comparisons instead of 1.0, preventing false positives.
2. Added validation in normalize_name() to log a warning when normalization results in an empty string.
3. Added empty-string checks (< 2 chars) in all matching stages:
   - fuzzy_match() Stage 1 (exact match)
   - fuzzy_match() Stage 2 (substring match)
   - find_best_match() (token-sort matching)
4. Added validation in plugin.py _match_streams_to_channel() to skip streams with empty/short cleaned names in:
   - Fuzzy matcher result collection
   - JSON exact match section
   - Basic substring matching fallback
5. Fixed the country prefix regex pattern from [:|\s] to [:\s] (removed incorrect pipe and backslash characters).

Testing: Added a comprehensive test suite (test_fuzzy_matcher_fix.py) that verifies empty strings don't match and valid matches still work. All tests pass.
195 lines
5.8 KiB
Python
195 lines
5.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test script to verify the fuzzy matcher bug fix.
|
|
Tests that streams which normalize to empty strings don't produce false positive matches.
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
|
|
# Prepend the 'Stream-Mapparr' directory (resolved relative to this file's
# location) to the module search path. Presumably this is where
# fuzzy_matcher.py lives, so the import that follows can resolve -- verify
# against the repository layout.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'Stream-Mapparr'))
|
|
|
|
from fuzzy_matcher import FuzzyMatcher
|
|
|
|
def test_empty_string_normalization():
    """Check that over-aggressive ignore-tags do not produce a wrong match.

    Normalizes several "BR <name> SD" stream names using ignore-tags that
    overlap with the channel names themselves, printing each result, and then
    asks the matcher to pair the channel "GNT" with one of those streams.

    Returns:
        False when a stream other than "BR GNT SD" is matched; True otherwise,
        which includes the case where no stream is matched at all.
    """
    rule = "=" * 80
    for banner_line in (rule, "Test 1: Empty String Normalization", rule):
        print(banner_line)

    fm = FuzzyMatcher(plugin_dir=None, match_threshold=85)

    # Ignore-tags picked so that they would strip the channel names as well.
    ignored = ["TNT", "MTV", "GNT", "TLC", "BIS", "SD", "HD"]

    # Stream names that could collapse to an empty string once normalized.
    candidates = ["BR TNT SD", "BR MTV SD", "BR GNT SD", "BR TLC SD", "BR BIS SD"]

    print("\nNormalizing streams with aggressive tags:")
    for raw in candidates:
        cleaned = fm.normalize_name(raw, ignored, remove_country_prefix=True)
        print(f" '{raw}' -> '{cleaned}' (len={len(cleaned)})")

    print("\n" + "-" * 80)

    target = "GNT"
    print(f"\nAttempting to match channel '{target}' against test streams...")

    best, best_score, kind = fm.fuzzy_match(
        target,
        candidates,
        ignored,
        remove_cinemax=False
    )

    if not best:
        # No match is tolerated: every candidate may have normalized to nothing.
        print(f"✗ No match found (score: {best_score})")
        print(" This could be acceptable if all streams normalize to empty strings")
    else:
        print(f"✓ Match found: '{best}' with score {best_score} (type: {kind})")
        # Only the stream that actually carries the channel name is acceptable.
        if best != "BR GNT SD":
            print(f"✗ FAIL: Matched wrong stream! Expected 'BR GNT SD', got '{best}'")
            return False
        print("✓ PASS: Matched the correct stream!")

    print()
    return True
|
|
|
|
|
|
def test_empty_string_similarity():
    """Check that similarity involving an empty string is reported as 0.0.

    Two comparisons are made through ``calculate_similarity``: empty vs. empty
    and empty vs. "test". The function stops at the first comparison whose
    score is not 0.0.

    Returns:
        True when both comparisons score 0.0, False otherwise.
    """
    rule = "=" * 80
    for banner_line in (rule, "Test 2: Empty String Similarity", rule):
        print(banner_line)

    fm = FuzzyMatcher(plugin_dir=None, match_threshold=85)

    # Empty string compared against itself.
    both_empty = fm.calculate_similarity("", "")
    print(f"\nSimilarity('', '') = {both_empty}")
    if both_empty != 0.0:
        print(f"✗ FAIL: Empty strings return {both_empty} similarity (should be 0.0)")
        return False
    print("✓ PASS: Empty strings return 0.0 similarity (no false positive match)")

    # Empty string compared against real text.
    one_empty = fm.calculate_similarity("", "test")
    print(f"Similarity('', 'test') = {one_empty}")
    if one_empty != 0.0:
        print(f"✗ FAIL: Empty vs non-empty returns {one_empty} (should be 0.0)")
        return False
    print("✓ PASS: Empty string vs non-empty returns 0.0")

    print()
    return True
|
|
|
|
|
|
def test_valid_matches_still_work():
    """Check that ordinary channel/stream pairs are still matched.

    Each scenario names a channel, the candidate streams, the stream expected
    to win, and the ignore-tags to use. Every scenario is run even if an
    earlier one fails, so the printed report is always complete.

    Returns:
        True when every scenario matched its expected stream, False otherwise.
    """
    rule = "=" * 80
    for banner_line in (rule, "Test 3: Valid Matches Still Work", rule):
        print(banner_line)

    fm = FuzzyMatcher(plugin_dir=None, match_threshold=85)

    scenarios = [
        {
            "channel": "CNN",
            "streams": ["CNN HD", "CNN SD", "Fox News HD"],
            "expected": "CNN HD",
            "user_tags": [],
        },
        {
            "channel": "HBO",
            "streams": ["HBO East HD", "HBO West SD", "Showtime HD"],
            "expected": "HBO East HD",
            "user_tags": [],
        },
        {
            "channel": "ESPN",
            "streams": ["ESPN HD", "ESPN2 HD", "Fox Sports HD"],
            "expected": "ESPN HD",
            "user_tags": [],
        },
    ]

    outcomes = []
    for number, scenario in enumerate(scenarios, start=1):
        name = scenario["channel"]
        candidates = scenario["streams"]
        wanted = scenario["expected"]
        tags = scenario["user_tags"]

        print(f"\nTest case {number}: Matching '{name}' against {candidates}")

        best, best_score, kind = fm.fuzzy_match(
            name,
            candidates,
            tags,
            remove_cinemax=False
        )

        ok = best == wanted
        if ok:
            print(f"✓ PASS: Matched '{best}' (score: {best_score}, type: {kind})")
        else:
            print(f"✗ FAIL: Expected '{wanted}', got '{best}'")
        # Record the outcome and keep going so later scenarios are still reported.
        outcomes.append(ok)

    print()
    return all(outcomes)
|
|
|
|
|
|
def main():
    """Run the three checks in order, print a summary, and report an exit code.

    Returns:
        0 when every check returned a truthy result, 1 otherwise.
    """
    rule = "=" * 80

    print("\n" + rule)
    print("FUZZY MATCHER BUG FIX VERIFICATION")
    print(rule + "\n")

    # (label, callable) pairs; the checks execute in exactly this order.
    suite = (
        ("Empty String Similarity", test_empty_string_similarity),
        ("Empty String Normalization", test_empty_string_normalization),
        ("Valid Matches Still Work", test_valid_matches_still_work),
    )
    outcomes = [(label, check()) for label, check in suite]

    # Summary table: one line per check.
    print(rule)
    print("TEST SUMMARY")
    print(rule)

    for label, ok in outcomes:
        if ok:
            marker = "✓ PASS"
        else:
            marker = "✗ FAIL"
        print(f"{marker}: {label}")

    everything_ok = all(ok for _, ok in outcomes)

    print("\n" + rule)
    if everything_ok:
        print("✓ ALL TESTS PASSED!")
    else:
        print("✗ SOME TESTS FAILED")
    print(rule + "\n")

    if everything_ok:
        return 0
    return 1
|
|
|
|
|
|
# Script entry point: run the checks and hand main()'s return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|