Merge pull request #15 from PiratesIRC/claude/investigate-fuzzy-matcher-bug-01R5rQiHwBMwKZfAUCcJMzKW
Claude/investigate fuzzy matcher bug (01R5rQiHwBMwKZfAUCcJMzKW)
This commit is contained in:
@@ -11,7 +11,7 @@ import logging
|
||||
from glob import glob
|
||||
|
||||
# Version: YY.DDD.HHMM (Julian date format: Year.DayOfYear.Time)
|
||||
__version__ = "25.314.1907"
|
||||
__version__ = "25.317.1200"
|
||||
|
||||
# Setup logging
|
||||
LOGGER = logging.getLogger("plugins.fuzzy_matcher")
|
||||
@@ -213,6 +213,9 @@ class FuzzyMatcher:
|
||||
if user_ignored_tags is None:
|
||||
user_ignored_tags = []
|
||||
|
||||
# Store original for logging
|
||||
original_name = name
|
||||
|
||||
# Remove leading parenthetical prefixes like (SP2), (D1), etc.
|
||||
name = re.sub(r'^\([^\)]+\)\s*', '', name)
|
||||
|
||||
@@ -223,7 +226,8 @@ class FuzzyMatcher:
|
||||
quality_tags = {'HD', 'SD', 'FD', 'UHD', 'FHD'}
|
||||
|
||||
# Check for 2-3 letter prefix with colon or space at start
|
||||
prefix_match = re.match(r'^([A-Z]{2,3})[:|\s]\s*', name)
|
||||
# Fixed regex: [:\s] instead of [:|\s] (inside a character class '|' is a literal pipe, not alternation, so it was wrongly accepted as a prefix separator)
|
||||
prefix_match = re.match(r'^([A-Z]{2,3})[:\s]\s*', name)
|
||||
if prefix_match:
|
||||
prefix = prefix_match.group(1).upper()
|
||||
# Only remove if it's NOT a quality tag
|
||||
@@ -280,7 +284,11 @@ class FuzzyMatcher:
|
||||
|
||||
# Clean up whitespace
|
||||
name = re.sub(r'\s+', ' ', name).strip()
|
||||
|
||||
|
||||
# Log warning if normalization resulted in empty string (indicates overly aggressive stripping)
|
||||
if not name:
|
||||
self.logger.warning(f"normalize_name returned empty string for input: '{original_name}' (original input was stripped too aggressively)")
|
||||
|
||||
return name
|
||||
|
||||
def extract_tags(self, name, user_ignored_tags=None):
|
||||
@@ -346,15 +354,17 @@ class FuzzyMatcher:
|
||||
def calculate_similarity(self, str1, str2):
|
||||
"""
|
||||
Calculate Levenshtein distance-based similarity ratio between two strings.
|
||||
|
||||
|
||||
Returns:
|
||||
Similarity ratio between 0.0 and 1.0
|
||||
"""
|
||||
if len(str1) < len(str2):
|
||||
str1, str2 = str2, str1
|
||||
|
||||
if len(str2) == 0:
|
||||
return 1.0 if len(str1) == 0 else 0.0
|
||||
|
||||
# Empty strings should not match anything (including other empty strings)
|
||||
# This prevents false positives when normalization strips everything
|
||||
if len(str2) == 0 or len(str1) == 0:
|
||||
return 0.0
|
||||
|
||||
previous_row = list(range(len(str2) + 1))
|
||||
|
||||
@@ -429,9 +439,14 @@ class FuzzyMatcher:
|
||||
for candidate in candidate_names:
|
||||
# Normalize candidate (stream name) with Cinemax removal if requested
|
||||
candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax)
|
||||
|
||||
# Skip candidates that normalize to empty or very short strings
|
||||
if not candidate_normalized or len(candidate_normalized) < 2:
|
||||
continue
|
||||
|
||||
processed_candidate = self.process_string_for_matching(candidate_normalized)
|
||||
score = self.calculate_similarity(processed_query, processed_candidate)
|
||||
|
||||
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best_match = candidate
|
||||
@@ -481,6 +496,12 @@ class FuzzyMatcher:
|
||||
for candidate in candidate_names:
|
||||
# Normalize candidate (stream name) with Cinemax removal if requested
|
||||
candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax)
|
||||
|
||||
# Skip candidates that normalize to empty or very short strings (< 2 chars)
|
||||
# This prevents false positives where multiple streams all normalize to ""
|
||||
if not candidate_normalized or len(candidate_normalized) < 2:
|
||||
continue
|
||||
|
||||
candidate_lower = candidate_normalized.lower()
|
||||
candidate_nospace = re.sub(r'[\s&\-]+', '', candidate_lower)
|
||||
|
||||
@@ -502,6 +523,11 @@ class FuzzyMatcher:
|
||||
for candidate in candidate_names:
|
||||
# Normalize candidate (stream name) with Cinemax removal if requested
|
||||
candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax)
|
||||
|
||||
# Skip candidates that normalize to empty or very short strings
|
||||
if not candidate_normalized or len(candidate_normalized) < 2:
|
||||
continue
|
||||
|
||||
candidate_lower = candidate_normalized.lower()
|
||||
|
||||
# Check if one is a substring of the other
|
||||
|
||||
@@ -33,7 +33,7 @@ class Plugin:
|
||||
"""Dispatcharr Stream-Mapparr Plugin"""
|
||||
|
||||
name = "Stream-Mapparr"
|
||||
version = "0.5.0"
|
||||
version = "0.5.1"
|
||||
description = "🎯 Automatically add matching streams to channels based on name similarity and quality precedence with enhanced fuzzy matching"
|
||||
|
||||
@property
|
||||
@@ -149,13 +149,6 @@ class Plugin:
|
||||
},
|
||||
]
|
||||
|
||||
# Add channel database section header
|
||||
static_fields.append({
|
||||
"id": "channel_databases_header",
|
||||
"type": "info",
|
||||
"label": "📚 Channel Databases",
|
||||
})
|
||||
|
||||
# Dynamically add channel database enable/disable fields
|
||||
try:
|
||||
databases = self._get_channel_databases()
|
||||
@@ -753,7 +746,8 @@ class Plugin:
|
||||
# Remove country code prefix if requested
|
||||
if remove_country_prefix:
|
||||
quality_tags = {'HD', 'SD', 'FD', 'UHD', 'FHD'}
|
||||
prefix_match = re.match(r'^([A-Z]{2,3})[:|\s]\s*', cleaned)
|
||||
# Fixed regex: [:\s] instead of [:|\s] (inside a character class '|' is a literal pipe, not alternation, so it was wrongly accepted as a prefix separator)
|
||||
prefix_match = re.match(r'^([A-Z]{2,3})[:\s]\s*', cleaned)
|
||||
if prefix_match:
|
||||
prefix = prefix_match.group(1).upper()
|
||||
if prefix not in quality_tags:
|
||||
@@ -1025,6 +1019,12 @@ class Plugin:
|
||||
ignore_geographic, ignore_misc, remove_cinemax=channel_has_max
|
||||
)
|
||||
|
||||
# Skip if either cleaned name is empty or too short (prevents false positives)
|
||||
if not cleaned_stream or len(cleaned_stream) < 2:
|
||||
continue
|
||||
if not cleaned_matched or len(cleaned_matched) < 2:
|
||||
continue
|
||||
|
||||
if cleaned_stream.lower() == cleaned_matched.lower():
|
||||
matching_streams.append(stream)
|
||||
|
||||
@@ -1064,6 +1064,12 @@ class Plugin:
|
||||
ignore_geographic, ignore_misc, remove_cinemax=channel_has_max
|
||||
)
|
||||
|
||||
# Skip if either cleaned name is empty or too short (prevents false positives)
|
||||
if not cleaned_stream_name or len(cleaned_stream_name) < 2:
|
||||
continue
|
||||
if not cleaned_channel_name or len(cleaned_channel_name) < 2:
|
||||
continue
|
||||
|
||||
if cleaned_stream_name.lower() == cleaned_channel_name.lower():
|
||||
matching_streams.append(stream)
|
||||
|
||||
@@ -1086,6 +1092,12 @@ class Plugin:
|
||||
ignore_geographic, ignore_misc, remove_cinemax=channel_has_max
|
||||
)
|
||||
|
||||
# Skip if either cleaned name is empty or too short (prevents false positives)
|
||||
if not cleaned_stream_name or len(cleaned_stream_name) < 2:
|
||||
continue
|
||||
if not cleaned_channel_name or len(cleaned_channel_name) < 2:
|
||||
continue
|
||||
|
||||
# Simple case-insensitive substring matching
|
||||
if cleaned_channel_name.lower() in cleaned_stream_name.lower() or cleaned_stream_name.lower() in cleaned_channel_name.lower():
|
||||
matching_streams.append(stream)
|
||||
|
||||
Reference in New Issue
Block a user