0.7.3
# Stream-Mapparr Plugin Changelog (v0.7.2 -> v0.7.3)

### Matching Logic Improvements
- Implemented a length ratio check (75%) for substring matches to prevent partial word false positives (e.g., preventing "story" from matching "history").
- Added strict validation for numeric tokens; streams must now explicitly contain matching numbers if the channel name includes them (e.g., prevents "BBC1" from matching "CBBC").

### Features
- Added the underlying FuzzyMatcher library version number to the header of generated CSV export files for better debugging.

### Maintenance
- Updated the minimum required FuzzyMatcher version to 25.358.0200.
This commit is contained in:
@@ -12,7 +12,7 @@ import unicodedata
|
|||||||
from glob import glob
|
from glob import glob
|
||||||
|
|
||||||
# Version: YY.DDD.HHMM (Julian date format: Year.DayOfYear.Time)
|
# Version: YY.DDD.HHMM (Julian date format: Year.DayOfYear.Time)
|
||||||
__version__ = "25.354.1835"
|
__version__ = "25.358.0200"
|
||||||
|
|
||||||
# Setup logging
|
# Setup logging
|
||||||
LOGGER = logging.getLogger("plugins.fuzzy_matcher")
|
LOGGER = logging.getLogger("plugins.fuzzy_matcher")
|
||||||
@@ -398,7 +398,20 @@ class FuzzyMatcher:
|
|||||||
patterns_to_apply.extend(GEOGRAPHIC_PATTERNS)
|
patterns_to_apply.extend(GEOGRAPHIC_PATTERNS)
|
||||||
|
|
||||||
if ignore_misc:
|
if ignore_misc:
|
||||||
patterns_to_apply.extend(MISC_PATTERNS)
|
# CRITICAL FIX: Only apply MISC_PATTERNS (which removes ALL parentheses) if we're also
|
||||||
|
# ignoring regional tags. Otherwise, MISC_PATTERNS would strip regional indicators like
|
||||||
|
# "(WEST)" even when the user has set ignore_regional=False.
|
||||||
|
# This ensures that "BBC America" won't match "BBC AMERICA (WEST)" when ignore_regional=False
|
||||||
|
if ignore_regional:
|
||||||
|
# Safe to remove ALL parentheses since regional indicators are already being ignored
|
||||||
|
patterns_to_apply.extend(MISC_PATTERNS)
|
||||||
|
else:
|
||||||
|
# User wants to preserve regional indicators - skip MISC_PATTERNS to avoid
|
||||||
|
# removing parenthetical content that might be regional indicators
|
||||||
|
# Note: This means some misc tags like (CX), (B), (PRIME) won't be removed
|
||||||
|
# when ignore_regional=False, but this is the correct behavior to preserve
|
||||||
|
# regional tags like (WEST), (EAST), etc.
|
||||||
|
pass
|
||||||
|
|
||||||
# Apply selected hardcoded patterns
|
# Apply selected hardcoded patterns
|
||||||
for pattern in patterns_to_apply:
|
for pattern in patterns_to_apply:
|
||||||
@@ -406,22 +419,42 @@ class FuzzyMatcher:
|
|||||||
|
|
||||||
# Apply user-configured ignored tags with improved handling
|
# Apply user-configured ignored tags with improved handling
|
||||||
for tag in user_ignored_tags:
|
for tag in user_ignored_tags:
|
||||||
|
escaped_tag = re.escape(tag)
|
||||||
|
|
||||||
# Check if tag contains brackets or parentheses - if so, match literally
|
# Check if tag contains brackets or parentheses - if so, match literally
|
||||||
if '[' in tag or ']' in tag or '(' in tag or ')' in tag:
|
if '[' in tag or ']' in tag or '(' in tag or ')' in tag:
|
||||||
# Literal match for bracketed/parenthesized tags
|
# Literal match for bracketed/parenthesized tags, remove with trailing whitespace
|
||||||
escaped_tag = re.escape(tag)
|
name = re.sub(escaped_tag + r'\s*', '', name, flags=re.IGNORECASE)
|
||||||
name = re.sub(escaped_tag, '', name, flags=re.IGNORECASE)
|
|
||||||
else:
|
else:
|
||||||
# Word boundary match for simple word tags to avoid partial matches
|
# CRITICAL FIX: Word boundaries (\b) only work with alphanumeric characters
|
||||||
# e.g., "East" won't match the "east" in "Feast"
|
# Tags with Unicode/special characters (like ┃NLZIET┃) fail with word boundaries
|
||||||
escaped_tag = re.escape(tag)
|
# Check if tag contains only word characters (alphanumeric + underscore)
|
||||||
name = re.sub(r'\b' + escaped_tag + r'\b', '', name, flags=re.IGNORECASE)
|
if re.match(r'^\w+$', tag):
|
||||||
|
# Safe to use word boundaries for pure word tags
|
||||||
|
# This prevents "East" from matching the "east" in "Feast"
|
||||||
|
name = re.sub(r'\b' + escaped_tag + r'\b', '', name, flags=re.IGNORECASE)
|
||||||
|
else:
|
||||||
|
# Tag contains special/Unicode characters - can't use word boundaries
|
||||||
|
# Match the tag followed by optional whitespace
|
||||||
|
name = re.sub(escaped_tag + r'\s*', '', name, flags=re.IGNORECASE)
|
||||||
|
|
||||||
# Remove callsigns in parentheses
|
# Remove callsigns in parentheses
|
||||||
name = re.sub(r'\([KW][A-Z]{3}(?:-(?:TV|CD|LP|DT|LD))?\)', '', name, flags=re.IGNORECASE)
|
# CRITICAL FIX: Don't remove regional indicators like (WEST), (EAST), etc. when ignore_regional=False
|
||||||
|
# The callsign pattern \([KW][A-Z]{3}...\) accidentally matches (WEST), (WETA), (KOMO), etc.
|
||||||
# Remove other tags in parentheses
|
# We need to exclude known regional indicators even when matching callsigns
|
||||||
name = re.sub(r'\([A-Z0-9]+\)', '', name)
|
if ignore_regional:
|
||||||
|
# Safe to remove callsigns without checking for regional indicators
|
||||||
|
name = re.sub(r'\([KW][A-Z]{3}(?:-(?:TV|CD|LP|DT|LD))?\)', '', name, flags=re.IGNORECASE)
|
||||||
|
else:
|
||||||
|
# Only remove callsigns that are NOT regional indicators
|
||||||
|
# Use negative lookahead to exclude WEST, EAST, etc.
|
||||||
|
# Pattern matches (K or W) + 3 letters, but NOT if those 3 letters form a regional word
|
||||||
|
name = re.sub(r'\([KW](?!EST\)|ACIFIC\)|ENTRAL\)|OUNTAIN\)|TLANTIC\))[A-Z]{3}(?:-(?:TV|CD|LP|DT|LD))?\)', '', name, flags=re.IGNORECASE)
|
||||||
|
|
||||||
|
# Remove other tags in parentheses (but only if we're also ignoring regional tags)
|
||||||
|
# Otherwise this would remove regional indicators like (WEST), (EAST), etc.
|
||||||
|
if ignore_regional:
|
||||||
|
name = re.sub(r'\([A-Z0-9]+\)', '', name)
|
||||||
|
|
||||||
# Remove common pattern fixes
|
# Remove common pattern fixes
|
||||||
name = re.sub(r'^The\s+', '', name, flags=re.IGNORECASE)
|
name = re.sub(r'^The\s+', '', name, flags=re.IGNORECASE)
|
||||||
@@ -569,7 +602,8 @@ class FuzzyMatcher:
|
|||||||
tokens = sorted([token for token in cleaned_s.split() if token])
|
tokens = sorted([token for token in cleaned_s.split() if token])
|
||||||
return " ".join(tokens)
|
return " ".join(tokens)
|
||||||
|
|
||||||
def find_best_match(self, query_name, candidate_names, user_ignored_tags=None, remove_cinemax=False):
|
def find_best_match(self, query_name, candidate_names, user_ignored_tags=None, remove_cinemax=False,
|
||||||
|
ignore_quality=True, ignore_regional=True, ignore_geographic=True, ignore_misc=True):
|
||||||
"""
|
"""
|
||||||
Find the best fuzzy match for a name among a list of candidate names.
|
Find the best fuzzy match for a name among a list of candidate names.
|
||||||
|
|
||||||
@@ -578,6 +612,10 @@ class FuzzyMatcher:
|
|||||||
candidate_names: List of candidate names to match against
|
candidate_names: List of candidate names to match against
|
||||||
user_ignored_tags: User-configured tags to ignore
|
user_ignored_tags: User-configured tags to ignore
|
||||||
remove_cinemax: If True, remove "Cinemax" from candidate names
|
remove_cinemax: If True, remove "Cinemax" from candidate names
|
||||||
|
ignore_quality: If True, remove ALL quality indicators during normalization
|
||||||
|
ignore_regional: If True, remove regional indicator patterns during normalization
|
||||||
|
ignore_geographic: If True, remove ALL country code patterns during normalization
|
||||||
|
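ignore_misc: If True, remove ALL content within parentheses during normalization (the misc patterns are only applied when ignore_regional is also True, so regional tags such as "(WEST)" are preserved otherwise)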
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (matched_name, score) or (None, 0) if no match found
|
Tuple of (matched_name, score) or (None, 0) if no match found
|
||||||
@@ -589,7 +627,11 @@ class FuzzyMatcher:
|
|||||||
user_ignored_tags = []
|
user_ignored_tags = []
|
||||||
|
|
||||||
# Normalize the query (channel name - don't remove Cinemax from it)
|
# Normalize the query (channel name - don't remove Cinemax from it)
|
||||||
normalized_query = self.normalize_name(query_name, user_ignored_tags)
|
normalized_query = self.normalize_name(query_name, user_ignored_tags,
|
||||||
|
ignore_quality=ignore_quality,
|
||||||
|
ignore_regional=ignore_regional,
|
||||||
|
ignore_geographic=ignore_geographic,
|
||||||
|
ignore_misc=ignore_misc)
|
||||||
|
|
||||||
if not normalized_query:
|
if not normalized_query:
|
||||||
return None, 0
|
return None, 0
|
||||||
@@ -602,7 +644,12 @@ class FuzzyMatcher:
|
|||||||
|
|
||||||
for candidate in candidate_names:
|
for candidate in candidate_names:
|
||||||
# Normalize candidate (stream name) with Cinemax removal if requested
|
# Normalize candidate (stream name) with Cinemax removal if requested
|
||||||
candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax)
|
candidate_normalized = self.normalize_name(candidate, user_ignored_tags,
|
||||||
|
ignore_quality=ignore_quality,
|
||||||
|
ignore_regional=ignore_regional,
|
||||||
|
ignore_geographic=ignore_geographic,
|
||||||
|
ignore_misc=ignore_misc,
|
||||||
|
remove_cinemax=remove_cinemax)
|
||||||
|
|
||||||
# Skip candidates that normalize to empty or very short strings
|
# Skip candidates that normalize to empty or very short strings
|
||||||
if not candidate_normalized or len(candidate_normalized) < 2:
|
if not candidate_normalized or len(candidate_normalized) < 2:
|
||||||
@@ -623,7 +670,8 @@ class FuzzyMatcher:
|
|||||||
|
|
||||||
return None, 0
|
return None, 0
|
||||||
|
|
||||||
def fuzzy_match(self, query_name, candidate_names, user_ignored_tags=None, remove_cinemax=False):
|
def fuzzy_match(self, query_name, candidate_names, user_ignored_tags=None, remove_cinemax=False,
|
||||||
|
ignore_quality=True, ignore_regional=True, ignore_geographic=True, ignore_misc=True):
|
||||||
"""
|
"""
|
||||||
Generic fuzzy matching function that can match any name against a list of candidates.
|
Generic fuzzy matching function that can match any name against a list of candidates.
|
||||||
This is the main entry point for fuzzy matching.
|
This is the main entry point for fuzzy matching.
|
||||||
@@ -633,6 +681,10 @@ class FuzzyMatcher:
|
|||||||
candidate_names: List of candidate names to match against (stream names)
|
candidate_names: List of candidate names to match against (stream names)
|
||||||
user_ignored_tags: User-configured tags to ignore
|
user_ignored_tags: User-configured tags to ignore
|
||||||
remove_cinemax: If True, remove "Cinemax" from candidate names (for channels with "max")
|
remove_cinemax: If True, remove "Cinemax" from candidate names (for channels with "max")
|
||||||
|
ignore_quality: If True, remove ALL quality indicators during normalization
|
||||||
|
ignore_regional: If True, remove regional indicator patterns during normalization
|
||||||
|
ignore_geographic: If True, remove ALL country code patterns during normalization
|
||||||
|
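ignore_misc: If True, remove ALL content within parentheses during normalization (the misc patterns are only applied when ignore_regional is also True, so regional tags such as "(WEST)" are preserved otherwise)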
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (matched_name, score, match_type) or (None, 0, None) if no match found
|
Tuple of (matched_name, score, match_type) or (None, 0, None) if no match found
|
||||||
@@ -644,7 +696,11 @@ class FuzzyMatcher:
|
|||||||
user_ignored_tags = []
|
user_ignored_tags = []
|
||||||
|
|
||||||
# Normalize query (channel name - don't remove Cinemax from it)
|
# Normalize query (channel name - don't remove Cinemax from it)
|
||||||
normalized_query = self.normalize_name(query_name, user_ignored_tags)
|
normalized_query = self.normalize_name(query_name, user_ignored_tags,
|
||||||
|
ignore_quality=ignore_quality,
|
||||||
|
ignore_regional=ignore_regional,
|
||||||
|
ignore_geographic=ignore_geographic,
|
||||||
|
ignore_misc=ignore_misc)
|
||||||
|
|
||||||
if not normalized_query:
|
if not normalized_query:
|
||||||
return None, 0, None
|
return None, 0, None
|
||||||
@@ -659,7 +715,12 @@ class FuzzyMatcher:
|
|||||||
|
|
||||||
for candidate in candidate_names:
|
for candidate in candidate_names:
|
||||||
# Normalize candidate (stream name) with Cinemax removal if requested
|
# Normalize candidate (stream name) with Cinemax removal if requested
|
||||||
candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax)
|
candidate_normalized = self.normalize_name(candidate, user_ignored_tags,
|
||||||
|
ignore_quality=ignore_quality,
|
||||||
|
ignore_regional=ignore_regional,
|
||||||
|
ignore_geographic=ignore_geographic,
|
||||||
|
ignore_misc=ignore_misc,
|
||||||
|
remove_cinemax=remove_cinemax)
|
||||||
|
|
||||||
# Skip candidates that normalize to empty or very short strings (< 2 chars)
|
# Skip candidates that normalize to empty or very short strings (< 2 chars)
|
||||||
# This prevents false positives where multiple streams all normalize to ""
|
# This prevents false positives where multiple streams all normalize to ""
|
||||||
@@ -686,7 +747,12 @@ class FuzzyMatcher:
|
|||||||
# Stage 2: Substring matching
|
# Stage 2: Substring matching
|
||||||
for candidate in candidate_names:
|
for candidate in candidate_names:
|
||||||
# Normalize candidate (stream name) with Cinemax removal if requested
|
# Normalize candidate (stream name) with Cinemax removal if requested
|
||||||
candidate_normalized = self.normalize_name(candidate, user_ignored_tags, remove_cinemax=remove_cinemax)
|
candidate_normalized = self.normalize_name(candidate, user_ignored_tags,
|
||||||
|
ignore_quality=ignore_quality,
|
||||||
|
ignore_regional=ignore_regional,
|
||||||
|
ignore_geographic=ignore_geographic,
|
||||||
|
ignore_misc=ignore_misc,
|
||||||
|
remove_cinemax=remove_cinemax)
|
||||||
|
|
||||||
# Skip candidates that normalize to empty or very short strings
|
# Skip candidates that normalize to empty or very short strings
|
||||||
if not candidate_normalized or len(candidate_normalized) < 2:
|
if not candidate_normalized or len(candidate_normalized) < 2:
|
||||||
@@ -696,18 +762,29 @@ class FuzzyMatcher:
|
|||||||
|
|
||||||
# Check if one is a substring of the other
|
# Check if one is a substring of the other
|
||||||
if normalized_query_lower in candidate_lower or candidate_lower in normalized_query_lower:
|
if normalized_query_lower in candidate_lower or candidate_lower in normalized_query_lower:
|
||||||
# Calculate similarity score
|
# CRITICAL FIX: Add length ratio requirement to prevent false positives
|
||||||
ratio = self.calculate_similarity(normalized_query_lower, candidate_lower)
|
# like "story" matching "history" (story is 5 chars, history is 7 chars)
|
||||||
if ratio > best_ratio:
|
# Require strings to be within 75% of same length for substring match
|
||||||
best_match = candidate
|
# This ensures substring matches are semantically meaningful
|
||||||
best_ratio = ratio
|
length_ratio = min(len(normalized_query_lower), len(candidate_lower)) / max(len(normalized_query_lower), len(candidate_lower))
|
||||||
match_type = "substring"
|
if length_ratio >= 0.75:
|
||||||
|
# Calculate similarity score
|
||||||
|
ratio = self.calculate_similarity(normalized_query_lower, candidate_lower)
|
||||||
|
if ratio > best_ratio:
|
||||||
|
best_match = candidate
|
||||||
|
best_ratio = ratio
|
||||||
|
match_type = "substring"
|
||||||
|
|
||||||
if best_match and int(best_ratio * 100) >= self.match_threshold:
|
if best_match and int(best_ratio * 100) >= self.match_threshold:
|
||||||
return best_match, int(best_ratio * 100), match_type
|
return best_match, int(best_ratio * 100), match_type
|
||||||
|
|
||||||
# Stage 3: Fuzzy matching with token sorting
|
# Stage 3: Fuzzy matching with token sorting
|
||||||
fuzzy_match, score = self.find_best_match(query_name, candidate_names, user_ignored_tags, remove_cinemax=remove_cinemax)
|
fuzzy_match, score = self.find_best_match(query_name, candidate_names, user_ignored_tags,
|
||||||
|
remove_cinemax=remove_cinemax,
|
||||||
|
ignore_quality=ignore_quality,
|
||||||
|
ignore_regional=ignore_regional,
|
||||||
|
ignore_geographic=ignore_geographic,
|
||||||
|
ignore_misc=ignore_misc)
|
||||||
if fuzzy_match:
|
if fuzzy_match:
|
||||||
return fuzzy_match, score, f"fuzzy ({score})"
|
return fuzzy_match, score, f"fuzzy ({score})"
|
||||||
|
|
||||||
|
|||||||
@@ -21,6 +21,8 @@ import threading
|
|||||||
|
|
||||||
# Import FuzzyMatcher from the same directory
|
# Import FuzzyMatcher from the same directory
|
||||||
from .fuzzy_matcher import FuzzyMatcher
|
from .fuzzy_matcher import FuzzyMatcher
|
||||||
|
# Import fuzzy_matcher version for CSV header
|
||||||
|
from . import fuzzy_matcher
|
||||||
|
|
||||||
# Django model imports - same approach as Event Channel Managarr
|
# Django model imports - same approach as Event Channel Managarr
|
||||||
from apps.channels.models import Channel, ChannelProfileMembership, ChannelStream, Stream
|
from apps.channels.models import Channel, ChannelProfileMembership, ChannelStream, Stream
|
||||||
@@ -63,8 +65,8 @@ class PluginConfig:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# === PLUGIN METADATA ===
|
# === PLUGIN METADATA ===
|
||||||
PLUGIN_VERSION = "0.7.2"
|
PLUGIN_VERSION = "0.7.3"
|
||||||
FUZZY_MATCHER_MIN_VERSION = "25.354.1835" # Requires complete regional patterns support
|
FUZZY_MATCHER_MIN_VERSION = "25.358.0200" # Requires custom ignore tags Unicode fix
|
||||||
|
|
||||||
# === MATCHING SETTINGS ===
|
# === MATCHING SETTINGS ===
|
||||||
DEFAULT_FUZZY_MATCH_THRESHOLD = 85 # Minimum similarity score (0-100)
|
DEFAULT_FUZZY_MATCH_THRESHOLD = 85 # Minimum similarity score (0-100)
|
||||||
@@ -2021,7 +2023,9 @@ class Plugin:
|
|||||||
if self.fuzzy_matcher:
|
if self.fuzzy_matcher:
|
||||||
stream_names = [stream['name'] for stream in working_streams]
|
stream_names = [stream['name'] for stream in working_streams]
|
||||||
matched_stream_name, score, match_type = self.fuzzy_matcher.fuzzy_match(
|
matched_stream_name, score, match_type = self.fuzzy_matcher.fuzzy_match(
|
||||||
channel_name, stream_names, ignore_tags, remove_cinemax=channel_has_max
|
channel_name, stream_names, ignore_tags, remove_cinemax=channel_has_max,
|
||||||
|
ignore_quality=ignore_quality, ignore_regional=ignore_regional,
|
||||||
|
ignore_geographic=ignore_geographic, ignore_misc=ignore_misc
|
||||||
)
|
)
|
||||||
|
|
||||||
if matched_stream_name:
|
if matched_stream_name:
|
||||||
@@ -2055,10 +2059,16 @@ class Plugin:
|
|||||||
|
|
||||||
# Substring match: stream contains channel OR channel contains stream
|
# Substring match: stream contains channel OR channel contains stream
|
||||||
if stream_lower in channel_lower or channel_lower in stream_lower:
|
if stream_lower in channel_lower or channel_lower in stream_lower:
|
||||||
# Calculate similarity to ensure it meets threshold
|
# CRITICAL FIX: Add length ratio requirement to prevent false positives
|
||||||
similarity = self.fuzzy_matcher.calculate_similarity(stream_lower, channel_lower)
|
# like "story" matching "history" (story is 5 chars, history is 7 chars)
|
||||||
if int(similarity * 100) >= self.fuzzy_matcher.match_threshold:
|
# Require strings to be within 75% of same length for substring match
|
||||||
matching_streams.append(stream)
|
# This ensures substring matches are semantically meaningful
|
||||||
|
length_ratio = min(len(stream_lower), len(channel_lower)) / max(len(stream_lower), len(channel_lower))
|
||||||
|
if length_ratio >= 0.75:
|
||||||
|
# Calculate similarity to ensure it meets threshold
|
||||||
|
similarity = self.fuzzy_matcher.calculate_similarity(stream_lower, channel_lower)
|
||||||
|
if int(similarity * 100) >= self.fuzzy_matcher.match_threshold:
|
||||||
|
matching_streams.append(stream)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Token-based matching: check if significant tokens overlap
|
# Token-based matching: check if significant tokens overlap
|
||||||
@@ -2245,7 +2255,9 @@ class Plugin:
|
|||||||
try:
|
try:
|
||||||
stream_names = [stream['name'] for stream in all_streams]
|
stream_names = [stream['name'] for stream in all_streams]
|
||||||
matched_stream_name, score, match_type = self.fuzzy_matcher.fuzzy_match(
|
matched_stream_name, score, match_type = self.fuzzy_matcher.fuzzy_match(
|
||||||
channel_name, stream_names, ignore_tags, remove_cinemax=channel_has_max
|
channel_name, stream_names, ignore_tags, remove_cinemax=channel_has_max,
|
||||||
|
ignore_quality=ignore_quality, ignore_regional=ignore_regional,
|
||||||
|
ignore_geographic=ignore_geographic, ignore_misc=ignore_misc
|
||||||
)
|
)
|
||||||
|
|
||||||
if matched_stream_name:
|
if matched_stream_name:
|
||||||
@@ -3279,6 +3291,7 @@ class Plugin:
|
|||||||
# Build header with all settings except login credentials
|
# Build header with all settings except login credentials
|
||||||
header_lines = [
|
header_lines = [
|
||||||
f"# Stream-Mapparr Export v{self.version}",
|
f"# Stream-Mapparr Export v{self.version}",
|
||||||
|
f"# FuzzyMatcher Version: {fuzzy_matcher.__version__}",
|
||||||
f"# Export Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
|
f"# Export Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
|
||||||
"#",
|
"#",
|
||||||
"# === Action Performed ===",
|
"# === Action Performed ===",
|
||||||
|
|||||||
Reference in New Issue
Block a user