diff --git a/.gitignore b/.gitignore index 2528e57..1091e65 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,14 @@ htmlcov/ # Data files (cache) /data/ *.log + +# AI/Tool config +.claude/ +.serena/ +GEMINI.md + +# Local session files +*.zip +*.txt +C--Users-btoll-claude-dispatcharr-Stream-Mapparr/ +docs/superpowers/ diff --git a/Stream-Mapparr/CHANGELOG.md b/Stream-Mapparr/CHANGELOG.md new file mode 100644 index 0000000..39938c5 --- /dev/null +++ b/Stream-Mapparr/CHANGELOG.md @@ -0,0 +1,223 @@ +# Stream-Mapparr CHANGELOG + +## v0.9.0 (April 4, 2026) +**Type**: Performance & UI Enhancement Release + +### Performance Optimizations (ported from Linearr plugin) + +**Levenshtein Acceleration**: +- Uses `rapidfuzz` C extension when available (20-50x faster) +- Pure Python fallback with early termination via new `threshold` parameter +- Combined effect: matching 94 channels x 3,362 streams in ~2s (was ~5 minutes) + +**Normalization Cache**: +- Added `precompute_normalizations()` to cache stream name normalization once before matching loops +- `fuzzy_match()` and `find_best_match()` use cached results via `_get_cached_norm()` / `_get_cached_processed()` +- Eliminates redundant `normalize_name()` calls across all 3 matching stages +- Cache fallback uses stored ignore flags for consistency + +**ETA Calculation**: +- Updated `ESTIMATED_SECONDS_PER_ITEM` from 7.73s to 0.1s to reflect actual performance + +### UI Simplification + +**Profile Name**: Free-text input replaced with dynamic dropdown populated from database + +**Match Sensitivity**: Numeric threshold (0-100) replaced with named presets: +- Relaxed (70), Normal (80), Strict (90), Exact (95) + +**Tag Handling**: Four separate boolean toggles consolidated into single dropdown: +- Strip All Tags (default), Keep Regional Tags, Keep All Tags + +**Channel Database**: Per-country boolean toggles consolidated into single dropdown: +- None, individual country, or All databases + +All changes are backward compatible — 
legacy field IDs still work as fallbacks. + +### Files Modified +- `fuzzy_matcher.py` v26.095.0100: Cache system, rapidfuzz support, early termination +- `plugin.py` v0.9.0: Precompute calls, UI fields, settings resolvers +- `plugin.json` v0.9.0: Updated field definitions + +### Version Compatibility +| Plugin Version | Required fuzzy_matcher | +|---------------|------------------------| +| 0.9.0 | 26.095.0100+ | +| 0.8.0b | 26.018.0100+ | +| 0.7.4a | 26.018.0100+ | + +--- + +## v0.8.0b (March 11, 2026) +**Type**: Bugfix Release +**Severity**: HIGH (ORM Migration) + +### Bug Fixed: Invalid `group_title` Field on Stream Model + +**Issue**: After migrating from HTTP API to Django ORM in v0.8.0a, the plugin used `group_title` as a field name on the Stream model. This field does not exist — the correct field is `channel_group` (a ForeignKey to `ChannelGroup`). Any action that loads streams (Add Streams, Preview Changes, Load/Process Channels) would fail with: +``` +Cannot resolve keyword 'group_title' into field. +``` + +**Root Cause**: During the ORM migration, the old API response field name `group_title` was carried over into ORM `.values()` queries, but the Django model uses `channel_group` (FK) instead. + +**Fix**: Replaced `group_title` with `channel_group__name` (Django FK traversal) in two locations: +- `_get_all_streams()`: Stream data query +- `_get_stream_groups()`: Distinct stream group name query + +**Files Modified**: +- `plugin.py` v0.8.0b: Fixed ORM field references +- `plugin.json` v0.8.0b: Version bump + +--- + +## v0.7.4a (January 18, 2026) +**Type**: Critical Bugfix Release +**Severity**: HIGH (Stream Matching) + +### Bug Fixed: 4K/8K Quality Tags Not Removed During Normalization + +**Issue**: Streams with "4K" or "8K" quality suffixes were not matching correctly because the space normalization step was splitting "4K" into "4 K" before quality patterns could remove it. 
+ +**Example**: +- Stream: `┃NL┃ RTL 4 4K` +- Expected: Tag removed → "RTL 4" → matches channel +- Actual: "4K" split to "4 K" → patterns fail → "RTL 4 4 K" → no match + +**Root Cause**: The digit-to-letter space normalization (`re.sub(r'(\d)([a-zA-Z])', r'\1 \2', name)`) transformed "4K" into "4 K" before quality patterns could match and remove "4K". + +**Pattern Observed**: +| Quality Suffix | Affected? | Reason | +|---------------|-----------|--------| +| HD, SD, FHD, UHD | No | All letters, not split | +| 4K, 8K | **Yes** | Digit+letter split to "4 K", "8 K" | + +**Fix**: Quality patterns are now applied BEFORE space normalization to prevent "4K"/"8K" from being broken. + +**Files Modified**: +- `fuzzy_matcher.py` v26.018.0100: Moved quality pattern removal before space normalization +- `plugin.py` v0.7.4a: Updated version and minimum fuzzy_matcher requirement + +--- + +## v0.7.3c (December 23, 2025) +**Type**: Critical Bugfix Release +**Severity**: HIGH (Unicode tag users) + +### Bug Fixed: Custom Ignore Tags with Unicode Characters Not Working + +**Issue**: Custom ignore tags containing Unicode or special characters (like `┃NLZIET┃`) were completely ignored during normalization, causing all channels to fail matching. + +**Root Cause**: Code used regex word boundaries (`\b`) for all custom tags. Word boundaries only work with alphanumeric characters. Unicode characters like `┃` (U+2503) are not word characters. + +**Fix**: Smart tag detection - only use word boundaries for pure alphanumeric tags, use literal matching for Unicode/special character tags. + +--- + +## v0.7.4 (December 22, 2025) +**Type**: Critical Bugfix Release +**Severity**: HIGH (Matching Accuracy) + +### Bug #1 Fixed: Substring Matching Too Permissive + +**Issue**: "Story" matched "HISTORY" at threshold 80 because substring matching didn't validate semantic similarity. + +**Fix**: Added 75% length ratio requirement for substring matches. 
+ +### Bug #2 Fixed: Regional Tags Stripped Despite Setting + +**Issue**: `Ignore Regional Tags: False` didn't work - "(WEST)" was still being removed by MISC_PATTERNS and callsign patterns. + +**Fix**: Conditional MISC_PATTERNS application and callsign pattern with negative lookahead for regional indicators. + +--- + +## v0.7.3 (December 21, 2025) +**Type**: Enhancement Release + +### Added FuzzyMatcher Version to CSV Headers + +CSV exports now show both plugin and fuzzy_matcher versions for better troubleshooting: +```csv +# Stream-Mapparr Export v0.7.3 +# FuzzyMatcher Version: 25.354.1835 +``` + +--- + +## v0.7.2 (December 20, 2025) +**Type**: Bugfix Release + +### Fixed: Incomplete Regional Patterns + +Updated fuzzy_matcher dependency to include all 6 US timezone regional indicators (East, West, Pacific, Central, Mountain, Atlantic) instead of just "East". + +--- + +## v0.6.x Series + +### v0.6.17 - M3U Source Prioritization +Added M3U source priority ordering for stream sorting. + +### v0.6.16 - Channel Loading Fix +Fixed channel loading issues with profile filtering. + +### v0.6.15 - Smart Stream Sorting +Implemented quality-based stream sorting using stream_stats (resolution + FPS). + +### v0.6.14 - CSV Headers Enhancement +Added comprehensive CSV headers with action name, execution mode, and settings. + +### v0.6.13 - Channel Groups Filter Fix +Fixed Sort Alternate Streams ignoring channel groups filter setting. + +### v0.6.12 - Sort Streams Fix +Critical fix for Sort Alternate Streams action using wrong API endpoint. + +### v0.6.11 - Dry Run Mode & Sort Streams +Added dry run mode toggle, Sort Alternate Streams action, flexible scheduled task configuration. + +### v0.6.10 - Lock Detection Enhancement +Added Stream model import, enhanced lock detection, manual lock clear action. + +### v0.6.9 - IPTV Checker Integration +Filter dead streams (0x0 resolution) and optional scheduler coordination. 
+ +### v0.6.8 - Quality-Based Stream Ordering +Automatic quality-based stream ordering when assigning streams. + +### v0.6.7 - Deduplication & Decade Fix +Stream deduplication, decade number preservation ("70s" not matching "90s"), plus sign handling. + +### v0.6.3 - Numbered Channel Fix +Fixed false positive matches for numbered channels (Premier Sports 1 vs 2). + +### v0.6.2 - Token Matching Fix +Fixed Sky Cinema channels matching incorrect streams. + +### v0.6.0 - Major Refactor +Replaced Celery Beat with background threading scheduler, operation lock system, WebSocket notifications, centralized configuration. + +--- + +## Upgrade Instructions + +**For v0.7.4a**: +1. Replace `plugin.py` with v0.7.4a +2. Replace `fuzzy_matcher.py` with v26.018.0100 +3. Restart Dispatcharr container +4. Re-run "Match & Assign Streams" + +**IMPORTANT**: Both files must be updated together! + +--- + +## Version Compatibility + +| Plugin Version | Required fuzzy_matcher | +|---------------|------------------------| +| 0.7.4a | 26.018.0100+ | +| 0.7.3c | 25.358.0200+ | +| 0.7.4 | 25.356.0230+ | +| 0.7.3 | 25.354.1835+ | +| 0.7.2 | 25.354.1835+ | diff --git a/Stream-Mapparr/fuzzy_matcher.py b/Stream-Mapparr/fuzzy_matcher.py index 8fc97de..4e99baf 100644 --- a/Stream-Mapparr/fuzzy_matcher.py +++ b/Stream-Mapparr/fuzzy_matcher.py @@ -11,8 +11,15 @@ import logging import unicodedata from glob import glob +# Optional C-accelerated Levenshtein (20-50x faster when available) +try: + from rapidfuzz.distance import Levenshtein as _rf_lev + _USE_RAPIDFUZZ = True +except ImportError: + _USE_RAPIDFUZZ = False + # Version: YY.DDD.HHMM (Julian date format: Year.DayOfYear.Time) -__version__ = "26.018.0100" +__version__ = "26.095.0100" # Setup logging LOGGER = logging.getLogger("plugins.fuzzy_matcher") @@ -124,6 +131,13 @@ class FuzzyMatcher: self.channel_lookup = {} # Callsign -> channel data mapping self.country_codes = None # Track which country databases are currently loaded + # Normalization 
cache for performance (avoids redundant normalize_name calls) + self._norm_cache = {} # raw_name -> normalized_lower + self._norm_nospace_cache = {} # raw_name -> normalized with spaces/&/- removed + self._processed_cache = {} # raw_name -> process_string_for_matching result + self._cached_ignore_tags = None # user_ignored_tags used during precompute + self._cached_flags = {} # ignore_quality/regional/geographic/misc used during precompute + # Load all channel databases if plugin_dir is provided if self.plugin_dir: self._load_channel_databases() @@ -322,6 +336,61 @@ class FuzzyMatcher: callsign = re.sub(r'-(?:TV|CD|LP|DT|LD)$', '', callsign) return callsign + def precompute_normalizations(self, names, user_ignored_tags=None, + ignore_quality=True, ignore_regional=True, + ignore_geographic=True, ignore_misc=True): + """ + Pre-normalize a list of names and cache the results. + Call this once before matching loops to avoid redundant normalization + when matching many channels against the same stream list. + Flags must match the flags passed to fuzzy_match() for correct results. 
+ """ + self._norm_cache.clear() + self._norm_nospace_cache.clear() + self._processed_cache.clear() + self._cached_ignore_tags = user_ignored_tags + self._cached_flags = { + 'ignore_quality': ignore_quality, + 'ignore_regional': ignore_regional, + 'ignore_geographic': ignore_geographic, + 'ignore_misc': ignore_misc, + } + + for name in names: + norm = self.normalize_name(name, user_ignored_tags, + ignore_quality=ignore_quality, + ignore_regional=ignore_regional, + ignore_geographic=ignore_geographic, + ignore_misc=ignore_misc) + if norm and len(norm) >= 2: + norm_lower = norm.lower() + self._norm_cache[name] = norm_lower + self._norm_nospace_cache[name] = re.sub(r'[\s&\-]+', '', norm_lower) + self._processed_cache[name] = self.process_string_for_matching(norm) + + self.logger.info(f"Pre-normalized {len(self._norm_cache)} stream names (from {len(names)} total)") + + def _get_cached_norm(self, name, user_ignored_tags=None): + """Get cached normalization or compute on the fly using stored flags.""" + if name in self._norm_cache: + return self._norm_cache[name], self._norm_nospace_cache[name] + tags = user_ignored_tags if user_ignored_tags is not None else self._cached_ignore_tags + norm = self.normalize_name(name, tags, **self._cached_flags) + if not norm or len(norm) < 2: + return None, None + norm_lower = norm.lower() + return norm_lower, re.sub(r'[\s&\-]+', '', norm_lower) + + def _get_cached_processed(self, name, user_ignored_tags=None): + """Get cached processed string or compute on the fly using stored flags.""" + if name in self._processed_cache: + return self._processed_cache[name] + tags = user_ignored_tags if user_ignored_tags is not None else self._cached_ignore_tags + norm = self.normalize_name(name, tags, **self._cached_flags) + if not norm or len(norm) < 2: + return None + return self.process_string_for_matching(norm) + def normalize_name(self, name, user_ignored_tags=None, ignore_quality=True, ignore_regional=True, ignore_geographic=True, 
ignore_misc=True, remove_cinemax=False, remove_country_prefix=False): """ @@ -539,23 +608,43 @@ class FuzzyMatcher: return regional, extra_tags, quality_tags - def calculate_similarity(self, str1, str2): + def calculate_similarity(self, str1, str2, threshold=None): """ Calculate Levenshtein distance-based similarity ratio between two strings. + Args: + str1: First string + str2: Second string + threshold: Optional minimum similarity (0.0-1.0). When set, returns 0.0 + early if the score cannot possibly meet this threshold. + Used with rapidfuzz's score_cutoff and for pure-Python early termination. + Returns: Similarity ratio between 0.0 and 1.0 """ + if len(str1) == 0 or len(str2) == 0: + return 0.0 + + # Fast path: use C-accelerated rapidfuzz when available + if _USE_RAPIDFUZZ: + cutoff = threshold if threshold is not None else 0.0 + return _rf_lev.normalized_similarity(str1, str2, score_cutoff=cutoff) + + # Pure Python fallback with optional early termination if len(str1) < len(str2): str1, str2 = str2, str1 - # Empty strings should not match anything (including other empty strings) - # This prevents false positives when normalization strips everything - if len(str2) == 0 or len(str1) == 0: - return 0.0 - + total_len = len(str1) + len(str2) + + # Early rejection: if strings differ in length too much, max possible + # similarity is bounded. Check before doing the full DP. 
+ if threshold is not None: + max_possible = (total_len - abs(len(str1) - len(str2))) / total_len + if max_possible < threshold: + return 0.0 + previous_row = list(range(len(str2) + 1)) - + for i, c1 in enumerate(str1): current_row = [i + 1] for j, c2 in enumerate(str2): @@ -563,16 +652,22 @@ class FuzzyMatcher: deletions = current_row[j] + 1 substitutions = previous_row[j] + (c1 != c2) current_row.append(min(insertions, deletions, substitutions)) + + # Early termination: check if minimum possible distance in this row + # already makes it impossible to meet the threshold + if threshold is not None: + min_distance_so_far = min(current_row) + # Best case: remaining chars all match perfectly + remaining = len(str1) - i - 1 + best_possible_distance = max(0, min_distance_so_far - remaining) + best_possible_ratio = (total_len - best_possible_distance) / total_len + if best_possible_ratio < threshold: + return 0.0 + previous_row = current_row - + distance = previous_row[-1] - total_len = len(str1) + len(str2) - - if total_len == 0: - return 1.0 - - ratio = (total_len - distance) / total_len - return ratio + return (total_len - distance) / total_len def process_string_for_matching(self, s): """ @@ -651,20 +746,22 @@ class FuzzyMatcher: best_match = None for candidate in candidate_names: - # Normalize candidate (stream name) with Cinemax removal if requested - candidate_normalized = self.normalize_name(candidate, user_ignored_tags, - ignore_quality=ignore_quality, - ignore_regional=ignore_regional, - ignore_geographic=ignore_geographic, - ignore_misc=ignore_misc, - remove_cinemax=remove_cinemax) + # Use cached processed string when available + processed_candidate = self._get_cached_processed(candidate, user_ignored_tags) + if not processed_candidate: + # Fallback: normalize and process on the fly + candidate_normalized = self.normalize_name(candidate, user_ignored_tags, + ignore_quality=ignore_quality, + ignore_regional=ignore_regional, + 
ignore_geographic=ignore_geographic, + ignore_misc=ignore_misc, + remove_cinemax=remove_cinemax) + if not candidate_normalized or len(candidate_normalized) < 2: + continue + processed_candidate = self.process_string_for_matching(candidate_normalized) - # Skip candidates that normalize to empty or very short strings - if not candidate_normalized or len(candidate_normalized) < 2: - continue - - processed_candidate = self.process_string_for_matching(candidate_normalized) - score = self.calculate_similarity(processed_query, processed_candidate) + score = self.calculate_similarity(processed_query, processed_candidate, + threshold=self.match_threshold / 100.0) if score > best_score: best_score = score @@ -722,28 +819,17 @@ class FuzzyMatcher: normalized_query_nospace = re.sub(r'[\s&\-]+', '', normalized_query_lower) for candidate in candidate_names: - # Normalize candidate (stream name) with Cinemax removal if requested - candidate_normalized = self.normalize_name(candidate, user_ignored_tags, - ignore_quality=ignore_quality, - ignore_regional=ignore_regional, - ignore_geographic=ignore_geographic, - ignore_misc=ignore_misc, - remove_cinemax=remove_cinemax) - - # Skip candidates that normalize to empty or very short strings (< 2 chars) - # This prevents false positives where multiple streams all normalize to "" - if not candidate_normalized or len(candidate_normalized) < 2: + # Use cached normalization when available + candidate_lower, candidate_nospace = self._get_cached_norm(candidate, user_ignored_tags) + if not candidate_lower: continue - candidate_lower = candidate_normalized.lower() - candidate_nospace = re.sub(r'[\s&\-]+', '', candidate_lower) - - # Exact match + # Exact match (space/punctuation insensitive) if normalized_query_nospace == candidate_nospace: return candidate, 100, "exact" # Very high similarity (97%+) - ratio = self.calculate_similarity(normalized_query_lower, candidate_lower) + ratio = self.calculate_similarity(normalized_query_lower, 
candidate_lower, threshold=0.97) if ratio >= 0.97 and ratio > best_ratio: best_match = candidate best_ratio = ratio @@ -754,30 +840,17 @@ class FuzzyMatcher: # Stage 2: Substring matching for candidate in candidate_names: - # Normalize candidate (stream name) with Cinemax removal if requested - candidate_normalized = self.normalize_name(candidate, user_ignored_tags, - ignore_quality=ignore_quality, - ignore_regional=ignore_regional, - ignore_geographic=ignore_geographic, - ignore_misc=ignore_misc, - remove_cinemax=remove_cinemax) - - # Skip candidates that normalize to empty or very short strings - if not candidate_normalized or len(candidate_normalized) < 2: + # Use cached normalization when available + candidate_lower, _ = self._get_cached_norm(candidate, user_ignored_tags) + if not candidate_lower: continue - candidate_lower = candidate_normalized.lower() - # Check if one is a substring of the other if normalized_query_lower in candidate_lower or candidate_lower in normalized_query_lower: - # CRITICAL FIX: Add length ratio requirement to prevent false positives - # like "story" matching "history" (story is 5 chars, history is 7 chars) - # Require strings to be within 75% of same length for substring match - # This ensures substring matches are semantically meaningful length_ratio = min(len(normalized_query_lower), len(candidate_lower)) / max(len(normalized_query_lower), len(candidate_lower)) if length_ratio >= 0.75: - # Calculate similarity score - ratio = self.calculate_similarity(normalized_query_lower, candidate_lower) + ratio = self.calculate_similarity(normalized_query_lower, candidate_lower, + threshold=self.match_threshold / 100.0) if ratio > best_ratio: best_match = candidate best_ratio = ratio @@ -787,15 +860,27 @@ class FuzzyMatcher: return best_match, int(best_ratio * 100), match_type # Stage 3: Fuzzy matching with token sorting - fuzzy_match, score = self.find_best_match(query_name, candidate_names, user_ignored_tags, - remove_cinemax=remove_cinemax, 
- ignore_quality=ignore_quality, - ignore_regional=ignore_regional, - ignore_geographic=ignore_geographic, - ignore_misc=ignore_misc) - if fuzzy_match: - return fuzzy_match, score, f"fuzzy ({score})" - + processed_query = self.process_string_for_matching(normalized_query) + best_score = -1.0 + best_fuzzy = None + threshold_ratio = self.match_threshold / 100.0 + + for candidate in candidate_names: + # Use cached processed string when available + processed_candidate = self._get_cached_processed(candidate, user_ignored_tags) + if not processed_candidate: + continue + + score = self.calculate_similarity(processed_query, processed_candidate, + threshold=threshold_ratio) + if score > best_score: + best_score = score + best_fuzzy = candidate + + percentage_score = int(best_score * 100) + if percentage_score >= self.match_threshold and best_fuzzy: + return best_fuzzy, percentage_score, f"fuzzy ({percentage_score})" + return None, 0, None def match_broadcast_channel(self, channel_name): diff --git a/Stream-Mapparr/plugin.json b/Stream-Mapparr/plugin.json index c22b0c0..972c235 100644 --- a/Stream-Mapparr/plugin.json +++ b/Stream-Mapparr/plugin.json @@ -1,22 +1,23 @@ { "name": "Stream-Mapparr", - "version": "0.8.0b", + "version": "0.9.0", "description": "Automatically add matching streams to channels based on name similarity and quality precedence. 
Supports unlimited stream matching, channel visibility management, and CSV export cleanup.", - "author": "community", + "author": "PiratesIRC", + "license": "MIT", + "repo_url": "https://github.com/PiratesIRC/Stream-Mapparr", + "min_dispatcharr_version": "v0.20.0", "help_url": "https://github.com/PiratesIRC/Stream-Mapparr", "fields": [ {"id": "overwrite_streams", "label": "Overwrite Existing Streams", "type": "boolean", "default": true}, - {"id": "fuzzy_match_threshold", "label": "Fuzzy Match Threshold", "type": "number", "default": 85}, - {"id": "profile_name", "label": "Profile Name", "type": "string", "default": ""}, + {"id": "match_sensitivity", "label": "Match Sensitivity", "type": "select", "default": "normal"}, + {"id": "profile_name", "label": "Channel Profile", "type": "select", "default": ""}, {"id": "selected_groups", "label": "Channel Groups", "type": "string", "default": ""}, {"id": "selected_stream_groups", "label": "Stream Groups", "type": "string", "default": ""}, {"id": "selected_m3us", "label": "M3U Sources", "type": "string", "default": ""}, {"id": "prioritize_quality", "label": "Prioritize Quality Before Source", "type": "boolean", "default": false}, - {"id": "ignore_tags", "label": "Ignore Tags", "type": "string", "default": ""}, - {"id": "ignore_quality_tags", "label": "Ignore Quality Tags", "type": "boolean", "default": true}, - {"id": "ignore_regional_tags", "label": "Ignore Regional Tags", "type": "boolean", "default": true}, - {"id": "ignore_geographic_tags", "label": "Ignore Geographic Tags", "type": "boolean", "default": true}, - {"id": "ignore_misc_tags", "label": "Ignore Misc Tags", "type": "boolean", "default": true}, + {"id": "ignore_tags", "label": "Custom Ignore Tags", "type": "string", "default": ""}, + {"id": "tag_handling", "label": "Tag Handling", "type": "select", "default": "strip_all"}, + {"id": "channel_database", "label": "Channel Database", "type": "select", "default": "US"}, {"id": "visible_channel_limit", "label": 
"Visible Channel Limit", "type": "number", "default": 1}, {"id": "rate_limiting", "label": "Rate Limiting", "type": "select", "default": "none"}, {"id": "timezone", "label": "Timezone", "type": "select", "default": "US/Central"}, diff --git a/Stream-Mapparr/plugin.py b/Stream-Mapparr/plugin.py index 91b905e..dfa4f49 100644 --- a/Stream-Mapparr/plugin.py +++ b/Stream-Mapparr/plugin.py @@ -58,9 +58,17 @@ class PluginConfig: """ # === PLUGIN METADATA === - PLUGIN_VERSION = "0.8.0b" + PLUGIN_VERSION = "0.9.0" FUZZY_MATCHER_MIN_VERSION = "25.358.0200" # Requires custom ignore tags Unicode fix + # Match sensitivity presets (maps select value to threshold number) + SENSITIVITY_MAP = { + "relaxed": 70, + "normal": 80, + "strict": 90, + "exact": 95, + } + # === MATCHING SETTINGS === DEFAULT_FUZZY_MATCH_THRESHOLD = 85 # Minimum similarity score (0-100) DEFAULT_OVERWRITE_STREAMS = True # Replace existing streams vs append @@ -112,7 +120,7 @@ class PluginConfig: OPERATION_LOCK_TIMEOUT_MINUTES = 10 # Lock expires after 10 minutes (in case of errors) # === PROGRESS TRACKING SETTINGS === - ESTIMATED_SECONDS_PER_ITEM = 7.73 # Historical average time per item (from log analysis) + ESTIMATED_SECONDS_PER_ITEM = 0.1 # Avg time per item with rapidfuzz + normalization cache # === IPTV CHECKER INTEGRATION SETTINGS === DEFAULT_FILTER_DEAD_STREAMS = False # Filter streams with 0x0 resolution (requires IPTV Checker) @@ -315,6 +323,16 @@ class Plugin: except Exception as e: LOGGER.debug(f"[Stream-Mapparr] Error checking version update: {e}") + # Discover channel profiles for dropdown + profile_options = [] + try: + for p in ChannelProfile.objects.all().values('id', 'name'): + profile_options.append({"value": p['name'], "label": p['name']}) + except Exception: + pass + if not profile_options: + profile_options = [{"value": "", "label": "(no profiles found)"}] + static_fields = [ { "id": "version_status", @@ -329,19 +347,25 @@ class Plugin: "help_text": "If enabled, all existing streams will 
be removed and replaced with matched streams. If disabled, only new streams will be added (existing streams preserved).", }, { - "id": "fuzzy_match_threshold", - "label": "🎯 Fuzzy Match Threshold", - "type": "number", - "default": PluginConfig.DEFAULT_FUZZY_MATCH_THRESHOLD, - "help_text": f"Minimum similarity score (0-100) for fuzzy matching. Higher values require closer matches. Default: {PluginConfig.DEFAULT_FUZZY_MATCH_THRESHOLD}", + "id": "match_sensitivity", + "label": "Match Sensitivity", + "type": "select", + "default": "normal", + "options": [ + {"value": "relaxed", "label": "Relaxed (70) - more matches, more false positives"}, + {"value": "normal", "label": "Normal (80) - balanced"}, + {"value": "strict", "label": "Strict (90) - fewer matches, high confidence"}, + {"value": "exact", "label": "Exact (95) - near-exact matches only"}, + ], + "help_text": "Controls how closely stream names must match channel names.", }, { "id": "profile_name", - "label": "📋 Profile Name", - "type": "string", - "default": PluginConfig.DEFAULT_PROFILE_NAME, - "placeholder": "Sports, Movies, News", - "help_text": "*** Required Field *** - The name(s) of existing Channel Profile(s) to process channels from. Multiple profiles can be specified separated by commas.", + "label": "Channel Profile", + "type": "select", + "default": "", + "options": profile_options, + "help_text": "Channel profile to process. Channels enabled in this profile will be matched.", }, { "id": "selected_groups", @@ -383,32 +407,16 @@ class Plugin: "help_text": "Tags to ignore when matching streams. 
Use quotes to preserve spaces/special chars (e.g., \" East\" for tags with leading space).", }, { - "id": "ignore_quality_tags", - "label": "🎬 Ignore Quality Tags", - "type": "boolean", - "default": PluginConfig.DEFAULT_IGNORE_QUALITY_TAGS, - "help_text": "If enabled, all quality indicators will be ignored in any format and position (e.g., 4K, [4K], (4K), FHD, [FHD], (FHD), HD, SD at beginning, middle, or end of name).", - }, - { - "id": "ignore_regional_tags", - "label": "🌍 Ignore Regional Tags", - "type": "boolean", - "default": PluginConfig.DEFAULT_IGNORE_REGIONAL_TAGS, - "help_text": "If enabled, hardcoded regional tags like 'East' will be ignored during matching.", - }, - { - "id": "ignore_geographic_tags", - "label": "🗺️ Ignore Geographic Tags", - "type": "boolean", - "default": PluginConfig.DEFAULT_IGNORE_GEOGRAPHIC_TAGS, - "help_text": "If enabled, all country codes will be ignored during matching (e.g., US, USA, US:, |FR|, FR -, [UK], etc.).", - }, - { - "id": "ignore_misc_tags", - "label": "🏷️ Ignore Miscellaneous Tags", - "type": "boolean", - "default": PluginConfig.DEFAULT_IGNORE_MISC_TAGS, - "help_text": "If enabled, all content within parentheses will be ignored during matching (e.g., (CX), (B), (PRIME), (Backup)).", + "id": "tag_handling", + "label": "Tag Handling", + "type": "select", + "default": "strip_all", + "options": [ + {"value": "strip_all", "label": "Strip All Tags - best for most setups"}, + {"value": "keep_regional", "label": "Keep Regional Tags - preserve East/West/Pacific"}, + {"value": "keep_all", "label": "Keep All Tags - strict, exact matching only"}, + ], + "help_text": "Controls which tags are removed during name matching. 
'Strip All' removes quality, regional, geographic, and misc tags for best matching.", }, { "id": "visible_channel_limit", @@ -513,32 +521,34 @@ class Plugin: try: databases = self._get_channel_databases() - if databases: + db_options = [{"value": "_none", "label": "None - no channel database"}] for db_info in databases: - db_id = db_info['id'] - db_label = db_info['label'] - db_default = db_info['default'] - - static_fields.append({ - "id": f"db_enabled_{db_id}", - "type": "boolean", - "label": f"Enable {db_label}", - "help_text": f"Enable or disable the {db_label} channel database for matching.", - "default": db_default - }) + db_options.append({"value": db_info['id'], "label": db_info['label']}) + if len(databases) > 1: + db_options.append({"value": "_all", "label": "All databases"}) + # Default to US if available, else first database + default_db = "US" if any(d['id'] == 'US' for d in databases) else databases[0]['id'] + static_fields.append({ + "id": "channel_database", + "label": "Channel Database", + "type": "select", + "default": default_db, + "options": db_options, + "help_text": "Channel name database for callsign and name matching.", + }) else: static_fields.append({ "id": "no_databases_found", "type": "info", - "label": "⚠️ No channel databases found. Place XX_channels.json files in the plugin directory.", + "label": "No channel databases found. 
Place XX_channels.json files in the plugin directory.", }) except Exception as e: LOGGER.error(f"[Stream-Mapparr] Error loading channel databases for settings: {e}") static_fields.append({ "id": "database_error", "type": "info", - "label": f"⚠️ Error loading channel databases: {e}", + "label": f"Error loading channel databases: {e}", }) return static_fields @@ -1025,6 +1035,62 @@ class Plugin: LOGGER.error(f"[Stream-Mapparr] Error scanning for channel databases: {e}") return databases + def _resolve_match_threshold(self, settings): + """Resolve match threshold from new match_sensitivity select or legacy fuzzy_match_threshold.""" + sensitivity = settings.get("match_sensitivity", "") + threshold = PluginConfig.SENSITIVITY_MAP.get(sensitivity) + if threshold is not None: + return threshold + # Fallback: legacy numeric field + threshold = settings.get("fuzzy_match_threshold", PluginConfig.DEFAULT_FUZZY_MATCH_THRESHOLD) + try: + return max(0, min(100, int(threshold))) + except (ValueError, TypeError): + return PluginConfig.DEFAULT_FUZZY_MATCH_THRESHOLD + + def _resolve_ignore_flags(self, settings): + """Resolve ignore flags from new tag_handling select or legacy individual booleans. 
+ Returns (ignore_quality, ignore_regional, ignore_geographic, ignore_misc) tuple.""" + tag_handling = settings.get("tag_handling", "") + if tag_handling == "strip_all": + return True, True, True, True + elif tag_handling == "keep_regional": + return True, False, True, True + elif tag_handling == "keep_all": + return False, False, False, False + # Fallback: legacy individual booleans + iq = settings.get("ignore_quality_tags", PluginConfig.DEFAULT_IGNORE_QUALITY_TAGS) + ir = settings.get("ignore_regional_tags", PluginConfig.DEFAULT_IGNORE_REGIONAL_TAGS) + ig = settings.get("ignore_geographic_tags", PluginConfig.DEFAULT_IGNORE_GEOGRAPHIC_TAGS) + im = settings.get("ignore_misc_tags", PluginConfig.DEFAULT_IGNORE_MISC_TAGS) + if isinstance(iq, str): iq = iq.lower() in ('true', 'yes', '1') + if isinstance(ir, str): ir = ir.lower() in ('true', 'yes', '1') + if isinstance(ig, str): ig = ig.lower() in ('true', 'yes', '1') + if isinstance(im, str): im = im.lower() in ('true', 'yes', '1') + return bool(iq), bool(ir), bool(ig), bool(im) + + def _resolve_enabled_databases(self, settings): + """Resolve which channel databases are enabled from new channel_database select or legacy db_enabled_XX booleans. 
+ Returns set of enabled country codes (e.g., {'US', 'UK'}) or None for all.""" + db_setting = settings.get("channel_database", "") + if db_setting == "_all": + return None # None means all enabled + elif db_setting == "_none": + return set() # Empty set means none enabled + elif db_setting: + return {db_setting.upper()} + # Fallback: legacy per-database booleans + databases = self._get_channel_databases() + enabled = set() + for db_info in databases: + setting_key = f"db_enabled_{db_info['id']}" + val = settings.get(setting_key, db_info['default']) + if isinstance(val, str): + val = val.lower() in ('true', 'yes', '1', 'on') + if val: + enabled.add(db_info['id']) + return enabled + def _initialize_fuzzy_matcher(self, match_threshold=85): """Initialize the fuzzy matcher with configured threshold.""" if self.fuzzy_matcher is None: @@ -1476,19 +1542,10 @@ class Plugin: logger.warning(f"[Stream-Mapparr] No *_channels.json files found in {plugin_dir}") return channels_data + enabled_set = self._resolve_enabled_databases(settings) if settings else None enabled_databases = [] for db_info in databases: - db_id = db_info['id'] - setting_key = f"db_enabled_{db_id}" - if settings: - is_enabled = settings.get(setting_key, db_info['default']) - # Handle string boolean values - if isinstance(is_enabled, str): - is_enabled = is_enabled.lower() in ('true', 'yes', '1') - else: - is_enabled = db_info['default'] - - if is_enabled: + if enabled_set is None or db_info['id'] in enabled_set: enabled_databases.append(db_info) if not enabled_databases: @@ -1751,11 +1808,13 @@ class Plugin: length_ratio = min(len(stream_lower), len(channel_lower)) / max(len(stream_lower), len(channel_lower)) if length_ratio >= 0.75: # Calculate similarity to ensure it meets threshold - similarity = self.fuzzy_matcher.calculate_similarity(stream_lower, channel_lower) + similarity = self.fuzzy_matcher.calculate_similarity( + stream_lower, channel_lower, + 
threshold=self.fuzzy_matcher.match_threshold / 100.0) if int(similarity * 100) >= self.fuzzy_matcher.match_threshold: matching_streams.append(stream) continue - + # Token-based matching: check if significant tokens overlap # This catches cases like "ca al jazeera" vs "al jazeera english" # Split into tokens (words) @@ -1817,7 +1876,9 @@ class Plugin: if should_check_similarity: # Calculate full string similarity - similarity = self.fuzzy_matcher.calculate_similarity(stream_lower, channel_lower) + similarity = self.fuzzy_matcher.calculate_similarity( + stream_lower, channel_lower, + threshold=self.fuzzy_matcher.match_threshold / 100.0) if int(similarity * 100) >= self.fuzzy_matcher.match_threshold: matching_streams.append(stream) @@ -2154,11 +2215,7 @@ class Plugin: settings = context['settings'] # Initialize fuzzy matcher with configured threshold - match_threshold = settings.get("fuzzy_match_threshold", PluginConfig.DEFAULT_FUZZY_MATCH_THRESHOLD) - try: - match_threshold = int(match_threshold) - except (ValueError, TypeError): - match_threshold = PluginConfig.DEFAULT_FUZZY_MATCH_THRESHOLD + match_threshold = self._resolve_match_threshold(settings) self._initialize_fuzzy_matcher(match_threshold) @@ -2403,19 +2460,12 @@ class Plugin: validation_results.append("❌ Channel Databases: No files found") has_errors = True else: + enabled_set = self._resolve_enabled_databases(settings) enabled_databases = [] for db_info in databases: - db_id = db_info['id'] - setting_key = f"db_enabled_{db_id}" - is_enabled = settings.get(setting_key, db_info['default']) - - # Handle string boolean values - if isinstance(is_enabled, str): - is_enabled = is_enabled.lower() in ('true', 'yes', '1', 'on') - - if is_enabled: + if enabled_set is None or db_info['id'] in enabled_set: enabled_databases.append(db_info['label']) - + if not enabled_databases: validation_results.append("❌ Channel Databases: None enabled") has_errors = True @@ -2477,16 +2527,7 @@ class Plugin: prioritize_quality = 
prioritize_quality.lower() in ('true', 'yes', '1') self._prioritize_quality = bool(prioritize_quality) - ignore_quality = settings.get("ignore_quality_tags", PluginConfig.DEFAULT_IGNORE_QUALITY_TAGS) - ignore_regional = settings.get("ignore_regional_tags", PluginConfig.DEFAULT_IGNORE_REGIONAL_TAGS) - ignore_geographic = settings.get("ignore_geographic_tags", PluginConfig.DEFAULT_IGNORE_GEOGRAPHIC_TAGS) - ignore_misc = settings.get("ignore_misc_tags", PluginConfig.DEFAULT_IGNORE_MISC_TAGS) - - # Handle boolean string conversions - if isinstance(ignore_quality, str): ignore_quality = ignore_quality.lower() in ('true', 'yes', '1') - if isinstance(ignore_regional, str): ignore_regional = ignore_regional.lower() in ('true', 'yes', '1') - if isinstance(ignore_geographic, str): ignore_geographic = ignore_geographic.lower() in ('true', 'yes', '1') - if isinstance(ignore_misc, str): ignore_misc = ignore_misc.lower() in ('true', 'yes', '1') + ignore_quality, ignore_regional, ignore_geographic, ignore_misc = self._resolve_ignore_flags(settings) profile_names = [name.strip() for name in profile_names_str.split(',') if name.strip()] ignore_tags = self._parse_tags(ignore_tags_str) if ignore_tags_str else [] @@ -2662,7 +2703,7 @@ class Plugin: selected_groups = processed_data.get('selected_groups', []) selected_stream_groups = processed_data.get('selected_stream_groups', []) selected_m3us = processed_data.get('selected_m3us', []) - current_threshold = settings.get('fuzzy_match_threshold', PluginConfig.DEFAULT_FUZZY_MATCH_THRESHOLD) + current_threshold = self._resolve_match_threshold(settings) # Build header with all settings except login credentials header_lines = [ @@ -2702,17 +2743,9 @@ class Plugin: enabled_dbs = [] try: databases = self._get_channel_databases() + enabled_set = self._resolve_enabled_databases(settings) for db_info in databases: - db_id = db_info['id'] - setting_key = f"db_enabled_{db_id}" - # Try to get from settings first, fallback to default - is_enabled = 
settings.get(setting_key, db_info['default']) - - # Handle string boolean values - if isinstance(is_enabled, str): - is_enabled = is_enabled.lower() in ('true', 'yes', '1', 'on') - - if is_enabled: + if enabled_set is None or db_info['id'] in enabled_set: enabled_dbs.append(db_info['label']) except Exception as e: LOGGER.warning(f"[Stream-Mapparr] Could not determine enabled databases: {e}") @@ -2947,6 +2980,14 @@ class Plugin: ignore_misc = processed_data.get('ignore_misc', True) filter_dead = processed_data.get('filter_dead_streams', PluginConfig.DEFAULT_FILTER_DEAD_STREAMS) + # Pre-normalize stream names for matching performance + if self.fuzzy_matcher and streams: + stream_names = list(set(s['name'] for s in streams)) + self.fuzzy_matcher.precompute_normalizations( + stream_names, ignore_tags, + ignore_quality=ignore_quality, ignore_regional=ignore_regional, + ignore_geographic=ignore_geographic, ignore_misc=ignore_misc) + channel_groups = {} for channel in channels: channel_info = self._get_channel_info_from_json(channel['name'], channels_data, logger) @@ -2963,11 +3004,7 @@ class Plugin: total_channels_to_update = 0 low_match_channels = [] # Track channels with few matches for recommendations threshold_data = {} # Track threshold analysis for recommendations - current_threshold = settings.get('fuzzy_match_threshold', PluginConfig.DEFAULT_FUZZY_MATCH_THRESHOLD) - try: - current_threshold = int(current_threshold) - except (ValueError, TypeError): - current_threshold = 85 + current_threshold = self._resolve_match_threshold(settings) self._send_progress_update("preview_changes", 'running', 30, f'Analyzing {len(channel_groups)} channel groups...', context) @@ -3152,6 +3189,14 @@ class Plugin: ignore_misc = processed_data.get('ignore_misc', True) filter_dead = processed_data.get('filter_dead_streams', PluginConfig.DEFAULT_FILTER_DEAD_STREAMS) + # Pre-normalize stream names for matching performance + if self.fuzzy_matcher and streams: + stream_names = 
list(set(s['name'] for s in streams)) + self.fuzzy_matcher.precompute_normalizations( + stream_names, ignore_tags, + ignore_quality=ignore_quality, ignore_regional=ignore_regional, + ignore_geographic=ignore_geographic, ignore_misc=ignore_misc) + channel_groups = {} for channel in channels: channel_info = self._get_channel_info_from_json(channel['name'], channels_data, logger) @@ -3266,11 +3311,7 @@ class Plugin: csv_data = [] low_match_channels = [] # Track channels with few matches for recommendations threshold_data = {} # Track threshold analysis for recommendations - current_threshold = settings.get('fuzzy_match_threshold', PluginConfig.DEFAULT_FUZZY_MATCH_THRESHOLD) - try: - current_threshold = int(current_threshold) - except (ValueError, TypeError): - current_threshold = 85 + current_threshold = self._resolve_match_threshold(settings) for group_key, group_channels in channel_groups.items(): sorted_channels = self._sort_channels_by_priority(group_channels) @@ -3499,11 +3540,9 @@ class Plugin: # Initialize fuzzy matcher for callsign extraction if not self.fuzzy_matcher: - match_threshold = settings.get("fuzzy_match_threshold", PluginConfig.DEFAULT_FUZZY_MATCH_THRESHOLD) - if isinstance(match_threshold, str): - match_threshold = int(match_threshold) + match_threshold = self._resolve_match_threshold(settings) self._initialize_fuzzy_matcher(match_threshold) - + # Match channels using US OTA callsign database logger.info("[Stream-Mapparr] Matching channels using US OTA callsign database...") logger.info("[Stream-Mapparr] Note: Only channels with valid US callsigns will be matched")