From 9251e6e83740d5aa54f21f7bcbbeff7d6bfb66ac Mon Sep 17 00:00:00 2001 From: ponzischeme89 Date: Sun, 18 Jan 2026 22:29:51 +1300 Subject: [PATCH] 1.0.0.7 - Matching improves, added library page. Removed schedule scans support --- README.md | 26 ++- frontend/src/App.svelte | 6 +- frontend/src/components/AppSidebar.svelte | 17 +- frontend/src/components/LibraryPanel.svelte | 153 ++++++++++++++++++ .../src/components/TypewriterQuote.svelte | 24 +-- frontend/src/lib/api.js | 10 ++ server/app.py | 143 +++++++++++++++- server/core/database.py | 130 ++++++++++++++- server/core/file_scanner.py | 37 ++--- server/core/subtitle_processor.py | 8 + 10 files changed, 502 insertions(+), 52 deletions(-) create mode 100644 frontend/src/components/LibraryPanel.svelte diff --git a/README.md b/README.md index 1a66a3a..42b0ac5 100644 --- a/README.md +++ b/README.md @@ -102,10 +102,28 @@ networks: ``` -## Roadmap -- [x] Add support for TVMaze -- [ ] Add support for more themes -- [ ] Bring in posters into results list so it's easier to identify movies / TV shows +## Limitations +- API rate limits: OMDb is tight, TMDb is better, TVMaze is polite-but-limited. Heavy scans may hit caps. +- Metadata gaps: If providers don’t have it, Sublogue won’t either. Ratings/plots can be missing or stale. +- Localisation: Only TMDb supports proper language/region data. OMDb/TVMaze are mostly English-only. +- Long plots: Big summaries go in as-is. Your TV may split them across multiple screens. +- Formats: Only .srt is supported. No WebVTT, ASS/SSA, or embedded subs yet. +- Duplicate inserts: Reprocessing the same file will stack multiple plot blocks. +- Offline use: Requires internet for metadata lookups — no offline mode. +- File access: Read-only or locked files cannot be processed. 
+ + ## Roadmap + - [x] TVMaze integration + - [ ] More UI themes (OLED variants, Ocean+, and high-contrast) + - [ ] Poster + backdrop previews in results + - [ ] Smart duplicate-detection (don’t re-insert plot blocks) + - [ ] Automatic rate-limit backoff + retry logic + - [ ] Optional “short plot mode” for long summaries + - [ ] Expanded localisation using TMDb (title, plot, cast where available) + - [ ] Multi-format subtitle support (WebVTT, ASS/SSA) + - [ ] Offline caching of recent metadata lookups + - [ ] Per-scan analytics: success/fail counts, rate-limit warnings + - [ ] CLI mode for batch operations ## Support - Help spread the word about Sublogue by telling your friends about this repo diff --git a/frontend/src/App.svelte b/frontend/src/App.svelte index db585e3..1274b72 100644 --- a/frontend/src/App.svelte +++ b/frontend/src/App.svelte @@ -7,7 +7,7 @@ import SettingsPanel from './components/SettingsPanel.svelte' import ScanPanel from './components/ScanPanel.svelte' import HistoryPanel from './components/HistoryPanel.svelte' - import ScheduledScansPanel from './components/ScheduledScansPanel.svelte' + import LibraryPanel from './components/LibraryPanel.svelte' import { Menu } from 'lucide-svelte' import ToastHost from './components/ToastHost.svelte' import { healthCheck } from './lib/api.js' @@ -164,8 +164,8 @@ onOpenHistory={() => navigateTo('history')} /> {/key} - {:else if currentView === 'scheduled'} - + {:else if currentView === 'library'} + {/if} diff --git a/frontend/src/components/AppSidebar.svelte b/frontend/src/components/AppSidebar.svelte index eae194b..2faa5db 100644 --- a/frontend/src/components/AppSidebar.svelte +++ b/frontend/src/components/AppSidebar.svelte @@ -3,16 +3,13 @@ import { Separator } from "../lib/components/ui/separator"; import { Badge } from "../lib/components/ui/badge"; import { - Calendar, - Download, ChevronLeft, ChevronRight, Github, - Heart, - Package, Scan, Settings, History, + Library, } from "lucide-svelte"; import 
ThemeSelector from "./ThemeSelector.svelte"; import sublogueLogo from "../assets/sublogue_v2.png"; @@ -115,16 +112,16 @@ className={`w-full rounded-md py-1.5 text-[13px] font-semibold leading-none ${ collapsed ? "justify-center px-0" : "justify-start px-2 gap-2" } ${ - currentView === "scheduled" + currentView === "library" ? "bg-[color:var(--bg-hover)] text-white font-bold" : "text-text-secondary hover:text-white hover:bg-[color:var(--bg-hover)]" }`} - on:click={() => onNavigate("scheduled")} - aria-current={currentView === "scheduled" ? "page" : undefined} + on:click={() => onNavigate("library")} + aria-current={currentView === "library" ? "page" : undefined} > - + {#if !collapsed} - Scheduled Scans + Library {/if} @@ -157,7 +154,7 @@ > {#if !collapsed} v1.0.6 Release Candiatev1.0.7 Release Candidate {:else} v diff --git a/frontend/src/components/LibraryPanel.svelte b/frontend/src/components/LibraryPanel.svelte new file mode 100644 index 0000000..3e5c81b --- /dev/null +++ b/frontend/src/components/LibraryPanel.svelte @@ -0,0 +1,153 @@ + +
+
+
+

Library Health

+

+ Review subtitles from each scan and spot missing plots, duplicates, and insufficient gaps. +

+
+ +
+ + {#if error} +
+

{error}

+
+ {/if} + + {#if loading} +
Loading library report...
+ {:else if items.length === 0} +
+
+ +
+

No scan data yet

+

Run a scan to populate the library report.

+
+
+
+ {:else} +
+ {#each items as item} +
+
+
+
+ {item.title}{item.year ? ` (${item.year})` : ""} +
+
+ {item.files.length} subtitle file{item.files.length === 1 ? "" : "s"} +
+
+
+ + Missing: {item.health.missing_plot} + + + Duplicates: {item.health.duplicate_plot} + + + Gap issues: {item.health.insufficient_gap} + + +
+
+ + {#if expanded[item.title]} +
+
+ + + + + + + + + + + {#each item.files as file} + + + + + + + {/each} + +
FileStatusPlotIssues
+ {file.display_name || file.name} + {file.status || "Not Loaded"} + {file.has_plot ? "Present" : "Missing"} + + {#if file.issues.length === 0} + Healthy + {:else} +
+ {#each file.issues as issue} +
+ {issue.type.replace("_", " ")} — {issue.reason} +
+ {/each} +
+ {/if} +
+
+
+ {/if} +
+ {/each} +
+ {/if} +
diff --git a/frontend/src/components/TypewriterQuote.svelte b/frontend/src/components/TypewriterQuote.svelte index 6b9129f..db7cfab 100644 --- a/frontend/src/components/TypewriterQuote.svelte +++ b/frontend/src/components/TypewriterQuote.svelte @@ -19,18 +19,18 @@ "This scan is sponsored by existential dread.", ], rude: [ - "Ugh, more files? Seriously?", - "You could've organized these better, you know.", - "Why are there so many files? Get a hobby.", - "I don't get paid enough for this.", - "Your naming conventions are a crime.", - "This is taking forever because of YOUR mess.", - "I've seen better file structures in a dumpster.", - "Oh great, another scan. My favorite.", - "Do you even know what you're looking for?", - "These files are judging you. So am I.", - "Scanning your questionable life choices.", - "I hope you appreciate this. You won't.", + "Ugh, more files? What did you do, collect them competitively?", + "You could've organized these better. You actively chose not to.", + "Why are there so many files? Therapy is cheaper.", + "I don't get paid enough for this. Actually, I don't get paid at all.", + "Your naming conventions aren’t just bad — they’re offensive.", + "This is taking forever because you live like this.", + "I’ve seen better file structures in a crime scene.", + "Oh great, another scan. Thrilling. Electrifying. Life-changing.", + "Do you even know what you're looking for, or are we just clicking things now?", + "These files are judging you. Loudly.", + "Scanning your deeply questionable life choices.", + "I hope you appreciate this. 
Statistically, you won’t.", ], nice: [ "Taking a moment to find your perfect subtitles.", diff --git a/frontend/src/lib/api.js b/frontend/src/lib/api.js index 3e28369..c49dad4 100644 --- a/frontend/src/lib/api.js +++ b/frontend/src/lib/api.js @@ -332,6 +332,16 @@ export async function getStatistics() { return apiFetch('/statistics') } +// ============ LIBRARY API ============ + +/** + * GET /api/library - Get library health report + * Returns: { success, scans: [...] } + */ +export async function getLibraryReport(limit = 25) { + return apiFetch(`/library?limit=${limit}`) +} + // ============ SCHEDULED SCANS API ============ /** diff --git a/server/app.py b/server/app.py index 616c74d..b1c06bb 100644 --- a/server/app.py +++ b/server/app.py @@ -4,6 +4,8 @@ import logging import os import threading import time +import re +from difflib import SequenceMatcher from datetime import datetime, timezone from pathlib import Path @@ -14,7 +16,8 @@ from core.config_manager import ConfigManager from core.omdb_client import OMDbClient from core.tmdb_client import TMDbClient from core.tvmaze_client import TVMazeClient -from core.subtitle_processor import SubtitleProcessor, SubtitleFormatOptions +from core.subtitle_processor import SubtitleProcessor, SubtitleFormatOptions, SUBLOGUE_TOKEN_PATTERN, SUBLOGUE_SENTINEL +from core.keyword_stripper import get_stripper from core.file_scanner import FileScanner from core.database import DatabaseManager @@ -75,12 +78,13 @@ def perform_scheduled_scan(directory): scan_duration_ms = int((time.time() - start_time) * 1000) files_with_plot = sum(1 for f in files if f.get("has_plot", False)) - db.add_scan_history( + scan_id = db.add_scan_history( directory=directory, files_found=len(files), files_with_plot=files_with_plot, scan_duration_ms=scan_duration_ms ) + db.add_scan_files(scan_id, files) return { "files_found": len(files), @@ -246,6 +250,115 @@ def _merge_format_options(base_options: SubtitleFormatOptions, rule: dict | None ) +def 
_parse_library_identity(file_info: dict) -> dict: + """Parse title, year, season, and episode from filename metadata.""" + file_name = file_info.get("name", "") + title = file_info.get("title") + year = file_info.get("year") + + if not title: + stripped = get_stripper().clean_filename(file_name, preserve_year=True) + title = stripped.get("cleaned_title") or Path(file_name).stem + year = year or stripped.get("year") + season = stripped.get("season") + episode = stripped.get("episode") + else: + season, episode = get_stripper().extract_season_episode(file_name) + + clean_title = title or Path(file_name).stem + clean_title = clean_title.replace(SUBLOGUE_SENTINEL, "") + clean_title = re.sub(r"<[^>]+>", "", clean_title) + clean_title = SUBLOGUE_TOKEN_PATTERN.sub("", clean_title) + clean_title = re.sub(r"\b(en|eng|english|ita|it|italian|fr|es|de|multi)\b", "", clean_title, flags=re.I) + clean_title = re.sub(r'\s*-\s*copy\b', '', clean_title, flags=re.I) + clean_title = re.sub(r'\s*copy\b', '', clean_title, flags=re.I) + clean_title = re.sub(r"\((\d{4})\)\s*\(\1\)", r"(\1)", clean_title) + if year: + clean_title = re.sub(rf"\s*\({re.escape(str(year))}\)$", "", clean_title) + clean_title = " ".join(clean_title.split()).strip() + + return { + "title": clean_title, + "year": year, + "season": season, + "episode": episode, + } + + +def _group_key(title: str, year: str | None) -> str: + base = title.strip().lower() + return f"{base} ({year})" if year else base + + +def _build_library_items(files: list[dict], latest_results: dict, limit: int) -> list[dict]: + """Aggregate scan files into library items.""" + grouped = {} + for file_info in files: + parsed = _parse_library_identity(file_info) + key = _group_key(parsed["title"], parsed["year"]) + item = grouped.get(key) + if not item: + # Try fuzzy match to existing groups + for existing_key, existing in grouped.items(): + ratio = SequenceMatcher(None, existing["title"].lower(), parsed["title"].lower()).ratio() + if ratio >= 0.88: 
+ key = existing_key + item = existing + break + if not item: + item = grouped.setdefault(key, { + "title": parsed["title"], + "year": parsed["year"], + "files": [], + "health": { + "missing_plot": 0, + "duplicate_plot": 0, + "insufficient_gap": 0 + } + }) + + issues = [] + if not file_info.get("has_plot"): + issues.append({"type": "missing_plot", "reason": "No plot detected"}) + item["health"]["missing_plot"] += 1 + if (file_info.get("plot_marker_count") or 0) > 1: + issues.append({"type": "duplicate_plot", "reason": "Multiple plot markers detected"}) + item["health"]["duplicate_plot"] += 1 + + latest_result = latest_results.get(file_info.get("path")) + if latest_result and latest_result.get("status") == "Insufficient Gap": + issues.append({ + "type": "insufficient_gap", + "reason": latest_result.get("error_message") or "Insufficient gap before first subtitle" + }) + item["health"]["insufficient_gap"] += 1 + + display_name = parsed["title"] + if parsed["season"] is not None and parsed["episode"] is not None: + display_name = f"{parsed['title']} - S{parsed['season']:02d}E{parsed['episode']:02d}" + elif parsed["year"]: + display_name = f"{parsed['title']} ({parsed['year']})" + + item["files"].append({ + **file_info, + "display_name": display_name, + "duplicate_plot": (file_info.get("plot_marker_count") or 0) > 1, + "latest_status": latest_result.get("status") if latest_result else None, + "latest_error": latest_result.get("error_message") if latest_result else None, + "issues": issues + }) + + items = list(grouped.values()) + items.sort( + key=lambda entry: ( + entry["health"]["missing_plot"] + + entry["health"]["duplicate_plot"] + + entry["health"]["insufficient_gap"] + ), + reverse=True + ) + return items[:limit] + def get_format_options_from_settings() -> SubtitleFormatOptions: """Load subtitle formatting options from database settings.""" return SubtitleFormatOptions( @@ -418,12 +531,13 @@ def start_scan(): files_with_plot = sum(1 for f in files if 
f.get("has_plot", False)) # Save scan history to database - db.add_scan_history( + scan_id = db.add_scan_history( directory=directory, files_found=len(files), files_with_plot=files_with_plot, scan_duration_ms=scan_duration_ms ) + db.add_scan_files(scan_id, files) # Load existing suggested matches for this directory suggested_matches = db.get_suggested_matches_for_directory(directory) @@ -529,12 +643,13 @@ def stream_scan(): files_with_plot = sum(1 for f in all_files if f.get("has_plot", False)) # Save scan history to database - db.add_scan_history( + scan_id = db.add_scan_history( directory=directory, files_found=len(all_files), files_with_plot=files_with_plot, scan_duration_ms=scan_duration_ms ) + db.add_scan_files(scan_id, all_files) # Load existing suggested matches logger.info("Loading suggested matches from database...") @@ -1520,6 +1635,26 @@ def get_scan_history(): }), 500 +@app.route('/api/library', methods=['GET']) +def get_library_report(): + """Get library health report with scan files and issue summaries""" + try: + limit = request.args.get('limit', 200, type=int) + latest_files = db.get_latest_scan_files() + latest_results = db.get_latest_file_results() + + return jsonify({ + "success": True, + "items": _build_library_items(latest_files, latest_results, limit) + }) + except Exception as e: + logger.error(f"Error fetching library report: {e}") + return jsonify({ + "success": False, + "error": str(e) + }), 500 + + @app.route('/api/statistics', methods=['GET']) def get_statistics(): """Get overall statistics""" diff --git a/server/core/database.py b/server/core/database.py index 57f8b10..5d801f7 100644 --- a/server/core/database.py +++ b/server/core/database.py @@ -4,7 +4,7 @@ Handles persistent storage for settings, runs, and history """ from datetime import datetime from pathlib import Path -from sqlalchemy import create_engine, Column, Integer, String, DateTime, Boolean, Float, Text, ForeignKey +from sqlalchemy import create_engine, Column, Integer, 
String, DateTime, Boolean, Float, Text, ForeignKey, text from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker, relationship, scoped_session import json @@ -85,6 +85,28 @@ class ScanHistory(Base): return f"" +class ScanFile(Base): + """Scan files table - stores file details per scan""" + __tablename__ = 'scan_files' + + id = Column(Integer, primary_key=True) + scan_id = Column(Integer, ForeignKey('scan_history.id'), nullable=False, index=True) + file_path = Column(String(500), nullable=False, index=True) + file_name = Column(String(255), nullable=False) + title = Column(String(255)) + year = Column(String(10)) + has_plot = Column(Boolean, default=False) + plot_marker_count = Column(Integer, default=0) + status = Column(String(100)) + summary = Column(Text) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + + scan = relationship("ScanHistory") + + def __repr__(self): + return f"" + + class ScheduledScan(Base): """Scheduled scans table - stores scheduled scan jobs and results""" __tablename__ = 'scheduled_scans' @@ -172,12 +194,32 @@ class DatabaseManager: # Create tables if they don't exist Base.metadata.create_all(self.engine) + self._ensure_scan_files_schema() logger.info(f"Database initialized at {self.db_path}") def get_session(self): """Get a new database session""" return self.Session() + def _ensure_scan_files_schema(self): + """Ensure scan_files table has newer columns in existing databases.""" + session = self.get_session() + try: + columns = session.execute(text("PRAGMA table_info(scan_files)")).fetchall() + if not columns: + return + existing = {row[1] for row in columns} # column name is index 1 + if "title" not in existing: + session.execute(text("ALTER TABLE scan_files ADD COLUMN title VARCHAR(255)")) + if "year" not in existing: + session.execute(text("ALTER TABLE scan_files ADD COLUMN year VARCHAR(10)")) + session.commit() + except Exception as e: + session.rollback() + 
logger.error(f"Error migrating scan_files schema: {e}") + finally: + session.close() + def close_session(self): """Close the session""" self.Session.remove() @@ -387,6 +429,7 @@ class DatabaseManager: session.add(scan) session.commit() logger.info(f"Scan history saved for {directory}") + return scan.id except Exception as e: session.rollback() logger.error(f"Error saving scan history: {e}") @@ -416,6 +459,91 @@ class DatabaseManager: finally: session.close() + def add_scan_files(self, scan_id, files): + """Persist file details for a scan""" + session = self.get_session() + try: + for file_info in files: + session.add(ScanFile( + scan_id=scan_id, + file_path=file_info.get("path"), + file_name=file_info.get("name"), + title=file_info.get("title"), + year=file_info.get("year"), + has_plot=bool(file_info.get("has_plot")), + plot_marker_count=int(file_info.get("plot_marker_count") or 0), + status=file_info.get("status"), + summary=file_info.get("summary", "") + )) + session.commit() + except Exception as e: + session.rollback() + logger.error(f"Error saving scan files: {e}") + raise + finally: + session.close() + + def get_scan_files(self, scan_id): + """Get all files for a scan""" + session = self.get_session() + try: + files = session.query(ScanFile).filter_by(scan_id=scan_id).all() + return [ + { + "path": f.file_path, + "name": f.file_name, + "title": f.title, + "year": f.year, + "has_plot": f.has_plot, + "plot_marker_count": f.plot_marker_count, + "status": f.status, + "summary": f.summary + } + for f in files + ] + finally: + session.close() + + def get_latest_scan_files(self): + """Get latest scan entry per file path""" + session = self.get_session() + try: + files = session.query(ScanFile).order_by(ScanFile.created_at.desc()).all() + latest = {} + for file_entry in files: + if file_entry.file_path in latest: + continue + latest[file_entry.file_path] = { + "path": file_entry.file_path, + "name": file_entry.file_name, + "title": file_entry.title, + "year": 
file_entry.year, + "has_plot": file_entry.has_plot, + "plot_marker_count": file_entry.plot_marker_count, + "status": file_entry.status, + "summary": file_entry.summary + } + return list(latest.values()) + finally: + session.close() + + def get_latest_file_results(self): + """Get latest processing result per file""" + session = self.get_session() + try: + results = session.query(FileResult).order_by(FileResult.processed_at.desc()).all() + latest = {} + for result in results: + if result.file_path not in latest: + latest[result.file_path] = { + "status": result.status, + "error_message": result.error_message, + "processed_at": result.processed_at.isoformat() if result.processed_at else None + } + return latest + finally: + session.close() + # ============ SCHEDULED SCAN OPERATIONS ============ def create_scheduled_scan(self, directory, scheduled_for): diff --git a/server/core/file_scanner.py b/server/core/file_scanner.py index 42f7283..1a3c1d4 100644 --- a/server/core/file_scanner.py +++ b/server/core/file_scanner.py @@ -24,7 +24,7 @@ logger.addHandler(handler) import sys sys.path.insert(0, str(Path(__file__).parent)) -from subtitle_processor import parse_srt +from subtitle_processor import parse_srt, SUBLOGUE_SENTINEL, SUBLOGUE_TOKEN_PATTERN class FileScanner: @@ -114,7 +114,8 @@ class FileScanner: # -------------------------------------------- try: - has_plot = cls._check_has_plot(file_path) + plot_marker_count = cls._count_plot_markers(file_path) + has_plot = plot_marker_count > 0 logger.debug( "Plot check for %s: %s", file_path.name, @@ -148,6 +149,8 @@ class FileScanner: "path": str(file_path), "name": file_path.name, "has_plot": has_plot, + "plot_marker_count": plot_marker_count, + "duplicate_plot": plot_marker_count > 1, "status": "Has Plot" if has_plot else "Not Loaded", "summary": metadata.get("summary", ""), "plot": metadata.get("summary", ""), @@ -216,30 +219,25 @@ class FileScanner: ) @classmethod - def _check_has_plot(cls, file_path: Path) -> bool: + def 
_count_plot_markers(cls, file_path: Path) -> int: """ - Check first N lines for Sublogue signature. + Count Sublogue plot markers to detect duplicates. """ - logger.debug("Scanning for plot marker in %s", file_path.name) + logger.debug("Scanning for plot markers in %s", file_path.name) try: - with file_path.open("r", encoding="utf-8", errors="ignore") as f: - for i, line in enumerate(f): - if i >= cls.PLOT_SCAN_LINES: - break - if "generated by sublogue" in line.lower(): - logger.debug( - "Plot marker found in %s (line %d)", - file_path.name, i + 1 - ) - return True + content = file_path.read_text(encoding="utf-8", errors="ignore") + lower_content = content.lower() + generated_count = lower_content.count("generated by sublogue") + if generated_count > 0: + return generated_count + return content.count(SUBLOGUE_SENTINEL) except Exception as e: logger.error( "Error reading file during plot scan: %s (%s)", file_path, e ) - - return False + return 0 @classmethod def _extract_metadata(cls, file_path: Path) -> Dict: @@ -270,6 +268,7 @@ class FileScanner: plot_text = blocks[1].text plot_text = plot_text.split("Generated by Sublogue")[0].strip() + plot_text = SUBLOGUE_TOKEN_PATTERN.sub("", plot_text).strip() metadata["summary"] = plot_text # -------------------------------------------- @@ -279,7 +278,9 @@ class FileScanner: header_lines = blocks[0].text.split("\n") if header_lines: - first_line = header_lines[0] + first_line = header_lines[0].strip() + if first_line == SUBLOGUE_SENTINEL and len(header_lines) > 1: + first_line = header_lines[1].strip() year_match = re.search(r"\((\d{4})\)", first_line) if year_match: metadata["year"] = year_match.group(1) diff --git a/server/core/subtitle_processor.py b/server/core/subtitle_processor.py index bfcbfdc..cd998a9 100644 --- a/server/core/subtitle_processor.py +++ b/server/core/subtitle_processor.py @@ -1463,6 +1463,14 @@ class SubtitleProcessor: ) # ───────────────────────────────────────────────────────────── + if 
insertion_position != "end" and not intro_blocks: + return { + "success": False, + "error": "Insufficient gap before first subtitle", + "status": "Insufficient Gap", + "summary": "" + } + # PHASE 5: Combine intro + original subtitles # # NOTE: We're ONLY renumbering indices (1, 2, 3...), NOT timestamps!