From 9251e6e83740d5aa54f21f7bcbbeff7d6bfb66ac Mon Sep 17 00:00:00 2001 From: ponzischeme89 Date: Sun, 18 Jan 2026 22:29:51 +1300 Subject: [PATCH] 1.0.0.7 - Matching improves, added library page. Removed schedule scans support --- README.md | 26 ++- frontend/src/App.svelte | 6 +- frontend/src/components/AppSidebar.svelte | 17 +- frontend/src/components/LibraryPanel.svelte | 153 ++++++++++++++++++ .../src/components/TypewriterQuote.svelte | 24 +-- frontend/src/lib/api.js | 10 ++ server/app.py | 143 +++++++++++++++- server/core/database.py | 130 ++++++++++++++- server/core/file_scanner.py | 37 ++--- server/core/subtitle_processor.py | 8 + 10 files changed, 502 insertions(+), 52 deletions(-) create mode 100644 frontend/src/components/LibraryPanel.svelte diff --git a/README.md b/README.md index 1a66a3a..42b0ac5 100644 --- a/README.md +++ b/README.md @@ -102,10 +102,28 @@ networks: ``` -## Roadmap -- [x] Add support for TVMaze -- [ ] Add support for more themes -- [ ] Bring in posters into results list so it's easier to identify movies / TV shows +## Limitations +- API rate limits: OMDb is tight, TMDb is better, TVMaze is polite-but-limited. Heavy scans may hit caps. +- Metadata gaps: If providers don’t have it, Sublogue won’t either. Ratings/plots can be missing or stale. +- Localisation: Only TMDb supports proper language/region data. OMDb/TVMaze are mostly English-only. +- Long plots: Big summaries go in as-is. Your TV may split them across multiple screens. +- Formats: Only .srt is supported. No WebVTT, ASS/SSA, or embedded subs yet. +- Duplicate inserts: Reprocessing the same file will stack multiple plot blocks. +- Offline use: Requires internet for metadata lookups — no offline mode. +- File access: Read-only or locked files cannot be processed. 
+ + ## Roadmap + - [x] TVMaze integration + - [ ] More UI themes (OLED variants, Ocean+, and high-contrast) + - [ ] Poster + backdrop previews in results + - [ ] Smart duplicate-detection (don’t re-insert plot blocks) + - [ ] Automatic rate-limit backoff + retry logic + - [ ] Optional “short plot mode” for long summaries + - [ ] Expanded localisation using TMDb (title, plot, cast where available) + - [ ] Multi-format subtitle support (WebVTT, ASS/SSA) + - [ ] Offline caching of recent metadata lookups + - [ ] Per-scan analytics: success/fail counts, rate-limit warnings + - [ ] CLI mode for batch operations ## Support - Help spread the word about Sublogue by telling your friends about this repo diff --git a/frontend/src/App.svelte b/frontend/src/App.svelte index db585e3..1274b72 100644 --- a/frontend/src/App.svelte +++ b/frontend/src/App.svelte @@ -7,7 +7,7 @@ import SettingsPanel from './components/SettingsPanel.svelte' import ScanPanel from './components/ScanPanel.svelte' import HistoryPanel from './components/HistoryPanel.svelte' - import ScheduledScansPanel from './components/ScheduledScansPanel.svelte' + import LibraryPanel from './components/LibraryPanel.svelte' import { Menu } from 'lucide-svelte' import ToastHost from './components/ToastHost.svelte' import { healthCheck } from './lib/api.js' @@ -164,8 +164,8 @@ onOpenHistory={() => navigateTo('history')} /> {/key} - {:else if currentView === 'scheduled'} - + {:else if currentView === 'library'} + {/if} diff --git a/frontend/src/components/AppSidebar.svelte b/frontend/src/components/AppSidebar.svelte index eae194b..2faa5db 100644 --- a/frontend/src/components/AppSidebar.svelte +++ b/frontend/src/components/AppSidebar.svelte @@ -3,16 +3,13 @@ import { Separator } from "../lib/components/ui/separator"; import { Badge } from "../lib/components/ui/badge"; import { - Calendar, - Download, ChevronLeft, ChevronRight, Github, - Heart, - Package, Scan, Settings, History, + Library, } from "lucide-svelte"; import 
ThemeSelector from "./ThemeSelector.svelte"; import sublogueLogo from "../assets/sublogue_v2.png"; @@ -115,16 +112,16 @@ className={`w-full rounded-md py-1.5 text-[13px] font-semibold leading-none ${ collapsed ? "justify-center px-0" : "justify-start px-2 gap-2" } ${ - currentView === "scheduled" + currentView === "library" ? "bg-[color:var(--bg-hover)] text-white font-bold" : "text-text-secondary hover:text-white hover:bg-[color:var(--bg-hover)]" }`} - on:click={() => onNavigate("scheduled")} - aria-current={currentView === "scheduled" ? "page" : undefined} + on:click={() => onNavigate("library")} + aria-current={currentView === "library" ? "page" : undefined} > - + {#if !collapsed} - Scheduled Scans + Library {/if} @@ -157,7 +154,7 @@ > {#if !collapsed} v1.0.6 Release Candiatev1.0.7 Release Candidate {:else} v diff --git a/frontend/src/components/LibraryPanel.svelte b/frontend/src/components/LibraryPanel.svelte new file mode 100644 index 0000000..3e5c81b --- /dev/null +++ b/frontend/src/components/LibraryPanel.svelte @@ -0,0 +1,153 @@ + +
+
+
+

Library Health

+

+ Review subtitles from each scan and spot missing plots, duplicates, and insufficient gaps. +

+
+ +
+ + {#if error} +
+

{error}

+
+ {/if} + + {#if loading} +
Loading library report...
+ {:else if items.length === 0} +
+
+ +
+

No scan data yet

+

Run a scan to populate the library report.

+
+
+
+ {:else} +
+ {#each items as item} +
+
+
+
+ {item.title}{item.year ? ` (${item.year})` : ""} +
+
+ {item.files.length} subtitle file{item.files.length === 1 ? "" : "s"} +
+
+
+ + Missing: {item.health.missing_plot} + + + Duplicates: {item.health.duplicate_plot} + + + Gap issues: {item.health.insufficient_gap} + + +
+
+ + {#if expanded[item.title]} +
+
+ + + + + + + + + + + {#each item.files as file} + + + + + + + {/each} + +
FileStatusPlotIssues
+ {file.display_name || file.name} + {file.status || "Not Loaded"} + {file.has_plot ? "Present" : "Missing"} + + {#if file.issues.length === 0} + Healthy + {:else} +
+ {#each file.issues as issue} +
+ {issue.type.replace("_", " ")} — {issue.reason} +
+ {/each} +
+ {/if} +
+
+
+ {/if} +
+ {/each} +
+ {/if} +
diff --git a/frontend/src/components/TypewriterQuote.svelte b/frontend/src/components/TypewriterQuote.svelte index 6b9129f..db7cfab 100644 --- a/frontend/src/components/TypewriterQuote.svelte +++ b/frontend/src/components/TypewriterQuote.svelte @@ -19,18 +19,18 @@ "This scan is sponsored by existential dread.", ], rude: [ - "Ugh, more files? Seriously?", - "You could've organized these better, you know.", - "Why are there so many files? Get a hobby.", - "I don't get paid enough for this.", - "Your naming conventions are a crime.", - "This is taking forever because of YOUR mess.", - "I've seen better file structures in a dumpster.", - "Oh great, another scan. My favorite.", - "Do you even know what you're looking for?", - "These files are judging you. So am I.", - "Scanning your questionable life choices.", - "I hope you appreciate this. You won't.", + "Ugh, more files? What did you do, collect them competitively?", + "You could've organized these better. You actively chose not to.", + "Why are there so many files? Therapy is cheaper.", + "I don't get paid enough for this. Actually, I don't get paid at all.", + "Your naming conventions aren’t just bad — they’re offensive.", + "This is taking forever because you live like this.", + "I’ve seen better file structures in a crime scene.", + "Oh great, another scan. Thrilling. Electrifying. Life-changing.", + "Do you even know what you're looking for, or are we just clicking things now?", + "These files are judging you. Loudly.", + "Scanning your deeply questionable life choices.", + "I hope you appreciate this. 
Statistically, you won’t.", ], nice: [ "Taking a moment to find your perfect subtitles.", diff --git a/frontend/src/lib/api.js b/frontend/src/lib/api.js index 3e28369..c49dad4 100644 --- a/frontend/src/lib/api.js +++ b/frontend/src/lib/api.js @@ -332,6 +332,16 @@ export async function getStatistics() { return apiFetch('/statistics') } +// ============ LIBRARY API ============ + +/** + * GET /api/library - Get library health report + * Returns: { success, scans: [...] } + */ +export async function getLibraryReport(limit = 25) { + return apiFetch(`/library?limit=${limit}`) +} + // ============ SCHEDULED SCANS API ============ /** diff --git a/server/app.py b/server/app.py index 616c74d..b1c06bb 100644 --- a/server/app.py +++ b/server/app.py @@ -4,6 +4,8 @@ import logging import os import threading import time +import re +from difflib import SequenceMatcher from datetime import datetime, timezone from pathlib import Path @@ -14,7 +16,8 @@ from core.config_manager import ConfigManager from core.omdb_client import OMDbClient from core.tmdb_client import TMDbClient from core.tvmaze_client import TVMazeClient -from core.subtitle_processor import SubtitleProcessor, SubtitleFormatOptions +from core.subtitle_processor import SubtitleProcessor, SubtitleFormatOptions, SUBLOGUE_TOKEN_PATTERN, SUBLOGUE_SENTINEL +from core.keyword_stripper import get_stripper from core.file_scanner import FileScanner from core.database import DatabaseManager @@ -75,12 +78,13 @@ def perform_scheduled_scan(directory): scan_duration_ms = int((time.time() - start_time) * 1000) files_with_plot = sum(1 for f in files if f.get("has_plot", False)) - db.add_scan_history( + scan_id = db.add_scan_history( directory=directory, files_found=len(files), files_with_plot=files_with_plot, scan_duration_ms=scan_duration_ms ) + db.add_scan_files(scan_id, files) return { "files_found": len(files), @@ -246,6 +250,115 @@ def _merge_format_options(base_options: SubtitleFormatOptions, rule: dict | None ) +def 
_parse_library_identity(file_info: dict) -> dict: + """Parse title, year, season, and episode from filename metadata.""" + file_name = file_info.get("name", "") + title = file_info.get("title") + year = file_info.get("year") + + if not title: + stripped = get_stripper().clean_filename(file_name, preserve_year=True) + title = stripped.get("cleaned_title") or Path(file_name).stem + year = year or stripped.get("year") + season = stripped.get("season") + episode = stripped.get("episode") + else: + season, episode = get_stripper().extract_season_episode(file_name) + + clean_title = title or Path(file_name).stem + clean_title = clean_title.replace(SUBLOGUE_SENTINEL, "") + clean_title = re.sub(r"<[^>]+>", "", clean_title) + clean_title = SUBLOGUE_TOKEN_PATTERN.sub("", clean_title) + clean_title = re.sub(r"\b(en|eng|english|ita|it|italian|fr|es|de|multi)\b", "", clean_title, flags=re.I) + clean_title = re.sub(r'\s*-\s*copy\b', '', clean_title, flags=re.I) + clean_title = re.sub(r'\s*copy\b', '', clean_title, flags=re.I) + clean_title = re.sub(r"\((\d{4})\)\s*\(\1\)", r"(\1)", clean_title) + if year: + clean_title = re.sub(rf"\s*\({re.escape(str(year))}\)$", "", clean_title) + clean_title = " ".join(clean_title.split()).strip() + + return { + "title": clean_title, + "year": year, + "season": season, + "episode": episode, + } + + +def _group_key(title: str, year: str | None) -> str: + base = title.strip().lower() + return f"{base} ({year})" if year else base + + +def _build_library_items(files: list[dict], latest_results: dict, limit: int) -> list[dict]: + """Aggregate scan files into library items.""" + grouped = {} + for file_info in files: + parsed = _parse_library_identity(file_info) + key = _group_key(parsed["title"], parsed["year"]) + item = grouped.get(key) + if not item: + # Try fuzzy match to existing groups + for existing_key, existing in grouped.items(): + ratio = SequenceMatcher(None, existing["title"].lower(), parsed["title"].lower()).ratio() + if ratio >= 0.88: 
+ key = existing_key + item = existing + break + if not item: + item = grouped.setdefault(key, { + "title": parsed["title"], + "year": parsed["year"], + "files": [], + "health": { + "missing_plot": 0, + "duplicate_plot": 0, + "insufficient_gap": 0 + } + }) + + issues = [] + if not file_info.get("has_plot"): + issues.append({"type": "missing_plot", "reason": "No plot detected"}) + item["health"]["missing_plot"] += 1 + if (file_info.get("plot_marker_count") or 0) > 1: + issues.append({"type": "duplicate_plot", "reason": "Multiple plot markers detected"}) + item["health"]["duplicate_plot"] += 1 + + latest_result = latest_results.get(file_info.get("path")) + if latest_result and latest_result.get("status") == "Insufficient Gap": + issues.append({ + "type": "insufficient_gap", + "reason": latest_result.get("error_message") or "Insufficient gap before first subtitle" + }) + item["health"]["insufficient_gap"] += 1 + + display_name = parsed["title"] + if parsed["season"] is not None and parsed["episode"] is not None: + display_name = f"{parsed['title']} - S{parsed['season']:02d}E{parsed['episode']:02d}" + elif parsed["year"]: + display_name = f"{parsed['title']} ({parsed['year']})" + + item["files"].append({ + **file_info, + "display_name": display_name, + "duplicate_plot": (file_info.get("plot_marker_count") or 0) > 1, + "latest_status": latest_result.get("status") if latest_result else None, + "latest_error": latest_result.get("error_message") if latest_result else None, + "issues": issues + }) + + items = list(grouped.values()) + items.sort( + key=lambda entry: ( + entry["health"]["missing_plot"] + + entry["health"]["duplicate_plot"] + + entry["health"]["insufficient_gap"] + ), + reverse=True + ) + return items[:limit] + def get_format_options_from_settings() -> SubtitleFormatOptions: """Load subtitle formatting options from database settings.""" return SubtitleFormatOptions( @@ -418,12 +531,13 @@ def start_scan(): files_with_plot = sum(1 for f in files if 
f.get("has_plot", False)) # Save scan history to database - db.add_scan_history( + scan_id = db.add_scan_history( directory=directory, files_found=len(files), files_with_plot=files_with_plot, scan_duration_ms=scan_duration_ms ) + db.add_scan_files(scan_id, files) # Load existing suggested matches for this directory suggested_matches = db.get_suggested_matches_for_directory(directory) @@ -529,12 +643,13 @@ def stream_scan(): files_with_plot = sum(1 for f in all_files if f.get("has_plot", False)) # Save scan history to database - db.add_scan_history( + scan_id = db.add_scan_history( directory=directory, files_found=len(all_files), files_with_plot=files_with_plot, scan_duration_ms=scan_duration_ms ) + db.add_scan_files(scan_id, all_files) # Load existing suggested matches logger.info("Loading suggested matches from database...") @@ -1520,6 +1635,26 @@ def get_scan_history(): }), 500 +@app.route('/api/library', methods=['GET']) +def get_library_report(): + """Get library health report with scan files and issue summaries""" + try: + limit = request.args.get('limit', 200, type=int) + latest_files = db.get_latest_scan_files() + latest_results = db.get_latest_file_results() + + return jsonify({ + "success": True, + "items": _build_library_items(latest_files, latest_results, limit) + }) + except Exception as e: + logger.error(f"Error fetching library report: {e}") + return jsonify({ + "success": False, + "error": str(e) + }), 500 + + @app.route('/api/statistics', methods=['GET']) def get_statistics(): """Get overall statistics""" diff --git a/server/core/database.py b/server/core/database.py index 57f8b10..5d801f7 100644 --- a/server/core/database.py +++ b/server/core/database.py @@ -4,7 +4,7 @@ Handles persistent storage for settings, runs, and history """ from datetime import datetime from pathlib import Path -from sqlalchemy import create_engine, Column, Integer, String, DateTime, Boolean, Float, Text, ForeignKey +from sqlalchemy import create_engine, Column, Integer, 
String, DateTime, Boolean, Float, Text, ForeignKey, text from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker, relationship, scoped_session import json @@ -85,6 +85,28 @@ class ScanHistory(Base): return f"" +class ScanFile(Base): + """Scan files table - stores file details per scan""" + __tablename__ = 'scan_files' + + id = Column(Integer, primary_key=True) + scan_id = Column(Integer, ForeignKey('scan_history.id'), nullable=False, index=True) + file_path = Column(String(500), nullable=False, index=True) + file_name = Column(String(255), nullable=False) + title = Column(String(255)) + year = Column(String(10)) + has_plot = Column(Boolean, default=False) + plot_marker_count = Column(Integer, default=0) + status = Column(String(100)) + summary = Column(Text) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + + scan = relationship("ScanHistory") + + def __repr__(self): + return f"" + + class ScheduledScan(Base): """Scheduled scans table - stores scheduled scan jobs and results""" __tablename__ = 'scheduled_scans' @@ -172,12 +194,32 @@ class DatabaseManager: # Create tables if they don't exist Base.metadata.create_all(self.engine) + self._ensure_scan_files_schema() logger.info(f"Database initialized at {self.db_path}") def get_session(self): """Get a new database session""" return self.Session() + def _ensure_scan_files_schema(self): + """Ensure scan_files table has newer columns in existing databases.""" + session = self.get_session() + try: + columns = session.execute(text("PRAGMA table_info(scan_files)")).fetchall() + if not columns: + return + existing = {row[1] for row in columns} # column name is index 1 + if "title" not in existing: + session.execute(text("ALTER TABLE scan_files ADD COLUMN title VARCHAR(255)")) + if "year" not in existing: + session.execute(text("ALTER TABLE scan_files ADD COLUMN year VARCHAR(10)")) + session.commit() + except Exception as e: + session.rollback() + 
logger.error(f"Error migrating scan_files schema: {e}") + finally: + session.close() + def close_session(self): """Close the session""" self.Session.remove() @@ -387,6 +429,7 @@ class DatabaseManager: session.add(scan) session.commit() logger.info(f"Scan history saved for {directory}") + return scan.id except Exception as e: session.rollback() logger.error(f"Error saving scan history: {e}") @@ -416,6 +459,91 @@ class DatabaseManager: finally: session.close() + def add_scan_files(self, scan_id, files): + """Persist file details for a scan""" + session = self.get_session() + try: + for file_info in files: + session.add(ScanFile( + scan_id=scan_id, + file_path=file_info.get("path"), + file_name=file_info.get("name"), + title=file_info.get("title"), + year=file_info.get("year"), + has_plot=bool(file_info.get("has_plot")), + plot_marker_count=int(file_info.get("plot_marker_count") or 0), + status=file_info.get("status"), + summary=file_info.get("summary", "") + )) + session.commit() + except Exception as e: + session.rollback() + logger.error(f"Error saving scan files: {e}") + raise + finally: + session.close() + + def get_scan_files(self, scan_id): + """Get all files for a scan""" + session = self.get_session() + try: + files = session.query(ScanFile).filter_by(scan_id=scan_id).all() + return [ + { + "path": f.file_path, + "name": f.file_name, + "title": f.title, + "year": f.year, + "has_plot": f.has_plot, + "plot_marker_count": f.plot_marker_count, + "status": f.status, + "summary": f.summary + } + for f in files + ] + finally: + session.close() + + def get_latest_scan_files(self): + """Get latest scan entry per file path""" + session = self.get_session() + try: + files = session.query(ScanFile).order_by(ScanFile.created_at.desc()).all() + latest = {} + for file_entry in files: + if file_entry.file_path in latest: + continue + latest[file_entry.file_path] = { + "path": file_entry.file_path, + "name": file_entry.file_name, + "title": file_entry.title, + "year": 
file_entry.year, + "has_plot": file_entry.has_plot, + "plot_marker_count": file_entry.plot_marker_count, + "status": file_entry.status, + "summary": file_entry.summary + } + return list(latest.values()) + finally: + session.close() + + def get_latest_file_results(self): + """Get latest processing result per file""" + session = self.get_session() + try: + results = session.query(FileResult).order_by(FileResult.processed_at.desc()).all() + latest = {} + for result in results: + if result.file_path not in latest: + latest[result.file_path] = { + "status": result.status, + "error_message": result.error_message, + "processed_at": result.processed_at.isoformat() if result.processed_at else None + } + return latest + finally: + session.close() + # ============ SCHEDULED SCAN OPERATIONS ============ def create_scheduled_scan(self, directory, scheduled_for): diff --git a/server/core/file_scanner.py b/server/core/file_scanner.py index 42f7283..1a3c1d4 100644 --- a/server/core/file_scanner.py +++ b/server/core/file_scanner.py @@ -24,7 +24,7 @@ logger.addHandler(handler) import sys sys.path.insert(0, str(Path(__file__).parent)) -from subtitle_processor import parse_srt +from subtitle_processor import parse_srt, SUBLOGUE_SENTINEL, SUBLOGUE_TOKEN_PATTERN class FileScanner: @@ -114,7 +114,8 @@ class FileScanner: # -------------------------------------------- try: - has_plot = cls._check_has_plot(file_path) + plot_marker_count = cls._count_plot_markers(file_path) + has_plot = plot_marker_count > 0 logger.debug( "Plot check for %s: %s", file_path.name, @@ -148,6 +149,8 @@ class FileScanner: "path": str(file_path), "name": file_path.name, "has_plot": has_plot, + "plot_marker_count": plot_marker_count, + "duplicate_plot": plot_marker_count > 1, "status": "Has Plot" if has_plot else "Not Loaded", "summary": metadata.get("summary", ""), "plot": metadata.get("summary", ""), @@ -216,30 +219,25 @@ class FileScanner: ) @classmethod - def _check_has_plot(cls, file_path: Path) -> bool: + def 
_count_plot_markers(cls, file_path: Path) -> int: """ - Check first N lines for Sublogue signature. + Count Sublogue plot markers to detect duplicates. """ - logger.debug("Scanning for plot marker in %s", file_path.name) + logger.debug("Scanning for plot markers in %s", file_path.name) try: - with file_path.open("r", encoding="utf-8", errors="ignore") as f: - for i, line in enumerate(f): - if i >= cls.PLOT_SCAN_LINES: - break - if "generated by sublogue" in line.lower(): - logger.debug( - "Plot marker found in %s (line %d)", - file_path.name, i + 1 - ) - return True + content = file_path.read_text(encoding="utf-8", errors="ignore") + lower_content = content.lower() + generated_count = lower_content.count("generated by sublogue") + if generated_count > 0: + return generated_count + return content.count(SUBLOGUE_SENTINEL) except Exception as e: logger.error( "Error reading file during plot scan: %s (%s)", file_path, e ) - - return False + return 0 @classmethod def _extract_metadata(cls, file_path: Path) -> Dict: @@ -270,6 +268,7 @@ class FileScanner: plot_text = blocks[1].text plot_text = plot_text.split("Generated by Sublogue")[0].strip() + plot_text = SUBLOGUE_TOKEN_PATTERN.sub("", plot_text).strip() metadata["summary"] = plot_text # -------------------------------------------- @@ -279,7 +278,9 @@ class FileScanner: header_lines = blocks[0].text.split("\n") if header_lines: - first_line = header_lines[0] + first_line = header_lines[0].strip() + if first_line == SUBLOGUE_SENTINEL and len(header_lines) > 1: + first_line = header_lines[1].strip() year_match = re.search(r"\((\d{4})\)", first_line) if year_match: metadata["year"] = year_match.group(1) diff --git a/server/core/subtitle_processor.py b/server/core/subtitle_processor.py index bfcbfdc..cd998a9 100644 --- a/server/core/subtitle_processor.py +++ b/server/core/subtitle_processor.py @@ -1463,6 +1463,14 @@ class SubtitleProcessor: ) # ───────────────────────────────────────────────────────────── + if 
insertion_position != "end" and not intro_blocks: + return { + "success": False, + "error": "Insufficient gap before first subtitle", + "status": "Insufficient Gap", + "summary": "" + } + # PHASE 5: Combine intro + original subtitles # # NOTE: We're ONLY renumbering indices (1, 2, 3...), NOT timestamps!