1.0.0.7 - Matching improvements, added library page. Removed scheduled scans support
This commit is contained in:
@@ -102,10 +102,28 @@ networks:
|
||||
```
|
||||
</details>
|
||||
|
||||
## Limitations
|
||||
- API rate limits: OMDb is tight, TMDb is better, TVMaze is polite-but-limited. Heavy scans may hit caps.
|
||||
- Metadata gaps: If providers don’t have it, Sublogue won’t either. Ratings/plots can be missing or stale.
|
||||
- Localisation: Only TMDb supports proper language/region data. OMDb/TVMaze are mostly English-only.
|
||||
- Long plots: Big summaries go in as-is. Your TV may split them across multiple screens.
|
||||
- Formats: Only .srt is supported. No WebVTT, ASS/SSA, or embedded subs yet.
|
||||
- Duplicate inserts: Reprocessing the same file will stack multiple plot blocks.
|
||||
- Offline use: Requires internet for metadata lookups — no offline mode.
|
||||
- File access: Read-only or locked files cannot be processed.
|
||||
|
||||
## Roadmap
|
||||
- [x] Add support for TVMaze
|
||||
- [ ] Add support for more themes
|
||||
- [ ] Bring in posters into results list so it's easier to identify movies / TV shows
|
||||
- [x] TVMaze integration
|
||||
- [ ] More UI themes (OLED variants, Ocean+, and high-contrast)
|
||||
- [ ] Poster + backdrop previews in results
|
||||
- [ ] Smart duplicate-detection (don’t re-insert plot blocks)
|
||||
- [ ] Automatic rate-limit backoff + retry logic
|
||||
- [ ] Optional “short plot mode” for long summaries
|
||||
- [ ] Expanded localisation using TMDb (title, plot, cast where available)
|
||||
- [ ] Multi-format subtitle support (WebVTT, ASS/SSA)
|
||||
- [ ] Offline caching of recent metadata lookups
|
||||
- [ ] Per-scan analytics: success/fail counts, rate-limit warnings
|
||||
- [ ] CLI mode for batch operations
|
||||
|
||||
## Support
|
||||
- Help spread the word about Sublogue by telling your friends about this repo
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
import SettingsPanel from './components/SettingsPanel.svelte'
|
||||
import ScanPanel from './components/ScanPanel.svelte'
|
||||
import HistoryPanel from './components/HistoryPanel.svelte'
|
||||
import ScheduledScansPanel from './components/ScheduledScansPanel.svelte'
|
||||
import LibraryPanel from './components/LibraryPanel.svelte'
|
||||
import { Menu } from 'lucide-svelte'
|
||||
import ToastHost from './components/ToastHost.svelte'
|
||||
import { healthCheck } from './lib/api.js'
|
||||
@@ -164,8 +164,8 @@
|
||||
onOpenHistory={() => navigateTo('history')}
|
||||
/>
|
||||
{/key}
|
||||
{:else if currentView === 'scheduled'}
|
||||
<ScheduledScansPanel />
|
||||
{:else if currentView === 'library'}
|
||||
<LibraryPanel />
|
||||
{/if}
|
||||
</div>
|
||||
</main>
|
||||
|
||||
@@ -3,16 +3,13 @@
|
||||
import { Separator } from "../lib/components/ui/separator";
|
||||
import { Badge } from "../lib/components/ui/badge";
|
||||
import {
|
||||
Calendar,
|
||||
Download,
|
||||
ChevronLeft,
|
||||
ChevronRight,
|
||||
Github,
|
||||
Heart,
|
||||
Package,
|
||||
Scan,
|
||||
Settings,
|
||||
History,
|
||||
Library,
|
||||
} from "lucide-svelte";
|
||||
import ThemeSelector from "./ThemeSelector.svelte";
|
||||
import sublogueLogo from "../assets/sublogue_v2.png";
|
||||
@@ -115,16 +112,16 @@
|
||||
className={`w-full rounded-md py-1.5 text-[13px] font-semibold leading-none ${
|
||||
collapsed ? "justify-center px-0" : "justify-start px-2 gap-2"
|
||||
} ${
|
||||
currentView === "scheduled"
|
||||
currentView === "library"
|
||||
? "bg-[color:var(--bg-hover)] text-white font-bold"
|
||||
: "text-text-secondary hover:text-white hover:bg-[color:var(--bg-hover)]"
|
||||
}`}
|
||||
on:click={() => onNavigate("scheduled")}
|
||||
aria-current={currentView === "scheduled" ? "page" : undefined}
|
||||
on:click={() => onNavigate("library")}
|
||||
aria-current={currentView === "library" ? "page" : undefined}
|
||||
>
|
||||
<Calendar class="h-4 w-4" />
|
||||
<Library class="h-4 w-4" />
|
||||
{#if !collapsed}
|
||||
Scheduled Scans
|
||||
Library
|
||||
{/if}
|
||||
</Button>
|
||||
|
||||
@@ -157,7 +154,7 @@
|
||||
>
|
||||
{#if !collapsed}
|
||||
<Badge className="bg-white/10 text-text-secondary"
|
||||
>v1.0.6 Release Candiate</Badge
|
||||
>v1.0.7 Release Candiate</Badge
|
||||
>
|
||||
{:else}
|
||||
<Badge className="bg-white/10 text-text-secondary">v</Badge>
|
||||
|
||||
@@ -0,0 +1,153 @@
|
||||
<script>
  import { onMount } from "svelte";
  import { getLibraryReport } from "../lib/api.js";
  import { Button } from "../lib/components/ui/button";
  import { ChevronDown, ChevronUp, RefreshCcw, FileText } from "lucide-svelte";

  // Library items as returned by getLibraryReport(), normalised by normalizeItem().
  let items = [];
  let loading = false;
  let error = null;
  // Expansion state, keyed by itemKey(item).
  let expanded = {};

  /**
   * Build the key used to track whether an item is expanded.
   * The title alone is not unique (two entries can share a title and differ
   * only by year), so the year is included whenever it is present.
   */
  function itemKey(item) {
    return item.year ? `${item.title} (${item.year})` : `${item.title}`;
  }

  /**
   * Fill in the fields the template dereferences so a partial API payload
   * (missing `files`, `health` or per-file `issues`) renders instead of throwing.
   */
  function normalizeItem(item) {
    return {
      ...item,
      files: (item.files || []).map((file) => ({
        ...file,
        issues: file.issues || [],
      })),
      health: {
        missing_plot: 0,
        duplicate_plot: 0,
        insufficient_gap: 0,
        ...(item.health || {}),
      },
    };
  }

  /** Turn an issue type such as "missing_plot" into "missing plot". */
  function formatIssueType(type) {
    // A regex with the g flag replaces every underscore, not just the first.
    return String(type || "").replace(/_/g, " ");
  }

  /** Fetch the library report and update the loading / error / items state. */
  async function loadLibrary() {
    loading = true;
    error = null;
    try {
      const response = await getLibraryReport();
      items = ((response && response.items) || []).map(normalizeItem);
    } catch (err) {
      // Anything can be thrown, so do not assume an Error instance.
      const message = err instanceof Error ? err.message : String(err);
      error = `Failed to load library report: ${message}`;
    } finally {
      loading = false;
    }
  }

  /** Flip the expanded flag for the given key (reassigned so Svelte reacts). */
  function toggleScan(key) {
    expanded = { ...expanded, [key]: !expanded[key] };
  }

  onMount(loadLibrary);
</script>

<div class="space-y-6">
  <div class="flex items-start justify-between gap-4">
    <div>
      <h2 class="text-xl font-bold text-text-primary">Library Health</h2>
      <p class="text-[13px] text-text-secondary">
        Review subtitles from each scan and spot missing plots, duplicates, and insufficient gaps.
      </p>
    </div>
    <Button
      variant="outline"
      size="sm"
      className="border-white/15 text-text-secondary hover:bg-white/10"
      on:click={loadLibrary}
      disabled={loading}
    >
      <RefreshCcw class="h-4 w-4" />
      Refresh
    </Button>
  </div>

  {#if error}
    <div class="px-5 py-4 bg-red-500/5 border border-red-500/20 rounded-xl">
      <p class="text-[13px] text-red-300">{error}</p>
    </div>
  {/if}

  {#if loading}
    <div class="text-[13px] text-text-secondary">Loading library report...</div>
  {:else if items.length === 0}
    <div class="border border-border rounded-2xl p-12 text-center">
      <div class="flex flex-col items-center gap-4">
        <FileText class="w-12 h-12 text-text-tertiary" />
        <div>
          <p class="text-[13px] text-text-secondary mb-1">No scan data yet</p>
          <p class="text-[11px] text-text-tertiary">Run a scan to populate the library report.</p>
        </div>
      </div>
    </div>
  {:else}
    <div class="space-y-4">
      {#each items as item}
        <div class="rounded-2xl border border-border bg-card/60 overflow-hidden">
          <div class="px-6 py-4 flex flex-col gap-3 md:flex-row md:items-center md:justify-between">
            <div class="space-y-1">
              <div class="text-[13px] font-semibold text-text-primary">
                {item.title}{item.year ? ` (${item.year})` : ""}
              </div>
              <div class="text-[11px] text-text-tertiary">
                {item.files.length} subtitle file{item.files.length === 1 ? "" : "s"}
              </div>
            </div>
            <div class="flex items-center gap-3">
              <span class="text-[11px] text-yellow-200 bg-yellow-500/10 border border-yellow-500/30 px-3 py-1 rounded-full">
                Missing: {item.health.missing_plot}
              </span>
              <span class="text-[11px] text-orange-200 bg-orange-500/10 border border-orange-500/30 px-3 py-1 rounded-full">
                Duplicates: {item.health.duplicate_plot}
              </span>
              <span class="text-[11px] text-red-200 bg-red-500/10 border border-red-500/30 px-3 py-1 rounded-full">
                Gap issues: {item.health.insufficient_gap}
              </span>
              <button
                type="button"
                class="ml-2 text-text-secondary hover:text-white transition-colors"
                on:click={() => toggleScan(itemKey(item))}
                aria-label="Toggle scan details"
                aria-expanded={expanded[itemKey(item)] ? "true" : "false"}
              >
                {#if expanded[itemKey(item)]}
                  <ChevronUp class="h-4 w-4" />
                {:else}
                  <ChevronDown class="h-4 w-4" />
                {/if}
              </button>
            </div>
          </div>

          {#if expanded[itemKey(item)]}
            <div class="border-t border-border bg-bg-secondary/40">
              <div class="px-6 py-4 overflow-x-auto">
                <table class="min-w-full text-[12px] text-text-secondary">
                  <thead>
                    <tr class="text-text-tertiary text-[11px] uppercase tracking-wide">
                      <th class="text-left py-2 pr-4">File</th>
                      <th class="text-left py-2 pr-4">Status</th>
                      <th class="text-left py-2 pr-4">Plot</th>
                      <th class="text-left py-2">Issues</th>
                    </tr>
                  </thead>
                  <tbody class="divide-y divide-border">
                    {#each item.files as file}
                      <tr>
                        <td class="py-3 pr-4 text-text-primary">
                          {file.display_name || file.name}
                        </td>
                        <td class="py-3 pr-4">{file.status || "Not Loaded"}</td>
                        <td class="py-3 pr-4">
                          {file.has_plot ? "Present" : "Missing"}
                        </td>
                        <td class="py-3">
                          {#if file.issues.length === 0}
                            <span class="text-green-300">Healthy</span>
                          {:else}
                            <div class="space-y-1">
                              {#each file.issues as issue}
                                <div class="text-[11px] text-red-200">
                                  {formatIssueType(issue.type)} — {issue.reason}
                                </div>
                              {/each}
                            </div>
                          {/if}
                        </td>
                      </tr>
                    {/each}
                  </tbody>
                </table>
              </div>
            </div>
          {/if}
        </div>
      {/each}
    </div>
  {/if}
</div>
|
||||
@@ -19,18 +19,18 @@
|
||||
"This scan is sponsored by existential dread.",
|
||||
],
|
||||
rude: [
|
||||
"Ugh, more files? Seriously?",
|
||||
"You could've organized these better, you know.",
|
||||
"Why are there so many files? Get a hobby.",
|
||||
"I don't get paid enough for this.",
|
||||
"Your naming conventions are a crime.",
|
||||
"This is taking forever because of YOUR mess.",
|
||||
"I've seen better file structures in a dumpster.",
|
||||
"Oh great, another scan. My favorite.",
|
||||
"Do you even know what you're looking for?",
|
||||
"These files are judging you. So am I.",
|
||||
"Scanning your questionable life choices.",
|
||||
"I hope you appreciate this. You won't.",
|
||||
"Ugh, more files? What did you do, collect them competitively?",
|
||||
"You could've organized these better. You actively chose not to.",
|
||||
"Why are there so many files? Therapy is cheaper.",
|
||||
"I don't get paid enough for this. Actually, I don't get paid at all.",
|
||||
"Your naming conventions aren’t just bad — they’re offensive.",
|
||||
"This is taking forever because you live like this.",
|
||||
"I’ve seen better file structures in a crime scene.",
|
||||
"Oh great, another scan. Thrilling. Electrifying. Life-changing.",
|
||||
"Do you even know what you're looking for, or are we just clicking things now?",
|
||||
"These files are judging you. Loudly.",
|
||||
"Scanning your deeply questionable life choices.",
|
||||
"I hope you appreciate this. Statistically, you won’t.",
|
||||
],
|
||||
nice: [
|
||||
"Taking a moment to find your perfect subtitles.",
|
||||
|
||||
@@ -332,6 +332,16 @@ export async function getStatistics() {
|
||||
return apiFetch('/statistics')
|
||||
}
|
||||
|
||||
// ============ LIBRARY API ============
|
||||
|
||||
/**
 * GET /api/library - Get library health report
 *
 * @param {number} [limit=25] Maximum number of library items to request.
 * @returns {Promise<object>} Whatever apiFetch resolves to; the backend route
 *   responds with { success, items: [...] } (each item carries title, year,
 *   files and health counters).
 */
export async function getLibraryReport(limit = 25) {
  // Encode the value so a non-numeric argument cannot corrupt the query string.
  return apiFetch(`/library?limit=${encodeURIComponent(limit)}`)
}
|
||||
|
||||
// ============ SCHEDULED SCANS API ============
|
||||
|
||||
/**
|
||||
|
||||
+139
-4
@@ -4,6 +4,8 @@ import logging
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
import re
|
||||
from difflib import SequenceMatcher
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
@@ -14,7 +16,8 @@ from core.config_manager import ConfigManager
|
||||
from core.omdb_client import OMDbClient
|
||||
from core.tmdb_client import TMDbClient
|
||||
from core.tvmaze_client import TVMazeClient
|
||||
from core.subtitle_processor import SubtitleProcessor, SubtitleFormatOptions
|
||||
from core.subtitle_processor import SubtitleProcessor, SubtitleFormatOptions, SUBLOGUE_TOKEN_PATTERN, SUBLOGUE_SENTINEL
|
||||
from core.keyword_stripper import get_stripper
|
||||
from core.file_scanner import FileScanner
|
||||
from core.database import DatabaseManager
|
||||
|
||||
@@ -75,12 +78,13 @@ def perform_scheduled_scan(directory):
|
||||
scan_duration_ms = int((time.time() - start_time) * 1000)
|
||||
files_with_plot = sum(1 for f in files if f.get("has_plot", False))
|
||||
|
||||
db.add_scan_history(
|
||||
scan_id = db.add_scan_history(
|
||||
directory=directory,
|
||||
files_found=len(files),
|
||||
files_with_plot=files_with_plot,
|
||||
scan_duration_ms=scan_duration_ms
|
||||
)
|
||||
db.add_scan_files(scan_id, files)
|
||||
|
||||
return {
|
||||
"files_found": len(files),
|
||||
@@ -246,6 +250,115 @@ def _merge_format_options(base_options: SubtitleFormatOptions, rule: dict | None
|
||||
)
|
||||
|
||||
|
||||
def _parse_library_identity(file_info: dict) -> dict:
    """Parse title, year, season, and episode from filename metadata.

    Args:
        file_info: Scan entry; the keys read here are ``name``, ``title`` and
            ``year``.

    Returns:
        Dict with ``title`` (cleaned for display/grouping), ``year``,
        ``season`` and ``episode``.
    """
    # Guard against an explicit None so Path() below never receives it.
    file_name = file_info.get("name") or ""
    title = file_info.get("title")
    year = file_info.get("year")

    if not title:
        # No stored title: derive everything from the file name.
        stripped = get_stripper().clean_filename(file_name, preserve_year=True)
        title = stripped.get("cleaned_title") or Path(file_name).stem
        year = year or stripped.get("year")
        season = stripped.get("season")
        episode = stripped.get("episode")
    else:
        season, episode = get_stripper().extract_season_episode(file_name)

    clean_title = title or Path(file_name).stem
    # Drop Sublogue's own markers and any markup that leaked into the title.
    clean_title = clean_title.replace(SUBLOGUE_SENTINEL, "")
    clean_title = re.sub(r"<[^>]+>", "", clean_title)
    clean_title = SUBLOGUE_TOKEN_PATTERN.sub("", clean_title)

    # Strip language tags, unless that would erase the whole title: a film
    # literally called "It" must not end up with an empty name.
    # NOTE(review): language words inside longer titles are still removed.
    without_language = re.sub(
        r"\b(en|eng|english|ita|it|italian|fr|es|de|multi)\b", "", clean_title, flags=re.I
    )
    if re.sub(r"\(\d{4}\)", "", without_language).strip():
        clean_title = without_language

    clean_title = re.sub(r'\s*-\s*copy\b', '', clean_title, flags=re.I)
    clean_title = re.sub(r'\s*copy\b', '', clean_title, flags=re.I)
    clean_title = re.sub(r"\((\d{4})\)\s*\(\1\)", r"(\1)", clean_title)

    # Normalise whitespace BEFORE removing the trailing year: the removals above
    # can leave trailing spaces, which would stop the `$`-anchored pattern from
    # matching and leave "(year)" in the title.
    clean_title = " ".join(clean_title.split())
    if year:
        clean_title = re.sub(rf"\s*\({re.escape(str(year))}\)$", "", clean_title)
    clean_title = " ".join(clean_title.split()).strip()

    return {
        "title": clean_title,
        "year": year,
        "season": season,
        "episode": episode,
    }
|
||||
|
||||
|
||||
def _group_key(title: str, year: str | None) -> str:
|
||||
base = title.strip().lower()
|
||||
return f"{base} ({year})" if year else base
|
||||
|
||||
|
||||
def _find_similar_group(grouped: dict, title: str, year, threshold: float = 0.88):
    """Return the first existing group whose title nearly matches ``title``, or None."""
    wanted = title.lower()
    for existing in grouped.values():
        existing_year = existing.get("year")
        # Two known, different years mean different works (e.g. a film and its
        # sequel) even when the titles are almost identical.
        if year and existing_year and str(year) != str(existing_year):
            continue
        if SequenceMatcher(None, existing["title"].lower(), wanted).ratio() >= threshold:
            return existing
    return None


def _build_library_items(files: list[dict], latest_results: dict, limit: int) -> list[dict]:
    """Aggregate scan files into library items.

    Args:
        files: Scan entries (dicts); ``path``, ``has_plot`` and
            ``plot_marker_count`` are read here, the rest is passed through.
        latest_results: Mapping of file path to a dict with ``status`` and
            ``error_message``.
        limit: Maximum number of items to return. ``None`` means no limit;
            negative values are treated as 0.

    Returns:
        Items sorted by total issue count (highest first), each with
        ``title``, ``year``, ``files`` and ``health`` counters.
    """
    grouped = {}
    for file_info in files:
        parsed = _parse_library_identity(file_info)
        key = _group_key(parsed["title"], parsed["year"])
        # Exact key first, then a year-aware fuzzy match against existing groups.
        item = grouped.get(key) or _find_similar_group(grouped, parsed["title"], parsed["year"])
        if not item:
            item = grouped.setdefault(key, {
                "title": parsed["title"],
                "year": parsed["year"],
                "files": [],
                "health": {
                    "missing_plot": 0,
                    "duplicate_plot": 0,
                    "insufficient_gap": 0
                }
            })

        marker_count = file_info.get("plot_marker_count") or 0
        issues = []
        if not file_info.get("has_plot"):
            issues.append({"type": "missing_plot", "reason": "No plot detected"})
            item["health"]["missing_plot"] += 1
        if marker_count > 1:
            issues.append({"type": "duplicate_plot", "reason": "Multiple plot markers detected"})
            item["health"]["duplicate_plot"] += 1

        latest_result = latest_results.get(file_info.get("path"))
        if latest_result and latest_result.get("status") == "Insufficient Gap":
            issues.append({
                "type": "insufficient_gap",
                "reason": latest_result.get("error_message") or "Insufficient gap before first subtitle"
            })
            item["health"]["insufficient_gap"] += 1

        display_name = parsed["title"]
        if parsed["season"] is not None and parsed["episode"] is not None:
            try:
                display_name = f"{parsed['title']} - S{int(parsed['season']):02d}E{int(parsed['episode']):02d}"
            except (TypeError, ValueError):
                # Non-numeric season/episode values: show them verbatim rather
                # than failing the whole report.
                display_name = f"{parsed['title']} - S{parsed['season']}E{parsed['episode']}"
        elif parsed["year"]:
            display_name = f"{parsed['title']} ({parsed['year']})"

        item["files"].append({
            **file_info,
            "display_name": display_name,
            "duplicate_plot": marker_count > 1,
            "latest_status": latest_result.get("status") if latest_result else None,
            "latest_error": latest_result.get("error_message") if latest_result else None,
            "issues": issues
        })

    items = list(grouped.values())
    items.sort(
        key=lambda entry: (
            entry["health"]["missing_plot"]
            + entry["health"]["duplicate_plot"]
            + entry["health"]["insufficient_gap"]
        ),
        reverse=True
    )
    if limit is None:
        return items
    # A negative slice bound would silently drop items from the END of the list.
    return items[:max(limit, 0)]
|
||||
|
||||
def get_format_options_from_settings() -> SubtitleFormatOptions:
|
||||
"""Load subtitle formatting options from database settings."""
|
||||
return SubtitleFormatOptions(
|
||||
@@ -418,12 +531,13 @@ def start_scan():
|
||||
files_with_plot = sum(1 for f in files if f.get("has_plot", False))
|
||||
|
||||
# Save scan history to database
|
||||
db.add_scan_history(
|
||||
scan_id = db.add_scan_history(
|
||||
directory=directory,
|
||||
files_found=len(files),
|
||||
files_with_plot=files_with_plot,
|
||||
scan_duration_ms=scan_duration_ms
|
||||
)
|
||||
db.add_scan_files(scan_id, files)
|
||||
|
||||
# Load existing suggested matches for this directory
|
||||
suggested_matches = db.get_suggested_matches_for_directory(directory)
|
||||
@@ -529,12 +643,13 @@ def stream_scan():
|
||||
files_with_plot = sum(1 for f in all_files if f.get("has_plot", False))
|
||||
|
||||
# Save scan history to database
|
||||
db.add_scan_history(
|
||||
scan_id = db.add_scan_history(
|
||||
directory=directory,
|
||||
files_found=len(all_files),
|
||||
files_with_plot=files_with_plot,
|
||||
scan_duration_ms=scan_duration_ms
|
||||
)
|
||||
db.add_scan_files(scan_id, all_files)
|
||||
|
||||
# Load existing suggested matches
|
||||
logger.info("Loading suggested matches from database...")
|
||||
@@ -1520,6 +1635,26 @@ def get_scan_history():
|
||||
}), 500
|
||||
|
||||
|
||||
@app.route('/api/library', methods=['GET'])
def get_library_report():
    """Serve the library health report.

    Reads the optional ``limit`` query argument (default 200), loads the latest
    scan entry and latest processing result per file from the database, and
    returns the aggregated items as JSON. Any exception is logged and reported
    as a 500 response with ``success: False``.
    """
    try:
        max_items = request.args.get('limit', 200, type=int)
        scan_files = db.get_latest_scan_files()
        file_results = db.get_latest_file_results()

        payload = {
            "success": True,
            "items": _build_library_items(scan_files, file_results, max_items)
        }
        return jsonify(payload)
    except Exception as exc:
        logger.error(f"Error fetching library report: {exc}")
        failure = {
            "success": False,
            "error": str(exc)
        }
        return jsonify(failure), 500
|
||||
|
||||
|
||||
@app.route('/api/statistics', methods=['GET'])
|
||||
def get_statistics():
|
||||
"""Get overall statistics"""
|
||||
|
||||
+129
-1
@@ -4,7 +4,7 @@ Handles persistent storage for settings, runs, and history
|
||||
"""
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from sqlalchemy import create_engine, Column, Integer, String, DateTime, Boolean, Float, Text, ForeignKey
|
||||
from sqlalchemy import create_engine, Column, Integer, String, DateTime, Boolean, Float, Text, ForeignKey, text
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import sessionmaker, relationship, scoped_session
|
||||
import json
|
||||
@@ -85,6 +85,28 @@ class ScanHistory(Base):
|
||||
return f"<ScanHistory(id={self.id}, directory='{self.directory}', files_found={self.files_found})>"
|
||||
|
||||
|
||||
class ScanFile(Base):
    """Scan files table - stores file details per scan

    One row per file seen in a scan; rows are linked to their scan through
    ``scan_id``.
    """
    __tablename__ = 'scan_files'

    id = Column(Integer, primary_key=True)
    # Owning scan (scan_history.id); indexed for per-scan lookups.
    scan_id = Column(Integer, ForeignKey('scan_history.id'), nullable=False, index=True)
    # Path of the scanned file; indexed because the latest entry per path is looked up.
    file_path = Column(String(500), nullable=False, index=True)
    file_name = Column(String(255), nullable=False)
    # title / year are optional; older databases get these columns added at startup.
    title = Column(String(255))
    year = Column(String(10))
    has_plot = Column(Boolean, default=False)
    # Number of plot markers found in the file; values above 1 are reported as duplicates.
    plot_marker_count = Column(Integer, default=0)
    status = Column(String(100))
    summary = Column(Text)
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    scan = relationship("ScanHistory")

    def __repr__(self):
        return f"<ScanFile(id={self.id}, file_name='{self.file_name}', has_plot={self.has_plot})>"
|
||||
|
||||
|
||||
class ScheduledScan(Base):
|
||||
"""Scheduled scans table - stores scheduled scan jobs and results"""
|
||||
__tablename__ = 'scheduled_scans'
|
||||
@@ -172,12 +194,32 @@ class DatabaseManager:
|
||||
|
||||
# Create tables if they don't exist
|
||||
Base.metadata.create_all(self.engine)
|
||||
self._ensure_scan_files_schema()
|
||||
logger.info(f"Database initialized at {self.db_path}")
|
||||
|
||||
def get_session(self):
    """Get a new database session

    Returns the result of calling ``self.Session()``. The data-access methods
    in this class close the session they obtain in a ``finally`` block.
    """
    return self.Session()
|
||||
|
||||
def _ensure_scan_files_schema(self):
    """Add the newer ``scan_files`` columns to databases created before them.

    Inspects the table with ``PRAGMA table_info`` and issues an ``ALTER TABLE``
    for each of ``title`` and ``year`` that is absent. Does nothing when the
    pragma returns no rows. Failures are rolled back and logged, not raised.
    """
    # (column name, statement that adds it), applied in this order.
    migrations = (
        ("title", "ALTER TABLE scan_files ADD COLUMN title VARCHAR(255)"),
        ("year", "ALTER TABLE scan_files ADD COLUMN year VARCHAR(10)"),
    )
    session = self.get_session()
    try:
        table_info = session.execute(text("PRAGMA table_info(scan_files)")).fetchall()
        if table_info:
            present = set()
            for row in table_info:
                present.add(row[1])  # column name is index 1
            for column_name, ddl in migrations:
                if column_name not in present:
                    session.execute(text(ddl))
            session.commit()
    except Exception as exc:
        session.rollback()
        logger.error(f"Error migrating scan_files schema: {exc}")
    finally:
        session.close()
|
||||
|
||||
def close_session(self):
|
||||
"""Close the session"""
|
||||
self.Session.remove()
|
||||
@@ -387,6 +429,7 @@ class DatabaseManager:
|
||||
session.add(scan)
|
||||
session.commit()
|
||||
logger.info(f"Scan history saved for {directory}")
|
||||
return scan.id
|
||||
except Exception as e:
|
||||
session.rollback()
|
||||
logger.error(f"Error saving scan history: {e}")
|
||||
@@ -416,6 +459,91 @@ class DatabaseManager:
|
||||
finally:
|
||||
session.close()
|
||||
|
||||
def add_scan_files(self, scan_id, files):
    """Store one ``ScanFile`` row per entry in ``files`` for the given scan.

    All rows are committed together. On any error the transaction is rolled
    back, the error is logged, and the exception is re-raised to the caller.
    """
    session = self.get_session()
    try:
        rows = [
            ScanFile(
                scan_id=scan_id,
                file_path=info.get("path"),
                file_name=info.get("name"),
                title=info.get("title"),
                year=info.get("year"),
                has_plot=bool(info.get("has_plot")),
                plot_marker_count=int(info.get("plot_marker_count") or 0),
                status=info.get("status"),
                summary=info.get("summary", "")
            )
            for info in files
        ]
        session.add_all(rows)
        session.commit()
    except Exception as exc:
        session.rollback()
        logger.error(f"Error saving scan files: {exc}")
        raise
    finally:
        session.close()
|
||||
|
||||
def get_scan_files(self, scan_id):
    """Return every stored file entry of one scan as a list of plain dicts.

    Each dict has the keys ``path``, ``name``, ``title``, ``year``,
    ``has_plot``, ``plot_marker_count``, ``status`` and ``summary``.
    """
    session = self.get_session()
    try:
        entries = []
        for row in session.query(ScanFile).filter_by(scan_id=scan_id).all():
            entries.append({
                "path": row.file_path,
                "name": row.file_name,
                "title": row.title,
                "year": row.year,
                "has_plot": row.has_plot,
                "plot_marker_count": row.plot_marker_count,
                "status": row.status,
                "summary": row.summary
            })
        return entries
    finally:
        session.close()
|
||||
|
||||
def get_latest_scan_files(self):
    """Return the most recent scan entry for each distinct file path.

    Rows are read newest-first by ``created_at``; the first row seen for a
    path wins and later (older) rows for that path are ignored.
    """
    session = self.get_session()
    try:
        newest_first = session.query(ScanFile).order_by(ScanFile.created_at.desc()).all()
        by_path = {}
        for row in newest_first:
            if row.file_path not in by_path:
                by_path[row.file_path] = {
                    "path": row.file_path,
                    "name": row.file_name,
                    "title": row.title,
                    "year": row.year,
                    "has_plot": row.has_plot,
                    "plot_marker_count": row.plot_marker_count,
                    "status": row.status,
                    "summary": row.summary
                }
        return list(by_path.values())
    finally:
        session.close()
|
||||
|
||||
def get_latest_file_results(self):
    """Return the most recent processing result for each file path.

    The result is a dict keyed by file path; each value holds ``status``,
    ``error_message`` and ``processed_at`` (ISO-8601 string, or None).
    """
    session = self.get_session()
    try:
        newest_first = session.query(FileResult).order_by(FileResult.processed_at.desc()).all()
        by_path = {}
        for row in newest_first:
            if row.file_path in by_path:
                # An entry already recorded for this path is newer; keep it.
                continue
            processed = row.processed_at.isoformat() if row.processed_at else None
            by_path[row.file_path] = {
                "status": row.status,
                "error_message": row.error_message,
                "processed_at": processed
            }
        return by_path
    finally:
        session.close()
|
||||
|
||||
# ============ SCHEDULED SCAN OPERATIONS ============
|
||||
|
||||
def create_scheduled_scan(self, directory, scheduled_for):
|
||||
|
||||
+19
-18
@@ -24,7 +24,7 @@ logger.addHandler(handler)
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from subtitle_processor import parse_srt
|
||||
from subtitle_processor import parse_srt, SUBLOGUE_SENTINEL, SUBLOGUE_TOKEN_PATTERN
|
||||
|
||||
|
||||
class FileScanner:
|
||||
@@ -114,7 +114,8 @@ class FileScanner:
|
||||
# --------------------------------------------
|
||||
|
||||
try:
|
||||
has_plot = cls._check_has_plot(file_path)
|
||||
plot_marker_count = cls._count_plot_markers(file_path)
|
||||
has_plot = plot_marker_count > 0
|
||||
logger.debug(
|
||||
"Plot check for %s: %s",
|
||||
file_path.name,
|
||||
@@ -148,6 +149,8 @@ class FileScanner:
|
||||
"path": str(file_path),
|
||||
"name": file_path.name,
|
||||
"has_plot": has_plot,
|
||||
"plot_marker_count": plot_marker_count,
|
||||
"duplicate_plot": plot_marker_count > 1,
|
||||
"status": "Has Plot" if has_plot else "Not Loaded",
|
||||
"summary": metadata.get("summary", ""),
|
||||
"plot": metadata.get("summary", ""),
|
||||
@@ -216,30 +219,25 @@ class FileScanner:
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _check_has_plot(cls, file_path: Path) -> bool:
|
||||
def _count_plot_markers(cls, file_path: Path) -> int:
|
||||
"""
|
||||
Check first N lines for Sublogue signature.
|
||||
Count Sublogue plot markers to detect duplicates.
|
||||
"""
|
||||
logger.debug("Scanning for plot marker in %s", file_path.name)
|
||||
logger.debug("Scanning for plot markers in %s", file_path.name)
|
||||
|
||||
try:
|
||||
with file_path.open("r", encoding="utf-8", errors="ignore") as f:
|
||||
for i, line in enumerate(f):
|
||||
if i >= cls.PLOT_SCAN_LINES:
|
||||
break
|
||||
if "generated by sublogue" in line.lower():
|
||||
logger.debug(
|
||||
"Plot marker found in %s (line %d)",
|
||||
file_path.name, i + 1
|
||||
)
|
||||
return True
|
||||
content = file_path.read_text(encoding="utf-8", errors="ignore")
|
||||
lower_content = content.lower()
|
||||
generated_count = lower_content.count("generated by sublogue")
|
||||
if generated_count > 0:
|
||||
return generated_count
|
||||
return content.count(SUBLOGUE_SENTINEL)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Error reading file during plot scan: %s (%s)",
|
||||
file_path, e
|
||||
)
|
||||
|
||||
return False
|
||||
return 0
|
||||
|
||||
@classmethod
|
||||
def _extract_metadata(cls, file_path: Path) -> Dict:
|
||||
@@ -270,6 +268,7 @@ class FileScanner:
|
||||
|
||||
plot_text = blocks[1].text
|
||||
plot_text = plot_text.split("Generated by Sublogue")[0].strip()
|
||||
plot_text = SUBLOGUE_TOKEN_PATTERN.sub("", plot_text).strip()
|
||||
metadata["summary"] = plot_text
|
||||
|
||||
# --------------------------------------------
|
||||
@@ -279,7 +278,9 @@ class FileScanner:
|
||||
header_lines = blocks[0].text.split("\n")
|
||||
|
||||
if header_lines:
|
||||
first_line = header_lines[0]
|
||||
first_line = header_lines[0].strip()
|
||||
if first_line == SUBLOGUE_SENTINEL and len(header_lines) > 1:
|
||||
first_line = header_lines[1].strip()
|
||||
year_match = re.search(r"\((\d{4})\)", first_line)
|
||||
if year_match:
|
||||
metadata["year"] = year_match.group(1)
|
||||
|
||||
@@ -1463,6 +1463,14 @@ class SubtitleProcessor:
|
||||
)
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
if insertion_position != "end" and not intro_blocks:
|
||||
return {
|
||||
"success": False,
|
||||
"error": "Insufficient gap before first subtitle",
|
||||
"status": "Insufficient Gap",
|
||||
"summary": ""
|
||||
}
|
||||
|
||||
# PHASE 5: Combine intro + original subtitles
|
||||
#
|
||||
# NOTE: We're ONLY renumbering indices (1, 2, 3...), NOT timestamps!
|
||||
|
||||
Reference in New Issue
Block a user