1.0.0.7 - Improved matching, added library page. Removed scheduled scan support
This commit is contained in:
+129
-1
@@ -4,7 +4,7 @@ Handles persistent storage for settings, runs, and history
|
||||
"""
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from sqlalchemy import create_engine, Column, Integer, String, DateTime, Boolean, Float, Text, ForeignKey
|
||||
from sqlalchemy import create_engine, Column, Integer, String, DateTime, Boolean, Float, Text, ForeignKey, text
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import sessionmaker, relationship, scoped_session
|
||||
import json
|
||||
@@ -85,6 +85,28 @@ class ScanHistory(Base):
|
||||
return f"<ScanHistory(id={self.id}, directory='{self.directory}', files_found={self.files_found})>"
|
||||
|
||||
|
||||
class ScanFile(Base):
    """ORM model for the ``scan_files`` table, which stores file details per scan.

    Every row points at the ``scan_history`` row it belongs to via ``scan_id``.
    """

    __tablename__ = 'scan_files'

    # Primary key and the (indexed, required) link to the owning scan.
    id = Column(Integer, primary_key=True)
    scan_id = Column(Integer, ForeignKey('scan_history.id'), nullable=False, index=True)

    # Where the file lives; the path is indexed, both values are required.
    file_path = Column(String(500), nullable=False, index=True)
    file_name = Column(String(255), nullable=False)

    # Optional descriptive fields; the year is kept as a short string.
    title = Column(String(255))
    year = Column(String(10))

    # Plot detection outcome recorded for the file.
    has_plot = Column(Boolean, default=False)
    plot_marker_count = Column(Integer, default=0)

    # Free-form status label and summary text.
    status = Column(String(100))
    summary = Column(Text)

    # Row creation time; defaults to the (naive) UTC time at insert.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    # Access to the related ScanHistory object.
    scan = relationship("ScanHistory")

    def __repr__(self):
        """Return a short debugging representation of the row."""
        return f"<ScanFile(id={self.id}, file_name='{self.file_name}', has_plot={self.has_plot})>"
|
||||
|
||||
|
||||
class ScheduledScan(Base):
|
||||
"""Scheduled scans table - stores scheduled scan jobs and results"""
|
||||
__tablename__ = 'scheduled_scans'
|
||||
@@ -172,12 +194,32 @@ class DatabaseManager:
|
||||
|
||||
# Create tables if they don't exist
|
||||
Base.metadata.create_all(self.engine)
|
||||
self._ensure_scan_files_schema()
|
||||
logger.info(f"Database initialized at {self.db_path}")
|
||||
|
||||
def get_session(self):
|
||||
"""Get a new database session"""
|
||||
return self.Session()
|
||||
|
||||
def _ensure_scan_files_schema(self):
    """Add the newer ``title`` / ``year`` columns to an existing ``scan_files`` table.

    The current columns are read with ``PRAGMA table_info``. If that returns
    no rows, nothing is changed. Any failure is rolled back and logged rather
    than raised, and the session is always closed.
    """
    # Pairs of (column name, statement that adds the column when it is missing).
    pending_columns = (
        ("title", "ALTER TABLE scan_files ADD COLUMN title VARCHAR(255)"),
        ("year", "ALTER TABLE scan_files ADD COLUMN year VARCHAR(10)"),
    )

    db = self.get_session()
    try:
        table_info = db.execute(text("PRAGMA table_info(scan_files)")).fetchall()
        if table_info:
            # The column name sits at index 1 of each table_info row.
            present = {info[1] for info in table_info}
            for column_name, ddl in pending_columns:
                if column_name not in present:
                    db.execute(text(ddl))
            db.commit()
    except Exception as e:
        db.rollback()
        logger.error(f"Error migrating scan_files schema: {e}")
    finally:
        db.close()
|
||||
|
||||
def close_session(self):
|
||||
"""Close the session"""
|
||||
self.Session.remove()
|
||||
@@ -387,6 +429,7 @@ class DatabaseManager:
|
||||
session.add(scan)
|
||||
session.commit()
|
||||
logger.info(f"Scan history saved for {directory}")
|
||||
return scan.id
|
||||
except Exception as e:
|
||||
session.rollback()
|
||||
logger.error(f"Error saving scan history: {e}")
|
||||
@@ -416,6 +459,91 @@ class DatabaseManager:
|
||||
finally:
|
||||
session.close()
|
||||
|
||||
def add_scan_files(self, scan_id, files):
    """Store one ``ScanFile`` row per entry of ``files`` under the given scan.

    Args:
        scan_id: Value written to each row's ``scan_id``.
        files: Iterable of dict-like entries read with ``.get`` for the keys
            ``path``, ``name``, ``title``, ``year``, ``has_plot``,
            ``plot_marker_count``, ``status`` and ``summary``.

    Raises:
        Exception: Any error while building or committing the rows is logged,
            rolled back, and re-raised.
    """
    db = self.get_session()
    try:
        rows = [
            ScanFile(
                scan_id=scan_id,
                file_path=entry.get("path"),
                file_name=entry.get("name"),
                title=entry.get("title"),
                year=entry.get("year"),
                has_plot=bool(entry.get("has_plot")),
                # A missing or falsy marker count is stored as 0.
                plot_marker_count=int(entry.get("plot_marker_count") or 0),
                status=entry.get("status"),
                summary=entry.get("summary", "")
            )
            for entry in files
        ]
        db.add_all(rows)
        db.commit()
    except Exception as e:
        db.rollback()
        logger.error(f"Error saving scan files: {e}")
        raise
    finally:
        db.close()
|
||||
|
||||
def get_scan_files(self, scan_id):
    """Return the stored files of one scan as a list of plain dicts.

    Args:
        scan_id: Scan whose ``ScanFile`` rows are fetched.

    Returns:
        A list with one dict per row, holding the keys ``path``, ``name``,
        ``title``, ``year``, ``has_plot``, ``plot_marker_count``, ``status``
        and ``summary``.
    """
    db = self.get_session()
    try:
        rows = db.query(ScanFile).filter_by(scan_id=scan_id).all()
        payload = []
        for row in rows:
            payload.append({
                "path": row.file_path,
                "name": row.file_name,
                "title": row.title,
                "year": row.year,
                "has_plot": row.has_plot,
                "plot_marker_count": row.plot_marker_count,
                "status": row.status,
                "summary": row.summary
            })
        return payload
    finally:
        db.close()
|
||||
|
||||
def get_latest_scan_files(self):
    """Return the most recent scan entry for every distinct file path.

    All ``ScanFile`` rows are read newest-first by ``created_at``. The first
    row seen for a path wins and later (older) rows are ignored.

    Returns:
        A list of dicts with the keys ``path``, ``name``, ``title``, ``year``,
        ``has_plot``, ``plot_marker_count``, ``status`` and ``summary``.
    """
    db = self.get_session()
    try:
        newest_first = db.query(ScanFile).order_by(ScanFile.created_at.desc()).all()
        by_path = {}
        for row in newest_first:
            # Only the first (newest) row per path is kept.
            if row.file_path not in by_path:
                by_path[row.file_path] = {
                    "path": row.file_path,
                    "name": row.file_name,
                    "title": row.title,
                    "year": row.year,
                    "has_plot": row.has_plot,
                    "plot_marker_count": row.plot_marker_count,
                    "status": row.status,
                    "summary": row.summary
                }
        return list(by_path.values())
    finally:
        db.close()
|
||||
|
||||
def get_latest_file_results(self):
    """Return the most recent processing result for every file path.

    ``FileResult`` rows are read newest-first by ``processed_at``, and only
    the first row seen for each path is kept.

    Returns:
        A dict mapping file path to a dict with ``status``, ``error_message``
        and ``processed_at`` (ISO-8601 string, or ``None`` when unset).
    """
    db = self.get_session()
    try:
        newest_first = db.query(FileResult).order_by(FileResult.processed_at.desc()).all()
        by_path = {}
        for row in newest_first:
            # A path already recorded has a newer result; skip the older one.
            if row.file_path in by_path:
                continue
            processed = row.processed_at
            by_path[row.file_path] = {
                "status": row.status,
                "error_message": row.error_message,
                "processed_at": processed.isoformat() if processed else None
            }
        return by_path
    finally:
        db.close()
|
||||
|
||||
# ============ SCHEDULED SCAN OPERATIONS ============
|
||||
|
||||
def create_scheduled_scan(self, directory, scheduled_for):
|
||||
|
||||
+19
-18
@@ -24,7 +24,7 @@ logger.addHandler(handler)
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from subtitle_processor import parse_srt
|
||||
from subtitle_processor import parse_srt, SUBLOGUE_SENTINEL, SUBLOGUE_TOKEN_PATTERN
|
||||
|
||||
|
||||
class FileScanner:
|
||||
@@ -114,7 +114,8 @@ class FileScanner:
|
||||
# --------------------------------------------
|
||||
|
||||
try:
|
||||
has_plot = cls._check_has_plot(file_path)
|
||||
plot_marker_count = cls._count_plot_markers(file_path)
|
||||
has_plot = plot_marker_count > 0
|
||||
logger.debug(
|
||||
"Plot check for %s: %s",
|
||||
file_path.name,
|
||||
@@ -148,6 +149,8 @@ class FileScanner:
|
||||
"path": str(file_path),
|
||||
"name": file_path.name,
|
||||
"has_plot": has_plot,
|
||||
"plot_marker_count": plot_marker_count,
|
||||
"duplicate_plot": plot_marker_count > 1,
|
||||
"status": "Has Plot" if has_plot else "Not Loaded",
|
||||
"summary": metadata.get("summary", ""),
|
||||
"plot": metadata.get("summary", ""),
|
||||
@@ -216,30 +219,25 @@ class FileScanner:
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _check_has_plot(cls, file_path: Path) -> bool:
|
||||
def _count_plot_markers(cls, file_path: Path) -> int:
|
||||
"""
|
||||
Check first N lines for Sublogue signature.
|
||||
Count Sublogue plot markers to detect duplicates.
|
||||
"""
|
||||
logger.debug("Scanning for plot marker in %s", file_path.name)
|
||||
logger.debug("Scanning for plot markers in %s", file_path.name)
|
||||
|
||||
try:
|
||||
with file_path.open("r", encoding="utf-8", errors="ignore") as f:
|
||||
for i, line in enumerate(f):
|
||||
if i >= cls.PLOT_SCAN_LINES:
|
||||
break
|
||||
if "generated by sublogue" in line.lower():
|
||||
logger.debug(
|
||||
"Plot marker found in %s (line %d)",
|
||||
file_path.name, i + 1
|
||||
)
|
||||
return True
|
||||
content = file_path.read_text(encoding="utf-8", errors="ignore")
|
||||
lower_content = content.lower()
|
||||
generated_count = lower_content.count("generated by sublogue")
|
||||
if generated_count > 0:
|
||||
return generated_count
|
||||
return content.count(SUBLOGUE_SENTINEL)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Error reading file during plot scan: %s (%s)",
|
||||
file_path, e
|
||||
)
|
||||
|
||||
return False
|
||||
return 0
|
||||
|
||||
@classmethod
|
||||
def _extract_metadata(cls, file_path: Path) -> Dict:
|
||||
@@ -270,6 +268,7 @@ class FileScanner:
|
||||
|
||||
plot_text = blocks[1].text
|
||||
plot_text = plot_text.split("Generated by Sublogue")[0].strip()
|
||||
plot_text = SUBLOGUE_TOKEN_PATTERN.sub("", plot_text).strip()
|
||||
metadata["summary"] = plot_text
|
||||
|
||||
# --------------------------------------------
|
||||
@@ -279,7 +278,9 @@ class FileScanner:
|
||||
header_lines = blocks[0].text.split("\n")
|
||||
|
||||
if header_lines:
|
||||
first_line = header_lines[0]
|
||||
first_line = header_lines[0].strip()
|
||||
if first_line == SUBLOGUE_SENTINEL and len(header_lines) > 1:
|
||||
first_line = header_lines[1].strip()
|
||||
year_match = re.search(r"\((\d{4})\)", first_line)
|
||||
if year_match:
|
||||
metadata["year"] = year_match.group(1)
|
||||
|
||||
@@ -1463,6 +1463,14 @@ class SubtitleProcessor:
|
||||
)
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
if insertion_position != "end" and not intro_blocks:
|
||||
return {
|
||||
"success": False,
|
||||
"error": "Insufficient gap before first subtitle",
|
||||
"status": "Insufficient Gap",
|
||||
"summary": ""
|
||||
}
|
||||
|
||||
# PHASE 5: Combine intro + original subtitles
|
||||
#
|
||||
# NOTE: We're ONLY renumbering indices (1, 2, 3...), NOT timestamps!
|
||||
|
||||
Reference in New Issue
Block a user