This commit is contained in:
ponzischeme89
2026-01-29 15:15:45 +13:00
parent abc15e63d6
commit 10b3d2277b
3 changed files with 187 additions and 13 deletions
+99 -7
View File
@@ -9,48 +9,113 @@ from core.subtitle_processor import SubtitleBlock, parse_srt, format_srt
logger = get_logger(__name__)
# ======================================================================
# ENUMERATE ALL .srt FILES IN PROVIDED FOLDERS
# ======================================================================
def enumerate_srt_files(folders: Iterable[str]) -> List[Path]:
"""
Recursively enumerate all .srt files in the given folders.
"""
files: List[Path] = []
logger.info("Starting SRT file enumeration...")
for folder in folders:
logger.debug("Inspecting provided folder entry: %r", folder)
if not folder:
logger.debug("Skipping empty folder entry.")
continue
path = Path(folder)
if not path.exists():
logger.warning("Automation folder does not exist: %s", folder)
continue
if not path.is_dir():
logger.warning("Path is not a directory, skipping: %s", folder)
continue
files.extend([p for p in path.rglob("*.srt") if p.is_file()])
logger.info("Scanning folder recursively: %s", folder)
found = [p for p in path.rglob("*.srt") if p.is_file()]
logger.info("Found %d SRT files in %s", len(found), folder)
files.extend(found)
logger.info("Finished enumeration. Total SRT files: %d", len(files))
return files
def remove_lines_matching_patterns(file_path: str, patterns: List[str], dry_run: bool = False) -> Tuple[bool, int]:
"""Remove subtitle lines containing any of the provided patterns."""
# ======================================================================
# REMOVE SUBTITLE LINES MATCHING PATTERNS
# ======================================================================
def remove_lines_matching_patterns(
file_path: str,
patterns: List[str],
dry_run: bool = False
) -> Tuple[bool, int]:
"""
Remove any subtitle lines that contain any of the specified patterns.
"""
logger.info("Starting removal process for file: %s", file_path)
if not patterns:
logger.warning("No patterns provided — skipping file.")
return False, 0
# Preprocess patterns
lowered_patterns = [p.lower().strip() for p in patterns if p]
logger.debug("Normalized matching patterns: %s", lowered_patterns)
path = Path(file_path)
if not path.exists():
logger.error("File not found: %s", file_path)
raise FileNotFoundError(f"File not found: {file_path}")
# Read file
logger.debug("Reading SRT file...")
content = path.read_text(encoding="utf-8", errors="ignore")
blocks = parse_srt(content)
lowered_patterns = [p.lower() for p in patterns if p]
logger.debug("Parsing SRT blocks...")
blocks = parse_srt(content)
logger.info("Parsed %d subtitle blocks from file.", len(blocks))
removed_lines = 0
updated_blocks: List[SubtitleBlock] = []
# ------------------------------------------------------------------
# Process blocks
# ------------------------------------------------------------------
for block in blocks:
logger.debug("Processing block #%d (%s%s)",
block.index, block.start_time, block.end_time)
lines = block.text.splitlines()
kept_lines = []
for line in lines:
line_lower = line.lower()
if any(pattern in line_lower for pattern in lowered_patterns):
# Log each check
match_hit = any(pattern in line_lower for pattern in lowered_patterns)
if match_hit:
removed_lines += 1
logger.debug(
"Removing line in block %d: %r (matched pattern)",
block.index, line
)
continue
kept_lines.append(line)
if kept_lines:
logger.debug(
"Block %d kept with %d/%d lines remaining.",
block.index, len(kept_lines), len(lines)
)
updated_blocks.append(
SubtitleBlock(
index=block.index,
@@ -59,16 +124,43 @@ def remove_lines_matching_patterns(file_path: str, patterns: List[str], dry_run:
text="\n".join(kept_lines).strip(),
)
)
else:
logger.debug("Block %d removed entirely — all lines matched patterns.", block.index)
# ------------------------------------------------------------------
# No changes detected
# ------------------------------------------------------------------
if removed_lines == 0:
logger.info("No lines removed from file: %s", file_path)
return False, 0
logger.info(
"Removed %d lines across SRT blocks (%d remaining blocks → will renumber).",
removed_lines, len(updated_blocks)
)
# ------------------------------------------------------------------
# Renumber blocks
# ------------------------------------------------------------------
renumbered = [
SubtitleBlock(i + 1, b.start_time, b.end_time, b.text)
for i, b in enumerate(updated_blocks)
]
if not dry_run:
logger.debug(
"Renumbered blocks: old count=%d, new count=%d",
len(blocks), len(renumbered)
)
# ------------------------------------------------------------------
# Write changes
# ------------------------------------------------------------------
if dry_run:
logger.info("Dry-run mode — changes NOT written to disk for: %s", file_path)
else:
logger.info("Writing updated SRT file to disk: %s", file_path)
path.write_text(format_srt(renumbered), encoding="utf-8")
logger.info("Completed processing for file: %s", file_path)
return True, removed_lines
+87 -5
View File
@@ -14,61 +14,102 @@ logger = get_logger(__name__)
class AutomationEngine:
"""Scheduler for automation rules."""
"""Scheduler for automation rules with aggressive diagnostic logging."""
def __init__(self, db_manager):
self.db_manager = db_manager
self._scheduler = BackgroundScheduler(daemon=True)
self._lock = threading.Lock()
self._started = False
logger.debug("AutomationEngine initialized with db_manager=%s", type(db_manager).__name__)
# ------------------------------------------------------------------
# START / STOP
# ------------------------------------------------------------------
def start(self):
with self._lock:
if self._started:
logger.warning("Automation scheduler start() called, but it's already started.")
return
logger.info("Starting automation scheduler...")
self._scheduler.start()
self._started = True
self.reload_rules()
logger.info("Automation scheduler started")
logger.info("Automation scheduler successfully started.")
def shutdown(self):
with self._lock:
if not self._started:
logger.warning("shutdown() called but scheduler was not started.")
return
logger.info("Shutting down automation scheduler...")
self._scheduler.shutdown(wait=False)
self._started = False
logger.info("Automation scheduler stopped")
logger.info("Automation scheduler fully stopped.")
# ------------------------------------------------------------------
# RULE MANAGEMENT
# ------------------------------------------------------------------
def reload_rules(self):
"""Reload all automation rules from storage."""
logger.info("Reloading automation rules from database...")
with self._lock:
self._scheduler.remove_all_jobs()
logger.debug("Cleared all scheduled jobs.")
rules = self._load_rules()
logger.info("Loaded %d automation rules.", len(rules))
for rule in rules:
logger.debug("Evaluating rule %s (enabled=%s, schedule=%s)", rule.id, rule.enabled, rule.schedule)
if not rule.enabled:
logger.info("Rule %s is disabled — skipping scheduling.", rule.id)
continue
self._schedule_rule(rule)
def run_rule_now(self, rule_id: str, dry_run: bool = False) -> dict:
logger.info("Manual execution requested for rule %s (dry_run=%s)", rule_id, dry_run)
rule = self._get_rule(rule_id)
if not rule:
logger.error("Cannot execute — rule %s not found.", rule_id)
return {"success": False, "error": "Rule not found"}
return self._execute_rule(rule, dry_run=dry_run)
def _load_rules(self) -> List[AutomationRule]:
logger.debug("Fetching automation rules from database...")
rules_raw = self.db_manager.get_automation_rules()
return [AutomationRule.from_dict(r) for r in rules_raw]
rules = [AutomationRule.from_dict(r) for r in rules_raw]
logger.debug("Database returned %d rules.", len(rules))
return rules
def _get_rule(self, rule_id: str) -> Optional[AutomationRule]:
logger.debug("Fetching rule %s from database...", rule_id)
raw = self.db_manager.get_automation_rule(rule_id)
if raw:
logger.debug("Rule %s found.", rule_id)
else:
logger.warning("Rule %s not found.", rule_id)
return AutomationRule.from_dict(raw) if raw else None
# ------------------------------------------------------------------
# SCHEDULING
# ------------------------------------------------------------------
def _schedule_rule(self, rule: AutomationRule):
logger.info("Scheduling rule %s with cron: %s", rule.id, rule.schedule)
try:
trigger = CronTrigger.from_crontab(rule.schedule)
except ValueError as e:
logger.error("Invalid cron schedule for rule %s: %s", rule.id, e)
return
self._scheduler.add_job(
self._run_rule_job,
trigger=trigger,
@@ -79,28 +120,61 @@ class AutomationEngine:
max_instances=1,
)
logger.info("Rule %s scheduled successfully.", rule.id)
# ------------------------------------------------------------------
# JOB EXECUTION
# ------------------------------------------------------------------
def _run_rule_job(self, rule_id: str):
logger.info("Executing scheduled job for rule %s...", rule_id)
rule = self._get_rule(rule_id)
if not rule or not rule.enabled:
if not rule:
logger.error("Scheduled rule %s no longer exists in database.", rule_id)
return
if not rule.enabled:
logger.info("Scheduled rule %s is now disabled — skipping execution.", rule_id)
return
self._execute_rule(rule, dry_run=False)
# ------------------------------------------------------------------
# CORE EXECUTION LOGIC
# ------------------------------------------------------------------
def _execute_rule(self, rule: AutomationRule, dry_run: bool) -> dict:
logger.info(
"Executing rule %s (dry_run=%s). Target folders: %s | Patterns: %s",
rule.id, dry_run, rule.target_folders, rule.patterns
)
files = enumerate_srt_files(rule.target_folders)
logger.info("Rule %s scanning %d SRT files...", rule.id, len(files))
modified = 0
total_removed = 0
errors: List[str] = []
for file_path in files:
logger.debug("Processing file: %s", file_path)
try:
did_modify, removed_lines = remove_lines_matching_patterns(
str(file_path),
rule.patterns,
dry_run=dry_run,
)
logger.debug(
"File processed: modified=%s, removed_lines=%d, path=%s",
did_modify, removed_lines, file_path
)
if did_modify:
modified += 1
total_removed += removed_lines
# log into DB
self.db_manager.add_automation_log(
rule_id=rule.id,
file_path=str(file_path),
@@ -109,8 +183,11 @@ class AutomationEngine:
dry_run=dry_run,
error_message=None,
)
except Exception as e:
logger.exception("Error while processing file %s under rule %s", file_path, rule.id)
errors.append(f"{file_path}: {e}")
self.db_manager.add_automation_log(
rule_id=rule.id,
file_path=str(file_path),
@@ -120,6 +197,11 @@ class AutomationEngine:
error_message=str(e),
)
logger.info(
"Finished executing rule %s. Files scanned=%d | Modified=%d | Removed lines=%d | Errors=%d",
rule.id, len(files), modified, total_removed, len(errors)
)
return {
"success": True,
"rule_id": rule.id,