1.1.0 - automations, clean only mode, bug fixes

2026-01-19 02:10:08 +13:00
parent 93e8b38e24
commit 9345ac4331
25 changed files with 2690 additions and 499 deletions
@@ -1373,6 +1373,8 @@ class SubtitleProcessor:
                # ─────────────────────────────────────────────────────────────
                original = file_path.read_text(encoding="utf-8", errors="ignore")
                subs = parse_srt(original)
+                stripper = get_stripper()
+                detected_keywords = stripper.detect_subtitle_watermarks(original)

                if not subs:
                    return self._fail("No valid subtitle blocks found")
@@ -1540,6 +1542,116 @@ class SubtitleProcessor:
            logger.error(f"Could not acquire lock for {file_path.name}: {e}")
            return self._fail(f"File is being processed by another task: {e}")

+    def clean_file(
+        self,
+        file_path: str | Path,
+        clean_subtitle_content: bool = True,
+    ) -> dict:
+        """Clean ad/watermark content from a subtitle file without inserting plots."""
+        file_path = Path(file_path)
+
+        if not file_path.exists():
+            return self._fail("File not found")
+
+        if file_path.stat().st_size > self.MAX_SRT_BYTES:
+            return self._fail("Subtitle file too large")
+
+        try:
+            with file_lock(file_path, timeout=30.0):
+                original = file_path.read_text(encoding="utf-8", errors="ignore")
+                subs = parse_srt(original)
+
+                if not subs:
+                    return self._fail("No valid subtitle blocks found")
+
+                original_blocks = subs
+                removed_count = 0
+                modified_count = 0
+
+                if clean_subtitle_content:
+                    cleaned_blocks: List[SubtitleBlock] = []
+
+                    for block in original_blocks:
+                        text = block.text
+                        if stripper.should_remove_subtitle_block(text):
+                            removed_count += 1
+                            continue
+
+                        cleaned_text = stripper.clean_subtitle_text(text)
+                        if not cleaned_text.strip():
+                            removed_count += 1
+                            continue
+
+                        if cleaned_text != text:
+                            modified_count += 1
+
+                        cleaned_blocks.append(
+                            SubtitleBlock(
+                                block.index,
+                                block.start_time,
+                                block.end_time,
+                                cleaned_text,
+                            )
+                        )
+                else:
+                    cleaned_blocks = list(original_blocks)
+
+                sanitized = sanitize_all_blocks(cleaned_blocks)
+                if len(sanitized) < len(cleaned_blocks):
+                    removed_count += len(cleaned_blocks) - len(sanitized)
+
+                if not sanitized:
+                    return self._fail("No dialogue subtitles found after cleaning")
+
+                renumbered = [
+                    SubtitleBlock(i + 1, b.start_time, b.end_time, b.text)
+                    for i, b in enumerate(sanitized)
+                ]
+
+                changed = len(renumbered) != len(original_blocks)
+                if not changed:
+                    for updated, original_block in zip(renumbered, original_blocks):
+                        if (
+                            updated.start_time != original_block.start_time
+                            or updated.end_time != original_block.end_time
+                            or updated.text != original_block.text
+                        ):
+                            changed = True
+                            break
+
+                if not changed:
+                    return {
+                        "success": True,
+                        "status": "Skipped",
+                        "summary": "No changes needed",
+                        "removed_blocks": 0,
+                        "modified_blocks": 0,
+                        "clean_keywords": detected_keywords,
+                    }
+
+                tmp = file_path.with_suffix(".srt.tmp")
+                tmp.write_text(format_srt(renumbered), encoding="utf-8")
+                tmp.replace(file_path)
+
+                summary = (
+                    f"Removed {removed_count} ad blocks, modified {modified_count} blocks"
+                    if clean_subtitle_content
+                    else "Cleaned subtitle content"
+                )
+
+                return {
+                    "success": True,
+                    "status": "Cleaned",
+                    "summary": summary,
+                    "removed_blocks": removed_count,
+                    "modified_blocks": modified_count,
+                    "clean_keywords": detected_keywords,
+                }
+
+        except FileLockError as e:
+            logger.error(f"Could not acquire lock for {file_path.name}: {e}")
+            return self._fail(f"File is being processed by another task: {e}")
+
    # ========================================================
    # Metadata fetching
    # ========================================================