167 lines
5.4 KiB
Python
167 lines
5.4 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from typing import Iterable, List, Tuple
|
|
|
|
from logging_utils import get_logger
|
|
from core.subtitle_processor import SubtitleBlock, parse_srt, format_srt
|
|
|
|
# Module-level logger shared by every function in this file; obtained by
# passing this module's name to the project's get_logger helper.
logger = get_logger(__name__)
|
|
|
|
|
|
# ======================================================================
# ENUMERATE ALL .srt FILES IN PROVIDED FOLDERS
# ======================================================================
|
|
def enumerate_srt_files(folders: Iterable[str]) -> List[Path]:
    """
    Recursively enumerate all .srt files in the given folders.

    Entries that are empty, do not exist, or are not directories are
    skipped with a log message instead of raising. The ``.srt`` extension
    is matched case-insensitively so that files such as ``MOVIE.SRT`` are
    also found on case-sensitive filesystems. A file that is reachable
    through more than one provided folder (repeated or nested entries) is
    returned only once.

    Args:
        folders: Folder paths to scan.

    Returns:
        Paths of regular files whose name ends in ``.srt`` (any casing),
        in discovery order, without duplicates.
    """
    files: List[Path] = []
    # Resolved paths already collected; used to drop duplicates that arise
    # when the provided folders overlap or are listed more than once.
    seen = set()
    logger.info("Starting SRT file enumeration...")

    for folder in folders:
        logger.debug("Inspecting provided folder entry: %r", folder)

        if not folder:
            logger.debug("Skipping empty folder entry.")
            continue

        path = Path(folder)

        if not path.exists():
            logger.warning("Automation folder does not exist: %s", folder)
            continue

        if not path.is_dir():
            logger.warning("Path is not a directory, skipping: %s", folder)
            continue

        logger.info("Scanning folder recursively: %s", folder)
        # Compare the lowercased name rather than globbing for "*.srt":
        # glob matching follows the platform's casing rules, which would
        # skip "*.SRT" on POSIX while finding it on Windows.
        found = [
            p for p in path.rglob("*")
            if p.name.lower().endswith(".srt") and p.is_file()
        ]

        logger.info("Found %d SRT files in %s", len(found), folder)

        for srt_path in found:
            key = srt_path.resolve()
            if key in seen:
                logger.debug("Skipping already enumerated file: %s", srt_path)
                continue
            seen.add(key)
            files.append(srt_path)

    logger.info("Finished enumeration. Total SRT files: %d", len(files))
    return files
|
|
|
|
|
|
# ======================================================================
# REMOVE SUBTITLE LINES MATCHING PATTERNS
# ======================================================================
|
|
def remove_lines_matching_patterns(
    file_path: str,
    patterns: List[str],
    dry_run: bool = False
) -> Tuple[bool, int]:
    """
    Remove any subtitle lines that contain any of the specified patterns.

    Matching is a case-insensitive substring test performed per text line
    of each subtitle block. Patterns are lowercased and stripped of
    surrounding whitespace first; patterns that are empty after that
    normalization are ignored. A block whose lines all match is dropped,
    and the remaining blocks are renumbered starting at 1 before the file
    is rewritten in place.

    Args:
        file_path: Path of the SRT file to process.
        patterns: Substrings to look for in each subtitle line.
        dry_run: When True, matching and counting still happen but the
            file on disk is not modified.

    Returns:
        ``(changed, removed_lines)``. ``(False, 0)`` when no usable pattern
        was supplied or no line matched; otherwise ``(True, n)`` where
        ``n`` is the number of removed lines (also in dry-run mode).

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    logger.info("Starting removal process for file: %s", file_path)

    if not patterns:
        logger.warning("No patterns provided — skipping file.")
        return False, 0

    # Preprocess patterns. Emptiness must be checked AFTER stripping: a
    # whitespace-only pattern would otherwise normalize to "", and the
    # empty string is a substring of every line, wiping the whole file.
    lowered_patterns = [
        normalized
        for normalized in (p.lower().strip() for p in patterns if p)
        if normalized
    ]
    logger.debug("Normalized matching patterns: %s", lowered_patterns)

    if not lowered_patterns:
        # Only blank patterns were supplied; treat like an empty list.
        logger.warning("No patterns provided — skipping file.")
        return False, 0

    path = Path(file_path)

    if not path.exists():
        logger.error("File not found: %s", file_path)
        raise FileNotFoundError(f"File not found: {file_path}")

    # Read file
    # NOTE(review): errors="ignore" silently drops bytes that are not valid
    # UTF-8, and the file is later rewritten as UTF-8, so a non-UTF-8 input
    # loses those characters when changes are written — confirm this
    # best-effort behavior is intended.
    logger.debug("Reading SRT file...")
    content = path.read_text(encoding="utf-8", errors="ignore")

    logger.debug("Parsing SRT blocks...")
    blocks = parse_srt(content)
    logger.info("Parsed %d subtitle blocks from file.", len(blocks))

    removed_lines = 0
    updated_blocks: List[SubtitleBlock] = []

    # ------------------------------------------------------------------
    # Process blocks
    # ------------------------------------------------------------------
    for block in blocks:
        logger.debug("Processing block #%d (%s → %s)",
                     block.index, block.start_time, block.end_time)

        lines = block.text.splitlines()
        kept_lines = []

        for line in lines:
            line_lower = line.lower()

            if any(pattern in line_lower for pattern in lowered_patterns):
                removed_lines += 1
                logger.debug(
                    "Removing line in block %d: %r (matched pattern)",
                    block.index, line
                )
                continue

            kept_lines.append(line)

        if kept_lines:
            logger.debug(
                "Block %d kept with %d/%d lines remaining.",
                block.index, len(kept_lines), len(lines)
            )
            updated_blocks.append(
                SubtitleBlock(
                    index=block.index,
                    start_time=block.start_time,
                    end_time=block.end_time,
                    text="\n".join(kept_lines).strip(),
                )
            )
        else:
            logger.debug("Block %d removed entirely — all lines matched patterns.", block.index)

    # ------------------------------------------------------------------
    # No changes detected
    # ------------------------------------------------------------------
    if removed_lines == 0:
        logger.info("No lines removed from file: %s", file_path)
        return False, 0

    logger.info(
        "Removed %d lines across SRT blocks (%d remaining blocks → will renumber).",
        removed_lines, len(updated_blocks)
    )

    # ------------------------------------------------------------------
    # Renumber blocks
    # ------------------------------------------------------------------
    # Dropped blocks leave gaps in the numbering, so indices are reassigned
    # sequentially from 1.
    renumbered = [
        SubtitleBlock(new_index, b.start_time, b.end_time, b.text)
        for new_index, b in enumerate(updated_blocks, start=1)
    ]

    logger.debug(
        "Renumbered blocks: old count=%d, new count=%d",
        len(blocks), len(renumbered)
    )

    # ------------------------------------------------------------------
    # Write changes
    # ------------------------------------------------------------------
    if dry_run:
        logger.info("Dry-run mode — changes NOT written to disk for: %s", file_path)
    else:
        logger.info("Writing updated SRT file to disk: %s", file_path)
        path.write_text(format_srt(renumbered), encoding="utf-8")

    logger.info("Completed processing for file: %s", file_path)

    return True, removed_lines
|