"""SRT automation helpers.

Enumerate .srt subtitle files under a set of folders and remove subtitle
lines that match user-supplied patterns.
"""

from __future__ import annotations
from pathlib import Path
from typing import Iterable, List, Tuple
from logging_utils import get_logger
from core.subtitle_processor import SubtitleBlock, parse_srt, format_srt
logger = get_logger(__name__)
# ======================================================================
# ENUMERATE ALL .srt FILES IN PROVIDED FOLDERS
# ======================================================================
def enumerate_srt_files(folders: Iterable[str]) -> List[Path]:
    """
    Recursively enumerate all .srt files in the given folders.

    Args:
        folders: Folder paths to scan. Empty entries, paths that do not
            exist and paths that are not directories are skipped with a log
            message instead of raising. A single path (``str`` or ``Path``)
            is treated as one folder rather than iterated element-wise.

    Returns:
        Every regular file whose name ends in ".srt" (any letter case),
        grouped by input folder in the order given and sorted within each
        folder. A file reachable through more than one input folder is
        returned only once.
    """
    # A bare string is itself iterable; without this guard every character
    # would be treated as a separate folder.
    if isinstance(folders, (str, Path)):
        folders = [folders]

    files: List[Path] = []
    # Resolved paths already collected, so overlapping or repeated folders
    # do not yield the same file twice.
    seen = set()

    logger.info("Starting SRT file enumeration...")
    for folder in folders:
        logger.debug("Inspecting provided folder entry: %r", folder)
        if not folder:
            logger.debug("Skipping empty folder entry.")
            continue

        path = Path(folder)
        if not path.exists():
            logger.warning("Automation folder does not exist: %s", folder)
            continue
        if not path.is_dir():
            logger.warning("Path is not a directory, skipping: %s", folder)
            continue

        logger.info("Scanning folder recursively: %s", folder)
        # rglob("*.srt") is case-sensitive on POSIX and would miss "x.SRT",
        # so walk everything and compare the lower-cased name instead. The
        # cheap name test runs before is_file() to avoid needless stat calls.
        # Sorting makes the result independent of directory listing order.
        matches = sorted(
            p for p in path.rglob("*")
            if p.name.lower().endswith(".srt") and p.is_file()
        )

        found: List[Path] = []
        for candidate in matches:
            key = candidate.resolve()
            if key in seen:
                logger.debug("Skipping already enumerated file: %s", candidate)
                continue
            seen.add(key)
            found.append(candidate)

        logger.info("Found %d SRT files in %s", len(found), folder)
        files.extend(found)

    logger.info("Finished enumeration. Total SRT files: %d", len(files))
    return files
# ======================================================================
# REMOVE SUBTITLE LINES MATCHING PATTERNS
# ======================================================================
def _normalize_patterns(patterns: Iterable[str]) -> List[str]:
    """Return lower-cased, stripped patterns with blank entries discarded."""
    normalized = (p.lower().strip() for p in patterns if p)
    # Filter AFTER stripping: a whitespace-only pattern collapses to "" and
    # "" is a substring of every line, which would delete the whole file.
    return [p for p in normalized if p]


def remove_lines_matching_patterns(
    file_path: str,
    patterns: List[str],
    dry_run: bool = False
) -> Tuple[bool, int]:
    """
    Remove any subtitle lines that contain any of the specified patterns.

    Matching is a case-insensitive substring test of each pattern (stripped
    of surrounding whitespace) against each text line of each subtitle
    block. Blocks left without any text are dropped and the remaining blocks
    are renumbered from 1.

    Args:
        file_path: Path of the .srt file to process.
        patterns: Substrings to look for. Empty and whitespace-only entries
            are ignored.
        dry_run: When True, do everything except write the file.

    Returns:
        ``(changed, removed_lines)``. ``changed`` is True when at least one
        line matched (even in dry-run mode, where nothing is written);
        otherwise ``(False, 0)``.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    logger.info("Starting removal process for file: %s", file_path)
    if not patterns:
        logger.warning("No patterns provided — skipping file.")
        return False, 0

    # Preprocess patterns
    lowered_patterns = _normalize_patterns(patterns)
    logger.debug("Normalized matching patterns: %s", lowered_patterns)
    if not lowered_patterns:
        logger.warning("All provided patterns are blank — skipping file.")
        return False, 0

    path = Path(file_path)
    if not path.exists():
        logger.error("File not found: %s", file_path)
        raise FileNotFoundError(f"File not found: {file_path}")

    # Read file
    logger.debug("Reading SRT file...")
    # NOTE(review): errors="ignore" silently drops bytes that are not valid
    # UTF-8, so a file in another encoding loses those characters for good
    # once it is rewritten below. Confirm inputs are always UTF-8.
    content = path.read_text(encoding="utf-8", errors="ignore")

    logger.debug("Parsing SRT blocks...")
    blocks = parse_srt(content)
    logger.info("Parsed %d subtitle blocks from file.", len(blocks))

    removed_lines = 0
    updated_blocks: List[SubtitleBlock] = []

    # ------------------------------------------------------------------
    # Process blocks
    # ------------------------------------------------------------------
    for block in blocks:
        logger.debug(
            "Processing block #%d (%s --> %s)",
            block.index, block.start_time, block.end_time
        )
        lines = block.text.splitlines()
        kept_lines = []

        for line in lines:
            line_lower = line.lower()
            if any(pattern in line_lower for pattern in lowered_patterns):
                removed_lines += 1
                logger.debug(
                    "Removing line in block %d: %r (matched pattern)",
                    block.index, line
                )
                continue
            kept_lines.append(line)

        # Decide on the stripped text, not the raw line list: a block whose
        # surviving lines are only whitespace has nothing left to display.
        remaining_text = "\n".join(kept_lines).strip()
        if remaining_text:
            logger.debug(
                "Block %d kept with %d/%d lines remaining.",
                block.index, len(kept_lines), len(lines)
            )
            updated_blocks.append(
                SubtitleBlock(
                    index=block.index,
                    start_time=block.start_time,
                    end_time=block.end_time,
                    text=remaining_text,
                )
            )
        else:
            logger.debug(
                "Block %d removed entirely — no subtitle text remains.",
                block.index
            )

    # ------------------------------------------------------------------
    # No changes detected
    # ------------------------------------------------------------------
    if removed_lines == 0:
        logger.info("No lines removed from file: %s", file_path)
        return False, 0

    logger.info(
        "Removed %d lines across SRT blocks (%d remaining blocks → will renumber).",
        removed_lines, len(updated_blocks)
    )

    # ------------------------------------------------------------------
    # Renumber blocks
    # ------------------------------------------------------------------
    renumbered = [
        SubtitleBlock(
            index=position,
            start_time=kept.start_time,
            end_time=kept.end_time,
            text=kept.text,
        )
        for position, kept in enumerate(updated_blocks, start=1)
    ]
    logger.debug(
        "Renumbered blocks: old count=%d, new count=%d",
        len(blocks), len(renumbered)
    )

    # ------------------------------------------------------------------
    # Write changes
    # ------------------------------------------------------------------
    if dry_run:
        logger.info("Dry-run mode — changes NOT written to disk for: %s", file_path)
    else:
        logger.info("Writing updated SRT file to disk: %s", file_path)
        path.write_text(format_srt(renumbered), encoding="utf-8")

    logger.info("Completed processing for file: %s", file_path)
    return True, removed_lines