more episodes 1 2 3 4

This commit is contained in:
2026-03-04 19:08:15 +08:00
parent 2395c048ff
commit 9330766b1b
18 changed files with 5620 additions and 950 deletions

View File

@@ -11,6 +11,7 @@ Usage:
"""
import os
import re
import sys
import json
from pathlib import Path
@@ -113,6 +114,14 @@ def transcribe_video(video_path: Path) -> dict:
return result
def extract_episode_code(filename: str) -> "str | None":
    """Return the episode code embedded in *filename*, or None if absent.

    The code is the first ``SxxExx`` occurrence (two digits each), matched
    case-insensitively and normalized to upper case, e.g.
    'S01E01_The_Eye.mp4' -> 'S01E01'.

    Args:
        filename: File name (or any other string) to search.

    Returns:
        The upper-cased code such as 'S01E01', or None when no ``SxxExx``
        pattern occurs anywhere in *filename*. Callers must handle None.
    """
    match = re.search(r'(S\d{2}E\d{2})', filename, re.IGNORECASE)
    if match is None:
        # No episode code present; signal this with None rather than raising
        # so the caller decides how to report it.
        return None
    return match.group(1).upper()
def process_video(video_path: Path, force: bool = False) -> Path:
"""
Process a single video file.
@@ -121,6 +130,11 @@ def process_video(video_path: Path, force: bool = False) -> Path:
progress = load_progress()
filename = video_path.name
# Extract episode code for output naming (e.g., S01E01_The_Name.mp4 -> S01E01)
episode_code = extract_episode_code(filename)
if not episode_code:
raise ValueError(f"Could not extract episode code (SxxExx) from filename: {filename}")
# Check if already processed
if not force and filename in progress and progress[filename].get("status") == "completed":
output_path = Path(progress[filename]["output_file"])
@@ -130,6 +144,7 @@ def process_video(video_path: Path, force: bool = False) -> Path:
print(f"\n{'='*50}")
print(f"Processing: {filename}")
print(f"Episode code: {episode_code}")
print(f"{'='*50}")
try:
@@ -139,8 +154,8 @@ def process_video(video_path: Path, force: bool = False) -> Path:
transcript_data = transcribe_video(video_path)
# Save to JSON
output_filename = video_path.stem + "_assemblyai.json"
# Save to JSON using episode code only (drop the name part)
output_filename = f"{episode_code}_assemblyai.json"
output_path = OUTPUT_DIR / output_filename
with open(output_path, 'w', encoding='utf-8') as f: