more episodes 1 2 3 4
This commit is contained in:
@@ -11,6 +11,7 @@ Usage:
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
@@ -113,6 +114,14 @@ def transcribe_video(video_path: Path) -> dict:
|
||||
return result
|
||||
|
||||
|
||||
def extract_episode_code(filename: str) -> "str | None":
    """Extract the SxxExx episode code from a filename.

    The first occurrence of ``S``, two digits, ``E``, two digits is located
    anywhere in *filename*; matching is case-insensitive and the result is
    normalised to upper case (e.g. ``'S01E01_The_Eye.mp4'`` -> ``'S01E01'``).

    Args:
        filename: File name (or any string) to search.

    Returns:
        The upper-cased episode code, or ``None`` when *filename* contains
        no SxxExx pattern. Callers must handle the ``None`` case.
    """
    match = re.search(r'(S\d{2}E\d{2})', filename, re.IGNORECASE)
    if match is None:
        # No episode code present; signal this with None rather than raising.
        return None
    return match.group(1).upper()
|
||||
|
||||
|
||||
def process_video(video_path: Path, force: bool = False) -> Path:
|
||||
"""
|
||||
Process a single video file.
|
||||
@@ -121,6 +130,11 @@ def process_video(video_path: Path, force: bool = False) -> Path:
|
||||
progress = load_progress()
|
||||
filename = video_path.name
|
||||
|
||||
# Extract episode code for output naming (e.g., S01E01_The_Name.mp4 -> S01E01)
|
||||
episode_code = extract_episode_code(filename)
|
||||
if not episode_code:
|
||||
raise ValueError(f"Could not extract episode code (SxxExx) from filename: {filename}")
|
||||
|
||||
# Check if already processed
|
||||
if not force and filename in progress and progress[filename].get("status") == "completed":
|
||||
output_path = Path(progress[filename]["output_file"])
|
||||
@@ -130,6 +144,7 @@ def process_video(video_path: Path, force: bool = False) -> Path:
|
||||
|
||||
print(f"\n{'='*50}")
|
||||
print(f"Processing: {filename}")
|
||||
print(f"Episode code: {episode_code}")
|
||||
print(f"{'='*50}")
|
||||
|
||||
try:
|
||||
@@ -139,8 +154,8 @@ def process_video(video_path: Path, force: bool = False) -> Path:
|
||||
|
||||
transcript_data = transcribe_video(video_path)
|
||||
|
||||
# Save to JSON
|
||||
output_filename = video_path.stem + "_assemblyai.json"
|
||||
# Save to JSON using episode code only (drop the name part)
|
||||
output_filename = f"{episode_code}_assemblyai.json"
|
||||
output_path = OUTPUT_DIR / output_filename
|
||||
|
||||
with open(output_path, 'w', encoding='utf-8') as f:
|
||||
|
||||
Reference in New Issue
Block a user