diff --git a/step4_translate.py b/step4_translate.py index 09f04a2..7b1e228 100644 --- a/step4_translate.py +++ b/step4_translate.py @@ -238,10 +238,19 @@ def translate_file(input_path: Path, client: OpenAI, model: str) -> Path: # Check cache for each line cached_count = 0 + skipped_count = 0 to_translate = [] for line_data in parsed_lines: english = line_data["english"] + speaker = line_data["speaker"] + + # Skip translation for opening song (speaker is "Song") + if speaker == "Song": + line_data["chinese"] = english # Keep English for song lyrics + skipped_count += 1 + continue + cached = get_cached_translation(english) if cached: line_data["chinese"] = cached @@ -250,6 +259,7 @@ def translate_file(input_path: Path, client: OpenAI, model: str) -> Path: to_translate.append(line_data) print(f" Cached translations: {cached_count}") + print(f" Skipped (Song): {skipped_count}") print(f" To translate: {len(to_translate)}") # Translate in batches @@ -274,10 +284,10 @@ def translate_file(input_path: Path, client: OpenAI, model: str) -> Path: if translation and translation != "[Translation failed]": save_translation_cache(line_data["english"], translation) - # Combine results (cached + translated) + # Combine results (cached + translated + skipped) result = parsed_lines - print(f" Translation complete: {cached_count} from cache, {translated_count} new") + print(f" Translation complete: {cached_count} from cache, {translated_count} new, {skipped_count} skipped (Song)") # Save JSON output output_filename = input_path.stem.replace("_speakers", "") + "_translated.json"