Add a dummy "THE END" line after the last transcript line

2026-03-05 17:47:54 +08:00
parent 323d14301d
commit f161e29c7e
11 changed files with 64 additions and 14 deletions
--- a/_speakers/S01E02_speakers.txt
+++ b/_speakers/S01E02_speakers.txt
@@ -35,3 +35,4 @@
 [03:44](Malabar) Thank you, sun.
 [03:47](Narrator) The inside of our star.
 [03:48](Narrator) The sun is made of atoms which bump into each other, merge, and produce heat.
+[04:00](Narrator) THE END
--- a/_speakers/S01E03_speakers.txt
+++ b/_speakers/S01E03_speakers.txt
@@ -68,3 +68,4 @@
 [03:46](Narrator) A long time ago, a very big rock hit the Earth.
 [03:50](Narrator) Projecting piles of rocks and dust.
 [03:52](Narrator) They were pulled together and formed the moon.
+[03:55](Narrator) THE END
--- a/_speakers/S01E04_speakers.txt
+++ b/_speakers/S01E04_speakers.txt
@@ -58,3 +58,4 @@
 [03:45](Narrator) Comets are balls made of ice and dust.
 [03:48](Narrator) They travel very fast and they're very old.
 [03:50](Narrator) When they pass close to the sun, they warm up and leave a tail made of vapor and dust.
+[03:55](Narrator) THE END
--- a/_speakers/S02E02_speakers.txt
+++ b/_speakers/S02E02_speakers.txt
@@ -87,3 +87,4 @@
 [03:43](Malabar) Good night and sweet dreams, Giraffe.
 [03:46](Volcanoes) Long ago, the surface of Mars shook so much that immense canyons were formed.
 [03:51](Volcanoes) It's called the Labyrinth of the Night, and it ends in the Valles Marineris.
+[03:55](Narrator) THE END
--- a/_translated/S01E02_translated.json
+++ b/_translated/S01E02_translated.json
@@ -220,5 +220,11 @@
    "speaker": "Narrator",
    "english": "The sun is made of atoms which bump into each other, merge, and produce heat.",
    "chinese": "太阳是由原子组成的，它们会相互碰撞、合并，并产生热量。"
+  },
+  {
+    "timestamp": "[04:00]",
+    "speaker": "Narrator",
+    "english": "THE END",
+    "chinese": "THE END"
  }
 ]
--- a/_translated/S01E03_translated.json
+++ b/_translated/S01E03_translated.json
@@ -418,5 +418,11 @@
    "speaker": "Narrator",
    "english": "They were pulled together and formed the moon.",
    "chinese": "它们被引力拉到一起，就形成了月亮。"
+  },
+  {
+    "timestamp": "[03:55]",
+    "speaker": "Narrator",
+    "english": "THE END",
+    "chinese": "THE END"
  }
 ]
--- a/_translated/S01E04_translated.json
+++ b/_translated/S01E04_translated.json
@@ -358,5 +358,11 @@
    "speaker": "Narrator",
    "english": "When they pass close to the sun, they warm up and leave a tail made of vapor and dust.",
    "chinese": "当它们靠近太阳时，就会变热，留下由蒸汽和尘埃组成的尾巴。"
+  },
+  {
+    "timestamp": "[03:55]",
+    "speaker": "Narrator",
+    "english": "THE END",
+    "chinese": "THE END"
  }
 ]
--- a/_translated/S02E02_translated.json
+++ b/_translated/S02E02_translated.json
@@ -532,5 +532,11 @@
    "speaker": "Volcanoes",
    "english": "It's called the Labyrinth of the Night, and it ends in the Valles Marineris.",
    "chinese": "它叫做“黑夜迷宫”，一直延伸到水手谷。"
+  },
+  {
+    "timestamp": "[03:55]",
+    "speaker": "Narrator",
+    "english": "THE END",
+    "chinese": "THE END"
  }
 ]
--- a/step2_format.py
+++ b/step2_format.py
@@ -293,6 +293,9 @@ def format_lines(transcript_data: Dict[str, Any]) -> str:
    if joined_song:
        lines.append(f"[00:01](Song) {joined_song}")
    
+    # Track the last utterance for calculating THE END timestamp
+    last_utt = None
+    
    # Format remaining lines (skip those within first 15s as they're in the joined song)
    for utt in merged:
        # Skip utterances within opening song window (they're already included in joined_song)
@@ -313,6 +316,18 @@ def format_lines(transcript_data: Dict[str, Any]) -> str:
        timestamp = format_timestamp(utt.get("start", 0))
        
        lines.append(f"{timestamp}(Speaker {speaker}) {text}")
+        last_utt = utt
+    
+    # Add dummy "THE END" line after the last line
+    # Calculate timestamp based on the duration of the last line
+    if last_utt:
+        last_start = last_utt.get("start", 0)
+        last_end = last_utt.get("end", 0)
+        duration = last_end - last_start
+        # THE END timestamp = last line start + duration (same as last line's end time)
+        the_end_time = last_start + duration
+        the_end_timestamp = format_timestamp(the_end_time)
+        lines.append(f"{the_end_timestamp}(Narrator) THE END")
    
    return '\n'.join(lines)

--- a/step3_infer_speakers.py
+++ b/step3_infer_speakers.py
@@ -98,9 +98,8 @@ def get_llm_config() -> Tuple[str, str]:

 def parse_lines(lines_text: str) -> List[Tuple[str, str, str]]:
    """Parse formatted lines. Returns list of (timestamp, speaker_label, text)."""
-    # Pattern to match both (Speaker X) and (Song) formats
-    # Speaker "Song" is reserved for the opening song
-    pattern = r'^(\[\d{2}:\d{2}\])\((Speaker [A-Z?]|Song)\) (.+)$'
+    # Pattern to match speaker formats: (Speaker X), (Song), (Narrator), etc.
+    pattern = r'^(\[\d{2}:\d{2}\])\(([^)]+)\) (.+)$'
    result = []
    
    for line in lines_text.strip().split('\n'):
@@ -113,9 +112,11 @@ def parse_lines(lines_text: str) -> List[Tuple[str, str, str]]:
            timestamp = match.group(1)
            speaker_raw = match.group(2)
            text = match.group(3)
-            # Normalize: "Speaker X" -> "X", "Song" -> "Song"
+            # Normalize: "Speaker X" -> "X", "Song" -> "Song", "Narrator" -> "Narrator"
            if speaker_raw == "Song":
                speaker = "Song"
+            elif speaker_raw == "Narrator":
+                speaker = "Narrator"
            else:
                # Extract letter from "Speaker X"
                speaker = speaker_raw.replace("Speaker ", "")
@@ -205,8 +206,8 @@ def identify_malabar(dialogue_lines: List[Tuple[str, str, str]],
    # Output ALL lines in chronological order (preserving original order)
    samples = []
    for ts, spk, text in dialogue_lines:
-        # Skip Song speaker for Malabar identification
-        if spk == "Song":
+        # Skip Song and Narrator speakers for Malabar identification
+        if spk in ("Song", "Narrator"):
            continue
        # Only include speakers we're trying to identify
        if spk in speakers:
@@ -269,8 +270,8 @@ def identify_speaker(speaker: str,
        elif spk in known_names:
            # Known speaker - use real name
            full_dialogue_lines.append(f'{known_names[spk]}: "{text}"')
-        elif spk == "Song":
-            full_dialogue_lines.append(f'Song: "{text}"')
+        elif spk in ("Song", "Narrator"):
+            full_dialogue_lines.append(f'{spk}: "{text}"')
        else:
            # Unknown speaker - keep as Speaker X
            full_dialogue_lines.append(f'Speaker {spk}: "{text}"')
@@ -339,9 +340,9 @@ def process_lines_file(input_path: Path, client: OpenAI, model: str, force: bool
        print("  No valid lines found!")
        return None
    
-    # Get unique speakers (excluding "Song" - already known)
+    # Get unique speakers (excluding "Song" and "Narrator" - already known)
    all_speakers = set(speaker for _, speaker, _ in lines)
-    speakers_to_identify = [s for s in all_speakers if s != "Song"]
+    speakers_to_identify = [s for s in all_speakers if s not in ("Song", "Narrator")]
    
    print(f"  Speakers to identify: {', '.join(sorted(speakers_to_identify))}")
    
@@ -431,9 +432,9 @@ def apply_speaker_names(lines: List[Tuple[str, str, str]], mapping: Dict[str, st
    result_lines = []
    
    for timestamp, speaker, text in lines:
-        # "Song" speaker is already correctly labeled - pass through unchanged
-        if speaker == "Song":
-            speaker_name = "Song"
+        # "Song" and "Narrator" speakers are already correctly labeled - pass through unchanged
+        if speaker in ("Song", "Narrator"):
+            speaker_name = speaker
        else:
            speaker_name = mapping.get(speaker, f"Speaker_{speaker}")
        result_lines.append(f"{timestamp}({speaker_name}) {text}")
--- a/step4_translate.py
+++ b/step4_translate.py
@@ -247,6 +247,12 @@ def translate_file(input_path: Path, client: OpenAI, model: str) -> Path:
            skipped_count += 1
            continue
        
+        # Skip dummy "THE END" line (added by step 2 for timing purposes)
+        if english.strip() == "THE END":
+            line_data["chinese"] = english  # Keep as-is
+            skipped_count += 1
+            continue
+        
        cached = get_cached_translation(english)
        if cached:
            line_data["chinese"] = cached