Add dummy line THE END
This commit is contained in:
@@ -98,9 +98,8 @@ def get_llm_config() -> Tuple[str, str]:
|
||||
|
||||
def parse_lines(lines_text: str) -> List[Tuple[str, str, str]]:
|
||||
"""Parse formatted lines. Returns list of (timestamp, speaker_label, text)."""
|
||||
# Pattern to match both (Speaker X) and (Song) formats
|
||||
# Speaker "Song" is reserved for the opening song
|
||||
pattern = r'^(\[\d{2}:\d{2}\])\((Speaker [A-Z?]|Song)\) (.+)$'
|
||||
# Pattern to match speaker formats: (Speaker X), (Song), (Narrator), etc.
|
||||
pattern = r'^(\[\d{2}:\d{2}\])\(([^)]+)\) (.+)$'
|
||||
result = []
|
||||
|
||||
for line in lines_text.strip().split('\n'):
|
||||
@@ -113,9 +112,11 @@ def parse_lines(lines_text: str) -> List[Tuple[str, str, str]]:
|
||||
timestamp = match.group(1)
|
||||
speaker_raw = match.group(2)
|
||||
text = match.group(3)
|
||||
# Normalize: "Speaker X" -> "X", "Song" -> "Song"
|
||||
# Normalize: "Speaker X" -> "X", "Song" -> "Song", "Narrator" -> "Narrator"
|
||||
if speaker_raw == "Song":
|
||||
speaker = "Song"
|
||||
elif speaker_raw == "Narrator":
|
||||
speaker = "Narrator"
|
||||
else:
|
||||
# Strip the "Speaker " prefix ("Speaker X" -> "X"); any other label is kept as-is
|
||||
speaker = speaker_raw.replace("Speaker ", "")
|
||||
@@ -205,8 +206,8 @@ def identify_malabar(dialogue_lines: List[Tuple[str, str, str]],
|
||||
# Output ALL lines in chronological order (preserving original order)
|
||||
samples = []
|
||||
for ts, spk, text in dialogue_lines:
|
||||
# Skip Song speaker for Malabar identification
|
||||
if spk == "Song":
|
||||
# Skip Song and Narrator speakers for Malabar identification
|
||||
if spk in ("Song", "Narrator"):
|
||||
continue
|
||||
# Only include speakers we're trying to identify
|
||||
if spk in speakers:
|
||||
@@ -269,8 +270,8 @@ def identify_speaker(speaker: str,
|
||||
elif spk in known_names:
|
||||
# Known speaker - use real name
|
||||
full_dialogue_lines.append(f'{known_names[spk]}: "{text}"')
|
||||
elif spk == "Song":
|
||||
full_dialogue_lines.append(f'Song: "{text}"')
|
||||
elif spk in ("Song", "Narrator"):
|
||||
full_dialogue_lines.append(f'{spk}: "{text}"')
|
||||
else:
|
||||
# Unknown speaker - keep as Speaker X
|
||||
full_dialogue_lines.append(f'Speaker {spk}: "{text}"')
|
||||
@@ -339,9 +340,9 @@ def process_lines_file(input_path: Path, client: OpenAI, model: str, force: bool
|
||||
print(" No valid lines found!")
|
||||
return None
|
||||
|
||||
# Get unique speakers (excluding "Song" - already known)
|
||||
# Get unique speakers (excluding "Song" and "Narrator" - already known)
|
||||
all_speakers = set(speaker for _, speaker, _ in lines)
|
||||
speakers_to_identify = [s for s in all_speakers if s != "Song"]
|
||||
speakers_to_identify = [s for s in all_speakers if s not in ("Song", "Narrator")]
|
||||
|
||||
print(f" Speakers to identify: {', '.join(sorted(speakers_to_identify))}")
|
||||
|
||||
@@ -431,9 +432,9 @@ def apply_speaker_names(lines: List[Tuple[str, str, str]], mapping: Dict[str, st
|
||||
result_lines = []
|
||||
|
||||
for timestamp, speaker, text in lines:
|
||||
# "Song" speaker is already correctly labeled - pass through unchanged
|
||||
if speaker == "Song":
|
||||
speaker_name = "Song"
|
||||
# "Song" and "Narrator" speakers are already correctly labeled - pass through unchanged
|
||||
if speaker in ("Song", "Narrator"):
|
||||
speaker_name = speaker
|
||||
else:
|
||||
speaker_name = mapping.get(speaker, f"Speaker_{speaker}")
|
||||
result_lines.append(f"{timestamp}({speaker_name}) {text}")
|
||||
|
||||
Reference in New Issue
Block a user