Fix speaker recognition
This commit is contained in:
@@ -124,10 +124,18 @@ def parse_lines(lines_text: str) -> List[Tuple[str, str, str]]:
|
||||
return result
|
||||
|
||||
|
||||
def save_debug(filename: str, request: str, response: str, step: int):
|
||||
def save_debug(filename: str, request: str, response: str, step: int, model: str = "", endpoint: str = ""):
|
||||
"""Save debug info to _speakers_debug folder."""
|
||||
debug_file = DEBUG_DIR / f"{filename}_step{step}.txt"
|
||||
with open(debug_file, 'w', encoding='utf-8') as f:
|
||||
f.write("=" * 60 + "\n")
|
||||
f.write("DEBUG INFO:\n")
|
||||
f.write("=" * 60 + "\n")
|
||||
if model:
|
||||
f.write(f"Model: {model}\n")
|
||||
if endpoint:
|
||||
f.write(f"Endpoint: {endpoint}\n")
|
||||
f.write("\n")
|
||||
f.write("=" * 60 + "\n")
|
||||
f.write("REQUEST:\n")
|
||||
f.write("=" * 60 + "\n\n")
|
||||
@@ -139,86 +147,53 @@ def save_debug(filename: str, request: str, response: str, step: int):
|
||||
f.write(response)
|
||||
|
||||
|
||||
def extract_name_from_response(text: str) -> str:
|
||||
"""Extract a single name from LLM response text."""
|
||||
text = text.strip()
|
||||
|
||||
# Expanded list of valid names - includes celestial bodies and other entities
|
||||
def ask_llm_for_name(prompt: str, client: OpenAI, model: str, debug_filename: str, step: int, exclude_names: list = None, base_url: str = "") -> str:
|
||||
"""Ask LLM for a single name. Returns the name or raises exception if invalid."""
|
||||
# Valid speaker names
|
||||
valid_names = ['Malabar', 'Moon', 'Earth', 'Mars', 'Sun', 'Jupiter', 'Saturn', 'Venus',
|
||||
'Mercury', 'Neptune', 'Uranus', 'Pluto', 'Galaxy', 'Star', 'Kangaroo',
|
||||
'Giraffe', 'Volcano', 'Volcanoes', 'Sea', 'Ocean', 'Wave', 'Comet',
|
||||
'Asteroid', 'Meteor', 'Nebula', 'Black Hole', 'Alien', 'Robot', 'Scientist']
|
||||
|
||||
# Check if the response is just a single word (the name)
|
||||
if ' ' not in text and len(text) > 1:
|
||||
return text.strip('"\'')
|
||||
# Filter out excluded names
|
||||
if exclude_names:
|
||||
valid_names = [n for n in valid_names if n not in exclude_names]
|
||||
|
||||
# Look for explicit "Answer: X" or "Name: X" patterns
|
||||
answer_match = re.search(r'(?:answer|name|is)[:\s]+["\']?([A-Z][a-z]+)', text, re.IGNORECASE)
|
||||
if answer_match:
|
||||
return answer_match.group(1)
|
||||
|
||||
# Check last few lines for a valid name
|
||||
lines = text.split('\n')
|
||||
for line in reversed(lines[-5:]): # Check last 5 lines
|
||||
line = line.strip().strip('"\'')
|
||||
for name in valid_names:
|
||||
if line.lower() == name.lower():
|
||||
return name
|
||||
if re.search(rf'\b{name}\b', line, re.IGNORECASE):
|
||||
return name
|
||||
|
||||
# Default: return first valid name found
|
||||
for name in valid_names:
|
||||
if re.search(rf'\b{name}\b', text, re.IGNORECASE):
|
||||
return name
|
||||
|
||||
# If no known name found, extract any capitalized word as potential name
|
||||
for line in text.split('\n'):
|
||||
line = line.strip()
|
||||
match = re.search(r'\b([A-Z][a-z]{2,})\b', line)
|
||||
if match:
|
||||
word = match.group(1)
|
||||
if word.lower() not in ['the', 'and', 'but', 'for', 'are', 'was', 'were', 'been', 'this', 'that']:
|
||||
return word
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
def ask_llm_for_name(prompt: str, client: OpenAI, model: str, debug_filename: str, step: int) -> str:
|
||||
"""Ask LLM for a single name. Returns the name or empty string if failed."""
|
||||
try:
|
||||
response = client.chat.completions.create(
|
||||
model=model,
|
||||
messages=[
|
||||
{"role": "system", "content": "Reply with ONLY a single word - the name. No explanation."},
|
||||
{"role": "user", "content": prompt}
|
||||
],
|
||||
temperature=0.0,
|
||||
max_tokens=1000
|
||||
max_tokens=20, # Short response expected
|
||||
extra_body={"thinking": {"type": "disabled"}} # Disable thinking
|
||||
)
|
||||
|
||||
message = response.choices[0].message
|
||||
raw_result = message.content or ""
|
||||
|
||||
# If content is empty but reasoning_content exists, use that
|
||||
if not raw_result and hasattr(message, 'reasoning_content') and message.reasoning_content:
|
||||
raw_result = message.reasoning_content
|
||||
|
||||
# Extract name from the response
|
||||
result = extract_name_from_response(raw_result)
|
||||
raw_result = message.content.strip() if message.content else ""
|
||||
|
||||
# Save debug info
|
||||
save_debug(debug_filename, prompt, f"RAW: {raw_result[:800]}\n\nEXTRACTED: {result}", step)
|
||||
save_debug(debug_filename, prompt, f"RAW: {raw_result}", step, model=model, endpoint=base_url or "OpenAI default")
|
||||
|
||||
# Simple validation: result should be one of the valid names
|
||||
if raw_result in valid_names:
|
||||
return raw_result
|
||||
|
||||
# Check case-insensitive match
|
||||
for name in valid_names:
|
||||
if raw_result.lower() == name.lower():
|
||||
return name
|
||||
|
||||
raise ValueError(f"Invalid response from LLM: expected one of {valid_names}, got '{raw_result}'")
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
save_debug(debug_filename, prompt, f"ERROR: {e}", step)
|
||||
return ""
|
||||
save_debug(debug_filename, prompt, f"ERROR: {e}", step, model=model, endpoint=base_url or "OpenAI default")
|
||||
raise # Re-raise the exception
|
||||
|
||||
|
||||
def identify_malabar(dialogue_lines: List[Tuple[str, str, str]],
|
||||
client: OpenAI, model: str, debug_filename: str) -> Optional[str]:
|
||||
client: OpenAI, model: str, debug_filename: str, base_url: str = "") -> Optional[str]:
|
||||
"""Identify which speaker is Malabar."""
|
||||
# Only consider single-letter speakers (exclude "?", "Song", and other special markers)
|
||||
speakers = sorted(set(speaker for _, speaker, _ in dialogue_lines
|
||||
@@ -227,30 +202,54 @@ def identify_malabar(dialogue_lines: List[Tuple[str, str, str]],
|
||||
if not speakers:
|
||||
return None
|
||||
|
||||
# Get sample lines from each speaker
|
||||
# Output ALL lines in chronological order (preserving original order)
|
||||
samples = []
|
||||
for speaker in speakers:
|
||||
lines = [(ts, text) for ts, spk, text in dialogue_lines
|
||||
if spk == speaker][:3]
|
||||
for ts, text in lines:
|
||||
samples.append(f'{speaker}: "{text}"')
|
||||
for ts, spk, text in dialogue_lines:
|
||||
# Skip Song speaker for Malabar identification
|
||||
if spk == "Song":
|
||||
continue
|
||||
# Only include speakers we're trying to identify
|
||||
if spk in speakers:
|
||||
samples.append(f'{spk}: "{text}"')
|
||||
|
||||
sample_text = '\n'.join(samples)
|
||||
|
||||
prompt = f"""Little Malabar dialogue. Malabar is the boy who addresses Kangaroo/Giraffe.
|
||||
prompt = f"""Little Malabar dialogue. Malabar is a boy who talks to stars, planets and animals.
|
||||
|
||||
{sample_text}
|
||||
|
||||
Which speaker letter is Malabar? Reply with ONLY A, B, or C:"""
|
||||
Which speaker letter is Malabar? Reply with ONLY the letter A, B, or C."""
|
||||
|
||||
result = ask_llm_for_name(prompt, client, model, debug_filename, 1)
|
||||
|
||||
# Extract the letter
|
||||
match = re.search(r'\b([A-Z])\b', result.upper())
|
||||
if match and match.group(1) in speakers:
|
||||
return match.group(1)
|
||||
|
||||
return None
|
||||
try:
|
||||
response = client.chat.completions.create(
|
||||
model=model,
|
||||
messages=[
|
||||
{"role": "user", "content": prompt}
|
||||
],
|
||||
temperature=0.0,
|
||||
max_tokens=10, # Short response expected
|
||||
extra_body={"thinking": {"type": "disabled"}} # Disable thinking
|
||||
)
|
||||
|
||||
message = response.choices[0].message
|
||||
raw_result = message.content.strip() if message.content else ""
|
||||
|
||||
# Get endpoint for debug info
|
||||
endpoint = base_url or (str(client.base_url) if hasattr(client, 'base_url') else "OpenAI default")
|
||||
|
||||
# Save debug info
|
||||
save_debug(debug_filename, prompt, f"RAW: {raw_result}", 1, model=model, endpoint=endpoint)
|
||||
|
||||
# Simple validation: result should be a single letter in speakers list
|
||||
if raw_result and len(raw_result) == 1 and raw_result.upper() in speakers:
|
||||
return raw_result.upper()
|
||||
|
||||
raise ValueError(f"Invalid response from LLM: expected single letter A/B/C, got '{raw_result}'")
|
||||
|
||||
except Exception as e:
|
||||
endpoint = base_url or (str(client.base_url) if hasattr(client, 'base_url') else "OpenAI default")
|
||||
save_debug(debug_filename, prompt, f"ERROR: {e}", 1, model=model, endpoint=endpoint)
|
||||
raise # Re-raise the exception
|
||||
|
||||
|
||||
def identify_speaker(speaker: str,
|
||||
@@ -258,19 +257,25 @@ def identify_speaker(speaker: str,
|
||||
known_names: Dict[str, str],
|
||||
client: OpenAI, model: str, debug_filename: str, step: int) -> str:
|
||||
"""Identify a single speaker's name."""
|
||||
# Get this speaker's lines
|
||||
speaker_lines = [(ts, text) for ts, spk, text in dialogue_lines
|
||||
if spk == speaker]
|
||||
# Build the full dialogue with proper speaker names
|
||||
# For known speakers, use their real name
|
||||
# For the target speaker, keep as "Speaker X"
|
||||
# For unknown speakers, keep as "Speaker X"
|
||||
full_dialogue_lines = []
|
||||
for ts, spk, text in dialogue_lines:
|
||||
if spk == speaker:
|
||||
# Target speaker - keep as Speaker X (we're trying to identify them)
|
||||
full_dialogue_lines.append(f'Speaker {spk}: "{text}"')
|
||||
elif spk in known_names:
|
||||
# Known speaker - use real name
|
||||
full_dialogue_lines.append(f'{known_names[spk]}: "{text}"')
|
||||
elif spk == "Song":
|
||||
full_dialogue_lines.append(f'Song: "{text}"')
|
||||
else:
|
||||
# Unknown speaker - keep as Speaker X
|
||||
full_dialogue_lines.append(f'Speaker {spk}: "{text}"')
|
||||
|
||||
# Prioritize lines with identifying keywords - Mars mentions first
|
||||
mars_lines = [l for l in speaker_lines if 'mars' in l[1].lower()]
|
||||
other_priority = [l for l in speaker_lines if 'mars' not in l[1].lower() and
|
||||
any(k in l[1].lower() for k in ['surface', 'volcanoes', 'craters', 'my surface', 'up here', 'labyrinth'])]
|
||||
other_lines = [l for l in speaker_lines if l not in mars_lines and l not in other_priority]
|
||||
|
||||
# Combine: Mars lines first, then other priority, then others, max 8 lines
|
||||
selected_lines = (mars_lines + other_priority + other_lines)[:8]
|
||||
speaker_sample = '\n'.join([f'{ts} "{text}"' for ts, text in selected_lines])
|
||||
full_dialogue = '\n'.join(full_dialogue_lines)
|
||||
|
||||
# Build list of who we already know
|
||||
known_info = "Known: " + ", ".join([f"Speaker {s} = {n}" for s, n in known_names.items()]) if known_names else ""
|
||||
@@ -278,28 +283,33 @@ def identify_speaker(speaker: str,
|
||||
prompt = f"""Little Malabar dialogue. {known_info}
|
||||
|
||||
CONTEXT:
|
||||
- Malabar is the main character (a boy) who explores space
|
||||
- Malabar is a boy who talks to stars, planets and animals
|
||||
- Other speakers are usually celestial bodies (Moon, Earth, Mars, Sun, etc.)
|
||||
- BUT speakers can also be other entities: volcanoes, the sea, a comet, a star, etc.
|
||||
- Look at what the speaker talks about to identify them
|
||||
- Look at what the speaker talks about AND what others say to them to identify them
|
||||
|
||||
IDENTIFICATION GUIDELINES:
|
||||
- Speaker mentions "my surface" + warm/shaking → likely Earth
|
||||
- Speaker mentions being "up here" with no ocean → likely Moon
|
||||
- Speaker mentions "us volcanoes on Mars" → could be Mars OR Volcanoes
|
||||
- Speaker says "us volcanoes on Mars" → this is Volcanoes (not Mars!)
|
||||
- Speaker is spoken TO about Mars/volcanoes → could be Mars
|
||||
- Speaker mentions the sea/ocean/waves → could be Sea/Ocean
|
||||
- Speaker suggests going TO a place → likely describing that place from outside
|
||||
- Use your judgment based on context and content
|
||||
|
||||
Speaker {speaker}'s lines:
|
||||
{speaker_sample}
|
||||
FULL DIALOGUE:
|
||||
{full_dialogue}
|
||||
|
||||
Who is Speaker {speaker}? Reply with a single descriptive name (e.g., "Moon", "Earth", "Mars", "Volcanoes", "Sea", "Sun", "Comet", "Star"):"""
|
||||
Who is Speaker {speaker}? Reply with ONLY the name, nothing else. Examples: Moon, Earth, Mars, Volcanoes, Sea, Sun, Jupiter:"""
|
||||
|
||||
return ask_llm_for_name(prompt, client, model, debug_filename, step)
|
||||
# Get list of already known names to exclude from extraction
|
||||
known_names_list = list(known_names.values()) if known_names else []
|
||||
# Get base_url from client for debug info
|
||||
base_url = client.base_url if hasattr(client, 'base_url') else ""
|
||||
return ask_llm_for_name(prompt, client, model, debug_filename, step, exclude_names=known_names_list, base_url=base_url)
|
||||
|
||||
|
||||
def process_lines_file(input_path: Path, client: OpenAI, model: str, force: bool = False) -> Path:
|
||||
def process_lines_file(input_path: Path, client: OpenAI, model: str, force: bool = False, base_url: str = "") -> Path:
|
||||
"""Process a single lines file using multi-step approach."""
|
||||
progress = load_progress()
|
||||
filename = input_path.name
|
||||
@@ -347,16 +357,17 @@ def process_lines_file(input_path: Path, client: OpenAI, model: str, force: bool
|
||||
|
||||
# Step 1: Identify Malabar (from regular speakers only)
|
||||
print(f" Step 1: Identifying Malabar...")
|
||||
malabar_speaker = identify_malabar(lines, client, model, debug_filename)
|
||||
|
||||
if malabar_speaker:
|
||||
try:
|
||||
malabar_speaker = identify_malabar(lines, client, model, debug_filename, base_url)
|
||||
final_mapping[malabar_speaker] = "Malabar"
|
||||
print(f" Identified Speaker {malabar_speaker} = Malabar")
|
||||
elif regular_speakers:
|
||||
# Fallback: assume first regular speaker alphabetically is Malabar
|
||||
malabar_speaker = sorted(regular_speakers)[0]
|
||||
final_mapping[malabar_speaker] = "Malabar"
|
||||
print(f" Fallback: Speaker {malabar_speaker} = Malabar")
|
||||
except Exception as e:
|
||||
print(f" Error: {e}")
|
||||
if regular_speakers:
|
||||
# Fallback: assume first regular speaker alphabetically is Malabar
|
||||
malabar_speaker = sorted(regular_speakers)[0]
|
||||
final_mapping[malabar_speaker] = "Malabar"
|
||||
print(f" Fallback: Speaker {malabar_speaker} = Malabar")
|
||||
|
||||
# Step 2+: Identify remaining regular speakers one by one
|
||||
remaining = [s for s in regular_speakers if s not in final_mapping]
|
||||
@@ -364,26 +375,25 @@ def process_lines_file(input_path: Path, client: OpenAI, model: str, force: bool
|
||||
|
||||
for speaker in remaining:
|
||||
print(f" Step {step}: Identifying Speaker {speaker}...")
|
||||
name = identify_speaker(speaker, lines, final_mapping, client, model, debug_filename, step)
|
||||
|
||||
if name and len(name) > 1:
|
||||
try:
|
||||
name = identify_speaker(speaker, lines, final_mapping, client, model, debug_filename, step)
|
||||
final_mapping[speaker] = name
|
||||
print(f" Identified Speaker {speaker} = {name}")
|
||||
else:
|
||||
except Exception as e:
|
||||
print(f" Error: {e}")
|
||||
final_mapping[speaker] = f"Speaker_{speaker}"
|
||||
print(f" Fallback: Speaker {speaker} = Speaker_{speaker}")
|
||||
|
||||
step += 1
|
||||
|
||||
# Handle unknown speakers (like "?")
|
||||
for speaker in unknown_speakers:
|
||||
print(f" Step {step}: Identifying unknown Speaker {speaker}...")
|
||||
# Try to identify based on content
|
||||
name = identify_speaker(speaker, lines, final_mapping, client, model, debug_filename, step)
|
||||
if name and len(name) > 1 and name.lower() not in ['unknown', 'speaker', 'name']:
|
||||
try:
|
||||
name = identify_speaker(speaker, lines, final_mapping, client, model, debug_filename, step)
|
||||
final_mapping[speaker] = name
|
||||
print(f" Identified Speaker {speaker} = {name}")
|
||||
else:
|
||||
except Exception as e:
|
||||
print(f" Error: {e}")
|
||||
final_mapping[speaker] = "Unknown"
|
||||
print(f" Marked Speaker {speaker} = Unknown")
|
||||
step += 1
|
||||
@@ -449,6 +459,9 @@ def main():
|
||||
base_url, model = get_llm_config()
|
||||
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"), base_url=base_url)
|
||||
|
||||
print(f"Using model: {model}")
|
||||
print(f"Endpoint: {base_url or 'OpenAI default'}")
|
||||
|
||||
# Discover input files
|
||||
lines_files = get_input_files()
|
||||
|
||||
@@ -468,7 +481,7 @@ def main():
|
||||
|
||||
for input_path in lines_files:
|
||||
try:
|
||||
output_path = process_lines_file(input_path, client, model, force=force)
|
||||
output_path = process_lines_file(input_path, client, model, force=force, base_url=base_url or "")
|
||||
if output_path:
|
||||
success_count += 1
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user