#!/usr/bin/env python3
"""
Step 5: Assign colors to speakers based on their characteristics.

Input: Speaker files in "_speakers/" folder
Output: _colors.json with speaker-color mappings

Output format (values are CSS hex color codes):
{
  "Malabar": "#000000",
  "Moon": "#A9A9A9",
  "Earth": "#228B22",
  ...
}

Usage:
    uv run step5_assign_colors.py

Environment Variables:
    OPENAI_API_KEY - Required
    OPENAI_BASE_URL - Optional (for Kimi/GLM APIs)
    LLM_MODEL - Optional (e.g., "glm-4.5-air")
"""

import os
import re
import sys
import json
from pathlib import Path
from typing import TYPE_CHECKING, List, Dict, Tuple, Optional, Set

if TYPE_CHECKING:
    # Only needed for annotations; the runtime import happens in main() so the
    # pure helpers below can be imported without the SDK installed.
    from openai import OpenAI

# ============== Configuration ==============

INPUT_DIR = Path("_speakers")
OUTPUT_FILE = Path("_colors.json")

# Fixed color assignments
FIXED_COLORS = {
    "Malabar": "#000000",  # Black
    "Mars": "#FF0000",     # Red
    "Venus": "#FFD700",    # Gold
    "Sun": "#FFA500",      # Bright gold/orange
    "Earth": "#228B22",    # Forest green
}

# Default configurations for different providers
DEFAULT_CONFIGS = {
    "openai": {
        "base_url": None,
        "model": "gpt-4o-mini",
    },
    "moonshot": {
        "base_url": "https://api.moonshot.cn/v1",
        "model": "kimi-latest",
    },
    "bigmodel": {  # Zhipu AI (GLM)
        "base_url": "https://open.bigmodel.cn/api/paas/v4",
        "model": "glm-4.5-air",
    },
}

# Distinct color palette for fallback (dark enough for white background)
FALLBACK_PALETTE = [
    "#8B4513",  # Saddle Brown
    "#556B2F",  # Dark Olive Green
    "#483D8B",  # Dark Slate Blue
    "#2F4F4F",  # Dark Slate Gray
    "#8B008B",  # Dark Magenta
    "#4B0082",  # Indigo
    "#191970",  # Midnight Blue
    "#006400",  # Dark Green
    "#8B0000",  # Dark Red
    "#B8860B",  # Dark Goldenrod
    "#5F9EA0",  # Cadet Blue
    "#708090",  # Slate Gray
    "#CD853F",  # Peru
    "#BC8F8F",  # Rosy Brown
    "#4682B4",  # Steel Blue
    "#6B8E23",  # Olive Drab
    "#9370DB",  # Medium Purple
    "#8FBC8F",  # Dark Sea Green
    "#CD5C5C",  # Indian Red
    "#4169E1",  # Royal Blue
]

# Speaker line format: [mm:ss](Speaker) text
_SPEAKER_LINE_RE = re.compile(r'^\[\d{2}:\d{2}\]\(([^)]+)\)')
# First flat JSON object in a model reply (tolerates surrounding prose/fences)
_JSON_OBJECT_RE = re.compile(r'\{[^}]+\}')
# Only full six-digit hex colors are accepted from the model
_HEX_COLOR_RE = re.compile(r'^#[0-9A-Fa-f]{6}$')


def get_llm_config() -> Tuple[Optional[str], str]:
    """Get LLM configuration from environment.

    Returns:
        A ``(base_url, model)`` pair. ``base_url`` is ``None`` when
        OPENAI_BASE_URL is unset. When LLM_MODEL is unset the model is the
        default for the provider guessed from the URL, else the OpenAI default.

    Raises:
        ValueError: if OPENAI_API_KEY is not set.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY environment variable is required")

    base_url = os.getenv("OPENAI_BASE_URL")
    model = os.getenv("LLM_MODEL")

    if not base_url:
        return None, model or DEFAULT_CONFIGS["openai"]["model"]
    if model:
        return base_url, model

    # No explicit model: pick the provider default from the URL.
    if "bigmodel" in base_url:
        provider = "bigmodel"
    elif "moonshot" in base_url or "kimi" in base_url:
        provider = "moonshot"
    else:
        provider = "openai"
    return base_url, DEFAULT_CONFIGS[provider]["model"]


def collect_speakers(input_dir: Path) -> Set[str]:
    """Collect all unique speakers from speaker files.

    Reads every ``*_speakers.txt`` file in ``input_dir`` and returns the set of
    names found in lines shaped like ``[mm:ss](Speaker) text``. Blank lines and
    lines that do not match are ignored.
    """
    speakers = set()
    for file_path in input_dir.glob("*_speakers.txt"):
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                match = _SPEAKER_LINE_RE.match(line.strip())
                if match:
                    speakers.add(match.group(1))
    return speakers


def get_unique_fallback_color(index: int) -> str:
    """Return the fallback palette color at ``index``, wrapping around.

    Colors are unique only while ``index`` stays below the palette size.
    """
    return FALLBACK_PALETTE[index % len(FALLBACK_PALETTE)]


def _pick_fallback_color(used: Set[str], start: int) -> str:
    """Return the first palette color from ``start`` whose uppercase form is not in ``used``.

    If every palette color is taken, fall back to the color at ``start``.
    """
    for offset in range(len(FALLBACK_PALETTE)):
        candidate = get_unique_fallback_color(start + offset)
        if candidate.upper() not in used:
            return candidate
    return get_unique_fallback_color(start)


def _parse_color_reply(text: str, speakers: List[str], attempt: int) -> Dict[str, str]:
    """Extract a validated ``{speaker: "#RRGGBB"}`` mapping from a model reply.

    Keeps only requested speakers with well-formed hex values, normalized to
    uppercase. Returns an empty dict when no JSON object can be parsed.
    """
    json_match = _JSON_OBJECT_RE.search(text)
    parsed = None
    if json_match:
        try:
            parsed = json.loads(json_match.group())
        except json.JSONDecodeError:
            parsed = None
    if not isinstance(parsed, dict):
        print(f" Warning: Could not parse JSON response on attempt {attempt}")
        return {}

    colors = {}
    for name, value in parsed.items():
        if name not in speakers:
            continue
        if isinstance(value, str) and _HEX_COLOR_RE.match(value.strip()):
            colors[name] = value.strip().upper()
        else:
            print(f" Warning: Ignoring invalid color {value!r} for {name}")
    return colors


def call_llm_for_colors(speakers: List[str], client: "OpenAI", model: str,
                        existing_mapping: Dict[str, str], attempt: int = 1) -> Dict[str, str]:
    """Call LLM to assign colors to speakers. Returns parsed color mapping.

    Args:
        speakers: Names that still need a color.
        client: OpenAI-compatible client used for the chat completion.
        model: Model name passed to the API.
        existing_mapping: Colors already assigned; listed in the prompt.
        attempt: 1-based attempt number, used only in log messages.

    Returns:
        Mapping of requested speakers to uppercase ``#RRGGBB`` strings. It is
        empty on API or parse failure; this function does not raise for those.
    """
    speakers_list = ", ".join(speakers)
    existing_info = ""
    if existing_mapping:
        existing_colors = [f" - {k} → {v}" for k, v in existing_mapping.items()]
        existing_info = "\nAlready assigned:\n" + "\n".join(existing_colors)

    prompt = f"""Assign CSS hex color codes to each speaker from "Little Malabar" based on their characteristics.

Speakers to assign colors: {speakers_list}{existing_info}

Color assignment guidelines (use hex codes like #FF0000):
- Moon → #A9A9A9 (dark gray) - avoid light colors
- Jupiter → #D2691E (chocolate/orange)
- Galaxy → #9370DB (medium purple) or #FF69B4 (hot pink)
- Star → #DAA520 (goldenrod) or #B8860B (dark goldenrod) - avoid white/light colors
- Volcano → #8B0000 (dark red) or #FF4500 (orange red)
- Kangaroo/Giraffe → #D2691E (chocolate) or #8B4513 (saddle brown)
- Song → #4682B4 (steel blue) or #9370DB (medium purple) - avoid light colors
- Asteroids → #696969 (dim gray) or #A9A9A9 (dark gray)
- Atoms → #20B2AA (light sea green) or #008B8B (dark cyan)
- Comet → #FFD700 (gold) or #DAA520 (goldenrod)
- Narrator → #708090 (slate gray) or #778899 (light slate gray)

IMPORTANT:
- Do NOT use light colors like #FFFFFF (white), #FFFACD, #87CEEB, #C0C0C0
- All colors must be dark enough to read on white backgrounds
- Each speaker should have a UNIQUE color (no duplicates!)

Fixed assignments (DO NOT change these):
- Malabar → #000000 (black)
- Mars → #FF0000 (red)
- Venus → #FFD700 (gold)
- Sun → #FFA500 (bright gold)
- Earth → #228B22 (green)

Reply with ONLY a JSON object mapping the remaining speaker names to hex color codes:
{{"SpeakerName": "#RRGGBB", ...}}

JSON:"""

    # Best-effort boundary: any API failure becomes an empty result so the
    # caller can retry or fall back to the palette.
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You assign colors to characters. Reply with ONLY valid JSON."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
            max_tokens=500,
            # NOTE(review): provider-specific flag; confirm every target
            # endpoint accepts this extra field.
            extra_body={"thinking": {"type": "disabled"}}  # Disable thinking
        )
        message = response.choices[0].message
        result = message.content or ""
        # GLM models may put response in reasoning_content
        if not result:
            result = getattr(message, 'reasoning_content', None) or ""
    except Exception as e:
        print(f" Error calling LLM on attempt {attempt}: {e}")
        return {}

    return _parse_color_reply(result, speakers, attempt)


def assign_colors(speakers: Set[str], client: "OpenAI", model: str) -> Dict[str, str]:
    """Assign colors to speakers using LLM with retry logic.

    Starts from FIXED_COLORS, asks the LLM up to three times for the other
    speakers, then fills any gaps from the fallback palette. A color already
    held by another speaker is rejected (case-insensitively) and retried.
    Speakers are processed in sorted order so results are reproducible.

    Returns:
        Mapping of speaker name to hex color. It always contains every
        FIXED_COLORS entry plus every name in ``speakers``.
    """
    # Start with fixed colors
    color_mapping = FIXED_COLORS.copy()

    # Sorted: set iteration order would make the output vary between runs.
    remaining_speakers = sorted(s for s in speakers if s not in color_mapping)
    if not remaining_speakers:
        return color_mapping

    used_colors = {color.upper() for color in color_mapping.values()}

    max_retries = 3
    for attempt in range(1, max_retries + 1):
        still_need_colors = [s for s in remaining_speakers if s not in color_mapping]
        if not still_need_colors:
            break  # All speakers have colors

        if attempt > 1:
            print(f" Retry {attempt-1}: {len(still_need_colors)} speakers still need colors...")

        llm_result = call_llm_for_colors(still_need_colors, client, model, color_mapping, attempt)

        for speaker in still_need_colors:
            color = llm_result.get(speaker)
            if color is None:
                continue
            if color.upper() in used_colors:
                # Leave the speaker unassigned so the next attempt can retry.
                print(f" Warning: {color} for {speaker} is already in use; skipping")
                continue
            color_mapping[speaker] = color
            used_colors.add(color.upper())

    # Check for any remaining speakers without colors
    still_need_colors = [s for s in remaining_speakers if s not in color_mapping]
    if still_need_colors:
        print(f" Using fallback colors for {len(still_need_colors)} speakers...")
        for idx, speaker in enumerate(still_need_colors):
            color = _pick_fallback_color(used_colors, idx)
            color_mapping[speaker] = color
            used_colors.add(color.upper())

    return color_mapping


def main():
    """Collect speakers, assign colors, and write them to OUTPUT_FILE.

    Exits with status 1 if the API key is missing, the input directory does
    not exist, or no speakers are found.
    """
    # Imported here so the helpers above stay importable without the SDK.
    from openai import OpenAI

    # Get LLM config
    try:
        base_url, model = get_llm_config()
    except ValueError as e:
        print(f"Error: {e}")
        sys.exit(1)

    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"), base_url=base_url)

    print(f"Using model: {model}")
    print(f"Endpoint: {base_url or 'OpenAI default'}")

    # Check input directory
    if not INPUT_DIR.exists():
        print(f"Error: Input directory {INPUT_DIR}/ not found")
        sys.exit(1)

    # Collect all speakers
    print(f"\nCollecting speakers from {INPUT_DIR}/...")
    speakers = collect_speakers(INPUT_DIR)

    if not speakers:
        print("Error: No speakers found")
        sys.exit(1)

    print(f"Found {len(speakers)} unique speakers:")
    for speaker in sorted(speakers):
        if speaker in FIXED_COLORS:
            print(f" - {speaker}: {FIXED_COLORS[speaker]} (fixed)")
        else:
            print(f" - {speaker}")

    # Assign colors
    print("\nAssigning colors...")
    color_mapping = assign_colors(speakers, client, model)

    print("\nFinal color assignments:")
    for speaker, color in sorted(color_mapping.items()):
        fixed = " (fixed)" if speaker in FIXED_COLORS else ""
        print(f" - {speaker}: {color}{fixed}")

    # Save to JSON
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        json.dump(color_mapping, f, ensure_ascii=False, indent=2)

    print(f"\nSaved to: {OUTPUT_FILE}")
    print("\nStep 5 Complete!")


if __name__ == "__main__":
    main()