302 lines
9.7 KiB
Python
302 lines
9.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
Step 5: Assign colors to speakers based on their characteristics.

Input: Speaker files in "_speakers/" folder
Output: _colors.json with speaker-color mappings

Output format (values are CSS hex color codes):
    {
        "Malabar": "#000000",
        "Moon": "#A9A9A9",
        "Earth": "#228B22",
        ...
    }

Usage:
    uv run step5_assign_colors.py

Environment Variables:
    OPENAI_API_KEY - Required
    OPENAI_BASE_URL - Optional (for Kimi/GLM APIs)
    LLM_MODEL - Optional (e.g., "glm-4.5-air")
"""
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
import json
|
|
from pathlib import Path
|
|
from typing import List, Dict, Tuple, Optional, Set
|
|
from openai import OpenAI
|
|
|
|
# ============== Configuration ==============

# Directory scanned for "*_speakers.txt" files, and the JSON file the final
# speaker -> color mapping is written to.
INPUT_DIR = Path("_speakers")
OUTPUT_FILE = Path("_colors.json")

# Pre-assigned colors. assign_colors() starts from a copy of this mapping, so
# these speakers are never handed to the LLM for a decision.
FIXED_COLORS: Dict[str, str] = {
    "Malabar": "#000000",  # Black
    "Mars": "#FF0000",     # Red
    "Venus": "#FFD700",    # Gold
    "Sun": "#FFA500",      # Bright gold/orange
    "Earth": "#228B22",    # Forest green
}

# Per-provider defaults. A base_url of None means "use the OpenAI default
# endpoint"; the model is used when LLM_MODEL is not set.
DEFAULT_CONFIGS: Dict[str, Dict[str, Optional[str]]] = {
    "openai": {"base_url": None, "model": "gpt-4o-mini"},
    "moonshot": {"base_url": "https://api.moonshot.cn/v1", "model": "kimi-latest"},
    # Zhipu AI (GLM)
    "bigmodel": {"base_url": "https://open.bigmodel.cn/api/paas/v4", "model": "glm-4.5-air"},
}
|
|
|
|
|
|
def get_llm_config() -> Tuple[Optional[str], str]:
    """Resolve the LLM endpoint and model name from environment variables.

    Reads OPENAI_API_KEY (presence check only), OPENAI_BASE_URL and LLM_MODEL.

    Returns:
        A ``(base_url, model)`` pair. ``base_url`` is ``None`` when
        OPENAI_BASE_URL is unset or empty. ``model`` is LLM_MODEL when set;
        otherwise a default chosen from DEFAULT_CONFIGS by looking for
        "bigmodel", "moonshot" or "kimi" in the base URL, with the OpenAI
        default as the final fallback.

    Raises:
        ValueError: If OPENAI_API_KEY is unset or empty.
    """
    if not os.getenv("OPENAI_API_KEY"):
        raise ValueError("OPENAI_API_KEY environment variable is required")

    # An empty OPENAI_BASE_URL is treated the same as an unset one.
    base_url = os.getenv("OPENAI_BASE_URL") or None
    model = os.getenv("LLM_MODEL")

    # An explicit model always wins, whatever the endpoint is.
    if model:
        return base_url, model

    # No explicit model: infer the provider from the endpoint URL.
    if base_url is None:
        provider = "openai"
    elif "bigmodel" in base_url:
        provider = "bigmodel"
    elif "moonshot" in base_url or "kimi" in base_url:
        provider = "moonshot"
    else:
        provider = "openai"

    return base_url, DEFAULT_CONFIGS[provider]["model"]
|
|
|
|
|
|
def collect_speakers(input_dir: Path) -> Set[str]:
    """Gather the distinct speaker names found in ``*_speakers.txt`` files.

    Each file directly inside ``input_dir`` whose name matches
    ``*_speakers.txt`` is read as UTF-8. A line contributes a speaker when,
    after stripping surrounding whitespace, it starts with ``[MM:SS](Name)``;
    lines that do not match are ignored.

    Returns:
        The set of captured speaker names (empty when nothing matches).
    """
    # Expected line shape: [timestamp](Speaker) text
    line_pattern = re.compile(r'^\[\d{2}:\d{2}\]\(([^)]+)\)')
    found: Set[str] = set()

    for transcript in input_dir.glob("*_speakers.txt"):
        with open(transcript, 'r', encoding='utf-8') as handle:
            for raw_line in handle:
                # Blank lines simply fail to match, so no separate check is needed.
                hit = line_pattern.match(raw_line.strip())
                if hit is not None:
                    found.add(hit.group(1))

    return found
|
|
|
|
|
|
def get_unique_fallback_color(index: int) -> str:
    """Return the fallback palette color for ``index``.

    The palette holds 20 entries and ``index`` is reduced modulo the palette
    length, so results are only distinct for indices within one pass over the
    palette; after that the colors repeat.
    """
    # Fallback palette (intended to be dark enough for a white background).
    fallback_hex = (
        "#8B4513",  # Saddle Brown
        "#556B2F",  # Dark Olive Green
        "#483D8B",  # Dark Slate Blue
        "#2F4F4F",  # Dark Slate Gray
        "#8B008B",  # Dark Magenta
        "#4B0082",  # Indigo
        "#191970",  # Midnight Blue
        "#006400",  # Dark Green
        "#8B0000",  # Dark Red
        "#B8860B",  # Dark Goldenrod
        "#5F9EA0",  # Cadet Blue
        "#708090",  # Slate Gray
        "#CD853F",  # Peru
        "#BC8F8F",  # Rosy Brown
        "#4682B4",  # Steel Blue
        "#6B8E23",  # Olive Drab
        "#9370DB",  # Medium Purple
        "#8FBC8F",  # Dark Sea Green
        "#CD5C5C",  # Indian Red
        "#4169E1",  # Royal Blue
    )
    slot = index % len(fallback_hex)
    return fallback_hex[slot]
|
|
|
|
|
|
def _parse_color_reply(reply: str, speakers: List[str], attempt: int) -> Dict[str, str]:
    """Extract validated ``speaker -> hex color`` pairs from raw LLM text.

    Only entries whose key is one of ``speakers`` and whose value is a
    ``#RGB`` or ``#RRGGBB`` string are kept; anything else is reported and
    dropped so the caller can retry or fall back for that speaker.
    """
    parsed = None
    # The reply may wrap the object in prose or code fences; grab the first flat {...}.
    json_match = re.search(r'\{[^}]+\}', reply)
    if json_match:
        try:
            parsed = json.loads(json_match.group())
        except json.JSONDecodeError:
            parsed = None

    if not isinstance(parsed, dict):
        print(f" Warning: Could not parse JSON response on attempt {attempt}")
        return {}

    wanted = set(speakers)
    accepted: Dict[str, str] = {}
    for name, color in parsed.items():
        if name not in wanted:
            continue
        cleaned = color.strip() if isinstance(color, str) else None
        if cleaned and re.fullmatch(r'#(?:[0-9A-Fa-f]{3}|[0-9A-Fa-f]{6})', cleaned):
            accepted[name] = cleaned
        else:
            print(f" Warning: Ignoring invalid color {color!r} for {name} on attempt {attempt}")
    return accepted


def call_llm_for_colors(speakers: List[str], client: OpenAI, model: str,
                        existing_mapping: Dict[str, str], attempt: int = 1) -> Dict[str, str]:
    """Ask the LLM for hex colors for ``speakers`` and return the usable ones.

    Args:
        speakers: Names that still need a color.
        client: Chat-completions client used for the request.
        model: Model name passed to the API.
        existing_mapping: Colors already assigned; listed in the prompt so the
            model can see them.
        attempt: 1-based attempt number, used only in log messages.

    Returns:
        A mapping restricted to names in ``speakers`` whose value is a valid
        ``#RGB``/``#RRGGBB`` string. Empty when the call fails or the reply
        contains no parseable JSON object. Errors are printed, never raised.
    """
    speakers_list = ", ".join(speakers)

    existing_info = ""
    if existing_mapping:
        existing_colors = [f" - {k} → {v}" for k, v in existing_mapping.items()]
        existing_info = "\nAlready assigned:\n" + "\n".join(existing_colors)

    prompt = f"""Assign CSS hex color codes to each speaker from "Little Malabar" based on their characteristics.

Speakers to assign colors:
{speakers_list}{existing_info}

Color assignment guidelines (use hex codes like #FF0000):
- Moon → #A9A9A9 (dark gray) - avoid light colors
- Jupiter → #D2691E (chocolate/orange)
- Galaxy → #9370DB (medium purple) or #FF69B4 (hot pink)
- Star → #DAA520 (goldenrod) or #B8860B (dark goldenrod) - avoid white/light colors
- Volcano → #8B0000 (dark red) or #FF4500 (orange red)
- Kangaroo/Giraffe → #D2691E (chocolate) or #8B4513 (saddle brown)
- Song → #4682B4 (steel blue) or #9370DB (medium purple) - avoid light colors
- Asteroids → #696969 (dim gray) or #A9A9A9 (dark gray)
- Atoms → #20B2AA (light sea green) or #008B8B (dark cyan)
- Comet → #FFD700 (gold) or #DAA520 (goldenrod)
- Narrator → #708090 (slate gray) or #778899 (light slate gray)

IMPORTANT:
- Do NOT use light colors like #FFFFFF (white), #FFFACD, #87CEEB, #C0C0C0
- All colors must be dark enough to read on white backgrounds
- Each speaker should have a UNIQUE color (no duplicates!)

Fixed assignments (DO NOT change these):
- Malabar → #000000 (black)
- Mars → #FF0000 (red)
- Venus → #FFD700 (gold)
- Sun → #FFA500 (bright gold)
- Earth → #228B22 (green)

Reply with ONLY a JSON object mapping the remaining speaker names to hex color codes:
{{"SpeakerName": "#RRGGBB", ...}}

JSON:"""

    # Best-effort by design: any failure is logged and reported as "no colors"
    # so the caller's retry/fallback logic can take over.
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You assign colors to characters. Reply with ONLY valid JSON."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.3,
            max_tokens=500,
            # Disable thinking. NOTE(review): provider-specific request field;
            # confirm every configured endpoint accepts it.
            extra_body={"thinking": {"type": "disabled"}},
        )

        message = response.choices[0].message
        result = message.content or ""

        # GLM models may put response in reasoning_content
        if not result:
            result = getattr(message, 'reasoning_content', None) or ""

        return _parse_color_reply(result, speakers, attempt)

    except Exception as e:
        print(f" Error calling LLM on attempt {attempt}: {e}")
        return {}
|
|
|
|
|
|
def _next_free_fallback(start_index: int, taken: Set[str]) -> Tuple[str, int]:
    """Pick the first fallback color at or after ``start_index`` not in ``taken``.

    ``taken`` holds upper-cased color strings. Returns ``(color, next_index)``.
    If probing sees a color repeat before finding a free one (every fallback
    color is already in use), the color at ``start_index`` is reused.
    """
    probed = set()
    index = start_index
    while True:
        candidate = get_unique_fallback_color(index)
        if candidate.upper() not in taken:
            return candidate, index + 1
        if candidate in probed:
            # Wrapped around without finding a free color: accept a duplicate.
            return get_unique_fallback_color(start_index), start_index + 1
        probed.add(candidate)
        index += 1


def assign_colors(speakers: Set[str], client: OpenAI, model: str) -> Dict[str, str]:
    """Assign a color to every speaker, using the LLM with retries and a fallback.

    Starts from a copy of FIXED_COLORS, asks the LLM (up to three attempts)
    for the speakers not covered by it, and gives any speaker still missing a
    fallback palette color that is not already in use where possible.
    Duplicate colors returned by the LLM itself are not filtered here.

    Args:
        speakers: All speaker names found in the input.
        client: Chat-completions client forwarded to call_llm_for_colors.
        model: Model name forwarded to call_llm_for_colors.

    Returns:
        Mapping of speaker name to color; always includes every FIXED_COLORS entry.
    """
    # Start with fixed colors
    color_mapping = FIXED_COLORS.copy()

    # Sorted so the prompt and the fallback assignment do not depend on set
    # iteration order, which varies between runs.
    remaining_speakers = sorted(s for s in speakers if s not in color_mapping)

    if not remaining_speakers:
        return color_mapping

    max_retries = 3

    for attempt in range(1, max_retries + 1):
        # Only ask about speakers that earlier attempts did not resolve.
        still_need_colors = [s for s in remaining_speakers if s not in color_mapping]

        if not still_need_colors:
            break  # All speakers have colors

        if attempt > 1:
            print(f" Retry {attempt-1}: {len(still_need_colors)} speakers still need colors...")

        llm_result = call_llm_for_colors(still_need_colors, client, model, color_mapping, attempt)

        # Merge results, ignoring any name we did not ask about.
        pending = set(still_need_colors)
        for speaker, color in llm_result.items():
            if speaker in pending:
                color_mapping[speaker] = color

    # Check for any remaining speakers without colors
    still_need_colors = [s for s in remaining_speakers if s not in color_mapping]

    if still_need_colors:
        print(f" Using fallback colors for {len(still_need_colors)} speakers...")
        # Skip palette entries that collide with colors already handed out.
        taken = {str(color).upper() for color in color_mapping.values()}
        palette_index = 0
        for speaker in still_need_colors:
            fallback, palette_index = _next_free_fallback(palette_index, taken)
            color_mapping[speaker] = fallback
            taken.add(fallback.upper())

    return color_mapping
|
|
|
|
|
|
def main():
    """Run step 5: collect speakers, assign colors, and write the JSON mapping.

    Exits with status 1 when the input directory is missing or no speakers
    are found. On success the mapping is written to OUTPUT_FILE.
    """
    # Resolve the LLM configuration and build the client.
    endpoint, model_name = get_llm_config()
    llm_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"), base_url=endpoint)

    print(f"Using model: {model_name}")
    print(f"Endpoint: {endpoint or 'OpenAI default'}")

    # The speaker files must exist before anything else can happen.
    if not INPUT_DIR.exists():
        print(f"Error: Input directory {INPUT_DIR}/ not found")
        sys.exit(1)

    print(f"\nCollecting speakers from {INPUT_DIR}/...")
    found = collect_speakers(INPUT_DIR)
    if not found:
        print("Error: No speakers found")
        sys.exit(1)

    print(f"Found {len(found)} unique speakers:")
    for name in sorted(found):
        # Speakers with a pinned color are listed together with that color.
        detail = f": {FIXED_COLORS[name]} (fixed)" if name in FIXED_COLORS else ""
        print(f" - {name}{detail}")

    print("\nAssigning colors...")
    assignments = assign_colors(found, llm_client, model_name)

    print("\nFinal color assignments:")
    for name, color in sorted(assignments.items()):
        marker = " (fixed)" if name in FIXED_COLORS else ""
        print(f" - {name}: {color}{marker}")

    # Persist the mapping as pretty-printed UTF-8 JSON.
    with OUTPUT_FILE.open('w', encoding='utf-8') as out:
        json.dump(assignments, out, ensure_ascii=False, indent=2)

    print(f"\nSaved to: {OUTPUT_FILE}")
    print("\nStep 5 Complete!")


if __name__ == "__main__":
    main()
|