#!/usr/bin/env python3
"""
Transcribe episodes with speaker diarization and name inference.

Requirements:
    uv sync
    # or: uv run transcribe_episodes.py (auto-installs deps)

Environment Variables:
    ASSEMBLYAI_API_KEY - Your AssemblyAI API key
    OPENAI_API_KEY - Your OpenAI/Kimi API key
    OPENAI_BASE_URL - (Optional) API base URL
        - Regular Kimi: https://api.moonshot.cn/v1
        - Kimi Code: https://api.kimi.com/coding/v1
    LLM_MODEL - (Optional) Model name, e.g., "kimi-for-coding"

Usage:
    uv run transcribe_episodes.py
    uv run transcribe_episodes.py status
    uv run transcribe_episodes.py reset [filename]
    uv run transcribe_episodes.py test-llm
"""
import json
import os
import re
import sys
from dataclasses import dataclass, asdict
from datetime import timedelta
from pathlib import Path
from typing import Dict, List, Optional

import assemblyai as aai
from openai import OpenAI
# ============== Configuration ==============

EPISODES_DIR = Path("episodes")
OUTPUT_DIR = Path("transcripts")
PROGRESS_FILE = Path(".transcription_progress.json")

# Characters to recognize
CHARACTERS = ["Malabar", "Sun", "Jupiter", "Kangarro", "Mole"]

# LLM Configuration
# For Kimi Code API: set OPENAI_BASE_URL="https://api.kimi.com/coding/v1" and LLM_MODEL="kimi-for-coding"
DEFAULT_LLM_BASE_URL = "https://api.moonshot.cn/v1"  # Default to regular Kimi
DEFAULT_LLM_MODEL = "kimi-latest"

# Patterns for non-word utterances to merge with adjacent lines
# These are sounds, modal particles, short acknowledgments
NON_WORD_PATTERNS = [
    # BUG FIX: the original pattern wrote `[嗯|啊|...]` — inside a character
    # class `|` is a literal, so the regex also matched bare pipe characters,
    # and multi-char "alternatives" (啊哈, 哎呀, ...) were silently decomposed
    # into single characters anyway. A plain character class with `+` matches
    # the same particle runs without matching "|".
    r'^\s*[嗯啊哦呃唉哎哈哼哟唔呦豁呀呜呼]+\s*$',  # Chinese modal particles (covers 啊哈/哦豁/哎呀/哎哟/呜呼 via repetition)
    r'^\s*[Mm]hm+\s*$',     # Mhm
    r'^\s*[Uu]h+\s*$',      # Uh
    r'^\s*[Uu]m+\s*$',      # Um
    r'^\s*[Aa]h+\s*$',      # Ah
    r'^\s*[Oo]h+\s*$',      # Oh
    r'^\s*[Hh]uh+\s*$',     # Huh
    r'^\s*[Hh]mm+\s*$',     # Hmm
    r'^\s*[Yy]eah?\s*$',    # Yeah (standalone)
    r'^\s*[Nn]o+\s*$',      # No (standalone)
    r'^\s*[Oo]k+\s*$',      # Ok
    r'^\s*[Oo]kay+\s*$',    # Okay
    r'^\s*[Rr]ight+\s*$',   # Right (standalone)
    r'^\s*[Ww]hat+\s*$',    # What (standalone)
    r'^\s*\([^)]*\)\s*$',   # (laughs), (coughs), etc.
    r'^\s*\[[^\]]*\]\s*$',  # [laughs], [coughs], etc.
]

# Compiled once at import time; each pattern stays its own anchored alternative.
NON_WORD_REGEX = re.compile('|'.join(f'({p})' for p in NON_WORD_PATTERNS), re.IGNORECASE)
# ============== Data Classes ==============
|
|
|
|
@dataclass
class Utterance:
    """One diarized speech segment attributed to a single speaker."""

    speaker: str                         # Diarization label from AssemblyAI (A, B, C, ...)
    text: str                            # Transcribed text of this segment
    start_ms: int                        # Segment start, milliseconds from audio start
    end_ms: int                          # Segment end, milliseconds from audio start
    inferred_name: Optional[str] = None  # Character name assigned by the LLM, if any

    @property
    def timestamp(self) -> str:
        """Render the start time as a "[mm:ss]" marker."""
        minutes, secs = divmod(self.start_ms // 1000, 60)
        return f"[{minutes:02d}:{secs:02d}]"
|
|
|
|
|
|
@dataclass
class EpisodeProgress:
    """Track progress for an episode."""
    # NOTE(review): this dataclass mirrors the per-file dict entries that
    # ProgressManager persists; it is not instantiated anywhere in this file.
    filename: str  # Episode video filename (basename)
    status: str  # "pending", "transcribing", "naming", "completed", "error"
    error_message: Optional[str] = None  # Last error text, when status == "error"
    output_file: Optional[str] = None  # Path of the written transcript, when completed
|
|
|
|
|
|
# ============== Progress Manager ==============
|
|
|
|
class ProgressManager:
    """Manages progress tracking to avoid re-processing.

    Progress is persisted as a JSON object keyed by episode filename, e.g.
    {"ep1.mp4": {"status": "completed", "output_file": "transcripts/ep1.txt"}}.
    Every mutation is written back to disk immediately so an interrupted run
    can resume.
    """

    def __init__(self, progress_file: Path = PROGRESS_FILE):
        self.progress_file = progress_file
        self.progress: Dict[str, dict] = self._load()

    def _load(self) -> Dict[str, dict]:
        """Load persisted progress; start fresh if the file is missing or unreadable."""
        if self.progress_file.exists():
            try:
                with open(self.progress_file, 'r', encoding='utf-8') as f:
                    return json.load(f)
            except (json.JSONDecodeError, OSError):
                # BUG FIX: a truncated/corrupt progress file used to crash the
                # whole run. Worst case now is re-processing finished episodes.
                print(f"Warning: could not read {self.progress_file}; starting with empty progress")
        return {}

    def save(self):
        """Write the in-memory progress dict back to disk."""
        with open(self.progress_file, 'w', encoding='utf-8') as f:
            json.dump(self.progress, f, indent=2, ensure_ascii=False)

    def get_status(self, filename: str) -> Optional[str]:
        """Return the recorded status for *filename*, or None if untracked."""
        return self.progress.get(filename, {}).get('status')

    def set_status(self, filename: str, status: str, error_message: Optional[str] = None, output_file: Optional[str] = None):
        """Record *status* (plus optional error/output info) and persist immediately."""
        entry = self.progress.setdefault(filename, {})
        entry['status'] = status
        if error_message:
            entry['error'] = error_message
        if output_file:
            entry['output_file'] = output_file
        self.save()

    def is_completed(self, filename: str) -> bool:
        """True when *filename* was already fully processed in a prior run."""
        return self.get_status(filename) == "completed"

    def reset(self, filename: Optional[str] = None):
        """Reset progress for a file or all files."""
        if filename:
            # Drop only this file's entry, if present.
            self.progress.pop(filename, None)
        else:
            self.progress = {}
        self.save()
|
|
|
|
|
|
# ============== Transcription Service ==============
|
|
|
|
class AssemblyAITranscriber:
    """Handles transcription with speaker diarization using AssemblyAI."""

    def __init__(self, api_key: str):
        # The AssemblyAI SDK reads credentials from module-level settings.
        aai.settings.api_key = api_key

    def transcribe(self, audio_file: Path, on_progress=None) -> List[Utterance]:
        """
        Transcribe audio file with speaker diarization.
        Returns list of Utterance objects.
        """
        # NOTE(review): current AssemblyAI SDK docs spell the keyword
        # `speech_model` (singular) — confirm `speech_models` against the
        # installed assemblyai version.
        config = aai.TranscriptionConfig(
            speaker_labels=True,
            speech_models=["universal-2"],
            language_detection=True,  # Auto-detect language (English, Chinese, etc.)
        )

        print(f" Uploading {audio_file.name}...")
        transcript = aai.Transcriber(config=config).transcribe(str(audio_file))

        if transcript.status == aai.TranscriptStatus.error:
            raise Exception(f"Transcription failed: {transcript.error}")

        print(" Transcription complete. Processing utterances...")

        # Convert SDK utterance objects into our lightweight dataclass.
        return [
            Utterance(
                speaker=item.speaker,
                text=item.text.strip(),
                start_ms=item.start,
                end_ms=item.end,
            )
            for item in transcript.utterances
        ]
|
|
|
|
|
|
# ============== Speaker Naming Service ==============
|
|
|
|
class SpeakerNamer:
    """Uses LLM to infer speaker names from context.

    Builds a prompt with sample utterances per diarization label, asks the
    configured chat-completions endpoint to map labels to known CHARACTERS,
    and falls back to generic "Speaker X" labels if every endpoint fails.
    """

    # Kimi/Moonshot API endpoints to try
    KIMI_ENDPOINTS = [
        "https://api.moonshot.cn/v1",
        "https://api.moonshot.ai/v1",
    ]

    # Shared system prompt for every naming request.
    # (Previously duplicated verbatim in two call sites.)
    SYSTEM_PROMPT = "You are a helpful assistant that identifies speakers in a dialogue based on context and speaking style."

    def __init__(self, api_key: str, base_url: Optional[str] = None, model: Optional[str] = None):
        """Configure the client; auto-detects Kimi vs OpenAI from the base URL."""
        # Determine API type
        base_url = base_url or DEFAULT_LLM_BASE_URL
        is_kimi = "moonshot" in base_url or "kimi" in base_url

        # Use provided model or default based on API
        if model:
            self.model = model
        elif is_kimi:
            self.model = DEFAULT_LLM_MODEL
        else:
            self.model = "gpt-4o-mini"

        # Debug: Show which API is being used (without exposing the key)
        print(f" LLM Config: Using {'Kimi' if is_kimi else 'OpenAI'} API")
        print(f" Base URL: {base_url}")
        print(f" Model: {self.model}")
        print(f" API Key set: {'Yes (starts with ' + api_key[:8] + '...)' if api_key else 'NO - MISSING!'}")

        self.api_key = api_key
        self.base_url = base_url
        self.is_kimi = is_kimi

        self.client = OpenAI(
            api_key=api_key,
            base_url=base_url
        )

    def _try_infer_with_endpoint(self, endpoint: Optional[str], prompt: str, speakers: List[str]) -> Optional[Dict[str, str]]:
        """Try to infer speakers using a specific endpoint; raises on API errors."""
        client = OpenAI(api_key=self.api_key, base_url=endpoint)

        response = client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self.SYSTEM_PROMPT},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3  # low temperature: we want a stable mapping, not creativity
        )

        result = response.choices[0].message.content
        return self._parse_naming_response(result, speakers)

    def infer_speakers(self, utterances: List["Utterance"]) -> Dict[str, str]:
        """
        Infer speaker names based on context.
        Returns mapping of speaker_label -> name
        """
        # Get unique speaker labels
        speakers = sorted(set(utt.speaker for utt in utterances))

        # Build context with up to 5 sample utterances from each speaker
        speaker_samples: Dict[str, List[str]] = {s: [] for s in speakers}
        for utt in utterances:
            if len(speaker_samples[utt.speaker]) < 5:
                speaker_samples[utt.speaker].append(utt.text)

        # Build prompt
        prompt = self._build_naming_prompt(speaker_samples)

        print(f" Sending to LLM for speaker naming...")

        # Build the ordered list of endpoints to attempt.
        endpoints_to_try: List[Optional[str]] = []
        if self.is_kimi:
            # Start with configured base_url, then try known alternatives
            if self.base_url:
                endpoints_to_try.append(self.base_url)
            for ep in self.KIMI_ENDPOINTS:
                if ep not in endpoints_to_try:
                    endpoints_to_try.append(ep)
        else:
            endpoints_to_try = [self.base_url] if self.base_url else [None]

        last_error = None
        for endpoint in endpoints_to_try:
            try:
                print(f" Trying endpoint: {endpoint or 'default (OpenAI)'}")
                # Both Kimi and OpenAI requests now go through the same helper;
                # the previous inline OpenAI path duplicated the request code
                # and was behaviorally identical (same base_url, same messages).
                mapping = self._try_infer_with_endpoint(endpoint, prompt, speakers)

                print(f" Identified speakers: {mapping}")
                return mapping

            except Exception as e:
                last_error = str(e)
                print(f" Failed: {last_error[:100]}...")
                continue  # Try next endpoint

        # All endpoints failed
        print(f"\n Warning: Speaker naming failed on all endpoints.")
        print(f" Last error: {last_error}")

        # Provide helpful guidance for authentication errors
        if "401" in str(last_error) or "403" in str(last_error) or "Invalid Authentication" in str(last_error):
            print("\n === Authentication Error ===")
            print(" Your API key was rejected. Please check:")
            print(" 1. Is OPENAI_API_KEY set to your Kimi API key?")
            print(" Get your key from: https://platform.moonshot.cn/")
            print(" 2. Does your API key have sufficient balance/credits?")
            print(" 3. Kimi Code API (api.kimi.com/coding) requires special access.")
            print(" For regular Kimi, use: export OPENAI_BASE_URL='https://api.moonshot.cn/v1'")
            print(" 4. The script will continue with generic speaker labels (Speaker A, B, etc.)")
            print(" =============================\n")

        # Fallback to speaker labels
        return {s: f"Speaker {s}" for s in speakers}

    def _build_naming_prompt(self, speaker_samples: Dict[str, List[str]]) -> str:
        """Assemble the user prompt: character roster + sampled lines per label."""
        prompt = """I have a transcript from a video featuring the following characters: """ + ", ".join(CHARACTERS) + """.

Below are sample utterances from each speaker. Based on the context, speaking style, and content, please identify which character is which speaker.

"""
        for speaker, samples in speaker_samples.items():
            prompt += f"Speaker {speaker}:\n"
            for i, sample in enumerate(samples, 1):
                prompt += f" {i}. \"{sample}\"\n"
            prompt += "\n"

        prompt += """Please respond with a JSON object mapping speaker labels to character names.
Example: {"A": "Malabar", "B": "Sun", "C": "Jupiter"}

Only use the character names from the list provided. If you're unsure, make your best guess based on the speaking style and context."""

        return prompt

    def _parse_naming_response(self, response: str, valid_speakers: List[str]) -> Dict[str, str]:
        """Parse LLM response to extract speaker mapping."""
        # (The redundant function-local `import json` was removed; json is a
        # module-level import.)

        # Preferred path: the model returned a (possibly embedded) JSON object.
        json_match = re.search(r'\{[^}]+\}', response)
        if json_match:
            try:
                mapping = json.loads(json_match.group())
                # Validate keys: keep only real diarization labels
                return {k: v for k, v in mapping.items() if k in valid_speakers}
            except json.JSONDecodeError:
                pass

        # Fallback: look for "Speaker X: Name" pattern
        mapping = {}
        for speaker in valid_speakers:
            pattern = rf'["\']?{speaker}["\']?\s*[:=]\s*["\']?([^"\'\n,]+)'
            match = re.search(pattern, response, re.IGNORECASE)
            if match:
                name = match.group(1).strip()
                # Normalize to a canonical character name when possible
                for char in CHARACTERS:
                    if char.lower() in name.lower():
                        mapping[speaker] = char
                        break
                else:
                    mapping[speaker] = name

        # Fill in any missing speakers with a generic label
        for speaker in valid_speakers:
            if speaker not in mapping:
                mapping[speaker] = f"Speaker {speaker}"

        return mapping
|
|
|
|
|
|
# ============== Output Formatter ==============
|
|
|
|
class OutputFormatter:
    """Formats and merges utterances for output.

    Note: merge_utterances/format_output mutate the passed Utterance objects
    in place (text, start_ms, inferred_name are rewritten).
    """

    @staticmethod
    def is_non_word(text: str) -> bool:
        """Check if text is a non-word utterance (sound, modal particle)."""
        return bool(NON_WORD_REGEX.match(text.strip()))

    @classmethod
    def merge_utterances(cls, utterances: List[Utterance]) -> List[Utterance]:
        """
        Merge consecutive utterances from the same speaker that are non-words
        with adjacent meaningful utterances.
        """
        if not utterances:
            return []

        merged = []
        i = 0

        while i < len(utterances):
            current = utterances[i]

            # Check if current is a non-word
            if cls.is_non_word(current.text):
                # Look ahead to find the next meaningful utterance from same speaker
                j = i + 1
                while j < len(utterances) and utterances[j].speaker == current.speaker:
                    if not cls.is_non_word(utterances[j].text):
                        # Merge current into the next meaningful one:
                        # prepend the sound and extend its start time backwards.
                        utterances[j].text = current.text + " " + utterances[j].text
                        utterances[j].start_ms = current.start_ms
                        # Jump straight to j; the outer loop re-processes the
                        # (now extended) meaningful utterance.
                        # NOTE(review): same-speaker non-words strictly between
                        # i and j are dropped, not merged — confirm intended.
                        i = j
                        break
                    j += 1
                else:
                    # while-else: the speaker changed (or input ended) before a
                    # meaningful utterance was found — keep the non-word as is.
                    merged.append(current)
                    i += 1
            else:
                # Check if previous was a non-word from same speaker
                if merged and merged[-1].speaker == current.speaker and cls.is_non_word(merged[-1].text):
                    # Merge previous into current
                    current.text = merged[-1].text + " " + current.text
                    current.start_ms = merged[-1].start_ms
                    merged.pop()

                merged.append(current)
                i += 1

        return merged

    @classmethod
    def format_output(cls, utterances: List[Utterance], speaker_mapping: Dict[str, str]) -> str:
        """Format utterances to output string.

        Output lines look like "[mm:ss](Name) text". Standalone non-words are
        dropped unless they are the final utterance.
        """
        # Apply speaker names
        for utt in utterances:
            utt.inferred_name = speaker_mapping.get(utt.speaker, f"Speaker {utt.speaker}")

        # Merge consecutive non-words
        merged = cls.merge_utterances(utterances)

        # Format lines
        lines = []
        for utt in merged:
            # Skip standalone non-words unless they're at the end
            # (comparison is dataclass value equality)
            if cls.is_non_word(utt.text) and utt != merged[-1]:
                continue

            speaker_name = utt.inferred_name or f"Speaker {utt.speaker}"
            lines.append(f"{utt.timestamp}({speaker_name}) {utt.text}")

        return '\n'.join(lines)
|
|
|
|
|
|
# ============== Main Processor ==============
|
|
|
|
class EpisodeProcessor:
    """Main processor that orchestrates transcription and naming."""

    def __init__(self):
        """Read configuration from the environment and build the services.

        Raises:
            ValueError: if a required API key env var is missing.
        """
        # Check API keys
        assembly_key = os.getenv("ASSEMBLYAI_API_KEY")
        openai_key = os.getenv("OPENAI_API_KEY")

        if not assembly_key:
            raise ValueError("ASSEMBLYAI_API_KEY environment variable is required")
        if not openai_key:
            raise ValueError("OPENAI_API_KEY environment variable is required")

        self.transcriber = AssemblyAITranscriber(assembly_key)

        # Get LLM configuration from environment
        openai_base = os.getenv("OPENAI_BASE_URL", DEFAULT_LLM_BASE_URL)
        llm_model = os.getenv("LLM_MODEL")  # e.g., "kimi-for-coding"

        self.namer = SpeakerNamer(openai_key, openai_base, llm_model)

        self.progress = ProgressManager()
        self.formatter = OutputFormatter()

        # Ensure output directory exists
        OUTPUT_DIR.mkdir(exist_ok=True)

    def process_episode(self, video_file: Path) -> bool:
        """Process a single episode. Returns True on success."""
        filename = video_file.name

        # Check if already completed
        # BUG FIX: these status messages printed the literal "(unknown)"
        # instead of the episode name; interpolate `filename`.
        if self.progress.is_completed(filename):
            print(f"Skipping {filename} (already completed)")
            return True

        print(f"\n{'='*50}")
        print(f"Processing: {filename}")
        print(f"{'='*50}")

        try:
            # Step 1: Transcription
            self.progress.set_status(filename, "transcribing")
            utterances = self.transcriber.transcribe(video_file)

            if not utterances:
                print(f" No utterances found in {filename}")
                self.progress.set_status(filename, "completed", output_file=None)
                return True

            print(f" Found {len(utterances)} utterances")

            # Step 2: Speaker naming
            self.progress.set_status(filename, "naming")
            speaker_mapping = self.namer.infer_speakers(utterances)

            # Step 3: Format and save
            output_text = self.formatter.format_output(utterances, speaker_mapping)

            output_filename = video_file.stem + ".txt"
            output_path = OUTPUT_DIR / output_filename

            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(output_text)

            self.progress.set_status(filename, "completed", output_file=str(output_path))
            print(f" Saved to: {output_path}")

            return True

        except Exception as e:
            # Record the failure so `status` can report it, then keep going
            # with the remaining files.
            error_msg = str(e)
            self.progress.set_status(filename, "error", error_message=error_msg)
            print(f" ERROR: {error_msg}")
            return False

    def process_all(self):
        """Process all video files in episodes directory."""
        # Find all video files
        video_extensions = {'.mp4', '.mkv', '.avi', '.mov', '.webm'}
        video_files = [
            f for f in EPISODES_DIR.iterdir()
            if f.is_file() and f.suffix.lower() in video_extensions
        ]

        if not video_files:
            print(f"No video files found in {EPISODES_DIR}")
            return

        print(f"Found {len(video_files)} video file(s) to process")

        success_count = 0
        fail_count = 0

        for video_file in sorted(video_files):
            if self.process_episode(video_file):
                success_count += 1
            else:
                fail_count += 1

        print(f"\n{'='*50}")
        print(f"Processing complete: {success_count} succeeded, {fail_count} failed")
        print(f"Transcripts saved to: {OUTPUT_DIR}")
        print(f"Progress tracked in: {PROGRESS_FILE}")
|
|
|
|
|
|
# ============== CLI ==============
|
|
|
|
def print_usage():
    """Print CLI usage, commands, and environment variable help to stdout."""
    print("""
Usage: uv run transcribe_episodes.py [command]

Commands:
  (none)        Process all episodes
  reset         Reset progress for all files (will re-process everything)
  reset <file>  Reset progress for specific file
  status        Show current progress status
  test-llm      Test LLM API connection (diagnostic)

Environment Variables:
  ASSEMBLYAI_API_KEY  Required - Your AssemblyAI API key
  OPENAI_API_KEY      Required - Your OpenAI/Kimi API key
  OPENAI_BASE_URL     Optional - API endpoint URL
                        - Regular Kimi: https://api.moonshot.cn/v1
                        - Kimi Code: https://api.kimi.com/coding/v1
  LLM_MODEL           Optional - Model name (e.g., "kimi-for-coding")

Examples:
  # Regular Kimi
  export ASSEMBLYAI_API_KEY="your-assembly-key"
  export OPENAI_API_KEY="your-kimi-key"
  uv run transcribe_episodes.py

  # Kimi Code
  export ASSEMBLYAI_API_KEY="your-assembly-key"
  export OPENAI_API_KEY="your-kimi-code-key"
  export OPENAI_BASE_URL="https://api.kimi.com/coding/v1"
  export LLM_MODEL="kimi-for-coding"
  uv run transcribe_episodes.py

  # Test connection:
  uv run transcribe_episodes.py test-llm
""")
|
|
|
|
|
|
def test_llm_connection():
    """Test LLM API connection and print diagnostic info.

    Reads OPENAI_API_KEY / OPENAI_BASE_URL / LLM_MODEL from the environment,
    sends a one-line prompt to the configured endpoint, and prints either a
    success confirmation or troubleshooting guidance. Never raises.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    base_url = os.getenv("OPENAI_BASE_URL", DEFAULT_LLM_BASE_URL)
    model = os.getenv("LLM_MODEL")  # e.g., "kimi-for-coding"

    print("=" * 50)
    print("LLM API Connection Test")
    print("=" * 50)
    print(f"\nConfiguration:")
    print(f" Base URL: {base_url}")
    print(f" Model: {model or '(auto-detect)'}")
    print(f" API Key: {'Set (starts with ' + api_key[:12] + '...)' if api_key else 'NOT SET'}")

    if not api_key:
        print("\n❌ ERROR: OPENAI_API_KEY environment variable is not set!")
        print("\nTo fix:")
        print(" export OPENAI_API_KEY='your-api-key-here'")
        return

    # Try connecting
    print(f"\nTesting connection...")

    # Same endpoint/model auto-detection heuristic as SpeakerNamer.__init__
    is_kimi = "moonshot" in base_url or "kimi" in base_url
    if not model:
        model = "kimi-latest" if is_kimi else "gpt-4o-mini"

    endpoints_to_try = [base_url]

    for endpoint in endpoints_to_try:
        print(f"\n Trying endpoint: {endpoint}")
        print(f" Model: {model}")
        try:
            client = OpenAI(api_key=api_key, base_url=endpoint)

            # Minimal round-trip: a one-token reply proves auth + reachability.
            response = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": "Say 'Hello' and nothing else."}],
                max_tokens=10
            )

            print(f" ✅ SUCCESS!")
            print(f" Response: '{response.choices[0].message.content}'")
            print(f"\n✅ LLM API is working correctly!")
            return

        except Exception as e:
            error_str = str(e)
            print(f" ❌ Failed: {error_str}")

            # Auth failures get dedicated troubleshooting guidance.
            if "401" in error_str or "403" in error_str:
                print("\n Authentication Error - Possible causes:")
                print(" 1. API key is incorrect or has been revoked")
                print(" 2. API key has no credits remaining")
                print(" 3. Wrong API key for the selected service")
                print(" 4. Kimi Code API requires special access (only for Coding Agents)")
                print("\n To fix:")
                print(" - For regular Kimi, use: export OPENAI_BASE_URL='https://api.moonshot.cn/v1'")
                print(" - For Kimi Code: Ensure your account has Coding Agent access")
                print(" - Get a regular Kimi API key from: https://platform.moonshot.cn/")

    # Reached only when no endpoint returned successfully.
    print("\n❌ All endpoints failed. Please check your API key and try again.")
|
|
|
|
|
|
def show_status():
    """Show current progress status for every tracked episode.

    Prints one line per file with a status icon (✓ completed, ✗ error,
    ⋯ in progress/pending) plus the output path or error detail.
    """
    progress = ProgressManager()

    print(f"\nProgress Status ({PROGRESS_FILE}):")
    print("-" * 50)

    if not progress.progress:
        print("No progress recorded yet.")
        return

    for filename, data in sorted(progress.progress.items()):
        status = data.get('status', 'unknown')
        output = data.get('output_file', '-')
        error = data.get('error', '')

        status_icon = "✓" if status == "completed" else "✗" if status == "error" else "⋯"
        # BUG FIX: this line printed the literal "(unknown)" instead of the
        # tracked filename; interpolate `filename`.
        print(f"{status_icon} {filename}: {status}")
        if output and status == "completed":
            print(f" Output: {output}")
        if error:
            print(f" Error: {error}")
|
|
|
|
|
|
def main():
    """CLI entry point: dispatch on the optional first argument."""
    args = sys.argv[1:]

    if args:
        command = args[0]

        if command in ("help", "--help", "-h"):
            print_usage()
            return

        if command == "status":
            show_status()
            return

        if command == "reset":
            tracker = ProgressManager()
            if len(args) > 1:
                target = args[1]
                tracker.reset(target)
                print(f"Reset progress for: {target}")
            else:
                tracker.reset()
                print("Reset all progress")
            return

        if command == "test-llm":
            test_llm_connection()
            return

        # Anything else is an unrecognized command.
        print(f"Unknown command: {command}")
        print_usage()
        return

    # Default: process all episodes
    try:
        EpisodeProcessor().process_all()
    except ValueError as e:
        # Missing configuration — explain how to supply it.
        print(f"Error: {e}")
        print("\nPlease set the required environment variables:")
        print(" export ASSEMBLYAI_API_KEY='your-key'")
        print(" export OPENAI_API_KEY='your-key'")
        print("\nOptional (for Kimi Code):")
        print(" export OPENAI_BASE_URL='https://api.kimi.com/coding/v1'")
        print(" export LLM_MODEL='kimi-for-coding'")
        print("\nFor regular Kimi, the base URL defaults to https://api.moonshot.cn/v1")
        sys.exit(1)
    except Exception as e:
        # Unexpected failure: show the traceback for debugging, exit non-zero.
        print(f"Unexpected error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
|
|
|
|
|
|
# Standard script entry point guard.
if __name__ == "__main__":
    main()
|