Fix speaker recognition
This commit is contained in:
235
debug_llm.py
Normal file
235
debug_llm.py
Normal file
@@ -0,0 +1,235 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Debug tool: Replay a request from a debug file.
|
||||
|
||||
Usage:
|
||||
uv run debug_llm.py _speakers_debug/S01E01_lines_step1.txt
|
||||
|
||||
This will:
|
||||
1. Read the request from the debug file
|
||||
2. Use the same model/endpoint as Step 3
|
||||
3. Send the request
|
||||
4. Print the raw response to stdout
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
from openai import OpenAI
|
||||
|
||||
|
||||
def get_llm_config():
    """Read the LLM endpoint configuration from environment variables.

    Mirrors the configuration used by Step 3 so replays hit the same
    model and endpoint.

    Returns:
        tuple: ``(base_url, model, api_key)`` where ``base_url`` may be
        ``None`` (meaning the OpenAI default endpoint).

    Raises:
        ValueError: if ``OPENAI_API_KEY`` is not set.
    """
    key = os.environ.get("OPENAI_API_KEY")
    if not key:
        raise ValueError("OPENAI_API_KEY environment variable is required")

    return (
        os.environ.get("OPENAI_BASE_URL"),
        os.environ.get("LLM_MODEL", "glm-4.5-air"),
        key,
    )
|
||||
|
||||
|
||||
def extract_request_from_debug(debug_path: Path) -> str:
    """Extract the prompt text between the REQUEST: and RESPONSE: markers.

    Debug files written by Step 3 have the layout::

        ==========
        REQUEST:
        ==========

        <prompt text>

        ==========
        RESPONSE:
        ...

    Args:
        debug_path: Path to the debug file.

    Returns:
        The request text with surrounding whitespace stripped, or an
        empty string if no request section could be found.
    """
    with open(debug_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Primary: a single regex over the whole file.
    match = re.search(
        r'={10,}\s*\nREQUEST:\s*\n={10,}\s*\n\n(.*?)\n\n={10,}\s*\nRESPONSE:',
        content,
        re.DOTALL
    )

    if match:
        return match.group(1).strip()

    # Fallback: scan line by line for slightly malformed files.
    # BUG FIX: the previous implementation used lines.index(line) for
    # neighbour lookups, which returns the FIRST occurrence of a line's
    # text.  Separator ('=====') and blank lines repeat, so the
    # RESPONSE: terminator check compared against the wrong position and
    # the response section leaked into the result.  enumerate() gives
    # the true index, and the neighbour checks are now plain
    # short-circuiting conditions instead of a misleading
    # conditional-expression form.
    lines = content.split('\n')
    in_request = False
    request_lines = []

    for i, line in enumerate(lines):
        if 'REQUEST:' in line and i > 0 and '=' in lines[i - 1]:
            in_request = True
            continue
        if in_request:
            if line.startswith('=' * 10) and i + 1 < len(lines) and 'RESPONSE:' in lines[i + 1]:
                break
            request_lines.append(line)

    return '\n'.join(request_lines).strip()
|
||||
|
||||
|
||||
def send_request(prompt: str, model: str, base_url: str, api_key: str, disable_thinking: bool = False):
    """Send the prompt to the LLM and return the parsed result and raw response.

    Args:
        prompt: Request text extracted from the debug file.
        model: Model name to query.
        base_url: API base URL (``None`` for the OpenAI default).
        api_key: Bearer token; only its last 8 chars are echoed to stdout.
        disable_thinking: When True, send the GLM-specific
            ``{"thinking": {"type": "disabled"}}`` body parameter.

    Returns:
        ``(result, response)`` where ``result`` is a dict with
        ``'content'`` and ``'reasoning_content'`` strings and
        ``response`` is the raw SDK response object.
    """
    client = OpenAI(api_key=api_key, base_url=base_url)

    # Add explicit instruction (same as Step 3)
    full_prompt = prompt + "\n\nIMPORTANT: Reply with ONLY the answer. Do NOT write any explanation, thinking, or analysis. Just output the answer."

    # Single source of truth for the request body.  The previous version
    # built this dict twice — once for display and once for the API call —
    # which risked the two silently drifting apart.
    request_payload = {
        "model": model,
        "messages": [
            {"role": "user", "content": full_prompt}
        ],
        "temperature": 0.0,
        "max_tokens": 300  # Allow full response
    }

    # Shown top-level in the displayed body, because the SDK merges
    # extra_body into the JSON request body at the top level.
    if disable_thinking:
        request_payload["thinking"] = {"type": "disabled"}

    print("=" * 60)
    print("REQUEST TO SERVER:")
    print("=" * 60)
    print(f"URL: {base_url}/chat/completions")
    print(f"Headers: {{")
    print(f"  'Authorization': 'Bearer ***{api_key[-8:]}',")
    print(f"  'Content-Type': 'application/json'")
    print(f"}}")
    print(f"\nBody:")
    print(json.dumps(request_payload, ensure_ascii=False, indent=2))
    print("=" * 60)

    # Build API call parameters from the same payload.  The OpenAI SDK
    # rejects unknown top-level kwargs, so the non-standard GLM
    # "thinking" parameter must travel via extra_body instead.
    api_params = {k: v for k, v in request_payload.items() if k != "thinking"}
    if disable_thinking:
        api_params["extra_body"] = {"thinking": {"type": "disabled"}}

    response = client.chat.completions.create(**api_params)

    message = response.choices[0].message

    # GLM-style models may return text in reasoning_content instead of
    # content, so capture both.
    result = {}
    result['content'] = message.content or ""

    if hasattr(message, 'reasoning_content') and message.reasoning_content:
        result['reasoning_content'] = message.reasoning_content
    else:
        result['reasoning_content'] = ""

    return result, response
|
||||
|
||||
|
||||
def main():
    """CLI entry point: replay a debug-file request against the live LLM.

    Reads the debug file path (and optional ``-nt``/``--no-thinking``
    flag) from ``sys.argv``, extracts the saved request, re-sends it with
    the Step 3 configuration, and dumps both the raw response object and
    the parsed result to stdout.  Exits with status 1 on bad usage, a
    missing file, or an unextractable request.
    """
    # Parse arguments
    args = sys.argv[1:]
    disable_thinking = "-nt" in args or "--no-thinking" in args

    # Remove flags from args to find the file path
    file_args = [a for a in args if not a.startswith("-")]

    if len(file_args) < 1:
        print("Usage: uv run debug_llm.py [-nt] <debug_file_path>")
        print(" -nt, --no-thinking Disable model thinking")
        print("Example: uv run debug_llm.py _speakers_debug/S01E01_lines_step1.txt")
        print(" uv run debug_llm.py -nt _speakers_debug/S01E01_lines_step1.txt")
        sys.exit(1)

    debug_path = Path(file_args[0])

    if not debug_path.exists():
        print(f"Error: File not found: {debug_path}")
        sys.exit(1)

    # Get config (raises ValueError if OPENAI_API_KEY is missing)
    base_url, model, api_key = get_llm_config()

    print(f"Configuration:")
    print(f" Model: {model}")
    print(f" Endpoint: {base_url or 'OpenAI default'}")
    print(f" Debug file: {debug_path}")
    print(f" Disable thinking: {disable_thinking}")

    # Extract request
    request = extract_request_from_debug(debug_path)

    if not request:
        print("Error: Could not extract request from debug file")
        sys.exit(1)

    print("\n" + "=" * 60)
    print("EXTRACTED PROMPT (from debug file):")
    print("=" * 60)
    print(request)
    print("=" * 60)

    # Send request
    print("\nSending request to server...")

    result, raw_response = send_request(request, model, base_url, api_key, disable_thinking=disable_thinking)

    # Dump the raw SDK response object for debugging: type, attributes,
    # and each choice in detail.
    print("\n" + "=" * 60)
    print("RAW RESPONSE OBJECT:")
    print("=" * 60)
    print(f"\nType: {type(raw_response)}")
    print(f"\nDir: {[x for x in dir(raw_response) if not x.startswith('_')]}")
    print(f"\nChoices: {len(raw_response.choices)}")

    if raw_response.choices:
        choice = raw_response.choices[0]
        print(f"\nChoice 0:")
        print(f" finish_reason: {choice.finish_reason}")
        print(f" index: {choice.index}")

        message = choice.message
        print(f"\n message:")
        print(f" type: {type(message)}")
        print(f" dir: {[x for x in dir(message) if not x.startswith('_')]}")
        print(f" content: {repr(message.content)}")

        # reasoning_content is a GLM-specific field; it may be absent on
        # other providers' message objects.
        if hasattr(message, 'reasoning_content'):
            print(f" reasoning_content: {repr(getattr(message, 'reasoning_content', None))}")
        else:
            print(f" reasoning_content: (attribute not present)")

        if hasattr(message, 'role'):
            print(f" role: {message.role}")

    # Token usage, when the server reports it.
    print(f"\nUsage:")
    if hasattr(raw_response, 'usage') and raw_response.usage:
        usage = raw_response.usage
        print(f" completion_tokens: {getattr(usage, 'completion_tokens', 'N/A')}")
        print(f" prompt_tokens: {getattr(usage, 'prompt_tokens', 'N/A')}")
        print(f" total_tokens: {getattr(usage, 'total_tokens', 'N/A')}")
    else:
        print(" (not available)")

    print(f"\nModel: {getattr(raw_response, 'model', 'N/A')}")
    print(f"Object: {getattr(raw_response, 'object', 'N/A')}")
    print(f"Created: {getattr(raw_response, 'created', 'N/A')}")
    print(f"ID: {getattr(raw_response, 'id', 'N/A')}")

    # Parsed result as extracted by send_request().
    print("\n" + "=" * 60)
    print("PARSED RESULT:")
    print("=" * 60)

    if result['reasoning_content']:
        print("\n[reasoning_content]:")
        print(result['reasoning_content'])

    print("\n[content]:")
    print(result['content'] if result['content'] else "(empty)")

    print("\n" + "=" * 60)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user