#!/usr/bin/env python3
"""
Debug tool: Replay a request from a debug file.

Usage:
    uv run debug_llm.py _speakers_debug/S01E01_lines_step1.txt

This will:
1. Read the request from the debug file
2. Use the same model/endpoint as Step 3
3. Send the request
4. Print the raw response to stdout
"""
import os
import re
import sys
import json
from pathlib import Path


def get_llm_config():
    """Return (base_url, model, api_key) from environment (same as Step 3).

    Returns:
        tuple[str | None, str, str]: base_url (None -> OpenAI default),
        model name, and API key.

    Raises:
        ValueError: if OPENAI_API_KEY is not set.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY environment variable is required")
    base_url = os.getenv("OPENAI_BASE_URL")
    model = os.getenv("LLM_MODEL", "glm-4.5-air")
    return base_url, model, api_key


def extract_request_from_debug(debug_path: Path) -> str:
    """Extract the request section from a debug file.

    The debug file layout is expected to be:
        ==========
        REQUEST:
        ==========

        <prompt text>

        ==========
        RESPONSE:
        ...
    """
    with open(debug_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Primary: match the whole REQUEST ... RESPONSE framing in one regex.
    match = re.search(
        r'={10,}\s*\nREQUEST:\s*\n={10,}\s*\n\n(.*?)\n\n={10,}\s*\nRESPONSE:',
        content,
        re.DOTALL
    )
    if match:
        return match.group(1).strip()

    # Fallback: line-by-line scan for the REQUEST header and the
    # "====" / "RESPONSE:" terminator.
    # BUGFIX: the original used lines.index(line) here, which returns the
    # FIRST occurrence of that line's text — with duplicate lines (blank
    # lines, repeated '=' rules) it computed wrong indices, so the break
    # condition never fired and the RESPONSE section leaked into the
    # result. enumerate() gives the actual position (and is O(n), not O(n^2)).
    lines = content.split('\n')
    in_request = False
    request_lines = []
    for i, line in enumerate(lines):
        if not in_request:
            if 'REQUEST:' in line and i > 0 and '=' in lines[i - 1]:
                in_request = True
            continue
        if (line.startswith('=' * 10)
                and i + 1 < len(lines)
                and 'RESPONSE:' in lines[i + 1]):
            break
        request_lines.append(line)
    return '\n'.join(request_lines).strip()


def send_request(prompt: str, model: str, base_url: str, api_key: str,
                 disable_thinking: bool = False):
    """Send the prompt to the LLM and return (parsed_result, raw_response).

    Args:
        prompt: the extracted prompt text.
        model: model name.
        base_url: API endpoint base URL (None -> OpenAI default).
        api_key: bearer token.
        disable_thinking: if True, send the GLM "thinking: disabled" option.

    Returns:
        tuple: ({'content': str, 'reasoning_content': str}, raw SDK response).
    """
    # Imported lazily so the pure-stdlib helpers above stay importable
    # even when the openai package is not installed.
    from openai import OpenAI

    client = OpenAI(api_key=api_key, base_url=base_url)

    # Add explicit instruction (same as Step 3)
    full_prompt = prompt + "\n\nIMPORTANT: Reply with ONLY the answer. Do NOT write any explanation, thinking, or analysis. Just output the answer."

    # Build the request payload ONCE; it is both displayed and sent.
    # (The original built two identical dicts, which can silently drift.)
    request_payload = {
        "model": model,
        "messages": [
            {"role": "user", "content": full_prompt}
        ],
        "temperature": 0.0,
        "max_tokens": 300  # Allow full response
    }
    # Show the thinking parameter in the displayed body if disabling;
    # the wire request carries it via extra_body below, so the display
    # matches what is actually sent.
    if disable_thinking:
        request_payload["thinking"] = {"type": "disabled"}

    print("=" * 60)
    print("REQUEST TO SERVER:")
    print("=" * 60)
    print(f"URL: {base_url}/chat/completions")
    print(f"Headers: {{")
    print(f" 'Authorization': 'Bearer ***{api_key[-8:]}',")
    print(f" 'Content-Type': 'application/json'")
    print(f"}}")
    print(f"\nBody:")
    print(json.dumps(request_payload, ensure_ascii=False, indent=2))
    print("=" * 60)

    # For the GLM API, non-standard parameters must go through extra_body
    # rather than as a top-level keyword the SDK would reject.
    api_params = {k: v for k, v in request_payload.items() if k != "thinking"}
    if disable_thinking:
        api_params["extra_body"] = {"thinking": {"type": "disabled"}}

    response = client.chat.completions.create(**api_params)
    message = response.choices[0].message

    # Collect both content and (if the server returned it) reasoning_content.
    result = {
        'content': message.content or "",
        'reasoning_content': getattr(message, 'reasoning_content', None) or "",
    }
    return result, response


def main():
    """CLI entry point: parse args, replay the debug request, dump the response."""
    # Parse arguments
    args = sys.argv[1:]
    disable_thinking = "-nt" in args or "--no-thinking" in args

    # Remove flags from args to find the file path
    file_args = [a for a in args if not a.startswith("-")]
    if len(file_args) < 1:
        print("Usage: uv run debug_llm.py [-nt] <debug_file>")
        print(" -nt, --no-thinking Disable model thinking")
        print("Example: uv run debug_llm.py _speakers_debug/S01E01_lines_step1.txt")
        print(" uv run debug_llm.py -nt _speakers_debug/S01E01_lines_step1.txt")
        sys.exit(1)

    debug_path = Path(file_args[0])
    if not debug_path.exists():
        print(f"Error: File not found: {debug_path}")
        sys.exit(1)

    # Get config
    base_url, model, api_key = get_llm_config()
    print(f"Configuration:")
    print(f" Model: {model}")
    print(f" Endpoint: {base_url or 'OpenAI default'}")
    print(f" Debug file: {debug_path}")
    print(f" Disable thinking: {disable_thinking}")

    # Extract request
    request = extract_request_from_debug(debug_path)
    if not request:
        print("Error: Could not extract request from debug file")
        sys.exit(1)

    print("\n" + "=" * 60)
    print("EXTRACTED PROMPT (from debug file):")
    print("=" * 60)
    print(request)
    print("=" * 60)

    # Send request
    print("\nSending request to server...")
    result, raw_response = send_request(
        request, model, base_url, api_key, disable_thinking=disable_thinking)

    # Dump the raw SDK response object for inspection.
    print("\n" + "=" * 60)
    print("RAW RESPONSE OBJECT:")
    print("=" * 60)
    print(f"\nType: {type(raw_response)}")
    print(f"\nDir: {[x for x in dir(raw_response) if not x.startswith('_')]}")
    print(f"\nChoices: {len(raw_response.choices)}")
    if raw_response.choices:
        choice = raw_response.choices[0]
        print(f"\nChoice 0:")
        print(f" finish_reason: {choice.finish_reason}")
        print(f" index: {choice.index}")
        message = choice.message
        print(f"\n message:")
        print(f" type: {type(message)}")
        print(f" dir: {[x for x in dir(message) if not x.startswith('_')]}")
        print(f" content: {repr(message.content)}")
        if hasattr(message, 'reasoning_content'):
            print(f" reasoning_content: {repr(getattr(message, 'reasoning_content', None))}")
        else:
            print(f" reasoning_content: (attribute not present)")
        if hasattr(message, 'role'):
            print(f" role: {message.role}")

    print(f"\nUsage:")
    if hasattr(raw_response, 'usage') and raw_response.usage:
        usage = raw_response.usage
        print(f" completion_tokens: {getattr(usage, 'completion_tokens', 'N/A')}")
        print(f" prompt_tokens: {getattr(usage, 'prompt_tokens', 'N/A')}")
        print(f" total_tokens: {getattr(usage, 'total_tokens', 'N/A')}")
    else:
        print(" (not available)")

    print(f"\nModel: {getattr(raw_response, 'model', 'N/A')}")
    print(f"Object: {getattr(raw_response, 'object', 'N/A')}")
    print(f"Created: {getattr(raw_response, 'created', 'N/A')}")
    print(f"ID: {getattr(raw_response, 'id', 'N/A')}")

    print("\n" + "=" * 60)
    print("PARSED RESULT:")
    print("=" * 60)
    if result['reasoning_content']:
        print("\n[reasoning_content]:")
        print(result['reasoning_content'])
    print("\n[content]:")
    print(result['content'] if result['content'] else "(empty)")
    print("\n" + "=" * 60)


if __name__ == "__main__":
    main()