# Source: malabar/debug_llm.py (236 lines, 7.4 KiB, Python)
# Retrieved: 2026-03-04 17:02:59 +08:00
#!/usr/bin/env python3
"""
Debug tool: Replay a request from a debug file.
Usage:
uv run debug_llm.py _speakers_debug/S01E01_lines_step1.txt
This will:
1. Read the request from the debug file
2. Use the same model/endpoint as Step 3
3. Send the request
4. Print the raw response to stdout
"""
import os
import re
import sys
import json
from pathlib import Path
from openai import OpenAI
def get_llm_config():
    """Return ``(base_url, model, api_key)`` read from the environment.

    Mirrors the configuration lookup used by Step 3:
    ``OPENAI_BASE_URL`` may be unset (``None`` -> OpenAI default endpoint)
    and ``LLM_MODEL`` falls back to ``glm-4.5-air``.

    Raises:
        ValueError: if ``OPENAI_API_KEY`` is not set.
    """
    key = os.getenv("OPENAI_API_KEY")
    if not key:
        raise ValueError("OPENAI_API_KEY environment variable is required")
    return (
        os.getenv("OPENAI_BASE_URL"),
        os.getenv("LLM_MODEL", "glm-4.5-air"),
        key,
    )
def extract_request_from_debug(debug_path: Path) -> str:
    """Extract the REQUEST section from a debug file.

    Expected layout::

        ==========
        REQUEST:
        ==========

        <request text>

        ==========
        RESPONSE:
        ...

    Args:
        debug_path: Path to the debug dump written by Step 3.

    Returns:
        The request text, stripped, or "" if none could be found.
    """
    with open(debug_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Primary path: match the whole REQUEST...RESPONSE frame in one go.
    match = re.search(
        r'={10,}\s*\nREQUEST:\s*\n={10,}\s*\n\n(.*?)\n\n={10,}\s*\nRESPONSE:',
        content,
        re.DOTALL
    )
    if match:
        return match.group(1).strip()

    # Fallback: line-by-line scan for slightly malformed files.
    # BUG FIX: the previous version used lines.index(line) to find the
    # current position, which returns the FIRST occurrence of a line —
    # wrong whenever lines repeat (the '=' separators always do) and
    # O(n^2) overall. enumerate() gives the true index in one pass.
    lines = content.split('\n')
    in_request = False
    request_lines = []
    for i, line in enumerate(lines):
        if not in_request:
            # Header is "REQUEST:" preceded by a separator line.
            if 'REQUEST:' in line and i > 0 and '=' in lines[i - 1]:
                in_request = True
            continue
        # Stop at the separator that introduces the RESPONSE section.
        if line.startswith('=' * 10) and i + 1 < len(lines) and 'RESPONSE:' in lines[i + 1]:
            break
        # Skip the separator line(s) directly under the REQUEST: header so
        # they do not leak into the extracted prompt.
        if not request_lines and line and set(line) <= {'='}:
            continue
        request_lines.append(line)
    return '\n'.join(request_lines).strip()
def send_request(prompt: str, model: str, base_url: str, api_key: str, disable_thinking: bool = False):
    """Send the prompt to the LLM and return ``(result, raw_response)``.

    The full request body is echoed to stdout before sending so the exact
    payload can be inspected/replayed.

    Args:
        prompt: Prompt text extracted from the debug file.
        model: Model name (e.g. "glm-4.5-air").
        base_url: API endpoint base URL, or None for the OpenAI default.
        api_key: Bearer token; only its last 8 chars are echoed.
        disable_thinking: If True, send GLM's ``{"thinking": {"type": "disabled"}}``.

    Returns:
        Tuple of (dict with 'content' and 'reasoning_content' keys, raw SDK response).
    """
    client = OpenAI(api_key=api_key, base_url=base_url)

    # Add explicit instruction (same as Step 3).
    full_prompt = prompt + "\n\nIMPORTANT: Reply with ONLY the answer. Do NOT write any explanation, thinking, or analysis. Just output the answer."

    # FIX: build the payload ONCE. Previously the same dict was constructed
    # twice (for display and for the API call) and the two copies could drift.
    request_payload = {
        "model": model,
        "messages": [
            {"role": "user", "content": full_prompt}
        ],
        "temperature": 0.0,
        "max_tokens": 300  # Allow full response
    }
    if disable_thinking:
        # GLM-specific switch; appears at the top level of the JSON body.
        request_payload["thinking"] = {"type": "disabled"}

    print("=" * 60)
    print("REQUEST TO SERVER:")
    print("=" * 60)
    print(f"URL: {base_url}/chat/completions")
    print(f"Headers: {{")
    print(f" 'Authorization': 'Bearer ***{api_key[-8:]}',")
    print(f" 'Content-Type': 'application/json'")
    print(f"}}")
    print(f"\nBody:")
    print(json.dumps(request_payload, ensure_ascii=False, indent=2))
    print("=" * 60)

    # The OpenAI SDK rejects unknown keyword arguments, so the non-standard
    # "thinking" parameter must travel via extra_body; it is serialized into
    # the same top-level position shown in the Body dump above.
    api_params = {k: v for k, v in request_payload.items() if k != "thinking"}
    if disable_thinking:
        api_params["extra_body"] = {"thinking": {"type": "disabled"}}

    response = client.chat.completions.create(**api_params)
    message = response.choices[0].message

    # GLM exposes chain-of-thought in a non-standard reasoning_content
    # attribute; normalize both fields to "" when absent/empty.
    result = {
        'content': message.content or "",
        'reasoning_content': getattr(message, 'reasoning_content', None) or "",
    }
    return result, response
def main():
    """CLI entry point: replay a debug-file request and dump the raw reply.

    Parses flags, loads config, extracts the prompt, sends it, then prints
    the raw response object and the parsed result via the helpers below.
    Exits with status 1 on usage errors or a missing/unreadable debug file.
    """
    args = sys.argv[1:]
    disable_thinking = "-nt" in args or "--no-thinking" in args
    # Anything that is not a flag is treated as the debug-file path.
    file_args = [a for a in args if not a.startswith("-")]
    if len(file_args) < 1:
        print("Usage: uv run debug_llm.py [-nt] <debug_file_path>")
        print(" -nt, --no-thinking Disable model thinking")
        print("Example: uv run debug_llm.py _speakers_debug/S01E01_lines_step1.txt")
        print(" uv run debug_llm.py -nt _speakers_debug/S01E01_lines_step1.txt")
        sys.exit(1)

    debug_path = Path(file_args[0])
    if not debug_path.exists():
        print(f"Error: File not found: {debug_path}")
        sys.exit(1)

    base_url, model, api_key = get_llm_config()
    print(f"Configuration:")
    print(f" Model: {model}")
    print(f" Endpoint: {base_url or 'OpenAI default'}")
    print(f" Debug file: {debug_path}")
    print(f" Disable thinking: {disable_thinking}")

    request = extract_request_from_debug(debug_path)
    if not request:
        print("Error: Could not extract request from debug file")
        sys.exit(1)

    print("\n" + "=" * 60)
    print("EXTRACTED PROMPT (from debug file):")
    print("=" * 60)
    print(request)
    print("=" * 60)

    print("\nSending request to server...")
    result, raw_response = send_request(request, model, base_url, api_key, disable_thinking=disable_thinking)
    _print_raw_response(raw_response)
    _print_parsed_result(result)


def _print_raw_response(raw_response):
    """Dump the raw SDK response object (type, attrs, choices, usage, ids)."""
    print("\n" + "=" * 60)
    print("RAW RESPONSE OBJECT:")
    print("=" * 60)
    print(f"\nType: {type(raw_response)}")
    print(f"\nDir: {[x for x in dir(raw_response) if not x.startswith('_')]}")
    print(f"\nChoices: {len(raw_response.choices)}")
    if raw_response.choices:
        choice = raw_response.choices[0]
        print(f"\nChoice 0:")
        print(f" finish_reason: {choice.finish_reason}")
        print(f" index: {choice.index}")
        message = choice.message
        print(f"\n message:")
        print(f" type: {type(message)}")
        print(f" dir: {[x for x in dir(message) if not x.startswith('_')]}")
        print(f" content: {repr(message.content)}")
        # reasoning_content is a GLM extension; report its presence explicitly.
        if hasattr(message, 'reasoning_content'):
            print(f" reasoning_content: {repr(getattr(message, 'reasoning_content', None))}")
        else:
            print(f" reasoning_content: (attribute not present)")
        if hasattr(message, 'role'):
            print(f" role: {message.role}")
    print(f"\nUsage:")
    if hasattr(raw_response, 'usage') and raw_response.usage:
        usage = raw_response.usage
        print(f" completion_tokens: {getattr(usage, 'completion_tokens', 'N/A')}")
        print(f" prompt_tokens: {getattr(usage, 'prompt_tokens', 'N/A')}")
        print(f" total_tokens: {getattr(usage, 'total_tokens', 'N/A')}")
    else:
        print(" (not available)")
    print(f"\nModel: {getattr(raw_response, 'model', 'N/A')}")
    print(f"Object: {getattr(raw_response, 'object', 'N/A')}")
    print(f"Created: {getattr(raw_response, 'created', 'N/A')}")
    print(f"ID: {getattr(raw_response, 'id', 'N/A')}")


def _print_parsed_result(result):
    """Print the 'content'/'reasoning_content' dict built by send_request."""
    print("\n" + "=" * 60)
    print("PARSED RESULT:")
    print("=" * 60)
    if result['reasoning_content']:
        print("\n[reasoning_content]:")
        print(result['reasoning_content'])
    print("\n[content]:")
    print(result['content'] if result['content'] else "(empty)")
    print("\n" + "=" * 60)


if __name__ == "__main__":
    main()