#!/usr/bin/env python3
"""
Debug tool: Replay a request from a debug file.

Usage:
    uv run debug_llm.py _speakers_debug/S01E01_lines_step1.txt

This will:
1. Read the request from the debug file
2. Use the same model/endpoint as Step 3
3. Send the request
4. Print the raw response to stdout
"""
|
# Standard library
import json
import os
import re
import sys
from pathlib import Path

# Third-party
from openai import OpenAI
|
|
|
def get_llm_config():
    """Read the LLM endpoint settings from the environment (mirrors Step 3).

    Returns:
        tuple: (base_url, model, api_key). base_url may be None, in which
        case the OpenAI default endpoint is used.

    Raises:
        ValueError: if OPENAI_API_KEY is not set (or empty).
    """
    key = os.environ.get("OPENAI_API_KEY")
    if not key:
        raise ValueError("OPENAI_API_KEY environment variable is required")

    return (
        os.environ.get("OPENAI_BASE_URL"),
        os.environ.get("LLM_MODEL", "glm-4.5-air"),
        key,
    )
|
|
|
|
|
|
def extract_request_from_debug(debug_path: Path) -> str:
    """Extract the REQUEST section from a Step-3 debug dump.

    Expected file layout:
        ==========
        REQUEST:
        ==========

        <request text>

        ==========
        RESPONSE:
        ...

    Args:
        debug_path: Path to the debug file.

    Returns:
        The request text with surrounding whitespace stripped, or ""
        when no request section can be located.
    """
    with open(debug_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Primary: match the whole REQUEST ... RESPONSE frame in one pass.
    match = re.search(
        r'={10,}\s*\nREQUEST:\s*\n={10,}\s*\n\n(.*?)\n\n={10,}\s*\nRESPONSE:',
        content,
        re.DOTALL
    )
    if match:
        return match.group(1).strip()

    # Fallback: line-by-line scan for files that deviate slightly from the
    # frame above.  BUGFIX: the old code located lines with
    # lines.index(line), which returns the FIRST occurrence of a repeated
    # line -- separators and blank lines repeat in these files, so the
    # detected boundaries could be wrong.  enumerate keeps the true
    # position of every line.
    lines = content.split('\n')
    in_request = False
    request_lines = []

    for i, line in enumerate(lines):
        if not in_request:
            # A REQUEST: header only counts when it follows a separator.
            if 'REQUEST:' in line and i > 0 and '=' in lines[i - 1]:
                in_request = True
            continue
        # Stop at the separator that directly precedes the RESPONSE: header.
        if (line.startswith('=' * 10)
                and i + 1 < len(lines)
                and 'RESPONSE:' in lines[i + 1]):
            break
        request_lines.append(line)

    # BUGFIX: the old fallback also captured the separator line closing the
    # REQUEST: header; drop any leading all-'=' lines from the result.
    while request_lines and request_lines[0] and set(request_lines[0]) == {'='}:
        request_lines.pop(0)

    return '\n'.join(request_lines).strip()
|
|
|
|
|
|
def send_request(prompt: str, model: str, base_url: str, api_key: str, disable_thinking: bool = False):
    """Send the prompt to the LLM and return the parsed and raw responses.

    Args:
        prompt: The request text extracted from the debug file.
        model: Model identifier to send.
        base_url: API base URL (None means the OpenAI default endpoint).
        api_key: API key for the endpoint.
        disable_thinking: When True, ask a GLM-style server to disable its
            "thinking" channel.

    Returns:
        tuple: (result, response) where result is a dict with 'content' and
        'reasoning_content' strings and response is the raw client object.
    """
    client = OpenAI(api_key=api_key, base_url=base_url)

    # Same explicit instruction Step 3 appends to suppress meta-output.
    full_prompt = prompt + "\n\nIMPORTANT: Reply with ONLY the answer. Do NOT write any explanation, thinking, or analysis. Just output the answer."

    # BUGFIX-ADJACENT REFACTOR: the payload used to be built twice (once for
    # display, once for the API call), which let the two copies drift apart.
    # Build it once and derive the display view from it.
    api_params = {
        "model": model,
        "messages": [
            {"role": "user", "content": full_prompt}
        ],
        "temperature": 0.0,
        "max_tokens": 300  # Allow full response
    }
    if disable_thinking:
        # "thinking" is a non-standard (GLM) parameter, so the OpenAI client
        # must carry it via extra_body; the server sees it merged into the
        # JSON body.
        api_params["extra_body"] = {"thinking": {"type": "disabled"}}

    # Display payload: show the wire-level body as the server will see it.
    display_payload = {k: v for k, v in api_params.items() if k != "extra_body"}
    if disable_thinking:
        display_payload["thinking"] = {"type": "disabled"}

    print("=" * 60)
    print("REQUEST TO SERVER:")
    print("=" * 60)
    print(f"URL: {base_url}/chat/completions")
    print("Headers: {")
    print(f" 'Authorization': 'Bearer ***{api_key[-8:]}',")
    print(" 'Content-Type': 'application/json'")
    print("}")
    print("\nBody:")
    print(json.dumps(display_payload, ensure_ascii=False, indent=2))
    print("=" * 60)

    response = client.chat.completions.create(**api_params)

    message = response.choices[0].message

    # Surface both the normal content and (if present) reasoning_content,
    # which GLM-style models use for their "thinking" channel.
    result = {
        'content': message.content or "",
        'reasoning_content': getattr(message, 'reasoning_content', None) or "",
    }

    return result, response
|
|
|
|
|
|
def main():
    """CLI entry point: replay the request stored in a debug file."""
    # Separate flags from positional arguments.
    argv = sys.argv[1:]
    disable_thinking = "-nt" in argv or "--no-thinking" in argv
    positional = [arg for arg in argv if not arg.startswith("-")]

    if not positional:
        print("Usage: uv run debug_llm.py [-nt] <debug_file_path>")
        print(" -nt, --no-thinking Disable model thinking")
        print("Example: uv run debug_llm.py _speakers_debug/S01E01_lines_step1.txt")
        print(" uv run debug_llm.py -nt _speakers_debug/S01E01_lines_step1.txt")
        sys.exit(1)

    debug_path = Path(positional[0])
    if not debug_path.exists():
        print(f"Error: File not found: {debug_path}")
        sys.exit(1)

    # Same configuration source as Step 3.
    base_url, model, api_key = get_llm_config()

    print("Configuration:")
    print(f" Model: {model}")
    print(f" Endpoint: {base_url or 'OpenAI default'}")
    print(f" Debug file: {debug_path}")
    print(f" Disable thinking: {disable_thinking}")

    request = extract_request_from_debug(debug_path)
    if not request:
        print("Error: Could not extract request from debug file")
        sys.exit(1)

    separator = "=" * 60
    print("\n" + separator)
    print("EXTRACTED PROMPT (from debug file):")
    print(separator)
    print(request)
    print(separator)

    print("\nSending request to server...")
    result, raw_response = send_request(
        request, model, base_url, api_key, disable_thinking=disable_thinking
    )

    # Dump the raw response object for inspection.
    print("\n" + separator)
    print("RAW RESPONSE OBJECT:")
    print(separator)
    print(f"\nType: {type(raw_response)}")
    print(f"\nDir: {[x for x in dir(raw_response) if not x.startswith('_')]}")
    print(f"\nChoices: {len(raw_response.choices)}")

    if raw_response.choices:
        choice = raw_response.choices[0]
        print("\nChoice 0:")
        print(f" finish_reason: {choice.finish_reason}")
        print(f" index: {choice.index}")

        message = choice.message
        print("\n message:")
        print(f" type: {type(message)}")
        print(f" dir: {[x for x in dir(message) if not x.startswith('_')]}")
        print(f" content: {repr(message.content)}")

        if hasattr(message, 'reasoning_content'):
            print(f" reasoning_content: {repr(getattr(message, 'reasoning_content', None))}")
        else:
            print(" reasoning_content: (attribute not present)")

        if hasattr(message, 'role'):
            print(f" role: {message.role}")

    print("\nUsage:")
    if hasattr(raw_response, 'usage') and raw_response.usage:
        usage = raw_response.usage
        print(f" completion_tokens: {getattr(usage, 'completion_tokens', 'N/A')}")
        print(f" prompt_tokens: {getattr(usage, 'prompt_tokens', 'N/A')}")
        print(f" total_tokens: {getattr(usage, 'total_tokens', 'N/A')}")
    else:
        print(" (not available)")

    print(f"\nModel: {getattr(raw_response, 'model', 'N/A')}")
    print(f"Object: {getattr(raw_response, 'object', 'N/A')}")
    print(f"Created: {getattr(raw_response, 'created', 'N/A')}")
    print(f"ID: {getattr(raw_response, 'id', 'N/A')}")

    print("\n" + separator)
    print("PARSED RESULT:")
    print(separator)

    if result['reasoning_content']:
        print("\n[reasoning_content]:")
        print(result['reasoning_content'])

    print("\n[content]:")
    print(result['content'] if result['content'] else "(empty)")

    print("\n" + separator)


if __name__ == "__main__":
    main()
|