#!/usr/bin/env python3
"""
Debug tool: Replay a request from a debug file.

Usage:
    uv run debug_llm.py _speakers_debug/S01E01_lines_step1.txt

This will:
1. Read the request from the debug file
2. Use the same model/endpoint as Step 3
3. Send the request
4. Print the raw response to stdout
"""
|
# Standard library
import json
import os
import re
import sys
from pathlib import Path

# Third-party
from openai import OpenAI
|
|
|
def get_llm_config():
    """Read the LLM endpoint settings from the environment (mirrors Step 3).

    Returns:
        tuple: (base_url, model, api_key). base_url may be None, in which
        case the OpenAI default endpoint is used.

    Raises:
        ValueError: if OPENAI_API_KEY is not set (or empty).
    """
    key = os.environ.get("OPENAI_API_KEY")
    if not key:
        raise ValueError("OPENAI_API_KEY environment variable is required")

    return (
        os.environ.get("OPENAI_BASE_URL"),
        os.environ.get("LLM_MODEL", "glm-4.5-air"),
        key,
    )
|
|
|
|
|
|
def extract_request_from_debug(debug_path: Path) -> str:
    """Extract the REQUEST section from a Step-3 debug dump.

    Expected file layout:
        ==========
        REQUEST:
        ==========

        <request text>

        ==========
        RESPONSE:
        ...

    Args:
        debug_path: Path to the debug file.

    Returns:
        The request text with surrounding whitespace stripped, or ""
        when no request section can be located.
    """
    with open(debug_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Primary: match the whole REQUEST ... RESPONSE frame in one pass.
    match = re.search(
        r'={10,}\s*\nREQUEST:\s*\n={10,}\s*\n\n(.*?)\n\n={10,}\s*\nRESPONSE:',
        content,
        re.DOTALL
    )
    if match:
        return match.group(1).strip()

    # Fallback: line-by-line scan for files that deviate slightly from the
    # frame above.  BUGFIX: the old code located lines with
    # lines.index(line), which returns the FIRST occurrence of a repeated
    # line -- separators and blank lines repeat in these files, so the
    # detected boundaries could be wrong.  enumerate keeps the true
    # position of every line.
    lines = content.split('\n')
    in_request = False
    request_lines = []

    for i, line in enumerate(lines):
        if not in_request:
            # A REQUEST: header only counts when it follows a separator.
            if 'REQUEST:' in line and i > 0 and '=' in lines[i - 1]:
                in_request = True
            continue
        # Stop at the separator that directly precedes the RESPONSE: header.
        if (line.startswith('=' * 10)
                and i + 1 < len(lines)
                and 'RESPONSE:' in lines[i + 1]):
            break
        request_lines.append(line)

    # BUGFIX: the old fallback also captured the separator line closing the
    # REQUEST: header; drop any leading all-'=' lines from the result.
    while request_lines and request_lines[0] and set(request_lines[0]) == {'='}:
        request_lines.pop(0)

    return '\n'.join(request_lines).strip()
|
|
|
|
|
|
def send_request(prompt: str, model: str, base_url: str, api_key: str, disable_thinking: bool = False):
    """Send the prompt to the LLM and return the parsed and raw responses.

    Args:
        prompt: The request text extracted from the debug file.
        model: Model identifier to send.
        base_url: API base URL (None means the OpenAI default endpoint).
        api_key: API key for the endpoint.
        disable_thinking: When True, ask a GLM-style server to disable its
            "thinking" channel.

    Returns:
        tuple: (result, response) where result is a dict with 'content' and
        'reasoning_content' strings and response is the raw client object.
    """
    client = OpenAI(api_key=api_key, base_url=base_url)

    # Same explicit instruction Step 3 appends to suppress meta-output.
    full_prompt = prompt + "\n\nIMPORTANT: Reply with ONLY the answer. Do NOT write any explanation, thinking, or analysis. Just output the answer."

    # BUGFIX-ADJACENT REFACTOR: the payload used to be built twice (once for
    # display, once for the API call), which let the two copies drift apart.
    # Build it once and derive the display view from it.
    api_params = {
        "model": model,
        "messages": [
            {"role": "user", "content": full_prompt}
        ],
        "temperature": 0.0,
        "max_tokens": 300  # Allow full response
    }
    if disable_thinking:
        # "thinking" is a non-standard (GLM) parameter, so the OpenAI client
        # must carry it via extra_body; the server sees it merged into the
        # JSON body.
        api_params["extra_body"] = {"thinking": {"type": "disabled"}}

    # Display payload: show the wire-level body as the server will see it.
    display_payload = {k: v for k, v in api_params.items() if k != "extra_body"}
    if disable_thinking:
        display_payload["thinking"] = {"type": "disabled"}

    print("=" * 60)
    print("REQUEST TO SERVER:")
    print("=" * 60)
    print(f"URL: {base_url}/chat/completions")
    print("Headers: {")
    print(f" 'Authorization': 'Bearer ***{api_key[-8:]}',")
    print(" 'Content-Type': 'application/json'")
    print("}")
    print("\nBody:")
    print(json.dumps(display_payload, ensure_ascii=False, indent=2))
    print("=" * 60)

    response = client.chat.completions.create(**api_params)

    message = response.choices[0].message

    # Surface both the normal content and (if present) reasoning_content,
    # which GLM-style models use for their "thinking" channel.
    result = {
        'content': message.content or "",
        'reasoning_content': getattr(message, 'reasoning_content', None) or "",
    }

    return result, response
|
|
|
|
|
|
def main():
    """CLI entry point: replay the request stored in a debug file."""
    # Separate flags from positional arguments.
    argv = sys.argv[1:]
    disable_thinking = "-nt" in argv or "--no-thinking" in argv
    positional = [arg for arg in argv if not arg.startswith("-")]

    if not positional:
        print("Usage: uv run debug_llm.py [-nt] <debug_file_path>")
        print(" -nt, --no-thinking Disable model thinking")
        print("Example: uv run debug_llm.py _speakers_debug/S01E01_lines_step1.txt")
        print(" uv run debug_llm.py -nt _speakers_debug/S01E01_lines_step1.txt")
        sys.exit(1)

    debug_path = Path(positional[0])
    if not debug_path.exists():
        print(f"Error: File not found: {debug_path}")
        sys.exit(1)

    # Same configuration source as Step 3.
    base_url, model, api_key = get_llm_config()

    print("Configuration:")
    print(f" Model: {model}")
    print(f" Endpoint: {base_url or 'OpenAI default'}")
    print(f" Debug file: {debug_path}")
    print(f" Disable thinking: {disable_thinking}")

    request = extract_request_from_debug(debug_path)
    if not request:
        print("Error: Could not extract request from debug file")
        sys.exit(1)

    separator = "=" * 60
    print("\n" + separator)
    print("EXTRACTED PROMPT (from debug file):")
    print(separator)
    print(request)
    print(separator)

    print("\nSending request to server...")
    result, raw_response = send_request(
        request, model, base_url, api_key, disable_thinking=disable_thinking
    )

    # Dump the raw response object for inspection.
    print("\n" + separator)
    print("RAW RESPONSE OBJECT:")
    print(separator)
    print(f"\nType: {type(raw_response)}")
    print(f"\nDir: {[x for x in dir(raw_response) if not x.startswith('_')]}")
    print(f"\nChoices: {len(raw_response.choices)}")

    if raw_response.choices:
        choice = raw_response.choices[0]
        print("\nChoice 0:")
        print(f" finish_reason: {choice.finish_reason}")
        print(f" index: {choice.index}")

        message = choice.message
        print("\n message:")
        print(f" type: {type(message)}")
        print(f" dir: {[x for x in dir(message) if not x.startswith('_')]}")
        print(f" content: {repr(message.content)}")

        if hasattr(message, 'reasoning_content'):
            print(f" reasoning_content: {repr(getattr(message, 'reasoning_content', None))}")
        else:
            print(" reasoning_content: (attribute not present)")

        if hasattr(message, 'role'):
            print(f" role: {message.role}")

    print("\nUsage:")
    if hasattr(raw_response, 'usage') and raw_response.usage:
        usage = raw_response.usage
        print(f" completion_tokens: {getattr(usage, 'completion_tokens', 'N/A')}")
        print(f" prompt_tokens: {getattr(usage, 'prompt_tokens', 'N/A')}")
        print(f" total_tokens: {getattr(usage, 'total_tokens', 'N/A')}")
    else:
        print(" (not available)")

    print(f"\nModel: {getattr(raw_response, 'model', 'N/A')}")
    print(f"Object: {getattr(raw_response, 'object', 'N/A')}")
    print(f"Created: {getattr(raw_response, 'created', 'N/A')}")
    print(f"ID: {getattr(raw_response, 'id', 'N/A')}")

    print("\n" + separator)
    print("PARSED RESULT:")
    print(separator)

    if result['reasoning_content']:
        print("\n[reasoning_content]:")
        print(result['reasoning_content'])

    print("\n[content]:")
    print(result['content'] if result['content'] else "(empty)")

    print("\n" + separator)


if __name__ == "__main__":
    main()
|