#!/usr/bin/env python3
"""
LLM Timeout Fixer and Configuration Utility
This script helps diagnose and fix LLM timeout issues, particularly
when the node.js server or model loading causes the app to hang.
Usage:
    python fix_llm_timeout.py --test      # Test LLM connectivity
    python fix_llm_timeout.py --fix       # Apply recommended fixes
    python fix_llm_timeout.py --config    # Show current configuration
    python fix_llm_timeout.py --diagnose  # Diagnose potential hang issues
    python fix_llm_timeout.py             # No arguments: run all of the above
"""
import os
import sys
import argparse
def print_banner():
print("=" * 70)
print(" TranscriptorAI - LLM Timeout Diagnostic & Fix Utility")
print("=" * 70)
print()
def test_llm_connectivity():
"""Test if LLM backends are accessible"""
print("[1/4] Testing LLM Backend Connectivity...")
print()
# Test HuggingFace API
print(" Testing HuggingFace API...")
hf_token = os.getenv("HUGGINGFACE_TOKEN", "")
if not hf_token:
print(" β HUGGINGFACE_TOKEN not set")
print(" Set it with: export HUGGINGFACE_TOKEN='your_token_here'")
hf_available = False
else:
        try:
            from huggingface_hub import InferenceClient
            # text_generation() does not accept a timeout argument;
            # the timeout belongs on the InferenceClient itself.
            client = InferenceClient(token=hf_token, timeout=10)
            # Quick smoke test to confirm the endpoint responds
            client.text_generation(
                "Test",
                model="mistralai/Mixtral-8x7B-Instruct-v0.1",
                max_new_tokens=10
            )
            print(" ✓ HuggingFace API is accessible")
            hf_available = True
except Exception as e:
print(f" β HuggingFace API failed: {e}")
hf_available = False
print()
# Test LMStudio
print(" Testing LMStudio...")
lmstudio_url = os.getenv("LM_STUDIO_URL", "http://192.168.1.245:1234")
try:
import requests
response = requests.get(f"{lmstudio_url}/v1/models", timeout=5)
if response.status_code == 200:
print(f" β LMStudio is accessible at {lmstudio_url}")
lmstudio_available = True
else:
print(f" β LMStudio returned status {response.status_code}")
lmstudio_available = False
except Exception as e:
print(f" β LMStudio not accessible: {e}")
print(f" Checked URL: {lmstudio_url}")
lmstudio_available = False
print()
print("=" * 70)
print("SUMMARY:")
print(f" HuggingFace API: {'β Available' if hf_available else 'β Not Available'}")
print(f" LMStudio: {'β Available' if lmstudio_available else 'β Not Available'}")
print("=" * 70)
print()
if not hf_available and not lmstudio_available:
print("β WARNING: No LLM backends are available!")
print()
print("RECOMMENDED ACTIONS:")
print("1. For HuggingFace API:")
print(" export HUGGINGFACE_TOKEN='your_hf_token_here'")
print()
print("2. For LMStudio:")
print(" - Start LMStudio server")
print(" - Load a model (recommended: Mistral 7B or smaller)")
print(" - Verify it's running at: http://localhost:1234")
print(" - Set URL: export LM_STUDIO_URL='http://localhost:1234'")
print()
return False
return True
def show_current_config():
"""Display current configuration"""
print("[2/4] Current Configuration...")
print()
config_items = [
("LLM Backend", os.getenv("LLM_BACKEND", "hf_api")),
("HuggingFace Model", os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")),
("LMStudio URL", os.getenv("LM_STUDIO_URL", "http://192.168.1.245:1234")),
("Max Tokens", os.getenv("MAX_TOKENS_PER_REQUEST", "300")),
("LLM Timeout", os.getenv("LLM_TIMEOUT", "120")),
("Temperature", os.getenv("LLM_TEMPERATURE", "0.3")),
]
for key, value in config_items:
print(f" {key:20s}: {value}")
print()
def apply_fixes():
"""Apply recommended configuration fixes"""
print("[3/4] Applying Recommended Fixes...")
print()
fixes_applied = []
# Create .env file with recommended settings
env_content = """# TranscriptorAI LLM Configuration - Optimized for Stability
# Generated by fix_llm_timeout.py
# Use HuggingFace API (more stable than local models)
LLM_BACKEND=hf_api
# Set your HuggingFace token here
HUGGINGFACE_TOKEN=your_token_here
# Use a lighter, faster model
HF_MODEL=mistralai/Mistral-7B-Instruct-v0.2
# Reduce token requirements to prevent timeouts
MAX_TOKENS_PER_REQUEST=200
# Aggressive timeout (60 seconds instead of 120)
LLM_TIMEOUT=60
# Lower temperature for more consistent output
LLM_TEMPERATURE=0.3
# LMStudio configuration (if using local)
LM_STUDIO_URL=http://localhost:1234
# Chunking optimization
MAX_CHUNK_TOKENS=4000
OVERLAP_TOKENS=100
"""
env_path = "/home/john/TranscriptorEnhanced/.env"
try:
with open(env_path, 'w') as f:
f.write(env_content)
print(f" β Created optimized .env file at {env_path}")
fixes_applied.append("Created .env configuration")
except Exception as e:
print(f" β Failed to create .env file: {e}")
# Create a startup script
startup_script = """#!/bin/bash
# TranscriptorAI Startup Script with LLM Health Check
echo "==================================="
echo " TranscriptorAI Startup"
echo "==================================="
echo
# Load environment variables
if [ -f .env ]; then
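    # Strip comment lines, then export KEY=VALUE pairs (assumes no spaces or quotes in values)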
export $(cat .env | grep -v '^#' | xargs)
echo "β Loaded .env configuration"
else
echo "β No .env file found, using defaults"
fi
echo
echo "Testing LLM connectivity..."
python fix_llm_timeout.py --test
if [ $? -ne 0 ]; then
echo
echo "β LLM connectivity issues detected!"
echo "Continue anyway? (y/n)"
read -r response
if [ "$response" != "y" ]; then
echo "Startup cancelled"
exit 1
fi
fi
echo
echo "Starting application..."
python app.py
"""
startup_path = "/home/john/TranscriptorEnhanced/start.sh"
try:
with open(startup_path, 'w') as f:
f.write(startup_script)
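        # Mark the startup script executable (rwxr-xr-x)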
os.chmod(startup_path, 0o755)
print(f" β Created startup script at {startup_path}")
print(f" Run with: ./start.sh")
fixes_applied.append("Created startup script")
except Exception as e:
print(f" β Failed to create startup script: {e}")
print()
print("=" * 70)
print("FIXES APPLIED:")
for fix in fixes_applied:
print(f" - {fix}")
print("=" * 70)
print()
print("NEXT STEPS:")
print("1. Edit .env file and add your HUGGINGFACE_TOKEN")
print("2. Run: ./start.sh")
print(" OR: source .env && python app.py")
print()
def diagnose_hanging_issue():
"""Diagnose why the app might be hanging"""
print("[4/4] Diagnosing Potential Hang Issues...")
print()
issues_found = []
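    # Each entry pairs a detected problem with a recommended fix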
# Check if we're using a heavy model
model = os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")
if "Mixtral-8x7B" in model or "70B" in model or "33B" in model:
issues_found.append({
"issue": "Using a large model that may cause timeouts",
"solution": "Switch to a lighter model like Mistral-7B-Instruct-v0.2"
})
# Check timeout settings
timeout = int(os.getenv("LLM_TIMEOUT", "120"))
if timeout > 90:
issues_found.append({
"issue": f"LLM timeout is high ({timeout}s), may cause hanging appearance",
"solution": "Reduce to 60 seconds for faster failure detection"
})
# Check max tokens
max_tokens = int(os.getenv("MAX_TOKENS_PER_REQUEST", "300"))
if max_tokens > 500:
issues_found.append({
"issue": f"Max tokens is high ({max_tokens}), slows generation",
"solution": "Reduce to 200-300 tokens"
})
if not issues_found:
print(" β No obvious configuration issues detected")
else:
print(" Issues detected:")
for i, item in enumerate(issues_found, 1):
print(f"\n {i}. {item['issue']}")
print(f" Solution: {item['solution']}")
print()
print("=" * 70)
print("COMMON CAUSES OF HANGING:")
print(" 1. Model server (LMStudio/node.js) running out of memory")
print(" 2. Network timeout to HuggingFace API")
print(" 3. Model too large for available resources")
print(" 4. Multiple concurrent requests overloading server")
print()
print("PREVENTION:")
print(" - Use the robust LLM wrapper (llm_robust.py) - already integrated")
print(" - Set aggressive timeouts (60s max)")
print(" - Use lighter models (Mistral-7B instead of Mixtral-8x7B)")
print(" - Process transcripts in smaller batches")
print("=" * 70)
print()
def main():
parser = argparse.ArgumentParser(description="Fix LLM timeout issues")
parser.add_argument("--test", action="store_true", help="Test LLM connectivity")
parser.add_argument("--fix", action="store_true", help="Apply recommended fixes")
parser.add_argument("--config", action="store_true", help="Show current config")
parser.add_argument("--diagnose", action="store_true", help="Diagnose hanging issues")
args = parser.parse_args()
print_banner()
if not any(vars(args).values()):
# No arguments, run all
test_llm_connectivity()
show_current_config()
apply_fixes()
diagnose_hanging_issue()
else:
if args.test:
success = test_llm_connectivity()
sys.exit(0 if success else 1)
if args.config:
show_current_config()
if args.fix:
apply_fixes()
if args.diagnose:
diagnose_hanging_issue()
if __name__ == "__main__":
main()