"""
LLM Timeout Fixer and Configuration Utility

This script helps diagnose and fix LLM timeout issues, particularly
when the Node.js server or model loading causes the app to hang.

Usage:
    python fix_llm_timeout.py --test      # Test LLM connectivity
    python fix_llm_timeout.py --fix       # Apply recommended fixes
    python fix_llm_timeout.py --config    # Show current configuration
    python fix_llm_timeout.py --diagnose  # Diagnose hanging issues
"""

import os
import sys
import argparse


def print_banner():
    print("=" * 70)
    print(" TranscriptorAI - LLM Timeout Diagnostic & Fix Utility")
    print("=" * 70)
    print()


def test_llm_connectivity():
    """Test if LLM backends are accessible"""
    print("[1/4] Testing LLM Backend Connectivity...")
    print()
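    # Test 1: HuggingFace Inference API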
    print(" Testing HuggingFace API...")
    hf_token = os.getenv("HUGGINGFACE_TOKEN", "")

    if not hf_token:
        print(" ✗ HUGGINGFACE_TOKEN not set")
        print("   Set it with: export HUGGINGFACE_TOKEN='your_token_here'")
        hf_available = False
    else:
        try:
            from huggingface_hub import InferenceClient

            # The timeout belongs on the client itself; text_generation()
            # has no timeout parameter of its own.
            client = InferenceClient(token=hf_token, timeout=10)
            client.text_generation(
                "Test",
                model="mistralai/Mixtral-8x7B-Instruct-v0.1",
                max_new_tokens=10,
            )
            print(" ✓ HuggingFace API is accessible")
            hf_available = True
        except Exception as e:
            print(f" ✗ HuggingFace API failed: {e}")
            hf_available = False

    print()
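    # Test 2: local LMStudio server (OpenAI-compatible /v1/models endpoint)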
    print(" Testing LMStudio...")
    lmstudio_url = os.getenv("LM_STUDIO_URL", "http://192.168.1.245:1234")

    try:
        import requests
        response = requests.get(f"{lmstudio_url}/v1/models", timeout=5)
        if response.status_code == 200:
            print(f" ✓ LMStudio is accessible at {lmstudio_url}")
            lmstudio_available = True
        else:
            print(f" ✗ LMStudio returned status {response.status_code}")
            lmstudio_available = False
    except Exception as e:
        print(f" ✗ LMStudio not accessible: {e}")
        print(f"   Checked URL: {lmstudio_url}")
        lmstudio_available = False

    print()
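    # Summarize results and, if no backend is reachable, print recovery steps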
    print("=" * 70)
    print("SUMMARY:")
    print(f" HuggingFace API: {'✓ Available' if hf_available else '✗ Not Available'}")
    print(f" LMStudio:        {'✓ Available' if lmstudio_available else '✗ Not Available'}")
    print("=" * 70)
    print()

    if not hf_available and not lmstudio_available:
        print("⚠ WARNING: No LLM backends are available!")
        print()
        print("RECOMMENDED ACTIONS:")
        print("1. For HuggingFace API:")
        print("   export HUGGINGFACE_TOKEN='your_hf_token_here'")
        print()
        print("2. For LMStudio:")
        print("   - Start LMStudio server")
        print("   - Load a model (recommended: Mistral 7B or smaller)")
        print("   - Verify it's running at: http://localhost:1234")
        print("   - Set URL: export LM_STUDIO_URL='http://localhost:1234'")
        print()
        return False

    return True


def show_current_config():
    """Display current configuration"""
    print("[2/4] Current Configuration...")
    print()

    config_items = [
        ("LLM Backend", os.getenv("LLM_BACKEND", "hf_api")),
        ("HuggingFace Model", os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")),
        ("LMStudio URL", os.getenv("LM_STUDIO_URL", "http://192.168.1.245:1234")),
        ("Max Tokens", os.getenv("MAX_TOKENS_PER_REQUEST", "300")),
        ("LLM Timeout", os.getenv("LLM_TIMEOUT", "120")),
        ("Temperature", os.getenv("LLM_TEMPERATURE", "0.3")),
    ]

    for key, value in config_items:
        print(f" {key:20s}: {value}")

    print()


def apply_fixes():
    """Apply recommended configuration fixes"""
    print("[3/4] Applying Recommended Fixes...")
    print()

    fixes_applied = []
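    # Fix 1: write an optimized .env file (overwrites any existing .env at env_path)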
    env_content = """# TranscriptorAI LLM Configuration - Optimized for Stability
# Generated by fix_llm_timeout.py

# Use HuggingFace API (more stable than local models)
LLM_BACKEND=hf_api

# Set your HuggingFace token here
HUGGINGFACE_TOKEN=your_token_here

# Use a lighter, faster model
HF_MODEL=mistralai/Mistral-7B-Instruct-v0.2

# Reduce token requirements to prevent timeouts
MAX_TOKENS_PER_REQUEST=200

# Aggressive timeout (60 seconds instead of 120)
LLM_TIMEOUT=60

# Lower temperature for more consistent output
LLM_TEMPERATURE=0.3

# LMStudio configuration (if using local)
LM_STUDIO_URL=http://localhost:1234

# Chunking optimization
MAX_CHUNK_TOKENS=4000
OVERLAP_TOKENS=100
"""

    env_path = "/home/john/TranscriptorEnhanced/.env"

    try:
        with open(env_path, 'w') as f:
            f.write(env_content)
        print(f" ✓ Created optimized .env file at {env_path}")
        fixes_applied.append("Created .env configuration")
    except Exception as e:
        print(f" ✗ Failed to create .env file: {e}")
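    # Fix 2: write a startup script that health-checks the LLM backends before launching the app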
    startup_script = """#!/bin/bash
# TranscriptorAI Startup Script with LLM Health Check

echo "==================================="
echo " TranscriptorAI Startup"
echo "==================================="
echo

# Load environment variables
if [ -f .env ]; then
    export $(grep -v '^#' .env | xargs)
    echo "✓ Loaded .env configuration"
else
    echo "⚠ No .env file found, using defaults"
fi

echo
echo "Testing LLM connectivity..."
python fix_llm_timeout.py --test

if [ $? -ne 0 ]; then
    echo
    echo "⚠ LLM connectivity issues detected!"
    echo "Continue anyway? (y/n)"
    read -r response
    if [ "$response" != "y" ]; then
        echo "Startup cancelled"
        exit 1
    fi
fi

echo
echo "Starting application..."
python app.py
"""

    startup_path = "/home/john/TranscriptorEnhanced/start.sh"

    try:
        with open(startup_path, 'w') as f:
            f.write(startup_script)
        os.chmod(startup_path, 0o755)
        print(f" ✓ Created startup script at {startup_path}")
        print("   Run with: ./start.sh")
        fixes_applied.append("Created startup script")
    except Exception as e:
        print(f" ✗ Failed to create startup script: {e}")
    print()
    print("=" * 70)
    print("FIXES APPLIED:")
    for fix in fixes_applied:
        print(f" - {fix}")
    print("=" * 70)
    print()

    print("NEXT STEPS:")
    print("1. Edit .env file and add your HUGGINGFACE_TOKEN")
    print("2. Run: ./start.sh")
    print("   OR: source .env && python app.py")
    print()


def diagnose_hanging_issue():
    """Diagnose why the app might be hanging"""
    print("[4/4] Diagnosing Potential Hang Issues...")
    print()

    issues_found = []
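    # Check 1: very large models are the most likely timeout culprits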
    model = os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")
    if "Mixtral-8x7B" in model or "70B" in model or "33B" in model:
        issues_found.append({
            "issue": "Using a large model that may cause timeouts",
            "solution": "Switch to a lighter model like Mistral-7B-Instruct-v0.2"
        })
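    # Check 2: a long timeout delays failure detection and looks like a hang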
    timeout = int(os.getenv("LLM_TIMEOUT", "120"))
    if timeout > 90:
        issues_found.append({
            "issue": f"LLM timeout is high ({timeout}s), which can make the app appear to hang",
            "solution": "Reduce to 60 seconds for faster failure detection"
        })
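    # Check 3: a large per-request token budget slows every generation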
    max_tokens = int(os.getenv("MAX_TOKENS_PER_REQUEST", "300"))
    if max_tokens > 500:
        issues_found.append({
            "issue": f"Max tokens is high ({max_tokens}), which slows generation",
            "solution": "Reduce to 200-300 tokens"
        })

    if not issues_found:
        print(" ✓ No obvious configuration issues detected")
    else:
        print(" Issues detected:")
        for i, item in enumerate(issues_found, 1):
            print(f"\n {i}. {item['issue']}")
            print(f"    Solution: {item['solution']}")

    print()
    print("=" * 70)
    print("COMMON CAUSES OF HANGING:")
    print(" 1. Model server (LMStudio/Node.js) running out of memory")
    print(" 2. Network timeout to HuggingFace API")
    print(" 3. Model too large for available resources")
    print(" 4. Multiple concurrent requests overloading the server")
    print()
    print("PREVENTION:")
    print(" - Use the robust LLM wrapper (llm_robust.py) - already integrated")
    print(" - Set aggressive timeouts (60s max)")
    print(" - Use lighter models (Mistral-7B instead of Mixtral-8x7B)")
    print(" - Process transcripts in smaller batches")
    print("=" * 70)
    print()


def main():
    parser = argparse.ArgumentParser(description="Fix LLM timeout issues")
    parser.add_argument("--test", action="store_true", help="Test LLM connectivity")
    parser.add_argument("--fix", action="store_true", help="Apply recommended fixes")
    parser.add_argument("--config", action="store_true", help="Show current config")
    parser.add_argument("--diagnose", action="store_true", help="Diagnose hanging issues")

    args = parser.parse_args()

    print_banner()

    if not any(vars(args).values()):
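        # No flags given: run the full diagnostic and fix sequence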
        test_llm_connectivity()
        show_current_config()
        apply_fixes()
        diagnose_hanging_issue()
    else:
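        # --test exits with a status code so shell scripts (e.g. start.sh) can branch on it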
        if args.test:
            success = test_llm_connectivity()
            sys.exit(0 if success else 1)
        if args.config:
            show_current_config()
        if args.fix:
            apply_fixes()
        if args.diagnose:
            diagnose_hanging_issue()


if __name__ == "__main__":
    main()