#!/usr/bin/env python3
"""
LLM Timeout Fixer and Configuration Utility

This script helps diagnose and fix LLM timeout issues, particularly when the
node.js server or model loading causes the app to hang.

Usage:
    python fix_llm_timeout.py --test      # Test LLM connectivity
    python fix_llm_timeout.py --fix       # Apply recommended fixes
    python fix_llm_timeout.py --config    # Show current configuration
"""

import os
import sys
import argparse

# Root directory for generated files (.env, start.sh).  Overridable via the
# TRANSCRIPTOR_DIR environment variable so the tool is not pinned to one
# machine's home directory; the default preserves the original behavior.
PROJECT_DIR = os.getenv("TRANSCRIPTOR_DIR", "/home/john/TranscriptorEnhanced")


def _env_int(name, default):
    """Return env var *name* parsed as int, or *default* when unset/malformed.

    A malformed value (e.g. LLM_TIMEOUT='abc') must not crash a diagnostic
    tool whose whole purpose is to report configuration problems.
    """
    try:
        return int(os.getenv(name, default))
    except (TypeError, ValueError):
        return int(default)


def print_banner():
    """Print the utility's header banner."""
    print("=" * 70)
    print(" TranscriptorAI - LLM Timeout Diagnostic & Fix Utility")
    print("=" * 70)
    print()


def test_llm_connectivity():
    """Test if LLM backends are accessible.

    Returns:
        bool: True when at least one backend (HuggingFace API or LMStudio)
        responds, False otherwise.
    """
    print("[1/4] Testing LLM Backend Connectivity...")
    print()

    # --- HuggingFace API -------------------------------------------------
    print(" Testing HuggingFace API...")
    hf_token = os.getenv("HUGGINGFACE_TOKEN", "")
    if not hf_token:
        print(" ✗ HUGGINGFACE_TOKEN not set")
        print(" Set it with: export HUGGINGFACE_TOKEN='your_token_here'")
        hf_available = False
    else:
        try:
            from huggingface_hub import InferenceClient
            # FIX: `timeout` is a constructor argument of InferenceClient,
            # not a text_generation() kwarg — passing it to text_generation()
            # is rejected and made the connectivity probe itself fail.
            client = InferenceClient(token=hf_token, timeout=10)
            # Quick test
            client.text_generation(
                "Test",
                model="mistralai/Mixtral-8x7B-Instruct-v0.1",
                max_new_tokens=10,
            )
            print(" ✓ HuggingFace API is accessible")
            hf_available = True
        except Exception as e:
            print(f" ✗ HuggingFace API failed: {e}")
            hf_available = False

    print()

    # --- LMStudio --------------------------------------------------------
    print(" Testing LMStudio...")
    lmstudio_url = os.getenv("LM_STUDIO_URL", "http://192.168.1.245:1234")
    try:
        import requests
        response = requests.get(f"{lmstudio_url}/v1/models", timeout=5)
        if response.status_code == 200:
            print(f" ✓ LMStudio is accessible at {lmstudio_url}")
            lmstudio_available = True
        else:
            print(f" ✗ LMStudio returned status {response.status_code}")
            lmstudio_available = False
    except Exception as e:
        print(f" ✗ LMStudio not accessible: {e}")
        print(f" Checked URL: {lmstudio_url}")
        lmstudio_available = False

    print()
    print("=" * 70)
    print("SUMMARY:")
    print(f" HuggingFace API: {'✓ Available' if hf_available else '✗ Not Available'}")
    print(f" LMStudio: {'✓ Available' if lmstudio_available else '✗ Not Available'}")
    print("=" * 70)
    print()

    if not hf_available and not lmstudio_available:
        print("⚠ WARNING: No LLM backends are available!")
        print()
        print("RECOMMENDED ACTIONS:")
        print("1. For HuggingFace API:")
        print(" export HUGGINGFACE_TOKEN='your_hf_token_here'")
        print()
        print("2. For LMStudio:")
        print(" - Start LMStudio server")
        print(" - Load a model (recommended: Mistral 7B or smaller)")
        print(" - Verify it's running at: http://localhost:1234")
        print(" - Set URL: export LM_STUDIO_URL='http://localhost:1234'")
        print()
        return False

    return True


def show_current_config():
    """Display current configuration as read from environment variables."""
    print("[2/4] Current Configuration...")
    print()

    config_items = [
        ("LLM Backend", os.getenv("LLM_BACKEND", "hf_api")),
        ("HuggingFace Model", os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")),
        ("LMStudio URL", os.getenv("LM_STUDIO_URL", "http://192.168.1.245:1234")),
        ("Max Tokens", os.getenv("MAX_TOKENS_PER_REQUEST", "300")),
        ("LLM Timeout", os.getenv("LLM_TIMEOUT", "120")),
        ("Temperature", os.getenv("LLM_TEMPERATURE", "0.3")),
    ]

    for key, value in config_items:
        print(f" {key:20s}: {value}")
    print()


def apply_fixes():
    """Apply recommended configuration fixes.

    Writes an optimized ``.env`` file and a ``start.sh`` startup script into
    PROJECT_DIR.  Failures are reported, not raised, so partial success is
    still summarized at the end.
    """
    print("[3/4] Applying Recommended Fixes...")
    print()

    fixes_applied = []

    # Create .env file with recommended settings
    env_content = """# TranscriptorAI LLM Configuration - Optimized for Stability
# Generated by fix_llm_timeout.py

# Use HuggingFace API (more stable than local models)
LLM_BACKEND=hf_api

# Set your HuggingFace token here
HUGGINGFACE_TOKEN=your_token_here

# Use a lighter, faster model
HF_MODEL=mistralai/Mistral-7B-Instruct-v0.2

# Reduce token requirements to prevent timeouts
MAX_TOKENS_PER_REQUEST=200

# Aggressive timeout (60 seconds instead of 120)
LLM_TIMEOUT=60

# Lower temperature for more consistent output
LLM_TEMPERATURE=0.3

# LMStudio configuration (if using local)
LM_STUDIO_URL=http://localhost:1234

# Chunking optimization
MAX_CHUNK_TOKENS=4000
OVERLAP_TOKENS=100
"""

    # FIX: path was hard-coded to /home/john/...; now derived from
    # PROJECT_DIR (TRANSCRIPTOR_DIR env var, same default as before).
    env_path = os.path.join(PROJECT_DIR, ".env")
    try:
        with open(env_path, 'w') as f:
            f.write(env_content)
        print(f" ✓ Created optimized .env file at {env_path}")
        fixes_applied.append("Created .env configuration")
    except Exception as e:
        print(f" ✗ Failed to create .env file: {e}")

    # Create a startup script
    startup_script = """#!/bin/bash
# TranscriptorAI Startup Script with LLM Health Check

echo "==================================="
echo " TranscriptorAI Startup"
echo "==================================="
echo

# Load environment variables
if [ -f .env ]; then
    export $(cat .env | grep -v '^#' | xargs)
    echo "✓ Loaded .env configuration"
else
    echo "⚠ No .env file found, using defaults"
fi

echo
echo "Testing LLM connectivity..."
python fix_llm_timeout.py --test

if [ $? -ne 0 ]; then
    echo
    echo "⚠ LLM connectivity issues detected!"
    echo "Continue anyway? (y/n)"
    read -r response
    if [ "$response" != "y" ]; then
        echo "Startup cancelled"
        exit 1
    fi
fi

echo
echo "Starting application..."
python app.py
"""

    startup_path = os.path.join(PROJECT_DIR, "start.sh")
    try:
        with open(startup_path, 'w') as f:
            f.write(startup_script)
        os.chmod(startup_path, 0o755)
        print(f" ✓ Created startup script at {startup_path}")
        print(f" Run with: ./start.sh")
        fixes_applied.append("Created startup script")
    except Exception as e:
        print(f" ✗ Failed to create startup script: {e}")

    print()
    print("=" * 70)
    print("FIXES APPLIED:")
    for fix in fixes_applied:
        print(f" - {fix}")
    print("=" * 70)
    print()
    print("NEXT STEPS:")
    print("1. Edit .env file and add your HUGGINGFACE_TOKEN")
    print("2. Run: ./start.sh")
    print(" OR: source .env && python app.py")
    print()


def diagnose_hanging_issue():
    """Diagnose why the app might be hanging.

    Inspects model choice, timeout, and token-budget env vars and prints
    likely causes with suggested remedies.
    """
    print("[4/4] Diagnosing Potential Hang Issues...")
    print()

    issues_found = []

    # Check if we're using a heavy model
    model = os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")
    if "Mixtral-8x7B" in model or "70B" in model or "33B" in model:
        issues_found.append({
            "issue": "Using a large model that may cause timeouts",
            "solution": "Switch to a lighter model like Mistral-7B-Instruct-v0.2"
        })

    # Check timeout settings.  FIX: a malformed env value used to raise
    # ValueError and abort the diagnosis; _env_int falls back to the default.
    timeout = _env_int("LLM_TIMEOUT", "120")
    if timeout > 90:
        issues_found.append({
            "issue": f"LLM timeout is high ({timeout}s), may cause hanging appearance",
            "solution": "Reduce to 60 seconds for faster failure detection"
        })

    # Check max tokens
    max_tokens = _env_int("MAX_TOKENS_PER_REQUEST", "300")
    if max_tokens > 500:
        issues_found.append({
            "issue": f"Max tokens is high ({max_tokens}), slows generation",
            "solution": "Reduce to 200-300 tokens"
        })

    if not issues_found:
        print(" ✓ No obvious configuration issues detected")
    else:
        print(" Issues detected:")
        for i, item in enumerate(issues_found, 1):
            print(f"\n {i}. {item['issue']}")
            print(f" Solution: {item['solution']}")

    print()
    print("=" * 70)
    print("COMMON CAUSES OF HANGING:")
    print(" 1. Model server (LMStudio/node.js) running out of memory")
    print(" 2. Network timeout to HuggingFace API")
    print(" 3. Model too large for available resources")
    print(" 4. Multiple concurrent requests overloading server")
    print()
    print("PREVENTION:")
    print(" - Use the robust LLM wrapper (llm_robust.py) - already integrated")
    print(" - Set aggressive timeouts (60s max)")
    print(" - Use lighter models (Mistral-7B instead of Mixtral-8x7B)")
    print(" - Process transcripts in smaller batches")
    print("=" * 70)
    print()


def main():
    """Parse CLI flags and dispatch to the requested action(s).

    With no flags, runs the full pipeline: test, config, fix, diagnose.
    ``--test`` alone exits with status 1 when no backend is reachable.
    """
    parser = argparse.ArgumentParser(description="Fix LLM timeout issues")
    parser.add_argument("--test", action="store_true", help="Test LLM connectivity")
    parser.add_argument("--fix", action="store_true", help="Apply recommended fixes")
    parser.add_argument("--config", action="store_true", help="Show current config")
    parser.add_argument("--diagnose", action="store_true", help="Diagnose hanging issues")

    args = parser.parse_args()

    print_banner()

    if not any(vars(args).values()):
        # No arguments, run all
        test_llm_connectivity()
        show_current_config()
        apply_fixes()
        diagnose_hanging_issue()
    else:
        if args.test:
            success = test_llm_connectivity()
            sys.exit(0 if success else 1)
        if args.config:
            show_current_config()
        if args.fix:
            apply_fixes()
        if args.diagnose:
            diagnose_hanging_issue()


if __name__ == "__main__":
    main()