#!/usr/bin/env python3
"""
LLM Timeout Fixer and Configuration Utility

This script helps diagnose and fix LLM timeout issues, particularly
when the node.js server or model loading causes the app to hang.

Usage:
    python fix_llm_timeout.py --test      # Test LLM connectivity
    python fix_llm_timeout.py --fix       # Apply recommended fixes
    python fix_llm_timeout.py --config    # Show current configuration
"""

import os
import sys
import argparse

def print_banner():
    print("=" * 70)
    print("  TranscriptorAI - LLM Timeout Diagnostic & Fix Utility")
    print("=" * 70)
    print()

def test_llm_connectivity():
    """Test if LLM backends are accessible"""
    print("[1/4] Testing LLM Backend Connectivity...")
    print()

    # Test the HuggingFace Inference API
    print("  Testing HuggingFace API...")
    hf_token = os.getenv("HUGGINGFACE_TOKEN", "")
    if not hf_token:
        print("  ✗ HUGGINGFACE_TOKEN not set")
        print("    Set it with: export HUGGINGFACE_TOKEN='your_token_here'")
        hf_available = False
    else:
        try:
            from huggingface_hub import InferenceClient
            # The request timeout belongs on the client itself;
            # text_generation() does not accept a timeout argument.
            client = InferenceClient(token=hf_token, timeout=10)
            # Quick smoke test with a tiny generation
            client.text_generation(
                "Test",
                model="mistralai/Mixtral-8x7B-Instruct-v0.1",
                max_new_tokens=10,
            )
            print("  ✓ HuggingFace API is accessible")
            hf_available = True
        except Exception as e:
            print(f"  ✗ HuggingFace API failed: {e}")
            hf_available = False
    print()

    # Test a local LMStudio server
    print("  Testing LMStudio...")
    lmstudio_url = os.getenv("LM_STUDIO_URL", "http://192.168.1.245:1234")
    try:
        import requests
        response = requests.get(f"{lmstudio_url}/v1/models", timeout=5)
        if response.status_code == 200:
            print(f"  ✓ LMStudio is accessible at {lmstudio_url}")
            lmstudio_available = True
        else:
            print(f"  ✗ LMStudio returned status {response.status_code}")
            lmstudio_available = False
    except Exception as e:
        print(f"  ✗ LMStudio not accessible: {e}")
        print(f"    Checked URL: {lmstudio_url}")
        lmstudio_available = False
    print()

    print("=" * 70)
    print("SUMMARY:")
    print(f"  HuggingFace API: {'✓ Available' if hf_available else '✗ Not Available'}")
    print(f"  LMStudio:        {'✓ Available' if lmstudio_available else '✗ Not Available'}")
    print("=" * 70)
    print()

    if not hf_available and not lmstudio_available:
        print("⚠ WARNING: No LLM backends are available!")
        print()
        print("RECOMMENDED ACTIONS:")
        print("1. For HuggingFace API:")
        print("   export HUGGINGFACE_TOKEN='your_hf_token_here'")
        print()
        print("2. For LMStudio:")
        print("   - Start the LMStudio server")
        print("   - Load a model (recommended: Mistral 7B or smaller)")
        print("   - Verify it's running at: http://localhost:1234")
        print("   - Set URL: export LM_STUDIO_URL='http://localhost:1234'")
        print()
        return False
    return True
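
# Example (an assumed workflow, not part of the original usage notes): run the
# connectivity check on its own before launching the app, e.g.
#   HUGGINGFACE_TOKEN='your_token_here' python fix_llm_timeout.py --test && python app.py
# With --test the script exits 0 on success and 1 on failure, so it chains cleanly.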

def show_current_config():
    """Display current configuration"""
    print("[2/4] Current Configuration...")
    print()
    config_items = [
        ("LLM Backend", os.getenv("LLM_BACKEND", "hf_api")),
        ("HuggingFace Model", os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")),
        ("LMStudio URL", os.getenv("LM_STUDIO_URL", "http://192.168.1.245:1234")),
        ("Max Tokens", os.getenv("MAX_TOKENS_PER_REQUEST", "300")),
        ("LLM Timeout", os.getenv("LLM_TIMEOUT", "120")),
        ("Temperature", os.getenv("LLM_TEMPERATURE", "0.3")),
    ]
    for key, value in config_items:
        print(f"  {key:20s}: {value}")
    print()
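
# Every value above is read from the environment, so a one-off override can be
# inspected without editing any file (hypothetical invocation):
#   LLM_TIMEOUT=60 MAX_TOKENS_PER_REQUEST=200 python fix_llm_timeout.py --config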

def apply_fixes():
    """Apply recommended configuration fixes"""
    print("[3/4] Applying Recommended Fixes...")
    print()
    fixes_applied = []

    # Create a .env file with recommended settings
    env_content = """# TranscriptorAI LLM Configuration - Optimized for Stability
# Generated by fix_llm_timeout.py

# Use HuggingFace API (more stable than local models)
LLM_BACKEND=hf_api

# Set your HuggingFace token here
HUGGINGFACE_TOKEN=your_token_here

# Use a lighter, faster model
HF_MODEL=mistralai/Mistral-7B-Instruct-v0.2

# Reduce token requirements to prevent timeouts
MAX_TOKENS_PER_REQUEST=200

# Aggressive timeout (60 seconds instead of 120)
LLM_TIMEOUT=60

# Lower temperature for more consistent output
LLM_TEMPERATURE=0.3

# LMStudio configuration (if using local)
LM_STUDIO_URL=http://localhost:1234

# Chunking optimization
MAX_CHUNK_TOKENS=4000
OVERLAP_TOKENS=100
"""

    env_path = "/home/john/TranscriptorEnhanced/.env"
    try:
        with open(env_path, 'w') as f:
            f.write(env_content)
        print(f"  ✓ Created optimized .env file at {env_path}")
        fixes_applied.append("Created .env configuration")
    except Exception as e:
        print(f"  ✗ Failed to create .env file: {e}")

    # Create a startup script
    startup_script = """#!/bin/bash
# TranscriptorAI Startup Script with LLM Health Check

echo "==================================="
echo "  TranscriptorAI Startup"
echo "==================================="
echo

# Load environment variables (assumes simple KEY=VALUE lines without spaces)
if [ -f .env ]; then
    export $(grep -v '^#' .env | xargs)
    echo "✓ Loaded .env configuration"
else
    echo "⚠ No .env file found, using defaults"
fi
echo

echo "Testing LLM connectivity..."
python fix_llm_timeout.py --test
if [ $? -ne 0 ]; then
    echo
    echo "⚠ LLM connectivity issues detected!"
    echo "Continue anyway? (y/n)"
    read -r response
    if [ "$response" != "y" ]; then
        echo "Startup cancelled"
        exit 1
    fi
fi
echo

echo "Starting application..."
python app.py
"""

    startup_path = "/home/john/TranscriptorEnhanced/start.sh"
    try:
        with open(startup_path, 'w') as f:
            f.write(startup_script)
        os.chmod(startup_path, 0o755)
        print(f"  ✓ Created startup script at {startup_path}")
        print("    Run with: ./start.sh")
        fixes_applied.append("Created startup script")
    except Exception as e:
        print(f"  ✗ Failed to create startup script: {e}")
    print()
    print("=" * 70)
    print("FIXES APPLIED:")
    for fix in fixes_applied:
        print(f"  - {fix}")
    print("=" * 70)
    print()
    print("NEXT STEPS:")
    print("1. Edit the .env file and add your HUGGINGFACE_TOKEN")
    print("2. Run: ./start.sh")
    print("   OR: set -a; source .env; set +a; python app.py")
    print()
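
# Plain "source .env" only sets shell variables; "set -a" marks them for export
# so app.py actually sees them. Alternatively, if app.py uses python-dotenv (an
# assumption; that file is not shown here), the generated .env loads in-process:
#   from dotenv import load_dotenv
#   load_dotenv()  # reads .env from the working directory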

def diagnose_hanging_issue():
    """Diagnose why the app might be hanging"""
    print("[4/4] Diagnosing Potential Hang Issues...")
    print()
    issues_found = []

    # Check if we're using a heavy model
    model = os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")
    if "Mixtral-8x7B" in model or "70B" in model or "33B" in model:
        issues_found.append({
            "issue": "Using a large model that may cause timeouts",
            "solution": "Switch to a lighter model like Mistral-7B-Instruct-v0.2"
        })

    # Check timeout settings
    timeout = int(os.getenv("LLM_TIMEOUT", "120"))
    if timeout > 90:
        issues_found.append({
            "issue": f"LLM timeout is high ({timeout}s), which can look like a hang",
            "solution": "Reduce to 60 seconds for faster failure detection"
        })

    # Check max tokens
    max_tokens = int(os.getenv("MAX_TOKENS_PER_REQUEST", "300"))
    if max_tokens > 500:
        issues_found.append({
            "issue": f"Max tokens is high ({max_tokens}), which slows generation",
            "solution": "Reduce to 200-300 tokens"
        })

    if not issues_found:
        print("  ✓ No obvious configuration issues detected")
    else:
        print("  Issues detected:")
        for i, item in enumerate(issues_found, 1):
            print(f"\n  {i}. {item['issue']}")
            print(f"     Solution: {item['solution']}")

    print()
    print("=" * 70)
    print("COMMON CAUSES OF HANGING:")
    print("  1. Model server (LMStudio/node.js) running out of memory")
    print("  2. Network timeout to the HuggingFace API")
    print("  3. Model too large for the available resources")
    print("  4. Multiple concurrent requests overloading the server")
    print()
    print("PREVENTION:")
    print("  - Use the robust LLM wrapper (llm_robust.py) - already integrated")
    print("  - Set aggressive timeouts (60s max)")
    print("  - Use lighter models (Mistral-7B instead of Mixtral-8x7B)")
    print("  - Process transcripts in smaller batches")
    print("=" * 70)
    print()
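
# The PREVENTION list above recommends hard timeouts around every LLM call.
# Below is a minimal sketch of that pattern, assuming the real llm_robust.py
# wrapper works in a similar spirit (its actual code is not shown here).
def call_with_timeout(fn, timeout_s, *args, **kwargs):
    """Run a blocking call, but fail fast after timeout_s seconds."""
    import concurrent.futures
    pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    future = pool.submit(fn, *args, **kwargs)
    try:
        return future.result(timeout=timeout_s)
    except concurrent.futures.TimeoutError:
        # The stuck call is abandoned in its worker thread; the caller gets a
        # clean error instead of an app that appears to hang.
        raise TimeoutError(f"LLM call exceeded {timeout_s}s") from None
    finally:
        # Don't wait for a stuck worker to finish before returning.
        pool.shutdown(wait=False)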

def main():
    parser = argparse.ArgumentParser(description="Fix LLM timeout issues")
    parser.add_argument("--test", action="store_true", help="Test LLM connectivity")
    parser.add_argument("--fix", action="store_true", help="Apply recommended fixes")
    parser.add_argument("--config", action="store_true", help="Show current config")
    parser.add_argument("--diagnose", action="store_true", help="Diagnose hanging issues")
    args = parser.parse_args()

    print_banner()

    if not any(vars(args).values()):
        # No arguments: run every step
        test_llm_connectivity()
        show_current_config()
        apply_fixes()
        diagnose_hanging_issue()
    else:
        if args.test:
            success = test_llm_connectivity()
            sys.exit(0 if success else 1)
        if args.config:
            show_current_config()
        if args.fix:
            apply_fixes()
        if args.diagnose:
            diagnose_hanging_issue()


if __name__ == "__main__":
    main()