#!/usr/bin/env python3
"""
LLM Timeout Fixer and Configuration Utility

This script helps diagnose and fix LLM timeout issues, particularly
when the node.js server or model loading causes the app to hang.

Usage:
    python fix_llm_timeout.py --test      # Test LLM connectivity
    python fix_llm_timeout.py --fix       # Apply recommended fixes
    python fix_llm_timeout.py --config    # Show current configuration
"""

import os
import sys
import argparse

def print_banner():
    print("=" * 70)
    print("  TranscriptorAI - LLM Timeout Diagnostic & Fix Utility")
    print("=" * 70)
    print()

def test_llm_connectivity():
    """Test if LLM backends are accessible"""
    print("[1/4] Testing LLM Backend Connectivity...")
    print()

    # Test the HuggingFace Inference API
    print("  Testing HuggingFace API...")
    hf_token = os.getenv("HUGGINGFACE_TOKEN", "")
    if not hf_token:
        print("  ✗ HUGGINGFACE_TOKEN not set")
        print("    Set it with: export HUGGINGFACE_TOKEN='your_token_here'")
        hf_available = False
    else:
        try:
            from huggingface_hub import InferenceClient
            # The request timeout belongs on the client itself;
            # text_generation() does not accept a timeout argument.
            client = InferenceClient(token=hf_token, timeout=10)
            # Quick smoke test with a tiny generation
            client.text_generation(
                "Test",
                model="mistralai/Mixtral-8x7B-Instruct-v0.1",
                max_new_tokens=10,
            )
            print("  ✓ HuggingFace API is accessible")
            hf_available = True
        except Exception as e:
            print(f"  ✗ HuggingFace API failed: {e}")
            hf_available = False
    print()

    # Test a local LMStudio server
    print("  Testing LMStudio...")
    lmstudio_url = os.getenv("LM_STUDIO_URL", "http://192.168.1.245:1234")
    try:
        import requests
        response = requests.get(f"{lmstudio_url}/v1/models", timeout=5)
        if response.status_code == 200:
            print(f"  ✓ LMStudio is accessible at {lmstudio_url}")
            lmstudio_available = True
        else:
            print(f"  ✗ LMStudio returned status {response.status_code}")
            lmstudio_available = False
    except Exception as e:
        print(f"  ✗ LMStudio not accessible: {e}")
        print(f"    Checked URL: {lmstudio_url}")
        lmstudio_available = False
    print()

    print("=" * 70)
    print("SUMMARY:")
    print(f"  HuggingFace API: {'✓ Available' if hf_available else '✗ Not Available'}")
    print(f"  LMStudio:        {'✓ Available' if lmstudio_available else '✗ Not Available'}")
    print("=" * 70)
    print()

    if not hf_available and not lmstudio_available:
        print("⚠ WARNING: No LLM backends are available!")
        print()
        print("RECOMMENDED ACTIONS:")
        print("1. For HuggingFace API:")
        print("   export HUGGINGFACE_TOKEN='your_hf_token_here'")
        print()
        print("2. For LMStudio:")
        print("   - Start the LMStudio server")
        print("   - Load a model (recommended: Mistral 7B or smaller)")
        print("   - Verify it's running at: http://localhost:1234")
        print("   - Set URL: export LM_STUDIO_URL='http://localhost:1234'")
        print()
        return False
    return True
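
# Example (an assumed workflow, not part of the original usage notes): run the
# connectivity check on its own before launching the app, e.g.
#   HUGGINGFACE_TOKEN='your_token_here' python fix_llm_timeout.py --test && python app.py
# With --test the script exits 0 on success and 1 on failure, so it chains cleanly.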

def show_current_config():
    """Display current configuration"""
    print("[2/4] Current Configuration...")
    print()
    config_items = [
        ("LLM Backend", os.getenv("LLM_BACKEND", "hf_api")),
        ("HuggingFace Model", os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")),
        ("LMStudio URL", os.getenv("LM_STUDIO_URL", "http://192.168.1.245:1234")),
        ("Max Tokens", os.getenv("MAX_TOKENS_PER_REQUEST", "300")),
        ("LLM Timeout", os.getenv("LLM_TIMEOUT", "120")),
        ("Temperature", os.getenv("LLM_TEMPERATURE", "0.3")),
    ]
    for key, value in config_items:
        print(f"  {key:20s}: {value}")
    print()
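
# Every value above is read from the environment, so a one-off override can be
# inspected without editing any file (hypothetical invocation):
#   LLM_TIMEOUT=60 MAX_TOKENS_PER_REQUEST=200 python fix_llm_timeout.py --config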

def apply_fixes():
    """Apply recommended configuration fixes"""
    print("[3/4] Applying Recommended Fixes...")
    print()
    fixes_applied = []

    # Create a .env file with recommended settings
    env_content = """# TranscriptorAI LLM Configuration - Optimized for Stability
# Generated by fix_llm_timeout.py

# Use HuggingFace API (more stable than local models)
LLM_BACKEND=hf_api

# Set your HuggingFace token here
HUGGINGFACE_TOKEN=your_token_here

# Use a lighter, faster model
HF_MODEL=mistralai/Mistral-7B-Instruct-v0.2

# Reduce token requirements to prevent timeouts
MAX_TOKENS_PER_REQUEST=200

# Aggressive timeout (60 seconds instead of 120)
LLM_TIMEOUT=60

# Lower temperature for more consistent output
LLM_TEMPERATURE=0.3

# LMStudio configuration (if using local)
LM_STUDIO_URL=http://localhost:1234

# Chunking optimization
MAX_CHUNK_TOKENS=4000
OVERLAP_TOKENS=100
"""

    env_path = "/home/john/TranscriptorEnhanced/.env"
    try:
        with open(env_path, 'w') as f:
            f.write(env_content)
        print(f"  ✓ Created optimized .env file at {env_path}")
        fixes_applied.append("Created .env configuration")
    except Exception as e:
        print(f"  ✗ Failed to create .env file: {e}")

    # Create a startup script
    startup_script = """#!/bin/bash
# TranscriptorAI Startup Script with LLM Health Check

echo "==================================="
echo "  TranscriptorAI Startup"
echo "==================================="
echo

# Load environment variables (assumes simple KEY=VALUE lines without spaces)
if [ -f .env ]; then
    export $(grep -v '^#' .env | xargs)
    echo "✓ Loaded .env configuration"
else
    echo "⚠ No .env file found, using defaults"
fi
echo

echo "Testing LLM connectivity..."
python fix_llm_timeout.py --test
if [ $? -ne 0 ]; then
    echo
    echo "⚠ LLM connectivity issues detected!"
    echo "Continue anyway? (y/n)"
    read -r response
    if [ "$response" != "y" ]; then
        echo "Startup cancelled"
        exit 1
    fi
fi
echo

echo "Starting application..."
python app.py
"""

    startup_path = "/home/john/TranscriptorEnhanced/start.sh"
    try:
        with open(startup_path, 'w') as f:
            f.write(startup_script)
        os.chmod(startup_path, 0o755)
        print(f"  ✓ Created startup script at {startup_path}")
        print("    Run with: ./start.sh")
        fixes_applied.append("Created startup script")
    except Exception as e:
        print(f"  ✗ Failed to create startup script: {e}")
    print()
    print("=" * 70)
    print("FIXES APPLIED:")
    for fix in fixes_applied:
        print(f"  - {fix}")
    print("=" * 70)
    print()
    print("NEXT STEPS:")
    print("1. Edit the .env file and add your HUGGINGFACE_TOKEN")
    print("2. Run: ./start.sh")
    print("   OR: set -a; source .env; set +a; python app.py")
    print()
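
# Plain "source .env" only sets shell variables; "set -a" marks them for export
# so app.py actually sees them. Alternatively, if app.py uses python-dotenv (an
# assumption; that file is not shown here), the generated .env loads in-process:
#   from dotenv import load_dotenv
#   load_dotenv()  # reads .env from the working directory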

def diagnose_hanging_issue():
    """Diagnose why the app might be hanging"""
    print("[4/4] Diagnosing Potential Hang Issues...")
    print()
    issues_found = []

    # Check if we're using a heavy model
    model = os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")
    if "Mixtral-8x7B" in model or "70B" in model or "33B" in model:
        issues_found.append({
            "issue": "Using a large model that may cause timeouts",
            "solution": "Switch to a lighter model like Mistral-7B-Instruct-v0.2"
        })

    # Check timeout settings
    timeout = int(os.getenv("LLM_TIMEOUT", "120"))
    if timeout > 90:
        issues_found.append({
            "issue": f"LLM timeout is high ({timeout}s), which can look like a hang",
            "solution": "Reduce to 60 seconds for faster failure detection"
        })

    # Check max tokens
    max_tokens = int(os.getenv("MAX_TOKENS_PER_REQUEST", "300"))
    if max_tokens > 500:
        issues_found.append({
            "issue": f"Max tokens is high ({max_tokens}), which slows generation",
            "solution": "Reduce to 200-300 tokens"
        })

    if not issues_found:
        print("  ✓ No obvious configuration issues detected")
    else:
        print("  Issues detected:")
        for i, item in enumerate(issues_found, 1):
            print(f"\n  {i}. {item['issue']}")
            print(f"     Solution: {item['solution']}")

    print()
    print("=" * 70)
    print("COMMON CAUSES OF HANGING:")
    print("  1. Model server (LMStudio/node.js) running out of memory")
    print("  2. Network timeout to the HuggingFace API")
    print("  3. Model too large for the available resources")
    print("  4. Multiple concurrent requests overloading the server")
    print()
    print("PREVENTION:")
    print("  - Use the robust LLM wrapper (llm_robust.py) - already integrated")
    print("  - Set aggressive timeouts (60s max)")
    print("  - Use lighter models (Mistral-7B instead of Mixtral-8x7B)")
    print("  - Process transcripts in smaller batches")
    print("=" * 70)
    print()
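
# The PREVENTION list above recommends hard timeouts around every LLM call.
# Below is a minimal sketch of that pattern, assuming the real llm_robust.py
# wrapper works in a similar spirit (its actual code is not shown here).
def call_with_timeout(fn, timeout_s, *args, **kwargs):
    """Run a blocking call, but fail fast after timeout_s seconds."""
    import concurrent.futures
    pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    future = pool.submit(fn, *args, **kwargs)
    try:
        return future.result(timeout=timeout_s)
    except concurrent.futures.TimeoutError:
        # The stuck call is abandoned in its worker thread; the caller gets a
        # clean error instead of an app that appears to hang.
        raise TimeoutError(f"LLM call exceeded {timeout_s}s") from None
    finally:
        # Don't wait for a stuck worker to finish before returning.
        pool.shutdown(wait=False)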

def main():
    parser = argparse.ArgumentParser(description="Fix LLM timeout issues")
    parser.add_argument("--test", action="store_true", help="Test LLM connectivity")
    parser.add_argument("--fix", action="store_true", help="Apply recommended fixes")
    parser.add_argument("--config", action="store_true", help="Show current config")
    parser.add_argument("--diagnose", action="store_true", help="Diagnose hanging issues")
    args = parser.parse_args()

    print_banner()

    if not any(vars(args).values()):
        # No arguments: run every step
        test_llm_connectivity()
        show_current_config()
        apply_fixes()
        diagnose_hanging_issue()
    else:
        if args.test:
            success = test_llm_connectivity()
            sys.exit(0 if success else 1)
        if args.config:
            show_current_config()
        if args.fix:
            apply_fixes()
        if args.diagnose:
            diagnose_hanging_issue()


if __name__ == "__main__":
    main()