#!/usr/bin/env python3
"""
LLM Timeout Fixer and Configuration Utility
This script helps diagnose and fix LLM timeout issues, particularly
when the node.js server or model loading causes the app to hang.
Usage:
    python fix_llm_timeout.py --test      # Test LLM connectivity
    python fix_llm_timeout.py --fix       # Apply recommended fixes
    python fix_llm_timeout.py --config    # Show current configuration
    python fix_llm_timeout.py --diagnose  # Diagnose potential hang issues
    python fix_llm_timeout.py             # No arguments: run all of the above
"""
import os
import sys
import argparse
def print_banner():
print("=" * 70)
print(" TranscriptorAI - LLM Timeout Diagnostic & Fix Utility")
print("=" * 70)
print()
def test_llm_connectivity():
"""Test if LLM backends are accessible"""
print("[1/4] Testing LLM Backend Connectivity...")
print()
# Test HuggingFace API
print(" Testing HuggingFace API...")
hf_token = os.getenv("HUGGINGFACE_TOKEN", "")
if not hf_token:
print(" β HUGGINGFACE_TOKEN not set")
print(" Set it with: export HUGGINGFACE_TOKEN='your_token_here'")
hf_available = False
else:
        try:
            from huggingface_hub import InferenceClient
            # text_generation() does not accept a timeout argument;
            # the timeout belongs on the InferenceClient itself.
            client = InferenceClient(token=hf_token, timeout=10)
            # Quick smoke test to confirm the endpoint responds
            client.text_generation(
                "Test",
                model="mistralai/Mixtral-8x7B-Instruct-v0.1",
                max_new_tokens=10
            )
            print(" ✓ HuggingFace API is accessible")
            hf_available = True
except Exception as e:
print(f" β HuggingFace API failed: {e}")
hf_available = False
print()
# Test LMStudio
print(" Testing LMStudio...")
lmstudio_url = os.getenv("LM_STUDIO_URL", "http://192.168.1.245:1234")
try:
import requests
response = requests.get(f"{lmstudio_url}/v1/models", timeout=5)
if response.status_code == 200:
print(f" β LMStudio is accessible at {lmstudio_url}")
lmstudio_available = True
else:
print(f" β LMStudio returned status {response.status_code}")
lmstudio_available = False
except Exception as e:
print(f" β LMStudio not accessible: {e}")
print(f" Checked URL: {lmstudio_url}")
lmstudio_available = False
print()
print("=" * 70)
print("SUMMARY:")
print(f" HuggingFace API: {'β Available' if hf_available else 'β Not Available'}")
print(f" LMStudio: {'β Available' if lmstudio_available else 'β Not Available'}")
print("=" * 70)
print()
if not hf_available and not lmstudio_available:
print("β WARNING: No LLM backends are available!")
print()
print("RECOMMENDED ACTIONS:")
print("1. For HuggingFace API:")
print(" export HUGGINGFACE_TOKEN='your_hf_token_here'")
print()
print("2. For LMStudio:")
print(" - Start LMStudio server")
print(" - Load a model (recommended: Mistral 7B or smaller)")
print(" - Verify it's running at: http://localhost:1234")
print(" - Set URL: export LM_STUDIO_URL='http://localhost:1234'")
print()
return False
return True
def show_current_config():
"""Display current configuration"""
print("[2/4] Current Configuration...")
print()
config_items = [
("LLM Backend", os.getenv("LLM_BACKEND", "hf_api")),
("HuggingFace Model", os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")),
("LMStudio URL", os.getenv("LM_STUDIO_URL", "http://192.168.1.245:1234")),
("Max Tokens", os.getenv("MAX_TOKENS_PER_REQUEST", "300")),
("LLM Timeout", os.getenv("LLM_TIMEOUT", "120")),
("Temperature", os.getenv("LLM_TEMPERATURE", "0.3")),
]
for key, value in config_items:
print(f" {key:20s}: {value}")
print()
def apply_fixes():
"""Apply recommended configuration fixes"""
print("[3/4] Applying Recommended Fixes...")
print()
fixes_applied = []
# Create .env file with recommended settings
env_content = """# TranscriptorAI LLM Configuration - Optimized for Stability
# Generated by fix_llm_timeout.py
# Use HuggingFace API (more stable than local models)
LLM_BACKEND=hf_api
# Set your HuggingFace token here
HUGGINGFACE_TOKEN=your_token_here
# Use a lighter, faster model
HF_MODEL=mistralai/Mistral-7B-Instruct-v0.2
# Reduce token requirements to prevent timeouts
MAX_TOKENS_PER_REQUEST=200
# Aggressive timeout (60 seconds instead of 120)
LLM_TIMEOUT=60
# Lower temperature for more consistent output
LLM_TEMPERATURE=0.3
# LMStudio configuration (if using local)
LM_STUDIO_URL=http://localhost:1234
# Chunking optimization
MAX_CHUNK_TOKENS=4000
OVERLAP_TOKENS=100
"""
env_path = "/home/john/TranscriptorEnhanced/.env"
try:
with open(env_path, 'w') as f:
f.write(env_content)
print(f" β Created optimized .env file at {env_path}")
fixes_applied.append("Created .env configuration")
except Exception as e:
print(f" β Failed to create .env file: {e}")
# Create a startup script
startup_script = """#!/bin/bash
# TranscriptorAI Startup Script with LLM Health Check
echo "==================================="
echo " TranscriptorAI Startup"
echo "==================================="
echo
# Load environment variables
if [ -f .env ]; then
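    # Strip comment lines, then export KEY=VALUE pairs (assumes no spaces or quotes in values)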
export $(cat .env | grep -v '^#' | xargs)
echo "β Loaded .env configuration"
else
echo "β No .env file found, using defaults"
fi
echo
echo "Testing LLM connectivity..."
python fix_llm_timeout.py --test
if [ $? -ne 0 ]; then
echo
echo "β LLM connectivity issues detected!"
echo "Continue anyway? (y/n)"
read -r response
if [ "$response" != "y" ]; then
echo "Startup cancelled"
exit 1
fi
fi
echo
echo "Starting application..."
python app.py
"""
startup_path = "/home/john/TranscriptorEnhanced/start.sh"
try:
with open(startup_path, 'w') as f:
f.write(startup_script)
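        # Mark the startup script executable (rwxr-xr-x)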
os.chmod(startup_path, 0o755)
print(f" β Created startup script at {startup_path}")
print(f" Run with: ./start.sh")
fixes_applied.append("Created startup script")
except Exception as e:
print(f" β Failed to create startup script: {e}")
print()
print("=" * 70)
print("FIXES APPLIED:")
for fix in fixes_applied:
print(f" - {fix}")
print("=" * 70)
print()
print("NEXT STEPS:")
print("1. Edit .env file and add your HUGGINGFACE_TOKEN")
print("2. Run: ./start.sh")
print(" OR: source .env && python app.py")
print()
def diagnose_hanging_issue():
"""Diagnose why the app might be hanging"""
print("[4/4] Diagnosing Potential Hang Issues...")
print()
issues_found = []
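    # Each entry pairs a detected problem with a recommended fix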
# Check if we're using a heavy model
model = os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")
if "Mixtral-8x7B" in model or "70B" in model or "33B" in model:
issues_found.append({
"issue": "Using a large model that may cause timeouts",
"solution": "Switch to a lighter model like Mistral-7B-Instruct-v0.2"
})
# Check timeout settings
timeout = int(os.getenv("LLM_TIMEOUT", "120"))
if timeout > 90:
issues_found.append({
"issue": f"LLM timeout is high ({timeout}s), may cause hanging appearance",
"solution": "Reduce to 60 seconds for faster failure detection"
})
# Check max tokens
max_tokens = int(os.getenv("MAX_TOKENS_PER_REQUEST", "300"))
if max_tokens > 500:
issues_found.append({
"issue": f"Max tokens is high ({max_tokens}), slows generation",
"solution": "Reduce to 200-300 tokens"
})
if not issues_found:
print(" β No obvious configuration issues detected")
else:
print(" Issues detected:")
for i, item in enumerate(issues_found, 1):
print(f"\n {i}. {item['issue']}")
print(f" Solution: {item['solution']}")
print()
print("=" * 70)
print("COMMON CAUSES OF HANGING:")
print(" 1. Model server (LMStudio/node.js) running out of memory")
print(" 2. Network timeout to HuggingFace API")
print(" 3. Model too large for available resources")
print(" 4. Multiple concurrent requests overloading server")
print()
print("PREVENTION:")
print(" - Use the robust LLM wrapper (llm_robust.py) - already integrated")
print(" - Set aggressive timeouts (60s max)")
print(" - Use lighter models (Mistral-7B instead of Mixtral-8x7B)")
print(" - Process transcripts in smaller batches")
print("=" * 70)
print()
def main():
parser = argparse.ArgumentParser(description="Fix LLM timeout issues")
parser.add_argument("--test", action="store_true", help="Test LLM connectivity")
parser.add_argument("--fix", action="store_true", help="Apply recommended fixes")
parser.add_argument("--config", action="store_true", help="Show current config")
parser.add_argument("--diagnose", action="store_true", help="Diagnose hanging issues")
args = parser.parse_args()
print_banner()
if not any(vars(args).values()):
# No arguments, run all
test_llm_connectivity()
show_current_config()
apply_fixes()
diagnose_hanging_issue()
else:
if args.test:
success = test_llm_connectivity()
sys.exit(0 if success else 1)
if args.config:
show_current_config()
if args.fix:
apply_fixes()
if args.diagnose:
diagnose_hanging_issue()
if __name__ == "__main__":
main()