Upload 2 files
- fix_llm_timeout.py +312 -0
- llm_robust.py +262 -0
fix_llm_timeout.py
ADDED
@@ -0,0 +1,312 @@
#!/usr/bin/env python3
"""
LLM Timeout Fixer and Configuration Utility

This script helps diagnose and fix LLM timeout issues, particularly
when the node.js server or model loading causes the app to hang.

Usage:
    python fix_llm_timeout.py --test      # Test LLM connectivity
    python fix_llm_timeout.py --fix       # Apply recommended fixes
    python fix_llm_timeout.py --config    # Show current configuration
    python fix_llm_timeout.py --diagnose  # Diagnose hanging issues
"""

import os
import sys
import argparse


def print_banner():
    print("=" * 70)
    print("  TranscriptorAI - LLM Timeout Diagnostic & Fix Utility")
    print("=" * 70)
    print()


def test_llm_connectivity():
    """Test if LLM backends are accessible"""
    print("[1/4] Testing LLM Backend Connectivity...")
    print()

    # Test HuggingFace API
    print("  Testing HuggingFace API...")
    hf_token = os.getenv("HUGGINGFACE_TOKEN", "")

    if not hf_token:
        print("  ✗ HUGGINGFACE_TOKEN not set")
        print("    Set it with: export HUGGINGFACE_TOKEN='your_token_here'")
        hf_available = False
    else:
        try:
            from huggingface_hub import InferenceClient
            # Request timeout is set on the client; text_generation()
            # has no timeout kwarg.
            client = InferenceClient(token=hf_token, timeout=10)
            # Quick test
            client.text_generation(
                "Test",
                model="mistralai/Mixtral-8x7B-Instruct-v0.1",
                max_new_tokens=10,
            )
            print("  ✓ HuggingFace API is accessible")
            hf_available = True
        except Exception as e:
            print(f"  ✗ HuggingFace API failed: {e}")
            hf_available = False

    print()

    # Test LMStudio
    print("  Testing LMStudio...")
    lmstudio_url = os.getenv("LM_STUDIO_URL", "http://192.168.1.245:1234")

    try:
        import requests
        response = requests.get(f"{lmstudio_url}/v1/models", timeout=5)
        if response.status_code == 200:
            print(f"  ✓ LMStudio is accessible at {lmstudio_url}")
            lmstudio_available = True
        else:
            print(f"  ✗ LMStudio returned status {response.status_code}")
            lmstudio_available = False
    except Exception as e:
        print(f"  ✗ LMStudio not accessible: {e}")
        print(f"    Checked URL: {lmstudio_url}")
        lmstudio_available = False

    print()
    print("=" * 70)
    print("SUMMARY:")
    print(f"  HuggingFace API: {'✓ Available' if hf_available else '✗ Not Available'}")
    print(f"  LMStudio: {'✓ Available' if lmstudio_available else '✗ Not Available'}")
    print("=" * 70)
    print()

    if not hf_available and not lmstudio_available:
        print("⚠ WARNING: No LLM backends are available!")
        print()
        print("RECOMMENDED ACTIONS:")
        print("1. For HuggingFace API:")
        print("   export HUGGINGFACE_TOKEN='your_hf_token_here'")
        print()
        print("2. For LMStudio:")
        print("   - Start the LMStudio server")
        print("   - Load a model (recommended: Mistral 7B or smaller)")
        print("   - Verify it's running at: http://localhost:1234")
        print("   - Set the URL: export LM_STUDIO_URL='http://localhost:1234'")
        print()
        return False

    return True


def show_current_config():
    """Display the current configuration"""
    print("[2/4] Current Configuration...")
    print()

    config_items = [
        ("LLM Backend", os.getenv("LLM_BACKEND", "hf_api")),
        ("HuggingFace Model", os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")),
        ("LMStudio URL", os.getenv("LM_STUDIO_URL", "http://192.168.1.245:1234")),
        ("Max Tokens", os.getenv("MAX_TOKENS_PER_REQUEST", "300")),
        ("LLM Timeout", os.getenv("LLM_TIMEOUT", "120")),
        ("Temperature", os.getenv("LLM_TEMPERATURE", "0.3")),
    ]

    for key, value in config_items:
        print(f"  {key:20s}: {value}")

    print()


def apply_fixes():
    """Apply recommended configuration fixes"""
    print("[3/4] Applying Recommended Fixes...")
    print()

    fixes_applied = []

    # Create a .env file with recommended settings
    env_content = """# TranscriptorAI LLM Configuration - Optimized for Stability
# Generated by fix_llm_timeout.py

# Use HuggingFace API (more stable than local models)
LLM_BACKEND=hf_api

# Set your HuggingFace token here
HUGGINGFACE_TOKEN=your_token_here

# Use a lighter, faster model
HF_MODEL=mistralai/Mistral-7B-Instruct-v0.2

# Reduce token requirements to prevent timeouts
MAX_TOKENS_PER_REQUEST=200

# Aggressive timeout (60 seconds instead of 120)
LLM_TIMEOUT=60

# Lower temperature for more consistent output
LLM_TEMPERATURE=0.3

# LMStudio configuration (if using a local server)
LM_STUDIO_URL=http://localhost:1234

# Chunking optimization
MAX_CHUNK_TOKENS=4000
OVERLAP_TOKENS=100
"""

    env_path = "/home/john/TranscriptorEnhanced/.env"

    try:
        with open(env_path, 'w') as f:
            f.write(env_content)
        print(f"  ✓ Created optimized .env file at {env_path}")
        fixes_applied.append("Created .env configuration")
    except Exception as e:
        print(f"  ✗ Failed to create .env file: {e}")

    # Create a startup script
    startup_script = """#!/bin/bash
# TranscriptorAI Startup Script with LLM Health Check

echo "==================================="
echo "  TranscriptorAI Startup"
echo "==================================="
echo

# Load environment variables
if [ -f .env ]; then
    export $(cat .env | grep -v '^#' | xargs)
    echo "✓ Loaded .env configuration"
else
    echo "⚠ No .env file found, using defaults"
fi

echo
echo "Testing LLM connectivity..."
python fix_llm_timeout.py --test

if [ $? -ne 0 ]; then
    echo
    echo "⚠ LLM connectivity issues detected!"
    echo "Continue anyway? (y/n)"
    read -r response
    if [ "$response" != "y" ]; then
        echo "Startup cancelled"
        exit 1
    fi
fi

echo
echo "Starting application..."
python app.py
"""

    startup_path = "/home/john/TranscriptorEnhanced/start.sh"

    try:
        with open(startup_path, 'w') as f:
            f.write(startup_script)
        os.chmod(startup_path, 0o755)
        print(f"  ✓ Created startup script at {startup_path}")
        print("    Run with: ./start.sh")
        fixes_applied.append("Created startup script")
    except Exception as e:
        print(f"  ✗ Failed to create startup script: {e}")

    print()
    print("=" * 70)
    print("FIXES APPLIED:")
    for fix in fixes_applied:
        print(f"  - {fix}")
    print("=" * 70)
    print()

    print("NEXT STEPS:")
    print("1. Edit the .env file and add your HUGGINGFACE_TOKEN")
    print("2. Run: ./start.sh")
    print("   OR: source .env && python app.py")
    print()


def diagnose_hanging_issue():
    """Diagnose why the app might be hanging"""
    print("[4/4] Diagnosing Potential Hang Issues...")
    print()

    issues_found = []

    # Check whether a heavy model is configured
    model = os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")
    if "Mixtral-8x7B" in model or "70B" in model or "33B" in model:
        issues_found.append({
            "issue": "Using a large model that may cause timeouts",
            "solution": "Switch to a lighter model such as Mistral-7B-Instruct-v0.2"
        })

    # Check timeout settings
    timeout = int(os.getenv("LLM_TIMEOUT", "120"))
    if timeout > 90:
        issues_found.append({
            "issue": f"LLM timeout is high ({timeout}s), which can make the app appear to hang",
            "solution": "Reduce it to 60 seconds for faster failure detection"
        })

    # Check max tokens
    max_tokens = int(os.getenv("MAX_TOKENS_PER_REQUEST", "300"))
    if max_tokens > 500:
        issues_found.append({
            "issue": f"Max tokens is high ({max_tokens}), which slows generation",
            "solution": "Reduce it to 200-300 tokens"
        })

    if not issues_found:
        print("  ✓ No obvious configuration issues detected")
    else:
        print("  Issues detected:")
        for i, item in enumerate(issues_found, 1):
            print(f"\n  {i}. {item['issue']}")
            print(f"     Solution: {item['solution']}")

    print()
    print("=" * 70)
    print("COMMON CAUSES OF HANGING:")
    print("  1. Model server (LMStudio/node.js) running out of memory")
    print("  2. Network timeout to the HuggingFace API")
    print("  3. Model too large for the available resources")
    print("  4. Multiple concurrent requests overloading the server")
    print()
    print("PREVENTION:")
    print("  - Use the robust LLM wrapper (llm_robust.py) - already integrated")
    print("  - Set aggressive timeouts (60s max)")
    print("  - Use lighter models (Mistral-7B instead of Mixtral-8x7B)")
    print("  - Process transcripts in smaller batches")
    print("=" * 70)
    print()


def main():
    parser = argparse.ArgumentParser(description="Fix LLM timeout issues")
    parser.add_argument("--test", action="store_true", help="Test LLM connectivity")
    parser.add_argument("--fix", action="store_true", help="Apply recommended fixes")
    parser.add_argument("--config", action="store_true", help="Show current config")
    parser.add_argument("--diagnose", action="store_true", help="Diagnose hanging issues")

    args = parser.parse_args()

    print_banner()

    if not any(vars(args).values()):
        # No arguments given, so run everything
        test_llm_connectivity()
        show_current_config()
        apply_fixes()
        diagnose_hanging_issue()
    else:
        if args.test:
            success = test_llm_connectivity()
            sys.exit(0 if success else 1)
        if args.config:
            show_current_config()
        if args.fix:
            apply_fixes()
        if args.diagnose:
            diagnose_hanging_issue()


if __name__ == "__main__":
    main()
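
Usage note: the .env written by --fix is plain KEY=VALUE text, so besides `source .env` it can also be read from Python at startup. A minimal sketch, assuming the python-dotenv package (which is not part of this commit):

# Sketch only: load the .env produced by `python fix_llm_timeout.py --fix`.
# Assumes python-dotenv is installed (pip install python-dotenv).
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory
timeout_s = int(os.getenv("LLM_TIMEOUT", "60"))
model = os.getenv("HF_MODEL", "mistralai/Mistral-7B-Instruct-v0.2")
print(f"Configured model: {model}, timeout: {timeout_s}s")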
llm_robust.py
ADDED
@@ -0,0 +1,262 @@
"""
Robust LLM wrapper with aggressive timeout protection and lightweight fallbacks.
Prevents node.js/model server crashes during summarization.
"""

import signal
from contextlib import contextmanager
from typing import Tuple, Dict


class TimeoutException(Exception):
    pass


@contextmanager
def timeout(seconds):
    """Context manager for enforcing hard timeouts.

    Note: SIGALRM-based, so this works only on Unix and in the main thread.
    """
    def signal_handler(signum, frame):
        raise TimeoutException(f"Operation timed out after {seconds} seconds")

    # Install the handler and schedule the alarm
    old_handler = signal.signal(signal.SIGALRM, signal_handler)
    signal.alarm(seconds)

    try:
        yield
    finally:
        # Cancel any pending alarm and restore the previous handler
        signal.alarm(0)
        signal.signal(signal.SIGALRM, old_handler)


def query_llm_with_timeout(
    prompt: str,
    user_context: str,
    interviewee_type: str,
    extract_structured: bool = True,
    is_summary: bool = False,
    max_timeout: int = 60  # Reduced from 120 to 60 seconds
) -> Tuple[str, Dict]:
    """
    Query the LLM with aggressive timeout protection.
    Falls back to lightweight processing if heavy models fail.
    """

    print(f"[LLM] Starting {'summary' if is_summary else 'analysis'} generation...")
    print(f"[LLM] Timeout limit: {max_timeout}s")

    # Import here to avoid circular dependencies
    from llm import query_llm

    try:
        # Try with timeout protection
        with timeout(max_timeout):
            result = query_llm(
                prompt,
                user_context,
                interviewee_type,
                extract_structured=extract_structured,
                is_summary=is_summary
            )
        print("[LLM] ✓ Completed successfully")
        return result

    except TimeoutException:
        print(f"[LLM] ✗ Timeout after {max_timeout}s")
        print("[LLM] Generating lightweight fallback...")

        # Generate a lightweight fallback
        if is_summary:
            return generate_lightweight_summary(prompt, interviewee_type)
        else:
            return generate_lightweight_analysis(prompt, interviewee_type)

    except Exception as e:
        print(f"[LLM] ✗ Error: {type(e).__name__}: {e}")
        print("[LLM] Generating emergency fallback...")

        # Emergency fallback
        if is_summary:
            return generate_emergency_summary(interviewee_type)
        else:
            return generate_emergency_analysis(interviewee_type)


def generate_lightweight_summary(prompt: str, interviewee_type: str) -> Tuple[str, Dict]:
    """
    Generate a lightweight summary without heavy LLM processing.
    Extracts key points from the prompt itself.
    """

    print("[Fallback] Creating lightweight summary from prompt data...")

    import re

    # Find participant counts
    participant_matches = re.findall(r'(\d+)\s+(?:participants|transcripts|interviews)', prompt, re.IGNORECASE)
    num_participants = int(participant_matches[0]) if participant_matches else 0

    # Find percentages
    percentages = re.findall(r'(\d+)%', prompt)

    # Find mentions of conditions/themes
    lines = prompt.split('\n')
    themes = []
    for line in lines:
        if ':' in line and not line.strip().startswith(('#', '-', '*', '=')):
            parts = line.split(':', 1)
            if len(parts) == 2:
                theme = parts[0].strip()
                if len(theme) < 50:  # Reasonable theme length
                    themes.append(theme)

    summary = f"""LIGHTWEIGHT SUMMARY REPORT
(Generated due to LLM timeout - data extracted from available information)

SAMPLE OVERVIEW:
Total {interviewee_type} interviews analyzed: {num_participants}

KEY OBSERVATIONS:
This analysis is based on structured data extraction rather than full LLM synthesis.
For detailed narrative analysis, please:
1. Reduce the number of transcripts being analyzed simultaneously
2. Check LLM server (LMStudio/HuggingFace) connectivity
3. Consider using a lighter model

DATA EXTRACTED:
"""

    if themes:
        summary += f"\nIdentified themes ({len(themes)} total):\n"
        for i, theme in enumerate(themes[:10], 1):
            summary += f"{i}. {theme}\n"

    if percentages:
        summary += f"\nPercentages mentioned: {', '.join(set(percentages))}%\n"

    summary += """

RECOMMENDATIONS:
1. Review the CSV output file for structured data
2. Individual transcript analyses contain detailed information
3. For full narrative synthesis, retry with:
   - Fewer transcripts per batch
   - Increased timeout limits
   - Verified LLM server connectivity

This lightweight summary preserves data integrity while avoiding server crashes.
For production use, ensure the LLM backend is properly configured and responsive.
"""

    return summary, {}


def generate_emergency_summary(interviewee_type: str) -> Tuple[str, Dict]:
    """Emergency fallback when even lightweight processing fails"""

    summary = """EMERGENCY FALLBACK REPORT

LLM PROCESSING UNAVAILABLE

The system encountered critical errors during summary generation.
All structured data has been preserved in the CSV output file.

IMMEDIATE ACTIONS REQUIRED:
1. Check LLM server status (LMStudio/HuggingFace API)
2. Verify network connectivity
3. Review console logs for specific error messages
4. Check available system memory

DATA PRESERVATION:
✓ Individual transcript analyses completed
✓ Structured data extracted to CSV
✓ Quality scores calculated
✗ Cross-transcript narrative synthesis failed

NEXT STEPS:
1. Review the CSV file: it contains all extracted structured data
2. Check the individual transcript results below this summary
3. Resolve LLM connectivity issues
4. Re-run summary generation once service is restored

This emergency report ensures no data loss while protecting system stability.
"""

    return summary, {}


def generate_lightweight_analysis(prompt: str, interviewee_type: str) -> Tuple[str, Dict]:
    """Lightweight analysis without the heavy LLM"""

    # Extract basic structured data from the prompt
    import re

    if interviewee_type == "HCP":
        # Extract medical terms
        medical_pattern = r'\b(diagnos\w+|prescri\w+|treatment|medication|therapy)\b'
        terms = re.findall(medical_pattern, prompt, re.IGNORECASE)
        structured_data = {
            "diagnoses": list(set(t for t in terms if 'diagnos' in t.lower())),
            "prescriptions": list(set(t for t in terms if 'prescri' in t.lower())),
            "treatment_rationale": [],
            "key_insights": [f"Lightweight extraction: {len(terms)} medical terms identified"]
        }

    elif interviewee_type == "Patient":
        # Extract patient terms
        patient_pattern = r'\b(symptom|pain|concern|treatment|medication|side effect)\b'
        terms = re.findall(patient_pattern, prompt, re.IGNORECASE)
        structured_data = {
            "symptoms": list(set(t for t in terms if 'symptom' in t.lower() or 'pain' in t.lower())),
            "concerns": [],
            "treatment_response": [],
            "key_insights": [f"Lightweight extraction: {len(terms)} patient-related terms identified"]
        }
    else:
        structured_data = {
            "key_insights": ["Lightweight analysis - full LLM processing unavailable"]
        }

    analysis = f"""[LIGHTWEIGHT ANALYSIS]
Due to an LLM timeout, basic pattern extraction was used.
Structured data contains {sum(len(v) for v in structured_data.values() if isinstance(v, list))} items.

For full analysis, ensure the LLM server is responsive.
"""

    return analysis, structured_data


def generate_emergency_analysis(interviewee_type: str) -> Tuple[str, Dict]:
    """Emergency fallback for individual transcript analysis"""

    structured_data = {
        "key_insights": ["Emergency fallback - LLM processing failed"],
        "processing_status": "FALLBACK_MODE"
    }

    analysis = "[EMERGENCY FALLBACK] LLM processing unavailable. Minimal data extraction performed."

    return analysis, structured_data


# Utility function to test LLM connectivity before processing
def test_llm_connection(timeout_seconds: int = 10) -> bool:
    """Test whether the LLM backend is responsive"""

    print("[LLM] Testing backend connectivity...")

    test_prompt = "Test"

    try:
        with timeout(timeout_seconds):
            from llm import query_llm
            query_llm(
                test_prompt,
                "",
                "Other",
                extract_structured=False,
                is_summary=False
            )
        print("[LLM] ✓ Backend responsive")
        return True
    except Exception as e:
        print(f"[LLM] ✗ Backend not responsive: {e}")
        return False
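
For reference, a minimal sketch of how a caller in the processing pipeline might use this wrapper. The calling code is not part of this commit, so the integration below is an assumption based only on the signatures defined above:

# Hypothetical caller (not in this commit); uses only the functions above.
from llm_robust import test_llm_connection, query_llm_with_timeout

if not test_llm_connection(timeout_seconds=10):
    print("Backend unreachable; fallback output will be used instead of full synthesis.")

summary_text, structured = query_llm_with_timeout(
    prompt="<combined transcript text here>",  # placeholder input
    user_context="",
    interviewee_type="HCP",
    is_summary=True,
    max_timeout=60,
)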