#!/usr/bin/env python3
"""

LLM Timeout Fixer and Configuration Utility



This script helps diagnose and fix LLM timeout issues, particularly

when the node.js server or model loading causes the app to hang.



Usage:

    python fix_llm_timeout.py --test      # Test LLM connectivity

    python fix_llm_timeout.py --fix       # Apply recommended fixes

    python fix_llm_timeout.py --config    # Show current configuration

"""

import os
import sys
import argparse

def print_banner():
    print("=" * 70)
    print("  TranscriptorAI - LLM Timeout Diagnostic & Fix Utility")
    print("=" * 70)
    print()

def test_llm_connectivity():
    """Test if LLM backends are accessible"""
    print("[1/4] Testing LLM Backend Connectivity...")
    print()

    # Test HuggingFace API
    print("  Testing HuggingFace API...")
    hf_token = os.getenv("HUGGINGFACE_TOKEN", "")

    if not hf_token:
        print("  βœ— HUGGINGFACE_TOKEN not set")
        print("    Set it with: export HUGGINGFACE_TOKEN='your_token_here'")
        hf_available = False
    else:
        try:
            from huggingface_hub import InferenceClient
            # The timeout belongs on the client itself; text_generation()
            # does not accept a timeout keyword argument.
            client = InferenceClient(token=hf_token, timeout=10)
            # Quick test
            client.text_generation(
                "Test",
                model="mistralai/Mixtral-8x7B-Instruct-v0.1",
                max_new_tokens=10
            )
            print("  ✓ HuggingFace API is accessible")
            hf_available = True
        except Exception as e:
            print(f"  ✗ HuggingFace API failed: {e}")
            hf_available = False

    print()

    # Test LMStudio
    print("  Testing LMStudio...")
    lmstudio_url = os.getenv("LM_STUDIO_URL", "http://192.168.1.245:1234")

    try:
        import requests
        response = requests.get(f"{lmstudio_url}/v1/models", timeout=5)
        if response.status_code == 200:
            print(f"  βœ“ LMStudio is accessible at {lmstudio_url}")
            lmstudio_available = True
        else:
            print(f"  βœ— LMStudio returned status {response.status_code}")
            lmstudio_available = False
    except Exception as e:
        print(f"  βœ— LMStudio not accessible: {e}")
        print(f"    Checked URL: {lmstudio_url}")
        lmstudio_available = False

    print()
    print("=" * 70)
    print("SUMMARY:")
    print(f"  HuggingFace API: {'βœ“ Available' if hf_available else 'βœ— Not Available'}")
    print(f"  LMStudio:        {'βœ“ Available' if lmstudio_available else 'βœ— Not Available'}")
    print("=" * 70)
    print()

    if not hf_available and not lmstudio_available:
        print("⚠ WARNING: No LLM backends are available!")
        print()
        print("RECOMMENDED ACTIONS:")
        print("1. For HuggingFace API:")
        print("   export HUGGINGFACE_TOKEN='your_hf_token_here'")
        print()
        print("2. For LMStudio:")
        print("   - Start LMStudio server")
        print("   - Load a model (recommended: Mistral 7B or smaller)")
        print("   - Verify it's running at: http://localhost:1234")
        print("   - Set URL: export LM_STUDIO_URL='http://localhost:1234'")
        print()
        return False

    return True
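
# Illustrative sketch, not wired into this CLI: one way to put a hard
# client-side cap on any LLM call so a hung backend fails fast instead of
# blocking the app. The helper name and default below are illustrative
# assumptions, not part of the existing codebase.
def call_with_hard_timeout(fn, *args, timeout_s=60, **kwargs):
    """Run fn(*args, **kwargs); raise TimeoutError after timeout_s seconds."""
    import concurrent.futures
    pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    future = pool.submit(fn, *args, **kwargs)
    try:
        # Raises concurrent.futures.TimeoutError if fn is still running.
        return future.result(timeout=timeout_s)
    finally:
        # wait=False so a hung worker cannot block shutdown; the thread is
        # abandoned, which protects the caller but not the backend.
        pool.shutdown(wait=False)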

def show_current_config():
    """Display current configuration"""
    print("[2/4] Current Configuration...")
    print()

    config_items = [
        ("LLM Backend", os.getenv("LLM_BACKEND", "hf_api")),
        ("HuggingFace Model", os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")),
        ("LMStudio URL", os.getenv("LM_STUDIO_URL", "http://192.168.1.245:1234")),
        ("Max Tokens", os.getenv("MAX_TOKENS_PER_REQUEST", "300")),
        ("LLM Timeout", os.getenv("LLM_TIMEOUT", "120")),
        ("Temperature", os.getenv("LLM_TEMPERATURE", "0.3")),
    ]

    for key, value in config_items:
        print(f"  {key:20s}: {value}")

    print()

def apply_fixes():
    """Apply recommended configuration fixes"""
    print("[3/4] Applying Recommended Fixes...")
    print()

    fixes_applied = []

    # Create .env file with recommended settings
    env_content = """# TranscriptorAI LLM Configuration - Optimized for Stability

# Generated by fix_llm_timeout.py



# Use HuggingFace API (more stable than local models)

LLM_BACKEND=hf_api



# Set your HuggingFace token here

HUGGINGFACE_TOKEN=your_token_here



# Use a lighter, faster model

HF_MODEL=mistralai/Mistral-7B-Instruct-v0.2



# Reduce token requirements to prevent timeouts

MAX_TOKENS_PER_REQUEST=200



# Aggressive timeout (60 seconds instead of 120)

LLM_TIMEOUT=60



# Lower temperature for more consistent output

LLM_TEMPERATURE=0.3



# LMStudio configuration (if using local)

LM_STUDIO_URL=http://localhost:1234



# Chunking optimization

MAX_CHUNK_TOKENS=4000

OVERLAP_TOKENS=100

"""

    env_path = "/home/john/TranscriptorEnhanced/.env"

    try:
        with open(env_path, 'w') as f:
            f.write(env_content)
        print(f"  βœ“ Created optimized .env file at {env_path}")
        fixes_applied.append("Created .env configuration")
    except Exception as e:
        print(f"  βœ— Failed to create .env file: {e}")

    # Create a startup script
    startup_script = """#!/bin/bash

# TranscriptorAI Startup Script with LLM Health Check



echo "==================================="

echo "  TranscriptorAI Startup"

echo "==================================="

echo



# Load environment variables

if [ -f .env ]; then

    export $(cat .env | grep -v '^#' | xargs)

    echo "βœ“ Loaded .env configuration"

else

    echo "⚠ No .env file found, using defaults"

fi



echo

echo "Testing LLM connectivity..."

python fix_llm_timeout.py --test



if [ $? -ne 0 ]; then

    echo

    echo "⚠ LLM connectivity issues detected!"

    echo "Continue anyway? (y/n)"

    read -r response

    if [ "$response" != "y" ]; then

        echo "Startup cancelled"

        exit 1

    fi

fi



echo

echo "Starting application..."

python app.py

"""

    startup_path = "/home/john/TranscriptorEnhanced/start.sh"

    try:
        with open(startup_path, 'w') as f:
            f.write(startup_script)
        os.chmod(startup_path, 0o755)
        print(f"  βœ“ Created startup script at {startup_path}")
        print(f"    Run with: ./start.sh")
        fixes_applied.append("Created startup script")
    except Exception as e:
        print(f"  βœ— Failed to create startup script: {e}")

    print()
    print("=" * 70)
    print("FIXES APPLIED:")
    for fix in fixes_applied:
        print(f"  - {fix}")
    print("=" * 70)
    print()

    print("NEXT STEPS:")
    print("1. Edit .env file and add your HUGGINGFACE_TOKEN")
    print("2. Run: ./start.sh")
    print("   OR: source .env && python app.py")
    print()
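
# Illustrative sketch, not called by this utility: loading the generated
# .env into os.environ from Python, for runs that skip `source .env`.
# Minimal KEY=VALUE parsing only; quotes and multi-line values are not
# handled, and variables already set in the environment win.
def load_dotenv_minimal(path=".env"):
    if not os.path.exists(path):
        return
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, _, value = line.partition("=")
            os.environ.setdefault(key.strip(), value.strip())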

def diagnose_hanging_issue():
    """Diagnose why the app might be hanging"""
    print("[4/4] Diagnosing Potential Hang Issues...")
    print()

    issues_found = []

    # Check if we're using a heavy model
    model = os.getenv("HF_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1")
    if "Mixtral-8x7B" in model or "70B" in model or "33B" in model:
        issues_found.append({
            "issue": "Using a large model that may cause timeouts",
            "solution": "Switch to a lighter model like Mistral-7B-Instruct-v0.2"
        })

    # Check timeout settings
    timeout = int(os.getenv("LLM_TIMEOUT", "120"))
    if timeout > 90:
        issues_found.append({
            "issue": f"LLM timeout is high ({timeout}s), may cause hanging appearance",
            "solution": "Reduce to 60 seconds for faster failure detection"
        })

    # Check max tokens
    max_tokens = int(os.getenv("MAX_TOKENS_PER_REQUEST", "300"))
    if max_tokens > 500:
        issues_found.append({
            "issue": f"Max tokens is high ({max_tokens}), slows generation",
            "solution": "Reduce to 200-300 tokens"
        })

    if not issues_found:
        print("  βœ“ No obvious configuration issues detected")
    else:
        print("  Issues detected:")
        for i, item in enumerate(issues_found, 1):
            print(f"\n  {i}. {item['issue']}")
            print(f"     Solution: {item['solution']}")

    print()
    print("=" * 70)
    print("COMMON CAUSES OF HANGING:")
    print("  1. Model server (LMStudio/node.js) running out of memory")
    print("  2. Network timeout to HuggingFace API")
    print("  3. Model too large for available resources")
    print("  4. Multiple concurrent requests overloading server")
    print()
    print("PREVENTION:")
    print("  - Use the robust LLM wrapper (llm_robust.py) - already integrated")
    print("  - Set aggressive timeouts (60s max)")
    print("  - Use lighter models (Mistral-7B instead of Mixtral-8x7B)")
    print("  - Process transcripts in smaller batches")
    print("=" * 70)
    print()
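
# Illustrative sketch, not used by this utility: how the chunking values
# written to .env (MAX_CHUNK_TOKENS, OVERLAP_TOKENS) could split a
# transcript into overlapping chunks before each LLM call. Word count is a
# rough stand-in for tokens; the real pipeline may tokenize differently.
def chunk_transcript(text, max_tokens=4000, overlap=100):
    words = text.split()
    step = max(1, max_tokens - overlap)
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start:start + max_tokens]))
        if start + max_tokens >= len(words):
            break
    return chunks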

def main():
    parser = argparse.ArgumentParser(description="Fix LLM timeout issues")
    parser.add_argument("--test", action="store_true", help="Test LLM connectivity")
    parser.add_argument("--fix", action="store_true", help="Apply recommended fixes")
    parser.add_argument("--config", action="store_true", help="Show current config")
    parser.add_argument("--diagnose", action="store_true", help="Diagnose hanging issues")

    args = parser.parse_args()

    print_banner()

    if not any(vars(args).values()):
        # No arguments, run all
        test_llm_connectivity()
        show_current_config()
        apply_fixes()
        diagnose_hanging_issue()
    else:
        exit_code = 0
        if args.test:
            # Keep going after the test so --test can be combined with
            # --config/--fix/--diagnose; exit with the test result at the end.
            exit_code = 0 if test_llm_connectivity() else 1
        if args.config:
            show_current_config()
        if args.fix:
            apply_fixes()
        if args.diagnose:
            diagnose_hanging_issue()
        sys.exit(exit_code)

if __name__ == "__main__":
    main()