File size: 4,379 Bytes
57fa449
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
"""

Test script for local model inference

Run this to verify your setup before deploying to HuggingFace Spaces

"""

import os
import sys

# Set environment for local model
os.environ["USE_HF_API"] = "False"
os.environ["USE_LMSTUDIO"] = "False"
os.environ["DEBUG_MODE"] = "True"
os.environ["LLM_BACKEND"] = "local"
os.environ["LLM_TEMPERATURE"] = "0.7"

print("="*80)
print("๐Ÿงช Testing Local Model Inference")
print("="*80)

# Test imports
print("\n1๏ธโƒฃ Testing imports...")
try:
    import torch
    print(f"   โœ… PyTorch {torch.__version__}")
    print(f"   ๐Ÿ”ง CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"   ๐ŸŽฎ GPU: {torch.cuda.get_device_name(0)}")
except ImportError as e:
    print(f"   โŒ PyTorch not installed: {e}")
    print("   ๐Ÿ“ฆ Install: pip install torch")
    sys.exit(1)

try:
    from transformers import AutoModelForCausalLM, AutoTokenizer
    print(f"   โœ… Transformers installed")
except ImportError as e:
    print(f"   โŒ Transformers not installed: {e}")
    print("   ๐Ÿ“ฆ Install: pip install transformers accelerate")
    sys.exit(1)

# Step 2: the project-local `llm` module must expose query_llm.
print("\n2️⃣ Testing LLM function...")
try:
    from llm import query_llm
except ImportError as e:
    print(f"   ❌ Failed to import llm module: {e}")
    sys.exit(1)
else:
    print("   ✅ LLM module imported")

# Step 3: warn that the first query triggers a (possibly long) model download.
print("\n3️⃣ Testing simple query (this will download the model on first run)...")
print("   ⏳ This may take 2-5 minutes for first-time model download...\n")

test_prompt = """You are a medical transcript analyzer.



Analyze this brief interview segment:



Interviewer: How do you treat moderate acne?

Doctor: I typically start with topical retinoids and benzoyl peroxide. For more severe cases, I prescribe oral antibiotics like doxycycline 100mg daily.



Provide a brief summary and extract structured data in JSON format:

{

  "diagnoses": ["list of conditions mentioned"],

  "prescriptions": ["list of medications with dosages"],

  "treatment_rationale": ["list of treatment approaches"]

}

"""

try:
    # Fire one end-to-end request through the configured local backend.
    response, structured_data = query_llm(
        chunk=test_prompt,
        user_context="Extract medical information from this dermatology interview",
        interviewee_type="HCP",
        extract_structured=True,
        timeout=180
    )

    print("\n" + "="*80)
    print("📊 RESULTS")
    print("="*80)

    print(f"\n📝 Response Text ({len(response)} chars):")
    print("-" * 80)
    print(response)

    # FIX: guard the field count — a None return previously crashed here
    # (len(None) raises TypeError) and masked the real failure mode.
    field_count = len(structured_data) if structured_data is not None else 0
    print(f"\n🔍 Structured Data ({field_count} fields):")
    print("-" * 80)
    import json
    print(json.dumps(structured_data, indent=2))  # None serializes as "null"

    # Validate results: distinguish "nothing returned" (parsing failed) from
    # "empty dict" (parsed but no fields) from "populated".
    print("\n" + "="*80)
    print("✅ VALIDATION")
    print("="*80)

    if len(response) < 50:
        print("⚠️ Warning: Response is very short")
    else:
        print(f"✅ Response length OK ({len(response)} chars)")

    # FIX: the original `if not structured_data` swallowed the empty-dict
    # case too, making the "empty" warning branch unreachable.
    if structured_data is None:
        print("❌ No structured data extracted - check JSON parsing!")
    elif len(structured_data) == 0:
        print("⚠️ Structured data is empty")
    else:
        print(f"✅ Structured data extracted ({len(structured_data)} fields)")
        for key, values in structured_data.items():
            if values:  # only report non-empty field lists
                print(f"   • {key}: {len(values)} items")

    if "[Error]" in response:
        print("❌ Response contains error message!")
    else:
        print("✅ No error messages in response")

    print("\n" + "="*80)
    print("🎉 TEST COMPLETE!")
    print("="*80)
    print("\nYour system is ready for HuggingFace Spaces deployment.")
    print("\n📖 See HUGGINGFACE_SPACES_SETUP.md for deployment instructions.")

except Exception as e:
    # Broad catch is deliberate here: this is a top-level test harness and any
    # failure should produce the troubleshooting report, then exit non-zero.
    print("\n" + "="*80)
    print("❌ TEST FAILED")
    print("="*80)
    print(f"\nError: {e}")

    import traceback
    print("\nFull traceback:")
    print(traceback.format_exc())

    print("\n🔧 Troubleshooting:")
    print("1. Make sure GPU is available (or set device_map='cpu')")
    print("2. Check if you have enough RAM/VRAM (~8GB needed)")
    print("3. Try a smaller model: LOCAL_MODEL=TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    print("4. Check internet connection for model download")

    sys.exit(1)