Spaces:
Sleeping
Sleeping
File size: 4,379 Bytes
57fa449 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
"""
Test script for local model inference
Run this to verify your setup before deploying to HuggingFace Spaces
"""
import os
import sys
# Set environment for local model
os.environ["USE_HF_API"] = "False"
os.environ["USE_LMSTUDIO"] = "False"
os.environ["DEBUG_MODE"] = "True"
os.environ["LLM_BACKEND"] = "local"
os.environ["LLM_TEMPERATURE"] = "0.7"
print("="*80)
print("๐งช Testing Local Model Inference")
print("="*80)
# Step 1: verify the heavy ML dependencies are importable before doing any
# expensive work; on failure print an install hint and abort the test run.
print("\n1️⃣ Testing imports...")
try:
    import torch
    print(f"   ✅ PyTorch {torch.__version__}")
    print(f"   🧠 CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"   🎮 GPU: {torch.cuda.get_device_name(0)}")
except ImportError as e:
    print(f"   ❌ PyTorch not installed: {e}")
    print("   📦 Install: pip install torch")
    sys.exit(1)

try:
    # The names are only imported to prove the package is present; the llm
    # module does the real loading later.
    from transformers import AutoModelForCausalLM, AutoTokenizer
    print("   ✅ Transformers installed")
except ImportError as e:
    print(f"   ❌ Transformers not installed: {e}")
    print("   📦 Install: pip install transformers accelerate")
    sys.exit(1)
# Step 2: verify the project's LLM wrapper can be imported. query_llm is the
# single entry point exercised by the rest of this script.
print("\n2️⃣ Testing LLM function...")
try:
    from llm import query_llm
    print("   ✅ LLM module imported")
except ImportError as e:
    print(f"   ❌ Failed to import llm module: {e}")
    sys.exit(1)
# Step 3: build a small, self-contained dermatology prompt so the first real
# inference run (which also triggers the model download) has a known input.
print("\n3️⃣ Testing simple query (this will download the model on first run)...")
print("   ⏳ This may take 2-5 minutes for first-time model download...\n")

test_prompt = """You are a medical transcript analyzer.
Analyze this brief interview segment:
Interviewer: How do you treat moderate acne?
Doctor: I typically start with topical retinoids and benzoyl peroxide. For more severe cases, I prescribe oral antibiotics like doxycycline 100mg daily.
Provide a brief summary and extract structured data in JSON format:
{
"diagnoses": ["list of conditions mentioned"],
"prescriptions": ["list of medications with dosages"],
"treatment_rationale": ["list of treatment approaches"]
}
"""
# Step 4: run one end-to-end query against the local model and sanity-check
# both the free-text response and the structured-extraction output. Any
# exception (model load, OOM, generation failure) falls through to the
# troubleshooting block and exits non-zero.
try:
    response, structured_data = query_llm(
        chunk=test_prompt,
        user_context="Extract medical information from this dermatology interview",
        interviewee_type="HCP",
        extract_structured=True,
        timeout=180,
    )

    print("\n" + "=" * 80)
    print("📊 RESULTS")
    print("=" * 80)

    print(f"\n📝 Response Text ({len(response)} chars):")
    print("-" * 80)
    print(response)

    print(f"\n📊 Structured Data ({len(structured_data)} fields):")
    print("-" * 80)
    import json
    print(json.dumps(structured_data, indent=2))

    # Validate results
    print("\n" + "=" * 80)
    print("✅ VALIDATION")
    print("=" * 80)

    # 50 chars is a heuristic floor for a non-degenerate model answer.
    if len(response) < 50:
        print("⚠️ Warning: Response is very short")
    else:
        print(f"✅ Response length OK ({len(response)} chars)")

    # BUGFIX: the original checked `if not structured_data:` first, which made
    # the `elif len(structured_data) == 0:` branch unreachable (an empty dict
    # is already falsy). Distinguish "nothing parsed" (None) from "parsed but
    # empty" ({}) explicitly so both diagnostics can actually fire.
    if structured_data is None:
        print("❌ No structured data extracted - check JSON parsing!")
    elif len(structured_data) == 0:
        print("⚠️ Structured data is empty")
    else:
        print(f"✅ Structured data extracted ({len(structured_data)} fields)")
        for key, values in structured_data.items():
            # NOTE(review): assumes each field maps to a sized collection
            # (list per the prompt's JSON schema) — confirm against llm module.
            if values:
                print(f"   • {key}: {len(values)} items")

    # The llm module marks failures inline with an "[Error]" tag.
    if "[Error]" in response:
        print("❌ Response contains error message!")
    else:
        print("✅ No error messages in response")

    print("\n" + "=" * 80)
    print("🎉 TEST COMPLETE!")
    print("=" * 80)
    print("\nYour system is ready for HuggingFace Spaces deployment.")
    print("\n📖 See HUGGINGFACE_SPACES_SETUP.md for deployment instructions.")

except Exception as e:
    print("\n" + "=" * 80)
    print("❌ TEST FAILED")
    print("=" * 80)
    print(f"\nError: {e}")
    import traceback
    print("\nFull traceback:")
    print(traceback.format_exc())
    print("\n🔧 Troubleshooting:")
    print("1. Make sure GPU is available (or set device_map='cpu')")
    print("2. Check if you have enough RAM/VRAM (~8GB needed)")
    print("3. Try a smaller model: LOCAL_MODEL=TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    print("4. Check internet connection for model download")
    sys.exit(1)
|