# TranscriptWriting / test_local_model.py
# (HuggingFace file-viewer metadata preserved as comments so the script parses:
#  uploaded by jmisak — "Upload 6 files" — commit 57fa449 (verified) — 4.38 kB)
"""
Test script for local model inference
Run this to verify your setup before deploying to HuggingFace Spaces
"""
import os
import sys
# Set environment for local model
os.environ["USE_HF_API"] = "False"
os.environ["USE_LMSTUDIO"] = "False"
os.environ["DEBUG_MODE"] = "True"
os.environ["LLM_BACKEND"] = "local"
os.environ["LLM_TEMPERATURE"] = "0.7"
print("="*80)
print("๐Ÿงช Testing Local Model Inference")
print("="*80)
# Step 1: confirm the ML runtime (PyTorch + Transformers) is importable,
# and report GPU availability; exit early with install hints otherwise.
print("\n1๏ธโƒฃ Testing imports...")
try:
    import torch
except ImportError as e:
    print(f" โŒ PyTorch not installed: {e}")
    print(" ๐Ÿ“ฆ Install: pip install torch")
    sys.exit(1)
else:
    print(f" โœ… PyTorch {torch.__version__}")
    cuda_ok = torch.cuda.is_available()
    print(f" ๐Ÿ”ง CUDA available: {cuda_ok}")
    if cuda_ok:
        print(f" ๐ŸŽฎ GPU: {torch.cuda.get_device_name(0)}")

try:
    from transformers import AutoModelForCausalLM, AutoTokenizer
except ImportError as e:
    print(f" โŒ Transformers not installed: {e}")
    print(" ๐Ÿ“ฆ Install: pip install transformers accelerate")
    sys.exit(1)
else:
    print(" โœ… Transformers installed")
# Step 2: make sure the project's LLM wrapper module is importable;
# without it there is nothing to test, so bail out with an error.
print("\n2๏ธโƒฃ Testing LLM function...")
try:
    from llm import query_llm
except ImportError as e:
    print(f" โŒ Failed to import llm module: {e}")
    sys.exit(1)
else:
    print(" โœ… LLM module imported")
# Test simple query
# Step 3 setup: warn the user that the first run downloads model weights,
# then define the fixed prompt used for the end-to-end check.
print("\n3๏ธโƒฃ Testing simple query (this will download the model on first run)...")
print(" โณ This may take 2-5 minutes for first-time model download...\n")
# Sample input: a short dermatology (HCP) interview snippet plus the JSON
# schema the model is asked to fill in; passed to query_llm below.
test_prompt = """You are a medical transcript analyzer.
Analyze this brief interview segment:
Interviewer: How do you treat moderate acne?
Doctor: I typically start with topical retinoids and benzoyl peroxide. For more severe cases, I prescribe oral antibiotics like doxycycline 100mg daily.
Provide a brief summary and extract structured data in JSON format:
{
"diagnoses": ["list of conditions mentioned"],
"prescriptions": ["list of medications with dosages"],
"treatment_rationale": ["list of treatment approaches"]
}
"""
# Step 3: run one end-to-end query through query_llm and validate the output.
# NOTE(review): assumes query_llm returns (response_text, structured_dict) —
# matches the unpacking below; confirm against the llm module.
try:
    response, structured_data = query_llm(
        chunk=test_prompt,
        user_context="Extract medical information from this dermatology interview",
        interviewee_type="HCP",
        extract_structured=True,
        timeout=180
    )

    print("\n" + "="*80)
    print("๐Ÿ“Š RESULTS")
    print("="*80)

    print(f"\n๐Ÿ“ Response Text ({len(response)} chars):")
    print("-" * 80)
    print(response)

    # Guard: structured_data may be None (e.g. JSON parsing failed); calling
    # len(None) here would raise TypeError and be misreported below as a
    # generic "TEST FAILED" instead of a parsing problem.
    field_count = len(structured_data) if structured_data else 0
    print(f"\n๐Ÿ” Structured Data ({field_count} fields):")
    print("-" * 80)
    import json
    print(json.dumps(structured_data, indent=2))

    # Validate results
    print("\n" + "="*80)
    print("โœ… VALIDATION")
    print("="*80)

    if len(response) < 50:
        print("โš ๏ธ Warning: Response is very short")
    else:
        print(f"โœ… Response length OK ({len(response)} chars)")

    # Bug fix: the original checked `not structured_data` first, which made
    # the empty-container branch unreachable (an empty dict is falsy).
    # Distinguish None (nothing parsed) from empty (parsed, nothing found).
    if structured_data is None:
        print("โŒ No structured data extracted - check JSON parsing!")
    elif len(structured_data) == 0:
        print("โš ๏ธ Structured data is empty")
    else:
        print(f"โœ… Structured data extracted ({len(structured_data)} fields)")
        for key, values in structured_data.items():
            if values:
                print(f" โ€ข {key}: {len(values)} items")

    if "[Error]" in response:
        print("โŒ Response contains error message!")
    else:
        print("โœ… No error messages in response")

    print("\n" + "="*80)
    print("๐ŸŽ‰ TEST COMPLETE!")
    print("="*80)
    print("\nYour system is ready for HuggingFace Spaces deployment.")
    print("\n๐Ÿ“– See HUGGINGFACE_SPACES_SETUP.md for deployment instructions.")
except Exception as e:
    # Broad catch is intentional: this is a top-level diagnostic script and
    # any failure (OOM, download error, bad output) should surface with
    # troubleshooting hints rather than an unhandled traceback.
    print("\n" + "="*80)
    print("โŒ TEST FAILED")
    print("="*80)
    print(f"\nError: {e}")
    import traceback
    print("\nFull traceback:")
    print(traceback.format_exc())
    print("\n๐Ÿ”ง Troubleshooting:")
    print("1. Make sure GPU is available (or set device_map='cpu')")
    print("2. Check if you have enough RAM/VRAM (~8GB needed)")
    print("3. Try a smaller model: LOCAL_MODEL=TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    print("4. Check internet connection for model download")
    sys.exit(1)