"""
Test script for local model inference.

Run this to verify your setup before deploying to HuggingFace Spaces.
"""

import os
import sys

# Force the local-inference backend: disable the HF Inference API and
# LM Studio, turn on debug output, and pin temperature. These MUST be set
# before the `llm` module is imported below, since backend selection is
# presumably read from the environment at import time — TODO confirm in llm.py.
os.environ["USE_HF_API"] = "False"
os.environ["USE_LMSTUDIO"] = "False"
os.environ["DEBUG_MODE"] = "True"
os.environ["LLM_BACKEND"] = "local"
os.environ["LLM_TEMPERATURE"] = "0.7"
|
|
|
# Step 1: verify PyTorch is importable and report GPU availability.
# NOTE(review): emoji below were mojibake in the original (UTF-8 decoded as a
# Thai codepage, which also split several string literals across lines via the
# 0x85/NEL byte of the checkmark); restored to the most plausible originals.
print("=" * 80)
print("🧪 Testing Local Model Inference")
print("=" * 80)

print("\n1️⃣ Testing imports...")
try:
    import torch

    print(f"   ✅ PyTorch {torch.__version__}")
    print(f"   🧠 CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"   🎮 GPU: {torch.cuda.get_device_name(0)}")
except ImportError as e:
    # Local inference is impossible without torch — fail fast with guidance.
    print(f"   ❌ PyTorch not installed: {e}")
    print("   📦 Install: pip install torch")
    sys.exit(1)
|
|
|
# Verify the transformers library (model/tokenizer loaders) is installed.
try:
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Original used an f-string with no placeholders; plain string is equivalent.
    print("   ✅ Transformers installed")
except ImportError as e:
    print(f"   ❌ Transformers not installed: {e}")
    print("   📦 Install: pip install transformers accelerate")
    sys.exit(1)
|
|
|
|
|
# Step 2: verify the project's `llm` module imports cleanly. This import also
# triggers whatever backend setup llm.py does, driven by the env vars above.
print("\n2️⃣ Testing LLM function...")
try:
    from llm import query_llm

    print("   ✅ LLM module imported")
except ImportError as e:
    print(f"   ❌ Failed to import llm module: {e}")
    sys.exit(1)
|
|
|
|
|
# Step 3: build a small, self-contained prompt for an end-to-end query.
print("\n3️⃣ Testing simple query (this will download the model on first run)...")
print("   ⏳ This may take 2-5 minutes for first-time model download...\n")

# A short HCP (healthcare professional) transcript with known conditions and a
# dosed medication, so the extracted structured data can be eyeballed against
# the input.
test_prompt = """You are a medical transcript analyzer.

Analyze this brief interview segment:

Interviewer: How do you treat moderate acne?
Doctor: I typically start with topical retinoids and benzoyl peroxide. For more severe cases, I prescribe oral antibiotics like doxycycline 100mg daily.

Provide a brief summary and extract structured data in JSON format:
{
  "diagnoses": ["list of conditions mentioned"],
  "prescriptions": ["list of medications with dosages"],
  "treatment_rationale": ["list of treatment approaches"]
}
"""
|
|
|
# Run the end-to-end query and validate the two outputs: free-text `response`
# and dict `structured_data`. Any failure (download, OOM, parse error) lands in
# the broad except below, which is deliberate for a diagnostic script.
try:
    response, structured_data = query_llm(
        chunk=test_prompt,
        user_context="Extract medical information from this dermatology interview",
        interviewee_type="HCP",
        extract_structured=True,
        timeout=180,
    )

    print("\n" + "=" * 80)
    print("📊 RESULTS")
    print("=" * 80)

    print(f"\n📝 Response Text ({len(response)} chars):")
    print("-" * 80)
    print(response)

    print(f"\n📊 Structured Data ({len(structured_data)} fields):")
    print("-" * 80)
    import json

    print(json.dumps(structured_data, indent=2))

    print("\n" + "=" * 80)
    print("✅ VALIDATION")
    print("=" * 80)

    # Heuristic sanity checks, not hard failures — this is a smoke test.
    if len(response) < 50:
        print("⚠️ Warning: Response is very short")
    else:
        print(f"✅ Response length OK ({len(response)} chars)")

    # FIX: the original had an `elif len(structured_data) == 0:` branch here,
    # which was unreachable — an empty dict is falsy, so `not structured_data`
    # already covers it. Dead branch removed.
    if not structured_data:
        print("❌ No structured data extracted - check JSON parsing!")
    else:
        print(f"✅ Structured data extracted ({len(structured_data)} fields)")
        for key, values in structured_data.items():
            if values:
                print(f"   • {key}: {len(values)} items")

    if "[Error]" in response:
        print("❌ Response contains error message!")
    else:
        print("✅ No error messages in response")

    print("\n" + "=" * 80)
    print("🎉 TEST COMPLETE!")
    print("=" * 80)
    print("\nYour system is ready for HuggingFace Spaces deployment.")
    print("\n📖 See HUGGINGFACE_SPACES_SETUP.md for deployment instructions.")

except Exception as e:
    print("\n" + "=" * 80)
    print("❌ TEST FAILED")
    print("=" * 80)
    print(f"\nError: {e}")

    import traceback

    print("\nFull traceback:")
    print(traceback.format_exc())

    print("\n🔧 Troubleshooting:")
    print("1. Make sure GPU is available (or set device_map='cpu')")
    print("2. Check if you have enough RAM/VRAM (~8GB needed)")
    print("3. Try a smaller model: LOCAL_MODEL=TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    print("4. Check internet connection for model download")

    sys.exit(1)
|
|
|