""" Test script for local model inference Run this to verify your setup before deploying to HuggingFace Spaces """ import os import sys # Set environment for local model os.environ["USE_HF_API"] = "False" os.environ["USE_LMSTUDIO"] = "False" os.environ["DEBUG_MODE"] = "True" os.environ["LLM_BACKEND"] = "local" os.environ["LLM_TEMPERATURE"] = "0.7" print("="*80) print("๐Ÿงช Testing Local Model Inference") print("="*80) # Test imports print("\n1๏ธโƒฃ Testing imports...") try: import torch print(f" โœ… PyTorch {torch.__version__}") print(f" ๐Ÿ”ง CUDA available: {torch.cuda.is_available()}") if torch.cuda.is_available(): print(f" ๐ŸŽฎ GPU: {torch.cuda.get_device_name(0)}") except ImportError as e: print(f" โŒ PyTorch not installed: {e}") print(" ๐Ÿ“ฆ Install: pip install torch") sys.exit(1) try: from transformers import AutoModelForCausalLM, AutoTokenizer print(f" โœ… Transformers installed") except ImportError as e: print(f" โŒ Transformers not installed: {e}") print(" ๐Ÿ“ฆ Install: pip install transformers accelerate") sys.exit(1) # Test LLM function print("\n2๏ธโƒฃ Testing LLM function...") try: from llm import query_llm print(" โœ… LLM module imported") except ImportError as e: print(f" โŒ Failed to import llm module: {e}") sys.exit(1) # Test simple query print("\n3๏ธโƒฃ Testing simple query (this will download the model on first run)...") print(" โณ This may take 2-5 minutes for first-time model download...\n") test_prompt = """You are a medical transcript analyzer. Analyze this brief interview segment: Interviewer: How do you treat moderate acne? Doctor: I typically start with topical retinoids and benzoyl peroxide. For more severe cases, I prescribe oral antibiotics like doxycycline 100mg daily. Provide a brief summary and extract structured data in JSON format: { "diagnoses": ["list of conditions mentioned"], "prescriptions": ["list of medications with dosages"], "treatment_rationale": ["list of treatment approaches"] } """ try: response, structured_data = query_llm( chunk=test_prompt, user_context="Extract medical information from this dermatology interview", interviewee_type="HCP", extract_structured=True, timeout=180 ) print("\n" + "="*80) print("๐Ÿ“Š RESULTS") print("="*80) print(f"\n๐Ÿ“ Response Text ({len(response)} chars):") print("-" * 80) print(response) print(f"\n๐Ÿ” Structured Data ({len(structured_data)} fields):") print("-" * 80) import json print(json.dumps(structured_data, indent=2)) # Validate results print("\n" + "="*80) print("โœ… VALIDATION") print("="*80) if len(response) < 50: print("โš ๏ธ Warning: Response is very short") else: print(f"โœ… Response length OK ({len(response)} chars)") if not structured_data: print("โŒ No structured data extracted - check JSON parsing!") elif len(structured_data) == 0: print("โš ๏ธ Structured data is empty") else: print(f"โœ… Structured data extracted ({len(structured_data)} fields)") for key, values in structured_data.items(): if values: print(f" โ€ข {key}: {len(values)} items") if "[Error]" in response: print("โŒ Response contains error message!") else: print("โœ… No error messages in response") print("\n" + "="*80) print("๐ŸŽ‰ TEST COMPLETE!") print("="*80) print("\nYour system is ready for HuggingFace Spaces deployment.") print("\n๐Ÿ“– See HUGGINGFACE_SPACES_SETUP.md for deployment instructions.") except Exception as e: print("\n" + "="*80) print("โŒ TEST FAILED") print("="*80) print(f"\nError: {e}") import traceback print("\nFull traceback:") print(traceback.format_exc()) print("\n๐Ÿ”ง Troubleshooting:") print("1. Make sure GPU is available (or set device_map='cpu')") print("2. Check if you have enough RAM/VRAM (~8GB needed)") print("3. Try a smaller model: LOCAL_MODEL=TinyLlama/TinyLlama-1.1B-Chat-v1.0") print("4. Check internet connection for model download") sys.exit(1)