"""
Test script for local model inference
Run this to verify your setup before deploying to HuggingFace Spaces
"""

import os
import sys

# Force the local-model backend. These variables are set before the `llm`
# module is imported further down in this script.
os.environ.update(
    {
        "USE_HF_API": "False",
        "USE_LMSTUDIO": "False",
        "DEBUG_MODE": "True",
        "LLM_BACKEND": "local",
        "LLM_TEMPERATURE": "0.7",
    }
)

# Banner: rule, title, rule (one line each)
print("=" * 80, "🧪 Testing Local Model Inference", "=" * 80, sep="\n")

# Step 1: confirm the third-party dependencies are importable.
# Each check prints an install hint and exits with status 1 on ImportError.
print("\n1️⃣ Testing imports...")
try:
    import torch

    print(f"   ✅ PyTorch {torch.__version__}")
    cuda_ready = torch.cuda.is_available()
    print(f"   🔧 CUDA available: {cuda_ready}")
    if cuda_ready:
        # Report the name of device 0 only
        print(f"   🎮 GPU: {torch.cuda.get_device_name(0)}")
except ImportError as torch_err:
    print(
        f"   ❌ PyTorch not installed: {torch_err}",
        "   📦 Install: pip install torch",
        sep="\n",
    )
    sys.exit(1)

try:
    from transformers import AutoModelForCausalLM, AutoTokenizer
except ImportError as transformers_err:
    print(
        f"   ❌ Transformers not installed: {transformers_err}",
        "   📦 Install: pip install transformers accelerate",
        sep="\n",
    )
    sys.exit(1)
else:
    print("   ✅ Transformers installed")

# Step 2: confirm the project's `llm` module exposes `query_llm`.
# An ImportError is reported and ends the script with status 1.
print("\n2️⃣ Testing LLM function...")
try:
    from llm import query_llm
except ImportError as import_err:
    print(f"   ❌ Failed to import llm module: {import_err}")
    sys.exit(1)
else:
    print("   ✅ LLM module imported")

# Step 3: announce the query and build the prompt that is sent to query_llm.
print(
    "\n3️⃣ Testing simple query (this will download the model on first run)...",
    "   ⏳ This may take 2-5 minutes for first-time model download...\n",
    sep="\n",
)

# One literal per prompt line; adjacent literals are concatenated at compile
# time, so the value is a single string with a trailing newline.
test_prompt = (
    "You are a medical transcript analyzer.\n"
    "\n"
    "Analyze this brief interview segment:\n"
    "\n"
    "Interviewer: How do you treat moderate acne?\n"
    "Doctor: I typically start with topical retinoids and benzoyl peroxide. For more severe cases, I prescribe oral antibiotics like doxycycline 100mg daily.\n"
    "\n"
    "Provide a brief summary and extract structured data in JSON format:\n"
    "{\n"
    '  "diagnoses": ["list of conditions mentioned"],\n'
    '  "prescriptions": ["list of medications with dosages"],\n'
    '  "treatment_rationale": ["list of treatment approaches"]\n'
    "}\n"
)

# End-to-end check: send the test prompt through query_llm, print what came
# back, validate it, and exit non-zero if anything is wrong.
#
# json/traceback are imported up front so the failure handler does not have
# to import anything while it is already reporting an error.
import json
import traceback

try:
    response, structured_data = query_llm(
        chunk=test_prompt,
        user_context="Extract medical information from this dermatology interview",
        interviewee_type="HCP",
        extract_structured=True,
        timeout=180,
    )

    # Normalise before calling len(): a missing response or missing structured
    # data must be reported by the validation section below, not surface as a
    # TypeError from len(None) while printing the results.
    response_text = "" if response is None else str(response)
    field_count = len(structured_data) if isinstance(structured_data, dict) else 0

    print("\n" + "=" * 80)
    print("📊 RESULTS")
    print("=" * 80)

    print(f"\n📝 Response Text ({len(response_text)} chars):")
    print("-" * 80)
    print(response_text)

    print(f"\n🔍 Structured Data ({field_count} fields):")
    print("-" * 80)
    # default=str is deliberate: this dump is diagnostic output only, so an
    # unexpected non-JSON value should be shown rather than abort the test.
    print(json.dumps(structured_data, indent=2, ensure_ascii=False, default=str))

    # Validate results. ❌ checks are fatal (exit status 1); ⚠️ checks are
    # reported but do not fail the run.
    print("\n" + "=" * 80)
    print("✅ VALIDATION")
    print("=" * 80)

    failure_count = 0
    warning_count = 0

    if len(response_text) < 50:
        print("⚠️ Warning: Response is very short")
        warning_count += 1
    else:
        print(f"✅ Response length OK ({len(response_text)} chars)")

    if not isinstance(structured_data, dict):
        # None (or any non-mapping) means extraction produced nothing usable.
        print("❌ No structured data extracted - check JSON parsing!")
        failure_count += 1
    elif not structured_data:
        print("⚠️ Structured data is empty")
        warning_count += 1
    else:
        print(f"✅ Structured data extracted ({field_count} fields)")
        for key, values in structured_data.items():
            if values:
                # Containers report their size; a scalar or string counts as
                # one item (len() would raise or count characters).
                is_container = isinstance(values, (list, tuple, set, dict))
                item_count = len(values) if is_container else 1
                print(f"   • {key}: {item_count} items")

    if "[Error]" in response_text:
        print("❌ Response contains error message!")
        failure_count += 1
    else:
        print("✅ No error messages in response")

    print("\n" + "=" * 80)
    if failure_count:
        print("❌ VALIDATION FAILED")
        print("=" * 80)
        print(f"\n{failure_count} check(s) failed - resolve them before deploying.")
        # SystemExit does not derive from Exception, so it passes through the
        # handler below and the process ends with status 1.
        sys.exit(1)

    print("🎉 TEST COMPLETE!")
    print("=" * 80)
    if warning_count:
        print(f"\n⚠️ {warning_count} warning(s) above - review them before deploying.")
    print("\nYour system is ready for HuggingFace Spaces deployment.")
    print("\n📖 See HUGGINGFACE_SPACES_SETUP.md for deployment instructions.")

except Exception as e:
    # Top-level boundary of the script: report the error with its traceback,
    # print troubleshooting hints, and exit with a failing status.
    print("\n" + "=" * 80)
    print("❌ TEST FAILED")
    print("=" * 80)
    print(f"\nError: {e}")

    print("\nFull traceback:")
    print(traceback.format_exc())

    print("\n🔧 Troubleshooting:")
    print("1. Make sure GPU is available (or set device_map='cpu')")
    print("2. Check if you have enough RAM/VRAM (~8GB needed)")
    print("3. Try a smaller model: LOCAL_MODEL=TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    print("4. Check internet connection for model download")

    sys.exit(1)