#!/usr/bin/env python
"""
Test HuggingFace token for IndicBERT model access.

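This script tests if the HuggingFace token can successfully:
1. Authenticate with the HuggingFace Hub
2. Access the IndicBERT model repository
3. Download and load the IndicBERT tokenizer and model
4. Run a test inference
5. Drive the project's ScamDetector (a failure here is reported as a warning only)

If this works locally with the token, it will work on the server too.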
"""

import os
import sys
import time
from pathlib import Path

# Make the directory above this script's folder importable (used later for
# the ``app`` package import).
sys.path.insert(0, str(Path(__file__).parent.parent))

from dotenv import load_dotenv

# Prefer the .env file that sits in that same directory; when it is missing,
# defer to load_dotenv()'s own default lookup.
env_path = Path(__file__).parent.parent / ".env"
if not env_path.exists():
    load_dotenv()
else:
    load_dotenv(env_path)

def _mask_token(token, prefix=10, suffix=4):
    """Return a display-safe preview of *token*.

    The first ``prefix`` and last ``suffix`` characters are shown only when
    the token is long enough that most of it stays hidden. Shorter values are
    fully masked so the preview can never reveal the whole secret.
    """
    if len(token) < 2 * (prefix + suffix):
        return "*" * 8
    return f"{token[:prefix]}...{token[-suffix:]}"


def test_huggingface_token():
    """Test HuggingFace token and IndicBERT access.

    Reads ``HUGGINGFACE_TOKEN`` from the environment and runs these steps in
    order, printing a progress report to stdout:

    1. Authenticate against the HuggingFace Hub.
    2. Fetch metadata for the ``ai4bharat/indic-bert`` repository.
    3. Load the tokenizer.
    4. Load the model.
    5. Run one inference on a sample message.
    6. Exercise the project's ``ScamDetector`` (failure is only a warning).

    The token itself is never printed in full; only a masked preview is shown.

    Returns:
        bool: True when steps 1-5 all succeed. False when the token is
        missing or as soon as one of steps 1-5 fails.
    """
    print("=" * 60)
    print("HuggingFace Token & IndicBERT Test")
    print("=" * 60)
    print()

    # Check token
    token = os.getenv("HUGGINGFACE_TOKEN")
    if not token:
        print("[ERROR] HUGGINGFACE_TOKEN not found in environment")
        print("Set it in your .env file")
        return False

    # Only a masked preview is shown so the secret does not end up in logs.
    print(f"Token: {_mask_token(token)}")
    print()

    # Test 1: HuggingFace Hub authentication
    print("Step 1: Testing HuggingFace Hub authentication...")
    try:
        from huggingface_hub import HfApi
        api = HfApi()
        user_info = api.whoami(token=token)
        print(f"  [OK] Authenticated as: {user_info.get('name', 'Unknown')}")
    except Exception as e:
        print(f"  [ERROR] Authentication failed: {e}")
        print("  Check your token at: https://huggingface.co/settings/tokens")
        return False
    print()

    # Test 2: Check IndicBERT access
    print("Step 2: Checking IndicBERT model access...")
    model_name = "ai4bharat/indic-bert"
    try:
        from huggingface_hub import model_info
        # Per-file sizes are only populated when file metadata is requested.
        info = model_info(model_name, token=token, files_metadata=True)
        repo_id = getattr(info, "id", None) or getattr(info, "modelId", model_name)
        print(f"  [OK] Model found: {repo_id}")
        # Report the total size of all repository files rather than the size
        # of whichever file happens to be listed first.
        file_sizes = [
            size
            for size in (getattr(f, "size", None) for f in info.siblings or [])
            if size is not None
        ]
        size_text = f"{sum(file_sizes)} bytes" if file_sizes else "Unknown"
        print(f"  [OK] Model size: {size_text}")
    except Exception as e:
        reason = str(e).lower()
        if "gated" in reason or "access" in reason:
            print("  [ERROR] Model is gated and requires access request")
            print(f"  Please request access at: https://huggingface.co/{model_name}")
        else:
            print(f"  [ERROR] Failed to access model: {e}")
        return False
    print()

    # Test 3: Load tokenizer
    print("Step 3: Loading IndicBERT tokenizer...")
    start_time = time.time()
    try:
        from transformers import AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
        load_time = time.time() - start_time
        print(f"  [OK] Tokenizer loaded in {load_time:.2f}s")
    except Exception as e:
        print(f"  [ERROR] Failed to load tokenizer: {e}")
        return False
    print()

    # Test 4: Load model
    print("Step 4: Loading IndicBERT model (may take a few minutes on first run)...")
    start_time = time.time()
    try:
        from transformers import AutoModel
        model = AutoModel.from_pretrained(model_name, token=token)
        model.eval()
        load_time = time.time() - start_time
        print(f"  [OK] Model loaded in {load_time:.2f}s")
    except Exception as e:
        print(f"  [ERROR] Failed to load model: {e}")
        return False
    print()

    # Test 5: Run inference
    print("Step 5: Testing inference...")
    try:
        import torch

        test_message = "You have won 10 lakh rupees! Send OTP now."
        inputs = tokenizer(test_message, return_tensors="pt", truncation=True, max_length=512)

        with torch.no_grad():
            outputs = model(**inputs)

        embedding_shape = outputs.last_hidden_state.shape
        print("  [OK] Inference successful!")
        print(f"  [OK] Output shape: {embedding_shape}")
    except Exception as e:
        print(f"  [ERROR] Inference failed: {e}")
        return False
    print()

    # Test 6: Test ScamDetector
    # A failure here is deliberately non-fatal: it is reported as a warning
    # and the overall result stays successful.
    print("Step 6: Testing ScamDetector with IndicBERT...")
    try:
        from app.models.detector import ScamDetector, reset_detector_cache

        # Reset cache to force reload with new token
        reset_detector_cache()

        detector = ScamDetector()
        result = detector.detect("You have won 10 lakh rupees! Share OTP to claim.")

        print("  [OK] ScamDetector working!")
        print(f"  [OK] Scam detected: {result['scam_detected']}")
        print(f"  [OK] Confidence: {result['confidence']:.2f}")
        print(f"  [OK] Indicators: {result['indicators']}")
    except Exception as e:
        print(f"  [WARNING] ScamDetector test: {e}")
        print("  This may still work - detector has keyword fallback")
    print()

    print("=" * 60)
    print("[SUCCESS] HuggingFace token works!")
    print("=" * 60)
    print()
    print("You can use this token on the server:")
    # Never echo the raw secret: stdout is routinely captured by shells and CI.
    print(f"  HUGGINGFACE_TOKEN={_mask_token(token)}")
    print("  (copy the full value from your .env file; it is not printed here)")
    print()
    print("The model will be downloaded from HuggingFace on first request.")
    return True

if __name__ == "__main__":
    # Exit status mirrors the outcome: 0 when the check passed, 1 otherwise.
    sys.exit(int(not test_huggingface_token()))