scam / scripts /test_huggingface_token.py
Gankit12's picture
Upload 129 files
31f0e50 verified
Raw
History Blame Contribute Delete
5.33 kB
#!/usr/bin/env python
"""
Test HuggingFace token for IndicBERT model access.
This script tests if the HuggingFace token can successfully:
1. Authenticate with HuggingFace
2. Download and load IndicBERT model
3. Run a test inference
If this works locally with the token, it will work on the server too.
"""
import os
import sys
import time
from pathlib import Path
# Put the directory above this script's folder on sys.path so that the
# `app` package imported later can be resolved when run as a script.
sys.path.insert(0, str(Path(__file__).parent.parent))

from dotenv import load_dotenv

# Use the .env that sits next to the scripts folder when it exists;
# otherwise pass None, which is load_dotenv's default path argument and
# leaves the lookup to python-dotenv itself.
env_path = Path(__file__).parent.parent / ".env"
load_dotenv(env_path if env_path.exists() else None)
def _mask_token(token):
    """Return a rendering of *token* that is safe to print.

    Short values are fully masked with a fixed-width placeholder (so neither
    the content nor the length leaks); longer values keep only their first
    and last four characters so the user can still tell tokens apart.
    """
    if len(token) <= 12:
        return "*" * 8
    return f"{token[:4]}...{token[-4:]}"


def test_huggingface_token():
    """Check that HUGGINGFACE_TOKEN can authenticate and load IndicBERT.

    Runs six steps in order and prints a status line for each:

    1. ``HfApi().whoami`` with the token.
    2. ``model_info`` lookup for ``ai4bharat/indic-bert``.
    3. ``AutoTokenizer.from_pretrained``.
    4. ``AutoModel.from_pretrained`` followed by ``model.eval()``.
    5. One forward pass on a sample message under ``torch.no_grad()``.
    6. ``ScamDetector().detect`` from ``app.models.detector`` (best-effort:
       a failure here is reported as a warning and does not fail the run).

    The token value itself is never printed in full.

    Returns:
        bool: ``True`` when steps 1-5 all succeed, ``False`` when the token
        is missing or any of those steps raises.
    """
    print("=" * 60)
    print("HuggingFace Token & IndicBERT Test")
    print("=" * 60)
    print()

    # Check token
    token = os.getenv("HUGGINGFACE_TOKEN")
    if not token:
        print("[ERROR] HUGGINGFACE_TOKEN not found in environment")
        print("Set it in your .env file")
        return False

    # Only a masked form is shown: this output may end up in shell history,
    # CI logs or screenshots.
    print(f"Token: {_mask_token(token)}")
    print()

    # Test 1: HuggingFace Hub authentication
    print("Step 1: Testing HuggingFace Hub authentication...")
    try:
        from huggingface_hub import HfApi

        api = HfApi()
        user_info = api.whoami(token=token)
        print(f" [OK] Authenticated as: {user_info.get('name', 'Unknown')}")
    except Exception as e:
        print(f" [ERROR] Authentication failed: {e}")
        print(" Check your token at: https://huggingface.co/settings/tokens")
        return False
    print()

    # Test 2: Check IndicBERT access
    print("Step 2: Checking IndicBERT model access...")
    model_name = "ai4bharat/indic-bert"
    try:
        from huggingface_hub import model_info

        # files_metadata=True asks the Hub to include per-file sizes;
        # without it the `size` attribute of each sibling is not filled in.
        info = model_info(model_name, token=token, files_metadata=True)

        # NOTE(review): newer huggingface_hub releases expose `id`, older
        # ones `modelId` - try both rather than depend on the pinned version.
        model_id = getattr(info, "id", None) or getattr(info, "modelId", model_name)
        print(f" [OK] Model found: {model_id}")

        # Report the total over all repo files, not just the first entry
        # (which is an arbitrary file, not the weights).
        file_sizes = [
            size
            for size in (getattr(f, "size", None) for f in (info.siblings or []))
            if size is not None
        ]
        total_size = sum(file_sizes) if file_sizes else "Unknown"
        print(f" [OK] Model size: {total_size} bytes")
    except Exception as e:
        # Heuristic on the error text; the exception class is not inspected.
        reason = str(e).lower()
        if "gated" in reason or "access" in reason:
            print(" [ERROR] Model is gated and requires access request")
            print(f" Please request access at: https://huggingface.co/{model_name}")
        else:
            print(f" [ERROR] Failed to access model: {e}")
        return False
    print()

    # Test 3: Load tokenizer
    print("Step 3: Loading IndicBERT tokenizer...")
    start_time = time.time()
    try:
        from transformers import AutoTokenizer

        tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
        load_time = time.time() - start_time
        print(f" [OK] Tokenizer loaded in {load_time:.2f}s")
    except Exception as e:
        print(f" [ERROR] Failed to load tokenizer: {e}")
        return False
    print()

    # Test 4: Load model
    print("Step 4: Loading IndicBERT model (may take a few minutes on first run)...")
    start_time = time.time()
    try:
        from transformers import AutoModel

        model = AutoModel.from_pretrained(model_name, token=token)
        model.eval()
        load_time = time.time() - start_time
        print(f" [OK] Model loaded in {load_time:.2f}s")
    except Exception as e:
        print(f" [ERROR] Failed to load model: {e}")
        return False
    print()

    # Test 5: Run inference
    print("Step 5: Testing inference...")
    try:
        import torch

        test_message = "You have won 10 lakh rupees! Send OTP now."
        inputs = tokenizer(test_message, return_tensors="pt", truncation=True, max_length=512)
        with torch.no_grad():
            outputs = model(**inputs)
        embedding_shape = outputs.last_hidden_state.shape
        print(" [OK] Inference successful!")
        print(f" [OK] Output shape: {embedding_shape}")
    except Exception as e:
        print(f" [ERROR] Inference failed: {e}")
        return False
    print()

    # Test 6: Test ScamDetector
    # Deliberately non-fatal: a failure here is only reported as a warning.
    print("Step 6: Testing ScamDetector with IndicBERT...")
    try:
        from app.models.detector import ScamDetector, reset_detector_cache

        # Reset cache to force reload with new token
        reset_detector_cache()
        detector = ScamDetector()
        result = detector.detect("You have won 10 lakh rupees! Share OTP to claim.")
        print(" [OK] ScamDetector working!")
        print(f" [OK] Scam detected: {result['scam_detected']}")
        print(f" [OK] Confidence: {result['confidence']:.2f}")
        print(f" [OK] Indicators: {result['indicators']}")
    except Exception as e:
        print(f" [WARNING] ScamDetector test: {e}")
        print(" This may still work - detector has keyword fallback")
    print()

    print("=" * 60)
    print("[SUCCESS] HuggingFace token works!")
    print("=" * 60)
    print()
    print("You can use this token on the server:")
    # Show the variable name only; echoing the secret here would leak it
    # into terminal scrollback and any captured logs.
    print(" HUGGINGFACE_TOKEN=<value from your .env>")
    print()
    print("The model will be downloaded from HuggingFace on first request.")
    return True
if __name__ == "__main__":
    # Exit status 0 when every mandatory step passed, 1 otherwise.
    sys.exit(int(not test_huggingface_token()))