# index.html not found

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, FileResponse
from pydantic import BaseModel
from huggingface_hub import hf_hub_download
import joblib
import string
import re
import os
from pathlib import Path

app = FastAPI(title="SpamDex API", version="1.0")

# CORS middleware
# Every origin, method and header is allowed, so credentialed requests are
# disabled: the FastAPI CORS documentation states that wildcards cannot be
# combined with allow_credentials=True. This API uses no cookies or auth
# headers, so nothing here needs credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global variables for model and vectorizer.
# Both stay None until the startup hook assigns them; the request handlers
# check for None before using them.
model = None
vectorizer = None

# Fetch the classifier artifacts once, when the application starts
@app.on_event("startup")
async def load_model():
    """Download the vectorizer and model from Hugging Face into the globals.

    The vectorizer is fetched and loaded first, then the model; each global
    is assigned as soon as its own file has been loaded. Any failure is
    printed and then re-raised to the caller.
    """
    global model, vectorizer
    repo_id = "DarkNeuron-AI/darkneuron-spamdex-v1"
    try:
        print("🔄 Loading model and vectorizer from Hugging Face...")

        # NOTE(review): joblib.load unpickles the downloaded file, which can
        # execute arbitrary code — only safe while the repository is trusted.
        vectorizer = joblib.load(
            hf_hub_download(repo_id, "spam_detection_vectorizer.pkl")
        )
        model = joblib.load(
            hf_hub_download(repo_id, "spam_detection_model.pkl")
        )

        print("✅ Model and vectorizer loaded successfully!")
    except Exception as exc:
        print(f"❌ Error loading model: {exc!s}")
        raise

# Text cleaning function
# The pattern and translation table never change, so they are built once at
# import time instead of on every request.
_DIGITS_RE = re.compile(r'\d+')
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def clean_text(text: str) -> str:
    """Clean and preprocess text for model input.

    Lower-cases the text, deletes every run of digits, deletes every
    character in ``string.punctuation`` and strips leading/trailing
    whitespace. Interior whitespace is left untouched, so removed tokens can
    leave consecutive spaces behind.

    Args:
        text: Raw input text.

    Returns:
        The cleaned text; an empty string when nothing but digits,
        punctuation and whitespace was present.
    """
    lowered = text.lower()
    without_digits = _DIGITS_RE.sub('', lowered)
    without_punctuation = without_digits.translate(_PUNCTUATION_TABLE)
    return without_punctuation.strip()

# Request model
class TextRequest(BaseModel):
    # JSON body accepted by POST /api/predict.
    # text: the raw message to classify; predict() rejects it with a 400 when
    # it is empty or whitespace-only.
    text: str

# Response model
class PredictionResponse(BaseModel):
    # Body returned by POST /api/predict (declared there as response_model).
    # prediction: "spam" when the predicted label equals 1, otherwise "safe".
    prediction: str
    # label: the model's predicted class cast to int.
    label: int
    # confidence: the largest predict_proba value expressed as a percentage,
    # rounded to two decimals.
    confidence: float
    # cleaned_text: the input after clean_text(); this is what was vectorized.
    cleaned_text: str

@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve the main HTML page.

    Reads ``index.html`` from the directory containing this file.

    Returns:
        HTMLResponse: 200 with the file contents; 404 with a short notice
        when the file does not exist; 500 with the (HTML-escaped) error
        message when reading fails for any other reason.
    """
    # Local import keeps this block self-contained; only the error path uses it.
    from html import escape

    html_path = Path(__file__).parent / "index.html"

    # Read first and handle the failure, rather than checking exists() and
    # then opening: the file can disappear between the two steps.
    try:
        html_content = html_path.read_text(encoding='utf-8')
    except FileNotFoundError:
        return HTMLResponse(
            content="<h1>index.html not found</h1><p>Please add index.html to the root directory</p>",
            status_code=404
        )
    except Exception as e:
        # Exception text may contain characters that are significant in
        # HTML, so escape it before embedding it in the page.
        return HTMLResponse(
            content=f"<h1>Error loading page</h1><p>{escape(str(e))}</p>",
            status_code=500
        )

    return HTMLResponse(content=html_content, status_code=200)

@app.get("/health")
async def health_check():
    """Report service status and whether each artifact global is populated."""
    model_ready = model is not None
    vectorizer_ready = vectorizer is not None
    # "status" is a fixed value; the two flags carry the real information.
    return dict(
        status="healthy",
        model_loaded=model_ready,
        vectorizer_loaded=vectorizer_ready,
    )

@app.post("/api/predict", response_model=PredictionResponse)
async def predict(request: TextRequest):
    """Predict if text is spam or not.

    Args:
        request: Body carrying the raw ``text`` to classify.

    Returns:
        A dict matching ``PredictionResponse``: ``prediction`` ("spam" when
        the predicted label equals 1, otherwise "safe"), ``label``,
        ``confidence`` (highest class probability as a percentage, rounded
        to two decimals) and ``cleaned_text``.

    Raises:
        HTTPException: 503 when the model or vectorizer is not loaded;
            400 when the text is empty, or empty after cleaning;
            500 when vectorization or inference fails.
    """
    if model is None or vectorizer is None:
        raise HTTPException(
            status_code=503,
            detail="Model not loaded. Please try again later."
        )

    if not request.text or not request.text.strip():
        raise HTTPException(
            status_code=400,
            detail="Text cannot be empty"
        )

    # Validation is done before the try block. HTTPException is itself an
    # Exception, so raising this 400 inside the try would be caught by the
    # handler below and turned into a 500.
    cleaned_text = clean_text(request.text)
    if not cleaned_text:
        raise HTTPException(
            status_code=400,
            detail="Text contains no valid content after cleaning"
        )

    # Only the vectorizer/model work is treated as a server-side failure.
    try:
        text_vector = vectorizer.transform([cleaned_text])

        prediction = model.predict(text_vector)[0]

        # Confidence is the highest class probability, as a percentage.
        probabilities = model.predict_proba(text_vector)[0]
        confidence = float(max(probabilities) * 100)

        result = {
            "prediction": "spam" if prediction == 1 else "safe",
            "label": int(prediction),
            "confidence": round(confidence, 2),
            "cleaned_text": cleaned_text
        }
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Prediction error: {str(e)}"
        ) from e

    return result

@app.get("/api/info")
async def model_info():
    """Return static metadata describing the model behind this API."""
    # Label ids are string keys, mapped to their human-readable names.
    label_names = {"0": "Ham (Not Spam)", "1": "Spam"}

    info = dict(
        model_name="SpamDex v1.0",
        algorithm="Naive Bayes (MultinomialNB)",
        vectorization="TF-IDF",
        developer="DarkNeuronAI",
        huggingface_repo="DarkNeuron-AI/darkneuron-spamdex-v1",
        labels=label_names,
    )
    return info

# Script entry point: start the app with uvicorn when this file is run directly.
if __name__ == "__main__":
    # Imported here, so uvicorn is only imported when the file is launched this way.
    import uvicorn
    # Host 0.0.0.0 listens on all network interfaces; the port is fixed at 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)