File size: 4,228 Bytes
e705023
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# app.py - Deploy this to Hugging Face Spaces
# Install: pip install fastapi uvicorn torch transformers huggingface_hub

import json
import logging
import os
from pathlib import Path

import torch
import torch.nn as nn
from fastapi import FastAPI, HTTPException
from huggingface_hub import hf_hub_download
from pydantic import BaseModel
from transformers import AutoModel, AutoTokenizer

# ASGI application object; the route decorators below register on it.
app = FastAPI(title="Sentiment Analysis API")

# Filled in by load_model_from_hf(); both stay None until it has run.
model = None
tokenizer = None

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


# Model definition (must match training code)
class SentimentClassifier(nn.Module):
    """Pretrained transformer encoder followed by a linear classification head.

    The attribute names ``bert``, ``dropout`` and ``classifier`` determine the
    ``state_dict`` keys, so they must not be renamed if saved checkpoints are
    to keep loading.

    Args:
        model_name: Hugging Face model id of the encoder backbone.
        num_labels: Number of classes produced by the linear head.
        dropout: Dropout probability applied to the pooled representation.
    """

    def __init__(
        self,
        model_name: str = "distilbert-base-uncased",
        num_labels: int = 2,
        dropout: float = 0.3,
    ):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(dropout)
        # Size the head from the backbone's own config rather than hard-coding
        # 768, so a backbone with a different hidden width still works. For
        # the default DistilBERT backbone this resolves to 768.
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask, **kwargs):
        """Return class logits for a batch of tokenized inputs.

        Args:
            input_ids: Token id tensor forwarded to the encoder.
            attention_mask: Attention mask tensor forwarded to the encoder.
            **kwargs: Accepted and ignored, so a tokenizer's full output dict
                can be unpacked into the call.

        Returns:
            The output of the linear head, one row per batch element.
        """
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Use the hidden state at sequence position 0 as the pooled vector.
        pooled = encoded.last_hidden_state[:, 0]
        return self.classifier(self.dropout(pooled))


# Request/Response models
class PredictionRequest(BaseModel):
    # Request body accepted by the POST /predict handler.
    # `text` is the raw string that the handler passes to the tokenizer.
    text: str


class PredictionResponse(BaseModel):
    # Response body returned by the POST /predict handler.
    # Label chosen by the handler: "positive" or "negative".
    sentiment: str
    # Softmax probability of the predicted class, as computed by the handler.
    confidence: float


def load_model_from_hf(repo_id: str):
    """Download the checkpoint and tokenizer from the Hub and publish them.

    On success the module-level ``model`` and ``tokenizer`` globals are set.
    The call is a no-op when both are already populated.

    Args:
        repo_id: Hugging Face Hub repository that provides ``model.pt``,
            ``config.json`` and the tokenizer files.

    Raises:
        Any exception raised by the download, tokenizer or checkpoint loading
        calls propagates unchanged; the globals are left untouched in that
        case, so a failed load is never reported as a loaded model.
    """
    global model, tokenizer

    if model is not None and tokenizer is not None:
        return  # Already loaded

    print(f"📥 Loading model from {repo_id}...")

    # Download model files
    cache_dir = "./model_cache"
    Path(cache_dir).mkdir(exist_ok=True)

    model_path = hf_hub_download(
        repo_id=repo_id, filename="model.pt", cache_dir=cache_dir
    )

    # Downloaded next to the weights; the returned path is not used here.
    hf_hub_download(repo_id=repo_id, filename="config.json", cache_dir=cache_dir)

    # Build everything in locals first so that a failure part-way through
    # cannot leave a half-initialised model visible to the request handlers.
    loaded_tokenizer = AutoTokenizer.from_pretrained(repo_id, cache_dir=cache_dir)

    classifier = SentimentClassifier()
    # SECURITY: the checkpoint is fetched from a remote repository and
    # torch.load unpickles it. weights_only=True restricts unpickling to
    # tensors and plain containers, which is all a state_dict requires.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)
    classifier.load_state_dict(state_dict)
    classifier.to(device)
    classifier.eval()

    # Publish only once every step above has succeeded.
    tokenizer = loaded_tokenizer
    model = classifier

    print(f"✅ Model loaded successfully on {device}")


@app.on_event("startup")
async def startup_event():
    """Startup hook: load the model before the server handles requests."""
    # MODEL_REPO_ID overrides the repository; otherwise use the built-in one.
    fallback_repo = "angeruiizz/sentiment-model"
    repo_id = os.environ.get("MODEL_REPO_ID", fallback_repo)
    load_model_from_hf(repo_id)


@app.get("/")
def root():
    # Landing endpoint: static service banner plus a map of the routes.
    endpoints = {
        "/predict": "POST - Analyze sentiment of text",
        "/health": "GET - Check if model is loaded",
        "/docs": "GET - Interactive API documentation",
    }
    banner = {"message": "Sentiment Analysis API", "status": "running"}
    banner["endpoints"] = endpoints
    return banner


@app.get("/health")
def health_check():
    # Always reports "healthy"; model_loaded tells whether the global model
    # has been set, and device is the torch device chosen at import time.
    is_loaded = model is not None
    return {"status": "healthy", "model_loaded": is_loaded, "device": str(device)}


@app.post("/predict", response_model=PredictionResponse)
def predict(request: PredictionRequest):
    """Classify the sentiment of the submitted text.

    Returns the predicted label ("positive" or "negative") together with the
    softmax probability of that label, rounded to four decimals. Responds with
    503 while the model is not loaded and with 500 if tokenization or
    inference fails.
    """
    if model is None or tokenizer is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Only tokenization and inference sit inside the try block, so the 500
    # mapping covers exactly the steps that can fail at runtime.
    try:
        # Tokenize input (inputs longer than 512 tokens are truncated)
        encoded = tokenizer(
            request.text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        )
        encoded = {k: v.to(device) for k, v in encoded.items()}

        # Get prediction
        with torch.no_grad():
            logits = model(**encoded)
            probs = torch.softmax(logits, dim=1)
            prediction = torch.argmax(probs, dim=1).item()
            confidence = probs[0][prediction].item()
    except Exception as e:
        # HTTPException is turned into a response by the framework, so without
        # this log call the traceback of the real failure would be lost.
        logging.getLogger(__name__).exception("Prediction failed")
        raise HTTPException(status_code=500, detail=str(e)) from e

    # Class index 1 is reported as positive; any other index as negative.
    sentiment = "positive" if prediction == 1 else "negative"

    return PredictionResponse(sentiment=sentiment, confidence=round(confidence, 4))


if __name__ == "__main__":
    import uvicorn

    # HF Spaces uses port 7860; a PORT environment variable overrides it.
    listen_port = int(os.environ.get("PORT", 7860))
    print("🚀 Starting API server...")
    uvicorn.run(app, host="0.0.0.0", port=listen_port)