# FastAPI inference service: serves a sequence-classification model via /score.
import torch
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# 1. Initialize API
# Single FastAPI application instance; the /score and / endpoints below
# are registered on it via decorators.
app = FastAPI()
# 2. Define Request Model
class PromptRequest(BaseModel):
    """Request body for the /score endpoint: a single free-text prompt."""
    prompt: str
# 3. Load Model (Runs once at startup)
# We use "." because the model files are now in the same folder as this script
MODEL_DIR = "."
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Loading model on {DEVICE}...")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR).to(DEVICE)
    # Switch to eval mode so dropout/normalization layers behave
    # deterministically during inference.
    model.eval()
    print("✅ Model loaded successfully!")
except Exception as e:
    print(f"❌ Error loading model: {e}")
    # Chain the original exception explicitly so the startup traceback
    # shows the root cause (missing files, bad checkpoint, CUDA errors, ...).
    raise RuntimeError("Model failed to load") from e
# 4. Define the Scoring Logic (The same math from your local script)
def calculate_score(text):
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128).to(DEVICE)
with torch.no_grad():
outputs = model(**inputs)
raw_score = outputs.logits.item()
# Calibration Formula
if raw_score < 30:
final_score = raw_score - 20
else:
final_score = (raw_score - 30) * 3.33
return round(max(0.0, min(100.0, final_score)), 2)
# 5. Define the API Endpoint
@app.post("/score")
def get_score(request: PromptRequest) -> dict:
    """Score a prompt.

    Expects JSON ``{"prompt": "..."}`` and returns ``{"score": <float>}``,
    where the score comes from calculate_score (calibrated 0-100 range).
    """
    score = calculate_score(request.prompt)
    return {"score": score}
@app.get("/")
def home():
    """Health-check endpoint: confirms the service is up and points at /score."""
    status_payload = {"status": "Model is running. Send POST request to /score"}
    return status_payload