File size: 1,575 Bytes
a3ebb4f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import torch
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# 1. Initialize API
app = FastAPI()  # the ASGI application object that the route decorators below attach to

# 2. Define Request Model
class PromptRequest(BaseModel):
    """Request body for POST /score: a single text prompt to be scored."""
    prompt: str  # the text sent to the model for scoring

# 3. Load Model (Runs once at startup)
# We use "." because the model files are now in the same folder as this script
MODEL_DIR = "."
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Loading model on {DEVICE}...")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR).to(DEVICE)
    model.eval()  # inference mode: disables dropout / training-only behavior
    print("✅ Model loaded successfully!")
except Exception as e:
    print(f"❌ Error loading model: {e}")
    # Chain the original exception (`from e`) so the real cause of the
    # load failure is preserved in the traceback instead of being lost.
    raise RuntimeError("Model failed to load") from e

# 4. Define the Scoring Logic (The same math from your local script)
def _calibrate(raw_score: float) -> float:
    """Map a raw model logit to a calibrated score in [0, 100].

    Piecewise-linear calibration: raw scores below 30 are shifted down
    by 20; raw scores of 30 and above are rescaled by 3.33 from a zero
    baseline.  The result is clamped to [0.0, 100.0] and rounded to two
    decimal places.
    """
    if raw_score < 30:
        final_score = raw_score - 20
    else:
        final_score = (raw_score - 30) * 3.33
    return round(max(0.0, min(100.0, final_score)), 2)


def calculate_score(text: str) -> float:
    """Tokenize *text*, run the model, and return the calibrated 0-100 score.

    Uses the module-level ``tokenizer``/``model`` loaded at startup.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128).to(DEVICE)
    with torch.no_grad():  # inference only — no gradient bookkeeping
        outputs = model(**inputs)

    # .item() assumes a single-logit (regression-style) head — TODO confirm
    # against the checkpoint's config (num_labels == 1).
    raw_score = outputs.logits.item()

    return _calibrate(raw_score)

# 5. Define the API Endpoint
@app.post("/score")
def get_score(request: PromptRequest):
    """Score the submitted prompt and return it as a JSON payload."""
    return {"score": calculate_score(request.prompt)}

@app.get("/")
def home():
    """Health-check endpoint confirming the service is up."""
    payload = {"status": "Model is running. Send POST request to /score"}
    return payload