"""FastAPI service that scores text prompts with a local sequence-classification model.

The model files are expected to live in the same directory as this script.
POST a JSON body ``{"prompt": "..."}`` to ``/score`` to get ``{"score": <0-100>}``.
"""

import torch
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# 1. Initialize API
app = FastAPI()


# 2. Define Request Model
class PromptRequest(BaseModel):
    prompt: str


# 3. Load Model (Runs once at startup)
# We use "." because the model files are now in the same folder as this script
MODEL_DIR = "."
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Loading model on {DEVICE}...")

try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR).to(DEVICE)
    model.eval()  # inference mode: disables dropout etc.
    print("✅ Model loaded successfully!")
except Exception as e:
    print(f"❌ Error loading model: {e}")
    # Chain the original exception so the startup failure stays debuggable.
    raise RuntimeError("Model failed to load") from e


# 4. Define the Scoring Logic (The same math from your local script)
def calculate_score(text: str) -> float:
    """Run the model on *text* and map the raw logit to a calibrated 0-100 score.

    The input is truncated to 128 tokens. The raw logit is rescaled by a
    piecewise-linear calibration, clamped to [0, 100], and rounded to 2 dp.
    """
    inputs = tokenizer(
        text, return_tensors="pt", truncation=True, max_length=128
    ).to(DEVICE)
    with torch.no_grad():
        outputs = model(**inputs)
    # NOTE(review): .item() assumes the model emits exactly one logit
    # (num_labels=1 regression head) — it raises for multi-label heads. Confirm.
    raw_score = outputs.logits.item()

    # Calibration Formula: low raw scores are shifted down, high ones stretched.
    if raw_score < 30:
        final_score = raw_score - 20
    else:
        final_score = (raw_score - 30) * 3.33

    # Clamp into [0, 100] before rounding so out-of-range logits stay bounded.
    return round(max(0.0, min(100.0, final_score)), 2)


# 5. Define the API Endpoint
@app.post("/score")
def get_score(request: PromptRequest):
    """Score the submitted prompt and return it as ``{"score": <float>}``."""
    score = calculate_score(request.prompt)
    return {"score": score}


@app.get("/")
def home():
    """Health-check endpoint with a usage hint."""
    return {"status": "Model is running. Send POST request to /score"}