"""FastAPI service that serves a fine-tuned T5 grammar-correction model.

Endpoints:
    GET  /         -- liveness/status message.
    POST /predict  -- returns the submitted text and the model's correction.
"""

from fastapi import FastAPI
from pydantic import BaseModel
from transformers import T5ForConditionalGeneration, T5Tokenizer
import torch

# Initialize FastAPI
app = FastAPI()

# Hugging Face identifier of the fine-tuned checkpoint to serve.
model_id = "Moncey10/grammar-t5-small-finetuned"
tokenizer = T5Tokenizer.from_pretrained(model_id)
model = T5ForConditionalGeneration.from_pretrained(model_id)

# Choose the device and move the weights exactly once at import time, rather
# than re-detecting the device and calling model.to() on every request.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)


class GrammarRequest(BaseModel):
    """Request body for ``POST /predict``."""

    # Raw text to be grammar-corrected.
    text: str


@app.get("/")
def home():
    """Return a static status payload showing the API is reachable."""
    return {"status": "Online", "message": "Grammar API is running"}


@app.post("/predict")
def predict(request: GrammarRequest):
    """Run grammar correction on ``request.text``.

    Args:
        request: Parsed request body carrying the text to correct.

    Returns:
        A dict with ``"original"`` (the text exactly as submitted) and
        ``"corrected"`` (the decoded model output, or an empty string when
        the submitted text is blank).
    """
    text = request.text.strip()
    if not text:
        # Nothing to correct: skip the beam search instead of generating
        # from the bare task prefix.
        return {"original": request.text, "corrected": ""}

    # The "gec: " task prefix is prepended to every input (per the original
    # author's note, it is the prefix the model was fine-tuned with).
    input_text = "gec: " + text

    # truncation=True caps the encoded input at the tokenizer's configured
    # maximum length, so an oversized request body cannot trigger an
    # unbounded forward pass. The full encoding is moved to the device so
    # that generate() also receives the attention mask.
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True).to(device)

    # Generation settings carried over unchanged from the original script.
    outputs = model.generate(
        **inputs,
        max_length=128,
        num_beams=10,
        early_stopping=True,
        no_repeat_ngram_size=2,
    )

    corrected = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"original": request.text, "corrected": corrected}