from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, T5ForConditionalGeneration, AutoConfig
import torch

app = FastAPI(title="CodeT5+ Backend on HuggingFace")

# ==== LOAD MODEL ====
base_ckpt = "Salesforce/codet5p-770m"
finetuned_ckpt = "OSS-forge/codet5p-770m-pyresbugs"

# Tokenizer and config come from the base checkpoint; the fine-tuned
# checkpoint shares the same vocabulary and architecture, only the weights differ.
print("Loading tokenizer + config...")
tokenizer = AutoTokenizer.from_pretrained(base_ckpt)
config = AutoConfig.from_pretrained(base_ckpt)

print("Loading fine-tuned model weights...")
model = T5ForConditionalGeneration.from_pretrained(finetuned_ckpt, config=config)

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Running on:", device)
model = model.to(device)
model.eval()  # inference only: disables dropout


# ==== REQUEST / RESPONSE MODELS ====
class GenerateRequest(BaseModel):
    prompt: str
    language: str | None = "Python"
    task: str = "generate"  # "generate", "fix", or raw pass-through
    max_new_tokens: int = 128
    num_beams: int = 4
    temperature: float = 0.7


class GenerateResponse(BaseModel):
    output: str


def build_prompt(req: GenerateRequest) -> str:
    """Wrap the raw prompt in a task-specific instruction template."""
    if req.task == "generate":
        return f"Generate {req.language} code:\n{req.prompt}"
    elif req.task == "fix":
        return f"Fix the bug in the following {req.language} code:\n{req.prompt}\n\nCorrected code:"
    else:
        return req.prompt


@app.post("/generate", response_model=GenerateResponse)
def generate(req: GenerateRequest):
    prompt = build_prompt(req)
    # Truncate to the tokenizer's model_max_length so oversized prompts
    # don't overflow the encoder.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=req.max_new_tokens,
            num_beams=req.num_beams,
            do_sample=True,  # temperature is ignored (with a warning) unless sampling is enabled
            temperature=req.temperature,
            early_stopping=True,
        )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return GenerateResponse(output=text)


@app.get("/")
def root():
    # Simple health-check endpoint.
    return {"status": "CodeT5+ backend is running 🚀"}
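
# --- Usage sketch ---
# A minimal way to exercise the /generate endpoint. Assumptions not found in
# the code above: the file is saved as main.py, served with uvicorn on its
# default port 8000, and the client uses the `requests` package (which is not
# a dependency of this app). Adjust to your deployment.
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
# Then, from a separate process:
#
#   import requests
#   resp = requests.post(
#       "http://localhost:8000/generate",
#       json={"prompt": "def add(a, b):\n    return a - b", "task": "fix"},
#   )
#   print(resp.json()["output"])
#
# The response is a JSON body with a single "output" field holding the
# decoded model text, matching the GenerateResponse schema.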