from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, T5ForConditionalGeneration, AutoConfig
import torch

app = FastAPI(title="CodeT5+ Backend on HuggingFace")
# ==== LOAD MODEL ====
base_ckpt = "Salesforce/codet5p-770m"
finetuned_ckpt = "OSS-forge/codet5p-770m-pyresbugs"

print("Loading tokenizer + config...")
tokenizer = AutoTokenizer.from_pretrained(base_ckpt)
config = AutoConfig.from_pretrained(base_ckpt)

print("Loading fine-tuned model weights...")
model = T5ForConditionalGeneration.from_pretrained(
    finetuned_ckpt,
    config=config,
)

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Running on:", device)
model = model.to(device)
model.eval()
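# Optional (an assumption, not part of the original setup): on CUDA, the 770M
# checkpoint can be cast to half precision to roughly halve GPU memory use:
#   if device == "cuda":
#       model = model.half()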
# ==== REQUEST / RESPONSE MODELS ====
class GenerateRequest(BaseModel):
    prompt: str
    language: str | None = "Python"
    task: str = "generate"  # "generate", "fix", or anything else for raw pass-through
    max_new_tokens: int = 128
    num_beams: int = 4
    temperature: float = 0.7

class GenerateResponse(BaseModel):
    output: str
def build_prompt(req: GenerateRequest) -> str:
    """Wrap the raw prompt in a task-specific instruction."""
    if req.task == "generate":
        return f"Generate {req.language} code:\n{req.prompt}"
    elif req.task == "fix":
        return f"Fix the bug in the following {req.language} code:\n{req.prompt}\n\nCorrected code:"
    else:
        return req.prompt
@app.post("/generate", response_model=GenerateResponse)
def generate(req: GenerateRequest):
    prompt = build_prompt(req)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=req.max_new_tokens,
            num_beams=req.num_beams,
            temperature=req.temperature,  # note: only affects sampling; ignored by pure beam search
            early_stopping=True,
        )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return GenerateResponse(output=text)
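# Example call (a sketch; assumes the server is reachable locally on port 7860,
# the port used by the hypothetical launch block at the bottom of this file):
#   curl -X POST http://localhost:7860/generate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "def add(a, b):\n    return a - b", "task": "fix"}'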
@app.get("/")
def root():
    return {"status": "CodeT5+ backend is running"}