Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| from transformers import AutoTokenizer, T5ForConditionalGeneration, AutoConfig | |
| import torch | |
app = FastAPI(title="CodeT5+ Backend on HuggingFace")
# ==== LOAD MODEL ====
# NOTE(review): loading happens at import time, so the process blocks until the
# (large) checkpoint is downloaded — the first request only arrives after that.
model_name = "Salesforce/codet5p-770m"  # multilingual code model, not fine-tuned for Python only
print("Loading tokenizer + config...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
config = AutoConfig.from_pretrained(model_name)
print("Loading model weights...")
model = T5ForConditionalGeneration.from_pretrained(
    model_name,
    config=config
)
# Prefer GPU when available; inputs must be moved to the same device (see run_model).
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Running on:", device)
model = model.to(device)
model.eval()  # inference mode: disables dropout etc.
# ==== REQUEST / RESPONSE MODELS ====
class GenerateRequest(BaseModel):
    """Payload for code generation: turn a task description into source code."""

    prompt: str                      # description of the code to generate
    language: str | None = "Python"
    max_new_tokens: int = 128
    num_beams: int = 1               # fewer beams for more stable output
    temperature: float = 0.3         # lower randomness
class FixRequest(BaseModel):
    """Payload for bug fixing: submit broken code, receive a corrected version."""

    code: str                        # the buggy code
    language: str | None = "Python"
    max_new_tokens: int = 128
    num_beams: int = 1
    temperature: float = 0.2         # kept low so fixes stay deterministic-ish
class CompleteRequest(BaseModel):
    """Payload for Cursor-style completion around a cursor position."""

    prefix: str                      # code before the cursor
    suffix: str = ""                 # code after the cursor (if any)
    language: str | None = "Python"
    max_new_tokens: int = 64         # completions are usually short
    num_beams: int = 1               # Cursor-style completion typically uses 1 beam
    temperature: float = 0.3         # more stable output
class CodeResponse(BaseModel):
    """Uniform response wrapper: the generated/fixed/completed code as text."""

    output: str
| # ==== TIỆN ÍCH DÙNG CHUNG ==== | |
| def run_model(prompt: str, | |
| max_new_tokens: int, | |
| num_beams: int, | |
| temperature: float) -> str: | |
| inputs = tokenizer(prompt, return_tensors="pt").to(device) | |
| with torch.no_grad(): | |
| outputs = model.generate( | |
| **inputs, | |
| max_new_tokens=max_new_tokens, | |
| num_beams=num_beams, | |
| temperature=temperature, | |
| early_stopping=True, | |
| repetition_penalty=1.05, # nhẹ để giảm lặp | |
| ) | |
| text = tokenizer.decode(outputs[0], skip_special_tokens=True) | |
| return text.strip() | |
def clean_code(raw: str, lang: str) -> str:
    """Strip leading junk lines (e.g. ':', 'program:') from model output.

    Only the prefix before the first code-looking line is dropped; the
    middle and the end of the output are left untouched. If nothing looks
    like code, the original text is returned stripped.
    """
    lines = raw.splitlines()
    if not lines:
        return raw.strip()

    lang_key = (lang or "").lower()

    def is_codeish(text: str) -> bool:
        # Heuristic: does this line plausibly start real code for `lang_key`?
        text = text.strip()
        if not text:
            return False
        if lang_key == "python":
            # Python typically starts with import/def/class/# comment/decorator.
            return text.startswith(("def ", "class ", "import ", "from ", "#", "@"))
        if lang_key in ("c", "c++", "cpp"):
            return text.startswith(("#include", "int ", "void ", "char ", "float ",
                                    "double ", "struct ", "typedef "))
        # Fallback for other languages: look for common code markers anywhere.
        markers = (";", "{", "}", "=", "function ", "public ", "private ")
        return any(marker in text for marker in markers)

    # Index of the first code-looking line; 0 when no line matches.
    first_code = next((idx for idx, line in enumerate(lines) if is_codeish(line)), 0)
    trimmed = "\n".join(lines[first_code:]).strip()
    return trimmed if trimmed else raw.strip()
# ==== ENDPOINT 1: GENERATE CODE FROM PROMPT ====
# BUG FIX: the function was labeled as an endpoint but never registered on the
# FastAPI app — the route decorator was missing, so /generate returned 404.
@app.post("/generate", response_model=CodeResponse)
def generate_code(req: GenerateRequest):
    """POST /generate — produce runnable code from a natural-language task.

    Wraps ``req.prompt`` in an instruction prompt, runs the model, then
    strips leading junk lines with ``clean_code``.
    """
    lang = req.language or "Python"
    prompt = f"""
You are a helpful coding assistant.
Generate ONLY valid {lang} source code for the task below.
Do NOT add any explanations, comments in natural language, or markdown.
Do NOT repeat the task description.
Return only raw {lang} code that can be run.
Task:
{req.prompt}
Begin {lang} code now:
""".strip()
    output = run_model(
        prompt,
        max_new_tokens=req.max_new_tokens,
        num_beams=req.num_beams,
        temperature=req.temperature,
    )
    output = clean_code(output, lang)
    return CodeResponse(output=output)
# ==== ENDPOINT 2: FIX BUGGY CODE ====
# BUG FIX: route decorator was missing — the handler was never reachable via HTTP.
@app.post("/fix", response_model=CodeResponse)
def fix_code(req: FixRequest):
    """POST /fix — return a corrected version of the submitted buggy code.

    Builds a repair prompt around ``req.code``, runs the model, and cleans
    leading junk lines from the result.
    """
    lang = req.language or "Python"
    prompt = f"""
The following {lang} code contains bugs.
Fix all bugs and return ONLY the corrected {lang} code.
Do NOT add any explanations or comments in natural language.
Do NOT change the language or rewrite the task.
Buggy {lang} code:
{req.code}
Corrected {lang} code:
""".strip()
    output = run_model(
        prompt,
        max_new_tokens=req.max_new_tokens,
        num_beams=req.num_beams,
        temperature=req.temperature,
    )
    output = clean_code(output, lang)
    return CodeResponse(output=output)
# ==== ENDPOINT 3: CURSOR-STYLE CODE COMPLETION ====
# BUG FIX: route decorator was missing — the handler was never reachable via HTTP.
@app.post("/complete", response_model=CodeResponse)
def complete_code(req: CompleteRequest):
    """POST /complete — fill in the code between a prefix and a suffix.

    Mimics Cursor/Copilot-style completion: only the missing snippet at the
    cursor is returned, without the surrounding prefix/suffix.
    """
    lang = req.language or "Python"
    prompt = f"""
You are an AI code completion engine like Cursor or GitHub Copilot.
You will be given the prefix and suffix of a {lang} file.
Your task is to generate ONLY the missing {lang} code between them.
Rules:
- DO NOT repeat the prefix.
- DO NOT repeat the suffix.
- DO NOT add any explanations, natural language text, or markdown.
- DO NOT add imports/includes if they already appear in the prefix.
- Return ONLY raw {lang} code that can be directly inserted at the cursor.
Prefix:
{req.prefix}
<CURSOR HERE>
Suffix:
{req.suffix}
Missing {lang} code:
""".strip()
    output = run_model(
        prompt,
        max_new_tokens=req.max_new_tokens,
        num_beams=req.num_beams,
        temperature=req.temperature,
    )
    # Completions are short snippets; skip clean_code to avoid trimming wrongly.
    return CodeResponse(output=output.strip())
# ==== HEALTHCHECK ====
# BUG FIX: route decorator was missing, so there was no liveness endpoint.
@app.get("/")
def root():
    """GET / — liveness probe so deployments can verify the service is up."""
    return {"status": "CodeT5+ backend is running 🚀"}