Spaces:

KazeStudy
/

codeT5Extension

Sleeping

App Files Files Community

KazeStudy committed 4 days ago

Commit

3587c1f

1 Parent(s): d60814d

Update app.py fithly

Browse files

Files changed (1) hide show

app.py +32 -98

app.py CHANGED Viewed

@@ -28,10 +28,10 @@ model.eval()
 class GenerateRequest(BaseModel):
-    prompt: str                     # mô tả cần sinh code
     language: str | None = "Python"
     max_new_tokens: int = 128
-    num_beams: int = 1              # ít beam hơn cho ổn định
     temperature: float = 0.3        # giảm randomness
@@ -45,11 +45,11 @@ class FixRequest(BaseModel):
 class CompleteRequest(BaseModel):
     prefix: str                     # code phía trước con trỏ
-    suffix: str = ""                # code phía sau con trỏ (nếu có)
     language: str | None = "Python"
     max_new_tokens: int = 64        # completion thường ngắn
-    num_beams: int = 1              # completion kiểu Cursor thường để 1 beam
-    temperature: float = 0.3        # ổn định hơn
 class CodeResponse(BaseModel):
@@ -72,71 +72,27 @@ def run_model(prompt: str,
             num_beams=num_beams,
             temperature=temperature,
             early_stopping=True,
-            repetition_penalty=1.05,   # nhẹ để giảm lặp
         )
     text = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return text.strip()
-def clean_code(raw: str, lang: str) -> str:
-    """
-    Dọn mấy dòng rác đầu output (vd: ':', 'program:', ...) cho ra code “sạch” hơn.
-    Không đụng gì phần giữa & cuối.
-    """
-    lines = raw.splitlines()
-    if not lines:
-        return raw.strip()
-    lang_low = (lang or "").lower()
-    def looks_like_code(s: str) -> bool:
-        s = s.strip()
-        if not s:
-            return False
-        if lang_low == "python":
-            # thường bắt đầu bằng import/def/class/# comment
-            prefixes = ("def ", "class ", "import ", "from ", "#", "@")
-            return s.startswith(prefixes)
-        elif lang_low in ("c", "c++", "cpp"):
-            prefixes = ("#include", "int ", "void ", "char ", "float ",
-                        "double ", "struct ", "typedef ")
-            return s.startswith(prefixes)
-        else:
-            # fallback cho ngôn ngữ khác
-            return any(ch in s for ch in (";", "{", "}", "=", "function ", "public ", "private "))
-    start = 0
-    for i, line in enumerate(lines):
-        if looks_like_code(line):
-            start = i
-            break
-    cleaned = "\n".join(lines[start:]).strip()
-    return cleaned if cleaned else raw.strip()
 # ==== ENDPOINT 1: TẠO CODE TỪ PROMPT ====
 @app.post("/generate-code", response_model=CodeResponse)
 def generate_code(req: GenerateRequest):
     lang = req.language or "Python"
-    prompt = f"""
-You are a helpful coding assistant.
-Generate ONLY valid {lang} source code for the task below.
-Do NOT add any explanations, comments in natural language, or markdown.
-Do NOT repeat the task description.
-Return only raw {lang} code that can be run.
-Task:
-{req.prompt}
-Begin {lang} code now:
-""".strip()
     output = run_model(
         prompt,
@@ -145,8 +101,6 @@ Begin {lang} code now:
         temperature=req.temperature,
     )
-    output = clean_code(output, lang)
     return CodeResponse(output=output)
@@ -155,19 +109,17 @@ Begin {lang} code now:
 @app.post("/fix-code", response_model=CodeResponse)
 def fix_code(req: FixRequest):
     lang = req.language or "Python"
-    prompt = f"""
-The following {lang} code contains bugs.
-Fix all bugs and return ONLY the corrected {lang} code.
-Do NOT add any explanations or comments in natural language.
-Do NOT change the language or rewrite the task.
-Buggy {lang} code:
-{req.code}
-Corrected {lang} code:
-""".strip()
     output = run_model(
         prompt,
@@ -176,41 +128,24 @@ Corrected {lang} code:
         temperature=req.temperature,
     )
-    output = clean_code(output, lang)
     return CodeResponse(output=output)
-# ==== ENDPOINT 3: GỢI Ý CODE KIỂU CURSOR (COMPLETION) ====
 @app.post("/complete-code", response_model=CodeResponse)
 def complete_code(req: CompleteRequest):
     lang = req.language or "Python"
-    prompt = f"""
-You are an AI code completion engine like Cursor or GitHub Copilot.
-You will be given the prefix and suffix of a {lang} file.
-Your task is to generate ONLY the missing {lang} code between them.
-Rules:
-- DO NOT repeat the prefix.
-- DO NOT repeat the suffix.
-- DO NOT add any explanations, natural language text, or markdown.
-- DO NOT add imports/includes if they already appear in the prefix.
-- Return ONLY raw {lang} code that can be directly inserted at the cursor.
-Prefix:
-{req.prefix}
-<CURSOR HERE>
-Suffix:
-{req.suffix}
-Missing {lang} code:
-""".strip()
     output = run_model(
         prompt,
@@ -219,8 +154,7 @@ Missing {lang} code:
         temperature=req.temperature,
     )
-    # completion thường là snippet ngắn, không clean để tránh cắt nhầm
-    return CodeResponse(output=output.strip())
 # ==== HEALTHCHECK ====

 class GenerateRequest(BaseModel):
+    prompt: str                     # mô tả cần sinh code (nên gửi tiếng Anh)
     language: str | None = "Python"
     max_new_tokens: int = 128
+    num_beams: int = 1              # ít beam cho ổn định
     temperature: float = 0.3        # giảm randomness
 class CompleteRequest(BaseModel):
     prefix: str                     # code phía trước con trỏ
+    suffix: str = ""                # code phía sau con trỏ (chưa dùng nhiều, vì Codet5 không phải infill)
     language: str | None = "Python"
     max_new_tokens: int = 64        # completion thường ngắn
+    num_beams: int = 1
+    temperature: float = 0.3
 class CodeResponse(BaseModel):
             num_beams=num_beams,
             temperature=temperature,
             early_stopping=True,
+            repetition_penalty=1.05,  # nhẹ để giảm lặp
         )
     text = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return text.strip()
 # ==== ENDPOINT 1: TẠO CODE TỪ PROMPT ====
 @app.post("/generate-code", response_model=CodeResponse)
 def generate_code(req: GenerateRequest):
+    """
+    Sinh code từ mô tả.
+    Lưu ý: Codet5+ “thích” prompt ngắn, dạng pattern.
+    """
     lang = req.language or "Python"
+    # Prompt cực ngắn, đúng style CodeT5 (tránh essay dài)
+    # Ví dụ: "Python code:\n# Task: Create a function that prints numbers from 1 to 10.\n"
+    prompt = f"{lang} code:\n# Task: {req.prompt}\n"
     output = run_model(
         prompt,
         temperature=req.temperature,
     )
     return CodeResponse(output=output)
 @app.post("/fix-code", response_model=CodeResponse)
 def fix_code(req: FixRequest):
+    """
+    Sửa lỗi code: input là code sai, output là code đúng.
+    """
     lang = req.language or "Python"
+    # Cũng giữ prompt thật đơn giản
+    prompt = (
+        f"Fix the following {lang} code:\n"
+        f"{req.code}\n\n"
+        f"Fixed {lang} code:\n"
+    )
     output = run_model(
         prompt,
         temperature=req.temperature,
     )
     return CodeResponse(output=output)
+# ==== ENDPOINT 3: GỢI Ý CODE (KIỂU CURSOR – DÙ CHỈ DÙNG PREFIX) ====
 @app.post("/complete-code", response_model=CodeResponse)
 def complete_code(req: CompleteRequest):
+    """
+    Gợi ý code tiếp theo dựa trên prefix.
+    Lưu ý: Codet5p-770m không phải model infill thực sự,
+    nên suffix ít tác dụng. Ở đây ta dùng chủ yếu prefix.
+    """
     lang = req.language or "Python"
+    # Dùng prefix làm context, để model tiếp tục code.
+    # Suffix có thể dùng để hiển thị phía client, còn model chủ yếu nhìn prefix.
+    prompt = f"{lang} code:\n{req.prefix}"
     output = run_model(
         prompt,
         temperature=req.temperature,
     )
+    return CodeResponse(output=output)
 # ==== HEALTHCHECK ====