from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
app = FastAPI()
# --- Model initialization (runs once at import/startup) ---
# Downloads (or reuses the cached) GGUF weights from the Hugging Face Hub
# and loads them into a llama.cpp instance for CPU inference.
llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",  # 1.5B instruct model
    filename="*q4_k_m.gguf",  # Q4_K_M quantization: good speed/quality trade-off
    n_ctx=1024,  # Smaller context window speeds up prompt processing
    n_threads=2,  # Presumably matched to the host's physical cores — TODO confirm
    n_batch=512,  # Prompt-evaluation batch size
    flash_attn=True  # Flash Attention for faster attention computation
)
class EvalRequest(BaseModel):
    """Request body for the /evaluate endpoint."""
    # The assignment/problem statement the code is judged against.
    task_description: str
    # The student's submitted Python source code, as plain text.
    python_code: str
@app.get("/")
async def health_check():
    """Liveness probe: report that the service process is up and serving."""
    payload = {
        "status": "Online",
        "message": "1.5B AI Code Evaluator is running!",
    }
    return payload
@app.post("/evaluate")
async def evaluate_code(request: EvalRequest):
    """Score a student's Python submission against its task description.

    Builds a system+user chat prompt, asks the local GGUF model for a
    JSON-formatted evaluation, and returns the model's raw JSON text
    under the "evaluation" key.
    """
    prompt = f"{request.task_description}\n\nStudent Code:\n{request.python_code}"
    # A short system prompt keeps prompt-processing time low on CPU.
    system_prompt = """You are an encouraging Python tutor. Evaluate the code.
RULES: Score 0-100 integer. Praise effort first. Give 1-2 friendly tips.
Output ONLY a valid JSON object matching this schema:
{"score": 0 to 100 based on correctness of code with respect to task description, "feedback": "Great job...", "improvements": ["Tip 1"]}"""
    response = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        max_tokens=512,  # The JSON reply is short; capping saves generation time.
        temperature=0.1,  # Near-deterministic sampling keeps the JSON schema stable.
        response_format={"type": "json_object"},  # Constrain output to valid JSON.
    )
    # Fixed: removed stray trailing "|" (scrape residue) that made this
    # return statement a syntax error in the original.
    return {"evaluation": response['choices'][0]['message']['content']}