# Hugging Face Space: Qwen2.5-Coder-1.5B code-evaluation API (Space status shown as "Sleeping")
from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
import llama_cpp  # module import kept: GGML_TYPE_* constants are referenced below

# FastAPI application instance; route handlers are registered on it below.
app = FastAPI()
# --- HIGHEST OPTIMIZATION FOR 1.5B ---
# Loads a 4-bit-quantized Qwen2.5-Coder-1.5B-Instruct GGUF from the HF Hub.
# Module-level side effect: downloads/mmaps the model once at import time.
llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
    filename="*q4_k_m.gguf",          # glob: pick the Q4_K_M quantization
    n_ctx=1024,                       # small context window keeps RAM low
    n_threads=2,                      # match your physical cores
    n_batch=512,
    flash_attn=True,
    # BUG FIX: the original passed n_mlock=True, which is not a llama-cpp-python
    # parameter (it was silently swallowed by **kwargs). The real kwarg is
    # use_mlock — keep the model resident in RAM for consistent speed.
    use_mlock=True,
    type_k=llama_cpp.GGML_TYPE_Q4_0,  # 4-bit K cache for faster processing
    verbose=False,
)
class EvalRequest(BaseModel):
    """Request payload for code evaluation.

    Attributes:
        task_description: Plain-text statement of what the code should do.
        python_code: The candidate Python source to be evaluated.
    """

    task_description: str
    python_code: str
# BUG FIX: the handler was defined but never registered as a route, making it
# unreachable. Registered at "/" (path is a reasonable guess — confirm against
# the original deployment if a different path was intended).
@app.get("/")
async def health_check():
    """Liveness probe: report that the service and model are loaded."""
    return {"status": "Online", "message": "Optimized 1.5B Evaluator Ready"}
# BUG FIX: the handler was defined but never registered as a route, making it
# unreachable. Registered at "/evaluate" (path name is a reasonable guess —
# confirm against the original clients).
@app.post("/evaluate")
async def evaluate_code(request: EvalRequest):
    """Score a Python submission against its task description with the LLM.

    Returns:
        {"evaluation": <raw model output>} — expected to be a JSON object
        string with keys "score", "feedback", "improvements".
    """
    # Minimalist prompt for faster processing
    prompt = f"TASK: {request.task_description}\n\nCODE:\n{request.python_code}\n\nEVALUATE:"
    system_prompt = 'You are a Python tutor. Output ONLY JSON: {"score": int, "feedback": str, "improvements": list}'
    response = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        max_tokens=400,
        temperature=0.1,
        repeat_penalty=1.1,
        # BUG FIX: the original passed stop=["}"], intending to halt when the
        # JSON closed. Stop sequences are EXCLUDED from the returned text, so
        # the content always lost its final "}" — and generation stopped at the
        # FIRST "}", even one inside a nested object — guaranteeing invalid
        # JSON. response_format below already constrains output to one JSON
        # object, so no stop sequence is needed.
        response_format={"type": "json_object"},
    )
    return {"evaluation": response['choices'][0]['message']['content']}