# Source: Hugging Face Space by nryadav18 — "Update app.py" (commit a0b3172, verified)
from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
app = FastAPI()

# --- OPTIMIZED 1.5B INITIALIZATION ---
# Loads the quantized Qwen2.5-Coder model once at module import time; the first
# run downloads the matching GGUF file from the Hugging Face Hub.
llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",  # upgraded from a smaller model to 1.5B
    filename="*q4_k_m.gguf",  # Q4_K_M quantization: the sweet spot for speed vs. logic
    n_ctx=1024,  # REDUCED: halving context from 2048 massively speeds up prompt processing
    n_threads=2,  # CRITICAL: match physical CPU cores; over-allocating threads actively slows inference
    n_batch=512,  # prompt-evaluation batch size
    flash_attn=True  # NOTE(review): flash attention enabled for speed — benefit on CPU backends varies, confirm
)
class EvalRequest(BaseModel):
    """Request body for POST /evaluate."""

    # Plain-language description of the assignment the submitted code should solve.
    task_description: str
    # The student's Python source code, as a single string.
    python_code: str
@app.get("/")
async def health_check():
    """Liveness probe: report that the evaluator service is up."""
    status_payload = {
        "status": "Online",
        "message": "1.5B AI Code Evaluator is running!",
    }
    return status_payload
@app.post("/evaluate")
async def evaluate_code(request: EvalRequest):
    """Grade a student's Python submission against its task description.

    Builds a JSON-constrained chat request for the local 1.5B model and
    returns the model's raw JSON string under the "evaluation" key.
    """
    user_prompt = f"{request.task_description}\n\nStudent Code:\n{request.python_code}"
    # 1.5B is smart enough that we can make the prompt shorter (saving time)
    system_prompt = """You are an encouraging Python tutor. Evaluate the code.
RULES: Score 0-100 integer. Praise effort first. Give 1-2 friendly tips.
Output ONLY a valid JSON object matching this schema:
{"score": 0 to 100 based on correctness of code with respect to task description, "feedback": "Great job...", "improvements": ["Tip 1"]}"""
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    completion = llm.create_chat_completion(
        messages=chat_messages,
        max_tokens=512,  # the JSON payload is short; capping saves generation time
        temperature=0.1,
        response_format={"type": "json_object"},  # constrain decoding to valid JSON
    )
    return {"evaluation": completion["choices"][0]["message"]["content"]}