Spaces:

build-small-hackathon
/

split-brain-copilot

Running

App Files Files Community

split-brain-copilot / modal_backend /verifier.py

blessingmwiti

Reduce Modal verifier credit usage

cd451e7 1 day ago

raw

history blame contribute delete

4.3 kB

	import json

	import modal


	# Modal application that every function and class in this file registers with.
	app = modal.App("split-brain-verifier")

	# Named volume holding the model weights; it is created if it does not exist yet.
	model_volume = modal.Volume.from_name("qwen-14b-volume", create_if_missing=True)

	# Mount point of the volume inside containers, plus the identity of the
	# quantized GGUF file that is downloaded into it and later loaded from it.
	MODEL_DIR = "/models"
	MODEL_REPO = "bartowski/Qwen2.5-Coder-14B-Instruct-GGUF"
	MODEL_FILENAME = "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf"

	# Minimal image for the download job: only the Hugging Face Hub client.
	download_image = modal.Image.debian_slim(python_version="3.11")
	download_image = download_image.pip_install("huggingface-hub")


	@app.function(
	    image=download_image,
	    volumes={MODEL_DIR: model_volume},
	    timeout=3600,
	    secrets=[modal.Secret.from_name("huggingface-secret")],
	)
	def download_model():
	    """Download the GGUF weights from the Hugging Face Hub into the model volume.

	    The file MODEL_FILENAME from MODEL_REPO is written under MODEL_DIR, where
	    the volume is mounted, and the volume is committed afterwards so the
	    download is persisted.
	    """
	    import huggingface_hub

	    download_args = {
	        "repo_id": MODEL_REPO,
	        "filename": MODEL_FILENAME,
	        "local_dir": MODEL_DIR,
	    }
	    huggingface_hub.hf_hub_download(**download_args)

	    # Persist the freshly written file to the volume.
	    model_volume.commit()
	    print(f"Downloaded to {MODEL_DIR}/{MODEL_FILENAME}")


	# Image for the GPU verifier class, assembled step by step: system build
	# tools, a source checkout and build of llama.cpp, then the Python packages.
	# NOTE(review): no CUDA-related build flags are visible in either the cmake
	# invocation or the llama-cpp-python install; confirm that the resulting
	# package really offloads layers to the GPU requested by the Verifier class.
	llama_image = modal.Image.debian_slim(python_version="3.11")
	llama_image = llama_image.apt_install("build-essential", "cmake", "git", "libgomp1")
	llama_image = llama_image.run_commands(
	    "git clone https://github.com/ggerganov/llama.cpp /llama.cpp",
	    "cd /llama.cpp && cmake -B build -DLLAMA_CURL=OFF && cmake --build build --config Release -j$(nproc)",
	    "cd /llama.cpp && pip install -e .",
	)
	llama_image = llama_image.pip_install(
	    "llama-cpp-python==0.3.4",
	    "fastapi",
	    "uvicorn",
	    "pydantic",
	)


	@app.cls(
	    image=llama_image,
	    gpu="A10G",
	    volumes={MODEL_DIR: model_volume},
	    scaledown_window=60,
	)
	@modal.concurrent(max_inputs=2)
	class Verifier:
	    """Reviews drafted code with the GGUF model stored on the model volume.

	    The model is loaded by ``load_model`` (registered with ``modal.enter``)
	    and reused by every ``verify`` call handled by the same container.
	    """

	    @modal.enter()
	    def load_model(self):
	        """Load the model from the mounted volume and create the inference lock."""
	        import threading

	        from llama_cpp import Llama

	        # The class accepts up to two concurrent inputs (max_inputs=2) but
	        # they all share this single Llama instance, which must not be driven
	        # from several threads at once. Inference is serialized with a lock.
	        self._llm_lock = threading.Lock()
	        self.llm = Llama(
	            model_path=f"{MODEL_DIR}/{MODEL_FILENAME}",
	            n_gpu_layers=-1,
	            n_ctx=8192,
	            n_batch=512,
	            verbose=False,
	        )

	    def _parse_reply(self, raw: str):
	        """Return the JSON value contained in *raw*, or None if there is none.

	        The whole reply is tried first. If that fails, the first JSON object
	        found in the text is decoded instead, so replies wrapped in a markdown
	        fence or surrounded by stray prose are still understood.
	        """
	        try:
	            return json.loads(raw)
	        except json.JSONDecodeError:
	            pass

	        start = raw.find("{")
	        if start == -1:
	            return None
	        try:
	            # raw_decode stops at the end of the first complete JSON value and
	            # ignores whatever follows (closing fence, commentary, ...).
	            value, _end = json.JSONDecoder().raw_decode(raw, start)
	        except json.JSONDecodeError:
	            return None
	        return value

	    @modal.method()
	    def verify(self, prompt: str, draft_code: str, language: str = "python") -> dict:
	        """Ask the model to review *draft_code* and return its verdict.

	        Args:
	            prompt: The original request the draft was written for.
	            draft_code: The drafted code to review.
	            language: Language name used in the instructions and code fence.

	        Returns:
	            The model's JSON object when its ``verdict`` is ``"PASS"``,
	            ``"FIX"`` or ``"REWRITE"``. When the reply cannot be parsed into a
	            JSON object, or carries any other verdict, a ``"PASS"`` result
	            with an explanatory ``reason`` is returned instead.
	        """
	        system = f"""You are a code verifier. A smaller model drafted the following {language} code.
	Your job:
	1. Check for bugs, logic errors, type errors, off-by-one errors, and security issues.
	2. If the code is correct, respond with exactly: {{"verdict": "PASS"}}
	3. If fixable, respond with: {{"verdict": "FIX", "corrected_code": "<fixed code here>", "reason": "<one line>"}}
	4. If fundamentally wrong, respond with: {{"verdict": "REWRITE", "corrected_code": "<rewritten code>", "reason": "<one line>"}}
	Respond ONLY with valid JSON. No markdown, no explanation outside the JSON."""

	        user = f"Original prompt:\n{prompt}\n\nDrafted code:\n```{language}\n{draft_code}\n```"

	        # Only one request at a time may use the shared model instance.
	        with self._llm_lock:
	            response = self.llm.create_chat_completion(
	                messages=[
	                    {"role": "system", "content": system},
	                    {"role": "user", "content": user},
	                ],
	                max_tokens=1024,
	                temperature=0.1,
	            )

	        # Guard against a null content field before stripping.
	        raw = (response["choices"][0]["message"]["content"] or "").strip()

	        parsed = self._parse_reply(raw)
	        if not isinstance(parsed, dict):
	            # Covers unparseable text as well as valid JSON that is not an
	            # object (list, string, number), which has no verdict to read.
	            return {"verdict": "PASS", "reason": "Verifier response could not be parsed."}

	        verdict = parsed.get("verdict")
	        # The isinstance check keeps unhashable values (e.g. a list) from
	        # raising TypeError in the set membership test.
	        if not isinstance(verdict, str) or verdict not in {"PASS", "FIX", "REWRITE"}:
	            return {"verdict": "PASS", "reason": "Verifier returned an unknown verdict."}
	        return parsed


	# Image for the HTTP front end: only the web stack, no model runtime.
	api_image = modal.Image.debian_slim(python_version="3.11")
	api_image = api_image.pip_install("fastapi", "uvicorn", "pydantic")


	@app.function(
	    image=api_image,
	    scaledown_window=30,
	)
	@modal.asgi_app()
	def verifier_endpoint():
	    """Build and return the FastAPI application that fronts the verifier.

	    Routes:
	        POST /verify  forwards the request fields to ``Verifier.verify`` and
	                      returns its result.
	        GET  /health  returns ``{"ok": True}`` without touching the verifier.
	    """
	    from fastapi import FastAPI
	    from fastapi.middleware.cors import CORSMiddleware
	    from pydantic import BaseModel

	    # Any origin, method and header is accepted by the CORS layer.
	    cors_settings = {
	        "allow_origins": ["*"],
	        "allow_methods": ["*"],
	        "allow_headers": ["*"],
	    }
	    web_api = FastAPI()
	    web_api.add_middleware(CORSMiddleware, **cors_settings)

	    class VerifyRequest(BaseModel):
	        # Body of POST /verify; the fields mirror the arguments of Verifier.verify.
	        prompt: str
	        draft_code: str
	        language: str = "python"

	    @web_api.post("/verify")
	    async def verify(req: VerifyRequest):
	        # Run the review on the Verifier class and await its result.
	        verifier = Verifier()
	        verdict = await verifier.verify.remote.aio(
	            req.prompt,
	            req.draft_code,
	            req.language,
	        )
	        return verdict

	    @web_api.get("/health")
	    async def health():
	        return {"ok": True}

	    return web_api


	@app.function()
	def warm_once():
	    """Send one throwaway verification request to the verifier.

	    Run this by hand ahead of a live demo when lower first-hit latency is
	    wanted; the result of the call is discarded.
	    """
	    warmup_args = ("test", "print('hello')", "python")
	    verifier = Verifier()
	    verifier.verify.remote(*warmup_args)