from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import multiprocessing

app = FastAPI()

# ===============================
# MODEL CONFIG
# ===============================
MODEL_REPO = "bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF"
MODEL_FILE = "Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf"

model_path = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE
)
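
# hf_hub_download caches the file locally (under ~/.cache/huggingface/hub
# by default), so repeated startups reuse the downloaded weights instead
# of fetching them again.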

# ===============================
# LLM INITIALIZATION (OPTIMIZED)
# ===============================
llm = Llama(
    model_path=model_path,
    # Larger context for coding tasks
    n_ctx=8192,
    # Use all CPU cores
    n_threads=multiprocessing.cpu_count(),
    # CPU inference
    n_gpu_layers=0,
    # PERFORMANCE BOOST
    n_batch=512,
    use_mmap=True,
    use_mlock=True,
)
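
# Note: use_mlock above asks the OS to pin the weights in RAM; on hosts
# with a low memlock limit the lock may be refused, in which case
# llama.cpp logs a warning and continues with plain mmap.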

# ===============================
# REQUEST MODEL
# ===============================
class ChatRequest(BaseModel):
    message: str

# ===============================
# HEALTH CHECK
# ===============================
@app.get("/")
def root():
    return {"status": "Coding AI engine running"}

# ===============================
# CHAT ENDPOINT
# ===============================
@app.post("/chat")
def chat(req: ChatRequest):
    # CODING-SPECIALIZED SYSTEM PROMPT (ChatML format, which Qwen2.5 expects)
    system_prompt = (
        "<|im_start|>system\n"
        "You are an elite senior software engineer AI. "
        "Write clean, production-ready code. "
        "Always include comments. "
        "Use best practices, error handling, and optimization. "
        "Format output in proper markdown with code blocks."
        "<|im_end|>\n"
    )
    prompt = system_prompt + f"<|im_start|>user\n{req.message}<|im_end|>\n<|im_start|>assistant\n"

    output = llm(
        prompt,
        # Larger token output for code
        max_tokens=800,
        # Lower randomness = better code
        temperature=0.4,
        # Stable generation
        top_p=0.9,
        # Prevent repetition loops
        repeat_penalty=1.2,
        # Stop at the ChatML end-of-turn token
        stop=["<|im_end|>"]
    )

    response_text = output["choices"][0]["text"].strip()
    return {"reply": response_text}

# ===============================
# LOCAL RUN
# ===============================
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
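
# ===============================
# EXAMPLE REQUEST (sketch)
# ===============================
# A minimal client sketch, assuming the server is reachable at
# http://localhost:7860 as configured above:
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/chat",
#       json={"message": "Write a Python function that reverses a string."},
#   )
#   print(resp.json()["reply"])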