# Spaces: Running  (Hugging Face Spaces status banner — copy/paste residue, not code)
| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| from llama_cpp import Llama | |
| from huggingface_hub import hf_hub_download | |
| import multiprocessing | |
app = FastAPI()

# ===============================
# MODEL CONFIG
# ===============================
# GGUF quantized build of Qwen2.5-3B-Instruct (Q4_K_M = 4-bit quantization),
# suitable for CPU-only inference via llama.cpp.
MODEL_REPO = "bartowski/Qwen2.5-3B-Instruct-GGUF"
MODEL_FILE = "Qwen2.5-3B-Instruct-Q4_K_M.gguf"

# Download the weights (or reuse the local HF cache) at import time.
# NOTE(review): this blocks app startup until the download finishes.
model_path = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE
)
# ===============================
# LLM INITIALIZATION (OPTIMIZED)
# ===============================
llm = Llama(
    model_path=model_path,
    # Large context window for deep reasoning (8k tokens)
    n_ctx=8192,
    # Use all CPU cores for inference
    n_threads=multiprocessing.cpu_count(),
    # CPU mode: no layers offloaded to a GPU
    n_gpu_layers=0,
    # Prompt-processing batch size (tokens evaluated per batch)
    n_batch=512,
    # Memory-map the weights instead of loading them fully into RAM
    use_mmap=True,
    # Pin the mapped pages so the OS does not swap them out.
    # NOTE(review): mlock can fail or require raised ulimits on some hosts.
    use_mlock=True,
)
# ===============================
# REQUEST MODEL
# ===============================
class ChatRequest(BaseModel):
    """JSON request body for the chat endpoint."""

    # The user's chat message / question.
    message: str
# ===============================
# HEALTH CHECK
# ===============================
# BUG FIX: the function was defined but never registered with FastAPI, so
# GET / returned 404. @app.get("/") wires it up as the health-check route.
@app.get("/")
def root():
    """Health-check endpoint.

    Returns a static status payload so callers can verify the service is
    up without touching the model.
    """
    return {"status": "Strategy AI engine running"}
# ===============================
# CHAT ENDPOINT
# ===============================
# BUG FIX 1: the function was never registered with FastAPI; @app.post
# exposes it at POST /chat.
@app.post("/chat")
def chat(req: ChatRequest):
    """Run one chat turn through the local Qwen2.5 model.

    Args:
        req: Parsed JSON body containing the user's ``message``.

    Returns:
        dict: ``{"reply": <generated text>}``.
    """
    # BUG FIX 2: the original hand-built prompt used
    # <|system|>/<|user|>/<|assistant|>/<|end|> markers, which are NOT
    # Qwen2.5's chat format (Qwen2.5 uses ChatML:
    # <|im_start|>role ... <|im_end|>), so the model saw a malformed prompt
    # and the stop token never matched. create_chat_completion() applies
    # the chat template embedded in the GGUF metadata, so the correct
    # format and stop tokens are used automatically.
    system_prompt = (
        "You are an elite strategic intelligence AI. "
        "Think step-by-step before answering. "
        "Provide deep analysis, structured reasoning, and clear actionable insights. "
        "Use bullet points, numbered steps, and markdown formatting."
    )
    output = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": req.message},
        ],
        # Longer reasoning output
        max_tokens=900,
        # Lower randomness for logical thinking
        temperature=0.35,
        # Stable probability sampling
        top_p=0.9,
        # Prevent loops
        repeat_penalty=1.2,
    )
    # Chat-completion responses carry the text under "message"/"content".
    response_text = output["choices"][0]["message"]["content"].strip()
    return {"reply": response_text}
# ===============================
# LOCAL RUN
# ===============================
if __name__ == "__main__":
    # Imported lazily so the module can also be served by an external ASGI
    # runner without uvicorn being a hard import-time dependency.
    import uvicorn
    # 0.0.0.0 binds all interfaces; 7860 is the standard HF Spaces port.
    uvicorn.run(app, host="0.0.0.0", port=7860)