SharmaGroups07 committed on
Commit
61ccc8d
·
verified ·
1 Parent(s): 428ee1f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -9
app.py CHANGED
@@ -2,9 +2,14 @@ from fastapi import FastAPI
2
  from pydantic import BaseModel
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
 
5
 
6
  app = FastAPI()
7
 
 
 
 
 
8
  MODEL_REPO = "bartowski/Qwen2.5-3B-Instruct-GGUF"
9
  MODEL_FILE = "Qwen2.5-3B-Instruct-Q4_K_M.gguf"
10
 
@@ -13,33 +18,88 @@ model_path = hf_hub_download(
13
  filename=MODEL_FILE
14
  )
15
 
 
 
 
 
16
  llm = Llama(
17
  model_path=model_path,
18
- n_ctx=2048,
19
- n_threads=2
 
 
 
 
 
 
 
 
 
 
 
 
20
  )
21
 
 
 
 
 
22
  class ChatRequest(BaseModel):
23
  message: str
24
 
 
 
 
 
25
  @app.get("/")
26
  def root():
27
- return {"status": "AI engine running"}
 
 
 
 
28
 
29
  @app.post("/chat")
30
  def chat(req: ChatRequest):
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  output = llm(
32
- f"<|user|>{req.message}<|assistant|>",
33
- max_tokens=512,
34
- temperature=0.7,
 
 
 
 
 
 
35
  top_p=0.9,
36
- repeat_penalty=1.1,
 
 
 
37
  stop=["<|end|>"]
38
  )
39
 
40
- return {"reply": output["choices"][0]["text"]}
 
 
 
 
 
 
41
 
42
- # ⭐ THIS PART WAS MISSING
43
  if __name__ == "__main__":
44
  import uvicorn
45
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
2
  from pydantic import BaseModel
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
5
+ import multiprocessing
6
 
7
  app = FastAPI()
8
 
9
+ # ===============================
10
+ # MODEL CONFIG
11
+ # ===============================
12
+
13
  MODEL_REPO = "bartowski/Qwen2.5-3B-Instruct-GGUF"
14
  MODEL_FILE = "Qwen2.5-3B-Instruct-Q4_K_M.gguf"
15
 
 
18
  filename=MODEL_FILE
19
  )
20
 
21
+ # ===============================
22
+ # LLM INITIALIZATION (OPTIMIZED)
23
+ # ===============================
24
+
25
# ===============================
# LLM INITIALIZATION (OPTIMIZED)
# ===============================

import os

# Number of CPUs actually available to this process. On containerized
# hosts (e.g. Hugging Face Spaces) the scheduler affinity mask is often
# smaller than multiprocessing.cpu_count(), which reports the host's
# total logical cores; oversubscribing llama.cpp threads degrades
# throughput. Fall back to cpu_count() on platforms without
# sched_getaffinity (macOS, Windows).
try:
    _n_threads = len(os.sched_getaffinity(0))
except AttributeError:
    _n_threads = multiprocessing.cpu_count()

llm = Llama(
    model_path=model_path,

    # Large context window for long prompts / deep reasoning.
    n_ctx=8192,

    # Use every CPU core the process is actually allowed to run on.
    n_threads=_n_threads,

    # CPU-only inference (no layers offloaded to a GPU).
    n_gpu_layers=0,

    # Prompt-processing batch size: trades memory for throughput.
    n_batch=512,

    # Memory-map the GGUF file instead of reading it fully into RAM.
    use_mmap=True,

    # Lock model pages in RAM to avoid swapping.
    # NOTE(review): mlock can fail or be silently ignored when
    # RLIMIT_MEMLOCK is low (common in containers) — confirm it is
    # honored on the deployment host.
    use_mlock=True,
)
42
 
43
# ===============================
# REQUEST MODEL
# ===============================

class ChatRequest(BaseModel):
    """Request body for POST /chat."""

    # The user's chat message to forward to the model.
    message: str
49
 
50
+ # ===============================
51
+ # HEALTH CHECK
52
+ # ===============================
53
+
54
@app.get("/")
def root():
    """Health-check endpoint: reports that the service is up."""
    status_payload = {"status": "Strategy AI engine running"}
    return status_payload
57
+
58
+ # ===============================
59
+ # CHAT ENDPOINT
60
+ # ===============================
61
 
62
@app.post("/chat")
def chat(req: ChatRequest):
    """Generate a strategy-focused reply to ``req.message``.

    Runs a chat completion against the locally loaded GGUF model and
    returns ``{"reply": <generated text>}``.
    """
    # STRATEGY SPECIALIZED SYSTEM PROMPT
    system_prompt = (
        "You are an elite strategic intelligence AI. "
        "Think step-by-step before answering. "
        "Provide deep analysis, structured reasoning, and clear actionable insights. "
        "Use bullet points, numbered steps, and markdown formatting."
    )

    # BUG FIX: the previous code hand-built the prompt with
    # <|user|>/<|assistant|>/<|end|> markers, which are not Qwen2.5's
    # chat format (Qwen2.5 uses ChatML: <|im_start|>role ... <|im_end|>).
    # create_chat_completion applies the chat template embedded in the
    # GGUF metadata, so the prompt and stop tokens are always correct
    # for the loaded model.
    output = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": req.message},
        ],

        # Longer reasoning output.
        max_tokens=900,

        # Lower randomness for logical thinking.
        temperature=0.35,

        # Stable probability sampling.
        top_p=0.9,

        # Prevent loops.
        repeat_penalty=1.2,
    )

    response_text = output["choices"][0]["message"]["content"].strip()

    return {"reply": response_text}
98
+
99
# ===============================
# LOCAL RUN
# ===============================

# Script entry point: serve the FastAPI app with uvicorn, bound to all
# interfaces. Port 7860 is presumably chosen for Hugging Face Spaces,
# which expects apps on that port — confirm against the deployment config.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)