File size: 2,269 Bytes
21671b5
 
 
 
61ccc8d
21671b5
 
 
61ccc8d
 
 
 
0a5f402
21671b5
 
 
 
 
 
 
61ccc8d
 
 
 
21671b5
 
61ccc8d
 
 
 
 
 
 
 
 
 
 
 
 
 
21671b5
 
61ccc8d
 
 
 
21671b5
 
 
61ccc8d
 
 
 
21671b5
 
61ccc8d
 
 
 
 
21671b5
 
 
61ccc8d
 
 
 
 
 
 
 
 
 
 
 
 
21671b5
61ccc8d
 
 
 
 
 
 
 
 
428ee1f
61ccc8d
 
 
 
21671b5
 
428ee1f
61ccc8d
 
 
 
 
 
 
21671b5
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import multiprocessing

# ASGI application object; the @app.get / @app.post handlers below register on it.
app = FastAPI()

# ===============================
# MODEL CONFIG
# ===============================

# Hugging Face Hub coordinates of the quantized GGUF weights.
MODEL_REPO = "bartowski/Qwen2.5-3B-Instruct-GGUF"
MODEL_FILE = "Qwen2.5-3B-Instruct-Q4_K_M.gguf"

# Runs at import time; the returned value is handed to Llama as its model_path.
model_path = hf_hub_download(filename=MODEL_FILE, repo_id=MODEL_REPO)

# ===============================
# LLM INITIALIZATION (OPTIMIZED)
# ===============================

# Single shared model instance, built once at import time and used by /chat.
llm = Llama(
    model_path=model_path,
    # --- capacity ---
    n_ctx=8192,  # large context for deep reasoning
    n_batch=512,  # batch size chosen for throughput
    # --- compute placement ---
    n_gpu_layers=0,  # CPU mode: nothing is offloaded to a GPU
    n_threads=multiprocessing.cpu_count(),  # one thread per reported CPU
    # --- memory handling (performance boost) ---
    use_mmap=True,
    use_mlock=True,
)

# ===============================
# REQUEST MODEL
# ===============================

# Request body for POST /chat.
class ChatRequest(BaseModel):
    # The user's text; chat() places it in the prompt as the user turn.
    message: str

# ===============================
# HEALTH CHECK
# ===============================

@app.get("/")
def root():
    # Liveness probe: returns a fixed payload and never touches the model.
    status_payload = {"status": "Strategy AI engine running"}
    return status_payload

# ===============================
# CHAT ENDPOINT
# ===============================

@app.post("/chat")
def chat(req: ChatRequest):
    """Generate a strategy-focused reply to one user message.

    Args:
        req: Parsed request body; ``req.message`` is the user's text.

    Returns:
        ``{"reply": text}`` where ``text`` is the model's answer with leading
        and trailing whitespace removed.
    """
    # STRATEGY SPECIALIZED SYSTEM PROMPT
    system_prompt = (
        "You are an elite strategic intelligence AI. "
        "Think step-by-step before answering. "
        "Provide deep analysis, structured reasoning, and clear actionable insights. "
        "Use bullet points, numbered steps, and markdown formatting."
    )

    # Qwen2.5-Instruct is a ChatML model (<|im_start|>role ... <|im_end|>).
    # Wrapping the turns in "<|system|>" / "<|user|>" / "<|end|>" text does not
    # produce its turn markers, and "<|end|>" is not its end-of-turn token, so
    # a stop sequence of "<|end|>" never matches. Role-tagged messages let
    # llama-cpp-python apply the chat template instead of hand-building one.
    # NOTE(review): assumes the GGUF file carries its chat template in its
    # metadata; if not, construct Llama with chat_format="chatml" - confirm.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": req.message},
    ]

    # NOTE(review): this is a sync handler, so overlapping requests may reach
    # the shared `llm` object at the same time - confirm whether calls need to
    # be serialized.
    output = llm.create_chat_completion(
        messages=messages,
        # Longer reasoning output
        max_tokens=900,
        # Lower randomness for logical thinking
        temperature=0.35,
        # Stable probability sampling
        top_p=0.9,
        # Prevent loops
        repeat_penalty=1.2,
        # ChatML end-of-turn marker
        stop=["<|im_end|>"],
    )

    # Chat completions return the text under message.content; guard against a
    # missing/None content so .strip() cannot fail.
    response_text = (output["choices"][0]["message"]["content"] or "").strip()

    return {"reply": response_text}

# ===============================
# LOCAL RUN
# ===============================

if __name__ == "__main__":
    import uvicorn

    # Listen on every interface, port 7860, when the file is executed directly.
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)