CooLLaMACEO committed on
Commit
e85d42c
·
verified ·
1 Parent(s): 1daa6dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -16
app.py CHANGED
@@ -3,37 +3,65 @@ from pydantic import BaseModel
3
  from llama_cpp import Llama
4
  import os
5
 
6
- app = FastAPI()
7
-
 
8
  MODEL_PATH = "./models/gpt-oss-20b-Q3_K_M.gguf"
9
 
10
- print("🔄 Loading model… this may take a while")
 
 
 
 
11
 
 
 
 
 
12
  llm = Llama(
13
  model_path=MODEL_PATH,
14
  n_ctx=16384,
15
  n_threads=os.cpu_count(),
16
- n_gpu_layers=0, # HF CPU-only unless paid GPU
17
  verbose=False,
18
  )
 
19
 
20
- print("✅ Model loaded successfully")
 
 
 
21
 
22
  class ChatRequest(BaseModel):
23
- prompt: str
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
- @app.post("/chat")
26
  def chat(req: ChatRequest):
 
 
 
 
 
 
 
27
  output = llm(
28
- f"User: {req.prompt}\nAssistant:",
29
  max_tokens=512,
30
- stop=["User:"],
 
31
  )
32
 
33
- return {
34
- "response": output["choices"][0]["text"].strip()
35
- }
36
-
37
- @app.get("/")
38
- def root():
39
- return {"status": "ChatGPT Open-Source 1.0 is running 🚀"}
 
3
  from llama_cpp import Llama
4
  import os
5
 
6
# -------------------------
# Configuration
# -------------------------
MODEL_PATH = "./models/gpt-oss-20b-Q3_K_M.gguf"

SYSTEM_PROMPT = (
    "You are ChatGPT Open-Source 1.0, a high-performance local AI. "
    "You were built by the open-source community. "
    "You are helpful, witty, and proud to run locally without the internet."
)

# -------------------------
# Model loading (blocks at import time until the weights are in memory)
# -------------------------
print("🔥 Loading model...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=16384,
    n_threads=os.cpu_count(),
    n_batch=256,
    verbose=False,
)
print("✅ Model loaded!")

# -------------------------
# FastAPI application
# -------------------------
app = FastAPI(title="ChatGPT Open-Source 1.0")
35
class ChatRequest(BaseModel):
    # Incoming payload for POST /chat: a single user message.
    message: str


class ChatResponse(BaseModel):
    # Outgoing payload for POST /chat: the model's reply text.
    reply: str
41
@app.get("/")
def root():
    """Health/info endpoint: report service name, run status, and model."""
    info = {
        "name": "ChatGPT Open-Source 1.0",
        "status": "running",
        "model": "gpt-oss-20b-Q3_K_M",
        "offline": True,
    }
    return info
49
 
50
@app.post("/chat", response_model=ChatResponse)
def chat(req: ChatRequest):
    """Generate a single-turn reply to ``req.message``.

    Builds a chat-style prompt around the fixed SYSTEM_PROMPT, runs the
    llama.cpp model, and returns the generated text with surrounding
    whitespace stripped.
    """
    # NOTE(review): these <|...|> tags are a generic chat template — confirm
    # they match the chat template this GGUF model was actually trained with.
    prompt = f"""<|system|>
{SYSTEM_PROMPT}
<|user|>
{req.message}
<|assistant|>
"""

    output = llm(
        prompt,
        max_tokens=512,
        # Fix: also stop on "<|assistant|>" so the model cannot open a fake
        # extra turn and leak the tag (and a hallucinated turn) into the reply.
        stop=["<|user|>", "<|system|>", "<|assistant|>"],
        temperature=0.7,
    )

    # Fix: an empty completion previously raised IndexError (opaque 500);
    # return an empty reply instead.
    choices = output.get("choices") or []
    reply = choices[0]["text"].strip() if choices else ""
    return ChatResponse(reply=reply)