CooLLaMACEO committed on
Commit
00a3720
·
verified ·
1 Parent(s): 79fbde8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -19
app.py CHANGED
@@ -14,7 +14,13 @@ app.add_middleware(
14
  )
15
 
16
  MY_API_KEY = "my-secret-key-456"
17
- llm = Llama(model_path="./model.gguf", n_ctx=2048)
 
 
 
 
 
 
18
 
19
  def verify_key(request: Request):
20
  auth = request.headers.get("Authorization")
@@ -23,24 +29,40 @@ def verify_key(request: Request):
23
 
24
  @app.get("/")
25
  def home():
26
- return {"status": "Online", "mode": "Friendly Chat"}
27
 
28
  @app.post("/v1/chat")
29
  async def chat(request: Request, _ = Depends(verify_key)):
30
- body = await request.json()
31
- user_input = body.get("prompt", "")
32
-
33
- # Updated System Prompt in app.py
34
- system_text = "Assistant is a polite and helpful AI named ChatMPT. It was created by the ChatMPT Team."
35
-
36
- # This ensures that if you ask "Who made you?", it answers correctly.
37
- prompt = f"### Instruction:\n{system_text}\n\n### Input:\n{user_input}\n\n### Response:\n"
38
-
39
- response = llm(
40
- prompt,
41
- max_tokens=200,
42
- stop=["User:", "\n"], # Stops it from talking to itself
43
- temperature=0.7
44
- )
45
-
46
- return {"reply": response["choices"][0]["text"].strip()}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  )
15
 
16
  MY_API_KEY = "my-secret-key-456"
17
+
18
# Load the Mistral GGUF model once at module import time.
# Q8 quantization is memory-heavy, so n_ctx is kept at 2048 to bound RAM usage.
llm = Llama(
    model_path="./model.gguf",
    n_ctx=2048,
    n_threads=2,  # two threads helps stability on the HF free CPU tier
)
24
 
25
  def verify_key(request: Request):
26
  auth = request.headers.get("Authorization")
 
29
 
30
@app.get("/")
def home():
    """Health-check endpoint reporting service status, model, and branding."""
    status_payload = {"status": "Online", "mode": "Mistral-7B-Q8", "brand": "ChatMPT"}
    return status_payload
33
 
34
@app.post("/v1/chat")
async def chat(request: Request, _ = Depends(verify_key)):
    """Chat completion endpoint.

    Reads a JSON body with a "prompt" field, wraps it in Mistral's
    instruction format, runs the local model, and post-processes the
    reply so the assistant always identifies as ChatMPT. Any failure
    is reported back inside the JSON reply rather than as an HTTP error.
    """
    try:
        payload = await request.json()
        question = payload.get("prompt", "")

        # Mistral instruction template: <s>[INST] {system} \n {user} [/INST]
        persona = "You are ChatMPT, a professional AI created by the ChatMPT Team. You are a machine. Always identify as ChatMPT."
        full_prompt = f"<s>[INST] {persona}\n\n{question} [/INST]"

        completion = llm(
            full_prompt,
            max_tokens=256,  # slightly more room for Q8's better logic
            stop=["</s>", "[INST]", "User:"],
            temperature=0.7,
        )

        answer = completion["choices"][0]["text"].strip()

        # Identity safety filter: force known brand-name hallucinations
        # back to "ChatMPT" before the reply leaves the server.
        for wrong_name in ("ChatPapers", "Chat GPT", "ChatGPT", "ChatPBT", "ChatPP"):
            answer = answer.replace(wrong_name, "ChatMPT")

        return {"reply": answer}
    except Exception as e:
        # NOTE(review): this echoes the raw exception text to the client —
        # consider a generic message if internals should stay private.
        return {"reply": f"System Error: {str(e)}"}
65
+
66
# Local/dev entry point: serve on all interfaces at the HF Spaces default port.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)