CooLLaMACEO committed on
Commit
e1cf209
·
verified ·
1 Parent(s): 1258330

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -52
app.py CHANGED
@@ -1,90 +1,81 @@
1
  from fastapi import FastAPI, Request, HTTPException, Depends
2
  from fastapi.middleware.cors import CORSMiddleware
 
3
  from fastapi.responses import JSONResponse
4
  from llama_cpp import Llama
5
  import uvicorn
6
- import os
7
 
8
# FastAPI application instance for the ChatMPT backend.
app = FastAPI()

# 1. CORS CONFIGURATION
# This is crucial so your website can actually read the response.
# NOTE(review): allow_credentials=True combined with allow_origins=["*"] is
# disallowed by the CORS spec; Starlette will not echo credentials for a
# wildcard origin — confirm whether a concrete origin list is needed.
cors_options = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **cors_options)
19
 
 
20
# API key guarding /v1/chat. Read from the environment when available so the
# secret does not have to live in source control; the original literal stays
# as the backward-compatible default (`import os` is already at file top).
MY_API_KEY = os.environ.get("CHATMPT_API_KEY", "my-secret-key-456")

# 2. LOAD MISTRAL (Optimized for HF CPU)
# We use 4 threads and a higher batch size for faster 'first thought'.
llm = Llama(
    model_path="./model.gguf",
    n_ctx=2048,     # prompt + completion token window
    n_threads=4,
    n_batch=512,
    verbose=True,
)
31
 
32
def verify_key(request: Request):
    """FastAPI dependency: require `Authorization: Bearer <MY_API_KEY>`.

    Raises:
        HTTPException: 403 when the header is missing or the key is wrong.
    """
    import secrets  # stdlib; constant-time string comparison

    auth = request.headers.get("Authorization") or ""
    # compare_digest avoids a timing side-channel on the key check; plain `!=`
    # short-circuits at the first differing byte.
    if not secrets.compare_digest(auth.encode(), f"Bearer {MY_API_KEY}".encode()):
        raise HTTPException(status_code=403, detail="Unauthorized")
 
36
 
37
@app.get("/")
def home():
    """Health/identity probe used to confirm the Space is serving."""
    status_payload = {
        "status": "Online",
        "mode": "Mistral-7B-Q8",
        "brand": "ChatMPT",
    }
    return status_payload
40
 
41
@app.post("/v1/chat")
async def chat(request: Request, _ = Depends(verify_key)):
    """Chat endpoint: expects JSON {"prompt": "..."}; returns {"reply": "..."}."""
    try:
        payload = await request.json()
        question = payload.get("prompt", "").strip()

        # Guard clause: nothing to answer.
        if not question:
            return JSONResponse(content={"reply": "Please enter a message!"})

        # --- MISTRAL INSTRUCT FORMAT ---
        # llama-cpp adds <s> automatically, so we just wrap in [INST].
        system_msg = "You are ChatMPT, a helpful AI. Be direct and concise."
        full_prompt = f"[INST] {system_msg}\n\n{question} [/INST]"

        print(f"\n--- USER PROMPT: {question} ---")

        # --- GENERATION SETTINGS ---
        # repeat_penalty is kept low so the model doesn't get 'scared' to talk.
        completion = llm(
            full_prompt,
            max_tokens=512,
            stop=["</s>", "[INST]", "[/INST]"],
            temperature=0.7,
            repeat_penalty=1.1,
            mirostat_mode=2,
        )

        answer = completion["choices"][0]["text"].strip()

        # --- DEBUG LOGGING --- (shows up in the Hugging Face logs)
        print(f"--- AI REPLY: {answer} ---")

        # --- FALLBACK IF EMPTY ---
        if not answer or len(answer) < 2:
            answer = "I'm here! Could you please rephrase that?"

        # Identity safety: scrub base-model self-references.
        for wrong_name in ("ChatGPT", "Chat GPT", "OpenAI", "ChatPapers"):
            answer = answer.replace(wrong_name, "ChatMPT")

        return JSONResponse(content={"reply": answer})

    except Exception as e:
        print(f"SERVER ERROR: {str(e)}")
        return JSONResponse(status_code=500, content={"reply": "System Error: Brain is overloaded."})
88
 
89
if __name__ == "__main__":
    # Port 7860 is the port Hugging Face Spaces expects a web app to bind.
    uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  from fastapi import FastAPI, Request, HTTPException, Depends
2
  from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
4
  from fastapi.responses import JSONResponse
5
  from llama_cpp import Llama
6
  import uvicorn
 
7
 
8
# FastAPI application instance plus the Bearer-token parser used by the
# verify_token dependency.
app = FastAPI()
security = HTTPBearer()

# 1. CORS Configuration
cors_options = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **cors_options)
18
 
19
import os  # needed here: this revision dropped the file-level `import os`

# 2. RESTORED API KEY — read from the environment when available so the
# secret does not have to be committed; the original literal stays as the
# backward-compatible default.
MY_API_KEY = os.environ.get("CHATMPT_API_KEY", "my-secret-key-456")

# 3. LOAD MODEL (Q4 for Speed)
print("--- Loading ChatMPT Engine (Q4) ---")
llm = Llama(
    model_path="./model.gguf",
    n_ctx=2048,
    n_threads=2,  # Optimized for HF Free Tier
    n_batch=128,
    verbose=False,
)
31
 
32
def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Validates the Bearer token against MY_API_KEY.

    Returns:
        The raw token string on success.
    Raises:
        HTTPException: 403 when the token does not match.
    """
    import secrets  # stdlib; constant-time string comparison

    # compare_digest avoids a timing side-channel on the key check; plain `!=`
    # short-circuits at the first differing byte.
    if not secrets.compare_digest(credentials.credentials.encode(), MY_API_KEY.encode()):
        raise HTTPException(status_code=403, detail="Unauthorized: Invalid API Key")
    return credentials.credentials
37
 
38
@app.get("/")
def health():
    """Liveness probe: reports the service is up and states the brand."""
    status_payload = dict(status="online", brand="ChatMPT")
    return status_payload
41
 
42
@app.post("/v1/chat")
async def chat(request: Request, _ = Depends(verify_token)):
    """Chat endpoint: expects JSON {"prompt": "..."}; returns {"reply": "..."}."""
    try:
        payload = await request.json()
        question = payload.get("prompt", "").strip()

        # Guard clause: nothing to answer.
        if not question:
            return JSONResponse(content={"reply": "I'm listening!"})

        # SYSTEM PROMPT: forces the AI to identify as ChatMPT and stay concise.
        persona = "You are ChatMPT, a helpful AI created by the ChatMPT Team. Be concise and direct."
        full_prompt = f"<s>[INST] {persona}\n\n{question} [/INST]"

        print(f"User: {question}")

        # GENERATION SETTINGS
        completion = llm(
            full_prompt,
            max_tokens=300,
            temperature=0.7,
            stop=["</s>", "[INST]", "User:"],
            echo=False,
        )

        answer = completion["choices"][0]["text"].strip()

        # Identity cleanup: scrub base-model self-references.
        for wrong, right in (("ChatGPT", "ChatMPT"), ("OpenAI", "ChatMPT Team")):
            answer = answer.replace(wrong, right)

        if not answer:
            answer = "I'm here! Could you please rephrase that?"

        return JSONResponse(content={"reply": answer})

    except Exception as e:
        print(f"Error: {e}")
        return JSONResponse(status_code=500, content={"reply": "Server Error. Try again."})
79
 
80
if __name__ == "__main__":
    # Port 7860 is the port Hugging Face Spaces expects a web app to bind.
    uvicorn.run(app, host="0.0.0.0", port=7860)