CooLLaMACEO committed on
Commit
ad138c6
·
verified ·
1 Parent(s): e1cf209

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -32
app.py CHANGED
# --- Application setup -------------------------------------------------
# NOTE(review): this diff view elides a few import lines (JSONResponse,
# Llama, uvicorn are used elsewhere in the file) — confirm they are
# imported in the full file.
import os

from fastapi import FastAPI, Request, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials

app = FastAPI()
security = HTTPBearer()  # Bearer-token scheme consumed by verify_token

# 1. CORS Configuration
# NOTE(review): wildcard origins/headers are wide open — restrict before
# exposing this publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_headers=["*"],
)

# 2. API key: read from the environment so the secret does not have to live
# in the repository; the original literal is kept only as a
# backward-compatible default.
MY_API_KEY = os.environ.get("API_KEY", "my-secret-key-456")

# 3. LOAD MODEL (Q4 for Speed)
print("--- Loading ChatMPT Engine (Q4) ---")
llm = Llama(
    model_path="./model.gguf",
    n_ctx=2048,
    n_threads=2,  # Optimized for HF Free Tier
    n_batch=128,
    verbose=False
)
31
 
32
def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Dependency: admit the request only when the Bearer token equals MY_API_KEY."""
    supplied = credentials.credentials
    if supplied == MY_API_KEY:
        return supplied
    raise HTTPException(status_code=403, detail="Unauthorized: Invalid API Key")
37
 
38
@app.get("/")
def health():
    """Lightweight liveness probe for the root path."""
    status_payload = {"status": "online", "brand": "ChatMPT"}
    return status_payload
41
-
42
@app.post("/v1/chat")
async def chat(request: Request, _ = Depends(verify_token)):
    """POST /v1/chat — wrap the user's prompt in the instruct template and
    run it through the local model. Body: {"prompt": "<text>"}."""
    try:
        payload = await request.json()
        question = payload.get("prompt", "").strip()

        # Empty prompt: short-circuit without touching the model.
        if not question:
            return JSONResponse(content={"reply": "I'm listening!"})

        # SYSTEM PROMPT: Forces the AI to be ChatMPT and stay concise
        system_msg = "You are ChatMPT, a helpful AI created by the ChatMPT Team. Be concise and direct."
        full_prompt = f"<s>[INST] {system_msg}\n\n{question} [/INST]"
        print(f"User: {question}")

        # GENERATION SETTINGS
        result = llm(
            full_prompt,
            max_tokens=300,
            temperature=0.7,
            stop=["</s>", "[INST]", "User:"],
            echo=False,
        )

        answer = result["choices"][0]["text"].strip()

        # Identity Cleanup
        answer = answer.replace("ChatGPT", "ChatMPT")
        answer = answer.replace("OpenAI", "ChatMPT Team")
        if not answer:
            answer = "I'm here! Could you please rephrase that?"

        return JSONResponse(content={"reply": answer})

    except Exception as e:
        # Boundary handler: log and return an opaque 500 to the client.
        print(f"Error: {e}")
        return JSONResponse(status_code=500, content={"reply": "Server Error. Try again."})
79
 
80
# Script entrypoint — HF Spaces serves the app on port 7860.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
import os

from fastapi import FastAPI, Request, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
# NOTE(review): this diff view hides a few import lines (JSONResponse,
# Llama, uvicorn are used below) — confirm they are imported in the full file.

app = FastAPI()
security = HTTPBearer()  # Bearer-token scheme consumed by verify_token

# CORS: wildcard origins/headers.
# NOTE(review): lock this down before exposing the endpoint publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_headers=["*"],
)

# API key: `import os` was added in this commit but never used — this is the
# intended use. Read the secret from the environment so it need not live in
# the repository; the old literal stays as a backward-compatible default.
MY_API_KEY = os.environ.get("API_KEY", "my-secret-key-456")

# Load model without extra overhead
llm = Llama(
    model_path="./model.gguf",
    n_ctx=2048,
    n_threads=2,
    n_batch=512,
    verbose=False
)
29
 
30
def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Dependency: allow the request only when the Bearer token equals MY_API_KEY."""
    token = credentials.credentials
    if token == MY_API_KEY:
        return token
    raise HTTPException(status_code=403, detail="Unauthorized")
34
 
 
 
 
 
35
@app.post("/v1/chat")
async def chat(request: Request, _ = Depends(verify_token)):
    """POST /v1/chat — run the prompt through the local GGUF model.

    Body: {"prompt": "<user text>"}; returns {"reply": "<model text>"}.
    """
    try:
        data = await request.json()
        user_input = data.get("prompt", "").strip()

        # Guard (restored): don't burn an inference pass on a blank prompt.
        if not user_input:
            return JSONResponse(content={"reply": "..."})

        # No leading <s>: llama.cpp adds BOS itself (avoids the duplicate-BOS warning).
        prompt = f"[INST] {user_input} [/INST]"

        output = llm(
            prompt,
            max_tokens=512,
            temperature=0.7,
            # NOTE(review): "[/INST]" as a stop string is unusual for instruct
            # models — "</s>" is the typical terminator; confirm intent.
            stop=["[/INST]", "User:"],
            echo=False
        )

        reply = output["choices"][0]["text"].strip()
        # Fall back to "..." when the model returns only whitespace.
        return JSONResponse(content={"reply": reply or "..."})

    except Exception as e:
        # Boundary handler: log and return an opaque 500 to the client.
        print(f"Error: {e}")
        return JSONResponse(status_code=500, content={"reply": "SYSTEM_ERROR_RETRY"})
58
 
59
# Entrypoint for direct execution; port 7860 is the HF Spaces convention.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)