CooLLaMACEO committed on
Commit
2142f68
·
verified ·
1 Parent(s): 5aee2f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -40
app.py CHANGED
@@ -1,61 +1,58 @@
1
  from fastapi import FastAPI, Request, HTTPException, Depends
2
  from fastapi.middleware.cors import CORSMiddleware
3
- from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
4
- from fastapi.responses import JSONResponse
5
  from llama_cpp import Llama
6
  import uvicorn
7
 
8
  app = FastAPI()
9
 
10
- # Allow all origins (for frontend access)
11
  app.add_middleware(
12
  CORSMiddleware,
13
  allow_origins=["*"],
 
14
  allow_methods=["*"],
15
  allow_headers=["*"],
16
  )
17
 
18
- # Simple API key auth
19
- security = HTTPBearer()
20
- MY_API_KEY = "my-secret-key-456"
 
21
 
22
- # Load GGUF model (CPU only, small threads for Spaces)
23
- llm = Llama(
24
- model_path="./mpt-7b-chat.gguf", # Make sure this is a tokenizer-included GGUF
25
- n_ctx=2048,
26
- n_threads=2, # Reduce for free tier
27
- n_gpu_layers=0 # Force CPU
28
- )
29
-
30
- def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
31
- if credentials.credentials != MY_API_KEY:
32
  raise HTTPException(status_code=403, detail="Unauthorized")
33
- return credentials.credentials
34
-
35
- @app.post("/v1/chat")
36
- async def chat(request: Request, _ = Depends(verify_token)):
37
- try:
38
- data = await request.json()
39
- user_prompt = data.get("prompt", "").strip()
40
- if not user_prompt:
41
- return JSONResponse(status_code=400, content={"error": "No prompt provided"})
42
 
43
- # MPT chat format
44
- prompt = f"<|im_start|>user\n{user_prompt}<|im_end|>\n<|im_start|>assistant\n"
 
45
 
46
- output = llm(
47
- prompt,
48
- max_tokens=512,
49
- temperature=0.7,
50
- stop=["<|im_end|>", "<|im_start|>"],
51
- echo=False
52
- )
53
-
54
- reply = output["choices"][0]["text"].strip()
55
- return JSONResponse(content={"reply": reply})
56
-
57
- except Exception as e:
58
- return JSONResponse(status_code=500, content={"error": str(e)})
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  if __name__ == "__main__":
61
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  from fastapi import FastAPI, Request, HTTPException, Depends
2
  from fastapi.middleware.cors import CORSMiddleware
 
 
3
  from llama_cpp import Llama
4
  import uvicorn
5
 
6
  app = FastAPI()
7
 
8
+ # --- CORS SETTINGS (Crucial for GitHub Pages) ---
9
  app.add_middleware(
10
  CORSMiddleware,
11
  allow_origins=["*"],
12
+ allow_credentials=True,
13
  allow_methods=["*"],
14
  allow_headers=["*"],
15
  )
16
 
17
+ # --- CONFIGURATION ---
18
+ MY_API_KEY = "my-secret-key-456"
19
+ # Optimized for HF Free Tier CPU
20
+ llm = Llama(model_path="./model.gguf", n_ctx=2048, n_threads=4, n_gpu_layers=0)
21
 
22
+ def verify_key(request: Request):
23
+ auth = request.headers.get("Authorization")
24
+ if auth != f"Bearer {MY_API_KEY}":
 
 
 
 
 
 
 
25
  raise HTTPException(status_code=403, detail="Unauthorized")
 
 
 
 
 
 
 
 
 
26
 
27
+ @app.get("/")
28
+ def home():
29
+ return {"status": "Online", "branding": "ChatMPT Team"}
30
 
31
+ @app.post("/v1/chat")
32
+ async def chat(request: Request, _ = Depends(verify_key)):
33
+ body = await request.json()
34
+ user_input = body.get("prompt", "")
35
+
36
+ # Instruction-based prompt to fix the identity
37
+ prompt = (
38
+ "Assistant is a polite AI named ChatMPT, created by the ChatMPT Team.\n"
39
+ f"User: {user_input}\n"
40
+ "Assistant:"
41
+ )
42
+
43
+ response = llm(
44
+ prompt,
45
+ max_tokens=500,
46
+ stop=["User:", "\n", "Assistant:"],
47
+ temperature=0.7
48
+ )
49
+
50
+ raw_reply = response["choices"][0]["text"].strip()
51
+
52
+ # Anti-Typo Safety Filter
53
+ final_reply = raw_reply.replace("ChatPBT", "ChatMPT").replace("ChatPP", "ChatMPT")
54
+
55
+ return {"reply": final_reply}
56
 
57
  if __name__ == "__main__":
58
  uvicorn.run(app, host="0.0.0.0", port=7860)