CooLLaMACEO committed on
Commit
e1cf209
·
verified ·
1 Parent(s): 1258330

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -52
app.py CHANGED
@@ -1,90 +1,81 @@
1
  from fastapi import FastAPI, Request, HTTPException, Depends
2
  from fastapi.middleware.cors import CORSMiddleware
 
3
  from fastapi.responses import JSONResponse
4
  from llama_cpp import Llama
5
  import uvicorn
6
- import os
7
 
8
# FastAPI application instance for the ChatMPT backend.
app = FastAPI()

# 1. CORS CONFIGURATION
# This is crucial so your website can actually read the response.
# NOTE(review): allow_credentials=True combined with allow_origins=["*"] is
# disallowed by the CORS spec; Starlette will not echo credentials for a
# wildcard origin — confirm whether a concrete origin list is needed.
cors_options = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **cors_options)
19
 
 
20
# API key guarding /v1/chat. Read from the environment when available so the
# secret does not have to live in source control; the original literal stays
# as the backward-compatible default (`import os` is already at file top).
MY_API_KEY = os.environ.get("CHATMPT_API_KEY", "my-secret-key-456")

# 2. LOAD MISTRAL (Optimized for HF CPU)
# We use 4 threads and a higher batch size for faster 'first thought'.
llm = Llama(
    model_path="./model.gguf",
    n_ctx=2048,     # prompt + completion token window
    n_threads=4,
    n_batch=512,
    verbose=True,
)
31
 
32
def verify_key(request: Request):
    """FastAPI dependency: require `Authorization: Bearer <MY_API_KEY>`.

    Raises:
        HTTPException: 403 when the header is missing or the key is wrong.
    """
    import secrets  # stdlib; constant-time string comparison

    auth = request.headers.get("Authorization") or ""
    # compare_digest avoids a timing side-channel on the key check; plain `!=`
    # short-circuits at the first differing byte.
    if not secrets.compare_digest(auth.encode(), f"Bearer {MY_API_KEY}".encode()):
        raise HTTPException(status_code=403, detail="Unauthorized")
 
36
 
37
@app.get("/")
def home():
    """Health/identity probe used to confirm the Space is serving."""
    status_payload = {
        "status": "Online",
        "mode": "Mistral-7B-Q8",
        "brand": "ChatMPT",
    }
    return status_payload
40
 
41
@app.post("/v1/chat")
async def chat(request: Request, _ = Depends(verify_key)):
    """Chat endpoint: expects JSON {"prompt": "..."}; returns {"reply": "..."}."""
    try:
        payload = await request.json()
        question = payload.get("prompt", "").strip()

        # Guard clause: nothing to answer.
        if not question:
            return JSONResponse(content={"reply": "Please enter a message!"})

        # --- MISTRAL INSTRUCT FORMAT ---
        # llama-cpp adds <s> automatically, so we just wrap in [INST].
        system_msg = "You are ChatMPT, a helpful AI. Be direct and concise."
        full_prompt = f"[INST] {system_msg}\n\n{question} [/INST]"

        print(f"\n--- USER PROMPT: {question} ---")

        # --- GENERATION SETTINGS ---
        # repeat_penalty is kept low so the model doesn't get 'scared' to talk.
        completion = llm(
            full_prompt,
            max_tokens=512,
            stop=["</s>", "[INST]", "[/INST]"],
            temperature=0.7,
            repeat_penalty=1.1,
            mirostat_mode=2,
        )

        answer = completion["choices"][0]["text"].strip()

        # --- DEBUG LOGGING --- (shows up in the Hugging Face logs)
        print(f"--- AI REPLY: {answer} ---")

        # --- FALLBACK IF EMPTY ---
        if not answer or len(answer) < 2:
            answer = "I'm here! Could you please rephrase that?"

        # Identity safety: scrub base-model self-references.
        for wrong_name in ("ChatGPT", "Chat GPT", "OpenAI", "ChatPapers"):
            answer = answer.replace(wrong_name, "ChatMPT")

        return JSONResponse(content={"reply": answer})

    except Exception as e:
        print(f"SERVER ERROR: {str(e)}")
        return JSONResponse(status_code=500, content={"reply": "System Error: Brain is overloaded."})
88
 
89
if __name__ == "__main__":
    # Port 7860 is the port Hugging Face Spaces expects a web app to bind.
    uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  from fastapi import FastAPI, Request, HTTPException, Depends
2
  from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
4
  from fastapi.responses import JSONResponse
5
  from llama_cpp import Llama
6
  import uvicorn
 
7
 
8
# FastAPI application instance plus the Bearer-token parser used by the
# verify_token dependency.
app = FastAPI()
security = HTTPBearer()

# 1. CORS Configuration
cors_options = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **cors_options)
18
 
19
import os  # needed here: this revision dropped the file-level `import os`

# 2. RESTORED API KEY — read from the environment when available so the
# secret does not have to be committed; the original literal stays as the
# backward-compatible default.
MY_API_KEY = os.environ.get("CHATMPT_API_KEY", "my-secret-key-456")

# 3. LOAD MODEL (Q4 for Speed)
print("--- Loading ChatMPT Engine (Q4) ---")
llm = Llama(
    model_path="./model.gguf",
    n_ctx=2048,
    n_threads=2,  # Optimized for HF Free Tier
    n_batch=128,
    verbose=False,
)
31
 
32
def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Validates the Bearer token against MY_API_KEY.

    Returns:
        The raw token string on success.
    Raises:
        HTTPException: 403 when the token does not match.
    """
    import secrets  # stdlib; constant-time string comparison

    # compare_digest avoids a timing side-channel on the key check; plain `!=`
    # short-circuits at the first differing byte.
    if not secrets.compare_digest(credentials.credentials.encode(), MY_API_KEY.encode()):
        raise HTTPException(status_code=403, detail="Unauthorized: Invalid API Key")
    return credentials.credentials
37
 
38
@app.get("/")
def health():
    """Liveness probe: reports the service is up and states the brand."""
    status_payload = dict(status="online", brand="ChatMPT")
    return status_payload
41
 
42
@app.post("/v1/chat")
async def chat(request: Request, _ = Depends(verify_token)):
    """Chat endpoint: expects JSON {"prompt": "..."}; returns {"reply": "..."}."""
    try:
        payload = await request.json()
        question = payload.get("prompt", "").strip()

        # Guard clause: nothing to answer.
        if not question:
            return JSONResponse(content={"reply": "I'm listening!"})

        # SYSTEM PROMPT: forces the AI to identify as ChatMPT and stay concise.
        persona = "You are ChatMPT, a helpful AI created by the ChatMPT Team. Be concise and direct."
        full_prompt = f"<s>[INST] {persona}\n\n{question} [/INST]"

        print(f"User: {question}")

        # GENERATION SETTINGS
        completion = llm(
            full_prompt,
            max_tokens=300,
            temperature=0.7,
            stop=["</s>", "[INST]", "User:"],
            echo=False,
        )

        answer = completion["choices"][0]["text"].strip()

        # Identity cleanup: scrub base-model self-references.
        for wrong, right in (("ChatGPT", "ChatMPT"), ("OpenAI", "ChatMPT Team")):
            answer = answer.replace(wrong, right)

        if not answer:
            answer = "I'm here! Could you please rephrase that?"

        return JSONResponse(content={"reply": answer})

    except Exception as e:
        print(f"Error: {e}")
        return JSONResponse(status_code=500, content={"reply": "Server Error. Try again."})
79
 
80
if __name__ == "__main__":
    # Port 7860 is the port Hugging Face Spaces expects a web app to bind.
    uvicorn.run(app, host="0.0.0.0", port=7860)