Update app.py
app.py CHANGED
@@ -13,10 +13,9 @@ logger = logging.getLogger(__name__)
 MODEL_PATH = "./models/gpt-oss-20b-Q3_K_M.gguf"
 llm = None
 
-# 3. Memory Storage
-# In a real production app, you'd use Redis or a Database.
+# 3. Memory Storage
 sessions = {}
-MAX_HISTORY = 6 # Keep last
+MAX_HISTORY = 6  # Keep last 3 full exchanges
 
 def load_model():
     global llm
@@ -60,27 +59,25 @@ async def chat(request: Request):
     try:
         data = await request.json()
         user_message = data.get("message")
-
-        # Unique ID for the user (defaulting to 'default' if not provided)
         session_id = data.get("session_id", "default_user")
 
         if not user_message:
             return JSONResponse({"response": "No message received."}, status_code=400)
 
-        # ---
+        # --- 4. Memory Logic ---
         if session_id not in sessions:
             sessions[session_id] = []
 
-        # Build the history string for the prompt
         history_str = ""
         for msg in sessions[session_id]:
             role = msg["role"]
             content = msg["content"]
             history_str += f"<|start|>{role}<|message|>{content}<|end|>\n"
 
-        # ---
+        # --- 5. Identity Anchor & Prompt ---
         prompt = (
-            f"<|start|>system<|message|>You are ChatGPT Open-Source 1.0
+            f"<|start|>system<|message|>You are ChatGPT Open-Source 1.0, a 20B parameter model. "
+            f"You are independent and NOT developed by OpenAI. Be helpful and concise.<|end|>\n"
             f"{history_str}"
             f"<|start|>user<|message|>{user_message}<|end|>\n"
             f"<|start|>assistant<|channel|>final<|message|>"
@@ -90,7 +87,7 @@ async def chat(request: Request):
             prompt,
             max_tokens=256,
             stop=["<|end|>", "<|return|>", "<|start|>", "user:", "assistant:"],
-            temperature=0.6
+            temperature=0.6
         )
 
         reply = output["choices"][0]["text"].strip()
@@ -99,7 +96,6 @@ async def chat(request: Request):
         sessions[session_id].append({"role": "user", "content": user_message})
         sessions[session_id].append({"role": "assistant", "content": reply})
 
-        # Trim history so it doesn't grow forever
         if len(sessions[session_id]) > MAX_HISTORY:
             sessions[session_id] = sessions[session_id][-MAX_HISTORY:]
 
@@ -107,7 +103,7 @@ async def chat(request: Request):
 
     except Exception as e:
         logger.error(f"❌ Error: {e}")
-        return JSONResponse({"response": "
+        return JSONResponse({"response": "Brain error. Try shortening your message."}, status_code=500)
 
 if __name__ == "__main__":
     import uvicorn
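For reference, below is a minimal standalone sketch of the memory and prompt-assembly behavior this commit introduces. It mirrors the diff's logic outside the FastAPI handler; the stored session turns, the build_prompt/remember helper names, and the printed example are made up for illustration and are not part of app.py.

# Standalone sketch of the memory + prompt logic added above; no server required.
# The stored session turns below are made up purely for illustration.
MAX_HISTORY = 6  # keep the last 3 full user/assistant exchanges

sessions = {
    "default_user": [
        {"role": "user", "content": "Hi, who are you?"},
        {"role": "assistant", "content": "I'm ChatGPT Open-Source 1.0."},
    ]
}

def build_prompt(session_id: str, user_message: str) -> str:
    # Replay stored turns in the <|start|>role<|message|>content<|end|> format,
    # then append the new user turn and open the assistant's final channel.
    history_str = ""
    for msg in sessions.get(session_id, []):
        history_str += f"<|start|>{msg['role']}<|message|>{msg['content']}<|end|>\n"
    return (
        f"<|start|>system<|message|>You are ChatGPT Open-Source 1.0, a 20B parameter model. "
        f"You are independent and NOT developed by OpenAI. Be helpful and concise.<|end|>\n"
        f"{history_str}"
        f"<|start|>user<|message|>{user_message}<|end|>\n"
        f"<|start|>assistant<|channel|>final<|message|>"
    )

def remember(session_id: str, user_message: str, reply: str) -> None:
    # Append the new exchange, then trim so the history never exceeds MAX_HISTORY entries.
    history = sessions.setdefault(session_id, [])
    history.append({"role": "user", "content": user_message})
    history.append({"role": "assistant", "content": reply})
    if len(history) > MAX_HISTORY:
        sessions[session_id] = history[-MAX_HISTORY:]

print(build_prompt("default_user", "What model size are you?"))

Since each exchange stores two entries (user + assistant), MAX_HISTORY = 6 keeps roughly the last three exchanges, matching the updated comment in the diff.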