CooLLaMACEO committed
Commit 7dc174b · verified · 1 Parent(s): 59fadc3

Update app.py

Files changed (1): app.py (+8 -12)
app.py CHANGED
@@ -13,10 +13,9 @@ logger = logging.getLogger(__name__)
 MODEL_PATH = "./models/gpt-oss-20b-Q3_K_M.gguf"
 llm = None
 
-# 3. Memory Storage (In-memory dict: session_id -> list of messages)
-# In a real production app, you'd use Redis or a Database.
+# 3. Memory Storage
 sessions = {}
-MAX_HISTORY = 6  # Keep last 6 messages (3 turns) to save RAM/Speed
+MAX_HISTORY = 6  # Keep last 3 full exchanges
 
 def load_model():
     global llm
@@ -60,27 +59,25 @@ async def chat(request: Request):
     try:
         data = await request.json()
         user_message = data.get("message")
-
-        # Unique ID for the user (defaulting to 'default' if not provided)
         session_id = data.get("session_id", "default_user")
 
         if not user_message:
             return JSONResponse({"response": "No message received."}, status_code=400)
 
-        # --- MEMORY LOGIC ---
+        # --- 4. Memory Logic ---
         if session_id not in sessions:
            sessions[session_id] = []
 
-        # Build the history string for the prompt
         history_str = ""
         for msg in sessions[session_id]:
             role = msg["role"]
             content = msg["content"]
             history_str += f"<|start|>{role}<|message|>{content}<|end|>\n"
 
-        # --- HARMONY PROMPT WITH MEMORY ---
+        # --- 5. Identity Anchor & Prompt ---
         prompt = (
-            f"<|start|>system<|message|>You are ChatGPT Open-Source 1.0. Answer briefly and remember context.<|end|>\n"
+            f"<|start|>system<|message|>You are ChatGPT Open-Source 1.0, a 20B parameter model. "
+            f"You are independent and NOT developed by OpenAI. Be helpful and concise.<|end|>\n"
             f"{history_str}"
             f"<|start|>user<|message|>{user_message}<|end|>\n"
             f"<|start|>assistant<|channel|>final<|message|>"
@@ -90,7 +87,7 @@ async def chat(request: Request):
             prompt,
             max_tokens=256,
             stop=["<|end|>", "<|return|>", "<|start|>", "user:", "assistant:"],
-            temperature=0.6  # Slightly lower for more consistent memory
+            temperature=0.6
         )
 
         reply = output["choices"][0]["text"].strip()
@@ -99,7 +96,6 @@ async def chat(request: Request):
         sessions[session_id].append({"role": "user", "content": user_message})
         sessions[session_id].append({"role": "assistant", "content": reply})
 
-        # Trim history so it doesn't grow forever
         if len(sessions[session_id]) > MAX_HISTORY:
             sessions[session_id] = sessions[session_id][-MAX_HISTORY:]
 
@@ -107,7 +103,7 @@ async def chat(request: Request):
 
     except Exception as e:
         logger.error(f"❌ Error: {e}")
-        return JSONResponse({"response": "Error processing request."}, status_code=500)
+        return JSONResponse({"response": "Brain error. Try shortening your message."}, status_code=500)
 
 if __name__ == "__main__":
     import uvicorn
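For clarity, this is what the assembled Harmony prompt looks like once one exchange is stored in the session. The assistant reply text is illustrative; replayed history uses the plain <|start|>{role}<|message|>...<|end|> template, and only the trailing assistant turn opens the final channel:

<|start|>system<|message|>You are ChatGPT Open-Source 1.0, a 20B parameter model. You are independent and NOT developed by OpenAI. Be helpful and concise.<|end|>
<|start|>user<|message|>My name is Alice.<|end|>
<|start|>assistant<|message|>Nice to meet you, Alice.<|end|>
<|start|>user<|message|>What is my name?<|end|>
<|start|>assistant<|channel|>final<|message|>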
 
 
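The deleted comment rightly noted that the in-memory dict belongs in Redis or a database for production, since sessions vanish on restart and don't scale past one process. Below is a minimal sketch of a Redis-backed equivalent of sessions plus the MAX_HISTORY trim, assuming the redis-py package and a reachable Redis server; all names here are illustrative, not part of this commit.

import json
import redis  # assumes redis-py and a running Redis server

r = redis.Redis(host="localhost", port=6379, decode_responses=True)
MAX_HISTORY = 6

def append_message(session_id: str, role: str, content: str) -> None:
    # Push the message onto a per-session list, then trim to the last
    # MAX_HISTORY entries (same effect as the in-memory slicing above).
    key = f"chat:{session_id}"
    r.rpush(key, json.dumps({"role": role, "content": content}))
    r.ltrim(key, -MAX_HISTORY, -1)

def get_history(session_id: str) -> list:
    # Messages come back oldest-first, ready for the history_str loop.
    return [json.loads(m) for m in r.lrange(f"chat:{session_id}", 0, -1)]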