Valtry committed on
Commit
926595a
·
verified ·
1 Parent(s): c7f5553

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -47
app.py CHANGED
@@ -4,10 +4,8 @@ from pydantic import BaseModel
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
6
  from supabase import create_client
7
- import os, uvicorn, threading, time
8
  from contextlib import asynccontextmanager
9
- from queue import Queue
10
- from threading import Thread
11
 
12
  # =========================
13
  # CONFIG
@@ -119,17 +117,28 @@ def save_message(role, content, request_id):
119
  }).execute()
120
 
121
  # =========================
122
- # 🔥 STREAMING GENERATION
123
  # =========================
 
 
 
 
 
 
 
 
124
 
 
 
 
125
  def generate_and_stream(req: ChatRequest):
126
 
127
  prompt = build_prompt(req.message)
128
 
129
- # save user
130
  save_message("user", req.message, req.request_id)
131
 
132
- # create assistant row
133
  res = supabase.table("messages").insert({
134
  "role": "assistant",
135
  "content": "",
@@ -140,36 +149,8 @@ def generate_and_stream(req: ChatRequest):
140
  msg_id = res.data[0]["id"]
141
 
142
  full_text = ""
143
- q = Queue()
144
-
145
- # =========================
146
- # 🔥 DB WRITER THREAD
147
- # =========================
148
- def db_writer():
149
- last_sent = ""
150
-
151
- while True:
152
- chunk = q.get()
153
-
154
- if chunk is None:
155
- break
156
 
157
- last_sent += chunk
158
-
159
- try:
160
- supabase.table("messages").update({
161
- "content": last_sent
162
- }).eq("id", msg_id).execute()
163
- except:
164
- pass
165
-
166
- time.sleep(0.05) # smooth rate
167
-
168
- Thread(target=db_writer, daemon=True).start()
169
-
170
- # =========================
171
- # 🔥 MODEL STREAM
172
- # =========================
173
  stream = model(
174
  prompt,
175
  max_tokens=2048,
@@ -182,29 +163,25 @@ def generate_and_stream(req: ChatRequest):
182
 
183
  for chunk in stream:
184
  token = chunk["choices"][0]["text"]
185
-
186
  full_text += token
187
 
188
- # 🔥 send to queue instead of direct DB write
189
- q.put(token)
190
-
191
- # stop writer
192
- q.put(None)
193
 
 
194
  final = clean_output(full_text)
 
195
 
196
- supabase.table("messages").update({
197
- "content": final,
198
- "status": "done"
199
- }).eq("id", msg_id).execute()
200
-
201
  # =========================
202
  # CHAT
203
  # =========================
204
  @app.post("/v1/chat")
205
  async def chat(req: ChatRequest):
206
 
207
- # 🔥 run in background
208
  threading.Thread(target=generate_and_stream, args=(req,)).start()
209
 
210
  return {"status": "streaming_started"}
 
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
6
  from supabase import create_client
7
+ import os, uvicorn, threading
8
  from contextlib import asynccontextmanager
 
 
9
 
10
  # =========================
11
  # CONFIG
 
117
  }).execute()
118
 
119
  # =========================
120
+ # SUPABASE UPDATE HELPER
121
  # =========================
122
def update_message(msg_id, content, status=None):
    """Best-effort update of one row in the Supabase ``messages`` table.

    Args:
        msg_id: Value matched against the row's ``id`` column.
        content: New value for the ``content`` column.
        status: Optional new value for the ``status`` column. A falsy value
            (``None``, ``""``) leaves the column out of the update payload.

    Returns:
        None. Any exception raised while building or executing the update
        is caught and printed; it is never propagated to the caller.
    """
    # Only send "status" when the caller supplied a truthy value.
    if status:
        payload = {"content": content, "status": status}
    else:
        payload = {"content": content}

    try:
        # Query construction stays inside the try so that every failure,
        # not just the network call, is reported instead of raised.
        query = supabase.table("messages").update(payload).eq("id", msg_id)
        query.execute()
    except Exception as err:
        print(f"Supabase update failed: {err}")
130
 
131
+ # =========================
132
+ # STREAMING GENERATION
133
+ # =========================
134
  def generate_and_stream(req: ChatRequest):
135
 
136
  prompt = build_prompt(req.message)
137
 
138
+ # save user message
139
  save_message("user", req.message, req.request_id)
140
 
141
+ # create empty assistant row
142
  res = supabase.table("messages").insert({
143
  "role": "assistant",
144
  "content": "",
 
149
  msg_id = res.data[0]["id"]
150
 
151
  full_text = ""
152
+ word_count = 0 # ✅ track completed words
 
 
 
 
 
 
 
 
 
 
 
 
153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  stream = model(
155
  prompt,
156
  max_tokens=2048,
 
163
 
164
  for chunk in stream:
165
  token = chunk["choices"][0]["text"]
 
166
  full_text += token
167
 
168
+ # ✅ only update Supabase when a full word is completed (space found)
169
+ # this reduces DB calls from ~60 per response to ~10
170
+ if " " in token or "\n" in token:
171
+ word_count += 1
172
+ update_message(msg_id, full_text.strip())
173
 
174
+ # ✅ final clean + mark done
175
  final = clean_output(full_text)
176
+ update_message(msg_id, final, status="done")
177
 
 
 
 
 
 
178
  # =========================
179
  # CHAT
180
  # =========================
181
  @app.post("/v1/chat")
182
  async def chat(req: ChatRequest):
183
 
184
+ # run generation in background β€” return instantly to ESP32
185
  threading.Thread(target=generate_and_stream, args=(req,)).start()
186
 
187
  return {"status": "streaming_started"}