SiddhJagani commited on
Commit
0e7b58c
·
verified ·
1 Parent(s): c6d3a00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -0
app.py CHANGED
@@ -165,6 +165,151 @@ async def chat(request: Request, authorization: str = Header(None)):
165
  }
166
 
167
  return JSONResponse(data, headers={"Access-Control-Allow-Origin": "*"})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  # ---------------------------------------------------------------------
169
  # Minimal Gradio UI (to make HF Space start)
170
  # ---------------------------------------------------------------------
 
165
  }
166
 
167
  return JSONResponse(data, headers={"Access-Control-Allow-Origin": "*"})
168
+
169
+
170
+
171
+
172
+
173
+
174
+
175
# ---------------------------------------------------------------------
# /v2/chat/completions → Puter.js → OpenAI compatible
# ---------------------------------------------------------------------
@api.post("/v2/chat/completions")
async def puter_chat(request: Request, authorization: str = Header(None)):
    """OpenAI-compatible chat-completions endpoint backed by a Puter.js
    Node helper.

    Reads an OpenAI-style JSON body (``model``, ``messages``,
    ``temperature``, ``max_tokens``, ``stream``), flattens the messages
    into a single ``role: content`` prompt, and pipes it to
    ``puter_helper.js`` over stdin.  The helper's JSON reply is mapped
    back to the OpenAI chat-completion schema.  When ``stream`` is true
    the full result is re-emitted word-by-word as SSE chunks (the Node
    helper itself does not stream yet).

    Raises:
        HTTPException 401/403 via ``check_key`` on a bad key,
        500 on a helper failure or unparseable helper output,
        502 when the helper reports ``ok: false``.
    """
    check_key(authorization)

    payload = await request.json()
    model = payload.get("model")
    messages = payload.get("messages", [])
    temperature = payload.get("temperature", 1.0)
    max_tokens = payload.get("max_tokens")
    stream = payload.get("stream", False)

    # Convert OpenAI-style messages → single string prompt for Puter.js.
    # NOTE(review): assumes every message carries "role" and "content"
    # keys — a malformed message raises KeyError → 500; confirm callers.
    prompt = "\n".join(f"{m['role']}: {m['content']}" for m in messages)

    # Node helper input
    node_payload = json.dumps({
        "prompt": prompt,
        "model": model,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "stream": False,  # streaming handled later
    })

    def _run_helper() -> dict:
        """Run the Node helper to completion and return its parsed JSON.

        Blocking — always call via ``asyncio.to_thread`` so the event
        loop is not stalled for the duration of the Node process.
        """
        # List argv, shell=False: no shell-injection surface.
        proc = subprocess.run(
            ["node", "puter_helper.js"],
            input=node_payload,
            capture_output=True,
            text=True,
        )
        # Judge failure by exit status, NOT by stderr content: Node
        # routinely prints warnings to stderr alongside a good reply,
        # and the old `if stderr:` check aborted those requests.
        if proc.returncode != 0:
            detail = proc.stderr.strip() or f"exit code {proc.returncode}"
            raise HTTPException(status_code=500, detail=f"Node error: {detail}")
        try:
            node_out = json.loads(proc.stdout)
        except json.JSONDecodeError:
            raise HTTPException(
                status_code=500, detail=f"Bad Node output: {proc.stdout}"
            )
        if not node_out.get("ok"):
            raise HTTPException(status_code=502, detail=node_out.get("error"))
        return node_out

    # ------------------------------------------------------------------
    # Non-streaming
    # ------------------------------------------------------------------
    if not stream:
        node_out = await asyncio.to_thread(_run_helper)
        final_text = node_out["result"]

        return {
            "id": "chatcmpl-puter",
            "object": "chat.completion",
            "model": model,
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": final_text,
                    },
                    "finish_reason": "stop",
                }
            ],
        }

    # ------------------------------------------------------------------
    # Streaming path: /v2/chat/completions?stream=true
    # ------------------------------------------------------------------
    async def stream_generator():
        # The Node helper is not streaming yet, so we emulate SSE by
        # fetching the full text once and splitting it gradually.
        try:
            node_out = await asyncio.to_thread(_run_helper)
        except HTTPException as exc:
            # json.dumps keeps the SSE payload valid JSON even when the
            # error text contains quotes or newlines (the old f-string
            # interpolation produced broken JSON in that case).
            yield f"data: {json.dumps({'error': str(exc.detail)})}\n\n"
            yield "data: [DONE]\n\n"
            return

        full_text = node_out["result"]

        # Send word-by-word as streaming chunks
        for word in full_text.split():
            chunk = {
                "id": "chatcmpl-puter-stream",
                "object": "chat.completion.chunk",
                "model": model,
                "choices": [
                    {
                        "index": 0,
                        "delta": {"content": word + " "},
                        "finish_reason": None,
                    }
                ],
            }
            yield f"data: {json.dumps(chunk)}\n\n"
            await asyncio.sleep(0.02)  # pacing so clients render progressively

        yield "data: [DONE]\n\n"

    return StreamingResponse(
        stream_generator(),
        media_type="text/event-stream",
        headers={"Access-Control-Allow-Origin": "*"},
    )
303
+
304
+
305
+
306
+
307
+
308
+
309
+
310
+
311
+
312
+
313
  # ---------------------------------------------------------------------
314
  # Minimal Gradio UI (to make HF Space start)
315
  # ---------------------------------------------------------------------