SiddhJagani committed on
Commit
02861c2
·
verified ·
1 Parent(s): 4c93d32

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -12
app.py CHANGED
@@ -153,11 +153,13 @@ async def v1_chat(request: Request, authorization: str = Header(None)):
153
  async def v2_chat_completions(request: Request, authorization: str = Header(None)):
154
  """
155
  v2 – clean OpenAI-compatible streaming.
156
- * Only delta.content
157
- * No usage chunk
158
- * Immediate empty delta for Continue.dev
159
  """
 
160
  check_key(authorization)
 
161
  if not BYTEZ_AUTH:
162
  raise HTTPException(status_code=500, detail="Server BYTEZ_API_KEY not configured")
163
 
@@ -168,11 +170,13 @@ async def v2_chat_completions(request: Request, authorization: str = Header(None
168
  raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}")
169
 
170
  stream = payload.get("stream", False)
 
171
  upstream_headers = {
172
  "Authorization": BYTEZ_AUTH,
173
  "Content-Type": "application/json",
174
  }
175
 
 
176
  def make_openai_delta(content: str):
177
  return {
178
  "id": f"chatcmpl-v2-{int(time.time())}",
@@ -189,23 +193,41 @@ async def v2_chat_completions(request: Request, authorization: str = Header(None
189
  }
190
 
191
  async def clean_stream():
192
- # SEND EMPTY DELTA FIRST – wakes up Continue.dev
193
- yield f"data: {json.dumps(make_openai_delta(''))}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
  async with httpx.AsyncClient(timeout=180) as client:
196
  try:
197
  async with client.stream(
198
  "POST", BYTEZ_CHAT_URL, headers=upstream_headers, json=payload
199
  ) as upstream:
 
200
  async for line in upstream.aiter_lines():
201
  line = line.strip()
202
  if not line:
203
  continue
 
204
  json_str = line[6:] if line.startswith("data: ") else line
205
 
206
- # Skip usage chunk
207
  if "usage" in json_str.lower():
208
  continue
 
209
  if json_str == "[DONE]":
210
  yield "data: [DONE]\n\n"
211
  return
@@ -230,7 +252,6 @@ async def v2_chat_completions(request: Request, authorization: str = Header(None
230
  if text:
231
  yield f"data: {json.dumps(make_openai_delta(text))}\n\n"
232
 
233
- # Fallback [DONE]
234
  yield "data: [DONE]\n\n"
235
 
236
  except Exception as e:
@@ -238,7 +259,7 @@ async def v2_chat_completions(request: Request, authorization: str = Header(None
238
  yield f"data: {json.dumps(error_chunk)}\n\n"
239
  yield "data: [DONE]\n\n"
240
 
241
- # --- Non-streaming ---
242
  if not stream:
243
  async with httpx.AsyncClient(timeout=120) as c:
244
  r = await c.post(BYTEZ_CHAT_URL, headers=upstream_headers, json=payload)
@@ -246,15 +267,23 @@ async def v2_chat_completions(request: Request, authorization: str = Header(None
246
  data = r.json()
247
 
248
  if "choices" not in data:
249
- content = data.get("output") or data.get("response") or data.get("message") or str(data)
 
 
 
 
 
250
  data = {
251
  "id": "chatcmpl-v2",
252
  "object": "chat.completion",
253
- "choices": [{"index": 0, "message": {"role": "assistant", "content": content}}],
 
 
254
  }
 
255
  return JSONResponse(data, headers={"Access-Control-Allow-Origin": "*"})
256
 
257
- # --- Streaming ---
258
  return StreamingResponse(
259
  clean_stream(),
260
  media_type="text/event-stream",
@@ -263,7 +292,7 @@ async def v2_chat_completions(request: Request, authorization: str = Header(None
263
  "Access-Control-Allow-Headers": "*",
264
  "Cache-Control": "no-cache",
265
  "Connection": "keep-alive",
266
- "X-Accel-Buffering": "no", # CRITICAL for HF Spaces
267
  },
268
  )
269
 
 
153
  async def v2_chat_completions(request: Request, authorization: str = Header(None)):
154
  """
155
  v2 – clean OpenAI-compatible streaming.
156
+ * First chunk includes role=assistant (required by Continue.dev)
157
+ * Later chunks send only delta.content
158
+ * No usage events
159
  """
160
+
161
  check_key(authorization)
162
+
163
  if not BYTEZ_AUTH:
164
  raise HTTPException(status_code=500, detail="Server BYTEZ_API_KEY not configured")
165
 
 
170
  raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}")
171
 
172
  stream = payload.get("stream", False)
173
+
174
  upstream_headers = {
175
  "Authorization": BYTEZ_AUTH,
176
  "Content-Type": "application/json",
177
  }
178
 
179
+ # Normal content chunk (NO ROLE)
180
  def make_openai_delta(content: str):
181
  return {
182
  "id": f"chatcmpl-v2-{int(time.time())}",
 
193
  }
194
 
195
  async def clean_stream():
196
+ # FIRST CHUNK MUST SET THE ROLE – REQUIRED by Continue.dev
197
+ first_chunk = {
198
+ "id": f"chatcmpl-v2-{int(time.time())}",
199
+ "object": "chat.completion.chunk",
200
+ "created": int(time.time()),
201
+ "model": payload.get("model", "unknown"),
202
+ "choices": [
203
+ {
204
+ "index": 0,
205
+ "delta": {"role": "assistant", "content": ""},
206
+ "finish_reason": None,
207
+ }
208
+ ],
209
+ }
210
+
211
+ # Send first role-setting chunk
212
+ yield f"data: {json.dumps(first_chunk)}\n\n"
213
 
214
  async with httpx.AsyncClient(timeout=180) as client:
215
  try:
216
  async with client.stream(
217
  "POST", BYTEZ_CHAT_URL, headers=upstream_headers, json=payload
218
  ) as upstream:
219
+
220
  async for line in upstream.aiter_lines():
221
  line = line.strip()
222
  if not line:
223
  continue
224
+
225
  json_str = line[6:] if line.startswith("data: ") else line
226
 
227
+ # Skip usage events
228
  if "usage" in json_str.lower():
229
  continue
230
+
231
  if json_str == "[DONE]":
232
  yield "data: [DONE]\n\n"
233
  return
 
252
  if text:
253
  yield f"data: {json.dumps(make_openai_delta(text))}\n\n"
254
 
 
255
  yield "data: [DONE]\n\n"
256
 
257
  except Exception as e:
 
259
  yield f"data: {json.dumps(error_chunk)}\n\n"
260
  yield "data: [DONE]\n\n"
261
 
262
+ # Non-streaming mode
263
  if not stream:
264
  async with httpx.AsyncClient(timeout=120) as c:
265
  r = await c.post(BYTEZ_CHAT_URL, headers=upstream_headers, json=payload)
 
267
  data = r.json()
268
 
269
  if "choices" not in data:
270
+ content = (
271
+ data.get("output")
272
+ or data.get("response")
273
+ or data.get("message")
274
+ or str(data)
275
+ )
276
  data = {
277
  "id": "chatcmpl-v2",
278
  "object": "chat.completion",
279
+ "choices": [
280
+ {"index": 0, "message": {"role": "assistant", "content": content}}
281
+ ],
282
  }
283
+
284
  return JSONResponse(data, headers={"Access-Control-Allow-Origin": "*"})
285
 
286
+ # Streaming mode
287
  return StreamingResponse(
288
  clean_stream(),
289
  media_type="text/event-stream",
 
292
  "Access-Control-Allow-Headers": "*",
293
  "Cache-Control": "no-cache",
294
  "Connection": "keep-alive",
295
+ "X-Accel-Buffering": "no",
296
  },
297
  )
298