SiddhJagani committed on
Commit
3ae3c70
·
verified ·
1 Parent(s): 96b3c51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -25
app.py CHANGED
@@ -12,17 +12,16 @@ from fastapi.responses import JSONResponse, StreamingResponse
12
  # ---------------------------------------------------------------------
13
  BYTEZ_CHAT_URL = "https://api.bytez.com/models/v2/openai/v1/chat/completions"
14
  BYTEZ_MODELS_URL = "https://api.bytez.com/models/v2/list/models"
15
- BYTEZ_AUTH = os.getenv("BYTEZ_API_KEY") # for /v1
16
- BYTEZ_AUTH_2 = os.getenv("BYTEZ_API_2") # for /v2 (optional, can be same)
17
- LOCAL_API_KEY = os.getenv("LOCAL_API_KEY") # optional local guard
18
 
19
  # ---------------------------------------------------------------------
20
  # FastAPI app
21
  # ---------------------------------------------------------------------
22
- api = FastAPI(title="Bytez -> OpenAI Proxy (v1 + v2)")
23
 
24
  def check_key(auth: str | None):
25
- """Validate Bearer token."""
26
  if not auth or not auth.startswith("Bearer "):
27
  raise HTTPException(status_code=401, detail="Missing or invalid API key")
28
  user_key = auth.split("Bearer ")[1].strip()
@@ -37,8 +36,9 @@ def root():
37
  return {"status": "ok", "message": "Bytez proxy (v1+v2) running"}
38
 
39
  # ---------------------------------------------------------------------
40
- # /v1/models
41
  # ---------------------------------------------------------------------
 
42
  @api.get("/v1/models")
43
  async def v1_models(authorization: str = Header(None)):
44
  check_key(authorization)
@@ -53,6 +53,7 @@ async def v1_models(authorization: str = Header(None)):
53
  except json.JSONDecodeError:
54
  raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")
55
 
 
56
  models_list = [
57
  {"id": m.get("id") or m.get("name"), "object": "model"}
58
  for m in (data if isinstance(data, list) else data.get("data", []))
@@ -62,11 +63,10 @@ async def v1_models(authorization: str = Header(None)):
62
  headers={"Access-Control-Allow-Origin": "*"}
63
  )
64
 
65
- # ---------------------------------------------------------------------
66
- # /v1/chat/completions (your original – untouched)
67
- # ---------------------------------------------------------------------
68
  @api.post("/v1/chat/completions")
69
  async def v1_chat(request: Request, authorization: str = Header(None)):
 
70
  check_key(authorization)
71
  if not BYTEZ_AUTH:
72
  raise HTTPException(status_code=500, detail="Server BYTEZ_API_KEY not configured")
@@ -75,6 +75,7 @@ async def v1_chat(request: Request, authorization: str = Header(None)):
75
  stream = payload.get("stream", False)
76
  headers = {"Authorization": BYTEZ_AUTH, "Content-Type": "application/json"}
77
 
 
78
  async def v1_event_stream():
79
  async with httpx.AsyncClient(timeout=120) as client:
80
  async with client.stream("POST", BYTEZ_CHAT_URL, headers=headers, json=payload) as upstream:
@@ -92,6 +93,7 @@ async def v1_chat(request: Request, authorization: str = Header(None)):
92
  yield "data: [DONE]\n\n"
93
  break
94
 
 
95
  content = ""
96
  if "token" in chunk:
97
  content = chunk["token"]
@@ -119,6 +121,7 @@ async def v1_chat(request: Request, authorization: str = Header(None)):
119
  yield f"data: {json.dumps(openai_chunk)}\n\n"
120
  yield "data: [DONE]\n\n"
121
 
 
122
  if not stream:
123
  async with httpx.AsyncClient(timeout=120) as c:
124
  r = await c.post(BYTEZ_CHAT_URL, headers=headers, json=payload)
@@ -126,6 +129,7 @@ async def v1_chat(request: Request, authorization: str = Header(None)):
126
  data = r.json()
127
  except json.JSONDecodeError:
128
  raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")
 
129
  if "choices" not in data:
130
  content = data.get("output") or data.get("response") or data.get("message") or str(data)
131
  data = {
@@ -135,45 +139,80 @@ async def v1_chat(request: Request, authorization: str = Header(None)):
135
  }
136
  return JSONResponse(data, headers={"Access-Control-Allow-Origin": "*"})
137
 
138
- return StreamingResponse(v1_event_stream(), media_type="text/event-stream", headers={"Access-Control-Allow-Origin": "*"})
 
 
 
 
139
 
140
  # ---------------------------------------------------------------------
141
- # /v2/chat/completions – CLEAN STREAMING (no usage chunk)
142
  # ---------------------------------------------------------------------
 
143
  @api.post("/v2/chat/completions")
144
  async def v2_chat_completions(request: Request, authorization: str = Header(None)):
 
 
 
 
 
 
145
  check_key(authorization)
146
- auth_to_use = BYTEZ_AUTH_2 or BYTEZ_AUTH
147
- if not auth_to_use:
148
- raise HTTPException(status_code=500, detail="BYTEZ_API_KEY or BYTEZ_API_2 not configured")
149
 
150
  payload = await request.json()
151
  stream = payload.get("stream", False)
152
- upstream_headers = {"Authorization": auth_to_use, "Content-Type": "application/json"}
153
 
 
 
 
 
 
 
 
 
154
  def make_openai_delta(content: str):
155
  return {
156
  "id": f"chatcmpl-v2-{int(time.time())}",
157
  "object": "chat.completion.chunk",
158
  "created": int(time.time()),
159
  "model": payload.get("model", "unknown"),
160
- "choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}],
 
 
 
 
 
 
161
  }
162
 
 
 
 
163
  async def clean_stream():
164
  async with httpx.AsyncClient(timeout=180) as client:
165
- async with client.stream("POST", BYTEZ_CHAT_URL, headers=upstream_headers, json=payload) as upstream:
 
 
 
166
  async for line in upstream.aiter_lines():
167
  line = line.strip()
168
  if not line:
169
  continue
170
 
 
171
  json_str = line[6:] if line.startswith("data: ") else line
172
 
173
- # Skip final usage chunk
 
 
174
  if "usage" in json_str.lower():
175
  continue
176
 
 
 
 
177
  if json_str == "[DONE]":
178
  yield "data: [DONE]\n\n"
179
  return
@@ -183,8 +222,12 @@ async def v2_chat_completions(request: Request, authorization: str = Header(None
183
  except json.JSONDecodeError:
184
  continue
185
 
 
 
 
186
  text = ""
187
  if isinstance(chunk, dict):
 
188
  if "token" in chunk:
189
  text = chunk["token"]
190
  elif "choices" in chunk and chunk["choices"]:
@@ -192,14 +235,22 @@ async def v2_chat_completions(request: Request, authorization: str = Header(None
192
  text = delta.get("content", "")
193
  elif "text" in chunk:
194
  text = chunk["text"]
 
195
  else:
196
  text = str(chunk)
197
 
 
 
 
198
  if text:
199
  yield f"data: {json.dumps(make_openai_delta(text))}\n\n"
200
 
 
201
  yield "data: [DONE]\n\n"
202
 
 
 
 
203
  if not stream:
204
  async with httpx.AsyncClient(timeout=120) as c:
205
  r = await c.post(BYTEZ_CHAT_URL, headers=upstream_headers, json=payload)
@@ -207,32 +258,48 @@ async def v2_chat_completions(request: Request, authorization: str = Header(None
207
  data = r.json()
208
  except json.JSONDecodeError:
209
  raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")
 
 
210
  if "choices" not in data:
211
- content = data.get("output") or data.get("response") or data.get("message") or str(data)
 
 
 
 
 
212
  data = {
213
  "id": "chatcmpl-v2",
214
  "object": "chat.completion",
215
- "choices": [{"index": 0, "message": {"role": "assistant", "content": content}}],
 
 
216
  }
217
  return JSONResponse(data, headers={"Access-Control-Allow-Origin": "*"})
218
 
219
- return StreamingResponse(clean_stream(), media_type="text/event-stream", headers={"Access-Control-Allow-Origin": "*"})
 
 
 
 
 
 
 
220
 
221
  # ---------------------------------------------------------------------
222
- # Gradio UI (required for HF Space)
223
  # ---------------------------------------------------------------------
224
  with gr.Blocks() as ui:
225
  gr.Markdown(
226
- "### Bytez -> OpenAI Proxy\n"
227
  "- `/v1/models` \n"
228
- "- `/v1/chat/completions` (original) \n"
229
  "- **`/v2/chat/completions`** – clean streaming, no usage chunk"
230
  )
231
 
232
  demo = gr.mount_gradio_app(api, ui, path="/")
233
 
234
  # ---------------------------------------------------------------------
235
- # Local dev
236
  # ---------------------------------------------------------------------
237
  if __name__ == "__main__":
238
  uvicorn.run(demo, host="0.0.0.0", port=7860)
 
12
  # ---------------------------------------------------------------------
13
  BYTEZ_CHAT_URL = "https://api.bytez.com/models/v2/openai/v1/chat/completions"
14
  BYTEZ_MODELS_URL = "https://api.bytez.com/models/v2/list/models"
15
+ BYTEZ_AUTH = os.getenv("BYTEZ_API_KEY") # your Bytez key
16
+ LOCAL_API_KEY = os.getenv("LOCAL_API_KEY") # optional local guard
 
17
 
18
  # ---------------------------------------------------------------------
19
  # FastAPI app
20
  # ---------------------------------------------------------------------
21
+ api = FastAPI(title="Bytez OpenAI Proxy (v1 + v2)")
22
 
23
  def check_key(auth: str | None):
24
+ """Validate the Bearer token (optional local key)."""
25
  if not auth or not auth.startswith("Bearer "):
26
  raise HTTPException(status_code=401, detail="Missing or invalid API key")
27
  user_key = auth.split("Bearer ")[1].strip()
 
36
  return {"status": "ok", "message": "Bytez proxy (v1+v2) running"}
37
 
38
  # ---------------------------------------------------------------------
39
+ # -------------------------- /v1 ------------------------------------
40
  # ---------------------------------------------------------------------
41
+
42
  @api.get("/v1/models")
43
  async def v1_models(authorization: str = Header(None)):
44
  check_key(authorization)
 
53
  except json.JSONDecodeError:
54
  raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")
55
 
56
+ # Transform Bytez → OpenAI list
57
  models_list = [
58
  {"id": m.get("id") or m.get("name"), "object": "model"}
59
  for m in (data if isinstance(data, list) else data.get("data", []))
 
63
  headers={"Access-Control-Allow-Origin": "*"}
64
  )
65
 
66
+
 
 
67
  @api.post("/v1/chat/completions")
68
  async def v1_chat(request: Request, authorization: str = Header(None)):
69
+ """Exactly the same implementation you already had – untouched."""
70
  check_key(authorization)
71
  if not BYTEZ_AUTH:
72
  raise HTTPException(status_code=500, detail="Server BYTEZ_API_KEY not configured")
 
75
  stream = payload.get("stream", False)
76
  headers = {"Authorization": BYTEZ_AUTH, "Content-Type": "application/json"}
77
 
78
+ # ---------- streaming helper ----------
79
  async def v1_event_stream():
80
  async with httpx.AsyncClient(timeout=120) as client:
81
  async with client.stream("POST", BYTEZ_CHAT_URL, headers=headers, json=payload) as upstream:
 
93
  yield "data: [DONE]\n\n"
94
  break
95
 
96
+ # ----- adapt Bytez chunk to OpenAI -----
97
  content = ""
98
  if "token" in chunk:
99
  content = chunk["token"]
 
121
  yield f"data: {json.dumps(openai_chunk)}\n\n"
122
  yield "data: [DONE]\n\n"
123
 
124
+ # ---------- non-stream ----------
125
  if not stream:
126
  async with httpx.AsyncClient(timeout=120) as c:
127
  r = await c.post(BYTEZ_CHAT_URL, headers=headers, json=payload)
 
129
  data = r.json()
130
  except json.JSONDecodeError:
131
  raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")
132
+
133
  if "choices" not in data:
134
  content = data.get("output") or data.get("response") or data.get("message") or str(data)
135
  data = {
 
139
  }
140
  return JSONResponse(data, headers={"Access-Control-Allow-Origin": "*"})
141
 
142
+ return StreamingResponse(
143
+ v1_event_stream(),
144
+ media_type="text/event-stream",
145
+ headers={"Access-Control-Allow-Origin": "*"},
146
+ )
147
 
148
  # ---------------------------------------------------------------------
149
+ # -------------------------- /v2 ------------------------------------
150
  # ---------------------------------------------------------------------
151
+
152
  @api.post("/v2/chat/completions")
153
  async def v2_chat_completions(request: Request, authorization: str = Header(None)):
154
+ """
155
+ **v2** – clean OpenAI-compatible streaming.
156
+ * Only `delta.content` is sent.
157
+ * The final usage-statistics chunk is stripped.
158
+ * `[DONE]` is sent exactly once.
159
+ """
160
  check_key(authorization)
161
+ if not BYTEZ_AUTH:
162
+ raise HTTPException(status_code=500, detail="Server BYTEZ_API_KEY not configured")
 
163
 
164
  payload = await request.json()
165
  stream = payload.get("stream", False)
 
166
 
167
+ upstream_headers = {
168
+ "Authorization": BYTEZ_AUTH,
169
+ "Content-Type": "application/json",
170
+ }
171
+
172
+ # -----------------------------------------------------------------
173
+ # Helper: turn any Bytez chunk into a **minimal** OpenAI delta chunk
174
+ # -----------------------------------------------------------------
175
  def make_openai_delta(content: str):
176
  return {
177
  "id": f"chatcmpl-v2-{int(time.time())}",
178
  "object": "chat.completion.chunk",
179
  "created": int(time.time()),
180
  "model": payload.get("model", "unknown"),
181
+ "choices": [
182
+ {
183
+ "index": 0,
184
+ "delta": {"content": content},
185
+ "finish_reason": None,
186
+ }
187
+ ],
188
  }
189
 
190
+ # -----------------------------------------------------------------
191
+ # Streaming generator – **filters out the final usage object**
192
+ # -----------------------------------------------------------------
193
  async def clean_stream():
194
  async with httpx.AsyncClient(timeout=180) as client:
195
+ async with client.stream(
196
+ "POST", BYTEZ_CHAT_URL, headers=upstream_headers, json=payload
197
+ ) as upstream:
198
+
199
  async for line in upstream.aiter_lines():
200
  line = line.strip()
201
  if not line:
202
  continue
203
 
204
+ # Bytez may send "data: {...}" or raw JSON
205
  json_str = line[6:] if line.startswith("data: ") else line
206
 
207
+ # -----------------------------------------------------------------
208
+ # 1. Skip the final usage chunk (it contains `usage` field)
209
+ # -----------------------------------------------------------------
210
  if "usage" in json_str.lower():
211
  continue
212
 
213
+ # -----------------------------------------------------------------
214
+ # 2. [DONE] signal
215
+ # -----------------------------------------------------------------
216
  if json_str == "[DONE]":
217
  yield "data: [DONE]\n\n"
218
  return
 
222
  except json.JSONDecodeError:
223
  continue
224
 
225
+ # -----------------------------------------------------------------
226
+ # 3. Extract the text token
227
+ # -----------------------------------------------------------------
228
  text = ""
229
  if isinstance(chunk, dict):
230
+ # most common patterns
231
  if "token" in chunk:
232
  text = chunk["token"]
233
  elif "choices" in chunk and chunk["choices"]:
 
235
  text = delta.get("content", "")
236
  elif "text" in chunk:
237
  text = chunk["text"]
238
+ # fallback – stringify whole chunk (rare)
239
  else:
240
  text = str(chunk)
241
 
242
+ # -----------------------------------------------------------------
243
+ # 4. Yield clean OpenAI delta
244
+ # -----------------------------------------------------------------
245
  if text:
246
  yield f"data: {json.dumps(make_openai_delta(text))}\n\n"
247
 
248
+ # If upstream never sent [DONE], send it ourselves
249
  yield "data: [DONE]\n\n"
250
 
251
+ # -----------------------------------------------------------------
252
+ # Non-streaming path (identical to v1, but we keep it for completeness)
253
+ # -----------------------------------------------------------------
254
  if not stream:
255
  async with httpx.AsyncClient(timeout=120) as c:
256
  r = await c.post(BYTEZ_CHAT_URL, headers=upstream_headers, json=payload)
 
258
  data = r.json()
259
  except json.JSONDecodeError:
260
  raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")
261
+
262
+ # Normalise to OpenAI shape
263
  if "choices" not in data:
264
+ content = (
265
+ data.get("output")
266
+ or data.get("response")
267
+ or data.get("message")
268
+ or str(data)
269
+ )
270
  data = {
271
  "id": "chatcmpl-v2",
272
  "object": "chat.completion",
273
+ "choices": [
274
+ {"index": 0, "message": {"role": "assistant", "content": content}}
275
+ ],
276
  }
277
  return JSONResponse(data, headers={"Access-Control-Allow-Origin": "*"})
278
 
279
+ # -----------------------------------------------------------------
280
+ # Return clean SSE stream
281
+ # -----------------------------------------------------------------
282
+ return StreamingResponse(
283
+ clean_stream(),
284
+ media_type="text/event-stream",
285
+ headers={"Access-Control-Allow-Origin": "*"},
286
+ )
287
 
288
  # ---------------------------------------------------------------------
289
+ # Minimal Gradio UI (required for HF Space to start)
290
  # ---------------------------------------------------------------------
291
  with gr.Blocks() as ui:
292
  gr.Markdown(
293
+ "### Bytez OpenAI Proxy (v1 + **v2**)\n"
294
  "- `/v1/models` \n"
295
+ "- `/v1/chat/completions` (unchanged) \n"
296
  "- **`/v2/chat/completions`** – clean streaming, no usage chunk"
297
  )
298
 
299
  demo = gr.mount_gradio_app(api, ui, path="/")
300
 
301
  # ---------------------------------------------------------------------
302
+ # Local dev entrypoint
303
  # ---------------------------------------------------------------------
304
  if __name__ == "__main__":
305
  uvicorn.run(demo, host="0.0.0.0", port=7860)