localhost-llm committed on
Commit
b3d421a
·
verified ·
1 Parent(s): ece5395

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +429 -0
app.py ADDED
@@ -0,0 +1,429 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import time
4
+ import httpx
5
+ import uvicorn
6
+ import gradio as gr
7
+ from fastapi import FastAPI, Request, Header, HTTPException
8
+ from fastapi.responses import JSONResponse, StreamingResponse
9
+
10
# ---------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------
# Upstream Bytez endpoints: OpenAI-compatible chat, model listing, DALL-E 3.
BYTEZ_CHAT_URL = "https://api.bytez.com/models/v2/openai/v1/chat/completions"
BYTEZ_MODELS_URL = "https://api.bytez.com/models/v2/list/models"
BYTEZ_IMAGE_URL = "https://api.bytez.com/models/v2/openai/dall-e-3"
BYTEZ_AUTH = os.getenv("BYTEZ_API_KEY")  # your Bytez key (used by /v1 routes)
LOCAL_API_KEY = os.getenv("LOCAL_API_KEY")  # optional local guard
BYTEZ_AUTH_2 = os.getenv("BYTEZ_API_KEY_2")  # second key (used by /v2 and image routes)
# ---------------------------------------------------------------------
# FastAPI app
# ---------------------------------------------------------------------
api = FastAPI(title="Bytez → OpenAI Proxy (v1 + v2)")
23
+
24
def check_key(auth: str | None) -> None:
    """Validate the incoming ``Authorization`` header.

    Raises:
        HTTPException: 401 when the header is missing or not a Bearer token;
            403 when ``LOCAL_API_KEY`` is configured and the supplied key
            does not match it.
    """
    if not auth or not auth.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="Missing or invalid API key")
    # BUGFIX: removeprefix strips exactly one leading "Bearer " marker.
    # The old `auth.split("Bearer ")[1]` mis-indexed whenever the token
    # itself contained the substring "Bearer " (split yields >2 parts and
    # element [1] can be the empty string).
    user_key = auth.removeprefix("Bearer ").strip()
    if LOCAL_API_KEY and user_key != LOCAL_API_KEY:
        raise HTTPException(status_code=403, detail="Unauthorized API key")
31
+
32
+ # ---------------------------------------------------------------------
33
+ # Root / health
34
+ # ---------------------------------------------------------------------
35
@api.get("/")
def root():
    """Health-check / liveness probe confirming the proxy is running."""
    payload = {"status": "ok", "message": "Bytez proxy (v1+v2) running"}
    return payload
38
+
39
+ # ---------------------------------------------------------------------
40
+ # -------------------------- /v1 ------------------------------------
41
+ # ---------------------------------------------------------------------
42
+
43
@api.get("/v1/models")
async def v1_models(authorization: str = Header(None)):
    """List available models, translated into OpenAI's ``/v1/models`` shape."""
    check_key(authorization)
    if not BYTEZ_AUTH:
        raise HTTPException(status_code=500, detail="Server BYTEZ_API_KEY not configured")

    async with httpx.AsyncClient(timeout=30) as client:
        upstream = await client.get(BYTEZ_MODELS_URL, headers={"Authorization": BYTEZ_AUTH})

    try:
        body = upstream.json()
    except json.JSONDecodeError:
        raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")

    # Bytez may return either a bare list or an envelope with a "data" key.
    raw_models = body if isinstance(body, list) else body.get("data", [])

    entries = []
    for model in raw_models:
        entries.append({"id": model.get("id") or model.get("name"), "object": "model"})

    return JSONResponse(
        {"object": "list", "data": entries},
        headers={"Access-Control-Allow-Origin": "*"}
    )
66
+
67
+
68
@api.post("/v1/chat/completions")
async def v1_chat(request: Request, authorization: str = Header(None)):
    """OpenAI-compatible chat completions proxied to Bytez (v1 route).

    Supports both streaming (SSE) and non-streaming modes. Upstream Bytez
    chunk/response shapes are adapted to the OpenAI schema.
    """
    check_key(authorization)
    if not BYTEZ_AUTH:
        raise HTTPException(status_code=500, detail="Server BYTEZ_API_KEY not configured")

    payload = await request.json()
    stream = payload.get("stream", False)
    headers = {"Authorization": BYTEZ_AUTH, "Content-Type": "application/json"}

    # ---------- streaming helper ----------
    async def v1_event_stream():
        async with httpx.AsyncClient(timeout=120) as client:
            async with client.stream("POST", BYTEZ_CHAT_URL, headers=headers, json=payload) as upstream:
                async for line in upstream.aiter_lines():
                    line = line.strip()
                    if not line:
                        continue
                    json_str = line[6:] if line.startswith("data: ") else line

                    # BUGFIX: test for the [DONE] sentinel BEFORE json.loads.
                    # "[DONE]" is not valid JSON, so with the old order the
                    # decode failure hit `continue` first and this branch was
                    # unreachable — the loop never terminated early. The
                    # single terminating [DONE] is emitted after the loop.
                    if json_str == "[DONE]":
                        break

                    try:
                        chunk = json.loads(json_str)
                    except json.JSONDecodeError:
                        continue

                    # ----- adapt Bytez chunk to OpenAI -----
                    content = ""
                    if "token" in chunk:
                        content = chunk["token"]
                    elif "choices" in chunk and chunk["choices"]:
                        delta = chunk["choices"][0].get("delta", {})
                        content = delta.get("content", "")
                    elif "text" in chunk:
                        content = chunk["text"]
                    else:
                        content = str(chunk)

                    openai_chunk = {
                        "id": "chatcmpl-proxy-stream",
                        "object": "chat.completion.chunk",
                        "created": int(time.time()),
                        "model": payload.get("model", "unknown"),
                        "choices": [
                            {
                                "index": 0,
                                "delta": {"role": "assistant", "content": content},
                                "finish_reason": None,
                            }
                        ],
                    }
                    yield f"data: {json.dumps(openai_chunk)}\n\n"
        yield "data: [DONE]\n\n"

    # ---------- non-stream ----------
    if not stream:
        async with httpx.AsyncClient(timeout=120) as c:
            r = await c.post(BYTEZ_CHAT_URL, headers=headers, json=payload)
        try:
            data = r.json()
        except json.JSONDecodeError:
            raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")

        # If upstream did not return OpenAI shape, synthesize one.
        if "choices" not in data:
            content = data.get("output") or data.get("response") or data.get("message") or str(data)
            data = {
                "id": "chatcmpl-proxy",
                "object": "chat.completion",
                "choices": [{"index": 0, "message": {"role": "assistant", "content": content}}],
            }
        return JSONResponse(data, headers={"Access-Control-Allow-Origin": "*"})

    return StreamingResponse(
        v1_event_stream(),
        media_type="text/event-stream",
        headers={"Access-Control-Allow-Origin": "*"},
    )
148
+
149
+ # ---------------------------------------------------------------------
150
+ # --------------------- /v1/images/generations (FIXED) ---------------
151
+ # ---------------------------------------------------------------------
152
@api.post("/v1/images/generations")
async def v1_images_generations(request: Request, authorization: str = Header(None)):
    """
    OpenAI-compatible DALL·E-3 image generation via Bytez.

    Accepts the standard OpenAI payload — ``model`` is tolerated but ignored
    (Bytez selects the model via the URL path) — and returns the OpenAI
    response shape with either ``url`` or ``b64_json`` entries.
    """
    check_key(authorization)

    if not BYTEZ_AUTH_2:
        raise HTTPException(status_code=500, detail="Server BYTEZ_API_KEY_2 not configured")

    try:
        payload = await request.json()
    except json.JSONDecodeError:
        raise HTTPException(status_code=400, detail="Invalid JSON")

    prompt = payload.get("prompt")
    if not prompt or not prompt.strip():
        raise HTTPException(status_code=400, detail="Field 'prompt' is required and cannot be empty")

    # These fields are required by UI tools (Continue.dev, Cursor, ...) even
    # though some are ignored on the Bytez side.
    model_name = payload.get("model", "dall-e-3")  # accepted for compatibility only — not forwarded
    n = payload.get("n", 1)
    size = payload.get("size", "1024x1024")
    quality = payload.get("quality", "standard")
    style = payload.get("style")  # vivid or natural
    response_format = payload.get("response_format", "url")  # url or b64_json

    # Bytez accepts the same size strings as OpenAI; anything else falls back.
    if size not in ["1024x1024", "1024x1792", "1792x1024"]:
        size = "1024x1024"

    bytez_payload = {
        "text": prompt,
        "num_outputs": n,
        "size": size,
    }
    if quality in ["standard", "hd"]:
        bytez_payload["quality"] = quality
    if style in ["vivid", "natural"]:
        bytez_payload["style"] = style

    headers = {
        "Authorization": BYTEZ_AUTH_2,
        "Content-Type": "application/json",
    }

    async with httpx.AsyncClient(timeout=200) as client:
        try:
            # CONSISTENCY FIX: use the module-level BYTEZ_IMAGE_URL constant
            # instead of duplicating the hard-coded URL string here.
            resp = await client.post(
                BYTEZ_IMAGE_URL,
                json=bytez_payload,
                headers=headers,
            )
            resp.raise_for_status()
        except httpx.HTTPStatusError as e:
            try:
                error_detail = e.response.json()
            # BUGFIX: narrowed from a bare `except:`, which also swallowed
            # SystemExit/KeyboardInterrupt.
            except Exception:
                error_detail = e.response.text
            raise HTTPException(status_code=e.response.status_code, detail=error_detail)
        except Exception as e:
            raise HTTPException(status_code=502, detail=f"Bytez unreachable: {str(e)}")

    try:
        bytez_data = resp.json()
    except json.JSONDecodeError:
        raise HTTPException(status_code=502, detail="Bytez returned invalid JSON")

    # Handle different possible response shapes from Bytez.
    images = bytez_data.get("images") or bytez_data.get("data") or []
    if isinstance(images, str):
        images = [images]
    if not images:
        raise HTTPException(status_code=500, detail="No images returned from Bytez")

    # Build the OpenAI-shaped response.
    # NOTE(review): assumes each element is a string (data URI or raw base64);
    # if Bytez ever returns dicts here, startswith() raises — confirm upstream shape.
    openai_images = []
    for img_data in images:
        if img_data.startswith("data:image"):
            b64 = img_data.split("base64,")[-1]
            url = img_data
        else:
            b64 = img_data
            url = f"data:image/png;base64,{img_data}"

        item = {}
        if response_format == "b64_json" or response_format is None:
            item["b64_json"] = b64
        else:
            item["url"] = url

        # Include revised_prompt when Bytez supplies one.
        if "revised_prompt" in bytez_data:
            item["revised_prompt"] = bytez_data["revised_prompt"]

        openai_images.append(item)

    final_response = {
        "created": int(time.time()),
        "data": openai_images
    }

    return JSONResponse(final_response, headers={"Access-Control-Allow-Origin": "*"})
258
+
259
+
260
+
261
+ # ---------------------------------------------------------------------
262
+ # -------------------------- /v2 ------------------------------------
263
+ # ---------------------------------------------------------------------
264
+
265
@api.post("/v2/chat/completions")
async def v2_chat_completions(request: Request, authorization: str = Header(None)):
    """
    v2 – clean OpenAI-compatible streaming.

    * First chunk includes role=assistant (required by Continue.dev)
    * Later chunks send only delta.content
    * Upstream usage-accounting events are filtered out
    """
    check_key(authorization)

    if not BYTEZ_AUTH_2:
        # BUGFIX: the old message said "BYTEZ_API_2", which is not the real
        # env var name and sent operators hunting for the wrong setting.
        raise HTTPException(status_code=500, detail="Server BYTEZ_API_KEY_2 not configured")

    try:
        body = await request.body()
        payload = json.loads(body.decode("utf-8"))
    except json.JSONDecodeError as e:
        raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}")

    stream = payload.get("stream", False)

    upstream_headers = {
        "Authorization": BYTEZ_AUTH_2,
        "Content-Type": "application/json",
    }

    # Normal content chunk (NO ROLE — only the first chunk carries it).
    def make_openai_delta(content: str):
        return {
            "id": f"chatcmpl-v2-{int(time.time())}",
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": payload.get("model", "unknown"),
            "choices": [
                {
                    "index": 0,
                    "delta": {"content": content},
                    "finish_reason": None,
                }
            ],
        }

    async def clean_stream():
        # FIRST CHUNK MUST SET THE ROLE → REQUIRED by Continue.dev
        first_chunk = {
            "id": f"chatcmpl-v2-{int(time.time())}",
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": payload.get("model", "unknown"),
            "choices": [
                {
                    "index": 0,
                    "delta": {"role": "assistant", "content": ""},
                    "finish_reason": None,
                }
            ],
        }

        # Send first role-setting chunk
        yield f"data: {json.dumps(first_chunk)}\n\n"

        async with httpx.AsyncClient(timeout=180) as client:
            try:
                async with client.stream(
                    "POST", BYTEZ_CHAT_URL, headers=upstream_headers, json=payload
                ) as upstream:

                    async for line in upstream.aiter_lines():
                        line = line.strip()
                        if not line:
                            continue

                        json_str = line[6:] if line.startswith("data: ") else line

                        if json_str == "[DONE]":
                            yield "data: [DONE]\n\n"
                            return

                        try:
                            chunk = json.loads(json_str)
                        except json.JSONDecodeError:
                            continue

                        # BUGFIX: the old filter (`"usage" in json_str.lower()`)
                        # dropped ANY line whose raw text contained "usage" —
                        # including genuine assistant content. Filter on the
                        # parsed structure instead: skip only accounting events
                        # that carry a top-level "usage" key and no choices.
                        if isinstance(chunk, dict) and "usage" in chunk and not chunk.get("choices"):
                            continue

                        text = ""
                        if isinstance(chunk, dict):
                            if "token" in chunk:
                                text = chunk["token"]
                            elif "choices" in chunk and chunk["choices"]:
                                delta = chunk["choices"][0].get("delta", {})
                                text = delta.get("content", "")
                            elif "text" in chunk:
                                text = chunk["text"]
                        else:
                            text = str(chunk)

                        if text:
                            yield f"data: {json.dumps(make_openai_delta(text))}\n\n"

                yield "data: [DONE]\n\n"

            except Exception as e:
                # Surface the error to the client as a content chunk, then
                # terminate the SSE stream cleanly.
                error_chunk = make_openai_delta(f"Error: {str(e)}")
                yield f"data: {json.dumps(error_chunk)}\n\n"
                yield "data: [DONE]\n\n"

    # Non-streaming mode
    if not stream:
        async with httpx.AsyncClient(timeout=120) as c:
            r = await c.post(BYTEZ_CHAT_URL, headers=upstream_headers, json=payload)
            r.raise_for_status()
            data = r.json()

        # If upstream did not return OpenAI shape, synthesize one.
        if "choices" not in data:
            content = (
                data.get("output")
                or data.get("response")
                or data.get("message")
                or str(data)
            )
            data = {
                "id": "chatcmpl-v2",
                "object": "chat.completion",
                "choices": [
                    {"index": 0, "message": {"role": "assistant", "content": content}}
                ],
            }

        return JSONResponse(data, headers={"Access-Control-Allow-Origin": "*"})

    # Streaming mode
    return StreamingResponse(
        clean_stream(),
        media_type="text/event-stream",
        headers={
            "Access-Control-Allow-Origin": "*",
            "Access-Control-Allow-Headers": "*",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
        },
    )
411
+
412
# ---------------------------------------------------------------------
# Minimal Gradio UI (required for HF Space to start)
# ---------------------------------------------------------------------
with gr.Blocks() as ui:
    gr.Markdown(
        "### Bytez → OpenAI Proxy (v1 + **v2**)\n"
        "- `/v1/models` \n"
        "- `/v1/chat/completions` (unchanged) \n"
        "- **`/v2/chat/completions`** – clean streaming, no usage chunk"
    )

# Mount the Gradio UI at "/" on top of the FastAPI app; `demo` is the
# combined ASGI application served by the Space.
demo = gr.mount_gradio_app(api, ui, path="/")
424
+
425
# ---------------------------------------------------------------------
# Local dev entrypoint
# ---------------------------------------------------------------------
if __name__ == "__main__":
    # Only runs when the file is executed directly (local development);
    # the hosting platform starts the ASGI app itself.
    uvicorn.run(demo, host="0.0.0.0", port=7860)