Hiren122 commited on
Commit
0bc47ec
·
verified ·
1 Parent(s): 4b070a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +382 -1069
app.py CHANGED
@@ -2,1130 +2,443 @@ from flask import Flask, request, Response, jsonify
2
  import requests
3
  import json
4
  import uuid
 
5
  import os
6
- from datetime import datetime
7
 
8
  app = Flask(__name__)
9
 
10
- ONYX_API_URL = os.getenv("ONYX_API_URL", "https://cloud.onyx.app/api/chat/send-chat-message")
11
- ONYX_API_TOKEN = os.getenv("ONYX_SECRET", "")
 
12
 
13
- if not ONYX_API_TOKEN:
14
- print("WARNING: ONYX_API_TOKEN not set in HuggingFace Secrets!")
15
 
16
- def stream_onyx_response(onyx_response, format_type="openai"):
17
- try:
18
- for line in onyx_response.iter_lines():
19
- if line:
20
- line = line.decode('utf-8')
21
- if line.startswith('data: '):
22
- data = line[6:]
23
- if data == '[DONE]':
24
- if format_type == "openai":
25
- yield f"data: [DONE]\n\n"
26
- break
27
- try:
28
- onyx_data = json.loads(data)
29
- content = onyx_data.get("message", "")
30
- if format_type == "huggingface":
31
- hf_chunk = {"token": {"id": 0, "text": content, "logprob": 0.0, "special": False}, "generated_text": None, "details": None}
32
- yield f"data:{json.dumps(hf_chunk)}\n\n"
33
- else:
34
- openai_chunk = {"id": f"chatcmpl-{uuid.uuid4().hex[:8]}", "object": "chat.completion.chunk", "created": int(datetime.now().timestamp()), "model": onyx_data.get("model", "unknown"), "choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}]}
35
- yield f"data: {json.dumps(openai_chunk)}\n\n"
36
- except json.JSONDecodeError:
37
- continue
38
- if format_type == "huggingface":
39
- final_hf = {"token": {"id": 0, "text": "", "logprob": 0.0, "special": True}, "generated_text": "", "details": {"finish_reason": "stop", "generated_tokens": 0, "seed": None}}
40
- yield f"data:{json.dumps(final_hf)}\n\n"
41
- else:
42
- final_chunk = {"id": f"chatcmpl-{uuid.uuid4().hex[:8]}", "object": "chat.completion.chunk", "created": int(datetime.now().timestamp()), "model": "unknown", "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]}
43
- yield f"data: {json.dumps(final_chunk)}\n\n"
44
- yield f"data: [DONE]\n\n"
45
- except Exception as e:
46
- error_chunk = {"error": {"message": str(e), "type": "server_error"}}
47
- yield f"data: {json.dumps(error_chunk)}\n\n"
48
-
49
- @app.route('/v1/chat/completions', methods=['POST'])
50
- def chat_completions():
51
- try:
52
- data = request.json
53
- messages = data.get('messages', [])
54
- model_provider = data.get('model_provider', 'openai')
55
- model_id = data.get('model', 'gpt-4')
56
- temperature = data.get('temperature', 1.0)
57
- stream_requested = data.get('stream', False)
58
- user_message = ""
59
- for msg in reversed(messages):
60
- if msg.get('role') == 'user':
61
- user_message = msg.get('content', '')
62
- break
63
- onyx_payload = {"message": user_message, "llm_override": {"model_provider": model_provider, "model_version": model_id, "temperature": temperature}, "allowed_tool_ids": [], "file_descriptors": [], "deep_research": False, "origin": "api", "parent_message_id": -1, "chat_session_id": str(uuid.uuid4()), "chat_session_info": {"persona_id": 0, "description": "OpenAI API Bridge", "project_id": 0}, "stream": True, "include_citations": True}
64
- headers = {"Authorization": f"Bearer {ONYX_API_TOKEN}", "Content-Type": "application/json"}
65
- onyx_response = requests.post(ONYX_API_URL, json=onyx_payload, headers=headers, stream=True)
66
- if onyx_response.status_code != 200:
67
- return jsonify({"error": {"message": f"Onyx API error: {onyx_response.status_code}", "type": "api_error"}}), onyx_response.status_code
68
- if stream_requested:
69
- return Response(stream_onyx_response(onyx_response, "openai"), mimetype='text/event-stream', headers={'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no'})
70
- else:
71
- full_content = ""
72
- for line in onyx_response.iter_lines():
73
- if line:
74
- line = line.decode('utf-8')
75
- if line.startswith('data: '):
76
- data_str = line[6:]
77
- if data_str != '[DONE]':
78
- try:
79
- onyx_data = json.loads(data_str)
80
- full_content += onyx_data.get("message", "")
81
- except json.JSONDecodeError:
82
- continue
83
- return jsonify({"id": f"chatcmpl-{uuid.uuid4().hex[:8]}", "object": "chat.completion", "created": int(datetime.now().timestamp()), "model": model_id, "choices": [{"index": 0, "message": {"role": "assistant", "content": full_content}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}})
84
- except Exception as e:
85
- return jsonify({"error": {"message": str(e), "type": "server_error"}}), 500
86
-
87
- @app.route('/generate', methods=['POST'])
88
- @app.route('/v1/completions', methods=['POST'])
89
- def hf_generate():
90
- try:
91
- data = request.json
92
- inputs = data.get('inputs', '')
93
- parameters = data.get('parameters', {})
94
- model_provider = parameters.get('model_provider', 'openai')
95
- model_id = parameters.get('model', 'gpt-4')
96
- temperature = parameters.get('temperature', 1.0)
97
- stream_requested = data.get('stream', False)
98
- onyx_payload = {"message": inputs, "llm_override": {"model_provider": model_provider, "model_version": model_id, "temperature": temperature}, "allowed_tool_ids": [], "file_descriptors": [], "deep_research": False, "origin": "api", "parent_message_id": -1, "chat_session_id": str(uuid.uuid4()), "chat_session_info": {"persona_id": 0, "description": "HuggingFace API Bridge", "project_id": 0}, "stream": True, "include_citations": True}
99
- headers = {"Authorization": f"Bearer {ONYX_API_TOKEN}", "Content-Type": "application/json"}
100
- onyx_response = requests.post(ONYX_API_URL, json=onyx_payload, headers=headers, stream=True)
101
- if onyx_response.status_code != 200:
102
- return jsonify({"error": f"Onyx API error: {onyx_response.status_code}"}), onyx_response.status_code
103
- if stream_requested:
104
- return Response(stream_onyx_response(onyx_response, "huggingface"), mimetype='text/event-stream', headers={'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no'})
105
- else:
106
- full_content = ""
107
- for line in onyx_response.iter_lines():
108
- if line:
109
- line = line.decode('utf-8')
110
- if line.startswith('data: '):
111
- data_str = line[6:]
112
- if data_str != '[DONE]':
113
- try:
114
- onyx_data = json.loads(data_str)
115
- full_content += onyx_data.get("message", "")
116
- except json.JSONDecodeError:
117
- continue
118
- return jsonify([{"generated_text": full_content}])
119
- except Exception as e:
120
- return jsonify({"error": str(e)}), 500
121
-
122
- @app.route('/v1/models', methods=['POST', 'GET'])
123
- @app.route('/models', methods=['POST', 'GET'])
124
- def list_models():
125
- return jsonify({"object": "list", "data": [{"id": "gpt-4", "object": "model", "created": int(datetime.now().timestamp()), "owned_by": "onyx"}, {"id": "claude-3-5-sonnet", "object": "model", "created": int(datetime.now().timestamp()), "owned_by": "onyx"}]})
126
 
127
- @app.route('/health', methods=['GET'])
128
- @app.route('/', methods=['GET'])
129
- def health():
130
- return jsonify({"status": "ok", "api_token_set": bool(ONYX_API_TOKEN), "endpoints": {"openai": "/v1/chat/completions", "huggingface": "/generate", "models": "/v1/models"}})
131
 
132
- if __name__ == '__main__':
133
- port = int(os.getenv("PORT", 7860))
134
- app.run(host="0.0.0.0", port=port) hf_chunk = {
135
- "token": {
136
- "id": 0,
137
- "text": content,
138
- "logprob": 0.0,
139
- "special": False
140
- },
141
- "generated_text": None,
142
- "details": None
143
- }
144
- yield f"data:{json.dumps(hf_chunk)}\n\n"
145
- else:
146
- openai_chunk = {
147
- "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
148
- "object": "chat.completion.chunk",
149
- "created": int(datetime.now().timestamp()),
150
- "model": onyx_data.get("model", "unknown"),
151
- "choices": [{
152
- "index": 0,
153
- "delta": {"content": content},
154
- "finish_reason": None
155
- }]
156
- }
157
- yield f"data: {json.dumps(openai_chunk)}\n\n"
158
- except json.JSONDecodeError:
159
- continue
160
-
161
- if format_type == "huggingface":
162
- final_hf = {
163
- "token": {
164
- "id": 0,
165
- "text": "",
166
- "logprob": 0.0,
167
- "special": True
168
- },
169
- "generated_text": "",
170
- "details": {
171
- "finish_reason": "stop",
172
- "generated_tokens": 0,
173
- "seed": None
174
- }
175
- }
176
- yield f"data:{json.dumps(final_hf)}\n\n"
177
- else:
178
- final_chunk = {
179
- "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
180
- "object": "chat.completion.chunk",
181
- "created": int(datetime.now().timestamp()),
182
- "model": "unknown",
183
- "choices": [{
184
- "index": 0,
185
- "delta": {},
186
- "finish_reason": "stop"
187
- }]
188
- }
189
- yield f"data: {json.dumps(final_chunk)}\n\n"
190
- yield f"data: [DONE]\n\n"
191
-
192
- except Exception as e:
193
- error_chunk = {"error": {"message": str(e), "type": "server_error"}}
194
- yield f"data: {json.dumps(error_chunk)}\n\n"
195
 
196
- @app.route('/v1/chat/completions', methods=['POST'])
197
- def chat_completions():
198
- """OpenAI-compatible endpoint"""
199
- try:
200
- data = request.json
201
- messages = data.get('messages', [])
202
- model_provider = data.get('model_provider', 'openai')
203
- model_id = data.get('model', 'gpt-4')
204
- temperature = data.get('temperature', 1.0)
205
- stream_requested = data.get('stream', False)
206
-
207
- user_message = ""
208
- for msg in reversed(messages):
209
- if msg.get('role') == 'user':
210
- user_message = msg.get('content', '')
211
- break
212
-
213
- onyx_payload = {
214
- "message": user_message,
215
- "llm_override": {
216
- "model_provider": model_provider,
217
- "model_version": model_id,
218
- "temperature": temperature
219
- },
220
- "allowed_tool_ids": [],
221
- "file_descriptors": [],
222
- "deep_research": False,
223
- "origin": "api",
224
- "parent_message_id": -1,
225
- "chat_session_id": str(uuid.uuid4()),
226
- "chat_session_info": {
227
- "persona_id": 0,
228
- "description": "OpenAI API Bridge",
229
- "project_id": 0
230
- },
231
- "stream": True,
232
- "include_citations": True
233
- }
234
-
235
- headers = {
236
- "Authorization": f"Bearer {ONYX_API_TOKEN}",
237
- "Content-Type": "application/json"
238
- }
239
-
240
- onyx_response = requests.post(ONYX_API_URL, json=onyx_payload, headers=headers, stream=True)
241
-
242
- if onyx_response.status_code != 200:
243
- return jsonify({
244
- "error": {
245
- "message": f"Onyx API error: {onyx_response.status_code}",
246
- "type": "api_error"
247
- }
248
- }), onyx_response.status_code
249
-
250
- if stream_requested:
251
- return Response(
252
- stream_onyx_response(onyx_response, "openai"),
253
- mimetype='text/event-stream',
254
- headers={'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no'}
255
- )
256
- else:
257
- full_content = ""
258
- for line in onyx_response.iter_lines():
259
- if line:
260
- line = line.decode('utf-8')
261
- if line.startswith('data: '):
262
- data = line[6:]
263
- if data != '[DONE]':
264
- try:
265
- onyx_data = json.loads(data)
266
- full_content += onyx_data.get("message", "")
267
- except json.JSONDecodeError:
268
- continue
269
-
270
- return jsonify({
271
- "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
272
- "object": "chat.completion",
273
- "created": int(datetime.now().timestamp()),
274
- "model": model_id,
275
- "choices": [{
276
- "index": 0,
277
- "message": {"role": "assistant", "content": full_content},
278
- "finish_reason": "stop"
279
- }],
280
- "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
281
- })
282
 
283
- except Exception as e:
284
- return jsonify({"error": {"message": str(e), "type": "server_error"}}), 500
285
-
286
- @app.route('/generate', methods=['POST'])
287
- @app.route('/v1/completions', methods=['POST'])
288
- def hf_generate():
289
- """HuggingFace TGI-compatible endpoint"""
290
- try:
291
- data = request.json
292
- inputs = data.get('inputs', '')
293
- parameters = data.get('parameters', {})
294
- model_provider = parameters.get('model_provider', 'openai')
295
- model_id = parameters.get('model', 'gpt-4')
296
- temperature = parameters.get('temperature', 1.0)
297
- stream_requested = data.get('stream', False)
298
-
299
- onyx_payload = {
300
- "message": inputs,
301
- "llm_override": {
302
- "model_provider": model_provider,
303
- "model_version": model_id,
304
- "temperature": temperature
305
- },
306
- "allowed_tool_ids": [],
307
- "file_descriptors": [],
308
- "deep_research": False,
309
- "origin": "api",
310
- "parent_message_id": -1,
311
- "chat_session_id": str(uuid.uuid4()),
312
- "chat_session_info": {
313
- "persona_id": 0,
314
- "description": "HuggingFace API Bridge",
315
- "project_id": 0
316
- },
317
- "stream": True,
318
- "include_citations": True
319
- }
320
-
321
- headers = {
322
- "Authorization": f"Bearer {ONYX_API_TOKEN}",
323
- "Content-Type": "application/json"
324
- }
325
-
326
- onyx_response = requests.post(ONYX_API_URL, json=onyx_payload, headers=headers, stream=True)
327
-
328
- if onyx_response.status_code != 200:
329
- return jsonify({"error": f"Onyx API error: {onyx_response.status_code}"}), onyx_response.status_code
330
-
331
- if stream_requested:
332
- return Response(
333
- stream_onyx_response(onyx_response, "huggingface"),
334
- mimetype='text/event-stream',
335
- headers={'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no'}
336
- )
337
- else:
338
- full_content = ""
339
- for line in onyx_response.iter_lines():
340
- if line:
341
- line = line.decode('utf-8')
342
- if line.startswith('data: '):
343
- data = line[6:]
344
- if data != '[DONE]':
345
- try:
346
- onyx_data = json.loads(data)
347
- full_content += onyx_data.get("message", "")
348
- except json.JSONDecodeError:
349
- continue
350
-
351
- return jsonify([{"generated_text": full_content}])
352
 
353
- except Exception as e:
354
- return jsonify({"error": str(e)}), 500
355
-
356
- @app.route('/v1/models', methods=['POST', 'GET'])
357
- @app.route('/models', methods=['POST', 'GET'])
358
- def list_models():
359
- """List available models"""
360
- return jsonify({
361
- "object": "list",
362
- "data": [
363
- {"id": "gpt-4", "object": "model", "created": int(datetime.now().timestamp()), "owned_by": "onyx"},
364
- {"id": "claude-3-5-sonnet", "object": "model", "created": int(datetime.now().timestamp()), "owned_by": "onyx"}
365
- ]
366
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
 
368
- @app.route('/health', methods=['GET'])
369
- @app.route('/', methods=['GET'])
370
- def health():
371
- """Health check endpoint"""
372
- return jsonify({
373
- "status": "ok",
374
- "api_token_set": bool(ONYX_API_TOKEN),
375
- "endpoints": {
376
- "openai": "/v1/chat/completions",
377
- "huggingface": "/generate or /v1/completions",
378
- "models": "/v1/models"
379
- }
380
- })
381
 
382
- if __name__ == '__main__':
383
- port = int(os.getenv("PORT", 7860))
384
- app.run(host="0.0.0.0", port=port) hf_chunk = {
385
- "token": {
386
- "id": 0,
387
- "text": content,
388
- "logprob": 0.0,
389
- "special": False
390
- },
391
- "generated_text": None,
392
- "details": None
393
- }
394
- yield f"data:{json.dumps(hf_chunk)}\n\n"
395
- else:
396
- openai_chunk = {
397
- "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
398
- "object": "chat.completion.chunk",
399
- "created": int(datetime.now().timestamp()),
400
- "model": onyx_data.get("model", "unknown"),
401
- "choices": [{
402
- "index": 0,
403
- "delta": {"content": content},
404
- "finish_reason": None
405
- }]
406
- }
407
- yield f"data: {json.dumps(openai_chunk)}\n\n"
408
- except json.JSONDecodeError:
409
- continue
410
-
411
- if format_type == "huggingface":
412
- final_hf = {
413
- "token": {
414
- "id": 0,
415
- "text": "",
416
- "logprob": 0.0,
417
- "special": True
418
- },
419
- "generated_text": "",
420
- "details": {
421
- "finish_reason": "stop",
422
- "generated_tokens": 0,
423
- "seed": None
424
- }
425
- }
426
- yield f"data:{json.dumps(final_hf)}\n\n"
427
- else:
428
- final_chunk = {
429
- "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
430
- "object": "chat.completion.chunk",
431
- "created": int(datetime.now().timestamp()),
432
- "model": "unknown",
433
- "choices": [{
434
- "index": 0,
435
- "delta": {},
436
- "finish_reason": "stop"
437
- }]
438
- }
439
- yield f"data: {json.dumps(final_chunk)}\n\n"
440
- yield f"data: [DONE]\n\n"
441
-
442
- except Exception as e:
443
- error_chunk = {"error": {"message": str(e), "type": "server_error"}}
444
- yield f"data: {json.dumps(error_chunk)}\n\n"
445
 
446
- @app.route('/v1/chat/completions', methods=['POST'])
447
- def chat_completions():
448
- """OpenAI-compatible endpoint"""
449
- try:
450
- data = request.json
451
- messages = data.get('messages', [])
452
- model_provider = data.get('model_provider', 'openai')
453
- model_id = data.get('model', 'gpt-4')
454
- temperature = data.get('temperature', 1.0)
455
- stream_requested = data.get('stream', False)
456
-
457
- user_message = ""
458
- for msg in reversed(messages):
459
- if msg.get('role') == 'user':
460
- user_message = msg.get('content', '')
461
- break
462
-
463
- onyx_payload = {
464
- "message": user_message,
465
- "llm_override": {
466
- "model_provider": model_provider,
467
- "model_version": model_id,
468
- "temperature": temperature
469
- },
470
- "allowed_tool_ids": [],
471
- "file_descriptors": [],
472
- "deep_research": False,
473
- "origin": "api",
474
- "parent_message_id": -1,
475
- "chat_session_id": str(uuid.uuid4()),
476
- "chat_session_info": {
477
- "persona_id": 0,
478
- "description": "OpenAI API Bridge",
479
- "project_id": 0
480
- },
481
- "stream": True,
482
- "include_citations": True
483
- }
484
-
485
- headers = {
486
- "Authorization": f"Bearer {ONYX_API_TOKEN}",
487
- "Content-Type": "application/json"
488
- }
489
-
490
- onyx_response = requests.post(ONYX_API_URL, json=onyx_payload, headers=headers, stream=True)
491
-
492
- if onyx_response.status_code != 200:
493
- return jsonify({
494
- "error": {
495
- "message": f"Onyx API error: {onyx_response.status_code}",
496
- "type": "api_error"
497
- }
498
- }), onyx_response.status_code
499
-
500
- if stream_requested:
501
- return Response(
502
- stream_onyx_response(onyx_response, "openai"),
503
- mimetype='text/event-stream',
504
- headers={'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no'}
505
- )
506
- else:
507
- full_content = ""
508
- for line in onyx_response.iter_lines():
509
- if line:
510
- line = line.decode('utf-8')
511
- if line.startswith('data: '):
512
- data = line[6:]
513
- if data != '[DONE]':
514
- try:
515
- onyx_data = json.loads(data)
516
- full_content += onyx_data.get("message", "")
517
- except json.JSONDecodeError:
518
- continue
519
-
520
- return jsonify({
521
- "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
522
- "object": "chat.completion",
523
- "created": int(datetime.now().timestamp()),
524
- "model": model_id,
525
- "choices": [{
526
- "index": 0,
527
- "message": {"role": "assistant", "content": full_content},
528
- "finish_reason": "stop"
529
- }],
530
- "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
531
- })
532
-
533
- except Exception as e:
534
- return jsonify({"error": {"message": str(e), "type": "server_error"}}), 500
535
 
536
- @app.route('/generate', methods=['POST'])
537
- @app.route('/v1/completions', methods=['POST'])
538
- def hf_generate():
539
- """HuggingFace TGI-compatible endpoint"""
540
  try:
541
- data = request.json
542
- inputs = data.get('inputs', '')
543
- parameters = data.get('parameters', {})
544
- model_provider = parameters.get('model_provider', 'openai')
545
- model_id = parameters.get('model', 'gpt-4')
546
- temperature = parameters.get('temperature', 1.0)
547
- stream_requested = data.get('stream', False)
548
-
549
- onyx_payload = {
550
- "message": inputs,
551
- "llm_override": {
552
- "model_provider": model_provider,
553
- "model_version": model_id,
554
- "temperature": temperature
555
- },
556
- "allowed_tool_ids": [],
557
- "file_descriptors": [],
558
- "deep_research": False,
559
- "origin": "api",
560
- "parent_message_id": -1,
561
- "chat_session_id": str(uuid.uuid4()),
562
- "chat_session_info": {
563
- "persona_id": 0,
564
- "description": "HuggingFace API Bridge",
565
- "project_id": 0
566
- },
567
- "stream": True,
568
- "include_citations": True
569
- }
570
-
571
- headers = {
572
- "Authorization": f"Bearer {ONYX_API_TOKEN}",
573
- "Content-Type": "application/json"
574
- }
575
-
576
- onyx_response = requests.post(ONYX_API_URL, json=onyx_payload, headers=headers, stream=True)
577
-
578
- if onyx_response.status_code != 200:
579
- return jsonify({"error": f"Onyx API error: {onyx_response.status_code}"}), onyx_response.status_code
580
-
581
- if stream_requested:
582
- return Response(
583
- stream_onyx_response(onyx_response, "huggingface"),
584
- mimetype='text/event-stream',
585
- headers={'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no'}
586
- )
587
- else:
588
- full_content = ""
589
- for line in onyx_response.iter_lines():
590
- if line:
591
- line = line.decode('utf-8')
592
- if line.startswith('data: '):
593
- data = line[6:]
594
- if data != '[DONE]':
595
- try:
596
- onyx_data = json.loads(data)
597
- full_content += onyx_data.get("message", "")
598
- except json.JSONDecodeError:
599
- continue
600
 
601
- return jsonify([{"generated_text": full_content}])
 
 
 
602
 
603
- except Exception as e:
604
- return jsonify({"error": str(e)}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
605
 
606
- @app.route('/v1/models', methods=['POST', 'GET'])
607
- @app.route('/models', methods=['POST', 'GET'])
608
- def list_models():
609
- """List available models"""
610
- return jsonify({
611
- "object": "list",
612
- "data": [
613
- {"id": "gpt-4", "object": "model", "created": int(datetime.now().timestamp()), "owned_by": "onyx"},
614
- {"id": "claude-3-5-sonnet", "object": "model", "created": int(datetime.now().timestamp()), "owned_by": "onyx"}
615
- ]
616
- })
617
 
618
- @app.route('/health', methods=['GET'])
619
- @app.route('/', methods=['GET'])
620
- def health():
621
- """Health check and documentation"""
622
- space_host = request.host
623
 
624
- html = f"""
625
- <!DOCTYPE html>
626
- <html>
627
- <head>
628
- <title>Onyx API Bridge</title>
629
- <style>
630
- body {{
631
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
632
- max-width: 900px;
633
- margin: 40px auto;
634
- padding: 20px;
635
- background: #f5f5f5;
636
- }}
637
- .container {{
638
- background: white;
639
- padding: 30px;
640
- border-radius: 8px;
641
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
642
- }}
643
- h1 {{
644
- color: #2563eb;
645
- margin-bottom: 10px;
646
- }}
647
- .status {{
648
- display: inline-block;
649
- padding: 4px 12px;
650
- border-radius: 12px;
651
- font-size: 14px;
652
- font-weight: 500;
653
- }}
654
- .status.ok {{
655
- background: #dcfce7;
656
- color: #166534;
657
- }}
658
- .status.error {{
659
- background: #fee2e2;
660
- color: #991b1b;
661
- }}
662
- .endpoint {{
663
- background: #f9fafb;
664
- padding: 15px;
665
- border-radius: 6px;
666
- margin: 15px 0;
667
- border-left: 4px solid #2563eb;
668
- }}
669
- .endpoint-title {{
670
- font-weight: 600;
671
- color: #1f2937;
672
- margin-bottom: 8px;
673
- }}
674
- .code-block {{
675
- background: #1f2937;
676
- color: #f9fafb;
677
- padding: 15px;
678
- border-radius: 6px;
679
- margin: 10px 0;
680
- position: relative;
681
- overflow-x: auto;
682
- }}
683
- .code-block pre {{
684
- margin: 0;
685
- font-family: 'Monaco', 'Menlo', monospace;
686
- font-size: 13px;
687
- line-height: 1.5;
688
- }}
689
- .copy-btn {{
690
- position: absolute;
691
- top: 10px;
692
- right: 10px;
693
- background: #3b82f6;
694
- color: white;
695
- border: none;
696
- padding: 6px 12px;
697
- border-radius: 4px;
698
- cursor: pointer;
699
- font-size: 12px;
700
- transition: background 0.2s;
701
- }}
702
- .copy-btn:hover {{
703
- background: #2563eb;
704
- }}
705
- .copy-btn.copied {{
706
- background: #10b981;
707
- }}
708
- .url {{
709
- color: #2563eb;
710
- word-break: break-all;
711
- }}
712
- </style>
713
- </head>
714
- <body>
715
- <div class="container">
716
- <h1>🚀 Onyx API Bridge</h1>
717
- <p>
718
- <span class="status {'ok' if ONYX_API_TOKEN else 'error'}">
719
- {'✓ API Token Set' if ONYX_API_TOKEN else '✗ API Token Missing'}
720
- </span>
721
- </p>
722
-
723
- <h2>Available Endpoints</h2>
724
-
725
- <div class="endpoint">
726
- <div class="endpoint-title">OpenAI Compatible</div>
727
- <code class="url">https://{space_host}/v1/chat/completions</code>
728
- </div>
729
-
730
- <div class="endpoint">
731
- <div class="endpoint-title">HuggingFace Compatible</div>
732
- <code class="url">https://{space_host}/generate</code>
733
- </div>
734
 
735
- <div class="endpoint">
736
- <div class="endpoint-title">Models List</div>
737
- <code class="url">https://{space_host}/v1/models</code>
738
- </div>
 
 
739
 
740
- <h2>Usage Examples</h2>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
741
 
742
- <h3>OpenAI SDK</h3>
743
- <div class="code-block">
744
- <button class="copy-btn" onclick="copyCode(this)">Copy</button>
745
- <pre>from openai import OpenAI
746
-
747
- client = OpenAI(
748
- api_key="dummy",
749
- base_url="https://{space_host}/v1"
750
- )
 
 
 
 
751
 
752
- response = client.chat.completions.create(
753
- model="claude-3-5-sonnet",
754
- model_provider="anthropic",
755
- messages=[{{"role": "user", "content": "Hello!"}}],
756
- stream=True
757
- )
758
 
759
- for chunk in response:
760
- if chunk.choices[0].delta.content:
761
- print(chunk.choices[0].delta.content, end="")</pre>
762
- </div>
 
 
 
 
 
 
 
 
763
 
764
- <h3>HuggingFace Client</h3>
765
- <div class="code-block">
766
- <button class="copy-btn" onclick="copyCode(this)">Copy</button>
767
- <pre>from huggingface_hub import InferenceClient
768
-
769
- client = InferenceClient(model="https://{space_host}")
770
-
771
- for token in client.text_generation(
772
- "Hello!",
773
- stream=True,
774
- parameters={{
775
- "model_provider": "anthropic",
776
- "model": "claude-3-5-sonnet"
777
- }}
778
- ):
779
- print(token, end="")</pre>
780
- </div>
781
 
782
- <h3>cURL</h3>
783
- <div class="code-block">
784
- <button class="copy-btn" onclick="copyCode(this)">Copy</button>
785
- <pre>curl -X POST https://{space_host}/v1/chat/completions \\
786
- -H "Content-Type: application/json" \\
787
- -d '{{
788
- "model": "gpt-4",
789
- "model_provider": "openai",
790
- "messages": [{{"role": "user", "content": "Hello!"}}],
791
- "stream": false
792
- }}'</pre>
793
- </div>
794
- </div>
795
-
796
- <script>
797
- function copyCode(button) {{
798
- const codeBlock = button.nextElementSibling;
799
- const text = codeBlock.textContent;
800
-
801
- navigator.clipboard.writeText(text).then(() => {{
802
- button.textContent = 'Copied!';
803
- button.classList.add('copied');
804
- setTimeout(() => {{
805
- button.textContent = 'Copy';
806
- button.classList.remove('copied');
807
- }}, 2000);
808
- }});
809
- }}
810
- </script>
811
- </body>
812
- </html>
813
- """
814
 
815
- return html
816
-
817
- if __name__ == '__main__':
818
- port = int(os.getenv("PORT", 7860))
819
- app.run(host="0.0.0.0", port=port) hf_chunk = {
820
- "token": {
821
- "id": 0,
822
- "text": content,
823
- "logprob": 0.0,
824
- "special": False
825
- },
826
- "generated_text": None,
827
- "details": None
828
- }
829
- yield f"data:{json.dumps(hf_chunk)}\n\n"
830
- else:
831
- openai_chunk = {
832
- "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
833
- "object": "chat.completion.chunk",
834
- "created": int(datetime.now().timestamp()),
835
- "model": onyx_data.get("model", "unknown"),
836
- "choices": [{
837
- "index": 0,
838
- "delta": {"content": content},
839
- "finish_reason": None
840
- }]
841
- }
842
- yield f"data: {json.dumps(openai_chunk)}\n\n"
843
- except json.JSONDecodeError:
844
- continue
845
-
846
- if format_type == "huggingface":
847
- final_hf = {
848
- "token": {
849
- "id": 0,
850
- "text": "",
851
- "logprob": 0.0,
852
- "special": True
853
- },
854
- "generated_text": "",
855
- "details": {
856
- "finish_reason": "stop",
857
- "generated_tokens": 0,
858
- "seed": None
859
- }
860
  }
861
- yield f"data:{json.dumps(final_hf)}\n\n"
862
- else:
863
- final_chunk = {
864
- "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
865
- "object": "chat.completion.chunk",
866
- "created": int(datetime.now().timestamp()),
867
- "model": "unknown",
868
- "choices": [{
869
- "index": 0,
870
- "delta": {},
871
- "finish_reason": "stop"
872
- }]
873
- }
874
- yield f"data: {json.dumps(final_chunk)}\n\n"
875
- yield f"data: [DONE]\n\n"
876
-
877
- except Exception as e:
878
- error_chunk = {"error": {"message": str(e), "type": "server_error"}}
879
- yield f"data: {json.dumps(error_chunk)}\n\n"
880
-
881
- @app.route('/v1/chat/completions', methods=['POST'])
882
- def chat_completions():
883
- """OpenAI-compatible endpoint"""
884
- try:
885
- data = request.json
886
- messages = data.get('messages', [])
887
- model_provider = data.get('model_provider', 'openai')
888
- model_id = data.get('model', 'gpt-4')
889
- temperature = data.get('temperature', 1.0)
890
- stream_requested = data.get('stream', False)
891
-
892
- user_message = ""
893
- for msg in reversed(messages):
894
- if msg.get('role') == 'user':
895
- user_message = msg.get('content', '')
896
- break
897
-
898
- onyx_payload = {
899
- "message": user_message,
900
- "llm_override": {
901
- "model_provider": model_provider,
902
- "model_version": model_id,
903
- "temperature": temperature
904
- },
905
- "allowed_tool_ids": [],
906
- "file_descriptors": [],
907
- "deep_research": False,
908
- "origin": "api",
909
- "parent_message_id": -1,
910
- "chat_session_id": str(uuid.uuid4()),
911
- "chat_session_info": {
912
- "persona_id": 0,
913
- "description": "OpenAI API Bridge",
914
- "project_id": 0
915
  },
916
- "stream": True,
917
- "include_citations": True
 
 
 
 
918
  }
919
-
920
- headers = {
921
- "Authorization": f"Bearer {ONYX_API_TOKEN}",
922
- "Content-Type": "application/json"
923
- }
924
-
925
- onyx_response = requests.post(ONYX_API_URL, json=onyx_payload, headers=headers, stream=True)
926
-
927
- if onyx_response.status_code != 200:
928
- return jsonify({
929
- "error": {
930
- "message": f"Onyx API error: {onyx_response.status_code}",
931
- "type": "api_error"
932
- }
933
- }), onyx_response.status_code
934
-
935
- if stream_requested:
936
- return Response(
937
- stream_onyx_response(onyx_response, "openai"),
938
- mimetype='text/event-stream',
939
- headers={'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no'}
940
- )
941
- else:
942
- full_content = ""
943
- for line in onyx_response.iter_lines():
944
- if line:
945
- line = line.decode('utf-8')
946
- if line.startswith('data: '):
947
- data = line[6:]
948
- if data != '[DONE]':
949
- try:
950
- onyx_data = json.loads(data)
951
- full_content += onyx_data.get("message", "")
952
- except json.JSONDecodeError:
953
- continue
954
-
955
- return jsonify({
956
- "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
957
- "object": "chat.completion",
958
- "created": int(datetime.now().timestamp()),
959
- "model": model_id,
960
- "choices": [{
961
- "index": 0,
962
- "message": {"role": "assistant", "content": full_content},
963
- "finish_reason": "stop"
964
- }],
965
- "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
966
- })
967
 
968
- except Exception as e:
969
- return jsonify({"error": {"message": str(e), "type": "server_error"}}), 500
 
 
970
 
971
- @app.route('/generate', methods=['POST'])
972
- @app.route('/v1/completions', methods=['POST'])
973
- def hf_generate():
974
- """HuggingFace TGI-compatible endpoint"""
975
  try:
976
  data = request.json
977
- inputs = data.get('inputs', '')
978
- parameters = data.get('parameters', {})
979
- model_provider = parameters.get('model_provider', 'openai')
980
- model_id = parameters.get('model', 'gpt-4')
981
- temperature = parameters.get('temperature', 1.0)
982
- stream_requested = data.get('stream', False)
983
-
984
- onyx_payload = {
985
- "message": inputs,
986
- "llm_override": {
987
- "model_provider": model_provider,
988
- "model_version": model_id,
989
- "temperature": temperature
990
- },
991
- "allowed_tool_ids": [],
992
- "file_descriptors": [],
993
- "deep_research": False,
994
- "origin": "api",
995
- "parent_message_id": -1,
996
- "chat_session_id": str(uuid.uuid4()),
997
- "chat_session_info": {
998
- "persona_id": 0,
999
- "description": "HuggingFace API Bridge",
1000
- "project_id": 0
1001
- },
1002
- "stream": True,
1003
- "include_citations": True
1004
- }
1005
-
1006
- headers = {
1007
- "Authorization": f"Bearer {ONYX_API_TOKEN}",
1008
- "Content-Type": "application/json"
1009
- }
1010
-
1011
- onyx_response = requests.post(ONYX_API_URL, json=onyx_payload, headers=headers, stream=True)
1012
-
1013
- if onyx_response.status_code != 200:
1014
- return jsonify({"error": f"Onyx API error: {onyx_response.status_code}"}), onyx_response.status_code
1015
-
1016
- if stream_requested:
1017
- return Response(
1018
- stream_onyx_response(onyx_response, "huggingface"),
1019
- mimetype='text/event-stream',
1020
- headers={'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no'}
1021
- )
1022
- else:
1023
- full_content = ""
1024
- for line in onyx_response.iter_lines():
1025
- if line:
1026
- line = line.decode('utf-8')
1027
- if line.startswith('data: '):
1028
- data = line[6:]
1029
- if data != '[DONE]':
1030
- try:
1031
- onyx_data = json.loads(data)
1032
- full_content += onyx_data.get("message", "")
1033
- except json.JSONDecodeError:
1034
- continue
1035
-
1036
- return jsonify([{"generated_text": full_content}])
1037
-
1038
  except Exception as e:
1039
- return jsonify({"error": str(e)}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1040
 
1041
- @app.route('/v1/models', methods=['POST', 'GET'])
1042
- @app.route('/models', methods=['POST', 'GET'])
1043
  def list_models():
1044
- """List available models"""
 
 
 
 
 
 
 
 
 
1045
  return jsonify({
1046
  "object": "list",
1047
- "data": [
1048
- {"id": "gpt-4", "object": "model", "created": int(datetime.now().timestamp()), "owned_by": "onyx"},
1049
- {"id": "claude-3-5-sonnet", "object": "model", "created": int(datetime.now().timestamp()), "owned_by": "onyx"}
1050
- ]
 
 
 
 
 
 
 
1051
  })
1052
 
 
1053
  @app.route('/health', methods=['GET'])
1054
- @app.route('/', methods=['GET'])
1055
- def health():
1056
  """Health check endpoint"""
 
 
 
 
 
 
1057
  return jsonify({
1058
- "status": "ok",
1059
- "api_token_set": bool(ONYX_API_TOKEN),
1060
  "endpoints": {
1061
- "openai": "/v1/chat/completions",
1062
- "huggingface": "/generate or /v1/completions",
1063
- "models": "/v1/models"
1064
  }
1065
  })
1066
 
1067
- if __name__ == '__main__':
1068
- port = int(os.getenv("PORT", 7860))
1069
- app.run(host="0.0.0.0", port=port) # Splits on first slash, cleans whitespace, preserves case
1070
- provider, version = [part.strip() for part in raw_model.split("/", 1)]
1071
- else:
1072
- provider, version = "OpenAI", raw_model.strip()
1073
 
1074
- messages = data.get("messages", [])
1075
- user_content = messages[-1].get("content", "") if messages else ""
1076
 
1077
- # ONYX PAYLOAD: Uses your specific template requirements
1078
- onyx_payload = {
1079
- "message": user_content,
1080
- "llm_override": {
1081
- "model_provider": provider,
1082
- "model_version": version,
1083
- "temperature": data.get("temperature", 0.7)
1084
- },
1085
- "stream": True, # Forced ON regardless of user input
1086
- "include_citations": True,
1087
- "deep_research": False,
1088
- "parent_message_id": -1,
1089
- "chat_session_id": "3c90c3cc-0d44-4b50-8888-8dd25736052a"
1090
- }
1091
-
1092
- headers = {
1093
- "Authorization": f"Bearer {ONYX_API_KEY}",
1094
- "Content-Type": "application/json"
1095
- }
1096
 
1097
- def generate():
1098
- try:
1099
- with requests.post(ONYX_URL, json=onyx_payload, headers=headers, stream=True) as r:
1100
- if r.status_code != 200:
1101
- yield f"data: {json.dumps({'error': 'Onyx API Error', 'status': r.status_code})}\n\n"
1102
- return
1103
 
1104
- for line in r.iter_lines():
1105
- if not line:
1106
- continue
1107
-
1108
- try:
1109
- packet = json.loads(line.decode('utf-8'))
1110
- # Onyx provides content in 'message_delta' packets
1111
- if packet.get("type") == "message_delta":
1112
- content = packet.get("delta", "")
1113
- yield transform_to_openai_chunk(content, raw_model)
1114
- except (json.JSONDecodeError, KeyError):
1115
- continue
1116
-
1117
- # Signal stream completion
1118
- yield transform_to_openai_chunk(None, raw_model, finish_reason="stop")
1119
- yield "data: [DONE]\n\n"
1120
- except Exception as e:
1121
- yield f"data: {json.dumps({'error': str(e)})}\n\n"
1122
 
1123
- return Response(stream_with_context(generate()), mimetype='text/event-stream')
1124
 
1125
- @app.route('/')
1126
- def health_check():
1127
- return "Onyx-OpenAI Proxy is running on HF Space (Port 7860).", 200
1128
 
1129
  if __name__ == '__main__':
1130
- # Standard HF port
1131
- app.run(host='0.0.0.0', port=7860)
 
 
 
 
 
 
 
 
 
2
  import requests
3
  import json
4
  import uuid
5
+ import time
6
  import os
 
7
 
8
  app = Flask(__name__)
9
 
10
# Configuration
# The endpoint can be overridden with the ONYX_API_URL environment variable;
# the Onyx cloud endpoint remains the default when it is not set.
ONYX_API_URL = os.environ.get(
    "ONYX_API_URL",
    "https://cloud.onyx.app/api/chat/send-chat-message",
)

# Bearer token sent to Onyx, read from the ONYX_SECRET environment variable.
# The placeholder default is kept so the module still imports without it.
_ONYX_TOKEN_PLACEHOLDER = "<your-token-here>"
ONYX_API_TOKEN = os.environ.get("ONYX_SECRET", _ONYX_TOKEN_PLACEHOLDER)

if ONYX_API_TOKEN == _ONYX_TOKEN_PLACEHOLDER:
    # Warn at startup so a missing secret is visible before the first
    # upstream request is rejected.
    print("WARNING: ONYX_SECRET is not set; requests to the Onyx API will be sent with a placeholder token.")

# Store chat sessions for context
# NOTE(review): no code visible in this file reads or writes this mapping;
# confirm whether it is still needed.
chat_sessions = {}
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
def create_chat_session_id():
    """Return a freshly generated random (version 4) UUID as a string."""
    session_uuid = uuid.uuid4()
    return str(session_uuid)
 
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
def _extract_message_text(content):
    """Flatten an OpenAI message ``content`` value into plain text."""
    if content is None:
        return ""
    if isinstance(content, list):
        # Content-array format (for vision models, etc.): keep only text parts.
        return ' '.join(
            part.get('text') or ''
            for part in content
            if isinstance(part, dict) and part.get('type') == 'text'
        )
    return content if isinstance(content, str) else str(content)


def build_onyx_payload(messages, model_provider, model_version, temperature, chat_session_id=None):
    """Convert OpenAI-format chat messages into an Onyx send-chat-message payload.

    Only the most recent ``user`` message is forwarded as the message text.
    All ``system`` messages are joined and prepended to it inside a
    ``[System Instructions: ...]`` wrapper.

    Args:
        messages: List of OpenAI-style message dicts (``role`` / ``content``).
            ``content`` may be a string, a content-part list, or ``None``.
        model_provider: Value for ``llm_override.model_provider``.
        model_version: Value for ``llm_override.model_version``.
        temperature: Value for ``llm_override.temperature``.
        chat_session_id: Session id to use; a new one is generated when falsy.

    Returns:
        The payload dict to send as the JSON request body.
    """
    # Extract the last user message (empty string when there is none).
    last_user_message = next(
        (
            _extract_message_text(msg.get('content'))
            for msg in reversed(messages)
            if msg.get('role') == 'user'
        ),
        "",
    )

    # Build system prompt from system messages. Content is flattened first so
    # list-valued or null system content no longer raises TypeError.
    system_prompt = "\n".join(
        _extract_message_text(msg.get('content'))
        for msg in messages
        if msg.get('role') == 'system'
    ).strip()

    # Prepend system prompt to message if it has any non-whitespace text.
    full_message = last_user_message
    if system_prompt:
        full_message = f"[System Instructions: {system_prompt}]\n\n{last_user_message}"

    payload = {
        "message": full_message,
        "llm_override": {
            "model_provider": model_provider,
            "model_version": model_version,
            "temperature": temperature
        },
        "allowed_tool_ids": [],
        "file_descriptors": [],
        "internal_search_filters": {
            "source_type": [],
            "document_set": [],
            "tags": []
        },
        "deep_research": False,
        "origin": "unset",
        "parent_message_id": -1,
        "chat_session_id": chat_session_id or create_chat_session_id(),
        "chat_session_info": {
            "persona_id": 0,
            "description": "OpenAI Compatible API Session",
            "project_id": 0
        },
        "stream": True,  # Always stream internally
        "include_citations": False
    }

    return payload
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
def parse_model_string(model):
    """
    Split a model identifier into ``(provider, model_version)``.

    The identifier is split on the first '/' when one is present, otherwise
    on the first ':'. With neither separator the provider defaults to
    "openai" and the whole string is the model version.

    Examples:
    - 'openai/gpt-4' -> ('openai', 'gpt-4')
    - 'anthropic:claude-3-opus' -> ('anthropic', 'claude-3-opus')
    - 'gpt-4' -> ('openai', 'gpt-4')
    """
    # '/' takes precedence over ':' because it is tried first.
    for separator in ('/', ':'):
        provider, found, version = model.partition(separator)
        if found:
            return provider, version

    # Default provider
    return "openai", model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
def parse_onyx_stream_chunk(chunk_text):
    """Extract the text content from one line of the Onyx stream.

    Args:
        chunk_text: One line of the stream body (any SSE ``data:`` prefix is
            expected to have been removed by the caller).

    Returns:
        The extracted text as a ``str``, or ``None`` when the line carries no
        usable text. Non-string field values yield ``None`` so callers can
        concatenate the result without type errors.
    """
    try:
        data = json.loads(chunk_text)
    except json.JSONDecodeError:
        # If not JSON, return as-is if it looks like content
        if chunk_text.strip() and not chunk_text.startswith('{'):
            return chunk_text
        return None

    if isinstance(data, str):
        return data
    if not isinstance(data, dict):
        return None

    # Check for answer/text content; the first key present wins.
    for key in ('answer_piece', 'text', 'content', 'message'):
        if key in data:
            value = data[key]
            return value if isinstance(value, str) else None

    # Check for error
    if 'error' in data:
        return f"[Error: {data['error']}]"

    return None
127
+
128
+
129
def generate_openai_stream_chunk(content, model, chunk_id, finish_reason=None):
    """Serialize one ``chat.completion.chunk`` event as an SSE ``data:`` frame.

    Falsy ``content`` (``None`` or an empty string) produces an empty
    ``delta`` object. The return value is a string of the form
    ``"data: <json>\\n\\n"``.
    """
    delta = {}
    if content:
        delta["content"] = content

    choice = {
        "index": 0,
        "delta": delta,
        "finish_reason": finish_reason,
    }
    event = {
        "id": chunk_id,
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [choice],
    }
    return "data: " + json.dumps(event) + "\n\n"
143
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
def stream_onyx_response(payload, headers, model):
    """Stream a response from the Onyx API as OpenAI-style SSE frames.

    Generator. Yields, in order: an initial chunk carrying the assistant
    role, one chunk per piece of text extracted from the upstream stream,
    a final chunk with ``finish_reason="stop"`` and the ``[DONE]`` sentinel.
    Upstream HTTP errors and request failures are reported as content chunks.

    Args:
        payload: JSON body to POST to ``ONYX_API_URL``.
        headers: HTTP headers for the upstream request.
        model: Model name echoed back in every chunk.
    """
    chunk_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"

    def extract(raw_line):
        """Decode one raw upstream line and return its text, or None."""
        line = raw_line.decode('utf-8', errors='replace').strip()
        # Handle SSE format from Onyx (the space after "data:" is optional).
        if line.startswith('data:'):
            line = line[5:].strip()
        if not line or line == '[DONE]':
            return None
        return parse_onyx_stream_chunk(line)

    # Send initial chunk with role
    initial_chunk = {
        "id": chunk_id,
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [{
            "index": 0,
            "delta": {"role": "assistant", "content": ""},
            "finish_reason": None
        }]
    }
    yield f"data: {json.dumps(initial_chunk)}\n\n"

    try:
        with requests.post(
            ONYX_API_URL,
            json=payload,
            headers=headers,
            stream=True,
            timeout=120
        ) as response:

            if response.status_code != 200:
                error_content = f"Error from Onyx API: {response.status_code} - {response.text}"
                yield generate_openai_stream_chunk(error_content, model, chunk_id)
                yield generate_openai_stream_chunk("", model, chunk_id, "stop")
                yield "data: [DONE]\n\n"
                return

            # Buffer raw bytes and decode each complete line as UTF-8.
            # decode_unicode=True would use the encoding guessed from the
            # response headers, which garbles UTF-8 text or, when no encoding
            # is known, yields bytes that cannot be appended to a str buffer.
            buffer = b""
            for chunk in response.iter_content(chunk_size=None):
                if not chunk:
                    continue
                buffer += chunk

                # Process complete lines
                while b'\n' in buffer:
                    raw_line, buffer = buffer.split(b'\n', 1)
                    content = extract(raw_line)
                    if content:
                        yield generate_openai_stream_chunk(content, model, chunk_id)

            # Process any remaining buffer (last line without a newline)
            content = extract(buffer)
            if content:
                yield generate_openai_stream_chunk(content, model, chunk_id)

    except requests.exceptions.RequestException as e:
        error_content = f"Request error: {str(e)}"
        yield generate_openai_stream_chunk(error_content, model, chunk_id)

    # Send final chunk
    yield generate_openai_stream_chunk("", model, chunk_id, "stop")
    yield "data: [DONE]\n\n"
216
 
 
 
 
 
 
 
217
 
218
def collect_full_response(payload, headers, model):
    """Collect the streamed Onyx response into one OpenAI-style completion.

    Args:
        payload: JSON body to POST to ``ONYX_API_URL``.
        headers: HTTP headers for the upstream request.
        model: Model name echoed back in the response.

    Returns:
        A ``(body, status_code)`` tuple. On success ``body`` is a
        ``chat.completion`` dict and the status is 200. On an upstream
        non-200 status or a request failure ``body`` is an ``{"error": ...}``
        dict with the upstream status code or 500 respectively.
    """
    pieces = []

    def extract(raw_line):
        """Decode one raw upstream line and return its text, or None."""
        line = raw_line.decode('utf-8', errors='replace').strip()
        # Handle SSE format from Onyx (the space after "data:" is optional).
        if line.startswith('data:'):
            line = line[5:].strip()
        if not line or line == '[DONE]':
            return None
        return parse_onyx_stream_chunk(line)

    try:
        with requests.post(
            ONYX_API_URL,
            json=payload,
            headers=headers,
            stream=True,
            timeout=120
        ) as response:

            if response.status_code != 200:
                return {
                    "error": {
                        "message": f"Error from Onyx API: {response.status_code} - {response.text}",
                        "type": "api_error",
                        "code": response.status_code
                    }
                }, response.status_code

            # Buffer raw bytes and decode each complete line as UTF-8.
            # decode_unicode=True would use the encoding guessed from the
            # response headers, which garbles UTF-8 text or, when no encoding
            # is known, yields bytes that cannot be appended to a str buffer.
            buffer = b""
            for chunk in response.iter_content(chunk_size=None):
                if not chunk:
                    continue
                buffer += chunk

                while b'\n' in buffer:
                    raw_line, buffer = buffer.split(b'\n', 1)
                    content = extract(raw_line)
                    if content:
                        pieces.append(content)

            # Process remaining buffer (last line without a newline)
            content = extract(buffer)
            if content:
                pieces.append(content)

    except requests.exceptions.RequestException as e:
        return {
            "error": {
                "message": f"Request error: {str(e)}",
                "type": "api_error",
                "code": 500
            }
        }, 500

    # Build OpenAI-compatible response
    response_data = {
        "id": f"chatcmpl-{uuid.uuid4().hex[:24]}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": model,
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": "".join(pieces)
            },
            "finish_reason": "stop"
        }],
        "usage": {
            "prompt_tokens": -1,  # Not available from Onyx
            "completion_tokens": -1,
            "total_tokens": -1
        }
    }

    return response_data, 200
299
+
300
+
301
+ # ============== API Routes ==============
302
 
303
@app.route('/v1/chat/completions', methods=['POST'])
def chat_completions():
    """OpenAI-compatible chat completions endpoint.

    Reads ``model``, ``messages``, ``stream`` and ``temperature`` from the
    JSON request body, forwards the request to Onyx, and returns either an
    SSE stream (``stream`` truthy) or a single JSON completion.

    Returns a 400 JSON error when the body is not a JSON object, when
    ``messages`` is missing or empty, or when ``model`` is not a string.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # body; the isinstance check also rejects valid JSON that is not an
    # object (null, list, ...), which would otherwise fail on .get().
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": {"message": "Invalid JSON", "type": "invalid_request_error"}}), 400

    # Extract parameters; explicit nulls fall back to the defaults.
    model = data.get('model') or 'openai/gpt-4'
    messages = data.get('messages', [])
    stream = data.get('stream', False)
    temperature = data.get('temperature')
    if temperature is None:
        temperature = 0.7

    # Validate messages
    if not isinstance(messages, list) or not messages:
        return jsonify({
            "error": {
                "message": "messages is required",
                "type": "invalid_request_error"
            }
        }), 400

    # parse_model_string uses string operations, so reject other types here.
    if not isinstance(model, str):
        return jsonify({
            "error": {
                "message": "model must be a string",
                "type": "invalid_request_error"
            }
        }), 400

    # Parse model string
    model_provider, model_version = parse_model_string(model)

    # Build Onyx payload
    payload = build_onyx_payload(
        messages=messages,
        model_provider=model_provider,
        model_version=model_version,
        temperature=temperature
    )

    headers = {
        "Authorization": f"Bearer {ONYX_API_TOKEN}",
        "Content-Type": "application/json"
    }

    if stream:
        # Return streaming response
        return Response(
            stream_onyx_response(payload, headers, model),
            content_type='text/event-stream',
            headers={
                'Cache-Control': 'no-cache',
                'Connection': 'keep-alive',
                'X-Accel-Buffering': 'no'
            }
        )

    # Return complete response
    response_data, status_code = collect_full_response(payload, headers, model)
    return jsonify(response_data), status_code
358
+
359
 
360
@app.route('/v1/models', methods=['GET'])
def list_models():
    """Return the static model catalogue in OpenAI ``list`` format."""
    # (model id, owner) pairs, in the order they are reported to clients.
    catalogue = (
        ("openai/gpt-4", "openai"),
        ("openai/gpt-4-turbo", "openai"),
        ("openai/gpt-3.5-turbo", "openai"),
        ("anthropic/claude-3-opus", "anthropic"),
        ("anthropic/claude-3-sonnet", "anthropic"),
        ("anthropic/claude-3-haiku", "anthropic"),
    )
    entries = [
        {"id": model_id, "object": "model", "owned_by": owner}
        for model_id, owner in catalogue
    ]
    listing = {"object": "list", "data": entries}
    return jsonify(listing)
376
+
377
+
378
@app.route('/v1/models/<path:model_id>', methods=['GET'])
def get_model(model_id):
    """Return an OpenAI-style model object for ``model_id``.

    The owner is the text before the first '/' in the id, or "unknown"
    when the id contains no '/'.
    """
    prefix, slash, _ = model_id.partition('/')
    owner = prefix if slash else "unknown"
    details = {
        "id": model_id,
        "object": "model",
        "owned_by": owner,
    }
    return jsonify(details)
386
 
387
+
388
@app.route('/health', methods=['GET'])
def health_check():
    """Report service liveness together with the current Unix timestamp."""
    now = int(time.time())
    status = {"status": "healthy", "timestamp": now}
    return jsonify(status)
392
+
393
+
394
@app.route('/', methods=['GET'])
def root():
    """Describe the service: its name, version and available endpoints."""
    endpoints = {
        "chat_completions": "/v1/chat/completions",
        "models": "/v1/models",
        "health": "/health",
    }
    info = {
        "name": "OpenAI-Compatible Onyx API Proxy",
        "version": "1.0.0",
        "endpoints": endpoints,
    }
    return jsonify(info)
406
 
 
 
 
 
 
 
407
 
408
+ # ============== Error Handlers ==============
 
409
 
410
@app.errorhandler(404)
def not_found(e):
    """Render 404 errors as an OpenAI-style JSON error body."""
    error = {
        "message": "Endpoint not found",
        "type": "invalid_request_error",
        "code": 404,
    }
    body = jsonify({"error": error})
    return body, 404
 
 
 
 
 
 
 
 
 
 
419
 
 
 
 
 
 
 
420
 
421
@app.errorhandler(500)
def server_error(e):
    """Render 500 errors as an OpenAI-style JSON error body."""
    error = {
        "message": "Internal server error",
        "type": "server_error",
        "code": 500,
    }
    body = jsonify({"error": error})
    return body, 500
 
 
 
 
 
 
 
 
 
430
 
 
431
 
432
+ # ============== Main ==============
 
 
433
 
434
if __name__ == '__main__':
    print("Starting OpenAI-Compatible Onyx API Proxy...")
    print(f"Onyx API URL: {ONYX_API_URL}")
    print("="*50)

    # The listening port can be overridden with PORT; 5000 stays the default.
    port = int(os.environ.get("PORT", 5000))

    # Debug mode enables the Werkzeug interactive debugger, which allows
    # arbitrary code execution. Because the server binds to all interfaces,
    # it is off unless explicitly requested through FLASK_DEBUG.
    debug = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes")

    app.run(
        host='0.0.0.0',
        port=port,
        debug=debug,
        threaded=True
    )