Hiren122 commited on
Commit
c3e47e0
·
verified ·
1 Parent(s): 8802a32

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +280 -30
app.py CHANGED
@@ -1,40 +1,290 @@
1
- import json
2
  import requests
3
- import time
 
4
  import os
5
- from flask import Flask, request, Response, stream_with_context, jsonify
6
 
7
  app = Flask(__name__)
8
 
9
- # CONFIGURATION: Set 'ONYX_API_KEY' in Hugging Face Settings > Variables and Secrets
10
- ONYX_API_KEY = os.getenv("ONYX_SECRET", "")
11
- ONYX_URL = "https://cloud.onyx.app/api/chat/send-chat-message"
12
-
13
- def transform_to_openai_chunk(content, model_name, finish_reason=None):
14
- """Formats raw text into an OpenAI-compatible SSE chunk."""
15
- chunk = {
16
- "id": f"chatcmpl-{int(time.time())}",
17
- "object": "chat.completion.chunk",
18
- "created": int(time.time()),
19
- "model": model_name,
20
- "choices": [{
21
- "index": 0,
22
- "delta": {"content": content} if content is not None else {},
23
- "finish_reason": finish_reason
24
- }]
25
- }
26
- return f"data: {json.dumps(chunk)}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  @app.route('/v1/chat/completions', methods=['POST'])
29
- def chat_proxy():
30
- data = request.json
31
- if not data or "messages" not in data:
32
- return jsonify({"error": "No messages provided"}), 400
33
-
34
- # DYNAMIC PARSING: Handles "Google Vertex / Gemini 1.5 Pro" or "openai/gpt-4o"
35
- raw_model = data.get("model", "OpenAI / gpt-4o")
36
- if "/" in raw_model:
37
- # Splits on first slash, cleans whitespace, preserves case
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  provider, version = [part.strip() for part in raw_model.split("/", 1)]
39
  else:
40
  provider, version = "OpenAI", raw_model.strip()
 
1
+ from flask import Flask, request, Response, jsonify
2
  import requests
3
+ import json
4
+ import uuid
5
  import os
6
+ from datetime import datetime
7
 
8
# Flask app bridging OpenAI/HF-style endpoints to the Onyx chat API.
app = Flask(__name__)

# Configuration - automatically loads from HuggingFace Secrets.
# ONYX_API_URL may be overridden; defaults to the Onyx cloud chat endpoint.
ONYX_API_URL = os.getenv("ONYX_API_URL", "https://cloud.onyx.app/api/chat/send-chat-message")
# Bearer token is read from the ONYX_SECRET environment variable.
ONYX_API_TOKEN = os.getenv("ONYX_SECRET", "")

if not ONYX_API_TOKEN:
    # Without the token every upstream call will be rejected by Onyx.
    # (Fixed: the old warning named ONYX_API_TOKEN, but the secret the
    # code actually reads is ONYX_SECRET.)
    print("WARNING: ONYX_SECRET not set in HuggingFace Secrets!")
16
+
17
def stream_onyx_response(onyx_response, format_type="openai"):
    """Re-emit an Onyx SSE stream as OpenAI- or HuggingFace(TGI)-style SSE.

    Args:
        onyx_response: streaming HTTP response whose body is an SSE stream
            of ``data: {...}`` lines; only an ``iter_lines()`` method is
            required (e.g. a ``requests`` response with ``stream=True``).
        format_type: "openai" for chat.completion.chunk frames; anything
            else is treated as "huggingface" and yields TGI token frames.

    Yields:
        str: fully formatted ``data: ...\\n\\n`` SSE frames.

    Bug fixed: previously, when the upstream sent ``[DONE]``, the openai
    path yielded ``data: [DONE]`` immediately, broke out of the loop, and
    then ALSO yielded a finish_reason chunk followed by a second
    ``[DONE]``.  Clients stop reading at the first ``[DONE]``, so they
    never saw the stop chunk.  Now exactly one terminal sequence is
    emitted regardless of how the upstream stream ends.
    """
    try:
        for raw in onyx_response.iter_lines():
            if not raw:
                continue
            line = raw.decode('utf-8')
            if not line.startswith('data: '):
                continue
            data = line[6:]
            if data == '[DONE]':
                # Terminal frames are emitted once, below the loop.
                break
            try:
                onyx_data = json.loads(data)
            except json.JSONDecodeError:
                # Skip malformed frames rather than aborting the stream.
                continue
            content = onyx_data.get("message", "")

            if format_type == "huggingface":
                hf_chunk = {
                    "token": {
                        "id": 0,
                        "text": content,
                        "logprob": 0.0,
                        "special": False
                    },
                    "generated_text": None,
                    "details": None
                }
                yield f"data:{json.dumps(hf_chunk)}\n\n"
            else:
                openai_chunk = {
                    "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
                    "object": "chat.completion.chunk",
                    "created": int(datetime.now().timestamp()),
                    "model": onyx_data.get("model", "unknown"),
                    "choices": [{
                        "index": 0,
                        "delta": {"content": content},
                        "finish_reason": None
                    }]
                }
                yield f"data: {json.dumps(openai_chunk)}\n\n"

        # Terminal frames: emitted exactly once, whether the upstream
        # sent [DONE] or simply closed the connection.
        if format_type == "huggingface":
            final_hf = {
                "token": {
                    "id": 0,
                    "text": "",
                    "logprob": 0.0,
                    "special": True
                },
                "generated_text": "",
                "details": {
                    "finish_reason": "stop",
                    "generated_tokens": 0,
                    "seed": None
                }
            }
            yield f"data:{json.dumps(final_hf)}\n\n"
        else:
            final_chunk = {
                "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
                "object": "chat.completion.chunk",
                "created": int(datetime.now().timestamp()),
                "model": "unknown",
                "choices": [{
                    "index": 0,
                    "delta": {},
                    "finish_reason": "stop"
                }]
            }
            yield f"data: {json.dumps(final_chunk)}\n\n"
            yield "data: [DONE]\n\n"

    except Exception as e:
        # Surface stream-level failures to the client as an SSE error frame.
        error_chunk = {"error": {"message": str(e), "type": "server_error"}}
        yield f"data: {json.dumps(error_chunk)}\n\n"
98
 
99
@app.route('/v1/chat/completions', methods=['POST'])
def chat_completions():
    """OpenAI-compatible chat endpoint that proxies to the Onyx chat API.

    Request body: the usual OpenAI fields (messages, model, temperature,
    stream) plus an optional non-standard top-level "model_provider"
    forwarded into Onyx's llm_override.

    Only the most recent 'user' message is forwarded; each request opens
    a fresh Onyx chat session (random chat_session_id), so there is no
    server-side conversation history.
    """
    try:
        # silent=True: a non-JSON body yields None instead of raising
        # into the broad except below (which would report a 500).
        data = request.get_json(silent=True)
        if not data or not data.get('messages'):
            return jsonify({
                "error": {
                    "message": "No messages provided",
                    "type": "invalid_request_error"
                }
            }), 400

        messages = data.get('messages', [])
        model_provider = data.get('model_provider', 'openai')
        model_id = data.get('model', 'gpt-4')
        temperature = data.get('temperature', 1.0)
        stream_requested = data.get('stream', False)

        # Onyx takes a single prompt string: use the latest user turn.
        user_message = ""
        for msg in reversed(messages):
            if msg.get('role') == 'user':
                user_message = msg.get('content', '')
                break

        onyx_payload = {
            "message": user_message,
            "llm_override": {
                "model_provider": model_provider,
                "model_version": model_id,
                "temperature": temperature
            },
            "allowed_tool_ids": [],
            "file_descriptors": [],
            "deep_research": False,
            "origin": "api",
            "parent_message_id": -1,
            "chat_session_id": str(uuid.uuid4()),
            "chat_session_info": {
                "persona_id": 0,
                "description": "OpenAI API Bridge",
                "project_id": 0
            },
            "stream": True,
            "include_citations": True
        }

        headers = {
            "Authorization": f"Bearer {ONYX_API_TOKEN}",
            "Content-Type": "application/json"
        }

        # Timeouts so a stuck upstream cannot hang the worker forever:
        # 10s to connect, generous 300s read budget for slow generations.
        onyx_response = requests.post(
            ONYX_API_URL, json=onyx_payload, headers=headers,
            stream=True, timeout=(10, 300)
        )

        if onyx_response.status_code != 200:
            return jsonify({
                "error": {
                    "message": f"Onyx API error: {onyx_response.status_code}",
                    "type": "api_error"
                }
            }), onyx_response.status_code

        if stream_requested:
            return Response(
                stream_onyx_response(onyx_response, "openai"),
                mimetype='text/event-stream',
                headers={'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no'}
            )
        else:
            # Non-streaming: drain the Onyx SSE stream and concatenate
            # the message fragments into one assistant reply.
            full_content = ""
            for line in onyx_response.iter_lines():
                if line:
                    line = line.decode('utf-8')
                    if line.startswith('data: '):
                        data = line[6:]
                        if data != '[DONE]':
                            try:
                                onyx_data = json.loads(data)
                                full_content += onyx_data.get("message", "")
                            except json.JSONDecodeError:
                                continue

            return jsonify({
                "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
                "object": "chat.completion",
                "created": int(datetime.now().timestamp()),
                "model": model_id,
                "choices": [{
                    "index": 0,
                    "message": {"role": "assistant", "content": full_content},
                    "finish_reason": "stop"
                }],
                # Token accounting is not available from Onyx; report zeros.
                "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
            })

    except Exception as e:
        return jsonify({"error": {"message": str(e), "type": "server_error"}}), 500
188
+
189
@app.route('/generate', methods=['POST'])
@app.route('/v1/completions', methods=['POST'])
def hf_generate():
    """HuggingFace TGI-compatible endpoint that proxies to the Onyx API.

    Request body: ``{"inputs": str, "parameters": {...}, "stream": bool}``.
    Non-standard "model_provider"/"model" keys inside "parameters" are
    forwarded into Onyx's llm_override.
    """
    try:
        data = request.json
        inputs = data.get('inputs', '')
        parameters = data.get('parameters', {})
        model_provider = parameters.get('model_provider', 'openai')
        model_id = parameters.get('model', 'gpt-4')
        temperature = parameters.get('temperature', 1.0)
        stream_requested = data.get('stream', False)

        onyx_payload = {
            "message": inputs,
            "llm_override": {
                "model_provider": model_provider,
                "model_version": model_id,
                "temperature": temperature
            },
            "allowed_tool_ids": [],
            "file_descriptors": [],
            "deep_research": False,
            "origin": "api",
            "parent_message_id": -1,
            "chat_session_id": str(uuid.uuid4()),
            "chat_session_info": {
                "persona_id": 0,
                "description": "HuggingFace API Bridge",
                "project_id": 0
            },
            "stream": True,
            "include_citations": True
        }

        headers = {
            "Authorization": f"Bearer {ONYX_API_TOKEN}",
            "Content-Type": "application/json"
        }

        # Timeouts so a stuck upstream cannot hang the worker forever
        # (10s connect, 300s read — matches /v1/chat/completions).
        onyx_response = requests.post(
            ONYX_API_URL, json=onyx_payload, headers=headers,
            stream=True, timeout=(10, 300)
        )

        if onyx_response.status_code != 200:
            return jsonify({"error": f"Onyx API error: {onyx_response.status_code}"}), onyx_response.status_code

        if stream_requested:
            return Response(
                stream_onyx_response(onyx_response, "huggingface"),
                mimetype='text/event-stream',
                headers={'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no'}
            )
        else:
            # Non-streaming: drain the SSE stream and concatenate fragments.
            full_content = ""
            for line in onyx_response.iter_lines():
                if line:
                    line = line.decode('utf-8')
                    if line.startswith('data: '):
                        data = line[6:]
                        if data != '[DONE]':
                            try:
                                onyx_data = json.loads(data)
                                full_content += onyx_data.get("message", "")
                            except json.JSONDecodeError:
                                continue

            # TGI returns a list with one generation object.
            return jsonify([{"generated_text": full_content}])

    except Exception as e:
        return jsonify({"error": str(e)}), 500
258
+
259
@app.route('/v1/models', methods=['POST', 'GET'])
@app.route('/models', methods=['POST', 'GET'])
def list_models():
    """Advertise the model IDs this bridge claims to support."""
    now = int(datetime.now().timestamp())
    catalog = [
        {"id": model_id, "object": "model", "created": now, "owned_by": "onyx"}
        for model_id in ("gpt-4", "claude-3-5-sonnet")
    ]
    return jsonify({"object": "list", "data": catalog})
270
+
271
@app.route('/health', methods=['GET'])
@app.route('/', methods=['GET'])
def health():
    """Liveness probe; also reports whether the Onyx token is configured."""
    endpoints = {
        "openai": "/v1/chat/completions",
        "huggingface": "/generate or /v1/completions",
        "models": "/v1/models",
    }
    return jsonify({
        "status": "ok",
        "api_token_set": bool(ONYX_API_TOKEN),
        "endpoints": endpoints,
    })
284
+
285
if __name__ == '__main__':
    # HuggingFace Spaces exposes port 7860 by default; honor $PORT if set.
    # (Fixed: removed merge residue that was fused after app.run — a stale
    # comment plus orphaned `provider, version = ...` / dangling `else:`
    # lines left over from the previous version of this file.)
    port = int(os.getenv("PORT", 7860))
    app.run(host="0.0.0.0", port=port)