Hiren122 committed on
Commit
9edf51a
·
verified ·
1 Parent(s): 7666d1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +338 -178
app.py CHANGED
@@ -8,19 +8,78 @@ import os
8
  app = Flask(__name__)
9
 
10
  # Configuration
11
- ONYX_API_URL = "https://cloud.onyx.app/api/chat/send-chat-message"
12
  ONYX_API_TOKEN = os.environ.get("ONYX_SECRET", "<your-token-here>")
13
 
14
- # Store chat sessions for context
15
- chat_sessions = {}
16
 
17
 
18
- def create_chat_session_id():
19
- """Generate a new chat session UUID"""
20
- return str(uuid.uuid4())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
 
23
- def build_onyx_payload(messages, model_provider, model_version, temperature, chat_session_id=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  """Convert OpenAI format to Onyx payload"""
25
 
26
  # Extract the last user message
@@ -29,7 +88,6 @@ def build_onyx_payload(messages, model_provider, model_version, temperature, cha
29
  if msg.get('role') == 'user':
30
  content = msg.get('content', '')
31
  if isinstance(content, list):
32
- # Handle content array format (for vision models, etc.)
33
  text_parts = [p.get('text', '') for p in content if p.get('type') == 'text']
34
  last_user_message = ' '.join(text_parts)
35
  else:
@@ -45,85 +103,61 @@ def build_onyx_payload(messages, model_provider, model_version, temperature, cha
45
  # Prepend system prompt to message if exists
46
  full_message = last_user_message
47
  if system_prompt:
48
- full_message = f"[System Instructions: {system_prompt.strip()}]\n\n{last_user_message}"
49
 
50
  payload = {
51
  "message": full_message,
 
 
 
52
  "llm_override": {
53
  "model_provider": model_provider,
54
  "model_version": model_version,
55
  "temperature": temperature
56
  },
57
- "allowed_tool_ids": [],
58
  "file_descriptors": [],
59
- "internal_search_filters": {
60
- "source_type": [],
61
- "document_set": [],
62
- "tags": []
63
- },
64
- "deep_research": False,
65
- "origin": "unset",
66
- "parent_message_id": -1,
67
- "chat_session_id": chat_session_id or create_chat_session_id(),
68
- "chat_session_info": {
69
- "persona_id": 0,
70
- "description": "OpenAI Compatible API Session",
71
- "project_id": 0
72
- },
73
- "stream": True, # Always stream internally
74
  "include_citations": False
75
  }
76
 
 
 
 
77
  return payload
78
 
79
 
80
- def parse_model_string(model):
81
- """
82
- Parse model string in format 'provider/model_version' or 'provider:model_version'
83
- Examples:
84
- - 'openai/gpt-4' -> ('openai', 'gpt-4')
85
- - 'anthropic/claude-3-opus' -> ('anthropic', 'claude-3-opus')
86
- - 'gpt-4' -> ('openai', 'gpt-4') # default provider
87
- """
88
- if '/' in model:
89
- parts = model.split('/', 1)
90
- return parts[0], parts[1]
91
- elif ':' in model:
92
- parts = model.split(':', 1)
93
- return parts[0], parts[1]
94
- else:
95
- # Default provider
96
- return "openai", model
97
-
98
-
99
  def parse_onyx_stream_chunk(chunk_text):
100
  """Parse a chunk from Onyx stream and extract the text content"""
 
 
 
101
  try:
102
  data = json.loads(chunk_text)
103
 
104
- # Handle different response types from Onyx
105
  if isinstance(data, dict):
106
- # Check for answer/text content
 
 
 
107
  if 'answer_piece' in data:
108
- return data['answer_piece']
109
  elif 'text' in data:
110
- return data['text']
111
  elif 'content' in data:
112
- return data['content']
113
- elif 'message' in data:
114
- return data['message']
115
- # Check for error
116
  elif 'error' in data:
117
- return f"[Error: {data['error']}]"
 
118
  elif isinstance(data, str):
119
- return data
120
 
121
  except json.JSONDecodeError:
122
- # If not JSON, return as-is if it looks like content
123
- if chunk_text.strip() and not chunk_text.startswith('{'):
124
- return chunk_text
125
 
126
- return None
127
 
128
 
129
  def generate_openai_stream_chunk(content, model, chunk_id, finish_reason=None):
@@ -142,9 +176,16 @@ def generate_openai_stream_chunk(content, model, chunk_id, finish_reason=None):
142
  return f"data: {json.dumps(chunk)}\n\n"
143
 
144
 
145
- def stream_onyx_response(payload, headers, model):
146
  """Stream response from Onyx API in OpenAI SSE format"""
147
  chunk_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
 
 
 
 
 
 
 
148
 
149
  # Send initial chunk with role
150
  initial_chunk = {
@@ -160,121 +201,177 @@ def stream_onyx_response(payload, headers, model):
160
  }
161
  yield f"data: {json.dumps(initial_chunk)}\n\n"
162
 
163
- try:
164
- with requests.post(
165
- ONYX_API_URL,
166
- json=payload,
167
- headers=headers,
168
- stream=True,
169
- timeout=120
170
- ) as response:
171
-
172
- if response.status_code != 200:
173
- error_content = f"Error from Onyx API: {response.status_code} - {response.text}"
174
- yield generate_openai_stream_chunk(error_content, model, chunk_id)
175
- yield generate_openai_stream_chunk("", model, chunk_id, "stop")
176
- yield "data: [DONE]\n\n"
177
- return
178
 
179
- buffer = ""
180
- for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
181
- if chunk:
182
- buffer += chunk
183
-
184
- # Process complete lines
185
- while '\n' in buffer:
186
- line, buffer = buffer.split('\n', 1)
187
- line = line.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
 
189
- if not line:
190
- continue
191
-
192
- # Handle SSE format from Onyx
193
- if line.startswith('data: '):
194
- line = line[6:]
195
-
196
- if line == '[DONE]':
197
- continue
198
-
199
- content = parse_onyx_stream_chunk(line)
200
- if content:
201
- yield generate_openai_stream_chunk(content, model, chunk_id)
202
-
203
- # Process any remaining buffer
204
- if buffer.strip():
205
- content = parse_onyx_stream_chunk(buffer.strip())
206
- if content:
207
- yield generate_openai_stream_chunk(content, model, chunk_id)
208
-
209
- except requests.exceptions.RequestException as e:
210
- error_content = f"Request error: {str(e)}"
211
- yield generate_openai_stream_chunk(error_content, model, chunk_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
 
213
  # Send final chunk
214
  yield generate_openai_stream_chunk("", model, chunk_id, "stop")
215
  yield "data: [DONE]\n\n"
216
 
217
 
218
- def collect_full_response(payload, headers, model):
219
  """Collect full streaming response and return as complete OpenAI response"""
220
  full_content = ""
 
221
 
222
- try:
223
- with requests.post(
224
- ONYX_API_URL,
225
- json=payload,
226
- headers=headers,
227
- stream=True,
228
- timeout=120
229
- ) as response:
230
-
231
- if response.status_code != 200:
232
- return {
233
- "error": {
234
- "message": f"Error from Onyx API: {response.status_code} - {response.text}",
235
- "type": "api_error",
236
- "code": response.status_code
237
- }
238
- }, response.status_code
239
 
240
- buffer = ""
241
- for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
242
- if chunk:
243
- buffer += chunk
244
-
245
- while '\n' in buffer:
246
- line, buffer = buffer.split('\n', 1)
247
- line = line.strip()
248
-
249
- if not line:
250
- continue
251
-
252
- if line.startswith('data: '):
253
- line = line[6:]
254
-
255
- if line == '[DONE]':
256
- continue
 
 
 
 
 
 
 
 
 
 
 
257
 
258
- content = parse_onyx_stream_chunk(line)
259
- if content:
260
- full_content += content
261
-
262
- # Process remaining buffer
263
- if buffer.strip():
264
- content = parse_onyx_stream_chunk(buffer.strip())
265
- if content:
266
- full_content += content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
 
268
- except requests.exceptions.RequestException as e:
269
  return {
270
  "error": {
271
- "message": f"Request error: {str(e)}",
272
  "type": "api_error",
273
  "code": 500
274
  }
275
  }, 500
276
 
277
- # Build OpenAI-compatible response
278
  response_data = {
279
  "id": f"chatcmpl-{uuid.uuid4().hex[:24]}",
280
  "object": "chat.completion",
@@ -289,7 +386,7 @@ def collect_full_response(payload, headers, model):
289
  "finish_reason": "stop"
290
  }],
291
  "usage": {
292
- "prompt_tokens": -1, # Not available from Onyx
293
  "completion_tokens": -1,
294
  "total_tokens": -1
295
  }
@@ -306,8 +403,9 @@ def chat_completions():
306
 
307
  try:
308
  data = request.json
 
309
  except Exception as e:
310
- return jsonify({"error": {"message": "Invalid JSON", "type": "invalid_request_error"}}), 400
311
 
312
  # Extract parameters
313
  model = data.get('model', 'openai/gpt-4')
@@ -315,7 +413,9 @@ def chat_completions():
315
  stream = data.get('stream', False)
316
  temperature = data.get('temperature', 0.7)
317
 
318
- # Validate messages
 
 
319
  if not messages:
320
  return jsonify({
321
  "error": {
@@ -326,24 +426,32 @@ def chat_completions():
326
 
327
  # Parse model string
328
  model_provider, model_version = parse_model_string(model)
 
 
 
 
 
 
 
 
 
 
 
 
329
 
330
  # Build Onyx payload
331
  payload = build_onyx_payload(
332
  messages=messages,
333
  model_provider=model_provider,
334
  model_version=model_version,
335
- temperature=temperature
 
 
336
  )
337
 
338
- headers = {
339
- "Authorization": f"Bearer {ONYX_API_TOKEN}",
340
- "Content-Type": "application/json"
341
- }
342
-
343
  if stream:
344
- # Return streaming response
345
  return Response(
346
- stream_onyx_response(payload, headers, model),
347
  content_type='text/event-stream',
348
  headers={
349
  'Cache-Control': 'no-cache',
@@ -352,21 +460,50 @@ def chat_completions():
352
  }
353
  )
354
  else:
355
- # Return complete response
356
- response_data, status_code = collect_full_response(payload, headers, model)
357
  return jsonify(response_data), status_code
358
 
359
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
  @app.route('/v1/models', methods=['GET'])
361
  def list_models():
362
  """OpenAI-compatible models listing endpoint"""
363
  models = [
364
  {"id": "openai/gpt-4", "object": "model", "owned_by": "openai"},
365
  {"id": "openai/gpt-4-turbo", "object": "model", "owned_by": "openai"},
 
366
  {"id": "openai/gpt-3.5-turbo", "object": "model", "owned_by": "openai"},
367
- {"id": "anthropic/claude-3-opus", "object": "model", "owned_by": "anthropic"},
368
- {"id": "anthropic/claude-3-sonnet", "object": "model", "owned_by": "anthropic"},
369
- {"id": "anthropic/claude-3-haiku", "object": "model", "owned_by": "anthropic"},
370
  ]
371
 
372
  return jsonify({
@@ -388,7 +525,26 @@ def get_model(model_id):
388
  @app.route('/health', methods=['GET'])
389
  def health_check():
390
  """Health check endpoint"""
391
- return jsonify({"status": "healthy", "timestamp": int(time.time())})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
 
393
 
394
  @app.route('/', methods=['GET'])
@@ -400,8 +556,11 @@ def root():
400
  "endpoints": {
401
  "chat_completions": "/v1/chat/completions",
402
  "models": "/v1/models",
403
- "health": "/health"
404
- }
 
 
 
405
  })
406
 
407
 
@@ -422,19 +581,20 @@ def not_found(e):
422
  def server_error(e):
423
  return jsonify({
424
  "error": {
425
- "message": "Internal server error",
426
  "type": "server_error",
427
  "code": 500
428
  }
429
  }), 500
430
 
431
 
432
- # ============== Main ==============
433
-
434
  if __name__ == '__main__':
435
- print("Starting OpenAI-Compatible Onyx API Proxy...")
436
- print(f"Onyx API URL: {ONYX_API_URL}")
437
- print("="*50)
 
 
 
438
 
439
  app.run(
440
  host='0.0.0.0',
 
8
  app = Flask(__name__)
9
 
10
  # Configuration
11
+ ONYX_BASE_URL = os.environ.get("ONYX_BASE_URL", "https://cloud.onyx.app")
12
  ONYX_API_TOKEN = os.environ.get("ONYX_SECRET", "<your-token-here>")
13
 
14
+ # Store chat sessions
15
+ chat_sessions_cache = {}
16
 
17
 
18
def get_headers():
    """Return the HTTP headers (bearer auth + JSON content type) for Onyx calls."""
    headers = {"Content-Type": "application/json"}
    headers["Authorization"] = f"Bearer {ONYX_API_TOKEN}"
    return headers
24
+
25
+
26
def create_chat_session(persona_id=0):
    """Create a new chat session in Onyx.

    Args:
        persona_id: Onyx persona to attach the session to (default 0).

    Returns:
        The new chat session id as a string, or None on any failure.
    """
    url = f"{ONYX_BASE_URL}/api/chat/create-chat-session"

    payload = {
        "persona_id": persona_id,
        "description": "OpenAI Compatible API Session"
    }

    try:
        response = requests.post(url, json=payload, headers=get_headers(), timeout=30)

        if response.status_code == 200:
            data = response.json()
            # The id may be returned under different keys depending on Onyx version.
            session_id = data.get('chat_session_id') or data.get('id')
            if session_id is None:
                # Bug fix: previously this fell back to the whole response dict,
                # which str()-ified into a bogus "session id" that every later
                # send-message call would silently fail on.
                print(f"Chat session response missing id: {data}")
                return None
            print(f"Created chat session: {session_id}")
            return str(session_id)
        else:
            print(f"Failed to create chat session: {response.status_code} - {response.text}")
            return None
    except Exception as e:
        # Broad catch is deliberate: any transport/JSON error degrades to "no session".
        print(f"Error creating chat session: {e}")
        return None
50
+
51
+
52
def get_or_create_session(session_key="default", persona_id=0):
    """Return the cached session entry for *session_key*, creating one on demand.

    Returns None when no entry exists and a new Onyx session could not be created.
    """
    entry = chat_sessions_cache.get(session_key)
    if entry is not None:
        return entry

    new_id = create_chat_session(persona_id)
    if new_id:
        chat_sessions_cache[session_key] = {
            "session_id": new_id,
            "parent_message_id": None,
        }
    return chat_sessions_cache.get(session_key)
62
 
63
 
64
def parse_model_string(model):
    """Split a model identifier into (provider, model_version).

    Accepts 'provider/version' or 'provider:version'; a bare model name
    defaults to the 'openai' provider.

    Examples:
        'openai/gpt-4'            -> ('openai', 'gpt-4')
        'anthropic:claude-3-opus' -> ('anthropic', 'claude-3-opus')
        'gpt-4'                   -> ('openai', 'gpt-4')
    """
    # '/' takes precedence over ':' (matching the original check order);
    # only the first separator occurrence splits the string.
    for sep in ('/', ':'):
        if sep in model:
            provider, _, version = model.partition(sep)
            return provider, version
    return "openai", model
80
+
81
+
82
+ def build_onyx_payload(messages, model_provider, model_version, temperature, chat_session_id, parent_message_id=None):
83
  """Convert OpenAI format to Onyx payload"""
84
 
85
  # Extract the last user message
 
88
  if msg.get('role') == 'user':
89
  content = msg.get('content', '')
90
  if isinstance(content, list):
 
91
  text_parts = [p.get('text', '') for p in content if p.get('type') == 'text']
92
  last_user_message = ' '.join(text_parts)
93
  else:
 
103
  # Prepend system prompt to message if exists
104
  full_message = last_user_message
105
  if system_prompt:
106
+ full_message = f"[System: {system_prompt.strip()}]\n\n{last_user_message}"
107
 
108
  payload = {
109
  "message": full_message,
110
+ "chat_session_id": chat_session_id,
111
+ "parent_message_id": parent_message_id if parent_message_id else None,
112
+ "stream": True,
113
  "llm_override": {
114
  "model_provider": model_provider,
115
  "model_version": model_version,
116
  "temperature": temperature
117
  },
 
118
  "file_descriptors": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  "include_citations": False
120
  }
121
 
122
+ # Remove None values
123
+ payload = {k: v for k, v in payload.items() if v is not None}
124
+
125
  return payload
126
 
127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
def parse_onyx_stream_chunk(chunk_text):
    """Parse one chunk of the Onyx stream.

    Args:
        chunk_text: Raw line/fragment from the Onyx SSE stream.

    Returns:
        A ``(content, message_id)`` tuple; either element may be None.
        Non-JSON text that does not look like truncated JSON is passed
        through as content.
    """
    if not chunk_text or not chunk_text.strip():
        return None, None

    try:
        data = json.loads(chunk_text)
    except json.JSONDecodeError:
        stripped = chunk_text.strip()
        # Not JSON: treat as raw text unless it starts with '{' (likely a
        # truncated JSON object that should be dropped rather than emitted).
        if stripped and not stripped.startswith('{'):
            return stripped, None
        return None, None

    if isinstance(data, dict):
        # Message id is used by callers to thread the conversation.
        message_id = data.get('message_id')

        # Content may arrive under several field names.
        for key in ('answer_piece', 'text', 'content'):
            if key in data:
                return data[key], message_id
        if 'message' in data and isinstance(data['message'], str):
            return data['message'], message_id
        if 'error' in data:
            return f"[Error: {data['error']}]", message_id

        # Bug fix: bookkeeping-only packets (e.g. {"message_id": N}) previously
        # dropped the id, breaking parent_message_id tracking in the callers.
        return None, message_id

    if isinstance(data, str):
        return data, None

    return None, None
161
 
162
 
163
  def generate_openai_stream_chunk(content, model, chunk_id, finish_reason=None):
 
176
  return f"data: {json.dumps(chunk)}\n\n"
177
 
178
 
179
+ def stream_onyx_response(payload, model, session_key):
180
  """Stream response from Onyx API in OpenAI SSE format"""
181
  chunk_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
182
+ url = f"{ONYX_BASE_URL}/api/chat/send-message"
183
+
184
+ # Try alternate endpoints if needed
185
+ endpoints = [
186
+ f"{ONYX_BASE_URL}/api/chat/send-message",
187
+ f"{ONYX_BASE_URL}/api/chat/send-chat-message",
188
+ ]
189
 
190
  # Send initial chunk with role
191
  initial_chunk = {
 
201
  }
202
  yield f"data: {json.dumps(initial_chunk)}\n\n"
203
 
204
+ last_message_id = None
205
+
206
+ for url in endpoints:
207
+ try:
208
+ print(f"Trying endpoint: {url}")
209
+ print(f"Payload: {json.dumps(payload, indent=2)}")
 
 
 
 
 
 
 
 
 
210
 
211
+ with requests.post(
212
+ url,
213
+ json=payload,
214
+ headers=get_headers(),
215
+ stream=True,
216
+ timeout=120
217
+ ) as response:
218
+
219
+ print(f"Response status: {response.status_code}")
220
+
221
+ if response.status_code == 404:
222
+ continue # Try next endpoint
223
+
224
+ if response.status_code != 200:
225
+ error_text = response.text
226
+ print(f"Error response: {error_text}")
227
+ yield generate_openai_stream_chunk(
228
+ f"Error {response.status_code}: {error_text}",
229
+ model, chunk_id
230
+ )
231
+ yield generate_openai_stream_chunk("", model, chunk_id, "stop")
232
+ yield "data: [DONE]\n\n"
233
+ return
234
+
235
+ buffer = ""
236
+ for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
237
+ if chunk:
238
+ buffer += chunk
239
 
240
+ while '\n' in buffer:
241
+ line, buffer = buffer.split('\n', 1)
242
+ line = line.strip()
243
+
244
+ if not line:
245
+ continue
246
+
247
+ if line.startswith('data: '):
248
+ line = line[6:]
249
+
250
+ if line == '[DONE]':
251
+ continue
252
+
253
+ content, msg_id = parse_onyx_stream_chunk(line)
254
+ if msg_id:
255
+ last_message_id = msg_id
256
+ if content:
257
+ yield generate_openai_stream_chunk(content, model, chunk_id)
258
+
259
+ # Process remaining buffer
260
+ if buffer.strip():
261
+ if buffer.strip().startswith('data: '):
262
+ buffer = buffer.strip()[6:]
263
+ content, msg_id = parse_onyx_stream_chunk(buffer.strip())
264
+ if msg_id:
265
+ last_message_id = msg_id
266
+ if content:
267
+ yield generate_openai_stream_chunk(content, model, chunk_id)
268
+
269
+ # Update session with last message ID
270
+ if session_key in chat_sessions_cache and last_message_id:
271
+ chat_sessions_cache[session_key]['parent_message_id'] = last_message_id
272
+
273
+ break # Success, exit loop
274
+
275
+ except requests.exceptions.RequestException as e:
276
+ print(f"Request error for {url}: {e}")
277
+ continue
278
 
279
  # Send final chunk
280
  yield generate_openai_stream_chunk("", model, chunk_id, "stop")
281
  yield "data: [DONE]\n\n"
282
 
283
 
284
+ def collect_full_response(payload, model, session_key):
285
  """Collect full streaming response and return as complete OpenAI response"""
286
  full_content = ""
287
+ last_message_id = None
288
 
289
+ endpoints = [
290
+ f"{ONYX_BASE_URL}/api/chat/send-message",
291
+ f"{ONYX_BASE_URL}/api/chat/send-chat-message",
292
+ ]
293
+
294
+ for url in endpoints:
295
+ try:
296
+ print(f"Trying endpoint: {url}")
297
+ print(f"Payload: {json.dumps(payload, indent=2)}")
 
 
 
 
 
 
 
 
298
 
299
+ with requests.post(
300
+ url,
301
+ json=payload,
302
+ headers=get_headers(),
303
+ stream=True,
304
+ timeout=120
305
+ ) as response:
306
+
307
+ print(f"Response status: {response.status_code}")
308
+
309
+ if response.status_code == 404:
310
+ continue
311
+
312
+ if response.status_code != 200:
313
+ error_text = response.text
314
+ print(f"Error response: {error_text}")
315
+ return {
316
+ "error": {
317
+ "message": f"Onyx API error {response.status_code}: {error_text}",
318
+ "type": "api_error",
319
+ "code": response.status_code
320
+ }
321
+ }, response.status_code
322
+
323
+ buffer = ""
324
+ for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
325
+ if chunk:
326
+ buffer += chunk
327
 
328
+ while '\n' in buffer:
329
+ line, buffer = buffer.split('\n', 1)
330
+ line = line.strip()
331
+
332
+ if not line:
333
+ continue
334
+
335
+ if line.startswith('data: '):
336
+ line = line[6:]
337
+
338
+ if line == '[DONE]':
339
+ continue
340
+
341
+ content, msg_id = parse_onyx_stream_chunk(line)
342
+ if msg_id:
343
+ last_message_id = msg_id
344
+ if content:
345
+ full_content += content
346
+
347
+ if buffer.strip():
348
+ if buffer.strip().startswith('data: '):
349
+ buffer = buffer.strip()[6:]
350
+ content, msg_id = parse_onyx_stream_chunk(buffer.strip())
351
+ if msg_id:
352
+ last_message_id = msg_id
353
+ if content:
354
+ full_content += content
355
+
356
+ # Update session
357
+ if session_key in chat_sessions_cache and last_message_id:
358
+ chat_sessions_cache[session_key]['parent_message_id'] = last_message_id
359
+
360
+ break
361
+
362
+ except requests.exceptions.RequestException as e:
363
+ print(f"Request error: {e}")
364
+ continue
365
 
366
+ if not full_content:
367
  return {
368
  "error": {
369
+ "message": "No response from Onyx API",
370
  "type": "api_error",
371
  "code": 500
372
  }
373
  }, 500
374
 
 
375
  response_data = {
376
  "id": f"chatcmpl-{uuid.uuid4().hex[:24]}",
377
  "object": "chat.completion",
 
386
  "finish_reason": "stop"
387
  }],
388
  "usage": {
389
+ "prompt_tokens": -1,
390
  "completion_tokens": -1,
391
  "total_tokens": -1
392
  }
 
403
 
404
  try:
405
  data = request.json
406
+ print(f"Received request: {json.dumps(data, indent=2)}")
407
  except Exception as e:
408
+ return jsonify({"error": {"message": f"Invalid JSON: {e}", "type": "invalid_request_error"}}), 400
409
 
410
  # Extract parameters
411
  model = data.get('model', 'openai/gpt-4')
 
413
  stream = data.get('stream', False)
414
  temperature = data.get('temperature', 0.7)
415
 
416
+ # Use a unique session key per conversation or default
417
+ session_key = data.get('session_id', 'default')
418
+
419
  if not messages:
420
  return jsonify({
421
  "error": {
 
426
 
427
  # Parse model string
428
  model_provider, model_version = parse_model_string(model)
429
+ print(f"Model provider: {model_provider}, version: {model_version}")
430
+
431
+ # Get or create chat session
432
+ session_info = get_or_create_session(session_key)
433
+
434
+ if not session_info:
435
+ return jsonify({
436
+ "error": {
437
+ "message": "Failed to create chat session with Onyx API",
438
+ "type": "api_error"
439
+ }
440
+ }), 500
441
 
442
  # Build Onyx payload
443
  payload = build_onyx_payload(
444
  messages=messages,
445
  model_provider=model_provider,
446
  model_version=model_version,
447
+ temperature=temperature,
448
+ chat_session_id=session_info['session_id'],
449
+ parent_message_id=session_info.get('parent_message_id')
450
  )
451
 
 
 
 
 
 
452
  if stream:
 
453
  return Response(
454
+ stream_onyx_response(payload, model, session_key),
455
  content_type='text/event-stream',
456
  headers={
457
  'Cache-Control': 'no-cache',
 
460
  }
461
  )
462
  else:
463
+ response_data, status_code = collect_full_response(payload, model, session_key)
 
464
  return jsonify(response_data), status_code
465
 
466
 
467
@app.route('/v1/sessions', methods=['POST'])
def create_new_session():
    """Create a new Onyx chat session and register it in the local cache."""
    body = request.json or {}
    session_id = create_chat_session(body.get('persona_id', 0))

    if not session_id:
        return jsonify({"error": "Failed to create session"}), 500

    session_key = str(uuid.uuid4())
    chat_sessions_cache[session_key] = {
        "session_id": session_id,
        "parent_message_id": None,
    }
    return jsonify({
        "session_key": session_key,
        "chat_session_id": session_id
    })
487
+
488
+
489
@app.route('/v1/sessions', methods=['DELETE'])
def clear_sessions():
    """Forget every cached chat session mapping."""
    chat_sessions_cache.clear()
    return jsonify({"status": "cleared"})
494
+
495
+
496
  @app.route('/v1/models', methods=['GET'])
497
  def list_models():
498
  """OpenAI-compatible models listing endpoint"""
499
  models = [
500
  {"id": "openai/gpt-4", "object": "model", "owned_by": "openai"},
501
  {"id": "openai/gpt-4-turbo", "object": "model", "owned_by": "openai"},
502
+ {"id": "openai/gpt-4o", "object": "model", "owned_by": "openai"},
503
  {"id": "openai/gpt-3.5-turbo", "object": "model", "owned_by": "openai"},
504
+ {"id": "anthropic/claude-3-opus-20240229", "object": "model", "owned_by": "anthropic"},
505
+ {"id": "anthropic/claude-3-sonnet-20240229", "object": "model", "owned_by": "anthropic"},
506
+ {"id": "anthropic/claude-3-haiku-20240307", "object": "model", "owned_by": "anthropic"},
507
  ]
508
 
509
  return jsonify({
 
525
@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint: liveness plus the number of cached sessions."""
    status = {
        "status": "healthy",
        "timestamp": int(time.time()),
        "active_sessions": len(chat_sessions_cache),
    }
    return jsonify(status)
533
+
534
+
535
@app.route('/debug/test-onyx', methods=['GET'])
def test_onyx_connection():
    """Debug endpoint: try to create an Onyx chat session and report the result."""
    session_id = create_chat_session()
    return jsonify({
        "create_session": {
            "success": session_id is not None,
            "session_id": session_id,
        }
    })
548
 
549
 
550
  @app.route('/', methods=['GET'])
 
556
  "endpoints": {
557
  "chat_completions": "/v1/chat/completions",
558
  "models": "/v1/models",
559
+ "sessions": "/v1/sessions",
560
+ "health": "/health",
561
+ "debug": "/debug/test-onyx"
562
+ },
563
+ "model_format": "provider/model_version (e.g., openai/gpt-4)"
564
  })
565
 
566
 
 
581
def server_error(e):
    """Render an unhandled exception as an OpenAI-style JSON 500 error body."""
    error_body = {
        "error": {
            "message": f"Internal server error: {str(e)}",
            "type": "server_error",
            "code": 500,
        }
    }
    return jsonify(error_body), 500
589
 
590
 
 
 
591
  if __name__ == '__main__':
592
+ print("="*60)
593
+ print("OpenAI-Compatible Onyx API Proxy")
594
+ print("="*60)
595
+ print(f"Onyx Base URL: {ONYX_BASE_URL}")
596
+ print(f"Token configured: {'Yes' if ONYX_API_TOKEN != '<your-token-here>' else 'No'}")
597
+ print("="*60)
598
 
599
  app.run(
600
  host='0.0.0.0',