Hiren122 commited on
Commit
d265db8
·
verified ·
1 Parent(s): fe4af11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1311 -440
app.py CHANGED
@@ -4,6 +4,10 @@ import json
4
  import uuid
5
  import time
6
  import os
 
 
 
 
7
 
8
  app = Flask(__name__)
9
 
@@ -14,6 +18,9 @@ ONYX_API_TOKEN = os.environ.get("ONYX_SECRET", "<your-token-here>")
14
  # Store chat sessions
15
  chat_sessions_cache = {}
16
 
 
 
 
17
 
18
  def get_headers():
19
  """Get authorization headers"""
@@ -26,18 +33,17 @@ def get_headers():
26
  def create_chat_session(persona_id=0):
27
  """Create a new chat session in Onyx"""
28
  url = f"{ONYX_BASE_URL}/api/chat/create-chat-session"
29
-
30
  payload = {
31
  "persona_id": persona_id,
32
  "description": "OpenAI Compatible API Session"
33
  }
34
-
35
  try:
36
  response = requests.post(url, json=payload, headers=get_headers(), timeout=30)
37
-
38
  if response.status_code == 200:
39
  data = response.json()
40
- # The response might have chat_session_id directly or nested
41
  session_id = data.get('chat_session_id') or data.get('id') or data
42
  print(f"Created chat session: {session_id}")
43
  return str(session_id)
@@ -64,7 +70,7 @@ def get_or_create_session(session_key="default", persona_id=0):
64
  def parse_model_string(model):
65
  """
66
  Parse model string in format 'provider/model_version'
67
- Examples:
68
  - 'openai/gpt-4' -> ('openai', 'gpt-4')
69
  - 'anthropic/claude-3-opus' -> ('anthropic', 'claude-3-opus')
70
  - 'gpt-4' -> ('openai', 'gpt-4')
@@ -80,148 +86,724 @@ def parse_model_string(model):
80
 
81
 
82
  # Known provider name mappings
83
- # Update these based on what's configured in Onyx Cloud admin panel
84
  PROVIDER_ALIASES = {
85
- "openai": "OpenAI", # Capitalized generally preferred
86
- "anthropic": "Anthropic", # Capitalized
87
  "google": "Google",
88
  "azure": "Azure",
89
  "bedrock": "Bedrock",
90
  "cohere": "Cohere",
91
  "mistral": "Mistral",
92
- # Add more aliases as needed
93
  }
94
 
95
 
96
  def normalize_provider_name(provider):
97
- """
98
- Normalize provider name to match Onyx configuration.
99
- Handles case sensitivity and common aliases.
100
- """
101
  provider_lower = provider.lower().strip()
102
  return PROVIDER_ALIASES.get(provider_lower, provider_lower)
103
 
104
 
105
- def build_onyx_payload(messages, model_provider, model_version, temperature, chat_session_id, parent_message_id=None, stream=True):
106
- """Convert OpenAI format to Onyx payload
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  Args:
109
- messages: List of OpenAI format messages
110
- model_provider: LLM provider name (e.g., 'openai', 'anthropic')
111
- model_version: Model version (e.g., 'gpt-4', 'claude-3-opus-20240229')
112
- temperature: Temperature setting for generation
113
  chat_session_id: Onyx chat session ID
114
- parent_message_id: Optional parent message ID for threading
115
- stream: Whether to stream the response (default True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  # Extract the last user message
119
  last_user_message = ""
120
- for msg in reversed(messages):
121
  if msg.get('role') == 'user':
122
  content = msg.get('content', '')
123
  if isinstance(content, list):
124
- text_parts = [p.get('text', '') for p in content if p.get('type') == 'text']
125
  last_user_message = ' '.join(text_parts)
126
  else:
127
  last_user_message = content
128
  break
129
-
130
  # Build system prompt from system messages
131
  system_prompt = ""
132
- for msg in messages:
133
  if msg.get('role') == 'system':
134
  content = msg.get('content', '')
135
  if isinstance(content, list):
136
- text_parts = [p.get('text', '') for p in content if p.get('type') == 'text']
137
  system_prompt += ' '.join(text_parts) + "\n"
138
  elif isinstance(content, str):
139
  system_prompt += content + "\n"
140
-
141
- # Prepend system prompt to message if exists
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  full_message = last_user_message
143
- if system_prompt:
144
- full_message = f"[System: {system_prompt.strip()}]\n\n{last_user_message}"
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  payload = {
147
  "message": full_message,
148
  "chat_session_id": chat_session_id,
149
  "parent_message_id": parent_message_id if parent_message_id else None,
150
- "stream": stream, # Now respects caller's preference
151
  "llm_override": {
152
  "model_provider": model_provider,
153
  "model_version": model_version,
154
  "temperature": temperature
155
  },
156
- "file_descriptors": [],
157
  "include_citations": False
158
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
- # Do NOT remove None values - Onyx API might expect explicit nulls
161
- # payload = {k: v for k, v in payload.items() if v is not None}
 
 
 
 
 
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  return payload
164
 
165
 
 
 
166
  def parse_onyx_stream_chunk(chunk_text):
167
  """Parse a chunk from Onyx stream and extract the text content.
168
-
169
- New Onyx API uses packet-based format:
170
- - First packet: {"user_message_id": int, "reserved_assistant_message_id": int}
171
- - Content packets: {"ind": int, "obj": {"type": "message_delta", "content": "..."}}
172
- - Stop packet: {"ind": int, "obj": {"type": "stop"}}
173
-
174
  Returns:
175
  tuple: (content, message_id, packet_type)
176
  """
177
  if not chunk_text or not chunk_text.strip():
178
  return None, None, None
179
-
180
  try:
181
  data = json.loads(chunk_text)
182
-
183
  if not isinstance(data, dict):
184
  return None, None, None
185
-
186
-
187
-
188
  # Handle new packet-based format
189
  if 'obj' in data:
190
  obj = data['obj']
191
  packet_type = obj.get('type', '')
192
-
193
  if packet_type == 'message_delta':
194
- # This is the actual content!
195
  content = obj.get('content', '')
196
  return content, None, 'content'
197
-
198
  elif packet_type == 'message_start':
199
- # Contains final_documents, not content
200
  return None, None, 'message_start'
201
-
202
  elif packet_type == 'stop':
203
- # End of stream
204
  return None, None, 'stop'
205
-
206
  elif packet_type == 'error':
207
  error_msg = obj.get('message', obj.get('error', 'Unknown error'))
208
  return f"[Error: {error_msg}]", None, 'error'
209
-
210
  elif packet_type == 'citation_delta':
211
- # Citation info, not content
212
  return None, None, 'citation'
213
-
214
  elif packet_type in ['reasoning_start', 'reasoning_delta', 'reasoning_done']:
215
- # Reasoning packets
216
  return None, None, 'reasoning'
217
-
218
  else:
219
- # Other packet types (search, tools, etc.)
220
  return None, None, packet_type
221
-
222
- # FALLBACK: Old format support (for backward compatibility)
223
  message_id = data.get('message_id')
224
-
225
  if 'answer_piece' in data:
226
  return data['answer_piece'], message_id, 'legacy'
227
  elif 'text' in data:
@@ -230,42 +812,52 @@ def parse_onyx_stream_chunk(chunk_text):
230
  return data['content'], message_id, 'legacy'
231
  elif 'error' in data:
232
  return f"[Error: {data['error']}]", message_id, 'error'
233
-
234
  return None, None, None
235
-
236
  except json.JSONDecodeError:
237
- # Not JSON, might be raw text
238
  if chunk_text.strip() and not chunk_text.strip().startswith('{'):
239
  return chunk_text.strip(), None, 'raw'
240
-
241
  return None, None, None
242
 
243
 
244
- def generate_openai_stream_chunk(content, model, chunk_id, finish_reason=None):
245
- """Generate an OpenAI-compatible SSE chunk"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  chunk = {
247
  "id": chunk_id,
248
  "object": "chat.completion.chunk",
249
  "created": int(time.time()),
250
  "model": model,
251
- "choices": [{
252
- "index": 0,
253
- "delta": {"content": content} if content else {},
254
- "finish_reason": finish_reason
255
- }]
256
  }
257
  return f"data: {json.dumps(chunk)}\n\n"
258
 
259
 
260
- def stream_onyx_response(payload, model, session_key):
261
  """Stream response from Onyx API in OpenAI SSE format"""
262
 
263
- final_message_id = None # ✅ STEP 3.1
264
  chunk_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
 
265
 
266
  endpoints = [
267
- f"{ONYX_BASE_URL}/api/chat/send-chat-message", # Primary (new)
268
- f"{ONYX_BASE_URL}/api/chat/send-message", # Fallback
269
  ]
270
 
271
  # Initial assistant role chunk
@@ -280,7 +872,10 @@ def stream_onyx_response(payload, model, session_key):
280
  "finish_reason": None
281
  }]
282
  }
283
- yield f"data: {json.dumps(initial_chunk)}\n\n"
 
 
 
284
 
285
  last_message_id = None
286
 
@@ -288,14 +883,7 @@ def stream_onyx_response(payload, model, session_key):
288
  try:
289
  print(f"Trying endpoint: {url}")
290
 
291
- with requests.post(
292
- url,
293
- json=payload,
294
- headers=get_headers(),
295
- stream=True,
296
- timeout=120
297
- ) as response:
298
-
299
  print(f"Response status: {response.status_code}")
300
 
301
  if response.status_code != 200:
@@ -324,58 +912,86 @@ def stream_onyx_response(payload, model, session_key):
324
  if msg_id:
325
  last_message_id = msg_id
326
 
327
- # Yield content for all valid packet types
328
  if content and packet_type in ['content', 'legacy', 'raw']:
329
- yield generate_openai_stream_chunk(content, model, chunk_id)
 
 
 
 
330
 
331
  if packet_type == "stop":
332
- final_message_id = last_message_id # ✅ STEP 3.2
333
  break
334
 
335
- break # success → exit endpoint loop
336
 
337
  except Exception as e:
338
  print("Stream error:", e)
339
  continue
340
 
341
- # STEP 3.3 — store FINAL assistant message id
342
  if final_message_id and session_key in chat_sessions_cache:
343
  chat_sessions_cache[session_key]["parent_message_id"] = final_message_id
344
 
345
- yield generate_openai_stream_chunk("", model, chunk_id, "stop")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  yield "data: [DONE]\n\n"
347
 
348
- def collect_full_response(payload, model, session_key):
 
349
  """Collect full streaming response and return as complete OpenAI response"""
350
  full_content = ""
351
  last_message_id = None
352
-
353
  endpoints = [
354
- f"{ONYX_BASE_URL}/api/chat/send-chat-message", # Primary (new)
355
- f"{ONYX_BASE_URL}/api/chat/send-message", # Fallback (deprecated)
356
  ]
357
-
358
  for url in endpoints:
359
  try:
360
  print(f"Trying endpoint: {url}")
361
- print(f"Payload: {json.dumps(payload, indent=2)}")
362
-
363
- # Check if we are requesting a stream or simple JSON
364
  is_streaming_request = payload.get('stream', False)
365
-
366
- with requests.post(
367
- url,
368
- json=payload,
369
- headers=get_headers(),
370
- stream=is_streaming_request, # Use stream=True only if requested
371
- timeout=120
372
- ) as response:
373
-
374
  print(f"Response status: {response.status_code}")
375
-
376
  if response.status_code == 404:
377
  continue
378
-
379
  if response.status_code != 200:
380
  error_text = response.text
381
  print(f"Error response: {error_text}")
@@ -386,47 +1002,32 @@ def collect_full_response(payload, model, session_key):
386
  "code": response.status_code
387
  }
388
  }, response.status_code
389
-
390
- # CASE 1: Non-Streaming Response (JSON)
391
  if not is_streaming_request:
392
  try:
393
  data = response.json()
394
- # Extract content - logs show 'answer' field is used
395
  full_content = data.get('answer') or data.get('message') or data.get('content') or ""
396
-
397
- # Extract Message ID if present to update session
398
  msg_id = data.get('message_id')
399
  if session_key in chat_sessions_cache and msg_id:
400
  chat_sessions_cache[session_key]['parent_message_id'] = msg_id
401
-
402
- break # Success
403
-
404
  except json.JSONDecodeError:
405
- print("Failed to decode JSON response")
406
- # Fallback to text if JSON fails
407
  full_content = response.text
408
  break
409
-
410
- # CASE 2: Streaming Response (Original Logic)
411
  else:
412
  buffer = ""
413
  for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
414
  if chunk:
415
  buffer += chunk
416
-
417
  while '\n' in buffer:
418
  line, buffer = buffer.split('\n', 1)
419
  line = line.strip()
420
-
421
  if not line:
422
  continue
423
-
424
  if line.startswith('data: '):
425
  line = line[6:]
426
-
427
  if line == '[DONE]':
428
  continue
429
-
430
  content, msg_id, packet_type = parse_onyx_stream_chunk(line)
431
  if msg_id:
432
  last_message_id = msg_id
@@ -434,7 +1035,7 @@ def collect_full_response(payload, model, session_key):
434
  break
435
  if content and packet_type in ['content', 'legacy', 'raw', 'error']:
436
  full_content += content
437
-
438
  if buffer.strip():
439
  if buffer.strip().startswith('data: '):
440
  buffer = buffer.strip()[6:]
@@ -443,27 +1044,65 @@ def collect_full_response(payload, model, session_key):
443
  last_message_id = msg_id
444
  if content and packet_type in ['content', 'legacy', 'raw', 'error']:
445
  full_content += content
446
-
447
- # Update session (only if we got a valid message ID)
448
  if session_key in chat_sessions_cache and last_message_id:
449
  chat_sessions_cache[session_key]['parent_message_id'] = last_message_id
450
-
451
  break
452
 
453
-
454
  except requests.exceptions.RequestException as e:
455
  print(f"Request error: {e}")
456
  continue
457
-
458
  if not full_content:
459
  return {
460
  "error": {
461
- "message": f"No response from Onyx API. Debug Chunks: {debug_captured_chunks}",
462
  "type": "api_error",
463
  "code": 500
464
  }
465
  }, 500
466
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
467
  response_data = {
468
  "id": f"chatcmpl-{uuid.uuid4().hex[:24]}",
469
  "object": "chat.completion",
@@ -483,7 +1122,283 @@ def collect_full_response(payload, model, session_key):
483
  "total_tokens": -1
484
  }
485
  }
486
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
  return response_data, 200
488
 
489
 
@@ -491,23 +1406,30 @@ def collect_full_response(payload, model, session_key):
491
 
492
  @app.route('/v1/chat/completions', methods=['POST'])
493
  def chat_completions():
494
- """OpenAI-compatible chat completions endpoint"""
495
-
496
  try:
497
  data = request.json
498
- print(f"Received request: {json.dumps(data, indent=2)}")
499
  except Exception as e:
500
  return jsonify({"error": {"message": f"Invalid JSON: {e}", "type": "invalid_request_error"}}), 400
501
-
502
  # Extract parameters
503
  model = data.get('model', 'openai/gpt-4')
504
  messages = data.get('messages', [])
505
  stream = data.get('stream', False)
506
  temperature = data.get('temperature', 0.7)
507
-
508
- # Use a unique session key per conversation or default
 
 
 
 
 
 
 
509
  session_key = data.get('session_id', 'default')
510
-
511
  if not messages:
512
  return jsonify({
513
  "error": {
@@ -515,15 +1437,15 @@ def chat_completions():
515
  "type": "invalid_request_error"
516
  }
517
  }), 400
518
-
519
  # Parse model string and normalize provider name
520
  model_provider, model_version = parse_model_string(model)
521
  model_provider = normalize_provider_name(model_provider)
522
  print(f"Model provider: {model_provider}, version: {model_version}")
523
-
524
  # Get or create chat session
525
  session_info = get_or_create_session(session_key)
526
-
527
  if not session_info:
528
  return jsonify({
529
  "error": {
@@ -531,7 +1453,7 @@ def chat_completions():
531
  "type": "api_error"
532
  }
533
  }), 500
534
-
535
  # Build Onyx payload
536
  payload = build_onyx_payload(
537
  messages=messages,
@@ -540,12 +1462,13 @@ def chat_completions():
540
  temperature=temperature,
541
  chat_session_id=session_info['session_id'],
542
  parent_message_id=session_info.get('parent_message_id'),
543
- stream=stream # Pass client's streaming preference
 
544
  )
545
-
546
  if stream:
547
  return Response(
548
- stream_onyx_response(payload, model, session_key),
549
  content_type='text/event-stream',
550
  headers={
551
  'Cache-Control': 'no-cache',
@@ -554,7 +1477,7 @@ def chat_completions():
554
  }
555
  )
556
  else:
557
- response_data, status_code = collect_full_response(payload, model, session_key)
558
  return jsonify(response_data), status_code
559
 
560
 
@@ -563,9 +1486,9 @@ def create_new_session():
563
  """Create a new chat session"""
564
  data = request.json or {}
565
  persona_id = data.get('persona_id', 0)
566
-
567
  session_id = create_chat_session(persona_id)
568
-
569
  if session_id:
570
  session_key = str(uuid.uuid4())
571
  chat_sessions_cache[session_key] = {
@@ -594,12 +1517,11 @@ def list_models():
594
  {"id": "openai/gpt-5.2", "object": "model", "owned_by": "openai"},
595
  {"id": "google/gemini-3-pro-preview", "object": "model", "owned_by": "openai"},
596
  {"id": "openai/gpt-4o", "object": "model", "owned_by": "openai"},
597
-
598
  {"id": "anthropic/claude-opus-4-6", "object": "model", "owned_by": "anthropic"},
599
  {"id": "anthropic/claude-sonnet-4.5", "object": "model", "owned_by": "anthropic"},
600
  {"id": "anthropic/claude-haiku-4-5", "object": "model", "owned_by": "anthropic"},
601
  ]
602
-
603
  return jsonify({
604
  "object": "list",
605
  "data": models
@@ -620,7 +1542,7 @@ def get_model(model_id):
620
  def health_check():
621
  """Health check endpoint"""
622
  return jsonify({
623
- "status": "healthy",
624
  "timestamp": int(time.time()),
625
  "active_sessions": len(chat_sessions_cache)
626
  })
@@ -630,276 +1552,240 @@ def health_check():
630
  def test_onyx_connection():
631
  """Test connection to Onyx API"""
632
  results = {}
633
-
634
- # Test creating a session
635
  session_id = create_chat_session()
636
  results['create_session'] = {
637
  "success": session_id is not None,
638
  "session_id": session_id
639
  }
640
-
641
  return jsonify(results)
642
 
643
 
644
- # ============== Anthropic Messages API ==============
645
 
646
- def build_anthropic_payload_from_messages(messages, system_prompt, model_provider, model_version, temperature, chat_session_id, parent_message_id=None, stream=True, tools=None):
647
- """Convert Anthropic Messages API format to Onyx payload"""
 
648
 
649
- # Extract the last user message
650
- last_user_message = ""
651
- for msg in reversed(messages):
652
- if msg.get('role') == 'user':
653
- content = msg.get('content', '')
654
- if isinstance(content, list):
655
- text_parts = [p.get('text', '') for p in content if p.get('type') == 'text']
656
- last_user_message = ' '.join(text_parts)
657
- elif isinstance(content, str):
658
- last_user_message = content
659
- break
660
 
661
- # Build full message with system prompt
662
- full_message = last_user_message
663
- if system_prompt:
664
- if isinstance(system_prompt, list):
665
- sys_text = ' '.join([s.get('text', '') for s in system_prompt if s.get('type') == 'text'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
666
  else:
667
- sys_text = system_prompt
668
- full_message = f"[System: {sys_text}]\n\n{last_user_message}"
669
-
670
- # If tools are provided, inject them into the prompt context
671
- if tools:
672
- tools_desc = "\n\n[Available Tools:\n"
673
- for tool in tools:
674
- name = tool.get('name', '')
675
- desc = tool.get('description', '')
676
- input_schema = json.dumps(tool.get('input_schema', {}), indent=2)
677
- tools_desc += f"- {name}: {desc}\n Input Schema: {input_schema}\n"
678
- tools_desc += "]\n\n"
679
- full_message = tools_desc + full_message
680
-
681
- payload = {
682
- "message": full_message,
683
- "chat_session_id": chat_session_id,
684
- "parent_message_id": parent_message_id if parent_message_id else None,
685
- "stream": stream,
686
- "llm_override": {
687
- "model_provider": model_provider,
688
- "model_version": model_version,
689
- "temperature": temperature
690
- },
691
- "file_descriptors": [],
692
- "include_citations": False
693
- }
694
-
695
- return payload
696
-
697
-
698
- def generate_anthropic_stream_events(payload, model, session_key):
699
- """Stream response from Onyx in Anthropic Messages SSE format"""
700
-
701
- msg_id = f"msg_{uuid.uuid4().hex[:24]}"
702
- final_message_id = None
703
-
704
- endpoints = [
705
- f"{ONYX_BASE_URL}/api/chat/send-chat-message",
706
- f"{ONYX_BASE_URL}/api/chat/send-message",
707
- ]
708
-
709
- # message_start event
710
- msg_start = {
711
- "type": "message_start",
712
- "message": {
713
- "id": msg_id,
714
- "type": "message",
715
- "role": "assistant",
716
- "content": [],
717
- "model": model,
718
- "stop_reason": None,
719
- "stop_sequence": None,
720
- "usage": {"input_tokens": 0, "output_tokens": 0}
721
  }
722
- }
723
- yield f"event: message_start\ndata: {json.dumps(msg_start)}\n\n"
724
-
725
- # content_block_start
726
- yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': 0, 'content_block': {'type': 'text', 'text': ''}})}\n\n"
727
-
728
- # Ping
729
- yield f"event: ping\ndata: {json.dumps({'type': 'ping'})}\n\n"
730
-
731
- last_msg_id = None
732
-
733
- for url in endpoints:
734
- try:
735
- with requests.post(url, json=payload, headers=get_headers(), stream=True, timeout=120) as response:
736
- if response.status_code != 200:
737
- continue
738
 
739
- buffer = ""
740
- for chunk in response.iter_content(decode_unicode=True):
741
- if not chunk:
742
- continue
743
- buffer += chunk
744
-
745
- while '\n' in buffer:
746
- line, buffer = buffer.split('\n', 1)
747
- line = line.strip()
748
-
749
- if not line or line == "[DONE]":
750
- continue
751
- if line.startswith("data: "):
752
- line = line[6:]
753
-
754
- content, m_id, packet_type = parse_onyx_stream_chunk(line)
755
-
756
- if m_id:
757
- last_msg_id = m_id
758
-
759
- if content and packet_type in ['content', 'legacy', 'raw']:
760
- delta_event = {
761
- "type": "content_block_delta",
762
- "index": 0,
763
- "delta": {"type": "text_delta", "text": content}
764
- }
765
- yield f"event: content_block_delta\ndata: {json.dumps(delta_event)}\n\n"
766
-
767
- if packet_type == "stop":
768
- final_message_id = last_msg_id
769
- break
770
 
771
- break
772
- except Exception as e:
773
- print(f"Anthropic stream error: {e}")
774
- continue
775
-
776
- # Update session
777
- if final_message_id and session_key in chat_sessions_cache:
778
- chat_sessions_cache[session_key]["parent_message_id"] = final_message_id
779
-
780
- # content_block_stop
781
- yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': 0})}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
782
 
783
- # message_delta (stop reason)
784
- msg_delta = {
785
- "type": "message_delta",
786
- "delta": {"stop_reason": "end_turn", "stop_sequence": None},
787
- "usage": {"output_tokens": 0}
788
- }
789
- yield f"event: message_delta\ndata: {json.dumps(msg_delta)}\n\n"
790
 
791
- # message_stop
792
- yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
 
 
793
 
794
 
795
- def collect_anthropic_full_response(payload, model, session_key):
796
- """Collect full response and return in Anthropic Messages format"""
 
 
 
797
 
798
- full_content = ""
799
- last_message_id = None
 
 
 
 
 
 
 
 
800
 
801
- endpoints = [
802
- f"{ONYX_BASE_URL}/api/chat/send-chat-message",
803
- f"{ONYX_BASE_URL}/api/chat/send-message",
804
- ]
 
 
 
 
 
 
 
 
 
805
 
806
- for url in endpoints:
807
- try:
808
- is_streaming_request = payload.get('stream', False)
809
-
810
- with requests.post(url, json=payload, headers=get_headers(), stream=is_streaming_request, timeout=120) as response:
811
- if response.status_code == 404:
812
- continue
813
-
814
- if response.status_code != 200:
815
- return {
816
- "type": "error",
817
- "error": {
818
- "type": "api_error",
819
- "message": f"Onyx API error {response.status_code}: {response.text}"
820
- }
821
- }, response.status_code
822
-
823
- if not is_streaming_request:
824
- try:
825
- data = response.json()
826
- full_content = data.get('answer') or data.get('message') or data.get('content') or ""
827
- msg_id = data.get('message_id')
828
- if session_key in chat_sessions_cache and msg_id:
829
- chat_sessions_cache[session_key]['parent_message_id'] = msg_id
830
- break
831
- except json.JSONDecodeError:
832
- full_content = response.text
833
- break
834
- else:
835
- buffer = ""
836
- for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
837
- if chunk:
838
- buffer += chunk
839
- while '\n' in buffer:
840
- line, buffer = buffer.split('\n', 1)
841
- line = line.strip()
842
- if not line:
843
- continue
844
- if line.startswith('data: '):
845
- line = line[6:]
846
- if line == '[DONE]':
847
- continue
848
- content, msg_id, packet_type = parse_onyx_stream_chunk(line)
849
- if msg_id:
850
- last_message_id = msg_id
851
- if packet_type == 'stop':
852
- break
853
- if content and packet_type in ['content', 'legacy', 'raw', 'error']:
854
- full_content += content
855
-
856
- if session_key in chat_sessions_cache and last_message_id:
857
- chat_sessions_cache[session_key]['parent_message_id'] = last_message_id
858
- break
859
-
860
- except requests.exceptions.RequestException as e:
861
- print(f"Anthropic request error: {e}")
862
- continue
863
 
864
- if not full_content:
865
- return {
866
- "type": "error",
867
- "error": {
868
- "type": "api_error",
869
- "message": "No response from Onyx API"
870
- }
871
- }, 500
872
 
873
- response_data = {
874
- "id": f"msg_{uuid.uuid4().hex[:24]}",
875
- "type": "message",
876
- "role": "assistant",
877
- "content": [{"type": "text", "text": full_content}],
878
- "model": model,
879
- "stop_reason": "end_turn",
880
- "stop_sequence": None,
881
- "usage": {
882
- "input_tokens": 0,
883
- "output_tokens": 0
884
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
885
  }
 
886
 
887
- return response_data, 200
 
888
 
889
 
 
 
890
  @app.route('/v1/messages', methods=['POST'])
891
  def anthropic_messages():
892
- """Anthropic Messages API compatible endpoint — used by Claude Code"""
893
-
894
  try:
895
  data = request.json
896
- print(f"[Anthropic] Received request: {json.dumps(data, indent=2)[:500]}")
897
  except Exception as e:
898
  return jsonify({
899
  "type": "error",
900
  "error": {"type": "invalid_request_error", "message": f"Invalid JSON: {e}"}
901
  }), 400
902
-
903
  # Extract Anthropic parameters
904
  model = data.get('model', 'claude-opus-4-6')
905
  messages = data.get('messages', [])
@@ -908,26 +1794,28 @@ def anthropic_messages():
908
  temperature = data.get('temperature', 0.7)
909
  max_tokens = data.get('max_tokens', 4096)
910
  tools = data.get('tools', None)
911
-
 
912
  session_key = f"anthropic_{model}"
913
-
914
  if not messages:
915
  return jsonify({
916
  "type": "error",
917
  "error": {"type": "invalid_request_error", "message": "messages is required"}
918
  }), 400
919
-
920
- # Parse model — Anthropic sends bare model names like 'claude-opus-4-6'
921
- # We need to add 'anthropic/' prefix if not present
922
  if '/' not in model:
923
  full_model = f"anthropic/{model}"
924
  else:
925
  full_model = model
926
-
927
  model_provider, model_version = parse_model_string(full_model)
928
  model_provider = normalize_provider_name(model_provider)
929
  print(f"[Anthropic] Provider: {model_provider}, Version: {model_version}")
930
-
 
 
931
  # Get or create session
932
  session_info = get_or_create_session(session_key)
933
  if not session_info:
@@ -935,7 +1823,7 @@ def anthropic_messages():
935
  "type": "error",
936
  "error": {"type": "api_error", "message": "Failed to create chat session"}
937
  }), 500
938
-
939
  # Build Onyx payload
940
  payload = build_anthropic_payload_from_messages(
941
  messages=messages,
@@ -948,10 +1836,10 @@ def anthropic_messages():
948
  stream=stream,
949
  tools=tools
950
  )
951
-
952
  if stream:
953
  return Response(
954
- generate_anthropic_stream_events(payload, model, session_key),
955
  content_type='text/event-stream',
956
  headers={
957
  'Cache-Control': 'no-cache',
@@ -960,7 +1848,7 @@ def anthropic_messages():
960
  }
961
  )
962
  else:
963
- response_data, status_code = collect_anthropic_full_response(payload, model, session_key)
964
  return jsonify(response_data), status_code
965
 
966
 
@@ -969,10 +1857,25 @@ def root():
969
  """Root endpoint with API info"""
970
  return jsonify({
971
  "name": "OpenAI + Anthropic Compatible Onyx API Proxy",
972
- "version": "2.0.0",
 
 
 
 
 
 
 
 
 
973
  "endpoints": {
974
- "chat_completions": "/v1/chat/completions (OpenAI format)",
975
- "messages": "/v1/messages (Anthropic format)",
 
 
 
 
 
 
976
  "models": "/v1/models",
977
  "sessions": "/v1/sessions",
978
  "health": "/health",
@@ -982,39 +1885,6 @@ def root():
982
  })
983
 
984
 
985
- @app.route('/debug/personas', methods=['GET'])
986
- def list_personas():
987
- """List all available personas (agents) from Onyx API"""
988
- try:
989
- # Use configured token (from global var)
990
- api_token = ONYX_API_TOKEN
991
-
992
- # Handle dummy keys
993
- if "dummy-key" in api_token or "<your-token-here>" in api_token:
994
- return jsonify({
995
- "error": "Cannot fetch personas with dummy key. Set ONYX_API_TOKEN env var on server."
996
- }), 400
997
-
998
- headers = {
999
- "Authorization": f"Bearer {api_token}",
1000
- "Content-Type": "application/json"
1001
- }
1002
-
1003
- # Call Onyx API directly
1004
- resp = requests.get(f"{ONYX_BASE_URL}/api/persona", headers=headers, timeout=10)
1005
-
1006
- if resp.status_code == 200:
1007
- return jsonify(resp.json())
1008
- else:
1009
- return jsonify({
1010
- "error": f"Onyx API Error: {resp.status_code}",
1011
- "body": resp.text
1012
- }), resp.status_code
1013
-
1014
- except Exception as e:
1015
- return jsonify({"error": str(e)}), 500
1016
-
1017
-
1018
  # ============== Error Handlers ==============
1019
 
1020
  @app.errorhandler(404)
@@ -1040,13 +1910,14 @@ def server_error(e):
1040
 
1041
 
1042
  if __name__ == '__main__':
1043
- print("="*60)
1044
- print("OpenAI-Compatible Onyx API Proxy")
1045
- print("="*60)
 
1046
  print(f"Onyx Base URL: {ONYX_BASE_URL}")
1047
  print(f"Token configured: {'Yes' if ONYX_API_TOKEN != '<your-token-here>' else 'No'}")
1048
- print("="*60)
1049
-
1050
  app.run(
1051
  host='0.0.0.0',
1052
  port=7860,
 
4
  import uuid
5
  import time
6
  import os
7
+ import re
8
+ import base64
9
+ import tempfile
10
+ import mimetypes
11
 
12
  app = Flask(__name__)
13
 
 
18
  # Store chat sessions
19
  chat_sessions_cache = {}
20
 
21
+ # Store uploaded files metadata
22
+ files_cache = {}
23
+
24
 
25
  def get_headers():
26
  """Get authorization headers"""
 
33
  def create_chat_session(persona_id=0):
34
  """Create a new chat session in Onyx"""
35
  url = f"{ONYX_BASE_URL}/api/chat/create-chat-session"
36
+
37
  payload = {
38
  "persona_id": persona_id,
39
  "description": "OpenAI Compatible API Session"
40
  }
41
+
42
  try:
43
  response = requests.post(url, json=payload, headers=get_headers(), timeout=30)
44
+
45
  if response.status_code == 200:
46
  data = response.json()
 
47
  session_id = data.get('chat_session_id') or data.get('id') or data
48
  print(f"Created chat session: {session_id}")
49
  return str(session_id)
 
70
  def parse_model_string(model):
71
  """
72
  Parse model string in format 'provider/model_version'
73
+ Examples:
74
  - 'openai/gpt-4' -> ('openai', 'gpt-4')
75
  - 'anthropic/claude-3-opus' -> ('anthropic', 'claude-3-opus')
76
  - 'gpt-4' -> ('openai', 'gpt-4')
 
86
 
87
 
88
  # Known provider name mappings
 
89
  PROVIDER_ALIASES = {
90
+ "openai": "OpenAI",
91
+ "anthropic": "Anthropic",
92
  "google": "Google",
93
  "azure": "Azure",
94
  "bedrock": "Bedrock",
95
  "cohere": "Cohere",
96
  "mistral": "Mistral",
 
97
  }
98
 
99
 
100
  def normalize_provider_name(provider):
101
+ """Normalize provider name to match Onyx configuration."""
 
 
 
102
  provider_lower = provider.lower().strip()
103
  return PROVIDER_ALIASES.get(provider_lower, provider_lower)
104
 
105
 
106
+ # ============== Image Upload Support ==============
107
+
108
def _image_entry(b64_data, media_type):
    """Build a normalized image record; filename extension is derived from the MIME type."""
    ext = media_type.split('/')[-1]
    if ext == 'jpeg':
        ext = 'jpg'  # prefer the common .jpg extension
    return {
        "base64_data": b64_data,
        "media_type": media_type,
        "filename": f"image_{uuid.uuid4().hex[:8]}.{ext}",
    }


def _fetch_image_from_url(url, error_prefix="Failed to download image from URL"):
    """
    Download an image over HTTP(S) and return a normalized record.

    Args:
        url: Absolute image URL.
        error_prefix: Log-message prefix used on failure (kept per call site
            so existing log output is unchanged).

    Returns:
        dict from _image_entry, or None on any failure (non-200 or exception).
    """
    try:
        resp = requests.get(url, timeout=30)
        if resp.status_code == 200:
            content_type = resp.headers.get('content-type', 'image/png')
            media_type = content_type.split(';')[0].strip()
            b64_data = base64.b64encode(resp.content).decode('utf-8')
            return _image_entry(b64_data, media_type)
    except Exception as e:
        print(f"{error_prefix}: {e}")
    return None


def extract_images_from_messages(messages, msg_format="openai"):
    """
    Extract image attachments from chat message content blocks.

    Both dialects are always detected, regardless of msg_format:

    OpenAI format:
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,..." or "https://..."}}

    Anthropic format:
        {"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": "..."}}
        {"type": "image", "source": {"type": "url", "url": "https://..."}}

    Args:
        messages: Chat messages; only list-valued 'content' is inspected.
        msg_format: Kept for interface compatibility; not used for dispatch.

    Returns:
        list of dicts: [{"base64_data": str, "media_type": str, "filename": str}, ...]
    """
    images = []

    for msg in messages:
        content = msg.get('content', '')
        if not isinstance(content, list):
            continue  # plain-string content cannot carry images

        for block in content:
            if not isinstance(block, dict):
                continue

            entry = None

            # OpenAI format: image_url
            if block.get('type') == 'image_url':
                url = block.get('image_url', {}).get('url', '')

                if url.startswith('data:'):
                    # Inline data URL: data:image/png;base64,iVBOR...
                    try:
                        header, b64_data = url.split(',', 1)
                        media_type = header.split(':')[1].split(';')[0]
                        entry = _image_entry(b64_data, media_type)
                    except Exception as e:
                        print(f"Failed to parse data URL image: {e}")
                elif url.startswith('http'):
                    entry = _fetch_image_from_url(url)

            # Anthropic format: image
            elif block.get('type') == 'image':
                source = block.get('source', {})
                if source.get('type') == 'base64':
                    entry = _image_entry(
                        source.get('data', ''),
                        source.get('media_type', 'image/png'),
                    )
                elif source.get('type') == 'url':
                    entry = _fetch_image_from_url(
                        source.get('url', ''),
                        "Failed to download Anthropic image from URL",
                    )

            if entry:
                images.append(entry)

    return images
206
+
207
+
208
def upload_image_to_onyx(image_data, chat_session_id):
    """
    Upload an image to Onyx's file upload API.

    Tries several known upload paths in order (Onyx versions expose
    different endpoints) and stops at the first that succeeds.

    Args:
        image_data: dict with base64_data, media_type, filename
        chat_session_id: The chat session to associate the file with

    Returns:
        dict: Normalized file descriptor from Onyx, or None on failure
    """
    # Decode base64 to binary
    try:
        file_bytes = base64.b64decode(image_data['base64_data'])
    except Exception as e:
        print(f"Failed to decode base64 image: {e}")
        return None

    # Try multiple Onyx file upload endpoints
    upload_endpoints = [
        f"{ONYX_BASE_URL}/api/chat/file",
        f"{ONYX_BASE_URL}/api/chat/upload-file",
        f"{ONYX_BASE_URL}/api/manage/upload-file",
    ]

    headers = {
        "Authorization": f"Bearer {ONYX_API_TOKEN}",
    }

    for url in upload_endpoints:
        try:
            # Upload as multipart form data
            files = {
                'file': (image_data['filename'], file_bytes, image_data['media_type'])
            }
            form_data = {
                'chat_session_id': chat_session_id
            }

            print(f"Uploading image to: {url}")
            response = requests.post(
                url,
                files=files,
                data=form_data,
                headers=headers,
                timeout=60
            )

            print(f"Upload response: {response.status_code}")

            if response.status_code == 200:
                result = response.json()
                print(f"Upload success: {result}")

                # BUGFIX: a 200 body may be a bare id (string/int) or a list
                # rather than a dict. Previously .get() on such a result raised
                # AttributeError, was swallowed by the broad except below, and a
                # successful upload was silently discarded. Handle it explicitly.
                if not isinstance(result, dict):
                    return {
                        "id": str(result),
                        "type": "image",
                        "name": image_data['filename'],
                    }

                # Normalize the file descriptor format
                file_descriptor = {
                    "id": result.get('file_id') or result.get('id') or result.get('document_id', ''),
                    "type": "image",
                    "name": image_data['filename'],
                }

                # Carry through any extra identifying fields the server returned
                for key in ['file_id', 'id', 'document_id', 'name', 'type']:
                    if key in result and result[key]:
                        file_descriptor[key] = result[key]

                return file_descriptor
            elif response.status_code == 404:
                continue  # Endpoint not present on this Onyx version; try next
            else:
                print(f"Upload failed: {response.status_code} - {response.text}")
                continue

        except Exception as e:
            print(f"Upload error at {url}: {e}")
            continue

    print("All upload endpoints failed")
    return None
288
+
289
+
290
def upload_images_for_session(images, chat_session_id):
    """
    Upload a batch of images to Onyx and collect their file descriptors.

    Args:
        images: Image records produced by extract_images_from_messages.
        chat_session_id: Onyx chat session ID the files belong to.

    Returns:
        list: Descriptors for every image that uploaded successfully.
        Failed uploads are logged and skipped rather than raised.
    """
    uploaded = []

    for img in images:
        result = upload_image_to_onyx(img, chat_session_id)
        if not result:
            print(f"Warning: Failed to upload image {img.get('filename', '?')}")
            continue
        uploaded.append(result)

    return uploaded
311
+
312
+
313
+ # ============== Tool Calling Support ==============
314
+
315
def build_tools_system_prompt(tools, tool_format="anthropic"):
    """
    Render a system-prompt section that teaches the model to emit
    <tool_call> JSON blocks for the given tool definitions.

    Args:
        tools: Tool definitions, OpenAI-shaped (nested under "function")
            or Anthropic-shaped (flat, with "input_schema").
        tool_format: "openai" or "anthropic"; controls how each definition
            is unpacked into {name, description, parameters}.

    Returns:
        str: Prompt text embedding the tool catalog, or "" when no tools.
    """
    if not tools:
        return ""

    def describe(tool):
        # OpenAI nests the spec under "function"; Anthropic keeps it flat.
        if tool_format == "openai":
            spec = tool.get('function', tool)
            schema = spec.get('parameters', spec.get('input_schema', {}))
        else:
            spec = tool
            schema = spec.get('input_schema', spec.get('parameters', {}))
        return {
            "name": spec.get('name', ''),
            "description": spec.get('description', ''),
            "parameters": schema,
        }

    catalog = json.dumps([describe(t) for t in tools], indent=2)

    return f"""You have access to the following tools/functions that you can call to help accomplish your task:
<available_tools>
{catalog}
</available_tools>
CRITICAL INSTRUCTIONS FOR TOOL CALLING:
1. When you need to use a tool, you MUST output a JSON block wrapped in <tool_call> tags.
2. You can call MULTIPLE tools in a single response by outputting multiple <tool_call> blocks.
3. The format for each tool call MUST be exactly:
<tool_call>
{{"name": "tool_name", "id": "unique_call_id", "input": {{"param1": "value1", "param2": "value2"}}}}
</tool_call>
4. The "id" should be a unique identifier like "call_1", "call_2", etc.
5. The "input" must match the parameter schema defined for the tool.
6. You can mix text and tool calls in your response. Text before/after tool calls is fine.
7. After a tool is executed, you will receive the result and can continue your work.
8. You MUST use the tools when they are needed. Do NOT say you cannot access tools.
9. ALL listed tools are available to you and you MUST use them when needed.
10. Do NOT refuse to call a tool by saying you don't have access or capability.
IMPORTANT: You have FULL access to ALL the tools listed above. Use them freely whenever needed."""
371
+
372
+
373
def _splice_tool_calls(text, matches, require_keys=False, log_errors=True):
    """
    Split ``text`` around regex ``matches`` and parse each match's JSON
    payload (group 1) into a tool-call record.

    Args:
        text: Full response text.
        matches: re.Match objects; group(1) is the candidate JSON object,
            group(0) is the whole block (kept as text if parsing fails).
        require_keys: When True, only accept objects carrying "name" plus an
            input-like key ("input"/"arguments"/"parameters") — used for
            fenced code blocks, which may be ordinary JSON.
        log_errors: Print JSON parse failures (tag-block behavior); fenced
            blocks fail silently, matching the original per-branch behavior.

    Returns:
        tuple: (text_parts, tool_calls)
    """
    text_parts = []
    tool_calls = []
    cursor = 0

    for match in matches:
        # Keep any text between the previous block and this one.
        leading = text[cursor:match.start()].strip()
        if leading:
            text_parts.append(leading)
        cursor = match.end()

        try:
            data = json.loads(match.group(1).strip())
        except json.JSONDecodeError as e:
            if log_errors:
                print(f"Failed to parse tool call JSON: {e}")
            text_parts.append(match.group(0))  # keep unparseable block as text
            continue

        if require_keys and not ("name" in data and
                                 ("input" in data or "arguments" in data or "parameters" in data)):
            text_parts.append(match.group(0))  # plain JSON, not a tool call
            continue

        tool_calls.append({
            "name": data.get("name", ""),
            # Synthesize an id if the model omitted one.
            "id": data.get("id", f"toolu_{uuid.uuid4().hex[:24]}"),
            "input": data.get("input", data.get("arguments", data.get("parameters", {}))),
        })

    trailing = text[cursor:].strip()
    if trailing:
        text_parts.append(trailing)

    return text_parts, tool_calls


def parse_tool_calls_from_response(response_text):
    """
    Parse tool call blocks from the model's response text.

    Recognized, in priority order:
      1. <tool_call>{"name": ..., "id": ..., "input": {...}}</tool_call>
      2. ```tool_call / ```json fenced blocks containing a {"name": ...} object
         (only when no tag-style blocks are present)

    Returns:
        tuple: (text_content, list_of_tool_calls)
          - text_content: The text parts with tool_call blocks removed,
            joined by blank lines.
          - list_of_tool_calls: List of {"name", "id", "input"} dicts.
    """
    # Pattern 1: <tool_call>...</tool_call>
    tag_matches = list(re.finditer(r'<tool_call>\s*(.*?)\s*</tool_call>',
                                   response_text, re.DOTALL))

    if tag_matches:
        text_parts, tool_calls = _splice_tool_calls(
            response_text, tag_matches, require_keys=False, log_errors=True)
    else:
        # Pattern 2: ```tool_call ... ``` or ```json with a tool-call shape
        code_pattern = r'```(?:tool_call|json)?\s*(\{[^`]*?"name"\s*:\s*"[^"]+?"[^`]*?\})\s*```'
        code_matches = list(re.finditer(code_pattern, response_text, re.DOTALL))

        if code_matches:
            text_parts, tool_calls = _splice_tool_calls(
                response_text, code_matches, require_keys=True, log_errors=False)
        else:
            # No tool calls found
            text_parts, tool_calls = [response_text], []

    clean_text = "\n\n".join(text_parts).strip()
    return clean_text, tool_calls
457
+
458
+
459
def convert_tool_results_to_text(messages):
    """
    Convert tool_result messages back into readable text for the model.

    Anthropic sends tool results as:
        {"role": "user", "content": [{"type": "tool_result", "tool_use_id": "...", "content": "..."}]}

    OpenAI sends tool results as:
        {"role": "tool", "tool_call_id": "...", "content": "..."}

    We convert these into human-readable text the model can understand.

    Assistant turns that contained tool calls (OpenAI "tool_calls" field or
    Anthropic "tool_use" blocks) are rewritten as <tool_call> text blocks so
    the downstream plain-text prompt preserves the full call/result exchange.
    Messages that match none of the patterns pass through unchanged.

    Args:
        messages: Chat messages in OpenAI or Anthropic shape.

    Returns:
        list: New message list; every converted entry has string content.
    """
    converted = []

    for msg in messages:
        role = msg.get('role', '')
        content = msg.get('content', '')

        # Handle Anthropic tool_result
        if role == 'user' and isinstance(content, list):
            text_parts = []
            tool_results = []

            for block in content:
                if isinstance(block, dict):
                    if block.get('type') == 'tool_result':
                        tool_use_id = block.get('tool_use_id', '')
                        result_content = block.get('content', '')
                        is_error = block.get('is_error', False)

                        # Handle content that is a list of blocks
                        if isinstance(result_content, list):
                            result_text = ""
                            for rc in result_content:
                                if isinstance(rc, dict) and rc.get('type') == 'text':
                                    result_text += rc.get('text', '')
                                elif isinstance(rc, str):
                                    result_text += rc
                            result_content = result_text

                        status = "ERROR" if is_error else "SUCCESS"
                        tool_results.append(f"<tool_result id=\"{tool_use_id}\" status=\"{status}\">\n{result_content}\n</tool_result>")

                    elif block.get('type') == 'text':
                        text_parts.append(block.get('text', ''))
                elif isinstance(block, str):
                    text_parts.append(block)

            if tool_results:
                # Tool results first, then any accompanying user text.
                combined = "\n\n".join(tool_results)
                if text_parts:
                    combined += "\n\n" + "\n".join(text_parts)
                converted.append({
                    "role": "user",
                    "content": combined
                })
            else:
                # List-content user message without tool results: keep as-is.
                converted.append(msg)

        # Handle OpenAI tool role
        # NOTE(review): OpenAI tool messages carry no error flag, so status is
        # always SUCCESS here; content is assumed to be a string — confirm
        # callers never pass list content on "tool" messages.
        elif role == 'tool':
            tool_call_id = msg.get('tool_call_id', '')
            converted.append({
                "role": "user",
                "content": f"<tool_result id=\"{tool_call_id}\" status=\"SUCCESS\">\n{content}\n</tool_result>"
            })

        # Handle assistant messages with tool_calls (OpenAI format)
        elif role == 'assistant' and msg.get('tool_calls'):
            # Reconstruct what the assistant said + the tool calls it made
            assistant_text = ""
            if isinstance(content, str) and content:
                assistant_text = content + "\n\n"

            for tc in msg.get('tool_calls', []):
                func = tc.get('function', {})
                tc_id = tc.get('id', '')
                # OpenAI arguments arrive as a JSON string; fall back to the
                # raw value if it isn't parseable.
                try:
                    args = json.loads(func.get('arguments', '{}'))
                except (json.JSONDecodeError, TypeError):
                    args = func.get('arguments', {})

                tool_call_json = json.dumps({
                    "name": func.get('name', ''),
                    "id": tc_id,
                    "input": args
                })
                assistant_text += f"<tool_call>\n{tool_call_json}\n</tool_call>\n\n"

            converted.append({
                "role": "assistant",
                "content": assistant_text.strip()
            })

        # Handle assistant messages with tool_use content blocks (Anthropic format)
        elif role == 'assistant' and isinstance(content, list):
            assistant_text = ""
            for block in content:
                if isinstance(block, dict):
                    if block.get('type') == 'text':
                        assistant_text += block.get('text', '') + "\n\n"
                    elif block.get('type') == 'tool_use':
                        tool_call_json = json.dumps({
                            "name": block.get('name', ''),
                            "id": block.get('id', ''),
                            "input": block.get('input', {})
                        })
                        assistant_text += f"<tool_call>\n{tool_call_json}\n</tool_call>\n\n"

            converted.append({
                "role": "assistant",
                "content": assistant_text.strip()
            })

        else:
            converted.append(msg)

    return converted
577
+
578
+
579
+ # ============== Payload Builders ==============
580
+
581
def build_onyx_payload(messages, model_provider, model_version, temperature, chat_session_id, parent_message_id=None, stream=True, tools=None):
    """Convert OpenAI format to Onyx payload with tool + image support.

    Flattens an OpenAI chat-completions request into the single-message
    payload Onyx expects: system prompt, injected tool catalog, prior turns,
    and the latest user message are concatenated into one prompt string.
    Images found in content blocks are uploaded first and referenced through
    "file_descriptors".

    Args:
        messages: OpenAI-style message list (string or content-block lists).
        model_provider: Provider name for Onyx's llm_override.
        model_version: Model name for Onyx's llm_override.
        temperature: Sampling temperature for Onyx's llm_override.
        chat_session_id: Existing Onyx chat session to post into.
        parent_message_id: Previous Onyx message id for threading, or None.
        stream: Whether Onyx should stream the answer.
        tools: Optional OpenAI tool definitions, injected as prompt text.

    Returns:
        dict: JSON payload for Onyx's send-chat-message endpoint.
    """

    # Extract images from messages BEFORE converting tool results
    images = extract_images_from_messages(messages, msg_format="openai")
    file_descriptors = []
    if images:
        print(f"Found {len(images)} image(s) in OpenAI messages, uploading...")
        file_descriptors = upload_images_for_session(images, chat_session_id)
        print(f"Successfully uploaded {len(file_descriptors)} image(s)")

    # Convert tool results in message history to text format
    processed_messages = convert_tool_results_to_text(messages)

    # Extract the last user message (searching backwards)
    last_user_message = ""
    for msg in reversed(processed_messages):
        if msg.get('role') == 'user':
            content = msg.get('content', '')
            if isinstance(content, list):
                # Only text blocks contribute; images were handled above.
                text_parts = [p.get('text', '') for p in content if isinstance(p, dict) and p.get('type') == 'text']
                last_user_message = ' '.join(text_parts)
            else:
                last_user_message = content
            break

    # Build system prompt from system messages
    system_prompt = ""
    for msg in processed_messages:
        if msg.get('role') == 'system':
            content = msg.get('content', '')
            if isinstance(content, list):
                text_parts = [p.get('text', '') for p in content if isinstance(p, dict) and p.get('type') == 'text']
                system_prompt += ' '.join(text_parts) + "\n"
            elif isinstance(content, str):
                system_prompt += content + "\n"

    # Build conversation history (all messages except last user message)
    # NOTE(review): this slices off the final list element, assuming the last
    # message is the user turn found above — confirm for histories that end
    # with a non-user message.
    history_text = ""
    for msg in processed_messages[:-1] if processed_messages else []:
        role = msg.get('role', '')
        content = msg.get('content', '')
        if role == 'system':
            continue  # Already handled
        if isinstance(content, list):
            text_parts = []
            for p in content:
                if isinstance(p, dict) and p.get('type') == 'text':
                    text_parts.append(p.get('text', ''))
            content = ' '.join(text_parts)
        if content:
            history_text += f"[{role}]: {content}\n\n"

    # Inject tool definitions into the system prompt
    tools_prompt = build_tools_system_prompt(tools, tool_format="openai") if tools else ""

    # If images were uploaded, add a note to the message
    image_note = ""
    if file_descriptors:
        image_note = f"\n[Note: {len(file_descriptors)} image(s) have been attached to this message. Please analyze them as requested.]\n"

    # Construct the full message: system / tools / history prefixes, then the
    # current user message, then the attachment note.
    full_message = last_user_message

    prefix_parts = []
    if system_prompt.strip():
        prefix_parts.append(f"[System Instructions]\n{system_prompt.strip()}")
    if tools_prompt:
        prefix_parts.append(tools_prompt)
    if history_text.strip():
        prefix_parts.append(f"[Conversation History]\n{history_text.strip()}")

    if prefix_parts:
        full_message = "\n\n".join(prefix_parts) + f"\n\n[Current User Message]\n{last_user_message}"

    if image_note:
        full_message += image_note

    payload = {
        "message": full_message,
        "chat_session_id": chat_session_id,
        "parent_message_id": parent_message_id if parent_message_id else None,
        "stream": stream,
        "llm_override": {
            "model_provider": model_provider,
            "model_version": model_version,
            "temperature": temperature
        },
        "file_descriptors": file_descriptors,
        "include_citations": False
    }

    return payload
674
+
675
+
676
def build_anthropic_payload_from_messages(messages, system_prompt, model_provider, model_version, temperature, chat_session_id, parent_message_id=None, stream=True, tools=None):
    """Convert Anthropic Messages API format to Onyx payload with full tool + image support.

    Mirrors build_onyx_payload but for Anthropic requests: the system prompt
    arrives as a separate argument (string or list of text blocks) rather
    than as system-role messages, so the history loop does not filter roles.

    Args:
        messages: Anthropic-style message list.
        system_prompt: Anthropic "system" field — string, or list of
            {"type": "text", ...} blocks.
        model_provider: Provider name for Onyx's llm_override.
        model_version: Model name for Onyx's llm_override.
        temperature: Sampling temperature for Onyx's llm_override.
        chat_session_id: Existing Onyx chat session to post into.
        parent_message_id: Previous Onyx message id for threading, or None.
        stream: Whether Onyx should stream the answer.
        tools: Optional Anthropic tool definitions, injected as prompt text.

    Returns:
        dict: JSON payload for Onyx's send-chat-message endpoint.
    """

    # Extract images from messages BEFORE converting tool results
    images = extract_images_from_messages(messages, msg_format="anthropic")
    file_descriptors = []
    if images:
        print(f"Found {len(images)} image(s) in Anthropic messages, uploading...")
        file_descriptors = upload_images_for_session(images, chat_session_id)
        print(f"Successfully uploaded {len(file_descriptors)} image(s)")

    # Convert tool results in message history to text format
    processed_messages = convert_tool_results_to_text(messages)

    # Extract the last user message (searching backwards)
    last_user_message = ""
    for msg in reversed(processed_messages):
        if msg.get('role') == 'user':
            content = msg.get('content', '')
            if isinstance(content, list):
                text_parts = [p.get('text', '') for p in content if isinstance(p, dict) and p.get('type') == 'text']
                last_user_message = ' '.join(text_parts)
            elif isinstance(content, str):
                last_user_message = content
            break

    # Process system prompt (Anthropic allows a list of text blocks)
    sys_text = ""
    if system_prompt:
        if isinstance(system_prompt, list):
            sys_text = ' '.join([s.get('text', '') for s in system_prompt if isinstance(s, dict) and s.get('type') == 'text'])
        else:
            sys_text = system_prompt

    # Build conversation history (all messages except last user)
    # NOTE(review): assumes the final list element is the user turn extracted
    # above — confirm for histories that end with a non-user message.
    history_text = ""
    for msg in processed_messages[:-1] if processed_messages else []:
        role = msg.get('role', '')
        content = msg.get('content', '')
        if isinstance(content, list):
            text_parts = []
            for p in content:
                if isinstance(p, dict) and p.get('type') == 'text':
                    text_parts.append(p.get('text', ''))
            content = ' '.join(text_parts)
        if content:
            history_text += f"[{role}]: {content}\n\n"

    # Build tool prompt
    tools_prompt = build_tools_system_prompt(tools, tool_format="anthropic") if tools else ""

    # If images were uploaded, add a note
    image_note = ""
    if file_descriptors:
        image_note = f"\n[Note: {len(file_descriptors)} image(s) have been attached to this message. Please analyze them as requested.]\n"

    # Construct full message: system / tools / history prefixes, then the
    # current user message, then the attachment note.
    full_message = last_user_message

    prefix_parts = []
    if sys_text.strip():
        prefix_parts.append(f"[System Instructions]\n{sys_text.strip()}")
    if tools_prompt:
        prefix_parts.append(tools_prompt)
    if history_text.strip():
        prefix_parts.append(f"[Conversation History]\n{history_text.strip()}")

    if prefix_parts:
        full_message = "\n\n".join(prefix_parts) + f"\n\n[Current User Message]\n{last_user_message}"

    if image_note:
        full_message += image_note

    payload = {
        "message": full_message,
        "chat_session_id": chat_session_id,
        "parent_message_id": parent_message_id if parent_message_id else None,
        "stream": stream,
        "llm_override": {
            "model_provider": model_provider,
            "model_version": model_version,
            "temperature": temperature
        },
        "file_descriptors": file_descriptors,
        "include_citations": False
    }

    return payload
764
 
765
 
766
+ # ============== Response Parsers ==============
767
+
768
  def parse_onyx_stream_chunk(chunk_text):
769
  """Parse a chunk from Onyx stream and extract the text content.
 
 
 
 
 
 
770
  Returns:
771
  tuple: (content, message_id, packet_type)
772
  """
773
  if not chunk_text or not chunk_text.strip():
774
  return None, None, None
775
+
776
  try:
777
  data = json.loads(chunk_text)
778
+
779
  if not isinstance(data, dict):
780
  return None, None, None
781
+
 
 
782
  # Handle new packet-based format
783
  if 'obj' in data:
784
  obj = data['obj']
785
  packet_type = obj.get('type', '')
786
+
787
  if packet_type == 'message_delta':
 
788
  content = obj.get('content', '')
789
  return content, None, 'content'
 
790
  elif packet_type == 'message_start':
 
791
  return None, None, 'message_start'
 
792
  elif packet_type == 'stop':
 
793
  return None, None, 'stop'
 
794
  elif packet_type == 'error':
795
  error_msg = obj.get('message', obj.get('error', 'Unknown error'))
796
  return f"[Error: {error_msg}]", None, 'error'
 
797
  elif packet_type == 'citation_delta':
 
798
  return None, None, 'citation'
 
799
  elif packet_type in ['reasoning_start', 'reasoning_delta', 'reasoning_done']:
 
800
  return None, None, 'reasoning'
 
801
  else:
 
802
  return None, None, packet_type
803
+
804
+ # FALLBACK: Old format
805
  message_id = data.get('message_id')
806
+
807
  if 'answer_piece' in data:
808
  return data['answer_piece'], message_id, 'legacy'
809
  elif 'text' in data:
 
812
  return data['content'], message_id, 'legacy'
813
  elif 'error' in data:
814
  return f"[Error: {data['error']}]", message_id, 'error'
815
+
816
  return None, None, None
817
+
818
  except json.JSONDecodeError:
 
819
  if chunk_text.strip() and not chunk_text.strip().startswith('{'):
820
  return chunk_text.strip(), None, 'raw'
821
+
822
  return None, None, None
823
 
824
 
825
+ # ============== OpenAI Format Streaming & Collection ==============
826
+
827
def generate_openai_stream_chunk(content, model, chunk_id, finish_reason=None, tool_calls=None):
    """
    Serialize one OpenAI-compatible SSE streaming chunk.

    Args:
        content: Text delta, or None to omit the content field entirely.
        model: Model name echoed back to the client.
        chunk_id: Completion id shared by every chunk of one response.
        finish_reason: None while streaming; e.g. "stop" / "tool_calls" at the end.
        tool_calls: Optional incremental tool_call deltas for the chunk.

    Returns:
        str: An SSE frame of the form "data: {json}\\n\\n".
    """
    delta = {}
    if content is not None:
        delta["content"] = content
    if tool_calls is not None:
        delta["tool_calls"] = tool_calls

    frame = {
        "id": chunk_id,
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [{
            "index": 0,
            "delta": delta,
            "finish_reason": finish_reason,
        }],
    }
    return f"data: {json.dumps(frame)}\n\n"
849
 
850
 
851
+ def stream_onyx_response(payload, model, session_key, has_tools=False):
852
  """Stream response from Onyx API in OpenAI SSE format"""
853
 
854
+ final_message_id = None
855
  chunk_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
856
+ full_content_buffer = "" # Buffer to detect tool calls
857
 
858
  endpoints = [
859
+ f"{ONYX_BASE_URL}/api/chat/send-chat-message",
860
+ f"{ONYX_BASE_URL}/api/chat/send-message",
861
  ]
862
 
863
  # Initial assistant role chunk
 
872
  "finish_reason": None
873
  }]
874
  }
875
+
876
+ if not has_tools:
877
+ # No tools = simple streaming
878
+ yield f"data: {json.dumps(initial_chunk)}\n\n"
879
 
880
  last_message_id = None
881
 
 
883
  try:
884
  print(f"Trying endpoint: {url}")
885
 
886
+ with requests.post(url, json=payload, headers=get_headers(), stream=True, timeout=120) as response:
 
 
 
 
 
 
 
887
  print(f"Response status: {response.status_code}")
888
 
889
  if response.status_code != 200:
 
912
  if msg_id:
913
  last_message_id = msg_id
914
 
 
915
  if content and packet_type in ['content', 'legacy', 'raw']:
916
+ if has_tools:
917
+ # Buffer content for tool call detection
918
+ full_content_buffer += content
919
+ else:
920
+ yield generate_openai_stream_chunk(content, model, chunk_id)
921
 
922
  if packet_type == "stop":
923
+ final_message_id = last_message_id
924
  break
925
 
926
+ break
927
 
928
  except Exception as e:
929
  print("Stream error:", e)
930
  continue
931
 
932
+ # Update session
933
  if final_message_id and session_key in chat_sessions_cache:
934
  chat_sessions_cache[session_key]["parent_message_id"] = final_message_id
935
 
936
+ # If we have tools, parse the buffered content for tool calls
937
+ if has_tools and full_content_buffer:
938
+ text_content, tool_calls = parse_tool_calls_from_response(full_content_buffer)
939
+
940
+ if tool_calls:
941
+ # Emit initial chunk
942
+ yield f"data: {json.dumps(initial_chunk)}\n\n"
943
+
944
+ # Emit text content if any
945
+ if text_content:
946
+ yield generate_openai_stream_chunk(text_content, model, chunk_id)
947
+
948
+ # Emit tool call chunks
949
+ for idx, tc in enumerate(tool_calls):
950
+ tc_chunk = [{
951
+ "index": idx,
952
+ "id": tc["id"],
953
+ "type": "function",
954
+ "function": {
955
+ "name": tc["name"],
956
+ "arguments": json.dumps(tc["input"])
957
+ }
958
+ }]
959
+ yield generate_openai_stream_chunk(None, model, chunk_id, tool_calls=tc_chunk)
960
+
961
+ yield generate_openai_stream_chunk("", model, chunk_id, "tool_calls")
962
+ else:
963
+ # No tool calls detected, emit as normal text
964
+ yield f"data: {json.dumps(initial_chunk)}\n\n"
965
+ yield generate_openai_stream_chunk(full_content_buffer, model, chunk_id)
966
+ yield generate_openai_stream_chunk("", model, chunk_id, "stop")
967
+ else:
968
+ yield generate_openai_stream_chunk("", model, chunk_id, "stop")
969
+
970
  yield "data: [DONE]\n\n"
971
 
972
+
973
+ def collect_full_response(payload, model, session_key, has_tools=False):
974
  """Collect full streaming response and return as complete OpenAI response"""
975
  full_content = ""
976
  last_message_id = None
977
+
978
  endpoints = [
979
+ f"{ONYX_BASE_URL}/api/chat/send-chat-message",
980
+ f"{ONYX_BASE_URL}/api/chat/send-message",
981
  ]
982
+
983
  for url in endpoints:
984
  try:
985
  print(f"Trying endpoint: {url}")
986
+
 
 
987
  is_streaming_request = payload.get('stream', False)
988
+
989
+ with requests.post(url, json=payload, headers=get_headers(), stream=is_streaming_request, timeout=120) as response:
 
 
 
 
 
 
 
990
  print(f"Response status: {response.status_code}")
991
+
992
  if response.status_code == 404:
993
  continue
994
+
995
  if response.status_code != 200:
996
  error_text = response.text
997
  print(f"Error response: {error_text}")
 
1002
  "code": response.status_code
1003
  }
1004
  }, response.status_code
1005
+
 
1006
  if not is_streaming_request:
1007
  try:
1008
  data = response.json()
 
1009
  full_content = data.get('answer') or data.get('message') or data.get('content') or ""
 
 
1010
  msg_id = data.get('message_id')
1011
  if session_key in chat_sessions_cache and msg_id:
1012
  chat_sessions_cache[session_key]['parent_message_id'] = msg_id
1013
+ break
 
 
1014
  except json.JSONDecodeError:
 
 
1015
  full_content = response.text
1016
  break
 
 
1017
  else:
1018
  buffer = ""
1019
  for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
1020
  if chunk:
1021
  buffer += chunk
 
1022
  while '\n' in buffer:
1023
  line, buffer = buffer.split('\n', 1)
1024
  line = line.strip()
 
1025
  if not line:
1026
  continue
 
1027
  if line.startswith('data: '):
1028
  line = line[6:]
 
1029
  if line == '[DONE]':
1030
  continue
 
1031
  content, msg_id, packet_type = parse_onyx_stream_chunk(line)
1032
  if msg_id:
1033
  last_message_id = msg_id
 
1035
  break
1036
  if content and packet_type in ['content', 'legacy', 'raw', 'error']:
1037
  full_content += content
1038
+
1039
  if buffer.strip():
1040
  if buffer.strip().startswith('data: '):
1041
  buffer = buffer.strip()[6:]
 
1044
  last_message_id = msg_id
1045
  if content and packet_type in ['content', 'legacy', 'raw', 'error']:
1046
  full_content += content
1047
+
 
1048
  if session_key in chat_sessions_cache and last_message_id:
1049
  chat_sessions_cache[session_key]['parent_message_id'] = last_message_id
 
1050
  break
1051
 
 
1052
  except requests.exceptions.RequestException as e:
1053
  print(f"Request error: {e}")
1054
  continue
1055
+
1056
  if not full_content:
1057
  return {
1058
  "error": {
1059
+ "message": "No response from Onyx API",
1060
  "type": "api_error",
1061
  "code": 500
1062
  }
1063
  }, 500
1064
+
1065
+ # Parse for tool calls if tools were provided
1066
+ if has_tools:
1067
+ text_content, tool_calls = parse_tool_calls_from_response(full_content)
1068
+
1069
+ if tool_calls:
1070
+ # Build response with tool_calls
1071
+ message = {
1072
+ "role": "assistant",
1073
+ "content": text_content if text_content else None,
1074
+ "tool_calls": []
1075
+ }
1076
+
1077
+ for idx, tc in enumerate(tool_calls):
1078
+ message["tool_calls"].append({
1079
+ "id": tc["id"],
1080
+ "type": "function",
1081
+ "function": {
1082
+ "name": tc["name"],
1083
+ "arguments": json.dumps(tc["input"])
1084
+ }
1085
+ })
1086
+
1087
+ response_data = {
1088
+ "id": f"chatcmpl-{uuid.uuid4().hex[:24]}",
1089
+ "object": "chat.completion",
1090
+ "created": int(time.time()),
1091
+ "model": model,
1092
+ "choices": [{
1093
+ "index": 0,
1094
+ "message": message,
1095
+ "finish_reason": "tool_calls"
1096
+ }],
1097
+ "usage": {
1098
+ "prompt_tokens": -1,
1099
+ "completion_tokens": -1,
1100
+ "total_tokens": -1
1101
+ }
1102
+ }
1103
+ return response_data, 200
1104
+
1105
+ # Standard text response
1106
  response_data = {
1107
  "id": f"chatcmpl-{uuid.uuid4().hex[:24]}",
1108
  "object": "chat.completion",
 
1122
  "total_tokens": -1
1123
  }
1124
  }
1125
+
1126
+ return response_data, 200
1127
+
1128
+
1129
+ # ============== Anthropic Format Streaming & Collection ==============
1130
+
1131
def generate_anthropic_stream_events(payload, model, session_key, has_tools=False):
    """Stream a response from Onyx in Anthropic Messages SSE format, with tool support.

    Collects the whole Onyx answer first (trying each known endpoint in order),
    then replays it to the client as a well-formed Anthropic SSE event sequence:
    message_start, ping, content_block_* events, message_delta, message_stop.

    Args:
        payload: JSON body to forward to the Onyx send-message endpoint.
        model: Model identifier echoed back in the message_start event.
        session_key: Key into chat_sessions_cache; parent_message_id is updated
            when a stop packet carrying a message id is seen.
        has_tools: When True, the buffered answer is parsed for tool calls and
            emitted as tool_use content blocks with stop_reason "tool_use".

    Yields:
        str: SSE-framed event lines ("event: ...\\ndata: ...\\n\\n").
    """
    msg_id = f"msg_{uuid.uuid4().hex[:24]}"
    final_message_id = None
    full_content_buffer = ""

    endpoints = [
        f"{ONYX_BASE_URL}/api/chat/send-chat-message",
        f"{ONYX_BASE_URL}/api/chat/send-message",
    ]

    last_msg_id = None

    for url in endpoints:
        try:
            with requests.post(url, json=payload, headers=get_headers(), stream=True, timeout=120) as response:
                if response.status_code != 200:
                    continue

                buffer = ""
                # chunk_size=None yields chunks as they arrive instead of the
                # requests default of 1 byte per iteration (consistent with the
                # non-streaming collectors in this file).
                for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
                    if not chunk:
                        continue
                    buffer += chunk

                    while '\n' in buffer:
                        line, buffer = buffer.split('\n', 1)
                        line = line.strip()

                        if not line or line == "[DONE]":
                            continue
                        if line.startswith("data: "):
                            line = line[6:]
                        # Also skip a terminator arriving as "data: [DONE]",
                        # which the pre-strip check above cannot catch.
                        if line == "[DONE]":
                            continue

                        content, m_id, packet_type = parse_onyx_stream_chunk(line)

                        if m_id:
                            last_msg_id = m_id

                        if content and packet_type in ['content', 'legacy', 'raw']:
                            full_content_buffer += content

                        if packet_type == "stop":
                            final_message_id = last_msg_id
                            break

            break
        except Exception as e:
            print(f"Anthropic stream error: {e}")
            continue

    # Persist the last Onyx message id so follow-up turns thread correctly.
    if final_message_id and session_key in chat_sessions_cache:
        chat_sessions_cache[session_key]["parent_message_id"] = final_message_id

    # Now build the proper Anthropic SSE response from the buffered answer.
    text_content, tool_calls = ("", [])
    if has_tools and full_content_buffer:
        text_content, tool_calls = parse_tool_calls_from_response(full_content_buffer)
    else:
        text_content = full_content_buffer

    # Determine stop reason: tool_use when at least one tool call was parsed.
    stop_reason = "tool_use" if tool_calls else "end_turn"

    # Build content blocks (text first, then tool_use blocks).
    content_blocks = []
    if text_content:
        content_blocks.append({"type": "text", "text": text_content})
    for tc in tool_calls:
        content_blocks.append({
            "type": "tool_use",
            "id": tc["id"],
            "name": tc["name"],
            "input": tc["input"]
        })

    # If no content at all, add an empty text block so the message is valid.
    if not content_blocks:
        content_blocks.append({"type": "text", "text": ""})

    # message_start
    msg_start = {
        "type": "message_start",
        "message": {
            "id": msg_id,
            "type": "message",
            "role": "assistant",
            "content": [],
            "model": model,
            "stop_reason": None,
            "stop_sequence": None,
            "usage": {"input_tokens": 0, "output_tokens": 0}
        }
    }
    yield f"event: message_start\ndata: {json.dumps(msg_start)}\n\n"

    # Ping (Anthropic clients expect an early ping event)
    yield f"event: ping\ndata: {json.dumps({'type': 'ping'})}\n\n"

    # Emit content blocks
    block_index = 0

    for block in content_blocks:
        if block["type"] == "text":
            # content_block_start
            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': block_index, 'content_block': {'type': 'text', 'text': ''}})}\n\n"

            # Stream text in fixed-size slices for better client UX.
            text = block["text"]
            chunk_size = 50  # characters per chunk
            for i in range(0, len(text), chunk_size):
                text_chunk = text[i:i + chunk_size]
                delta_event = {
                    "type": "content_block_delta",
                    "index": block_index,
                    "delta": {"type": "text_delta", "text": text_chunk}
                }
                yield f"event: content_block_delta\ndata: {json.dumps(delta_event)}\n\n"

            # content_block_stop
            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': block_index})}\n\n"
            block_index += 1

        elif block["type"] == "tool_use":
            # content_block_start for tool_use (input starts empty per spec)
            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': block_index, 'content_block': {'type': 'tool_use', 'id': block['id'], 'name': block['name'], 'input': {}}})}\n\n"

            # Send the full tool input as a single input_json_delta.
            input_json = json.dumps(block["input"])
            delta_event = {
                "type": "content_block_delta",
                "index": block_index,
                "delta": {"type": "input_json_delta", "partial_json": input_json}
            }
            yield f"event: content_block_delta\ndata: {json.dumps(delta_event)}\n\n"

            # content_block_stop
            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': block_index})}\n\n"
            block_index += 1

    # message_delta carries the final stop reason.
    msg_delta = {
        "type": "message_delta",
        "delta": {"stop_reason": stop_reason, "stop_sequence": None},
        "usage": {"output_tokens": 0}
    }
    yield f"event: message_delta\ndata: {json.dumps(msg_delta)}\n\n"

    # message_stop terminates the event stream.
    yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
1283
+
1284
+
1285
def collect_anthropic_full_response(payload, model, session_key, has_tools=False):
    """Collect full response and return in Anthropic Messages format with tool support.

    Tries each known Onyx send-message endpoint in order (404 -> try next).
    Handles both streaming (SSE line parsing) and non-streaming (single JSON)
    Onyx responses, then wraps the accumulated text in an Anthropic Messages
    response dict.

    Returns:
        tuple: (response_dict, http_status_code). On failure the dict is an
        Anthropic-style error object ({"type": "error", ...}).
    """
    full_content = ""
    last_message_id = None

    endpoints = [
        f"{ONYX_BASE_URL}/api/chat/send-chat-message",
        f"{ONYX_BASE_URL}/api/chat/send-message",
    ]

    for url in endpoints:
        try:
            # Only open a streaming connection when the forwarded payload asked for one.
            is_streaming_request = payload.get('stream', False)

            with requests.post(url, json=payload, headers=get_headers(), stream=is_streaming_request, timeout=120) as response:
                # 404 means this endpoint variant doesn't exist on this Onyx build.
                if response.status_code == 404:
                    continue

                # Any other non-200 is surfaced to the caller as an Anthropic error.
                if response.status_code != 200:
                    return {
                        "type": "error",
                        "error": {
                            "type": "api_error",
                            "message": f"Onyx API error {response.status_code}: {response.text}"
                        }
                    }, response.status_code

                if not is_streaming_request:
                    try:
                        data = response.json()
                        # Onyx variants name the answer field differently; take the first present.
                        full_content = data.get('answer') or data.get('message') or data.get('content') or ""
                        msg_id = data.get('message_id')
                        if session_key in chat_sessions_cache and msg_id:
                            chat_sessions_cache[session_key]['parent_message_id'] = msg_id
                        break
                    except json.JSONDecodeError:
                        # Non-JSON body: treat the raw text as the answer.
                        full_content = response.text
                        break
                else:
                    # SSE parsing: accumulate raw chunks and split on newlines,
                    # since a chunk boundary may fall mid-line.
                    buffer = ""
                    for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
                        if chunk:
                            buffer += chunk
                            while '\n' in buffer:
                                line, buffer = buffer.split('\n', 1)
                                line = line.strip()
                                if not line:
                                    continue
                                if line.startswith('data: '):
                                    line = line[6:]
                                if line == '[DONE]':
                                    continue
                                # parse_onyx_stream_chunk presumably returns
                                # (text, message_id, packet_type) — defined elsewhere in this file.
                                content, msg_id, packet_type = parse_onyx_stream_chunk(line)
                                if msg_id:
                                    last_message_id = msg_id
                                if packet_type == 'stop':
                                    break
                                # 'error' packets are appended too, so the caller sees the message.
                                if content and packet_type in ['content', 'legacy', 'raw', 'error']:
                                    full_content += content

                    # Remember the last message id for conversation threading.
                    if session_key in chat_sessions_cache and last_message_id:
                        chat_sessions_cache[session_key]['parent_message_id'] = last_message_id
                    break

        except requests.exceptions.RequestException as e:
            # Network-level failure: log and fall through to the next endpoint.
            print(f"Anthropic request error: {e}")
            continue

    # Nothing collected from any endpoint -> generic 500 error.
    if not full_content:
        return {
            "type": "error",
            "error": {
                "type": "api_error",
                "message": "No response from Onyx API"
            }
        }, 500

    # Parse for tool calls only when the caller declared tools.
    content_blocks = []
    stop_reason = "end_turn"

    if has_tools:
        text_content, tool_calls = parse_tool_calls_from_response(full_content)

        if text_content:
            content_blocks.append({"type": "text", "text": text_content})

        if tool_calls:
            stop_reason = "tool_use"
            for tc in tool_calls:
                content_blocks.append({
                    "type": "tool_use",
                    "id": tc["id"],
                    "name": tc["name"],
                    "input": tc["input"]
                })
    else:
        content_blocks.append({"type": "text", "text": full_content})

    # Anthropic messages must contain at least one content block.
    if not content_blocks:
        content_blocks.append({"type": "text", "text": ""})

    # Token counts are not reported by Onyx, so usage is zeroed.
    response_data = {
        "id": f"msg_{uuid.uuid4().hex[:24]}",
        "type": "message",
        "role": "assistant",
        "content": content_blocks,
        "model": model,
        "stop_reason": stop_reason,
        "stop_sequence": None,
        "usage": {
            "input_tokens": 0,
            "output_tokens": 0
        }
    }

    return response_data, 200
1403
 
1404
 
 
1406
 
1407
  @app.route('/v1/chat/completions', methods=['POST'])
1408
  def chat_completions():
1409
+ """OpenAI-compatible chat completions endpoint with full tool/function calling support"""
1410
+
1411
  try:
1412
  data = request.json
1413
+ print(f"Received request: {json.dumps(data, indent=2)[:1000]}")
1414
  except Exception as e:
1415
  return jsonify({"error": {"message": f"Invalid JSON: {e}", "type": "invalid_request_error"}}), 400
1416
+
1417
  # Extract parameters
1418
  model = data.get('model', 'openai/gpt-4')
1419
  messages = data.get('messages', [])
1420
  stream = data.get('stream', False)
1421
  temperature = data.get('temperature', 0.7)
1422
+ tools = data.get('tools', None)
1423
+ functions = data.get('functions', None) # Legacy function calling
1424
+
1425
+ # Convert legacy functions to tools format
1426
+ if functions and not tools:
1427
+ tools = [{"type": "function", "function": f} for f in functions]
1428
+
1429
+ has_tools = bool(tools)
1430
+
1431
  session_key = data.get('session_id', 'default')
1432
+
1433
  if not messages:
1434
  return jsonify({
1435
  "error": {
 
1437
  "type": "invalid_request_error"
1438
  }
1439
  }), 400
1440
+
1441
  # Parse model string and normalize provider name
1442
  model_provider, model_version = parse_model_string(model)
1443
  model_provider = normalize_provider_name(model_provider)
1444
  print(f"Model provider: {model_provider}, version: {model_version}")
1445
+
1446
  # Get or create chat session
1447
  session_info = get_or_create_session(session_key)
1448
+
1449
  if not session_info:
1450
  return jsonify({
1451
  "error": {
 
1453
  "type": "api_error"
1454
  }
1455
  }), 500
1456
+
1457
  # Build Onyx payload
1458
  payload = build_onyx_payload(
1459
  messages=messages,
 
1462
  temperature=temperature,
1463
  chat_session_id=session_info['session_id'],
1464
  parent_message_id=session_info.get('parent_message_id'),
1465
+ stream=stream,
1466
+ tools=tools
1467
  )
1468
+
1469
  if stream:
1470
  return Response(
1471
+ stream_onyx_response(payload, model, session_key, has_tools=has_tools),
1472
  content_type='text/event-stream',
1473
  headers={
1474
  'Cache-Control': 'no-cache',
 
1477
  }
1478
  )
1479
  else:
1480
+ response_data, status_code = collect_full_response(payload, model, session_key, has_tools=has_tools)
1481
  return jsonify(response_data), status_code
1482
 
1483
 
 
1486
  """Create a new chat session"""
1487
  data = request.json or {}
1488
  persona_id = data.get('persona_id', 0)
1489
+
1490
  session_id = create_chat_session(persona_id)
1491
+
1492
  if session_id:
1493
  session_key = str(uuid.uuid4())
1494
  chat_sessions_cache[session_key] = {
 
1517
  {"id": "openai/gpt-5.2", "object": "model", "owned_by": "openai"},
1518
  {"id": "google/gemini-3-pro-preview", "object": "model", "owned_by": "openai"},
1519
  {"id": "openai/gpt-4o", "object": "model", "owned_by": "openai"},
 
1520
  {"id": "anthropic/claude-opus-4-6", "object": "model", "owned_by": "anthropic"},
1521
  {"id": "anthropic/claude-sonnet-4.5", "object": "model", "owned_by": "anthropic"},
1522
  {"id": "anthropic/claude-haiku-4-5", "object": "model", "owned_by": "anthropic"},
1523
  ]
1524
+
1525
  return jsonify({
1526
  "object": "list",
1527
  "data": models
 
1542
def health_check():
    """Report liveness plus the number of currently cached chat sessions."""
    status_payload = {
        "status": "healthy",
        "timestamp": int(time.time()),
        "active_sessions": len(chat_sessions_cache),
    }
    return jsonify(status_payload)
 
1552
def test_onyx_connection():
    """Probe the Onyx API by attempting to create a throwaway chat session."""
    session_id = create_chat_session()
    outcome = {
        'create_session': {
            "success": session_id is not None,
            "session_id": session_id,
        }
    }
    return jsonify(outcome)
1561
 
1562
 
1563
+ # ============== File Upload API ==============
1564
 
1565
@app.route('/v1/files', methods=['POST'])
def upload_file():
    """OpenAI-compatible file upload endpoint.

    Accepts multipart form data with:
    - file: The file to upload
    - purpose: The purpose of the file (e.g., 'assistants', 'vision', 'fine-tune')

    Also accepts JSON with base64 data:
    - file_data: base64-encoded file content
    - filename: name of the file
    - purpose: purpose string

    Returns an OpenAI-style file object (internal fields stripped), or an
    error object with HTTP 400/500.
    """
    try:
        file_id = f"file-{uuid.uuid4().hex[:24]}"

        # Multipart form upload branch.
        if 'file' in request.files:
            uploaded_file = request.files['file']
            purpose = request.form.get('purpose', 'assistants')
            filename = uploaded_file.filename or f"upload_{uuid.uuid4().hex[:8]}"
            file_bytes = uploaded_file.read()
            media_type = uploaded_file.content_type or mimetypes.guess_type(filename)[0] or 'application/octet-stream'

        # JSON-with-base64 upload branch.
        elif request.is_json:
            data = request.json
            purpose = data.get('purpose', 'assistants')
            filename = data.get('filename', f"upload_{uuid.uuid4().hex[:8]}")
            file_data = data.get('file_data', '')
            media_type = data.get('content_type', mimetypes.guess_type(filename)[0] or 'application/octet-stream')

            try:
                file_bytes = base64.b64decode(file_data)
            except Exception as e:
                return jsonify({"error": {"message": f"Invalid base64 file_data: {e}", "type": "invalid_request_error"}}), 400
        else:
            return jsonify({"error": {"message": "No file provided. Use multipart form with 'file' field or JSON with 'file_data' base64 field.", "type": "invalid_request_error"}}), 400

        file_size = len(file_bytes)
        # Fix: interpolate the actual filename (the old log line printed a
        # literal "(unknown)" placeholder instead of the file's name).
        print(f"Uploading file: {filename} ({file_size} bytes, {media_type})")

        # Best-effort upload to Onyx: try each known endpoint, 404 -> next.
        onyx_file_id = None
        upload_endpoints = [
            f"{ONYX_BASE_URL}/api/chat/file",
            f"{ONYX_BASE_URL}/api/chat/upload-file",
            f"{ONYX_BASE_URL}/api/manage/upload-file",
        ]

        headers = {
            "Authorization": f"Bearer {ONYX_API_TOKEN}",
        }

        for url in upload_endpoints:
            try:
                files = {
                    'file': (filename, file_bytes, media_type)
                }

                resp = requests.post(url, files=files, headers=headers, timeout=60)

                if resp.status_code == 200:
                    result = resp.json()
                    # Different Onyx builds name the returned id differently.
                    onyx_file_id = result.get('file_id') or result.get('id') or result.get('document_id')
                    print(f"Uploaded to Onyx: {onyx_file_id}")
                    break
                elif resp.status_code == 404:
                    continue
                else:
                    print(f"Upload to {url} failed: {resp.status_code} - {resp.text}")
                    continue
            except Exception as e:
                print(f"Upload error at {url}: {e}")
                continue

        # Store in local cache (the Onyx upload failing is not fatal).
        file_record = {
            "id": file_id,
            "object": "file",
            "bytes": file_size,
            "created_at": int(time.time()),
            "filename": filename,
            "purpose": purpose,
            "status": "processed",
            "status_details": None,
            "content_type": media_type,
            "onyx_file_id": onyx_file_id,
        }

        # Keep the raw bytes (base64) under an underscore-prefixed key so the
        # content endpoint can serve them; public views strip such keys.
        file_record["_data"] = base64.b64encode(file_bytes).decode('utf-8')
        files_cache[file_id] = file_record

        # Return OpenAI-compatible response (without internal _data).
        response = {k: v for k, v in file_record.items() if not k.startswith('_')}
        return jsonify(response), 200

    except Exception as e:
        print(f"File upload error: {e}")
        return jsonify({"error": {"message": f"File upload failed: {str(e)}", "type": "api_error"}}), 500
1666
+
1667
+
1668
@app.route('/v1/files', methods=['GET'])
def list_files():
    """List uploaded files, optionally filtered by purpose (OpenAI-compatible)."""
    purpose = request.args.get('purpose', None)

    def public_view(record):
        # Hide internal underscore-prefixed fields such as the cached bytes.
        return {key: value for key, value in record.items() if not key.startswith('_')}

    files_list = [
        public_view(record)
        for record in files_cache.values()
        if not (purpose and record.get('purpose') != purpose)
    ]

    return jsonify({
        "object": "list",
        "data": files_list,
    })
1683
 
1684
 
1685
@app.route('/v1/files/<file_id>', methods=['GET'])
def retrieve_file(file_id):
    """Return metadata for a single uploaded file (OpenAI-compatible)."""
    record = files_cache.get(file_id)
    if record is None:
        message = f"No file with id '{file_id}' found"
        return jsonify({"error": {"message": message, "type": "invalid_request_error"}}), 404

    # Strip internal underscore-prefixed fields before returning.
    public_view = {key: value for key, value in record.items() if not key.startswith('_')}
    return jsonify(public_view)
1694
+
1695
+
1696
@app.route('/v1/files/<file_id>', methods=['DELETE'])
def delete_file(file_id):
    """Remove an uploaded file from the local cache (OpenAI-compatible)."""
    if file_id not in files_cache:
        message = f"No file with id '{file_id}' found"
        return jsonify({"error": {"message": message, "type": "invalid_request_error"}}), 404

    files_cache.pop(file_id)
    deletion_receipt = {
        "id": file_id,
        "object": "file",
        "deleted": True,
    }
    return jsonify(deletion_receipt)
1708
+
1709
+
1710
@app.route('/v1/files/<file_id>/content', methods=['GET'])
def retrieve_file_content(file_id):
    """Download the raw bytes of a previously uploaded file (OpenAI-compatible)."""
    record = files_cache.get(file_id)
    if record is None:
        error_body = {"error": {"message": f"No file with id '{file_id}' found", "type": "invalid_request_error"}}
        return jsonify(error_body), 404

    encoded = record.get('_data', '')
    if not encoded:
        return jsonify({"error": {"message": "File content not available", "type": "api_error"}}), 404

    # Decode the cached base64 payload back into raw bytes for download.
    raw_bytes = base64.b64decode(encoded)
    disposition = f'attachment; filename="{record.get("filename", "file")}"'
    return Response(
        raw_bytes,
        mimetype=record.get('content_type', 'application/octet-stream'),
        headers={'Content-Disposition': disposition},
    )
1730
+
1731
+
1732
@app.route('/upload', methods=['POST'])
def simple_upload():
    """Simple file upload endpoint (non-OpenAI format) for direct uploads.
    Accepts multipart form data with a 'file' field.
    Optionally include 'chat_session_id' to associate with a session.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file provided"}), 400

    incoming = request.files['file']
    name = incoming.filename or f"upload_{uuid.uuid4().hex[:8]}"
    raw_bytes = incoming.read()
    mime = incoming.content_type or mimetypes.guess_type(name)[0] or 'application/octet-stream'
    session_id = request.form.get('chat_session_id', None)

    encoded = base64.b64encode(raw_bytes).decode('utf-8')

    # Forward to Onyx only when the caller tied the upload to a chat session.
    descriptor = None
    if session_id:
        descriptor = upload_image_to_onyx(
            {
                "base64_data": encoded,
                "media_type": mime,
                "filename": name,
            },
            session_id,
        )

    # Keep a local record so the file can be listed / retrieved later.
    new_id = f"file-{uuid.uuid4().hex[:24]}"
    record = {
        "id": new_id,
        "filename": name,
        "size": len(raw_bytes),
        "content_type": mime,
        "uploaded_at": int(time.time()),
        "onyx_descriptor": descriptor,
        "_data": encoded,
    }
    files_cache[new_id] = record

    visible = {key: value for key, value in record.items() if not key.startswith('_')}
    return jsonify(visible), 200
1772
 
1773
 
1774
+ # ============== Anthropic Messages API ==============
1775
+
1776
  @app.route('/v1/messages', methods=['POST'])
1777
  def anthropic_messages():
1778
+ """Anthropic Messages API compatible endpoint — used by Claude Code, with full tool support"""
1779
+
1780
  try:
1781
  data = request.json
1782
+ print(f"[Anthropic] Received request: {json.dumps(data, indent=2)[:1000]}")
1783
  except Exception as e:
1784
  return jsonify({
1785
  "type": "error",
1786
  "error": {"type": "invalid_request_error", "message": f"Invalid JSON: {e}"}
1787
  }), 400
1788
+
1789
  # Extract Anthropic parameters
1790
  model = data.get('model', 'claude-opus-4-6')
1791
  messages = data.get('messages', [])
 
1794
  temperature = data.get('temperature', 0.7)
1795
  max_tokens = data.get('max_tokens', 4096)
1796
  tools = data.get('tools', None)
1797
+
1798
+ has_tools = bool(tools)
1799
  session_key = f"anthropic_{model}"
1800
+
1801
  if not messages:
1802
  return jsonify({
1803
  "type": "error",
1804
  "error": {"type": "invalid_request_error", "message": "messages is required"}
1805
  }), 400
1806
+
1807
+ # Parse model
 
1808
  if '/' not in model:
1809
  full_model = f"anthropic/{model}"
1810
  else:
1811
  full_model = model
1812
+
1813
  model_provider, model_version = parse_model_string(full_model)
1814
  model_provider = normalize_provider_name(model_provider)
1815
  print(f"[Anthropic] Provider: {model_provider}, Version: {model_version}")
1816
+ if has_tools:
1817
+ print(f"[Anthropic] Tools provided: {[t.get('name', '?') for t in tools]}")
1818
+
1819
  # Get or create session
1820
  session_info = get_or_create_session(session_key)
1821
  if not session_info:
 
1823
  "type": "error",
1824
  "error": {"type": "api_error", "message": "Failed to create chat session"}
1825
  }), 500
1826
+
1827
  # Build Onyx payload
1828
  payload = build_anthropic_payload_from_messages(
1829
  messages=messages,
 
1836
  stream=stream,
1837
  tools=tools
1838
  )
1839
+
1840
  if stream:
1841
  return Response(
1842
+ generate_anthropic_stream_events(payload, model, session_key, has_tools=has_tools),
1843
  content_type='text/event-stream',
1844
  headers={
1845
  'Cache-Control': 'no-cache',
 
1848
  }
1849
  )
1850
  else:
1851
+ response_data, status_code = collect_anthropic_full_response(payload, model, session_key, has_tools=has_tools)
1852
  return jsonify(response_data), status_code
1853
 
1854
 
 
1857
  """Root endpoint with API info"""
1858
  return jsonify({
1859
  "name": "OpenAI + Anthropic Compatible Onyx API Proxy",
1860
+ "version": "3.1.0",
1861
+ "features": [
1862
+ "Full OpenAI function/tool calling support",
1863
+ "Full Anthropic tool_use support",
1864
+ "Streaming (SSE) for both formats",
1865
+ "Tool result forwarding",
1866
+ "Conversation history with tool context",
1867
+ "Image upload & vision support",
1868
+ "File upload API (OpenAI-compatible)"
1869
+ ],
1870
  "endpoints": {
1871
+ "chat_completions": "/v1/chat/completions (OpenAI format with tools)",
1872
+ "messages": "/v1/messages (Anthropic format with tool_use)",
1873
+ "files_upload": "/v1/files (POST - upload file)",
1874
+ "files_list": "/v1/files (GET - list files)",
1875
+ "files_retrieve": "/v1/files/{file_id} (GET - retrieve file info)",
1876
+ "files_delete": "/v1/files/{file_id} (DELETE - delete file)",
1877
+ "files_content": "/v1/files/{file_id}/content (GET - download file)",
1878
+ "simple_upload": "/upload (POST - simple multipart upload)",
1879
  "models": "/v1/models",
1880
  "sessions": "/v1/sessions",
1881
  "health": "/health",
 
1885
  })
1886
 
1887
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1888
  # ============== Error Handlers ==============
1889
 
1890
  @app.errorhandler(404)
 
1910
 
1911
 
1912
  if __name__ == '__main__':
1913
+ print("=" * 60)
1914
+ print("OpenAI + Anthropic Compatible Onyx API Proxy v3.1")
1915
+ print("Full Function/Tool Calling + File Upload Support")
1916
+ print("=" * 60)
1917
  print(f"Onyx Base URL: {ONYX_BASE_URL}")
1918
  print(f"Token configured: {'Yes' if ONYX_API_TOKEN != '<your-token-here>' else 'No'}")
1919
+ print("=" * 60)
1920
+
1921
  app.run(
1922
  host='0.0.0.0',
1923
  port=7860,