Spaces:
Running
Running
Updated app.py
Browse files
app.py
CHANGED
|
@@ -131,7 +131,12 @@ def build_onyx_payload(messages, model_provider, model_version, temperature, cha
|
|
| 131 |
system_prompt = ""
|
| 132 |
for msg in messages:
|
| 133 |
if msg.get('role') == 'system':
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
# Prepend system prompt to message if exists
|
| 137 |
full_message = last_user_message
|
|
@@ -636,20 +641,344 @@ def test_onyx_connection():
|
|
| 636 |
return jsonify(results)
|
| 637 |
|
| 638 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 639 |
@app.route('/', methods=['GET'])
|
| 640 |
def root():
|
| 641 |
"""Root endpoint with API info"""
|
| 642 |
return jsonify({
|
| 643 |
-
"name": "OpenAI
|
| 644 |
-
"version": "
|
| 645 |
"endpoints": {
|
| 646 |
-
"chat_completions": "/v1/chat/completions",
|
|
|
|
| 647 |
"models": "/v1/models",
|
| 648 |
"sessions": "/v1/sessions",
|
| 649 |
"health": "/health",
|
| 650 |
"debug": "/debug/test-onyx"
|
| 651 |
},
|
| 652 |
-
"model_format": "provider/model_version (e.g., openai/gpt-4)"
|
| 653 |
})
|
| 654 |
|
| 655 |
|
|
|
|
| 131 |
system_prompt = ""
|
| 132 |
for msg in messages:
|
| 133 |
if msg.get('role') == 'system':
|
| 134 |
+
content = msg.get('content', '')
|
| 135 |
+
if isinstance(content, list):
|
| 136 |
+
text_parts = [p.get('text', '') for p in content if p.get('type') == 'text']
|
| 137 |
+
system_prompt += ' '.join(text_parts) + "\n"
|
| 138 |
+
elif isinstance(content, str):
|
| 139 |
+
system_prompt += content + "\n"
|
| 140 |
|
| 141 |
# Prepend system prompt to message if exists
|
| 142 |
full_message = last_user_message
|
|
|
|
| 641 |
return jsonify(results)
|
| 642 |
|
| 643 |
|
| 644 |
+
# ============== Anthropic Messages API ==============


def build_anthropic_payload_from_messages(messages, system_prompt, model_provider, model_version, temperature, chat_session_id, parent_message_id=None, stream=True, tools=None):
    """Convert an Anthropic Messages API request into an Onyx chat payload.

    Only the most recent user message is sent to Onyx: earlier turns are
    carried by the Onyx chat session itself (``chat_session_id`` /
    ``parent_message_id``), not resent with each request.

    Args:
        messages: Anthropic-format message list ({'role': ..., 'content': ...});
            content may be a plain string or a list of content blocks.
        system_prompt: Anthropic ``system`` field — a string or a list of
            ``{'type': 'text', 'text': ...}`` blocks.
        model_provider / model_version: Onyx ``llm_override`` target.
        temperature: sampling temperature forwarded to Onyx.
        chat_session_id: existing Onyx chat session id.
        parent_message_id: id of the previous Onyx message in this session,
            or None for the first turn.
        stream: whether Onyx should stream the reply.
        tools: optional Anthropic tool definitions; Onyx has no native tool
            calling, so they are surfaced as text in the prompt.

    Returns:
        dict payload suitable for POSTing to the Onyx send-message endpoints.
    """
    # Walk backwards to find the latest user turn; keep only text blocks
    # when the content is a list of Anthropic content blocks.
    last_user_message = ""
    for msg in reversed(messages):
        if msg.get('role') == 'user':
            content = msg.get('content', '')
            if isinstance(content, list):
                text_parts = [p.get('text', '') for p in content if p.get('type') == 'text']
                last_user_message = ' '.join(text_parts)
            elif isinstance(content, str):
                last_user_message = content
            break

    # Prepend the system prompt (string or list-of-blocks form) if present.
    full_message = last_user_message
    if system_prompt:
        if isinstance(system_prompt, list):
            sys_text = ' '.join([s.get('text', '') for s in system_prompt if s.get('type') == 'text'])
        else:
            sys_text = system_prompt
        full_message = f"[System: {sys_text}]\n\n{last_user_message}"

    # If tools are provided, inject their schemas into the prompt context so
    # the backing model at least sees what the client expected to be callable.
    if tools:
        tools_desc = "\n\n[Available Tools:\n"
        for tool in tools:
            name = tool.get('name', '')
            desc = tool.get('description', '')
            input_schema = json.dumps(tool.get('input_schema', {}), indent=2)
            tools_desc += f"- {name}: {desc}\n  Input Schema: {input_schema}\n"
        tools_desc += "]\n\n"
        full_message = tools_desc + full_message

    payload = {
        "message": full_message,
        "chat_session_id": chat_session_id,
        # Fix: pass parent_message_id through unchanged. The original
        # `parent_message_id if parent_message_id else None` collapsed a
        # valid message id of 0 to None, silently breaking threading.
        "parent_message_id": parent_message_id,
        "stream": stream,
        "llm_override": {
            "model_provider": model_provider,
            "model_version": model_version,
            "temperature": temperature
        },
        "file_descriptors": [],
        "include_citations": False
    }

    return payload
|
| 696 |
+
|
| 697 |
+
|
| 698 |
+
def generate_anthropic_stream_events(payload, model, session_key):
    """Stream a response from Onyx as Anthropic Messages API SSE events.

    Yields the standard Anthropic event sequence: ``message_start``,
    ``content_block_start``, ``ping``, N ``content_block_delta`` events,
    then ``content_block_stop``, ``message_delta`` and ``message_stop``.
    Token usage is reported as 0 — Onyx does not expose token counts here.

    Args:
        payload: Onyx payload from build_anthropic_payload_from_messages.
        model: model name echoed back in the message_start event.
        session_key: key into chat_sessions_cache; updated with the final
            Onyx message id so the next turn threads correctly.
    """
    msg_id = f"msg_{uuid.uuid4().hex[:24]}"
    final_message_id = None

    # Try the newer endpoint first, then fall back to the legacy path.
    endpoints = [
        f"{ONYX_BASE_URL}/api/chat/send-chat-message",
        f"{ONYX_BASE_URL}/api/chat/send-message",
    ]

    # message_start event
    msg_start = {
        "type": "message_start",
        "message": {
            "id": msg_id,
            "type": "message",
            "role": "assistant",
            "content": [],
            "model": model,
            "stop_reason": None,
            "stop_sequence": None,
            "usage": {"input_tokens": 0, "output_tokens": 0}
        }
    }
    yield f"event: message_start\ndata: {json.dumps(msg_start)}\n\n"

    # content_block_start
    yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': 0, 'content_block': {'type': 'text', 'text': ''}})}\n\n"

    # Ping
    yield f"event: ping\ndata: {json.dumps({'type': 'ping'})}\n\n"

    last_msg_id = None

    for url in endpoints:
        try:
            with requests.post(url, json=payload, headers=get_headers(), stream=True, timeout=120) as response:
                if response.status_code != 200:
                    continue

                stopped = False
                buffer = ""
                # Fix: chunk_size=None streams data as it arrives; the
                # original call used requests' default chunk_size=1, which
                # iterates the response byte-by-byte.
                for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
                    if not chunk:
                        continue
                    buffer += chunk

                    # Onyx emits newline-delimited JSON; split complete lines
                    # out of the buffer and keep any partial tail.
                    while '\n' in buffer:
                        line, buffer = buffer.split('\n', 1)
                        line = line.strip()

                        if not line or line == "[DONE]":
                            continue
                        if line.startswith("data: "):
                            line = line[6:]

                        content, m_id, packet_type = parse_onyx_stream_chunk(line)

                        if m_id:
                            last_msg_id = m_id

                        if content and packet_type in ['content', 'legacy', 'raw']:
                            delta_event = {
                                "type": "content_block_delta",
                                "index": 0,
                                "delta": {"type": "text_delta", "text": content}
                            }
                            yield f"event: content_block_delta\ndata: {json.dumps(delta_event)}\n\n"

                        if packet_type == "stop":
                            final_message_id = last_msg_id
                            stopped = True
                            break

                    if stopped:
                        # Fix: the original `break` only exited the inner
                        # while-loop, so the chunk loop kept draining the
                        # socket after the stop packet had arrived.
                        break

                break  # stream consumed — don't fall through to the next endpoint
        except Exception as e:
            print(f"Anthropic stream error: {e}")
            continue

    # Remember the last Onyx message id so the next turn threads correctly.
    if final_message_id and session_key in chat_sessions_cache:
        chat_sessions_cache[session_key]["parent_message_id"] = final_message_id

    # content_block_stop
    yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': 0})}\n\n"

    # message_delta (stop reason)
    msg_delta = {
        "type": "message_delta",
        "delta": {"stop_reason": "end_turn", "stop_sequence": None},
        "usage": {"output_tokens": 0}
    }
    yield f"event: message_delta\ndata: {json.dumps(msg_delta)}\n\n"

    # message_stop
    yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
|
| 793 |
+
|
| 794 |
+
|
| 795 |
+
def collect_anthropic_full_response(payload, model, session_key):
    """Collect a full (non-SSE) response in Anthropic Messages format.

    Tries the Onyx endpoints in order; honours ``payload['stream']`` by
    either parsing one JSON body or accumulating a streamed reply. Token
    usage is reported as 0 — Onyx does not expose token counts here.

    Args:
        payload: Onyx payload from build_anthropic_payload_from_messages.
        model: model name echoed back in the response body.
        session_key: key into chat_sessions_cache; updated with the last
            Onyx message id for conversation threading.

    Returns:
        (response_dict, http_status) — an Anthropic ``message`` object on
        success, or an Anthropic-style ``error`` object otherwise.
    """
    full_content = ""
    last_message_id = None

    endpoints = [
        f"{ONYX_BASE_URL}/api/chat/send-chat-message",
        f"{ONYX_BASE_URL}/api/chat/send-message",
    ]

    for url in endpoints:
        try:
            is_streaming_request = payload.get('stream', False)

            with requests.post(url, json=payload, headers=get_headers(), stream=is_streaming_request, timeout=120) as response:
                if response.status_code == 404:
                    # Endpoint not present on this Onyx version — try the next one.
                    continue

                if response.status_code != 200:
                    return {
                        "type": "error",
                        "error": {
                            "type": "api_error",
                            "message": f"Onyx API error {response.status_code}: {response.text}"
                        }
                    }, response.status_code

                if not is_streaming_request:
                    try:
                        data = response.json()
                        # Onyx variants name the answer field differently.
                        full_content = data.get('answer') or data.get('message') or data.get('content') or ""
                        msg_id = data.get('message_id')
                        if session_key in chat_sessions_cache and msg_id:
                            chat_sessions_cache[session_key]['parent_message_id'] = msg_id
                        break
                    except json.JSONDecodeError:
                        # Not JSON — treat the raw body as the answer.
                        full_content = response.text
                        break
                else:
                    stopped = False
                    buffer = ""
                    for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
                        if chunk:
                            buffer += chunk
                            while '\n' in buffer:
                                line, buffer = buffer.split('\n', 1)
                                line = line.strip()
                                if not line:
                                    continue
                                if line.startswith('data: '):
                                    line = line[6:]
                                if line == '[DONE]':
                                    continue
                                content, msg_id, packet_type = parse_onyx_stream_chunk(line)
                                if msg_id:
                                    last_message_id = msg_id
                                if packet_type == 'stop':
                                    stopped = True
                                    break
                                if content and packet_type in ['content', 'legacy', 'raw', 'error']:
                                    full_content += content
                        if stopped:
                            # Fix: the original `break` only exited the inner
                            # while-loop, so the chunk loop kept consuming and
                            # could append content after the stop packet.
                            break

                    if session_key in chat_sessions_cache and last_message_id:
                        chat_sessions_cache[session_key]['parent_message_id'] = last_message_id
                    break

        except requests.exceptions.RequestException as e:
            print(f"Anthropic request error: {e}")
            continue

    if not full_content:
        return {
            "type": "error",
            "error": {
                "type": "api_error",
                "message": "No response from Onyx API"
            }
        }, 500

    response_data = {
        "id": f"msg_{uuid.uuid4().hex[:24]}",
        "type": "message",
        "role": "assistant",
        "content": [{"type": "text", "text": full_content}],
        "model": model,
        "stop_reason": "end_turn",
        "stop_sequence": None,
        "usage": {
            "input_tokens": 0,
            "output_tokens": 0
        }
    }

    return response_data, 200
|
| 888 |
+
|
| 889 |
+
|
| 890 |
+
@app.route('/v1/messages', methods=['POST'])
def anthropic_messages():
    """Anthropic Messages API compatible endpoint — used by Claude Code.

    Accepts an Anthropic-format request, maps it onto an Onyx chat session,
    and returns either an SSE stream or a full message object depending on
    the request's ``stream`` flag.
    """
    try:
        data = request.json
        print(f"[Anthropic] Received request: {json.dumps(data, indent=2)[:500]}")
    except Exception as e:
        return jsonify({
            "type": "error",
            "error": {"type": "invalid_request_error", "message": f"Invalid JSON: {e}"}
        }), 400

    # Fix: request.json is None (no exception) when the body is absent or not
    # application/json; the original then crashed with AttributeError on
    # data.get(...) and returned an opaque 500 instead of a clean 400.
    if not isinstance(data, dict):
        return jsonify({
            "type": "error",
            "error": {"type": "invalid_request_error", "message": "Request body must be a JSON object"}
        }), 400

    # Extract Anthropic parameters
    model = data.get('model', 'claude-opus-4-6')
    messages = data.get('messages', [])
    system_prompt = data.get('system', '')
    stream = data.get('stream', False)
    temperature = data.get('temperature', 0.7)
    # Accepted for API compatibility; Onyx has no max_tokens override here.
    max_tokens = data.get('max_tokens', 4096)
    tools = data.get('tools', None)

    # One Onyx session per model keeps conversation threading per model.
    session_key = f"anthropic_{model}"

    if not messages:
        return jsonify({
            "type": "error",
            "error": {"type": "invalid_request_error", "message": "messages is required"}
        }), 400

    # Parse model — Anthropic clients send bare model names like
    # 'claude-opus-4-6'; qualify with the 'anthropic/' prefix when absent.
    if '/' not in model:
        full_model = f"anthropic/{model}"
    else:
        full_model = model

    model_provider, model_version = parse_model_string(full_model)
    model_provider = normalize_provider_name(model_provider)
    print(f"[Anthropic] Provider: {model_provider}, Version: {model_version}")

    # Get or create session
    session_info = get_or_create_session(session_key)
    if not session_info:
        return jsonify({
            "type": "error",
            "error": {"type": "api_error", "message": "Failed to create chat session"}
        }), 500

    # Build Onyx payload
    payload = build_anthropic_payload_from_messages(
        messages=messages,
        system_prompt=system_prompt,
        model_provider=model_provider,
        model_version=model_version,
        temperature=temperature,
        chat_session_id=session_info['session_id'],
        parent_message_id=session_info.get('parent_message_id'),
        stream=stream,
        tools=tools
    )

    if stream:
        return Response(
            generate_anthropic_stream_events(payload, model, session_key),
            content_type='text/event-stream',
            headers={
                'Cache-Control': 'no-cache',
                'Connection': 'keep-alive',
                'X-Accel-Buffering': 'no'  # disable nginx buffering for SSE
            }
        )
    else:
        response_data, status_code = collect_anthropic_full_response(payload, model, session_key)
        return jsonify(response_data), status_code
|
| 965 |
+
|
| 966 |
+
|
| 967 |
@app.route('/', methods=['GET'])
def root():
    """Root endpoint with API info"""
    # Static service descriptor advertising both compatibility surfaces.
    info = {
        "name": "OpenAI + Anthropic Compatible Onyx API Proxy",
        "version": "2.0.0",
        "endpoints": {
            "chat_completions": "/v1/chat/completions (OpenAI format)",
            "messages": "/v1/messages (Anthropic format)",
            "models": "/v1/models",
            "sessions": "/v1/sessions",
            "health": "/health",
            "debug": "/debug/test-onyx",
        },
        "model_format": "provider/model_version (e.g., openai/gpt-4, anthropic/claude-opus-4-6)",
    }
    return jsonify(info)
|
| 983 |
|
| 984 |
|