Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,11 +6,12 @@ from flask import Flask, request, Response, stream_with_context, jsonify
|
|
| 6 |
|
| 7 |
app = Flask(__name__)
|
| 8 |
|
| 9 |
-
#
|
| 10 |
-
ONYX_API_KEY = os.getenv("ONYX_SECRET")
|
| 11 |
ONYX_URL = "https://cloud.onyx.app/api/chat/send-chat-message"
|
| 12 |
|
| 13 |
def transform_to_openai_chunk(content, model_name, finish_reason=None):
|
|
|
|
| 14 |
chunk = {
|
| 15 |
"id": f"chatcmpl-{int(time.time())}",
|
| 16 |
"object": "chat.completion.chunk",
|
|
@@ -18,7 +19,7 @@ def transform_to_openai_chunk(content, model_name, finish_reason=None):
|
|
| 18 |
"model": model_name,
|
| 19 |
"choices": [{
|
| 20 |
"index": 0,
|
| 21 |
-
"delta": {"content": content} if content else {},
|
| 22 |
"finish_reason": finish_reason
|
| 23 |
}]
|
| 24 |
}
|
|
@@ -30,9 +31,10 @@ def chat_proxy():
|
|
| 30 |
if not data or "messages" not in data:
|
| 31 |
return jsonify({"error": "No messages provided"}), 400
|
| 32 |
|
| 33 |
-
#
|
| 34 |
raw_model = data.get("model", "OpenAI / gpt-4o")
|
| 35 |
if "/" in raw_model:
|
|
|
|
| 36 |
provider, version = [part.strip() for part in raw_model.split("/", 1)]
|
| 37 |
else:
|
| 38 |
provider, version = "OpenAI", raw_model.strip()
|
|
@@ -40,6 +42,7 @@ def chat_proxy():
|
|
| 40 |
messages = data.get("messages", [])
|
| 41 |
user_content = messages[-1].get("content", "") if messages else ""
|
| 42 |
|
|
|
|
| 43 |
onyx_payload = {
|
| 44 |
"message": user_content,
|
| 45 |
"llm_override": {
|
|
@@ -47,7 +50,7 @@ def chat_proxy():
|
|
| 47 |
"model_version": version,
|
| 48 |
"temperature": data.get("temperature", 0.7)
|
| 49 |
},
|
| 50 |
-
"stream": True,
|
| 51 |
"include_citations": True,
|
| 52 |
"deep_research": False,
|
| 53 |
"parent_message_id": -1,
|
|
@@ -62,9 +65,8 @@ def chat_proxy():
|
|
| 62 |
def generate():
|
| 63 |
try:
|
| 64 |
with requests.post(ONYX_URL, json=onyx_payload, headers=headers, stream=True) as r:
|
| 65 |
-
# Basic error handling for the Onyx API response
|
| 66 |
if r.status_code != 200:
|
| 67 |
-
yield f"data: {json.dumps({'error': 'Onyx API
|
| 68 |
return
|
| 69 |
|
| 70 |
for line in r.iter_lines():
|
|
@@ -73,11 +75,14 @@ def chat_proxy():
|
|
| 73 |
|
| 74 |
try:
|
| 75 |
packet = json.loads(line.decode('utf-8'))
|
|
|
|
| 76 |
if packet.get("type") == "message_delta":
|
| 77 |
-
|
| 78 |
-
|
|
|
|
| 79 |
continue
|
| 80 |
|
|
|
|
| 81 |
yield transform_to_openai_chunk(None, raw_model, finish_reason="stop")
|
| 82 |
yield "data: [DONE]\n\n"
|
| 83 |
except Exception as e:
|
|
@@ -86,9 +91,9 @@ def chat_proxy():
|
|
| 86 |
return Response(stream_with_context(generate()), mimetype='text/event-stream')
|
| 87 |
|
| 88 |
@app.route('/')
|
| 89 |
-
def
|
| 90 |
-
return
|
| 91 |
|
| 92 |
if __name__ == '__main__':
|
| 93 |
-
# HF
|
| 94 |
app.run(host='0.0.0.0', port=7860)
|
|
|
|
| 6 |
|
| 7 |
app = Flask(__name__)
|
| 8 |
|
| 9 |
# CONFIGURATION: the Onyx API key comes from the 'ONYX_SECRET' env var
# (set it under Hugging Face Settings > Variables and Secrets); falls back
# to an empty string so the module still imports when the secret is absent.
ONYX_API_KEY = os.environ.get("ONYX_SECRET", "")
ONYX_URL = "https://cloud.onyx.app/api/chat/send-chat-message"
|
| 12 |
|
| 13 |
def transform_to_openai_chunk(content, model_name, finish_reason=None):
|
| 14 |
+
"""Formats raw text into an OpenAI-compatible SSE chunk."""
|
| 15 |
chunk = {
|
| 16 |
"id": f"chatcmpl-{int(time.time())}",
|
| 17 |
"object": "chat.completion.chunk",
|
|
|
|
| 19 |
"model": model_name,
|
| 20 |
"choices": [{
|
| 21 |
"index": 0,
|
| 22 |
+
"delta": {"content": content} if content is not None else {},
|
| 23 |
"finish_reason": finish_reason
|
| 24 |
}]
|
| 25 |
}
|
|
|
|
| 31 |
if not data or "messages" not in data:
|
| 32 |
return jsonify({"error": "No messages provided"}), 400
|
| 33 |
|
| 34 |
+
# DYNAMIC PARSING: Handles "Google Vertex / Gemini 1.5 Pro" or "openai/gpt-4o"
|
| 35 |
raw_model = data.get("model", "OpenAI / gpt-4o")
|
| 36 |
if "/" in raw_model:
|
| 37 |
+
# Splits on first slash, cleans whitespace, preserves case
|
| 38 |
provider, version = [part.strip() for part in raw_model.split("/", 1)]
|
| 39 |
else:
|
| 40 |
provider, version = "OpenAI", raw_model.strip()
|
|
|
|
| 42 |
messages = data.get("messages", [])
|
| 43 |
user_content = messages[-1].get("content", "") if messages else ""
|
| 44 |
|
| 45 |
+
# ONYX PAYLOAD: Uses your specific template requirements
|
| 46 |
onyx_payload = {
|
| 47 |
"message": user_content,
|
| 48 |
"llm_override": {
|
|
|
|
| 50 |
"model_version": version,
|
| 51 |
"temperature": data.get("temperature", 0.7)
|
| 52 |
},
|
| 53 |
+
"stream": True, # Forced ON regardless of user input
|
| 54 |
"include_citations": True,
|
| 55 |
"deep_research": False,
|
| 56 |
"parent_message_id": -1,
|
|
|
|
| 65 |
def generate():
|
| 66 |
try:
|
| 67 |
with requests.post(ONYX_URL, json=onyx_payload, headers=headers, stream=True) as r:
|
|
|
|
| 68 |
if r.status_code != 200:
|
| 69 |
+
yield f"data: {json.dumps({'error': 'Onyx API Error', 'status': r.status_code})}\n\n"
|
| 70 |
return
|
| 71 |
|
| 72 |
for line in r.iter_lines():
|
|
|
|
| 75 |
|
| 76 |
try:
|
| 77 |
packet = json.loads(line.decode('utf-8'))
|
| 78 |
+
# Onyx provides content in 'message_delta' packets
|
| 79 |
if packet.get("type") == "message_delta":
|
| 80 |
+
content = packet.get("delta", "")
|
| 81 |
+
yield transform_to_openai_chunk(content, raw_model)
|
| 82 |
+
except (json.JSONDecodeError, KeyError):
|
| 83 |
continue
|
| 84 |
|
| 85 |
+
# Signal stream completion
|
| 86 |
yield transform_to_openai_chunk(None, raw_model, finish_reason="stop")
|
| 87 |
yield "data: [DONE]\n\n"
|
| 88 |
except Exception as e:
|
|
|
|
| 91 |
return Response(stream_with_context(generate()), mimetype='text/event-stream')
|
| 92 |
|
| 93 |
@app.route('/')
def health_check():
    """Liveness endpoint: report that the proxy is up, with HTTP 200."""
    status_message = "Onyx-OpenAI Proxy is running on HF Space (Port 7860)."
    return status_message, 200
|
| 96 |
|
| 97 |
if __name__ == "__main__":
    # 7860 is the port Hugging Face Spaces exposes; bind all interfaces.
    app.run(host="0.0.0.0", port=7860)
|