Spaces:

radison-tech
/

pr-1

Running

App Files Community

Hiren122 committed on 22 days ago

Commit

08189ae

verified ·

1 Parent(s): f184aa9

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -126

app.py CHANGED Viewed

@@ -2,16 +2,15 @@ import json
 import requests
 import time
 import os
-from flask import Flask, request, Response, stream_with_context
 app = Flask(__name__)
-# CONFIGURATION: Set this in your HF Space Secrets
-ONYX_API_KEY = os.getenv("ONYX_SECRET", "your_onyx_token_here")
 ONYX_URL = "https://cloud.onyx.app/api/chat/send-chat-message"
 def transform_to_openai_chunk(content, model_name, finish_reason=None):
-    """Encapsulates content into OpenAI's SSE format."""
     chunk = {
         "id": f"chatcmpl-{int(time.time())}",
         "object": "chat.completion.chunk",
@@ -28,14 +27,14 @@ def transform_to_openai_chunk(content, model_name, finish_reason=None):
 @app.route('/v1/chat/completions', methods=['POST'])
 def chat_proxy():
     data = request.json
-    # DYNAMIC PARSING:
-    # Logic: "Provider Name / Model Name" -> provider="Provider Name", version="Model Name"
     raw_model = data.get("model", "OpenAI / gpt-4o")
     if "/" in raw_model:
         provider, version = [part.strip() for part in raw_model.split("/", 1)]
     else:
-        # Fallback if the user doesn't use a slash
         provider, version = "OpenAI", raw_model.strip()
     messages = data.get("messages", [])
@@ -48,7 +47,7 @@ def chat_proxy():
             "model_version": version,
             "temperature": data.get("temperature", 0.7)
         },
-        "stream": True, # Force stream mode logic
         "include_citations": True,
         "deep_research": False,
         "parent_message_id": -1,
@@ -63,17 +62,22 @@ def chat_proxy():
     def generate():
         try:
             with requests.post(ONYX_URL, json=onyx_payload, headers=headers, stream=True) as r:
                 for line in r.iter_lines():
                     if not line:
                         continue
-                    packet = json.loads(line.decode('utf-8'))
-                    # Onyx streams use 'message_delta' for actual text chunks
-                    if packet.get("type") == "message_delta":
-                        yield transform_to_openai_chunk(packet.get("delta", ""), raw_model)
-                # Finalize the stream for the OpenAI SDK
                 yield transform_to_openai_chunk(None, raw_model, finish_reason="stop")
                 yield "data: [DONE]\n\n"
         except Exception as e:
@@ -82,117 +86,9 @@ def chat_proxy():
     return Response(stream_with_context(generate()), mimetype='text/event-stream')
 @app.route('/')
-def index():
-    return "Onyx OpenAI Proxy is Online. Point your SDK to /v1", 200
-if __name__ == '__main__':
-    # HF.co listens on 7860
-    app.run(host='0.0.0.0', port=7860)
-    if not data or 'messages' not in data:
-        return jsonify({"error": "No messages provided"}), 400
-    # 🔒 Client controls routing directly
-    model_provider = data.get("model_provider", "OpenAI")
-    model = data.get("model", "gpt-5.2")
-    temperature = float(data.get("temperature", 0.7))
-    client_stream = bool(data.get("stream", False))
-    user_input = data["messages"][-1]["content"]
-    payload = {
-        "message": user_input,
-        # persona_id present but NOT used
-        "chat_session_info": {
-            "persona_id": 0
-        },
-        # 🔥 DIRECT client routing
-        "llm_override": {
-            "model_provider": model_provider,
-            "model_version": model,
-            "temperature": temperature
-        },
-        "stream": True,
-        "include_citations": True
-    }
-    headers = {
-        "Authorization": f"Bearer {API_KEY}",
-        "Content-Type": "application/json"
-    }
-    response = requests.post(
-        ONYX_URL,
-        json=payload,
-        headers=headers,
-        stream=True
-    )
-    # ===================== STREAMING =====================
-    if client_stream:
-        def event_stream():
-            chat_id = f"chatcmpl-{uuid.uuid4()}"
-            sent_any = False
-            for line in response.iter_lines(decode_unicode=True):
-                if not line:
-                    continue
-                try:
-                    packet = json.loads(line)
-                    if "user_message_id" in packet:
-                        continue
-                    obj = packet.get("obj", {})
-                    if obj.get("type") == "message_delta":
-                        sent_any = True
-                        yield f"data: {json.dumps({'id': chat_id,'object':'chat.completion.chunk','choices':[{'delta':{'content':obj.get('content','')}}]})}\n\n"
-                    elif obj.get("type") == "stop":
-                        if not sent_any:
-                            yield f"data: {json.dumps({'id': chat_id,'object':'chat.completion.chunk','choices':[{'delta':{'content':'(No response from model)'}}]})}\n\n"
-                        yield "data: [DONE]\n\n"
-                        break
-                except:
-                    continue
-        return Response(event_stream(), mimetype="text/event-stream")
-    # ===================== NON-STREAM =====================
-    full_text = []
-    for line in response.iter_lines(decode_unicode=True):
-        if not line:
-            continue
-        try:
-            packet = json.loads(line)
-            if "user_message_id" in packet:
-                continue
-            obj = packet.get("obj", {})
-            if obj.get("type") == "message_delta":
-                full_text.append(obj.get("content", ""))
-            elif obj.get("type") == "stop":
-                break
-        except:
-            continue
-    if not full_text:
-        full_text = ["(No response from model)"]
-    return jsonify({
-        "id": f"chatcmpl-{uuid.uuid4()}",
-        "object": "chat.completion",
-        "created": int(time.time()),
-        "model": model,
-        "choices": [{
-            "index": 0,
-            "message": {
-                "role": "assistant",
-                "content": "".join(full_text)
-            },
-            "finish_reason": "stop"
-        }]
-    })
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=7860)

 import requests
 import time
 import os
+from flask import Flask, request, Response, stream_with_context, jsonify
 app = Flask(__name__)
+# Config from Hugging Face Secrets
+ONYX_API_KEY = os.getenv("ONYX_API_KEY")
 ONYX_URL = "https://cloud.onyx.app/api/chat/send-chat-message"
 def transform_to_openai_chunk(content, model_name, finish_reason=None):
     chunk = {
         "id": f"chatcmpl-{int(time.time())}",
         "object": "chat.completion.chunk",
 @app.route('/v1/chat/completions', methods=['POST'])
 def chat_proxy():
     data = request.json
+    if not data or "messages" not in data:
+        return jsonify({"error": "No messages provided"}), 400
+    # Handle "Provider / Model ID" with gaps and caps
     raw_model = data.get("model", "OpenAI / gpt-4o")
     if "/" in raw_model:
         provider, version = [part.strip() for part in raw_model.split("/", 1)]
     else:
         provider, version = "OpenAI", raw_model.strip()
     messages = data.get("messages", [])
             "model_version": version,
             "temperature": data.get("temperature", 0.7)
         },
+        "stream": True, # Forced ON
         "include_citations": True,
         "deep_research": False,
         "parent_message_id": -1,
     def generate():
         try:
             with requests.post(ONYX_URL, json=onyx_payload, headers=headers, stream=True) as r:
+                # Basic error handling for the Onyx API response
+                if r.status_code != 200:
+                    yield f"data: {json.dumps({'error': 'Onyx API error', 'details': r.text})}\n\n"
+                    return
                 for line in r.iter_lines():
                     if not line:
                         continue
+                    try:
+                        packet = json.loads(line.decode('utf-8'))
+                        if packet.get("type") == "message_delta":
+                            yield transform_to_openai_chunk(packet.get("delta", ""), raw_model)
+                    except:
+                        continue
                 yield transform_to_openai_chunk(None, raw_model, finish_reason="stop")
                 yield "data: [DONE]\n\n"
         except Exception as e:
     return Response(stream_with_context(generate()), mimetype='text/event-stream')
 @app.route('/')
+def health():
+    return jsonify({"status": "running", "port": 7860}), 200
 if __name__ == '__main__':
+    # HF Spaces requires port 7860
     app.run(host='0.0.0.0', port=7860)