Update app.py
app.py (CHANGED)
@@ -1,40 +1,290 @@

Removed (old app.py, lines 1–40 — most deleted lines are truncated in this view, so only fragments survive):

-import …
 import requests
-import …
 import os
-from …

 app = Flask(__name__)

-# … (old lines 9–26 deleted; their content is truncated in this view)

 @app.route('/v1/chat/completions', methods=['POST'])
-def … (old lines 29–37, the previous request handler, deleted; content truncated)

         provider, version = [part.strip() for part in raw_model.split("/", 1)]
     else:
         provider, version = "OpenAI", raw_model.strip()

Added (new app.py):
+from flask import Flask, request, Response, jsonify
 import requests
+import json
+import uuid
 import os
+from datetime import datetime

 app = Flask(__name__)

+# Configuration - automatically loads from HuggingFace Secrets
+ONYX_API_URL = os.getenv("ONYX_API_URL", "https://cloud.onyx.app/api/chat/send-chat-message")
+ONYX_API_TOKEN = os.getenv("ONYX_SECRET", "")
+
+if not ONYX_API_TOKEN:
+    print("WARNING: ONYX_API_TOKEN not set in HuggingFace Secrets!")
+
+def stream_onyx_response(onyx_response, format_type="openai"):
+    """Convert Onyx streaming response to OpenAI or HF SSE format"""
+    try:
+        for line in onyx_response.iter_lines():
+            if line:
+                line = line.decode('utf-8')
+
+                if line.startswith('data: '):
+                    data = line[6:]
+
+                    if data == '[DONE]':
+                        if format_type == "openai":
+                            yield f"data: [DONE]\n\n"
+                        break
+
+                    try:
+                        onyx_data = json.loads(data)
+                        content = onyx_data.get("message", "")
+
+                        if format_type == "huggingface":
+                            hf_chunk = {
+                                "token": {
+                                    "id": 0,
+                                    "text": content,
+                                    "logprob": 0.0,
+                                    "special": False
+                                },
+                                "generated_text": None,
+                                "details": None
+                            }
+                            yield f"data:{json.dumps(hf_chunk)}\n\n"
+                        else:
+                            openai_chunk = {
+                                "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
+                                "object": "chat.completion.chunk",
+                                "created": int(datetime.now().timestamp()),
+                                "model": onyx_data.get("model", "unknown"),
+                                "choices": [{
+                                    "index": 0,
+                                    "delta": {"content": content},
+                                    "finish_reason": None
+                                }]
+                            }
+                            yield f"data: {json.dumps(openai_chunk)}\n\n"
+                    except json.JSONDecodeError:
+                        continue
+
+        if format_type == "huggingface":
+            final_hf = {
+                "token": {
+                    "id": 0,
+                    "text": "",
+                    "logprob": 0.0,
+                    "special": True
+                },
+                "generated_text": "",
+                "details": {
+                    "finish_reason": "stop",
+                    "generated_tokens": 0,
+                    "seed": None
+                }
+            }
+            yield f"data:{json.dumps(final_hf)}\n\n"
+        else:
+            final_chunk = {
+                "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
+                "object": "chat.completion.chunk",
+                "created": int(datetime.now().timestamp()),
+                "model": "unknown",
+                "choices": [{
+                    "index": 0,
+                    "delta": {},
+                    "finish_reason": "stop"
+                }]
+            }
+            yield f"data: {json.dumps(final_chunk)}\n\n"
+            yield f"data: [DONE]\n\n"
+
+    except Exception as e:
+        error_chunk = {"error": {"message": str(e), "type": "server_error"}}
+        yield f"data: {json.dumps(error_chunk)}\n\n"
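
For reference, the OpenAI-style branch of this generator emits SSE frames shaped roughly like the following (illustrative values only; note that the code mints a fresh chatcmpl id for every chunk, so ids differ across chunks of one stream, unlike upstream OpenAI streams, which reuse a single id):

data: {"id": "chatcmpl-1a2b3c4d", "object": "chat.completion.chunk", "created": 1700000000, "model": "gpt-4", "choices": [{"index": 0, "delta": {"content": "Hello"}, "finish_reason": null}]}

data: {"id": "chatcmpl-9d8e7f6a", "object": "chat.completion.chunk", "created": 1700000000, "model": "unknown", "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]}

data: [DONE]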

 @app.route('/v1/chat/completions', methods=['POST'])
+def chat_completions():
+    """OpenAI-compatible endpoint"""
+    try:
+        data = request.json
+        messages = data.get('messages', [])
+        model_provider = data.get('model_provider', 'openai')
+        model_id = data.get('model', 'gpt-4')
+        temperature = data.get('temperature', 1.0)
+        stream_requested = data.get('stream', False)
+
+        user_message = ""
+        for msg in reversed(messages):
+            if msg.get('role') == 'user':
+                user_message = msg.get('content', '')
+                break
+
+        onyx_payload = {
+            "message": user_message,
+            "llm_override": {
+                "model_provider": model_provider,
+                "model_version": model_id,
+                "temperature": temperature
+            },
+            "allowed_tool_ids": [],
+            "file_descriptors": [],
+            "deep_research": False,
+            "origin": "api",
+            "parent_message_id": -1,
+            "chat_session_id": str(uuid.uuid4()),
+            "chat_session_info": {
+                "persona_id": 0,
+                "description": "OpenAI API Bridge",
+                "project_id": 0
+            },
+            "stream": True,
+            "include_citations": True
+        }
+
+        headers = {
+            "Authorization": f"Bearer {ONYX_API_TOKEN}",
+            "Content-Type": "application/json"
+        }
+
+        onyx_response = requests.post(ONYX_API_URL, json=onyx_payload, headers=headers, stream=True)
+
+        if onyx_response.status_code != 200:
+            return jsonify({
+                "error": {
+                    "message": f"Onyx API error: {onyx_response.status_code}",
+                    "type": "api_error"
+                }
+            }), onyx_response.status_code
+
+        if stream_requested:
+            return Response(
+                stream_onyx_response(onyx_response, "openai"),
+                mimetype='text/event-stream',
+                headers={'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no'}
+            )
+        else:
+            full_content = ""
+            for line in onyx_response.iter_lines():
+                if line:
+                    line = line.decode('utf-8')
+                    if line.startswith('data: '):
+                        data = line[6:]
+                        if data != '[DONE]':
+                            try:
+                                onyx_data = json.loads(data)
+                                full_content += onyx_data.get("message", "")
+                            except json.JSONDecodeError:
+                                continue
+
+            return jsonify({
+                "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
+                "object": "chat.completion",
+                "created": int(datetime.now().timestamp()),
+                "model": model_id,
+                "choices": [{
+                    "index": 0,
+                    "message": {"role": "assistant", "content": full_content},
+                    "finish_reason": "stop"
+                }],
+                "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+            })
+
+    except Exception as e:
+        return jsonify({"error": {"message": str(e), "type": "server_error"}}), 500
+
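
A minimal client call against this endpoint might look like the sketch below, assuming the Space is served at the hypothetical URL https://your-space.hf.space (non-streaming shown; pass "stream": True to receive the SSE stream instead):

import requests

resp = requests.post(
    "https://your-space.hf.space/v1/chat/completions",  # hypothetical Space URL
    json={
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "Hello!"}],
        "stream": False,
    },
)
# Non-streaming responses follow the OpenAI chat.completion shape built above
print(resp.json()["choices"][0]["message"]["content"])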
+@app.route('/generate', methods=['POST'])
+@app.route('/v1/completions', methods=['POST'])
+def hf_generate():
+    """HuggingFace TGI-compatible endpoint"""
+    try:
+        data = request.json
+        inputs = data.get('inputs', '')
+        parameters = data.get('parameters', {})
+        model_provider = parameters.get('model_provider', 'openai')
+        model_id = parameters.get('model', 'gpt-4')
+        temperature = parameters.get('temperature', 1.0)
+        stream_requested = data.get('stream', False)
+
+        onyx_payload = {
+            "message": inputs,
+            "llm_override": {
+                "model_provider": model_provider,
+                "model_version": model_id,
+                "temperature": temperature
+            },
+            "allowed_tool_ids": [],
+            "file_descriptors": [],
+            "deep_research": False,
+            "origin": "api",
+            "parent_message_id": -1,
+            "chat_session_id": str(uuid.uuid4()),
+            "chat_session_info": {
+                "persona_id": 0,
+                "description": "HuggingFace API Bridge",
+                "project_id": 0
+            },
+            "stream": True,
+            "include_citations": True
+        }
+
+        headers = {
+            "Authorization": f"Bearer {ONYX_API_TOKEN}",
+            "Content-Type": "application/json"
+        }
+
+        onyx_response = requests.post(ONYX_API_URL, json=onyx_payload, headers=headers, stream=True)
+
+        if onyx_response.status_code != 200:
+            return jsonify({"error": f"Onyx API error: {onyx_response.status_code}"}), onyx_response.status_code
+
+        if stream_requested:
+            return Response(
+                stream_onyx_response(onyx_response, "huggingface"),
+                mimetype='text/event-stream',
+                headers={'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no'}
+            )
+        else:
+            full_content = ""
+            for line in onyx_response.iter_lines():
+                if line:
+                    line = line.decode('utf-8')
+                    if line.startswith('data: '):
+                        data = line[6:]
+                        if data != '[DONE]':
+                            try:
+                                onyx_data = json.loads(data)
+                                full_content += onyx_data.get("message", "")
+                            except json.JSONDecodeError:
+                                continue
+
+            return jsonify([{"generated_text": full_content}])
+
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
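
And a matching sketch for the TGI-style endpoint, again with a hypothetical Space URL; note the non-streaming response is a one-element list carrying generated_text:

import requests

resp = requests.post(
    "https://your-space.hf.space/generate",  # hypothetical Space URL
    json={
        "inputs": "Hello!",
        "parameters": {"model": "gpt-4", "temperature": 0.7},
    },
)
# hf_generate returns [{"generated_text": ...}] in the non-streaming case
print(resp.json()[0]["generated_text"])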
+@app.route('/v1/models', methods=['POST', 'GET'])
+@app.route('/models', methods=['POST', 'GET'])
+def list_models():
+    """List available models"""
+    return jsonify({
+        "object": "list",
+        "data": [
+            {"id": "gpt-4", "object": "model", "created": int(datetime.now().timestamp()), "owned_by": "onyx"},
+            {"id": "claude-3-5-sonnet", "object": "model", "created": int(datetime.now().timestamp()), "owned_by": "onyx"}
+        ]
+    })
+
+@app.route('/health', methods=['GET'])
+@app.route('/', methods=['GET'])
+def health():
+    """Health check endpoint"""
+    return jsonify({
+        "status": "ok",
+        "api_token_set": bool(ONYX_API_TOKEN),
+        "endpoints": {
+            "openai": "/v1/chat/completions",
+            "huggingface": "/generate or /v1/completions",
+            "models": "/v1/models"
+        }
+    })
+
+if __name__ == '__main__':
+    port = int(os.getenv("PORT", 7860))
+    app.run(host="0.0.0.0", port=port)
         # Splits on first slash, cleans whitespace, preserves case
         provider, version = [part.strip() for part in raw_model.split("/", 1)]
     else:
         provider, version = "OpenAI", raw_model.strip()
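
Once deployed (the app binds to the PORT env var, defaulting to 7860), a quick liveness probe against the hypothetical Space URL confirms the token was picked up from the Space secrets:

import requests

status = requests.get("https://your-space.hf.space/health").json()  # hypothetical Space URL
print(status["status"], status["api_token_set"])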