Spaces:

SalexAI
/

api

Sleeping

App Files Files Community

SalexAI committed on Feb 12

Commit

3e9ae46

verified ·

1 Parent(s): bf898ab

Create main.py

Browse files

Files changed (1) hide show

main.py +299 -0

main.py ADDED Viewed

	@@ -0,0 +1,299 @@

+import os
+import json
+import asyncio
+from typing import Any, Dict, Optional
+from fastapi import FastAPI, WebSocket, WebSocketDisconnect
+from fastapi.responses import JSONResponse
+from dotenv import load_dotenv
+import websockets
+load_dotenv()
+app = FastAPI(title="Gemini Live WS Proxy", version="1.0.0")
+# Gemini Live API WebSocket endpoint for BidiGenerateContent (v1beta),
+# as listed in the Live API WebSockets reference.
+GEMINI_LIVE_WS_URL = (
+    "wss://generativelanguage.googleapis.com/ws/"
+    "google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent"
+)
+def _env_number(name: str, default, cast):
+    """
+    Read environment variable *name* and convert it with *cast* (e.g. float or int).
+    Returns *default* when the variable is unset, blank, or cannot be converted,
+    so a typo in a deployment variable cannot crash the app at import time.
+    """
+    raw = os.getenv(name)
+    if raw is None or not raw.strip():
+        return default
+    try:
+        return cast(raw.strip())
+    except ValueError:
+        import warnings
+        warnings.warn(f"Ignoring invalid {name}={raw!r}; using default {default!r}.")
+        return default
+DEFAULT_MODEL = os.getenv("GEMINI_MODEL", "models/gemini-2.5-flash")
+DEFAULT_SYSTEM = os.getenv("GEMINI_SYSTEM_INSTRUCTION", "You are a helpful assistant.")
+DEFAULT_TEMPERATURE = _env_number("GEMINI_TEMPERATURE", 0.7, float)
+DEFAULT_MAX_TOKENS = _env_number("GEMINI_MAX_OUTPUT_TOKENS", 1024, int)
+# A missing key is deliberately NOT an import-time error: the Space may not have
+# its secrets set yet. /health reports the state and /ws returns a clear error.
+API_KEY = os.getenv("GEMINI_API_KEY", "").strip()
+@app.get("/health")
+async def health():
+    """Report whether an API key is configured and which default model is in use."""
+    key_present = bool(API_KEY)
+    payload = {"ok": key_present, "has_api_key": key_present, "model": DEFAULT_MODEL}
+    return JSONResponse(payload)
+def _safe_get_text_from_content(content: Dict[str, Any]) -> str:
+    """
+    Join the text of every text part found in a Gemini Content dict.
+    The expected shape is {"role": "...", "parts": [{"text": "..."}, ...]}.
+    A missing or empty "parts" yields "". Parts that are not dicts, or whose
+    "text" is not a string, are skipped.
+    """
+    return "".join(
+        part["text"]
+        for part in (content.get("parts") or [])
+        if isinstance(part, dict) and isinstance(part.get("text"), str)
+    )
+async def _gemini_connect_and_setup(
+    model: str,
+    system_instruction: str,
+    temperature: float,
+    max_output_tokens: int,
+    response_modalities: Optional[list] = None,
+    setup_timeout: float = 30.0,
+):
+    """
+    Open a websocket to the Gemini Live API, send the initial setup message and
+    wait for the server's setupComplete acknowledgement.
+    Args:
+      model: model resource name placed in setup.model.
+      system_instruction: text sent as the single part of setup.systemInstruction.
+      temperature / max_output_tokens: forwarded in setup.generationConfig.
+      response_modalities: forwarded in generationConfig; defaults to ["TEXT"].
+      setup_timeout: seconds to wait for the whole setup handshake.
+    Returns:
+      The connected websocket, ready for further messages.
+    Raises:
+      RuntimeError: Gemini answered with an "error" message, or the handshake timed out.
+      Any connection error raised by the websockets library.
+    On every failure path the upstream socket is closed before the error propagates.
+    """
+    headers = {
+        # The API key is passed in the x-goog-api-key header.
+        "x-goog-api-key": API_KEY,
+    }
+    connect_kwargs = {
+        "max_size": 8 * 1024 * 1024,  # allow larger payloads if needed later
+        "ping_interval": 20,
+        "ping_timeout": 20,
+    }
+    try:
+        # websockets >= 14 names this keyword additional_headers.
+        ws = await websockets.connect(GEMINI_LIVE_WS_URL, additional_headers=headers, **connect_kwargs)
+    except TypeError:
+        # Older (legacy-client) websockets releases only know extra_headers.
+        ws = await websockets.connect(GEMINI_LIVE_WS_URL, extra_headers=headers, **connect_kwargs)
+    setup_payload = {
+        "setup": {
+            "model": model,
+            "generationConfig": {
+                "temperature": temperature,
+                "maxOutputTokens": max_output_tokens,
+                "responseModalities": response_modalities or ["TEXT"],
+            },
+            # systemInstruction is a Content object; we send a single text part.
+            "systemInstruction": {
+                "role": "system",
+                "parts": [{"text": system_instruction}],
+            },
+        }
+    }
+    async def _handshake():
+        await ws.send(json.dumps(setup_payload))
+        while True:
+            msg = json.loads(await ws.recv())
+            if "setupComplete" in msg:
+                return
+            # Surface an error-like reply instead of waiting for a setupComplete that will never come.
+            if "error" in msg:
+                raise RuntimeError(f"Gemini setup error: {msg['error']}")
+    ready = False
+    try:
+        await asyncio.wait_for(_handshake(), timeout=setup_timeout)
+        ready = True
+    except asyncio.TimeoutError:
+        raise RuntimeError(
+            f"Timed out after {setup_timeout}s waiting for Gemini setupComplete"
+        ) from None
+    finally:
+        if not ready:
+            # Do not leak the upstream socket when setup fails or is cancelled.
+            try:
+                await ws.close()
+            except Exception:
+                pass
+    return ws
+@app.websocket("/ws")
+async def ws_proxy(client_ws: WebSocket):
+    """
+    Bridge one client WebSocket to one Gemini Live session.
+    Client protocol (simple):
+      -> {"type":"configure", "model": "...", "system_instruction": "...", "temperature": 0.7, "max_output_tokens": 1024}
+         (accepted only before the first "text" / "live_raw" message)
+      -> {"type":"text","text":"hello"}
+      -> {"type":"live_raw","payload":{...}}    (raw Live API message, forwarded as-is)
+      -> {"type":"close"}
+    Server sends:
+      <- {"type":"ready"}                       (proxy is accepting messages)
+      <- {"type":"configured"}
+      <- {"type":"text_delta","text":"..."}     (streaming)
+      <- {"type":"turn_complete"}
+      <- {"type":"tool_call","toolCall":{...}}
+      <- {"type":"go_away","goAway":{...}}
+      <- {"type":"gemini_raw","message":{...}}  (debug passthrough)
+      <- {"type":"error","message":"..."}
+    The Gemini connection is opened lazily, on the first "text" or "live_raw"
+    message, so that a preceding "configure" actually takes effect. "ready" is
+    therefore sent before the Gemini connection exists; connection failures are
+    reported as an "error" message when the first turn is sent.
+    A malformed client message produces an "error" reply and the session continues.
+    When either direction ends, the other one is cancelled and both sockets are closed.
+    """
+    await client_ws.accept()
+    async def _send(payload: Dict[str, Any]) -> None:
+        await client_ws.send_text(json.dumps(payload))
+    if not API_KEY:
+        await _send(
+            {
+                "type": "error",
+                "message": "Server missing GEMINI_API_KEY env var. Set it in your Space secrets.",
+            }
+        )
+        await client_ws.close(code=1011)
+        return
+    # Per-connection defaults (can be overridden by a configure message)
+    model = DEFAULT_MODEL
+    system_instruction = DEFAULT_SYSTEM
+    temperature = DEFAULT_TEMPERATURE
+    max_output_tokens = DEFAULT_MAX_TOKENS
+    gemini_ws = None
+    gemini_connected = asyncio.Event()
+    stop_event = asyncio.Event()
+    async def ensure_gemini():
+        # Only the client->Gemini task calls this, so no lock is needed.
+        nonlocal gemini_ws
+        if gemini_ws is None:
+            gemini_ws = await _gemini_connect_and_setup(
+                model=model,
+                system_instruction=system_instruction,
+                temperature=temperature,
+                max_output_tokens=max_output_tokens,
+                response_modalities=["TEXT"],
+            )
+            gemini_connected.set()
+    async def forward_client_to_gemini():
+        """
+        Reads from the client WebSocket and sends the matching Live API messages to Gemini.
+        Text turns are sent as clientContent with turnComplete=true.
+        """
+        # Without this declaration the assignments below would make these names
+        # local to this function and every configure message would fail.
+        nonlocal model, system_instruction, temperature, max_output_tokens
+        try:
+            while not stop_event.is_set():
+                raw = await client_ws.receive_text()
+                try:
+                    data = json.loads(raw)
+                except json.JSONDecodeError:
+                    await _send({"type": "error", "message": "Invalid JSON message."})
+                    continue
+                if not isinstance(data, dict):
+                    await _send({"type": "error", "message": "Message must be a JSON object."})
+                    continue
+                msg_type = data.get("type")
+                if msg_type == "configure":
+                    # Config is only possible BEFORE the Gemini connection is created.
+                    if gemini_ws is not None:
+                        await _send(
+                            {
+                                "type": "error",
+                                "message": "Cannot configure after session started. Open a new WS connection.",
+                            }
+                        )
+                        continue
+                    try:
+                        new_model = data.get("model", model)
+                        new_system = data.get("system_instruction", system_instruction)
+                        if not isinstance(new_model, str) or not isinstance(new_system, str):
+                            raise ValueError("model and system_instruction must be strings")
+                        new_temperature = float(data.get("temperature", temperature))
+                        new_max_tokens = int(data.get("max_output_tokens", max_output_tokens))
+                    except (TypeError, ValueError) as exc:
+                        await _send({"type": "error", "message": f"Invalid configure message: {exc}"})
+                        continue
+                    # Apply only after every field validated, so a bad message changes nothing.
+                    model, system_instruction = new_model, new_system
+                    temperature, max_output_tokens = new_temperature, new_max_tokens
+                    await _send({"type": "configured"})
+                    continue
+                if msg_type == "close":
+                    stop_event.set()
+                    return
+                if msg_type == "text":
+                    text = data.get("text", "")
+                    if not isinstance(text, str) or not text.strip():
+                        continue
+                    await ensure_gemini()
+                    # Send a single "turn" using clientContent.turns and turnComplete=true.
+                    payload = {
+                        "clientContent": {
+                            "turns": [
+                                {
+                                    "role": "user",
+                                    "parts": [{"text": text}],
+                                }
+                            ],
+                            "turnComplete": True,
+                        }
+                    }
+                    await gemini_ws.send(json.dumps(payload))
+                    continue
+                # Optional: raw passthrough (advanced users)
+                if msg_type == "live_raw":
+                    await ensure_gemini()
+                    payload = data.get("payload")
+                    if isinstance(payload, dict):
+                        await gemini_ws.send(json.dumps(payload))
+                    continue
+                await _send({"type": "error", "message": f"Unknown message type: {msg_type}"})
+        except WebSocketDisconnect:
+            stop_event.set()
+        except Exception as e:
+            stop_event.set()
+            try:
+                await _send({"type": "error", "message": str(e)})
+            except Exception:
+                pass
+    async def forward_gemini_to_client():
+        """
+        Reads Gemini Live API server messages and forwards the useful pieces to the client.
+        Text is extracted from serverContent.modelTurn.parts[].text when present.
+        """
+        try:
+            await _send({"type": "ready"})
+            # Nothing to read until the first turn has opened the Gemini connection.
+            await gemini_connected.wait()
+            while not stop_event.is_set():
+                raw = await gemini_ws.recv()
+                msg = json.loads(raw)
+                # Optional debug passthrough:
+                await _send({"type": "gemini_raw", "message": msg})
+                # The main streaming content arrives under "serverContent"
+                server_content = msg.get("serverContent")
+                if isinstance(server_content, dict):
+                    # modelTurn is Content (role+parts)
+                    model_turn = server_content.get("modelTurn")
+                    if isinstance(model_turn, dict):
+                        delta = _safe_get_text_from_content(model_turn)
+                        if delta:
+                            await _send({"type": "text_delta", "text": delta})
+                    # When generationComplete is true, we end the turn
+                    if server_content.get("generationComplete") is True:
+                        await _send({"type": "turn_complete"})
+                # Tool calls (if you later enable tools in setup)
+                if "toolCall" in msg:
+                    await _send({"type": "tool_call", "toolCall": msg["toolCall"]})
+                if "goAway" in msg:
+                    await _send({"type": "go_away", "goAway": msg["goAway"]})
+        except Exception as e:
+            stop_event.set()
+            try:
+                await _send({"type": "error", "message": f"Gemini link error: {e}"})
+            except Exception:
+                pass
+    tasks = [
+        asyncio.create_task(forward_client_to_gemini()),
+        asyncio.create_task(forward_gemini_to_client()),
+    ]
+    try:
+        # Run both directions; as soon as one ends the session is over.
+        await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
+    finally:
+        stop_event.set()
+        # The surviving task may be blocked in recv()/receive_text(); cancel it
+        # so the handler (and the upstream socket) is not kept alive forever.
+        for task in tasks:
+            task.cancel()
+        await asyncio.gather(*tasks, return_exceptions=True)
+        # Best-effort shutdown: either socket may already be closed.
+        try:
+            if gemini_ws is not None:
+                await gemini_ws.close()
+        except Exception:
+            pass
+        try:
+            await client_ws.close()
+        except Exception:
+            pass