internationalscholarsprogram committed on
Commit
dd06b07
·
1 Parent(s): a6785cc

Deploy Gemini WS bridge as Docker Space

Browse files
Files changed (5) hide show
  1. Dockerfile +13 -3
  2. README.md +0 -4
  3. app.py +0 -117
  4. requirements.txt +1 -3
  5. server.py +104 -0
Dockerfile CHANGED
@@ -5,9 +5,19 @@ WORKDIR /app
5
  COPY requirements.txt .
6
  RUN pip install --no-cache-dir -r requirements.txt
7
 
8
- COPY . .
 
 
9
 
10
- ENV PORT=7860
 
 
11
  EXPOSE 7860
12
 
13
- CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port ${PORT}"]
 
 
 
 
 
 
 
5
  COPY requirements.txt .
6
  RUN pip install --no-cache-dir -r requirements.txt
7
 
8
+ # socat will forward 7860 -> 9083 so your python stays unchanged
9
+ RUN apt-get update && apt-get install -y --no-install-recommends socat \
10
+ && rm -rf /var/lib/apt/lists/*
11
 
12
+ COPY server.py .
13
+
14
+ # Hugging Face expects the app to listen on 7860
15
  EXPOSE 7860
16
 
17
+ # GOOGLE_API_KEY should be set as a Hugging Face Secret, not in code
18
+ ENV GOOGLE_API_KEY=""
19
+
20
+ CMD bash -lc "\
21
+ python server.py & \
22
+ socat TCP-LISTEN:7860,fork,reuseaddr TCP:localhost:9083 \
23
+ "
README.md CHANGED
@@ -1,8 +1,4 @@
1
  ---
2
- title: gemini-ws-bridge
3
  sdk: docker
4
  app_port: 7860
5
  ---
6
-
7
- # Gemini WebSocket Bridge
8
- A WebSocket server that forwards audio/image chunks to Gemini Live and streams responses back.
 
---
title: gemini-ws-bridge
sdk: docker
app_port: 7860
---
 
 
app.py DELETED
@@ -1,117 +0,0 @@
1
- import asyncio
2
- import json
3
- import os
4
- import base64
5
- import logging
6
-
7
- # ---- Logging: reduce HF probe noise ----
8
- logging.basicConfig(level=logging.INFO)
9
- logging.getLogger("websockets").setLevel(logging.ERROR)
10
- logging.getLogger("websockets.server").setLevel(logging.ERROR)
11
- logging.getLogger("uvicorn.error").setLevel(logging.INFO)
12
- logging.getLogger("uvicorn.access").setLevel(logging.WARNING)
13
-
14
- print("APP STARTED ✅ FastAPI + Gemini WebSocket Bridge")
15
-
16
- from fastapi import FastAPI, WebSocket, WebSocketDisconnect
17
- from google import genai
18
-
19
- MODEL = os.environ.get("MODEL", "gemini-2.0-flash-exp")
20
-
21
- # IMPORTANT: Set GOOGLE_API_KEY as a Hugging Face Secret
22
- client = genai.Client(http_options={"api_version": "v1alpha"})
23
-
24
- app = FastAPI()
25
-
26
-
27
- # ---- HTTP health check (required by Hugging Face) ----
28
- @app.get("/")
29
- def health():
30
- return {"status": "ok"}
31
-
32
-
33
- # ---- WebSocket endpoint ----
34
- @app.websocket("/ws")
35
- async def gemini_ws_bridge(ws: WebSocket):
36
- await ws.accept()
37
-
38
- try:
39
- # First message must be setup/config
40
- config_message = await ws.receive_text()
41
- config_data = json.loads(config_message)
42
-
43
- config = config_data.get("setup", {})
44
- config["system_instruction"] = (
45
- "You are a helpful assistant for screen sharing sessions. Your role is to:\n"
46
- "1) Analyze and describe the content being shared on screen\n"
47
- "2) Answer questions about the shared content\n"
48
- "3) Provide relevant information and context about what's being shown\n"
49
- "4) Assist with technical issues related to screen sharing\n"
50
- "5) Maintain a professional and helpful tone. Focus on being concise and clear."
51
- )
52
-
53
- async with client.aio.live.connect(model=MODEL, config=config) as session:
54
-
55
- async def send_to_gemini():
56
- try:
57
- while True:
58
- message = await ws.receive_text()
59
- data = json.loads(message)
60
-
61
- if "realtime_input" in data:
62
- for chunk in data["realtime_input"]["media_chunks"]:
63
- mt = chunk.get("mime_type")
64
- payload = chunk.get("data")
65
- if mt in ("audio/pcm", "image/jpeg") and payload:
66
- await session.send(
67
- {"mime_type": mt, "data": payload}
68
- )
69
- except WebSocketDisconnect:
70
- pass
71
- except Exception as e:
72
- logging.error(f"send_to_gemini error: {e}")
73
-
74
- async def receive_from_gemini():
75
- try:
76
- async for response in session.receive():
77
- if response.server_content is None:
78
- continue
79
-
80
- model_turn = response.server_content.model_turn
81
- if model_turn:
82
- for part in model_turn.parts:
83
- if getattr(part, "text", None):
84
- await ws.send_text(
85
- json.dumps({"text": part.text})
86
- )
87
- elif getattr(part, "inline_data", None):
88
- b64_audio = base64.b64encode(
89
- part.inline_data.data
90
- ).decode("utf-8")
91
- await ws.send_text(
92
- json.dumps({"audio": b64_audio})
93
- )
94
-
95
- if response.server_content.turn_complete:
96
- await ws.send_text(
97
- json.dumps({"turn_complete": True})
98
- )
99
- except WebSocketDisconnect:
100
- pass
101
- except Exception as e:
102
- logging.error(f"receive_from_gemini error: {e}")
103
-
104
- await asyncio.gather(send_to_gemini(), receive_from_gemini())
105
-
106
- except WebSocketDisconnect:
107
- pass
108
- except Exception as e:
109
- logging.error(f"gemini_ws_bridge error: {e}")
110
- try:
111
- await ws.send_text(json.dumps({"error": str(e)}))
112
- except Exception:
113
- pass
114
- try:
115
- await ws.close()
116
- except Exception:
117
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,4 +1,2 @@
1
  google-genai==0.3.0
2
- websockets==14.1
3
- fastapi==0.115.6
4
- uvicorn[standard]==0.32.1
 
1
google-genai==0.3.0
# Pinned for reproducible builds, matching the pinned google-genai above;
# 14.1 is the version the FastAPI app removed by this commit was tested with.
websockets==14.1
 
server.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import os
4
+ import websockets
5
+ from google import genai
6
+ import base64
7
+
8
+ # Load API key from environment (do NOT overwrite it)
9
+ # Set GOOGLE_API_KEY in Hugging Face Space Secrets
10
+ if not os.getenv("GOOGLE_API_KEY"):
11
+ raise RuntimeError("GOOGLE_API_KEY is not set. Add it in Hugging Face Space Secrets.")
12
+
13
+ MODEL = "gemini-2.0-flash-exp" # use your model ID
14
+
15
+ client = genai.Client(
16
+ http_options={
17
+ 'api_version': 'v1alpha',
18
+ }
19
+ )
20
+
21
+ async def gemini_session_handler(client_websocket: websockets.WebSocketServerProtocol):
22
+ try:
23
+ config_message = await client_websocket.recv()
24
+ config_data = json.loads(config_message)
25
+ config = config_data.get("setup", {})
26
+ config["system_instruction"] = """You are a helpful assistant for screen sharing sessions. Your role is to:
27
+ 1) Analyze and describe the content being shared on screen
28
+ 2) Answer questions about the shared content
29
+ 3) Provide relevant information and context about what's being shown
30
+ 4) Assist with technical issues related to screen sharing
31
+ 5) Maintain a professional and helpful tone. Focus on being concise and clear in your responses."""
32
+
33
+ async with client.aio.live.connect(model=MODEL, config=config) as session:
34
+ print("Connected to Gemini API")
35
+
36
+ async def send_to_gemini():
37
+ try:
38
+ async for message in client_websocket:
39
+ try:
40
+ data = json.loads(message)
41
+ if "realtime_input" in data:
42
+ for chunk in data["realtime_input"]["media_chunks"]:
43
+ if chunk["mime_type"] == "audio/pcm":
44
+ await session.send({"mime_type": "audio/pcm", "data": chunk["data"]})
45
+ elif chunk["mime_type"] == "image/jpeg":
46
+ await session.send({"mime_type": "image/jpeg", "data": chunk["data"]})
47
+ except Exception as e:
48
+ print(f"Error sending to Gemini: {e}")
49
+ print("Client connection closed (send)")
50
+ except Exception as e:
51
+ print(f"Error sending to Gemini: {e}")
52
+ finally:
53
+ print("send_to_gemini closed")
54
+
55
+ async def receive_from_gemini():
56
+ try:
57
+ while True:
58
+ try:
59
+ print("receiving from gemini")
60
+ async for response in session.receive():
61
+ if response.server_content is None:
62
+ print(f'Unhandled server message! - {response}')
63
+ continue
64
+
65
+ model_turn = response.server_content.model_turn
66
+ if model_turn:
67
+ for part in model_turn.parts:
68
+ if hasattr(part, 'text') and part.text is not None:
69
+ await client_websocket.send(json.dumps({"text": part.text}))
70
+ elif hasattr(part, 'inline_data') and part.inline_data is not None:
71
+ print("audio mime_type:", part.inline_data.mime_type)
72
+ base64_audio = base64.b64encode(part.inline_data.data).decode('utf-8')
73
+ await client_websocket.send(json.dumps({"audio": base64_audio}))
74
+ print("audio received")
75
+
76
+ if response.server_content.turn_complete:
77
+ print('\n<Turn complete>')
78
+ except websockets.exceptions.ConnectionClosedOK:
79
+ print("Client connection closed normally (receive)")
80
+ break
81
+ except Exception as e:
82
+ print(f"Error receiving from Gemini: {e}")
83
+ break
84
+ except Exception as e:
85
+ print(f"Error receiving from Gemini: {e}")
86
+ finally:
87
+ print("Gemini connection closed (receive)")
88
+
89
+ send_task = asyncio.create_task(send_to_gemini())
90
+ receive_task = asyncio.create_task(receive_from_gemini())
91
+ await asyncio.gather(send_task, receive_task)
92
+
93
+ except Exception as e:
94
+ print(f"Error in Gemini session: {e}")
95
+ finally:
96
+ print("Gemini session closed.")
97
+
98
+ async def main() -> None:
99
+ async with websockets.serve(gemini_session_handler, "localhost", 9083):
100
+ print("Running websocket server localhost:9083...")
101
+ await asyncio.Future()
102
+
103
+ if __name__ == "__main__":
104
+ asyncio.run(main())