Commit
·
dd06b07
1
Parent(s):
a6785cc
Deploy Gemini WS bridge as Docker Space
Browse files- Dockerfile +13 -3
- README.md +0 -4
- app.py +0 -117
- requirements.txt +1 -3
- server.py +104 -0
Dockerfile
CHANGED
|
@@ -5,9 +5,19 @@ WORKDIR /app
|
|
| 5 |
COPY requirements.txt .
|
| 6 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 7 |
|
| 8 |
-
|
|
|
|
|
|
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
| 11 |
EXPOSE 7860
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
COPY requirements.txt .
|
| 6 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 7 |
|
| 8 |
# socat will forward 7860 -> 9083 so your python stays unchanged
RUN apt-get update && apt-get install -y --no-install-recommends socat \
    && rm -rf /var/lib/apt/lists/*

COPY server.py .

# Hugging Face expects the app to listen on 7860
EXPOSE 7860

# GOOGLE_API_KEY should be set as a Hugging Face Secret, not in code
ENV GOOGLE_API_KEY=""

# Run socat in the background and keep python as the (exec'd) foreground
# process: if server.py crashes the container exits and the Space restarts,
# instead of socat serving a dead port 7860 forever.
CMD bash -lc "\
    socat TCP-LISTEN:7860,fork,reuseaddr TCP:localhost:9083 & \
    exec python server.py \
"
|
README.md
CHANGED
|
@@ -1,8 +1,4 @@
|
|
| 1 |
---
|
| 2 |
-
title: gemini-ws-bridge
|
| 3 |
sdk: docker
|
| 4 |
app_port: 7860
|
| 5 |
---
|
| 6 |
-
|
| 7 |
-
# Gemini WebSocket Bridge
|
| 8 |
-
A WebSocket server that forwards audio/image chunks to Gemini Live and streams responses back.
|
|
|
|
| 1 |
---
|
|
|
|
| 2 |
sdk: docker
|
| 3 |
app_port: 7860
|
| 4 |
---
|
|
|
|
|
|
|
|
|
app.py
DELETED
|
@@ -1,117 +0,0 @@
|
|
| 1 |
-
import asyncio
|
| 2 |
-
import json
|
| 3 |
-
import os
|
| 4 |
-
import base64
|
| 5 |
-
import logging
|
| 6 |
-
|
| 7 |
-
# ---- Logging: reduce HF probe noise ----
|
| 8 |
-
logging.basicConfig(level=logging.INFO)
|
| 9 |
-
logging.getLogger("websockets").setLevel(logging.ERROR)
|
| 10 |
-
logging.getLogger("websockets.server").setLevel(logging.ERROR)
|
| 11 |
-
logging.getLogger("uvicorn.error").setLevel(logging.INFO)
|
| 12 |
-
logging.getLogger("uvicorn.access").setLevel(logging.WARNING)
|
| 13 |
-
|
| 14 |
-
print("APP STARTED ✅ FastAPI + Gemini WebSocket Bridge")
|
| 15 |
-
|
| 16 |
-
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
| 17 |
-
from google import genai
|
| 18 |
-
|
| 19 |
-
MODEL = os.environ.get("MODEL", "gemini-2.0-flash-exp")
|
| 20 |
-
|
| 21 |
-
# IMPORTANT: Set GOOGLE_API_KEY as a Hugging Face Secret
|
| 22 |
-
client = genai.Client(http_options={"api_version": "v1alpha"})
|
| 23 |
-
|
| 24 |
-
app = FastAPI()
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
# ---- HTTP health check (required by Hugging Face) ----
|
| 28 |
-
@app.get("/")
|
| 29 |
-
def health():
|
| 30 |
-
return {"status": "ok"}
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
# ---- WebSocket endpoint ----
|
| 34 |
-
@app.websocket("/ws")
|
| 35 |
-
async def gemini_ws_bridge(ws: WebSocket):
|
| 36 |
-
await ws.accept()
|
| 37 |
-
|
| 38 |
-
try:
|
| 39 |
-
# First message must be setup/config
|
| 40 |
-
config_message = await ws.receive_text()
|
| 41 |
-
config_data = json.loads(config_message)
|
| 42 |
-
|
| 43 |
-
config = config_data.get("setup", {})
|
| 44 |
-
config["system_instruction"] = (
|
| 45 |
-
"You are a helpful assistant for screen sharing sessions. Your role is to:\n"
|
| 46 |
-
"1) Analyze and describe the content being shared on screen\n"
|
| 47 |
-
"2) Answer questions about the shared content\n"
|
| 48 |
-
"3) Provide relevant information and context about what's being shown\n"
|
| 49 |
-
"4) Assist with technical issues related to screen sharing\n"
|
| 50 |
-
"5) Maintain a professional and helpful tone. Focus on being concise and clear."
|
| 51 |
-
)
|
| 52 |
-
|
| 53 |
-
async with client.aio.live.connect(model=MODEL, config=config) as session:
|
| 54 |
-
|
| 55 |
-
async def send_to_gemini():
|
| 56 |
-
try:
|
| 57 |
-
while True:
|
| 58 |
-
message = await ws.receive_text()
|
| 59 |
-
data = json.loads(message)
|
| 60 |
-
|
| 61 |
-
if "realtime_input" in data:
|
| 62 |
-
for chunk in data["realtime_input"]["media_chunks"]:
|
| 63 |
-
mt = chunk.get("mime_type")
|
| 64 |
-
payload = chunk.get("data")
|
| 65 |
-
if mt in ("audio/pcm", "image/jpeg") and payload:
|
| 66 |
-
await session.send(
|
| 67 |
-
{"mime_type": mt, "data": payload}
|
| 68 |
-
)
|
| 69 |
-
except WebSocketDisconnect:
|
| 70 |
-
pass
|
| 71 |
-
except Exception as e:
|
| 72 |
-
logging.error(f"send_to_gemini error: {e}")
|
| 73 |
-
|
| 74 |
-
async def receive_from_gemini():
|
| 75 |
-
try:
|
| 76 |
-
async for response in session.receive():
|
| 77 |
-
if response.server_content is None:
|
| 78 |
-
continue
|
| 79 |
-
|
| 80 |
-
model_turn = response.server_content.model_turn
|
| 81 |
-
if model_turn:
|
| 82 |
-
for part in model_turn.parts:
|
| 83 |
-
if getattr(part, "text", None):
|
| 84 |
-
await ws.send_text(
|
| 85 |
-
json.dumps({"text": part.text})
|
| 86 |
-
)
|
| 87 |
-
elif getattr(part, "inline_data", None):
|
| 88 |
-
b64_audio = base64.b64encode(
|
| 89 |
-
part.inline_data.data
|
| 90 |
-
).decode("utf-8")
|
| 91 |
-
await ws.send_text(
|
| 92 |
-
json.dumps({"audio": b64_audio})
|
| 93 |
-
)
|
| 94 |
-
|
| 95 |
-
if response.server_content.turn_complete:
|
| 96 |
-
await ws.send_text(
|
| 97 |
-
json.dumps({"turn_complete": True})
|
| 98 |
-
)
|
| 99 |
-
except WebSocketDisconnect:
|
| 100 |
-
pass
|
| 101 |
-
except Exception as e:
|
| 102 |
-
logging.error(f"receive_from_gemini error: {e}")
|
| 103 |
-
|
| 104 |
-
await asyncio.gather(send_to_gemini(), receive_from_gemini())
|
| 105 |
-
|
| 106 |
-
except WebSocketDisconnect:
|
| 107 |
-
pass
|
| 108 |
-
except Exception as e:
|
| 109 |
-
logging.error(f"gemini_ws_bridge error: {e}")
|
| 110 |
-
try:
|
| 111 |
-
await ws.send_text(json.dumps({"error": str(e)}))
|
| 112 |
-
except Exception:
|
| 113 |
-
pass
|
| 114 |
-
try:
|
| 115 |
-
await ws.close()
|
| 116 |
-
except Exception:
|
| 117 |
-
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,4 +1,2 @@
|
|
| 1 |
google-genai==0.3.0
|
| 2 |
-
websockets
|
| 3 |
-
fastapi==0.115.6
|
| 4 |
-
uvicorn[standard]==0.32.1
|
|
|
|
| 1 |
google-genai==0.3.0
|
| 2 |
+
websockets
|
|
|
|
|
|
server.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import asyncio
import json
import os
import websockets
from google import genai
import base64

# Fail fast when the API key is missing. The key must come from the
# GOOGLE_API_KEY environment variable (a Hugging Face Space Secret);
# genai.Client picks it up implicitly -- never hard-code it here.
if not os.getenv("GOOGLE_API_KEY"):
    raise RuntimeError("GOOGLE_API_KEY is not set. Add it in Hugging Face Space Secrets.")

# Model ID is overridable via the MODEL env var; defaults to the
# experimental Live-capable Gemini model.
MODEL = os.environ.get("MODEL", "gemini-2.0-flash-exp")

# v1alpha is required for the Live (bidirectional streaming) API.
client = genai.Client(
    http_options={
        'api_version': 'v1alpha',
    }
)
async def gemini_session_handler(client_websocket: websockets.WebSocketServerProtocol):
    """Bridge one client WebSocket connection to a Gemini Live session.

    Protocol (as implemented below):
      * The first client message must be JSON whose "setup" key holds the
        Live-session config; a system instruction is injected server-side.
      * Later client messages carry {"realtime_input": {"media_chunks": [...]}}
        where each chunk has "mime_type" (audio/pcm or image/jpeg) and "data".
      * Gemini responses stream back to the client as {"text": ...},
        {"audio": <base64>} and {"turn_complete": true} JSON messages.
    """
    try:
        # First message must be the setup/config payload.
        config_message = await client_websocket.recv()
        config_data = json.loads(config_message)
        config = config_data.get("setup", {})
        config["system_instruction"] = """You are a helpful assistant for screen sharing sessions. Your role is to:
        1) Analyze and describe the content being shared on screen
        2) Answer questions about the shared content
        3) Provide relevant information and context about what's being shown
        4) Assist with technical issues related to screen sharing
        5) Maintain a professional and helpful tone. Focus on being concise and clear in your responses."""

        async with client.aio.live.connect(model=MODEL, config=config) as session:
            print("Connected to Gemini API")

            async def send_to_gemini():
                """Forward audio/image chunks from the client up to Gemini."""
                try:
                    async for message in client_websocket:
                        try:
                            data = json.loads(message)
                            if "realtime_input" in data:
                                for chunk in data["realtime_input"]["media_chunks"]:
                                    # .get() so a malformed chunk is skipped
                                    # instead of raising KeyError and killing
                                    # the whole forwarding loop.
                                    mime_type = chunk.get("mime_type")
                                    payload = chunk.get("data")
                                    if mime_type in ("audio/pcm", "image/jpeg") and payload:
                                        await session.send({"mime_type": mime_type, "data": payload})
                        except Exception as e:
                            print(f"Error sending to Gemini: {e}")
                    print("Client connection closed (send)")
                except Exception as e:
                    print(f"Error sending to Gemini: {e}")
                finally:
                    print("send_to_gemini closed")

            async def receive_from_gemini():
                """Stream Gemini responses back down to the client as JSON."""
                try:
                    while True:
                        try:
                            print("receiving from gemini")
                            async for response in session.receive():
                                if response.server_content is None:
                                    print(f'Unhandled server message! - {response}')
                                    continue

                                model_turn = response.server_content.model_turn
                                if model_turn:
                                    for part in model_turn.parts:
                                        if hasattr(part, 'text') and part.text is not None:
                                            await client_websocket.send(json.dumps({"text": part.text}))
                                        elif hasattr(part, 'inline_data') and part.inline_data is not None:
                                            print("audio mime_type:", part.inline_data.mime_type)
                                            base64_audio = base64.b64encode(part.inline_data.data).decode('utf-8')
                                            await client_websocket.send(json.dumps({"audio": base64_audio}))
                                            print("audio received")

                                if response.server_content.turn_complete:
                                    print('\n<Turn complete>')
                                    # Notify the client so it can stop waiting
                                    # for more audio/text (the previous FastAPI
                                    # bridge in this repo sent this message too).
                                    await client_websocket.send(json.dumps({"turn_complete": True}))
                        except websockets.exceptions.ConnectionClosedOK:
                            print("Client connection closed normally (receive)")
                            break
                        except Exception as e:
                            print(f"Error receiving from Gemini: {e}")
                            break
                except Exception as e:
                    print(f"Error receiving from Gemini: {e}")
                finally:
                    print("Gemini connection closed (receive)")

            send_task = asyncio.create_task(send_to_gemini())
            receive_task = asyncio.create_task(receive_from_gemini())
            await asyncio.gather(send_task, receive_task)

    except Exception as e:
        print(f"Error in Gemini session: {e}")
    finally:
        print("Gemini session closed.")
async def main() -> None:
    """Serve the Gemini bridge on localhost:9083 until the process stops.

    The server intentionally binds only to localhost: socat (see Dockerfile)
    forwards the public port 7860 to 9083 inside the container.
    """
    async with websockets.serve(gemini_session_handler, "localhost", 9083):
        print("Running websocket server localhost:9083...")
        # Park on a future that is never resolved; the async-with closes
        # the server cleanly if this coroutine is ever cancelled.
        forever = asyncio.get_running_loop().create_future()
        await forever

if __name__ == "__main__":
    asyncio.run(main())