internationalscholarsprogram committed on
Commit
dd06b07
·
1 Parent(s): a6785cc

Deploy Gemini WS bridge as Docker Space

Browse files
Files changed (5) hide show
  1. Dockerfile +13 -3
  2. README.md +0 -4
  3. app.py +0 -117
  4. requirements.txt +1 -3
  5. server.py +104 -0
Dockerfile CHANGED
@@ -5,9 +5,19 @@ WORKDIR /app
5
  COPY requirements.txt .
6
  RUN pip install --no-cache-dir -r requirements.txt
7
 
8
- COPY . .
 
 
9
 
10
- ENV PORT=7860
 
 
11
  EXPOSE 7860
12
 
13
- CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port ${PORT}"]
 
 
 
 
 
 
 
5
  COPY requirements.txt .
6
  RUN pip install --no-cache-dir -r requirements.txt
7
 
8
+ # socat will forward 7860 -> 9083 so your python stays unchanged
9
+ RUN apt-get update && apt-get install -y --no-install-recommends socat \
10
+ && rm -rf /var/lib/apt/lists/*
11
 
12
+ COPY server.py .
13
+
14
+ # Hugging Face expects the app to listen on 7860
15
  EXPOSE 7860
16
 
17
+ # GOOGLE_API_KEY should be set as a Hugging Face Secret, not in code
18
+ ENV GOOGLE_API_KEY=""
19
+
20
+ CMD bash -lc "\
21
+ python server.py & \
22
+ socat TCP-LISTEN:7860,fork,reuseaddr TCP:localhost:9083 \
23
+ "
README.md CHANGED
@@ -1,8 +1,4 @@
1
  ---
2
- title: gemini-ws-bridge
3
  sdk: docker
4
  app_port: 7860
5
  ---
6
-
7
- # Gemini WebSocket Bridge
8
- A WebSocket server that forwards audio/image chunks to Gemini Live and streams responses back.
 
---
title: gemini-ws-bridge
sdk: docker
app_port: 7860
---
 
 
app.py DELETED
@@ -1,117 +0,0 @@
1
- import asyncio
2
- import json
3
- import os
4
- import base64
5
- import logging
6
-
7
- # ---- Logging: reduce HF probe noise ----
8
- logging.basicConfig(level=logging.INFO)
9
- logging.getLogger("websockets").setLevel(logging.ERROR)
10
- logging.getLogger("websockets.server").setLevel(logging.ERROR)
11
- logging.getLogger("uvicorn.error").setLevel(logging.INFO)
12
- logging.getLogger("uvicorn.access").setLevel(logging.WARNING)
13
-
14
- print("APP STARTED ✅ FastAPI + Gemini WebSocket Bridge")
15
-
16
- from fastapi import FastAPI, WebSocket, WebSocketDisconnect
17
- from google import genai
18
-
19
- MODEL = os.environ.get("MODEL", "gemini-2.0-flash-exp")
20
-
21
- # IMPORTANT: Set GOOGLE_API_KEY as a Hugging Face Secret
22
- client = genai.Client(http_options={"api_version": "v1alpha"})
23
-
24
- app = FastAPI()
25
-
26
-
27
- # ---- HTTP health check (required by Hugging Face) ----
28
- @app.get("/")
29
- def health():
30
- return {"status": "ok"}
31
-
32
-
33
- # ---- WebSocket endpoint ----
34
- @app.websocket("/ws")
35
- async def gemini_ws_bridge(ws: WebSocket):
36
- await ws.accept()
37
-
38
- try:
39
- # First message must be setup/config
40
- config_message = await ws.receive_text()
41
- config_data = json.loads(config_message)
42
-
43
- config = config_data.get("setup", {})
44
- config["system_instruction"] = (
45
- "You are a helpful assistant for screen sharing sessions. Your role is to:\n"
46
- "1) Analyze and describe the content being shared on screen\n"
47
- "2) Answer questions about the shared content\n"
48
- "3) Provide relevant information and context about what's being shown\n"
49
- "4) Assist with technical issues related to screen sharing\n"
50
- "5) Maintain a professional and helpful tone. Focus on being concise and clear."
51
- )
52
-
53
- async with client.aio.live.connect(model=MODEL, config=config) as session:
54
-
55
- async def send_to_gemini():
56
- try:
57
- while True:
58
- message = await ws.receive_text()
59
- data = json.loads(message)
60
-
61
- if "realtime_input" in data:
62
- for chunk in data["realtime_input"]["media_chunks"]:
63
- mt = chunk.get("mime_type")
64
- payload = chunk.get("data")
65
- if mt in ("audio/pcm", "image/jpeg") and payload:
66
- await session.send(
67
- {"mime_type": mt, "data": payload}
68
- )
69
- except WebSocketDisconnect:
70
- pass
71
- except Exception as e:
72
- logging.error(f"send_to_gemini error: {e}")
73
-
74
- async def receive_from_gemini():
75
- try:
76
- async for response in session.receive():
77
- if response.server_content is None:
78
- continue
79
-
80
- model_turn = response.server_content.model_turn
81
- if model_turn:
82
- for part in model_turn.parts:
83
- if getattr(part, "text", None):
84
- await ws.send_text(
85
- json.dumps({"text": part.text})
86
- )
87
- elif getattr(part, "inline_data", None):
88
- b64_audio = base64.b64encode(
89
- part.inline_data.data
90
- ).decode("utf-8")
91
- await ws.send_text(
92
- json.dumps({"audio": b64_audio})
93
- )
94
-
95
- if response.server_content.turn_complete:
96
- await ws.send_text(
97
- json.dumps({"turn_complete": True})
98
- )
99
- except WebSocketDisconnect:
100
- pass
101
- except Exception as e:
102
- logging.error(f"receive_from_gemini error: {e}")
103
-
104
- await asyncio.gather(send_to_gemini(), receive_from_gemini())
105
-
106
- except WebSocketDisconnect:
107
- pass
108
- except Exception as e:
109
- logging.error(f"gemini_ws_bridge error: {e}")
110
- try:
111
- await ws.send_text(json.dumps({"error": str(e)}))
112
- except Exception:
113
- pass
114
- try:
115
- await ws.close()
116
- except Exception:
117
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,4 +1,2 @@
1
  google-genai==0.3.0
2
- websockets==14.1
3
- fastapi==0.115.6
4
- uvicorn[standard]==0.32.1
 
1
google-genai==0.3.0
# Pinned for reproducible builds, matching the pinned google-genai above;
# 14.1 is the version the FastAPI app removed by this commit was tested with.
websockets==14.1
 
server.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import os
4
+ import websockets
5
+ from google import genai
6
+ import base64
7
+
8
+ # Load API key from environment (do NOT overwrite it)
9
+ # Set GOOGLE_API_KEY in Hugging Face Space Secrets
10
+ if not os.getenv("GOOGLE_API_KEY"):
11
+ raise RuntimeError("GOOGLE_API_KEY is not set. Add it in Hugging Face Space Secrets.")
12
+
13
+ MODEL = "gemini-2.0-flash-exp" # use your model ID
14
+
15
+ client = genai.Client(
16
+ http_options={
17
+ 'api_version': 'v1alpha',
18
+ }
19
+ )
20
+
21
+ async def gemini_session_handler(client_websocket: websockets.WebSocketServerProtocol):
22
+ try:
23
+ config_message = await client_websocket.recv()
24
+ config_data = json.loads(config_message)
25
+ config = config_data.get("setup", {})
26
+ config["system_instruction"] = """You are a helpful assistant for screen sharing sessions. Your role is to:
27
+ 1) Analyze and describe the content being shared on screen
28
+ 2) Answer questions about the shared content
29
+ 3) Provide relevant information and context about what's being shown
30
+ 4) Assist with technical issues related to screen sharing
31
+ 5) Maintain a professional and helpful tone. Focus on being concise and clear in your responses."""
32
+
33
+ async with client.aio.live.connect(model=MODEL, config=config) as session:
34
+ print("Connected to Gemini API")
35
+
36
+ async def send_to_gemini():
37
+ try:
38
+ async for message in client_websocket:
39
+ try:
40
+ data = json.loads(message)
41
+ if "realtime_input" in data:
42
+ for chunk in data["realtime_input"]["media_chunks"]:
43
+ if chunk["mime_type"] == "audio/pcm":
44
+ await session.send({"mime_type": "audio/pcm", "data": chunk["data"]})
45
+ elif chunk["mime_type"] == "image/jpeg":
46
+ await session.send({"mime_type": "image/jpeg", "data": chunk["data"]})
47
+ except Exception as e:
48
+ print(f"Error sending to Gemini: {e}")
49
+ print("Client connection closed (send)")
50
+ except Exception as e:
51
+ print(f"Error sending to Gemini: {e}")
52
+ finally:
53
+ print("send_to_gemini closed")
54
+
55
+ async def receive_from_gemini():
56
+ try:
57
+ while True:
58
+ try:
59
+ print("receiving from gemini")
60
+ async for response in session.receive():
61
+ if response.server_content is None:
62
+ print(f'Unhandled server message! - {response}')
63
+ continue
64
+
65
+ model_turn = response.server_content.model_turn
66
+ if model_turn:
67
+ for part in model_turn.parts:
68
+ if hasattr(part, 'text') and part.text is not None:
69
+ await client_websocket.send(json.dumps({"text": part.text}))
70
+ elif hasattr(part, 'inline_data') and part.inline_data is not None:
71
+ print("audio mime_type:", part.inline_data.mime_type)
72
+ base64_audio = base64.b64encode(part.inline_data.data).decode('utf-8')
73
+ await client_websocket.send(json.dumps({"audio": base64_audio}))
74
+ print("audio received")
75
+
76
+ if response.server_content.turn_complete:
77
+ print('\n<Turn complete>')
78
+ except websockets.exceptions.ConnectionClosedOK:
79
+ print("Client connection closed normally (receive)")
80
+ break
81
+ except Exception as e:
82
+ print(f"Error receiving from Gemini: {e}")
83
+ break
84
+ except Exception as e:
85
+ print(f"Error receiving from Gemini: {e}")
86
+ finally:
87
+ print("Gemini connection closed (receive)")
88
+
89
+ send_task = asyncio.create_task(send_to_gemini())
90
+ receive_task = asyncio.create_task(receive_from_gemini())
91
+ await asyncio.gather(send_task, receive_task)
92
+
93
+ except Exception as e:
94
+ print(f"Error in Gemini session: {e}")
95
+ finally:
96
+ print("Gemini session closed.")
97
+
98
+ async def main() -> None:
99
+ async with websockets.serve(gemini_session_handler, "localhost", 9083):
100
+ print("Running websocket server localhost:9083...")
101
+ await asyncio.Future()
102
+
103
+ if __name__ == "__main__":
104
+ asyncio.run(main())