lachieandmitch committed on
Commit
fdecdfa
Β·
verified Β·
1 Parent(s): 5077869

assess code for improvement

Browse files
Files changed (2) hide show
  1. README.md +8 -5
  2. index.html +258 -19
README.md CHANGED
@@ -1,10 +1,13 @@
1
  ---
2
- title: Code Insight Analyzer
3
- emoji: πŸ“š
4
- colorFrom: indigo
5
- colorTo: indigo
6
  sdk: static
7
  pinned: false
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
1
  ---
2
+ title: Code Insight Analyzer πŸ”
3
+ colorFrom: gray
4
+ colorTo: pink
5
+ emoji: 🐳
6
  sdk: static
7
  pinned: false
8
+ tags:
9
+ - deepsite-v3
10
  ---
11
 
12
+ # Welcome to your new DeepSite project!
13
+ This project was created with [DeepSite](https://huggingface.co/deepsite).
index.html CHANGED
@@ -1,19 +1,258 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ## Documentation
3
+ Quickstart: https://github.com/google-gemini/cookbook/blob/main/quickstarts/Get_started_LiveAPI.py
4
+
5
+ ## Setup
6
+
7
+ To install the dependencies for this script, run:
8
+
9
+ ```
10
+ pip install google-genai opencv-python pyaudio pillow mss
11
+ ```
12
+ """
13
+
14
+ import os
15
+ import asyncio
16
+ import base64
17
+ import io
18
+ import traceback
19
+
20
+ import cv2
21
+ import pyaudio
22
+ import PIL.Image
23
+ import mss
24
+
25
+ import argparse
26
+
27
+ from google import genai
28
+ from google.genai import types
29
+
30
+ FORMAT = pyaudio.paInt16
31
+ CHANNELS = 1
32
+ SEND_SAMPLE_RATE = 16000
33
+ RECEIVE_SAMPLE_RATE = 24000
34
+ CHUNK_SIZE = 1024
35
+
36
+ MODEL = "models/gemini-2.5-flash-native-audio-preview-09-2025"
37
+
38
+ DEFAULT_MODE = "camera"
39
+
40
+ client = genai.Client(
41
+ http_options={"api_version": "v1beta"},
42
+ api_key=os.environ.get("GEMINI_API_KEY"),
43
+ )
44
+
45
+
46
# Live session configuration: audio-only responses using the "Zephyr"
# prebuilt voice, medium media resolution, and context-window compression
# so long sessions keep streaming once the token limit is approached.
CONFIG = types.LiveConnectConfig(
    response_modalities=[
        "AUDIO",
    ],
    media_resolution="MEDIA_RESOLUTION_MEDIUM",
    speech_config=types.SpeechConfig(
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Zephyr")
        )
    ),
    context_window_compression=types.ContextWindowCompressionConfig(
        trigger_tokens=25600,
        sliding_window=types.SlidingWindow(target_tokens=12800),
    ),
    # Fix: system prompt typo "stromg of mind" -> "strong of mind".
    system_instruction=types.Content(
        parts=[
            types.Part.from_text(
                text="You are a strong of mind AI who says it as it is"
            )
        ],
        role="user",
    ),
)
65
+
66
+ pya = pyaudio.PyAudio()
67
+
68
+
69
class AudioLoop:
    """Bidirectional Gemini Live session: streams mic audio (plus optional
    camera or screen frames) to the model and plays back the audio replies.

    Concurrency model: `run()` opens the Live session and spawns one asyncio
    task per pipeline stage inside a TaskGroup. Outbound media flows through
    `out_queue`; inbound PCM from the model flows through `audio_in_queue`.
    Blocking calls (input(), OpenCV, PyAudio, mss) are pushed off the event
    loop with `asyncio.to_thread`.
    """

    def __init__(self, video_mode=DEFAULT_MODE):
        # "camera", "screen" or "none" — selects the video source task.
        self.video_mode = video_mode

        # Queues are created inside run() so they bind to the running loop.
        self.audio_in_queue = None   # PCM chunks received from the model
        self.out_queue = None        # media (audio/images) waiting to be sent

        self.session = None          # live session handle, set in run()

        # NOTE(review): these three are never read — run() keeps its own
        # local references to the tasks it awaits.
        self.send_text_task = None
        self.receive_audio_task = None
        self.play_audio_task = None

    async def send_text(self):
        """Read console lines and send them as turns; 'q' ends the session."""
        while True:
            # input() blocks, so run it in a worker thread.
            text = await asyncio.to_thread(
                input,
                "message > ",
            )
            if text.lower() == "q":
                break
            # Empty input is replaced with "." so a turn is still sent.
            await self.session.send(input=text or ".", end_of_turn=True)

    def _get_frame(self, cap):
        """Grab one camera frame; return it as a base64 JPEG dict or None."""
        # Read the frame from the capture device.
        ret, frame = cap.read()
        # Check if the frame was read successfully.
        if not ret:
            return None
        # Fix: Convert BGR to RGB color space.
        # OpenCV captures in BGR but PIL expects RGB format.
        # This prevents the blue tint in the video feed.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = PIL.Image.fromarray(frame_rgb)  # Now using RGB frame
        # Downscale in place so no dimension exceeds 1024 px.
        img.thumbnail([1024, 1024])

        image_io = io.BytesIO()
        img.save(image_io, format="jpeg")
        image_io.seek(0)

        mime_type = "image/jpeg"
        image_bytes = image_io.read()
        return {"mime_type": mime_type, "data": base64.b64encode(image_bytes).decode()}

    async def get_frames(self):
        """Task: capture one camera frame per second onto `out_queue`."""
        # This takes about a second, and will block the whole program
        # causing the audio pipeline to overflow if you don't to_thread it.
        cap = await asyncio.to_thread(
            cv2.VideoCapture, 0
        )  # 0 represents the default camera

        while True:
            frame = await asyncio.to_thread(self._get_frame, cap)
            if frame is None:
                break

            # Throttle to roughly one frame per second.
            await asyncio.sleep(1.0)

            await self.out_queue.put(frame)

        # Release the VideoCapture object.
        cap.release()

    def _get_screen(self):
        """Grab the full virtual screen; return it as a base64 JPEG dict."""
        sct = mss.mss()
        # monitors[0] is the combined bounding box of all displays.
        monitor = sct.monitors[0]

        i = sct.grab(monitor)

        mime_type = "image/jpeg"
        # mss emits PNG; round-trip through PIL to re-encode as JPEG.
        image_bytes = mss.tools.to_png(i.rgb, i.size)
        img = PIL.Image.open(io.BytesIO(image_bytes))

        image_io = io.BytesIO()
        img.save(image_io, format="jpeg")
        image_io.seek(0)

        image_bytes = image_io.read()
        return {"mime_type": mime_type, "data": base64.b64encode(image_bytes).decode()}

    async def get_screen(self):
        """Task: capture one screenshot per second onto `out_queue`."""

        while True:
            frame = await asyncio.to_thread(self._get_screen)
            if frame is None:
                break

            await asyncio.sleep(1.0)

            await self.out_queue.put(frame)

    async def send_realtime(self):
        """Task: drain `out_queue` and forward each item to the session."""
        while True:
            msg = await self.out_queue.get()
            await self.session.send(input=msg)

    async def listen_audio(self):
        """Task: read mic PCM chunks and enqueue them for sending."""
        mic_info = pya.get_default_input_device_info()
        self.audio_stream = await asyncio.to_thread(
            pya.open,
            format=FORMAT,
            channels=CHANNELS,
            rate=SEND_SAMPLE_RATE,
            input=True,
            input_device_index=mic_info["index"],
            frames_per_buffer=CHUNK_SIZE,
        )
        # In debug builds, tolerate input overflow instead of raising,
        # since the event loop may briefly fall behind the mic.
        if __debug__:
            kwargs = {"exception_on_overflow": False}
        else:
            kwargs = {}
        while True:
            data = await asyncio.to_thread(self.audio_stream.read, CHUNK_SIZE, **kwargs)
            await self.out_queue.put({"data": data, "mime_type": "audio/pcm"})

    async def receive_audio(self):
        "Background task that reads from the websocket and writes PCM chunks to the playback queue."
        while True:
            turn = self.session.receive()
            async for response in turn:
                if data := response.data:
                    self.audio_in_queue.put_nowait(data)
                    continue
                if text := response.text:
                    print(text, end="")

            # If you interrupt the model, it sends a turn_complete.
            # For interruptions to work, we need to stop playback.
            # So empty out the audio queue because it may have loaded
            # much more audio than has played yet.
            while not self.audio_in_queue.empty():
                self.audio_in_queue.get_nowait()

    async def play_audio(self):
        """Task: play PCM chunks from `audio_in_queue` through the speakers."""
        stream = await asyncio.to_thread(
            pya.open,
            format=FORMAT,
            channels=CHANNELS,
            rate=RECEIVE_SAMPLE_RATE,
            output=True,
        )
        while True:
            bytestream = await self.audio_in_queue.get()
            # stream.write blocks until the chunk is played; keep it off the loop.
            await asyncio.to_thread(stream.write, bytestream)

    async def run(self):
        """Open the Live session and run all pipeline tasks until 'q' or error."""
        try:
            async with (
                client.aio.live.connect(model=MODEL, config=CONFIG) as session,
                asyncio.TaskGroup() as tg,
            ):
                self.session = session

                self.audio_in_queue = asyncio.Queue()
                # Bounded so video/audio capture backpressures instead of
                # buffering unboundedly if sending falls behind.
                self.out_queue = asyncio.Queue(maxsize=5)

                send_text_task = tg.create_task(self.send_text())
                tg.create_task(self.send_realtime())
                tg.create_task(self.listen_audio())
                if self.video_mode == "camera":
                    tg.create_task(self.get_frames())
                elif self.video_mode == "screen":
                    tg.create_task(self.get_screen())

                tg.create_task(self.receive_audio())
                tg.create_task(self.play_audio())

                # When the console task returns (user typed 'q'), cancel the
                # TaskGroup to tear down the remaining infinite-loop tasks.
                await send_text_task
                raise asyncio.CancelledError("User requested exit")

        except asyncio.CancelledError:
            pass
        except ExceptionGroup as EG:
            # NOTE(review): audio_stream only exists once listen_audio has
            # opened the mic — an early failure here would raise AttributeError.
            self.audio_stream.close()
            traceback.print_exception(EG)
244
+
245
+
246
if __name__ == "__main__":
    # Parse the video-source selection and hand control to the asyncio loop.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--mode",
        type=str,
        default=DEFAULT_MODE,
        help="pixels to stream from",
        choices=["camera", "screen", "none"],
    )
    cli_args = arg_parser.parse_args()
    loop = AudioLoop(video_mode=cli_args.mode)
    asyncio.run(loop.run())
258
+ <script src="https://huggingface.co/deepsite/deepsite-badge.js"></script>