humanvprojectceo committed on
Commit
dfe4e71
·
verified ·
1 Parent(s): 1d8075f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -82
app.py CHANGED
@@ -1,108 +1,71 @@
1
  import os
2
- import io
3
  import asyncio
4
  import soundfile as sf
5
- import gradio as gr
6
  from google import genai
 
7
 
8
# Gemini API client; the key is read from the GEMINI_API_KEY environment variable.
client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

# Model name passed to every Live API connection opened by this app.
MODEL = "gemini-2.5-flash-native-audio-preview-09-2025"

# Session configuration: audio is the only requested response modality.
config = dict(response_modalities=["AUDIO"])
15
 
16
def load_and_convert_audio(file_path, target_sr=16000):
    """Load an audio file and return it as mono 16-bit little-endian PCM bytes.

    The file is read with ``sf.read``, down-mixed to mono by averaging its
    channels, resampled with ``resampy`` when its sample rate differs from
    ``target_sr``, and finally converted to signed 16-bit integers.

    Args:
        file_path: Path of the audio file, passed straight to ``sf.read``.
        target_sr: Sample rate (Hz) of the returned PCM data.

    Returns:
        The PCM16 samples as ``bytes``.
    """
    samples, sr = sf.read(file_path)

    # Down-mix multi-channel audio to mono.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Resample only when the file is not already at the target rate.
    if sr != target_sr:
        import resampy
        samples = resampy.resample(samples, sr, target_sr)

    # Clip before scaling: resampling can overshoot [-1, 1], and casting an
    # out-of-range float to int16 overflows instead of saturating.
    pcm16 = (samples.clip(-1.0, 1.0) * 32767).astype("<i2")
    return pcm16.tobytes()
33
 
34
async def generate_audio_response_from_file(file_path: str):
    """Send an audio file to the Gemini Live model and return its spoken reply.

    Args:
        file_path: Path of the audio file to send.

    Returns:
        A ``(sample_rate, samples)`` tuple; ``samples`` is a float32 array
        decoded from the reply, which is read as 24 kHz mono PCM16.

    Raises:
        ValueError: If the model returned no audio data.
    """
    # Decoding and resampling are blocking; keep them off the event loop.
    audio_bytes = await asyncio.to_thread(load_and_convert_audio, file_path)

    async with client.aio.live.connect(model=MODEL, config=config) as session:
        # Audio belongs on the realtime-input channel, and the MIME type must
        # state the sample rate of the PCM data being sent.
        await session.send_realtime_input(
            audio={"data": audio_bytes, "mime_type": "audio/pcm;rate=16000"}
        )
        # A file is a finite stream: signal its end so buffered audio is
        # flushed. The SDK accepts only one argument per call, hence two calls.
        await session.send_realtime_input(audio_stream_end=True)

        audio_chunks = []
        async for response in session.receive():
            if response.data is not None:
                audio_chunks.append(response.data)

    full_audio = b"".join(audio_chunks)
    if not full_audio:
        raise ValueError("No audio response received from the model.")

    # The reply is headerless PCM, so the format is described explicitly.
    samples, sr = sf.read(
        io.BytesIO(full_audio),
        channels=1,
        samplerate=24000,
        format="RAW",
        subtype="PCM_16",
        dtype="float32",
    )
    return sr, samples
72
 
73
def process_audio(file):
    """Turn an uploaded audio file into a spoken reply plus a status message.

    Args:
        file: Path of the uploaded audio file, or ``None`` when nothing was
            uploaded.

    Returns:
        An ``(audio, status)`` pair. ``audio`` is ``(sample_rate, samples)``
        on success and ``None`` otherwise; ``status`` is a message for the
        user.
    """
    if file is None:
        return None, "Please upload an audio file."

    try:
        sr, audio_data = asyncio.run(generate_audio_response_from_file(file))
    except Exception as e:
        # UI boundary: report the failure to the user instead of raising,
        # but keep the traceback in the logs so it can be diagnosed.
        import logging
        logging.getLogger(__name__).exception("Audio generation failed")
        # Some exceptions carry no message; fall back to the type name so
        # the status is never just "Error: ".
        return None, f"Error: {str(e) or type(e).__name__}"

    return (sr, audio_data), "Response generated successfully!"
84
-
85
# Gradio UI: upload an audio file, send it to the model, play back the reply.
with gr.Blocks() as demo:
    gr.Markdown("# Gemini Audio → Audio")

    # The uploaded file reaches the handler as a path on disk.
    input_audio = gr.Audio(type="filepath", label="Upload audio")

    # The handler returns (sample_rate, samples); playback starts automatically.
    output_audio = gr.Audio(
        type="numpy",
        label="Gemini spoken response",
        autoplay=True,
    )

    status = gr.Textbox(label="Status")
    btn = gr.Button("Send Audio")

    # Wire the button: one input, two outputs (audio player and status box).
    btn.click(
        fn=process_audio,
        inputs=input_audio,
        outputs=[output_audio, status],
    )

demo.launch()
 
1
  import os
 
2
  import asyncio
3
  import soundfile as sf
4
+ import numpy as np
5
  from google import genai
6
+ from google.genai import types
7
 
8
# Model name passed to every Live API connection.
MODEL = "models/gemini-2.5-flash-native-audio-preview-12-2025"

# API client using the v1beta API version; the key is read from the
# GEMINI_API_KEY environment variable.
client = genai.Client(
    api_key=os.environ.get("GEMINI_API_KEY"),
    http_options={"api_version": "v1beta"},
)

# Session configuration: audio is the only requested response modality.
CONFIG = types.LiveConnectConfig(response_modalities=["AUDIO"])
18
 
 
 
 
19
 
20
def load_audio_as_pcm16(path, target_sr=16000):
    """Load an audio file and return it as mono 16-bit little-endian PCM bytes.

    The file is read with ``sf.read``, down-mixed to mono by averaging its
    channels, resampled with ``resampy`` when its sample rate differs from
    ``target_sr``, and finally converted to signed 16-bit integers.

    Args:
        path: Path of the audio file, passed straight to ``sf.read``.
        target_sr: Sample rate (Hz) of the returned PCM data.

    Returns:
        The PCM16 samples as ``bytes``.
    """
    samples, sr = sf.read(path)

    # Down-mix multi-channel audio to mono.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Resample only when the file is not already at the target rate.
    if sr != target_sr:
        import resampy
        samples = resampy.resample(samples, sr, target_sr)

    # Clip before scaling: resampling can overshoot [-1, 1], and casting an
    # out-of-range float to int16 overflows instead of saturating.
    pcm16 = (np.clip(samples, -1.0, 1.0) * 32767).astype("<i2")
    return pcm16.tobytes()
35
 
36
+
37
async def send_audio_file(file_path):
    """Send an audio file to the Gemini Live model and collect its audio reply.

    Args:
        file_path: Path of the audio file to send.

    Returns:
        The concatenated audio bytes received from the model; empty ``bytes``
        when the model sent no audio.
    """
    # Decoding and resampling are blocking; keep them off the event loop.
    audio_bytes = await asyncio.to_thread(load_audio_as_pcm16, file_path)

    async with client.aio.live.connect(model=MODEL, config=CONFIG) as session:
        # ``session.send`` is deprecated; audio goes through the realtime-input
        # channel, with the sample rate stated in the MIME type.
        await session.send_realtime_input(
            audio=types.Blob(data=audio_bytes, mime_type="audio/pcm;rate=16000")
        )
        # A file is a finite stream: signal its end so buffered audio is
        # flushed. The SDK accepts only one argument per call, hence two calls.
        await session.send_realtime_input(audio_stream_end=True)

        audio_chunks = []
        async for response in session.receive():
            # Skip messages that carry no audio payload.
            if response.data:
                audio_chunks.append(response.data)

    return b"".join(audio_chunks)
 
 
 
 
 
 
 
 
 
 
 
 
59
 
 
 
 
60
 
61
def main(file_path, output_path="response.raw"):
    """Send ``file_path`` to the model and save the spoken reply.

    Args:
        file_path: Path of the input audio file.
        output_path: Where to store the reply. A ``.wav`` suffix produces a
            playable WAV file; any other name receives the headerless PCM
            bytes exactly as returned by the model.
    """
    audio = asyncio.run(send_audio_file(file_path))

    # Do not claim success (or leave an empty file behind) when nothing came back.
    if not audio:
        print("No audio response received from the model.")
        return

    if output_path.lower().endswith(".wav"):
        import wave

        # Live API audio output is documented as 16-bit mono PCM at 24 kHz;
        # adjust these values if the model's output format changes.
        with wave.open(output_path, "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)
            wav_file.setframerate(24000)
            wav_file.writeframes(audio)
    else:
        with open(output_path, "wb") as f:
            f.write(audio)

    print(f"Audio response saved as {output_path}")


if __name__ == "__main__":
    import sys

    # Usage: app.py [INPUT_AUDIO [OUTPUT_FILE]]; defaults match the old script.
    cli_args = sys.argv[1:3]
    if cli_args:
        main(*cli_args)
    else:
        main("input_audio.wav")