Spaces:

humanvprojectceo
/

HumanV

Sleeping

App Files Community

humanvprojectceo committed on Feb 9

Commit

dfe4e71

verified ·

1 Parent(s): 1d8075f

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -82

app.py CHANGED Viewed

@@ -1,108 +1,71 @@
 import os
-import io
 import asyncio
 import soundfile as sf
-import gradio as gr
 from google import genai
-client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
-MODEL = "gemini-2.5-flash-native-audio-preview-09-2025"
-config = {
-    "response_modalities": ["AUDIO"]
-}
-def load_and_convert_audio(file_path):
-    # load audio
-    y, sr = sf.read(file_path)
-    # تبدیل به mono
     if len(y.shape) > 1:
         y = y.mean(axis=1)
-    # resample به 16k اگر لازم باشد
     if sr != 16000:
         import resampy
         y = resampy.resample(y, sr, 16000)
-        sr = 16000
-    # تبدیل به PCM16
-    pcm16 = (y * 32767).astype("int16")
     return pcm16.tobytes()
-async def generate_audio_response_from_file(file_path: str):
-    audio_bytes = load_and_convert_audio(file_path)
-    async with client.aio.live.connect(model=MODEL, config=config) as session:
-        await session.send_client_content(
-            turns={
-                "role": "user",
-                "parts": [
-                    {
-                        "inline_data": {
-                            "data": audio_bytes,
-                            "mime_type": "audio/pcm"
-                        }
-                    }
-                ]
             },
-            turn_complete=True
         )
         audio_chunks = []
-        async for response in session.receive():
-            if response.data is not None:
                 audio_chunks.append(response.data)
-        full_audio = b''.join(audio_chunks)
-        if not full_audio:
-            raise ValueError("No audio response received from the model.")
-        buf = io.BytesIO(full_audio)
-        y, sr = sf.read(
-            buf,
-            channels=1,
-            samplerate=24000,
-            format="RAW",
-            subtype="PCM_16",
-            dtype="float32"
-        )
-        return sr, y
-def process_audio(file):
-    if file is None:
-        return None, "Please upload an audio file."
-    try:
-        sr, audio_data = asyncio.run(
-            generate_audio_response_from_file(file)
-        )
-        return (sr, audio_data), "Response generated successfully!"
-    except Exception as e:
-        return None, f"Error: {str(e)}"
-with gr.Blocks() as demo:
-    gr.Markdown("# Gemini Audio → Audio")
-    input_audio = gr.Audio(
-        label="Upload audio",
-        type="filepath"
-    )
-    output_audio = gr.Audio(
-        label="Gemini spoken response",
-        type="numpy",
-        autoplay=True
-    )
-    status = gr.Textbox(label="Status")
-    btn = gr.Button("Send Audio")
-    btn.click(
-        fn=process_audio,
-        inputs=input_audio,
-        outputs=[output_audio, status]
-    )
-demo.launch()

 import os
 import asyncio
 import soundfile as sf
+import numpy as np
 from google import genai
+from google.genai import types
+MODEL = "models/gemini-2.5-flash-native-audio-preview-12-2025"
+client = genai.Client(
+    http_options={"api_version": "v1beta"},
+    api_key=os.environ.get("GEMINI_API_KEY"),
+)
+CONFIG = types.LiveConnectConfig(
+    response_modalities=["AUDIO"]
+)
+def load_audio_as_pcm16(path):
+    y, sr = sf.read(path)
+    # mono
     if len(y.shape) > 1:
         y = y.mean(axis=1)
+    # resample to 16k
     if sr != 16000:
         import resampy
         y = resampy.resample(y, sr, 16000)
+    # float → int16
+    pcm16 = (y * 32767).astype(np.int16)
     return pcm16.tobytes()
+async def send_audio_file(file_path):
+    audio_bytes = load_audio_as_pcm16(file_path)
+    async with client.aio.live.connect(model=MODEL, config=CONFIG) as session:
+        await session.send(
+            input={
+                "data": audio_bytes,
+                "mime_type": "audio/pcm"
             },
+            end_of_turn=True
         )
         audio_chunks = []
+        turn = session.receive()
+        async for response in turn:
+            if response.data:
                 audio_chunks.append(response.data)
+        full_audio = b"".join(audio_chunks)
+        return full_audio
+def main(file_path):
+    audio = asyncio.run(send_audio_file(file_path))
+    with open("response.raw", "wb") as f:
+        f.write(audio)
+    print("Audio response saved as response.raw")
+if __name__ == "__main__":
+    main("input_audio.wav")