Update app.py
app.py CHANGED
@@ -1,129 +1,86 @@

 import argparse
-from typing import Generator, Tuple
-
 import numpy as np
-from fastrtc import (
-    ReplyOnPause,
-    AlgoOptions,
-    Stream,
-    audio_to_bytes,
-)
-from groq import Groq
 from loguru import logger
 from simple_math_agent import agent, agent_config
-import

 os.environ["GROQ_API_KEY"] = "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"
-
-logger.add(
-    lambda msg: print(msg),
-    colorize=True,
-    format="<green>{time:HH:mm:ss}</green> | <level>{level}</level> | <level>{message}</level>",
-)


-def response(
-    audio: tuple[int, np.ndarray],
-) -> Generator[Tuple[int, np.ndarray], None, None]:
     """
-    Args:
-        audio: Tuple containing sample rate and audio data
-
-    Yields:
-        Tuples of (sample_rate, audio_array) for audio playback
     """
     logger.info("🎙️ Received audio input")

     model = whisper.load_model("base")
-    temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
-    temp_file.close()
-
-    try:
-        # Convert audio data to bytes and save as WAV
-        audio_bytes = audio_to_bytes(audio)
-
-        # Save as WAV file using wave module
-        with wave.open(temp_file.name, 'wb') as wav_file:
-            wav_file.setnchannels(1)  # mono audio
-            wav_file.setsampwidth(2)  # 16-bit audio
-            wav_file.setframerate(audio[0])  # sample rate
-            wav_file.writeframes(audio_bytes)
-
-        # Transcribe the audio
-        result = model.transcribe(temp_file.name, language="ar")
-        transcript = result["text"]
-
-    finally:
-        # Clean up the temporary file
-        if os.path.exists(temp_file.name):
-            os.remove(temp_file.name)
-
     logger.info(f'📝 Transcribed: "{transcript}"')

     agent_response = agent.invoke(
-        {"messages": [{"role": "user", "content": transcript}]},
     )
     response_text = agent_response["messages"][-1].content
     logger.info(f'💬 Response: "{response_text}"')

     tts_response = groq_client.audio.speech.create(
         model="playai-tts-arabic",
         voice="Ahmad-PlayAI",
         response_format="wav",
         input=response_text,
     )
-    yield from process_groq_tts(tts_response)


-def create_stream():
-    """
-    Create and configure a Stream instance with audio capabilities.
-
-    Returns:
-        Stream: Configured FastRTC Stream instance
-    """
-    return Stream(
-        modality="audio",
-        mode="send-receive",
-        handler=ReplyOnPause(
-            response,
-            algo_options=AlgoOptions(
-                speech_threshold=0.5,
-            ),
-        ),
-    )


 if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--phone",
-        action="store_true",
-        help="Launch with FastRTC phone interface (get a temp phone number)",
-    )
-    args = parser.parse_args()
-
-    stream = create_stream()
-    logger.info("🔧 Stream handler configured")
-
-    if args.phone:
-        logger.info("Launching with FastRTC phone interface...")
-        stream.fastphone(share=True)
-    else:
-        logger.info("Launching with Gradio UI...")
-        stream.ui.launch(share=True)
 import argparse
 import numpy as np
+import tempfile
+import wave
+import os
 from loguru import logger
+import gradio as gr
+import whisper
+from groq import Groq
 from simple_math_agent import agent, agent_config
+from process_groq_tts import process_groq_tts
+from fastrtc import audio_to_bytes  # keep using it for conversion

+# ✅ Set your API key safely
 os.environ["GROQ_API_KEY"] = "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"
+groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])

+logger.remove()
+logger.add(lambda msg: print(msg),
+           colorize=True,
+           format="<green>{time:HH:mm:ss}</green> | <level>{level}</level> | <level>{message}</level>")


+def process_audio(audio):
     """
+    Take an uploaded or recorded audio file, transcribe it, generate an AI response,
+    and return the spoken audio as a WAV.
     """
     logger.info("🎙️ Received audio input")

+    # If audio is a file path (Gradio format)
+    if isinstance(audio, tuple):
+        sr, data = audio
+        audio_bytes = audio_to_bytes((sr, data))
+        temp_path = tempfile.mktemp(suffix=".wav")
+        with wave.open(temp_path, 'wb') as wav_file:
+            wav_file.setnchannels(1)
+            wav_file.setsampwidth(2)
+            wav_file.setframerate(sr)
+            wav_file.writeframes(audio_bytes)
+    else:
+        temp_path = audio

+    # 🧠 Transcribe using Whisper
     model = whisper.load_model("base")
+    result = model.transcribe(temp_path, language="ar")
+    transcript = result["text"]
     logger.info(f'📝 Transcribed: "{transcript}"')

+    # 🧮 Run agent
     agent_response = agent.invoke(
+        {"messages": [{"role": "user", "content": transcript}]},
+        config=agent_config
     )
     response_text = agent_response["messages"][-1].content
     logger.info(f'💬 Response: "{response_text}"')

+    # 🔊 Generate speech with Groq
     tts_response = groq_client.audio.speech.create(
         model="playai-tts-arabic",
         voice="Ahmad-PlayAI",
         response_format="wav",
         input=response_text,
     )

+    output_path = tempfile.mktemp(suffix=".wav")
+    with open(output_path, "wb") as f:
+        f.write(tts_response.read())

+    return response_text, output_path


+# ✅ Use Gradio UI (no RTC, fully compatible with Spaces)
+demo = gr.Interface(
+    fn=process_audio,
+    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath", label="🎤 Speak or Upload Audio"),
+    outputs=[
+        gr.Textbox(label="💬 Transcription + Response"),
+        gr.Audio(label="🔊 AI Voice Reply")
+    ],
+    title="🎙️ Groq Voice Assistant",
+    description="Speak Arabic and get an intelligent spoken reply (STT → Agent → TTS)"
+)

 if __name__ == "__main__":
+    demo.launch(server_port=None, share=True)
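Note on the hard-coded key: the new version still writes the Groq key directly into app.py. A minimal sketch of the more usual Spaces setup, assuming the key is stored as a Space secret named GROQ_API_KEY (Spaces expose secrets as environment variables); the secret name and error message below are illustrative, not part of this commit:

import os
from groq import Groq

# Read the key from the environment (e.g. a Space secret named GROQ_API_KEY)
# rather than committing it to the repository. Secret name is an assumption.
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    raise RuntimeError("GROQ_API_KEY is not set; add it as a Space secret or export it locally.")

groq_client = Groq(api_key=api_key)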