Spaces:

1MR
/

testing2121cxz

Runtime error

App Files Files Community

1MR committed on Oct 22, 2025

Commit

0b2ebe2

verified ·

1 Parent(s): 5984019

Create app.py

Browse files

Files changed (1) hide show

app.py +129 -0

app.py ADDED Viewed

	@@ -0,0 +1,129 @@

+import argparse
+from typing import Generator, Tuple
+import numpy as np
+from fastrtc import (
+    AlgoOptions,
+    ReplyOnPause,
+    Stream,
+    audio_to_bytes,
+)
+from groq import Groq
+from loguru import logger
+from process_groq_tts import process_groq_tts
+from simple_math_agent import agent, agent_config
+import os
+os.environ["GROQ_API_KEY"] = "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"
+logger.remove()
+logger.add(
+    lambda msg: print(msg),
+    colorize=True,
+    format="<green>{time:HH:mm:ss}</green> | <level>{level}</level> | <level>{message}</level>",
+)
+groq_client = Groq(api_key="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")
+def response(
+    audio: tuple[int, np.ndarray],
+) -> Generator[Tuple[int, np.ndarray], None, None]:
+    """
+    Process audio input, transcribe it, generate a response using LangGraph, and deliver TTS audio.
+    Args:
+        audio: Tuple containing sample rate and audio data
+    Yields:
+        Tuples of (sample_rate, audio_array) for audio playback
+    """
+    logger.info("🎙️ Received audio input")
+    logger.debug("🔄 Transcribing audio...")
+    import whisper
+    import wave
+    import tempfile
+    import os
+    model = whisper.load_model("base")
+    # Create a temporary WAV file
+    temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+    temp_file.close()
+    try:
+        # Convert audio data to bytes and save as WAV
+        audio_bytes = audio_to_bytes(audio)
+        # Save as WAV file using wave module
+        with wave.open(temp_file.name, 'wb') as wav_file:
+            wav_file.setnchannels(1)  # mono audio
+            wav_file.setsampwidth(2)  # 16-bit audio
+            wav_file.setframerate(audio[0])  # sample rate
+            wav_file.writeframes(audio_bytes)
+        # Transcribe the audio
+        result = model.transcribe(temp_file.name, language="ar")
+        transcript = result["text"]
+    finally:
+        # Clean up the temporary file
+        if os.path.exists(temp_file.name):
+            os.remove(temp_file.name)
+    logger.info(f'👂 Transcribed: "{transcript}"')
+    logger.debug("🧠 Running agent...")
+    agent_response = agent.invoke(
+        {"messages": [{"role": "user", "content": transcript}]}, config=agent_config
+    )
+    response_text = agent_response["messages"][-1].content
+    logger.info(f'💬 Response: "{response_text}"')
+    logger.debug("🔊 Generating speech...")
+    tts_response = groq_client.audio.speech.create(
+        model="playai-tts-arabic",
+        voice="Ahmad-PlayAI",
+        response_format="wav",
+        input=response_text,
+    )
+    yield from process_groq_tts(tts_response)
+def create_stream() -> Stream:
+    """
+    Create and configure a Stream instance with audio capabilities.
+    Returns:
+        Stream: Configured FastRTC Stream instance
+    """
+    return Stream(
+        modality="audio",
+        mode="send-receive",
+        handler=ReplyOnPause(
+            response,
+            algo_options=AlgoOptions(
+                speech_threshold=0.5,
+            ),
+        ),
+    )
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="FastRTC Groq Voice Agent")
+    parser.add_argument(
+        "--phone",
+        action="store_true",
+        help="Launch with FastRTC phone interface (get a temp phone number)",
+    )
+    args = parser.parse_args()
+    stream = create_stream()
+    logger.info("🎧 Stream handler configured")
+    if args.phone:
+        logger.info("Launching with FastRTC phone interface...")
+        stream.fastphone(share=True)
+    else:
+        logger.info("Launching with Gradio UI...")
+        stream.ui.launch(share=True)