# NOTE(review): the Space status page reported "Runtime error" at startup —
# most likely the required API-key environment variables (see below) were unset.
# Standard library
import asyncio
import os
import tempfile

# Third-party
import gradio as gr
import openai
import requests
from deepgram import Deepgram

# Vocode streaming components
from vocode.helpers import create_streaming_microphone_input_and_speaker_output
from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent
from vocode.streaming.models.agent import ChatGPTAgentConfig
from vocode.streaming.models.message import BaseMessage
from vocode.streaming.models.synthesizer import ElevenLabsSynthesizerConfig
from vocode.streaming.models.transcriber import (
    DeepgramTranscriberConfig,
    PunctuationEndpointingConfig,
)
from vocode.streaming.streaming_conversation import StreamingConversation
from vocode.streaming.synthesizer.eleven_labs_synthesizer import ElevenLabsSynthesizer
from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber
# --- Configuration -----------------------------------------------------------
# Credentials and the Eleven Labs voice come from environment variables.
# NOTE(review): os.getenv returns None for unset variables, and the client
# constructors below run at import time — a missing key therefore crashes the
# app on startup. Verify the Space secrets are configured.
DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY")
ELEVEN_LABS_API_KEY = os.getenv("ELEVEN_LABS_API_KEY")
VOICE_ID = os.getenv("VOICE_ID")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Clients are built once at module load and shared by the handlers below.
client = openai.OpenAI(api_key=OPENAI_API_KEY)
deepgram = Deepgram(DEEPGRAM_API_KEY)
# Function to transcribe audio using Deepgram
async def transcribe_audio(audio_file_path):
    """Transcribe a recorded WAV file with Deepgram's prerecorded API.

    Args:
        audio_file_path: Path to the audio clip (Gradio supplies a WAV file).

    Returns:
        The transcript of the first channel's top alternative, or "" when
        Deepgram returns no usable alternative.
    """
    with open(audio_file_path, "rb") as audio_file:
        audio_data = audio_file.read()
    response = await deepgram.transcription.prerecorded(
        {"buffer": audio_data, "mimetype": "audio/wav"},
        {'punctuate': True, 'language': 'en'},
    )
    # Guard against an empty/odd response shape instead of raising
    # KeyError/IndexError all the way up to the UI.
    try:
        return response["results"]["channels"][0]["alternatives"][0]["transcript"]
    except (KeyError, IndexError):
        return ""
# Function to generate content using OpenAI GPT-4
def generate_content(input_text):
    """Generate a chat completion for *input_text* with GPT-4.

    Args:
        input_text: The user's transcribed utterance.

    Returns:
        The assistant reply with surrounding whitespace stripped, or "" when
        the API returns a null content field.
    """
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": input_text},
        ],
    )
    # message.content is Optional in the OpenAI SDK; calling .strip() on None
    # would raise AttributeError.
    content = response.choices[0].message.content
    return content.strip() if content is not None else ""
# Function to convert text to speech using Eleven Labs
def text_to_speech(text):
    """Synthesize *text* into an MP3 via the Eleven Labs HTTP API.

    Args:
        text: The text to speak.

    Returns:
        The path of the generated MP3 on success, or an error string of the
        form "Error: <status> - <body>" on a non-200 response.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": ELEVEN_LABS_API_KEY,
    }
    data = {
        "text": text,
        "voice_settings": {
            "stability": 0.75,
            "similarity_boost": 0.75,
        },
    }
    # A timeout prevents the Gradio worker from hanging forever on a stalled
    # connection (requests has no default timeout).
    response = requests.post(url, json=data, headers=headers, timeout=60)
    if response.status_code == 200:
        # Write to a unique temp file instead of a fixed "output.mp3" so
        # concurrent requests don't clobber each other's audio.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
            f.write(response.content)
        return f.name
    else:
        return f"Error: {response.status_code} - {response.text}"
# Main function to handle the entire process
async def process_audio(audio):
    """Run the full pipeline: transcribe -> generate -> synthesize.

    Args:
        audio: Filepath of the clip recorded by the Gradio microphone input.

    Returns:
        A (transcription, generated_text, audio_path) tuple matching the
        three Gradio output components.
    """
    spoken_text = await transcribe_audio(audio)
    reply_text = generate_content(spoken_text)
    speech_path = text_to_speech(reply_text)
    return spoken_text, reply_text, speech_path
# Gradio interface setup
def _run_pipeline(audio):
    """Synchronous wrapper so Gradio can drive the async pipeline."""
    return asyncio.run(process_audio(audio))


# One microphone input; three outputs: transcript, generated text, audio.
interface = gr.Interface(
    fn=_run_pipeline,
    inputs=gr.Audio(type="filepath", label="Speak into your microphone"),
    outputs=[
        gr.Textbox(label="Transcription Output"),
        gr.Textbox(label="Generated Content"),
        gr.Audio(label="Synthesized Speech"),
    ],
    title="Speech-to-Text, Content Generation, and Text-to-Speech",
    description="Speak into the microphone, and the system will transcribe your speech, generate content, and convert the generated text into speech.",
)

# Launch the Gradio interface
interface.launch()