# VoicetoVoice / App.py
# (Hugging Face Space file header: author akazmi, commit "Create App.py", 4fbf0c5 verified)
# Import libraries
import whisper
import requests
from gtts import gTTS
import os
import gradio as gr
# Load Whisper model for transcription.
# The "base" checkpoint trades accuracy for speed; it is downloaded on first use.
model = whisper.load_model("base")
# Speech-to-text step, backed by the module-level Whisper model.
def transcribe_audio(audio_file):
    """Transcribe the audio at *audio_file* and return the recognized text."""
    return model.transcribe(audio_file)["text"]
# Function to get response from Groq's LLM API (replace with actual API details)
def get_llm_response(text_input):
    """Send *text_input* to the Groq LLM endpoint and return the model's reply text.

    Raises:
        requests.HTTPError: on a non-2xx HTTP response.
        requests.Timeout: if the endpoint does not answer within 30 seconds.
        KeyError: if the response JSON lacks the expected "output" field.
    """
    # NOTE(review): placeholder endpoint/schema — replace with the real Groq
    # chat-completions URL and payload before deploying.
    response = requests.post(
        "https://api.groq.com/llm",
        json={"prompt": text_input},
        # Read the API key from the environment so no secret is hard-coded.
        headers={"Authorization": f"Bearer {os.environ.get('GROQ_API_KEY', '')}"},
        timeout=30,  # fail fast instead of hanging forever on a dead endpoint
    )
    # Surface HTTP errors explicitly rather than failing later with an
    # opaque JSON decode / KeyError on an error page body.
    response.raise_for_status()
    return response.json()["output"]  # Adjust this based on the actual response format
# Text-to-speech step using Google Text-to-Speech.
def text_to_speech(response_text):
    """Synthesize *response_text* as English speech; return the saved MP3 path."""
    output_path = "response_audio.mp3"
    gTTS(text=response_text, lang='en').save(output_path)
    return output_path
# End-to-end pipeline wired into the Gradio UI below.
def chatbot_pipeline(audio):
    """Run one voice-chat turn: speech -> text -> LLM reply -> speech.

    Returns a (reply_text, reply_audio_path) pair for the Gradio outputs.
    """
    user_text = transcribe_audio(audio)        # 1) speech-to-text via Whisper
    reply_text = get_llm_response(user_text)   # 2) reply via Groq LLM API
    reply_audio = text_to_speech(reply_text)   # 3) text-to-speech via gTTS
    return reply_text, reply_audio
# Gradio interface for real-time interaction.
interface = gr.Interface(
    fn=chatbot_pipeline,
    # Accepts an uploaded audio file, or a microphone recording, as a file path.
    inputs=gr.Audio(type="filepath"),
    outputs=[
        # BUGFIX: the pipeline's first return value is the LLM's reply text,
        # not the transcription — label it accordingly.
        gr.Textbox(label="LLM Response"),
        gr.Audio(label="Response Audio"),
    ],
)

# Launch Gradio interface
interface.launch()