| |
| |
| |
|
|
| import gradio as gr |
| import openai |
| import whisper |
| import numpy as np |
| import os |
|
|
| |
# Load the Whisper "base" checkpoint once at import time so every request
# reuses the same in-memory model instead of reloading it per call.
model = whisper.load_model("base")
|
|
| |
# Read the API key from the environment instead of hard-coding a secret in
# source control; the original placeholder string is kept only as a
# last-resort fallback so existing behavior is unchanged when the variable
# is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY", "your_openai_api_key")
|
|
| |
def speech_to_text(audio):
    """Transcribe the audio file at *audio* using the module-level Whisper model.

    Returns the transcription text produced by ``model.transcribe``.
    """
    transcription = model.transcribe(audio)
    return transcription["text"]
|
|
| |
def gpt_response(text):
    """Send *text* to OpenAI's gpt-3.5-turbo chat model and return its reply.

    Parameters:
        text: the user's utterance (plain string prompt).

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    # BUG FIX: gpt-3.5-turbo is a chat model — the legacy Completion endpoint
    # (with `engine=`) does not serve it. Use the ChatCompletion endpoint with
    # a messages list, and read the reply from `message.content` rather than
    # the completion-style `text` field.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": text}],
        max_tokens=100,
    )
    return response.choices[0].message.content.strip()
|
|
| |
def text_to_speech_google(text):
    """Synthesize *text* with Google Cloud Text-to-Speech and save it to disk.

    Writes the MP3 response to ``output.mp3`` in the working directory and
    returns that path.
    """
    # Imported locally so the Google client is only required when TTS is used.
    from google.cloud import texttospeech

    tts_client = texttospeech.TextToSpeechClient()

    synthesis_input = texttospeech.SynthesisInput(text=text)
    voice_params = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
    )

    synthesis_result = tts_client.synthesize_speech(
        input=synthesis_input,
        voice=voice_params,
        audio_config=mp3_config,
    )

    # Persist the returned audio bytes; callers expect a playable file path.
    output_path = "output.mp3"
    with open(output_path, "wb") as out_file:
        out_file.write(synthesis_result.audio_content)
    return output_path
|
|
| |
def conversation_pipeline(audio):
    """Run one full voice-chat turn: transcribe, query GPT, synthesize speech.

    Parameters:
        audio: path to the recorded input audio file.

    Returns:
        A ``(response_text, response_audio_path)`` tuple for the Gradio UI.
    """
    transcript = speech_to_text(audio)
    reply_text = gpt_response(transcript)
    reply_audio_path = text_to_speech_google(reply_text)
    return reply_text, reply_audio_path
|
|
| |
# Wire the pipeline into a Gradio UI: microphone audio in, GPT's reply as
# both text and autoplayed audio out.
# NOTE(review): `gr.Audio(source=...)` was renamed to `sources=["microphone"]`
# in Gradio 4.x — confirm the pinned Gradio version accepts this keyword.
demo = gr.Interface(
    fn=conversation_pipeline,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=[gr.Textbox(label="GPT Response"), gr.Audio(label="GPT Response Audio", type="filepath", autoplay=True)]
)

# show_error=True surfaces server-side exceptions in the web UI instead of
# failing silently.
demo.launch(show_error=True)