# Author: Sandra Sanchez
# Commit: eeadbca — Remove the extra Gradio Audio output, maintain only autoplaying HTML audio
# imports
import os
import base64
import gradio as gr
from openai import OpenAI
# Initialization
openai_api_key = os.getenv('OPENAI_API_KEY')
if openai_api_key:
    # Print only a short prefix so the full secret never reaches the logs.
    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")
else:
    print("OpenAI API Key not set")

MODEL = "gpt-4o-mini"
# Client picks up OPENAI_API_KEY from the environment automatically.
openai = OpenAI()

# System prompt shared by every chat turn.
system_message = "You are a language tutor, and as such provide only with \
helpful tips and accurate translations. You are entertaining and polite. \
If you don't know something, you say so."
def talker(message):
    """Convert *message* to speech with OpenAI TTS and return it for display.

    Returns a (text, html) pair: the original message plus an autoplaying
    HTML <audio> tag, so no separate Gradio audio widget ("Press Play"
    button) is needed.
    """
    response = openai.audio.speech.create(
        model="tts-1",
        voice="nova",  # NOTE(review): tts-1 exposes no "vibe" parameter; voice is the only knob
        input=message
    )
    # Embed the MP3 bytes as a base64 data URI so the browser can play the
    # clip directly from the returned HTML, with no extra file round-trip.
    audio_base64 = base64.b64encode(response.content).decode()
    audio_html = f'<audio autoplay controls src="data:audio/mp3;base64,{audio_base64}"></audio>'
    return message, audio_html
# Transcription function
def transcribe_audio(audio_file):
    """Translate the speech in *audio_file* (a filepath) to English text.

    Opens the file in binary mode because the Whisper endpoint expects an
    open file object, not a path string. Returns the translated text.
    """
    with open(audio_file, "rb") as audio:
        translation = openai.audio.translations.create(
            model="whisper-1",
            file=audio  # Pass the opened file, not the filepath
        )
    print(translation.text)
    return translation.text
# Wrapper function to combine microphone input, transcription, and chat
def process_microphone_input(audio, history=None):
    """Transcribe microphone audio, then send the text through `chat`.

    Parameters:
        audio: filepath of the recorded clip, or None if nothing was captured.
        history: prior chat messages; defaults to an empty conversation.

    Raises ValueError when no audio was captured.
    """
    if audio is None:
        raise ValueError("No audio input detected. Please ensure the microphone is functioning correctly.")
    # BUGFIX: the original used a mutable default (history=[]), which is
    # shared across calls and would silently accumulate turns between
    # independent sessions. Create a fresh list per call instead.
    if history is None:
        history = []
    # Step 1: Transcribe the audio captured from the microphone
    transcribed_text = transcribe_audio(audio)
    # Step 2: Pass the transcription to the chat function
    response = chat(transcribed_text, history)
    return response
def chat(message, history):
    """Send *message* (with prior *history*) to the chat model and voice the reply.

    Builds the message list as system prompt + history + new user turn,
    then returns `talker(reply)`, i.e. a (text, html-audio) pair.
    """
    messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": message}]
    response = openai.chat.completions.create(model=MODEL, messages=messages)
    reply = response.choices[0].message.content
    # Debug traces of the conversation state sent to the model.
    print(f"History: {history}")
    print(f"Message: {message}")
    print(f"Messages: {messages}")
    return talker(reply)
# Gradio interface for microphone input: one audio input, two outputs
# (transcript text + autoplaying HTML audio — no separate Audio widget).
interface = gr.Interface(
    fn=process_microphone_input,
    inputs=[gr.Audio(sources="microphone", type="filepath")],  # Microphone as input
    outputs=["text", "html"],  # Keep text + autoplaying HTML audio
    title="Speech-to-Chatbot-to-Speech Language Tutor",
    description="Speak into the microphone to chat with GPT-4. Wait a couple of seconds before you submit your message."
)

if __name__ == "__main__":
    interface.launch()