Sandra Sanchez committed on
Commit
a8b3c67
·
1 Parent(s): ab11b1b

Initial commit

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +90 -0
  3. requirements.txt +4 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .idea/
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # imports
2
+
3
+ import os
4
+ from io import BytesIO
5
+ import gradio as gr
6
+ from dotenv import load_dotenv
7
+ from openai import OpenAI
8
+ from pydub import AudioSegment
9
+ from pydub.playback import play
10
+
11
+ # Initialization
12
+
13
+ load_dotenv(override=True)
14
+
15
+ openai_api_key = os.getenv('OPENAI_API_KEY')
16
+ if openai_api_key:
17
+ print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")
18
+ else:
19
+ print("OpenAI API Key not set")
20
+
21
+ MODEL = "gpt-4o-mini"
22
+ openai = OpenAI()
23
+
24
+ system_message = "You are a language tutor, and as such provide only with \
25
+ helpful tips and accurate translations. You are entertaining and polite. \
26
+ If you don't know something, you say so."
27
+
28
+
29
+ def talker(message):
30
+ response = openai.audio.speech.create(
31
+ model="tts-1",
32
+ voice="nova", # Can I change the vibe parameter?
33
+ input=message
34
+ )
35
+
36
+ audio_stream = BytesIO(response.content)
37
+ audio = AudioSegment.from_file(audio_stream, format="mp3")
38
+ play(audio)
39
+
40
+
41
+ # Transcription function
42
+ def transcribe_audio(audio_file):
43
+ # Ensure the audio file is opened as a binary file
44
+ with open(audio_file, "rb") as audio:
45
+ translation = openai.audio.translations.create(
46
+ model="whisper-1",
47
+ file=audio # Pass the opened file, not the filepath
48
+ )
49
+
50
+ print(translation.text)
51
+ return translation.text
52
+
53
+
54
+ # Wrapper function to combine microphone input, transcription, and chat
55
+ def process_microphone_input(audio, history=[]):
56
+ if audio is None:
57
+ raise ValueError("No audio input detected. Please ensure the microphone is functioning correctly.")
58
+ # Step 1: Transcribe the audio captured from the microphone
59
+ transcribed_text = transcribe_audio(audio)
60
+
61
+ # Step 2: Pass the transcription to the chat function
62
+ response = chat(transcribed_text, history)
63
+
64
+ return response
65
+
66
+
67
+ def chat(message, history):
68
+ messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": message}]
69
+ response = openai.chat.completions.create(model=MODEL, messages=messages)
70
+ reply = response.choices[0].message.content
71
+
72
+ print(f"History: {history}")
73
+ print(f"Message: {message}")
74
+ print(f"Messages: {messages}")
75
+
76
+ talker(reply)
77
+ return reply
78
+
79
+
80
+ # Gradio interface for microphone input
81
+ interface = gr.Interface(
82
+ fn=process_microphone_input,
83
+ inputs=[gr.Audio(sources="microphone", type="filepath")], # Microphone as input
84
+ outputs="text", # GPT response as text
85
+ title="Speech-to-Chatbot-to-Speech Language Tutor",
86
+ description="Speak into the microphone to chat with GPT-4. Wait a couple of seconds before you submit your message."
87
+ )
88
+
89
+ if __name__ == "__main__":
90
+ interface.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ openai
2
+ gradio
3
+ pydub
4
+ python-dotenv