File size: 2,496 Bytes
62a911d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5874a34
17130b5
62a911d
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import openai
import speech_recognition as sr
import os
import gradio as gr
from dotenv import load_dotenv
from gtts import gTTS

# Read a local .env file, if one exists, so the script can also be run outside
# Hugging Face. By default load_dotenv() leaves already-set variables alone.
load_dotenv()

# Load your OpenAI API key from the Hugging Face secrets
# (exposed to the process as the OPENAI_API_KEY environment variable)
openai.api_key = os.getenv("OPENAI_API_KEY")

# The system prompt is not stored in the source; it is read from the
# AI_PROMPT_SECRET environment variable on every call
def get_prompt():
    """Return the value of ``AI_PROMPT_SECRET``, or ``None`` if it is unset."""
    return os.environ.get("AI_PROMPT_SECRET")

# Initialize speech recognition: a single module-level Recognizer that
# speech_to_text() uses for noise adjustment, recording and recognition.
recognizer = sr.Recognizer()

# Function to convert speech to text
def speech_to_text(audio):
    """Transcribe an audio file to text with Google's speech recognition.

    Args:
        audio: The audio file to transcribe, in whatever form
            ``sr.AudioFile`` accepts (the Gradio input passes a file path).
            ``None`` or any other falsy value means no audio was supplied.

    Returns:
        The recognized text on success. For the failures handled below, a
        human-readable message is returned instead of raising:
        missing input, unreadable file, unintelligible speech, or a failed
        request to the recognition service.
    """
    if not audio:  # Check if audio is None or empty
        return "No audio input detected. Please provide a valid audio file."
    try:
        with sr.AudioFile(audio) as source:
            # NOTE(review): the calibration reads from the start of the file,
            # so record() presumably begins after that portion - confirm this
            # does not clip short recordings.
            recognizer.adjust_for_ambient_noise(source)
            audio_data = recognizer.record(source)
        # The samples are in memory now, so the file is closed before the
        # network round-trip to the recognition service.
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Sorry, I could not understand the audio."
    except sr.RequestError as e:
        return f"Could not request results; {e}"
    except ValueError:
        # sr.AudioFile raises ValueError when the file is not one of the
        # formats it can decode; report it like the other failures instead of
        # letting the exception escape to the UI.
        return (
            "Sorry, I could not read the audio file. "
            "Please provide a WAV, AIFF, or FLAC file."
        )

# Function to get GPT-3.5-turbo response
def get_gpt_response(prompt):
    """Send ``prompt`` to gpt-3.5-turbo and return the reply text.

    The system message is whatever ``get_prompt()`` returns. Both
    generations of the ``openai`` package are supported: releases from 1.0
    on (which removed ``openai.ChatCompletion``) and the older 0.x releases.

    Args:
        prompt: The user's message.

    Returns:
        The assistant's reply, or the string ``"Error: <details>"`` when the
        request fails for any reason. Failures are never raised.
    """
    try:
        messages = [
            {"role": "system", "content": get_prompt()},  # Using the secret prompt
            {"role": "user", "content": prompt},
        ]
        if hasattr(openai, "OpenAI"):
            # openai>=1.0: ChatCompletion no longer exists; the module-level
            # client honours openai.api_key and returns typed objects.
            response = openai.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=messages,
            )
            return response.choices[0].message.content
        # openai<1.0: legacy interface with dict-style responses.
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
        )
        return response['choices'][0]['message']['content']
    except Exception as e:
        # Deliberately broad: callers expect a string back in every case, so
        # any failure is reported as text rather than propagated.
        return f"Error: {e}"

# Function to convert text to speech using gTTS
def text_to_speech(response):
    """Synthesize ``response`` to an MP3 file with gTTS and return its path.

    Every call writes to its own temporary file, so one request's audio can
    never be overwritten by another request finishing at the same time, and
    nothing is written into the current working directory.

    Args:
        response: The text to speak.

    Returns:
        Path of the generated ``.mp3`` file. The file is not deleted by this
        function; the caller (or the OS temp cleanup) owns it.
    """
    import tempfile  # local import: only this function needs it

    # Build the gTTS object first so a rejected text does not leave an empty
    # temporary file behind.
    tts = gTTS(response)
    # delete=False keeps the file on disk after the handle is closed; only the
    # unique name is needed here, gTTS reopens the path itself.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        mp3_path = tmp.name
    tts.save(mp3_path)
    return mp3_path

# Gradio function that integrates all components
def chatbot(audio):
    """Run the full pipeline: speech -> text -> GPT reply -> spoken reply.

    Args:
        audio: The audio input handed over by Gradio; passed unchanged to
            ``speech_to_text``.

    Returns:
        A ``(text, audio_path)`` tuple. ``text`` is the AI's reply, or a
        failure message when any stage fails. ``audio_path`` is the file
        produced by ``text_to_speech``, or ``None`` when there is nothing to
        speak.
    """
    # speech_to_text() reports failures as ordinary strings, so they are
    # recognised by how they begin. Matching the start of the message (rather
    # than searching for "Sorry" anywhere) keeps genuine speech that merely
    # contains that word from being treated as a failure, and also covers the
    # "Could not request results" case.
    transcription_failures = (
        "Sorry, I could not",
        "No audio input",
        "Could not request results",
    )
    user_input = speech_to_text(audio)
    if user_input.startswith(transcription_failures):
        return user_input, None

    response = get_gpt_response(user_input)
    if not response:
        return "Error generating response.", None
    if response.startswith("Error: "):
        # get_gpt_response() failed: show the message but do not synthesize
        # an error report into speech.
        return response, None
    return response, text_to_speech(response)

# Gradio interface
# chatbot() returns (text, audio path). The text is the AI's reply or a failure
# message, never the transcription of the user's speech, so the textbox is
# labelled as the response rather than "Transcription".
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),  # chatbot receives the recording as a file path
    outputs=[gr.Textbox(label="AI Response (text)"), gr.Audio(label="AI Response")],
    live=True,
    title="dindi, at times wodehousian!",
    description="AMA - Ask me anything, questions,trivia (Upload an audio file or record your voice) to get a response from the AI."
)

# Launch the Gradio interface only when this file is run as a script;
# importing the module builds `iface` without starting it.
if __name__ == "__main__":
    iface.launch()