File size: 2,662 Bytes
1fa3e4c
6de38d8
3a0ec3f
5650516
b73a5f7
bc8f67e
9a917c3
b56f80b
d9e3c0a
bc8f67e
 
 
 
 
 
 
 
 
 
3a0ec3f
9a917c3
bc8f67e
 
 
 
 
 
 
5650516
bc8f67e
 
9a917c3
 
 
 
 
 
 
 
 
bc8f67e
 
 
f404bdb
bc8f67e
 
b73a5f7
bc8f67e
b73a5f7
bc8f67e
b73a5f7
bc8f67e
b73a5f7
bc8f67e
 
 
9a917c3
 
bc8f67e
 
 
 
9a917c3
 
 
 
 
bc8f67e
 
8a3153b
bc8f67e
3a0ec3f
 
 
b73a5f7
5650516
3a0ec3f
 
bc8f67e
 
c20bd0b
3a0ec3f
8a3153b
 
9a917c3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import os
import tempfile

import gradio as gr
import pyttsx3
import requests
import speech_recognition as sr

# Gemini API configuration.
# The key is read from the environment so that no credential lives in source
# control. Any key that was previously committed to this file should be
# considered leaked and revoked.
API_KEY = os.environ.get("GEMINI_API_KEY", "")
# REST endpoint of the generateContent method; the model id in the path can be
# swapped for any model the key has access to.
API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"

# Upper bound, in seconds, on how long a single API request may block the UI.
_REQUEST_TIMEOUT_SECONDS = 30


def _extract_reply_text(data):
    """Return the joined text parts of the first candidate, or "" if absent."""
    try:
        parts = data["candidates"][0]["content"]["parts"]
    except (KeyError, IndexError, TypeError):
        return ""
    if not isinstance(parts, list):
        return ""
    return "".join(
        part.get("text", "") for part in parts if isinstance(part, dict)
    )


# Function to call Gemini API
def call_gemini_api(message):
    """Send *message* to the Gemini API and return the generated text.

    Args:
        message: The user prompt to forward to the model.

    Returns:
        The model's reply as a string. If the reply carries no text,
        "No response text" is returned. Failures are reported as a string
        starting with "Error" rather than raised, so the caller can display
        them directly.
    """
    if not API_KEY:
        # Fail fast with an actionable message instead of a 401/403 round trip.
        return (
            "Error: Gemini API key is not configured "
            "(set the GEMINI_API_KEY environment variable)."
        )

    headers = {
        # API keys are passed in this header; "Authorization: Bearer" is
        # reserved for OAuth access tokens.
        "x-goog-api-key": API_KEY,
        "Content-Type": "application/json",
    }
    payload = {
        "contents": [{"parts": [{"text": message}]}],
        "generationConfig": {"maxOutputTokens": 100},
    }
    try:
        # Sending request to Gemini API
        response = requests.post(
            API_URL,
            headers=headers,
            json=payload,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        if response.status_code != 200:
            return f"Error: {response.status_code}, {response.text}"
        # response.json() raises a ValueError subclass on a non-JSON body.
        return _extract_reply_text(response.json()) or "No response text"
    except (requests.RequestException, ValueError) as e:
        return f"Error occurred while calling API: {str(e)}"

# Convert text to speech (TTS)
def text_to_speech(text):
    """Synthesize *text* to an audio file and return the file's path.

    Each call writes to its own temporary file so that overlapping requests
    cannot overwrite one another's audio.

    Args:
        text: The text to speak.

    Returns:
        Path of the generated audio file, or None when *text* is empty or
        synthesis fails (the failure is printed, not raised).
    """
    if not text:
        return None

    audio_filename = None
    try:
        # Reserve a unique path; pyttsx3 writes to it by name afterwards.
        # NOTE(review): ".wav" assumes the SAPI5/eSpeak drivers, which write
        # WAV rather than MP3 - confirm for the driver actually in use.
        with tempfile.NamedTemporaryFile(
            prefix="response_", suffix=".wav", delete=False
        ) as tmp:
            audio_filename = tmp.name

        engine = pyttsx3.init()
        engine.save_to_file(text, audio_filename)
        # save_to_file only queues the job; runAndWait performs the synthesis.
        engine.runAndWait()
        return audio_filename
    except Exception as e:
        # Best effort: the text reply is still useful without audio.
        print(f"Error with TTS: {e}")
        if audio_filename is not None:
            # Do not leave an empty placeholder file behind.
            try:
                os.remove(audio_filename)
            except OSError:
                pass
        return None

# Convert audio to text (ASR)
def audio_to_text(audio_path):
    """Transcribe the audio file at *audio_path* with Google's recognizer.

    Returns the transcript on success. When the speech cannot be understood
    or the recognition request fails, a fixed explanatory message is
    returned in place of the transcript.
    """
    asr = sr.Recognizer()

    # Read the entire file into memory before recognizing it.
    with sr.AudioFile(audio_path) as clip:
        captured = asr.record(clip)

    try:
        transcript = asr.recognize_google(captured)
    except sr.UnknownValueError:
        transcript = "Could not understand audio"
    except sr.RequestError:
        transcript = "Request error with the recognition service"
    return transcript

# Define function for Gradio interface
def respond(text_input=None, audio_input=None):
    """Answer a typed or spoken message.

    Args:
        text_input: Message typed by the user, if any.
        audio_input: Path to a recorded message, if any. When given, its
            transcript replaces *text_input*.

    Returns:
        A ``(reply_text, reply_audio)`` pair, where ``reply_audio`` is
        whatever ``text_to_speech`` returns for the reply. If there is no
        usable input the pair is ``("Error: No input provided.", None)``.
    """
    # Spoken input wins over typed input when both are supplied.
    prompt = audio_to_text(audio_input) if audio_input else text_input

    if not prompt:
        return "Error: No input provided.", None

    # Ask the model first, then voice its answer.
    reply = call_gemini_api(prompt)
    return reply, text_to_speech(reply)

# Gradio UI wiring: a text box and an audio clip go in, the reply comes back
# as text plus audio. The input order matches respond()'s
# (text_input, audio_input) parameters, and the audio input is handed over
# as a file path.
demo = gr.Interface(
    fn=respond,
    inputs=[
        gr.Textbox(label="Text Input", placeholder="Enter your message..."),
        gr.Audio(type="filepath", label="Audio Input"),
    ],
    outputs=[gr.Textbox(label="Response Text"), gr.Audio(label="Response Audio")],
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch(debug=True)