kathirog committed on
Commit
bc8f67e
·
verified ·
1 Parent(s): f7c2b4d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -70
app.py CHANGED
@@ -3,83 +3,62 @@ import gradio as gr
3
  import pyttsx3
4
  import speech_recognition as sr
5
 
6
- # Replace with your actual API key
7
- API_KEY = "AIzaSyAm0RSpUKY38494Fug8SPIpdHLXE2d3cps" # Replace with your Gemini 2.0 Flash API key
8
- MODEL_NAME = "gemini-2.0-flash" # Replace with your Gemini model name, e.g., "Gemini 2.0 flash"
9
 
10
- # API URL for Gemini 2.0 Flash
11
- API_URL = "https://generativelanguage.googleapis.com" # Example URL; make sure to replace with actual endpoint if different
12
-
13
- # Headers to pass the API Key
14
- headers = {"Authorization": f"Bearer {API_KEY}"}
15
-
16
- # Function to make the API call to Gemini
17
- def gemini_api_request(message):
 
 
18
  try:
19
- response = requests.post(
20
- API_URL,
21
- headers=headers,
22
- json={"input": message}
23
- )
24
-
25
- # Check if the response status code is OK
26
- response.raise_for_status()
27
- result = response.json()
28
- return result.get("output", "Error: No output returned from API.")
29
-
30
- except requests.exceptions.RequestException as e:
31
- return f"Error: {str(e)}"
32
 
33
- # Convert audio to text
34
- def voice_to_text(audio_path):
 
 
 
 
 
 
 
35
  recognizer = sr.Recognizer()
 
 
36
  try:
37
- with sr.AudioFile(audio_path) as source:
38
- audio_data = recognizer.record(source)
39
- text = recognizer.recognize_google(audio_data)
40
  except sr.UnknownValueError:
41
- text = "Sorry, I could not understand the audio."
42
  except sr.RequestError:
43
- text = "Could not connect to the recognition service."
44
- except Exception as e:
45
- text = f"Audio Processing Error: {str(e)}"
46
- return text
47
-
48
- # Convert text to speech
49
- def text_to_voice(text):
50
- try:
51
- audio_filename = "response.mp3"
52
- engine = pyttsx3.init()
53
- engine.save_to_file(text, audio_filename)
54
- engine.runAndWait()
55
- return audio_filename
56
- except Exception as e:
57
- print(f"TTS Error: {e}")
58
- return None
59
 
60
- # Function to handle both text and voice input/output
61
- def respond(message, history=None, audio_input=None):
62
- try:
63
- if history is None:
64
- history = []
65
-
66
- if audio_input:
67
- message = voice_to_text(audio_input)
68
-
69
- if not message.strip():
70
- return "Error: No input provided.", None
71
-
72
- # Make request to Gemini API for processing
73
- response = gemini_api_request(message)
74
-
75
- # Convert response to audio
76
- audio_output = text_to_voice(response)
77
-
78
- return response, audio_output
79
- except Exception as e:
80
- return f"Error: {str(e)}", None
81
 
82
- # Gradio UI
83
  demo = gr.Interface(
84
  fn=respond,
85
  inputs=[
@@ -87,8 +66,8 @@ demo = gr.Interface(
87
  gr.Audio(type="filepath", label="Audio Input")
88
  ],
89
  outputs=[
90
- gr.Textbox(label="Chatbot Response"),
91
- gr.Audio(label="Voice Output")
92
  ]
93
  )
94
 
 
3
  import pyttsx3
4
  import speech_recognition as sr
5
 
6
+ # Replace with your Gemini API Key and endpoint
7
+ API_KEY = "AIzaSyAm0RSpUKY38494Fug8SPIpdHLXE2d3cps"
8
+ API_URL = "https://generativelanguage.googleapis.com/v1alpha2/models/gemini2:generateText" # Example URL (adjust as needed)
9
 
10
+ # Function to call Gemini API
11
+ def call_gemini_api(message):
12
+ headers = {
13
+ "Authorization": f"Bearer {API_KEY}",
14
+ "Content-Type": "application/json"
15
+ }
16
+ payload = {
17
+ "prompt": message,
18
+ "max_output_tokens": 100
19
+ }
20
  try:
21
+ response = requests.post(API_URL, headers=headers, json=payload)
22
+ if response.status_code == 200:
23
+ return response.json().get("generated_text", "No response text")
24
+ else:
25
+ return f"Error: {response.status_code}, {response.text}"
26
+ except Exception as e:
27
+ return f"Error occurred while calling API: {str(e)}"
 
 
 
 
 
 
28
 
29
+ # Convert text to speech (TTS)
30
+ def text_to_speech(text):
31
+ engine = pyttsx3.init()
32
+ engine.save_to_file(text, "response.mp3")
33
+ engine.runAndWait()
34
+ return "response.mp3"
35
+
36
+ # Convert audio to text (ASR)
37
+ def audio_to_text(audio_path):
38
  recognizer = sr.Recognizer()
39
+ with sr.AudioFile(audio_path) as source:
40
+ audio_data = recognizer.record(source)
41
  try:
42
+ return recognizer.recognize_google(audio_data)
 
 
43
  except sr.UnknownValueError:
44
+ return "Could not understand audio"
45
  except sr.RequestError:
46
+ return "Request error with the recognition service"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
+ # Define function for Gradio interface
49
+ def respond(text_input=None, audio_input=None):
50
+ if audio_input:
51
+ text_input = audio_to_text(audio_input) # Convert audio to text if audio input is provided
52
+
53
+ if not text_input:
54
+ return "Error: No input provided.", None
55
+
56
+ api_response = call_gemini_api(text_input) # Get response from Gemini API
57
+ audio_response = text_to_speech(api_response) # Convert response text to audio
58
+
59
+ return api_response, audio_response
 
 
 
 
 
 
 
 
 
60
 
61
+ # Gradio Interface setup
62
  demo = gr.Interface(
63
  fn=respond,
64
  inputs=[
 
66
  gr.Audio(type="filepath", label="Audio Input")
67
  ],
68
  outputs=[
69
+ gr.Textbox(label="Response Text"),
70
+ gr.Audio(label="Response Audio")
71
  ]
72
  )
73