Spaces:

kathirog
/

fumblebots

Sleeping

App Files Files Community

kathirog committed on Feb 28, 2025

Commit

bc8f67e

verified ·

1 Parent(s): f7c2b4d

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -70

app.py CHANGED Viewed

@@ -3,83 +3,62 @@ import gradio as gr
 import pyttsx3
 import speech_recognition as sr
-# Replace with your actual API key
-API_KEY = "AIzaSyAm0RSpUKY38494Fug8SPIpdHLXE2d3cps"  # Replace with your Gemini 2.0 Flash API key
-MODEL_NAME = "gemini-2.0-flash"  # Replace with your Gemini model name, e.g., "Gemini 2.0 flash"
-# API URL for Gemini 2.0 Flash
-API_URL = "https://generativelanguage.googleapis.com"  # Example URL; make sure to replace with actual endpoint if different
-# Headers to pass the API Key
-headers = {"Authorization": f"Bearer {API_KEY}"}
-# Function to make the API call to Gemini
-def gemini_api_request(message):
     try:
-        response = requests.post(
-            API_URL,
-            headers=headers,
-            json={"input": message}
-        )
-        # Check if the response status code is OK
-        response.raise_for_status()
-        result = response.json()
-        return result.get("output", "Error: No output returned from API.")
-    except requests.exceptions.RequestException as e:
-        return f"Error: {str(e)}"
-# Convert audio to text
-def voice_to_text(audio_path):
     recognizer = sr.Recognizer()
     try:
-        with sr.AudioFile(audio_path) as source:
-            audio_data = recognizer.record(source)
-            text = recognizer.recognize_google(audio_data)
     except sr.UnknownValueError:
-        text = "Sorry, I could not understand the audio."
     except sr.RequestError:
-        text = "Could not connect to the recognition service."
-    except Exception as e:
-        text = f"Audio Processing Error: {str(e)}"
-    return text
-# Convert text to speech
-def text_to_voice(text):
-    try:
-        audio_filename = "response.mp3"
-        engine = pyttsx3.init()
-        engine.save_to_file(text, audio_filename)
-        engine.runAndWait()
-        return audio_filename
-    except Exception as e:
-        print(f"TTS Error: {e}")
-        return None
-# Function to handle both text and voice input/output
-def respond(message, history=None, audio_input=None):
-    try:
-        if history is None:
-            history = []
-        if audio_input:
-            message = voice_to_text(audio_input)
-        if not message.strip():
-            return "Error: No input provided.", None
-        # Make request to Gemini API for processing
-        response = gemini_api_request(message)
-        # Convert response to audio
-        audio_output = text_to_voice(response)
-        return response, audio_output
-    except Exception as e:
-        return f"Error: {str(e)}", None
-# Gradio UI
 demo = gr.Interface(
     fn=respond,
     inputs=[
@@ -87,8 +66,8 @@ demo = gr.Interface(
         gr.Audio(type="filepath", label="Audio Input")
     ],
     outputs=[
-        gr.Textbox(label="Chatbot Response"),
-        gr.Audio(label="Voice Output")
     ]
 )

 import pyttsx3
 import speech_recognition as sr
+# Gemini API configuration.
+# The key is read from the environment (e.g. a Space secret named
+# GEMINI_API_KEY) instead of being committed; a key that has appeared in a
+# commit must be treated as leaked and revoked.
+import os
+API_KEY = os.environ.get("GEMINI_API_KEY", "")
+API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
+# Function to call Gemini API
+def call_gemini_api(message):
+    """Send *message* to the Gemini generateContent endpoint and return the reply text.
+
+    Never raises: every failure (missing key, non-200 status, network or
+    parsing error) is reported as a human-readable "Error..." string, because
+    the return value is shown directly to the user.
+
+    Args:
+        message: Prompt text to send to the model.
+
+    Returns:
+        The generated text, "No response text" when the reply carries no
+        text, or an error description string.
+    """
+    if not API_KEY:
+        return "Error: GEMINI_API_KEY is not set."
+    # Gemini API keys are sent in the x-goog-api-key header, not as a Bearer token.
+    headers = {
+        "x-goog-api-key": API_KEY,
+        "Content-Type": "application/json"
+    }
+    payload = {
+        "contents": [{"parts": [{"text": message}]}],
+        "generationConfig": {"maxOutputTokens": 100}
+    }
     try:
+        # A timeout keeps a stalled connection from hanging the request forever.
+        response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
+        if response.status_code != 200:
+            return f"Error: {response.status_code}, {response.text}"
+        candidates = response.json().get("candidates") or []
+        if not candidates:
+            return "No response text"
+        parts = candidates[0].get("content", {}).get("parts", [])
+        reply = "".join(part.get("text", "") for part in parts)
+        return reply or "No response text"
+    except Exception as e:
+        # Deliberately broad: this is the boundary that turns any failure
+        # into a message for the UI instead of crashing the handler.
+        return f"Error occurred while calling API: {str(e)}"
+# Convert text to speech (TTS)
+def text_to_speech(text):
+    """Synthesize *text* into "response.mp3" with pyttsx3 and return that path.
+
+    Returns None when synthesis fails, so the caller can still return the
+    text answer without audio instead of losing the whole response.
+    """
+    try:
+        engine = pyttsx3.init()
+        engine.save_to_file(text, "response.mp3")
+        engine.runAndWait()
+    except Exception as e:
+        # Best-effort: audio is optional, so report the failure and carry on.
+        print(f"TTS Error: {e}")
+        return None
+    return "response.mp3"
+# Convert audio to text (ASR)
+def audio_to_text(audio_path):
+    """Transcribe the audio file at *audio_path* with Google speech recognition.
+
+    Never raises for the expected failure modes; each one is returned as a
+    descriptive string instead:
+      - the file cannot be opened or is in an unsupported format,
+      - the speech could not be understood,
+      - the recognition service could not be reached.
+    """
     recognizer = sr.Recognizer()
+    # Reading the file can fail before recognition starts (missing file,
+    # unsupported container), so it gets its own guarded step.
+    try:
+        with sr.AudioFile(audio_path) as source:
+            audio_data = recognizer.record(source)
+    except (OSError, ValueError) as e:
+        return f"Audio Processing Error: {str(e)}"
     try:
+        return recognizer.recognize_google(audio_data)
     except sr.UnknownValueError:
+        return "Could not understand audio"
     except sr.RequestError:
+        return "Request error with the recognition service"
+# Define function for Gradio interface
+def respond(text_input=None, audio_input=None):
+    """Answer a text or audio prompt and return (response text, response audio).
+
+    When *audio_input* is given it is transcribed and takes precedence over
+    *text_input*. If no usable prompt remains, an error message is returned
+    with no audio.
+    """
+    if audio_input:
+        text_input = audio_to_text(audio_input)  # Convert audio to text if audio input is provided
+    # Treat None and whitespace-only text as "no input" so a blank prompt is
+    # never sent to the API.
+    text_input = (text_input or "").strip()
+    if not text_input:
+        return "Error: No input provided.", None
+    api_response = call_gemini_api(text_input)  # Get response from Gemini API
+    audio_response = text_to_speech(api_response)  # Convert response text to audio
+    return api_response, audio_response
+# Gradio Interface setup
 demo = gr.Interface(
     fn=respond,
     inputs=[
         gr.Audio(type="filepath", label="Audio Input")
     ],
     outputs=[
+        gr.Textbox(label="Response Text"),
+        gr.Audio(label="Response Audio")
     ]
 )