akazmi committed on
Commit
9ea8dc2
·
verified ·
1 Parent(s): 16b4a8e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -13
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # Import necessary libraries
2
  import gradio as gr
3
  from gtts import gTTS
4
  import os
@@ -16,13 +15,12 @@ def text_to_speech(text):
16
  return filename
17
 
18
  # Speech-to-Text function
19
- def speech_to_text():
20
  recognizer = sr.Recognizer()
21
- with sr.Microphone() as source:
22
- print("Please say something:")
23
- audio = recognizer.listen(source)
24
  try:
25
- text = recognizer.recognize_google(audio)
26
  return text
27
  except sr.UnknownValueError:
28
  return "Sorry, I could not understand the audio."
@@ -41,17 +39,22 @@ def generate_image_description(image):
41
 
42
  # Video Description function
43
  def generate_video_description(video):
44
- cap = cv2.VideoCapture(video.name)
45
  descriptions = []
46
 
47
- for _ in range(5): # Limit to first 5 frames for description
 
 
 
 
48
  ret, frame = cap.read()
49
  if not ret:
50
  break
51
- frame_path = f"frame.jpg"
52
- cv2.imwrite(frame_path, frame) # Save frame as image
53
- description = generate_image_description(Image.open(frame_path))
54
  descriptions.append(description)
 
55
 
56
  cap.release()
57
  return descriptions
@@ -69,9 +72,10 @@ def main():
69
  tts_button.click(fn=text_to_speech, inputs=text_input, outputs=tts_output)
70
 
71
  # Speech-to-Text
72
- stt_button = gr.Button("Record Audio")
 
73
  stt_output = gr.Textbox(label="Speech-to-Text Output")
74
- stt_button.click(fn=speech_to_text, outputs=stt_output)
75
 
76
  # Image Description
77
  image_input = gr.Image(label="Upload an Image")
 
 
1
  import gradio as gr
2
  from gtts import gTTS
3
  import os
 
15
  return filename
16
 
17
  # Speech-to-Text function
18
+ def speech_to_text(audio):
19
  recognizer = sr.Recognizer()
20
+ with sr.AudioFile(audio) as source: # Use the audio file directly
21
+ audio_data = recognizer.record(source)
 
22
  try:
23
+ text = recognizer.recognize_google(audio_data)
24
  return text
25
  except sr.UnknownValueError:
26
  return "Sorry, I could not understand the audio."
 
39
 
40
# Video Description function
def generate_video_description(video, max_frames=5):
    """Generate textual descriptions for the leading frames of a video.

    Args:
        video: Uploaded file object exposing a ``.name`` filesystem path
            (as provided by the Gradio video component).
        max_frames: Maximum number of frames (from the start of the video)
            to describe. Defaults to 5, preserving the original hard-coded
            limit.

    Returns:
        A list of description strings, one per processed frame; or a plain
        error string when the video file cannot be opened (callers display
        either value directly).
    """
    cap = cv2.VideoCapture(video.name)  # Access the video via its file path
    if not cap.isOpened():
        # Keep the original best-effort contract: report, don't raise.
        return "Error opening video file."

    descriptions = []
    frame_count = 0
    while frame_count < max_frames:
        ret, frame = cap.read()
        if not ret:  # Video ended before reaching the frame limit.
            break
        # OpenCV decodes frames as BGR; convert to RGB before handing the
        # array to PIL, which expects RGB channel order.
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        descriptions.append(generate_image_description(image))
        frame_count += 1

    cap.release()  # Release the capture handle even on early break.
    return descriptions
 
72
  tts_button.click(fn=text_to_speech, inputs=text_input, outputs=tts_output)
73
 
74
  # Speech-to-Text
75
+ stt_input = gr.Audio(source="microphone", type="filepath", label="Record Audio")
76
+ stt_button = gr.Button("Convert Speech to Text")
77
  stt_output = gr.Textbox(label="Speech-to-Text Output")
78
+ stt_button.click(fn=speech_to_text, inputs=stt_input, outputs=stt_output)
79
 
80
  # Image Description
81
  image_input = gr.Image(label="Upload an Image")