Spaces:

akazmi
/

hackaton1

Sleeping

akazmi committed on Nov 5, 2024

Commit

5af1eca

verified ·

1 Parent(s): 9ea8dc2

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -17,7 +17,7 @@ def text_to_speech(text):
 # Speech-to-Text function
 def speech_to_text(audio):
     recognizer = sr.Recognizer()
-    with sr.AudioFile(audio) as source:  # Use the audio file directly
         audio_data = recognizer.record(source)
         try:
             text = recognizer.recognize_google(audio_data)
@@ -39,7 +39,7 @@ def generate_image_description(image):
 # Video Description function
 def generate_video_description(video):
-    cap = cv2.VideoCapture(video.name)  # Access the video using the file name
     descriptions = []
     if not cap.isOpened():
@@ -50,7 +50,6 @@ def generate_video_description(video):
         ret, frame = cap.read()
         if not ret:
             break
-        # Use a temporary image to generate description
         image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
         description = generate_image_description(image)
         descriptions.append(description)
@@ -72,7 +71,7 @@ def main():
             tts_button.click(fn=text_to_speech, inputs=text_input, outputs=tts_output)
         # Speech-to-Text
-        stt_input = gr.Audio(source="microphone", type="filepath", label="Record Audio")
         stt_button = gr.Button("Convert Speech to Text")
         stt_output = gr.Textbox(label="Speech-to-Text Output")
         stt_button.click(fn=speech_to_text, inputs=stt_input, outputs=stt_output)

 # Speech-to-Text function
 def speech_to_text(audio):
     recognizer = sr.Recognizer()
+    with sr.AudioFile(audio) as source:
         audio_data = recognizer.record(source)
         try:
             text = recognizer.recognize_google(audio_data)
 # Video Description function
 def generate_video_description(video):
+    cap = cv2.VideoCapture(video.name)
     descriptions = []
     if not cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break
         image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
         description = generate_image_description(image)
         descriptions.append(description)
             tts_button.click(fn=text_to_speech, inputs=text_input, outputs=tts_output)
         # Speech-to-Text
+        stt_input = gr.Audio(label="Record Audio", type="filepath")
         stt_button = gr.Button("Convert Speech to Text")
         stt_output = gr.Textbox(label="Speech-to-Text Output")
         stt_button.click(fn=speech_to_text, inputs=stt_input, outputs=stt_output)