akazmi committed
Commit 3553b09 · verified · 1 Parent(s): b1e9d32

Create myapp.py

Files changed (1)
  1. myapp.py +91 -0
myapp.py ADDED
@@ -0,0 +1,91 @@
# Import necessary libraries
import gradio as gr
from gtts import gTTS
import speech_recognition as sr
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import cv2

# Load the BLIP captioning model once at startup so each request
# does not re-initialise the weights
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Text-to-Speech function
def text_to_speech(text):
    tts = gTTS(text=text, lang='en', slow=False)
    filename = "output.mp3"
    tts.save(filename)
    return filename

# Speech-to-Text function
# Note: sr.Microphone() records on the machine running this script,
# so this feature only works when the app is run locally.
def speech_to_text():
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Please say something:")
        audio = recognizer.listen(source)
    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "Sorry, I could not understand the audio."
    except sr.RequestError as e:
        return f"Could not request results; {e}"

# Image Description function
def generate_image_description(image):
    inputs = processor(images=image, return_tensors="pt")
    out = model.generate(**inputs)
    description = processor.decode(out[0], skip_special_tokens=True)
    return description

# Video Description function
def generate_video_description(video):
    # gr.File may hand the function a file path (newer Gradio versions)
    # or a file-like object with a .name attribute (older versions)
    path = video if isinstance(video, str) else video.name
    cap = cv2.VideoCapture(path)
    descriptions = []

    for _ in range(5):  # Limit to first 5 frames for description
        ret, frame = cap.read()
        if not ret:
            break
        # OpenCV yields BGR frames; convert to RGB before captioning
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        descriptions.append(generate_image_description(Image.fromarray(frame_rgb)))

    cap.release()
    return "\n".join(descriptions)  # a Textbox output expects a single string

# Gradio Interface
def main():
    with gr.Blocks() as app:
        gr.Markdown("<h1>AI-Powered Accessibility Tools</h1>")

        # Text-to-Speech
        with gr.Row():
            text_input = gr.Textbox(label="Enter text for Text-to-Speech")
            tts_button = gr.Button("Convert to Speech")
            tts_output = gr.Audio(label="TTS Output")
        tts_button.click(fn=text_to_speech, inputs=text_input, outputs=tts_output)

        # Speech-to-Text
        stt_button = gr.Button("Record Audio")
        stt_output = gr.Textbox(label="Speech-to-Text Output")
        stt_button.click(fn=speech_to_text, outputs=stt_output)

        # Image Description
        image_input = gr.Image(label="Upload an Image", type="pil")  # deliver a PIL image to the handler
        image_desc_output = gr.Textbox(label="Image Description")
        image_desc_button = gr.Button("Describe Image")
        image_desc_button.click(fn=generate_image_description, inputs=image_input, outputs=image_desc_output)

        # Video Description
        video_input = gr.File(label="Upload a Video")
        video_desc_output = gr.Textbox(label="Video Descriptions")
        video_desc_button = gr.Button("Describe Video")
        video_desc_button.click(fn=generate_video_description, inputs=video_input, outputs=video_desc_output)

    app.launch()

if __name__ == "__main__":
    main()
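
Running the file locally assumes these packages are installed: gradio, gTTS, SpeechRecognition (plus PyAudio for microphone access), transformers, torch, Pillow, and opencv-python. For a quick sanity check of the captioning pipeline without launching the UI, a minimal sketch that calls generate_image_description directly; sample.jpg is a hypothetical placeholder for any local image:

from PIL import Image
from myapp import generate_image_description  # triggers the one-time BLIP model load

# "sample.jpg" is a hypothetical example file; any RGB image works
print(generate_image_description(Image.open("sample.jpg")))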