akazmi committed
Commit 3553b09 · verified · 1 Parent(s): b1e9d32

Create myapp.py

Files changed (1)
  1. myapp.py +91 -0
myapp.py ADDED
@@ -0,0 +1,91 @@
# Import necessary libraries
import gradio as gr
from gtts import gTTS
import speech_recognition as sr
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import cv2

# Load the BLIP captioning model once at startup so each request
# does not re-initialise the weights
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Text-to-Speech function
def text_to_speech(text):
    tts = gTTS(text=text, lang='en', slow=False)
    filename = "output.mp3"
    tts.save(filename)
    return filename

# Speech-to-Text function
# Note: sr.Microphone() records on the machine running this script,
# so this feature only works when the app is run locally.
def speech_to_text():
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Please say something:")
        audio = recognizer.listen(source)
    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "Sorry, I could not understand the audio."
    except sr.RequestError as e:
        return f"Could not request results; {e}"

# Image Description function
def generate_image_description(image):
    inputs = processor(images=image, return_tensors="pt")
    out = model.generate(**inputs)
    description = processor.decode(out[0], skip_special_tokens=True)
    return description

# Video Description function
def generate_video_description(video):
    # gr.File may hand the function a file path (newer Gradio versions)
    # or a file-like object with a .name attribute (older versions)
    path = video if isinstance(video, str) else video.name
    cap = cv2.VideoCapture(path)
    descriptions = []

    for _ in range(5):  # Limit to first 5 frames for description
        ret, frame = cap.read()
        if not ret:
            break
        # OpenCV yields BGR frames; convert to RGB before captioning
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        descriptions.append(generate_image_description(Image.fromarray(frame_rgb)))

    cap.release()
    return "\n".join(descriptions)  # a Textbox output expects a single string

# Gradio Interface
def main():
    with gr.Blocks() as app:
        gr.Markdown("<h1>AI-Powered Accessibility Tools</h1>")

        # Text-to-Speech
        with gr.Row():
            text_input = gr.Textbox(label="Enter text for Text-to-Speech")
            tts_button = gr.Button("Convert to Speech")
            tts_output = gr.Audio(label="TTS Output")
        tts_button.click(fn=text_to_speech, inputs=text_input, outputs=tts_output)

        # Speech-to-Text
        stt_button = gr.Button("Record Audio")
        stt_output = gr.Textbox(label="Speech-to-Text Output")
        stt_button.click(fn=speech_to_text, outputs=stt_output)

        # Image Description
        image_input = gr.Image(label="Upload an Image", type="pil")  # deliver a PIL image to the handler
        image_desc_output = gr.Textbox(label="Image Description")
        image_desc_button = gr.Button("Describe Image")
        image_desc_button.click(fn=generate_image_description, inputs=image_input, outputs=image_desc_output)

        # Video Description
        video_input = gr.File(label="Upload a Video")
        video_desc_output = gr.Textbox(label="Video Descriptions")
        video_desc_button = gr.Button("Describe Video")
        video_desc_button.click(fn=generate_video_description, inputs=video_input, outputs=video_desc_output)

    app.launch()

if __name__ == "__main__":
    main()
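
Running the file locally assumes these packages are installed: gradio, gTTS, SpeechRecognition (plus PyAudio for microphone access), transformers, torch, Pillow, and opencv-python. For a quick sanity check of the captioning pipeline without launching the UI, a minimal sketch that calls generate_image_description directly; sample.jpg is a hypothetical placeholder for any local image:

from PIL import Image
from myapp import generate_image_description  # triggers the one-time BLIP model load

# "sample.jpg" is a hypothetical example file; any RGB image works
print(generate_image_description(Image.open("sample.jpg")))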