Prince9191 committed on
Commit
96d7fb7
·
verified ·
1 Parent(s): d545532

Delete object_detection.py

Browse files
Files changed (1) hide show
  1. object_detection.py +0 -68
object_detection.py DELETED
@@ -1,68 +0,0 @@
1
- import torch
2
- from transformers import BlipProcessor, BlipForConditionalGeneration
3
- from gtts import gTTS
4
- import tempfile
5
- import subprocess
6
- import sys
7
- import gradio
8
-
9
-
10
def ensure_package_installed(package_name):
    """Import ``package_name``, installing it with pip first if it is missing.

    Args:
        package_name: Used both as the module name to import and as the
            requirement passed to ``pip install``, so the two must coincide.

    Raises:
        subprocess.CalledProcessError: If the ``pip install`` command exits
            with a non-zero status.
        ImportError: If the package still cannot be imported after installing.
    """
    # Local import keeps this block self-contained without touching the
    # file's import header.
    import importlib

    try:
        importlib.import_module(package_name)
    except ImportError:
        print(f"{package_name} package not found. Installing...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
        # The import system caches directory listings; a package installed
        # after interpreter start-up may be invisible until they are cleared.
        importlib.invalidate_caches()
        importlib.import_module(package_name)
17
-
18
# Make sure the third-party packages this app uses are importable,
# installing any that are missing.
# NOTE(review): the imports at the top of this file run before this check,
# so a missing package would already have failed there -- confirm whether
# the check should move above those imports.
for _required_package in ("gradio", "transformers", "gtts"):
    ensure_package_installed(_required_package)
22
-
23
-
24
# Load the image captioning processor and model once at import time; both
# come from the same pretrained checkpoint.
_CAPTION_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(_CAPTION_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_CAPTION_CHECKPOINT)
27
-
28
def generate_description(image):
    """Return a text caption for ``image`` from the module-level BLIP model.

    The image is run through ``processor`` to get PyTorch tensors, which are
    moved to the model's device and passed to ``model.generate``. The first
    generated sequence is decoded with special tokens stripped.
    """
    model_inputs = processor(image, return_tensors="pt")
    model_inputs = model_inputs.to(model.device)
    generated_ids = model.generate(**model_inputs)
    return processor.decode(generated_ids[0], skip_special_tokens=True)
34
-
35
def text_to_speech(text):
    """Convert ``text`` to English speech with gTTS and return the MP3 path.

    Args:
        text: The text to synthesize.

    Returns:
        Path of a newly created temporary ``.mp3`` file. The file is created
        with ``delete=False``, so it is not removed automatically.
    """
    tts = gTTS(text=text, lang='en')
    # Only the unique path is needed; close the handle before gTTS writes to
    # it so the descriptor is not leaked and the file can be reopened by name
    # on platforms that forbid a second open while it is held.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
        audio_path = temp_audio.name
    tts.save(audio_path)
    return audio_path
41
-
42
def process_image(image):
    """Return the generated description for the uploaded image.

    Args:
        image: The image to caption, or ``None`` when nothing was uploaded.

    Returns:
        The caption from ``generate_description``, or an empty string when
        ``image`` is ``None``.
    """
    if image is None:
        # Presumably what the UI passes when the button is clicked before an
        # image is uploaded; skip the model instead of failing inside it.
        return ""
    return generate_description(image)
46
-
47
def get_audio(description):
    """Return the path of an audio file speaking ``description``.

    Args:
        description: Text to read aloud; may be empty or ``None`` when no
            description has been generated yet.

    Returns:
        The path returned by ``text_to_speech``, or ``None`` when there is no
        non-whitespace text to speak.
    """
    if not description or not description.strip():
        # gTTS rejects empty text, so skip synthesis and return no audio.
        return None
    return text_to_speech(description)
50
-
51
# Assemble the Gradio UI: an image input beside the description textbox,
# one button per action, and an audio player for the spoken description.
with gradio.Blocks() as demo:
    gradio.Markdown("# Image Description and Audio Transcript App")
    gradio.Markdown("Upload an image to get an AI-generated description. Click the button to hear the description.")

    with gradio.Row():
        image_input = gradio.Image(type="pil")
        text_output = gradio.Textbox(label="Generated Description")

    generate_btn = gradio.Button("Generate Description")
    audio_btn = gradio.Button("Click here for an audio transcript")
    audio_output = gradio.Audio()

    # Image -> description text.
    generate_btn.click(
        fn=process_image,
        inputs=[image_input],
        outputs=[text_output],
    )
    # Description text -> audio file.
    audio_btn.click(
        fn=get_audio,
        inputs=[text_output],
        outputs=[audio_output],
    )

# Start serving the app.
demo.launch()