ProfRom committed on
Commit
b28336f
·
verified ·
1 Parent(s): adbc5fd

Poudel - Sanity Check

Browse files
Files changed (1) hide show
  1. app.py +19 -122
app.py CHANGED
@@ -1,127 +1,24 @@
1
 
2
  import gradio as gr
3
  from transformers import pipeline
4
- from PIL import ImageDraw, ImageFont
5
- import textwrap
6
 
7
- # --- LOAD MODELS ---
8
- print("Loading Models...")
9
- caption_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
10
- classification_pipeline = pipeline("image-classification", model="google/vit-base-patch16-224")
11
- sentiment_pipeline = pipeline("sentiment-analysis")
12
-
13
- # --- DRAWING FUNCTION ---
14
- def add_caption_to_image(image, text):
15
- draw = ImageDraw.Draw(image)
16
- image_width, image_height = image.size
17
-
18
- # 1. Setup Font
19
- try:
20
- font = ImageFont.truetype("DejaVuSans.ttf", 20)
21
- except IOError:
22
- font = ImageFont.load_default()
23
-
24
- # 2. Wrap Text
25
- avg_char_width = 12
26
- chars_per_line = max(10, int((image_width - 40) / avg_char_width))
27
- lines = textwrap.wrap(text, width=chars_per_line)
28
-
29
- # 3. Calculate Box Size
30
- line_height = 24
31
- total_text_height = len(lines) * line_height
32
- y_start = image_height - total_text_height - 20
33
-
34
- max_line_width = 0
35
- for line in lines:
36
- bbox = draw.textbbox((0, 0), line, font=font)
37
- w = bbox[2] - bbox[0]
38
- if w > max_line_width: max_line_width = w
39
-
40
- box_x = (image_width - max_line_width) / 2
41
-
42
- # 4. Draw Box
43
- padding = 10
44
- draw.rectangle(
45
- [
46
- (box_x - padding, y_start - padding),
47
- (box_x + max_line_width + padding, y_start + total_text_height + padding)
48
- ],
49
- fill=(0, 0, 0, 180)
50
- )
51
-
52
- # 5. Draw Text
53
- current_y = y_start
54
- for line in lines:
55
- bbox = draw.textbbox((0, 0), line, font=font)
56
- line_width = bbox[2] - bbox[0]
57
- line_x = (image_width - line_width) / 2
58
- draw.text((line_x, current_y), line, font=font, fill="white")
59
- current_y += line_height
60
-
61
- return image
62
-
63
- # --- ANALYSIS FUNCTION ---
64
- def multimodal_analysis(input_image):
65
- if input_image is None: return None, "Upload image first", "N/A"
66
-
67
- processed_image = input_image.copy()
68
-
69
- # 1. Caption
70
- try:
71
- caption = caption_pipeline(input_image)[0]['generated_text']
72
- except:
73
- return processed_image, "Error", "Error"
74
-
75
- # 2. Draw
76
- final_img = add_caption_to_image(processed_image, caption)
77
-
78
- # 3. Classify
79
- try:
80
- res = classification_pipeline(input_image)
81
- cls_str = f"{res[0]['label']} ({res[0]['score']:.2f})"
82
- except:
83
- cls_str = "Error"
84
-
85
- # 4. Sentiment
86
- try:
87
- sent = sentiment_pipeline(caption)[0]['label']
88
- except:
89
- sent = "Error"
90
-
91
- return final_img, cls_str, sent
92
-
93
- # --- INTERFACE (Removed Theme to fix crash) ---
94
- with gr.Blocks() as demo:
95
- gr.Markdown("# 🤖 Multimodal AI Analyst")
96
- gr.Markdown("Select an example image below to see: **Image Captioning**, **Vision Classification**, and **NLP Sentiment Analysis** working together.")
97
-
98
- with gr.Row():
99
- with gr.Column():
100
- image_input = gr.Image(type="pil", label="Input Image")
101
- submit_btn = gr.Button("🔍 Analyze Image", variant="primary")
102
-
103
- with gr.Column():
104
- output_image = gr.Image(label="AI Caption Result")
105
- with gr.Row():
106
- output_class = gr.Textbox(label="Object Class")
107
- output_sent = gr.Textbox(label="Caption Sentiment")
108
-
109
- # EXACT FILES FROM YOUR LIST
110
- examples = [
111
- ["Ashe Catcum with Pikachu.png"],
112
- ["Beautiful sunrise over ocean.png"],
113
- ["Cat on a couch.png"],
114
- ["Female Crying.png"],
115
- ["Lions Football team huddle.png"],
116
- ["michael jordan trophy.png"],
117
- ["Puppies playing in grass.png"],
118
- ["Red Ferrari.png"],
119
- ["Siamese cat.png"],
120
- ["Stormy dark sky lightning.png"]
121
- ]
122
-
123
- gr.Examples(examples=examples, inputs=image_input)
124
- submit_btn.click(fn=multimodal_analysis, inputs=image_input, outputs=[output_image, output_class, output_sent])
125
-
126
- demo.launch()
127
 
 
1
 
2
  import gradio as gr
3
  from transformers import pipeline
 
 
4
 
5
+ # Load image captioning pipeline
6
+ captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
7
+
8
+ def generate_caption(image):
9
+ if image is None:
10
+ return "Please upload an image."
11
+ result = captioner(image)
12
+ return result[0]['generated_text']
13
+
14
+ demo = gr.Interface(
15
+ fn=generate_caption,
16
+ inputs=gr.Image(type="pil", label="Upload an image"),
17
+ outputs=gr.Textbox(label="Generated Caption"),
18
+ title="Image Captioning Demo",
19
+ description="Multimodal model: Vision → Language"
20
+ )
21
+
22
+ if __name__ == "__main__":
23
+ demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24