# TestSpace3 / app.py
# ProfRom's picture
# Smallwood - Sanity Check 3
# adbc5fd verified
# raw
# history blame
# 3.99 kB
import gradio as gr
from transformers import pipeline
from PIL import ImageDraw, ImageFont
import textwrap
# --- LOAD MODELS ---
# Pipelines are created once at import time so every Gradio request
# reuses the same loaded weights instead of reloading per call.
print("Loading Models...")
# BLIP base: produces a natural-language caption for an input image.
caption_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
# ViT base (patch16, 224px): single-label image classification.
classification_pipeline = pipeline("image-classification", model="google/vit-base-patch16-224")
# No model pinned -- uses the transformers default sentiment checkpoint.
# NOTE(review): unpinned model may drift between transformers releases; pin if reproducibility matters.
sentiment_pipeline = pipeline("sentiment-analysis")
# --- DRAWING FUNCTION ---
def add_caption_to_image(image, text):
    """Draw *text* in a centered, semi-transparent black box near the
    bottom edge of *image* and return the annotated image.

    Parameters
    ----------
    image : PIL.Image.Image
        Image to annotate; it is modified in place (callers pass a copy).
    text : str
        Caption text; wrapped to fit the image width. If it wraps to
        nothing (empty/whitespace), the image is returned untouched.

    Returns
    -------
    PIL.Image.Image
        The same image object, with the caption drawn on it.
    """
    # "RGBA" draw mode makes the 4-tuple fill below actually alpha-blend;
    # a plain Draw on an RGB image would paint the box fully opaque,
    # defeating the intended 180/255 transparency.
    draw = ImageDraw.Draw(image, "RGBA")
    image_width, image_height = image.size

    # 1. Font: prefer a scalable TrueType face, fall back to the bitmap default.
    try:
        font = ImageFont.truetype("DejaVuSans.ttf", 20)
    except IOError:
        font = ImageFont.load_default()

    # 2. Wrap text to an estimated per-line character budget for this width.
    avg_char_width = 12  # rough px per glyph at size 20 (heuristic)
    chars_per_line = max(10, int((image_width - 40) / avg_char_width))
    lines = textwrap.wrap(text, width=chars_per_line)
    if not lines:
        # Nothing to render -- avoid drawing a degenerate empty box.
        return image

    # 3. Box geometry: total height and the widest rendered line.
    line_height = 24
    total_text_height = len(lines) * line_height
    # Clamp so a very tall caption block never starts above the canvas.
    y_start = max(0, image_height - total_text_height - 20)
    max_line_width = 0
    for line in lines:
        bbox = draw.textbbox((0, 0), line, font=font)
        max_line_width = max(max_line_width, bbox[2] - bbox[0])
    box_x = (image_width - max_line_width) / 2

    # 4. Semi-transparent backdrop behind the whole text block.
    padding = 10
    draw.rectangle(
        [
            (box_x - padding, y_start - padding),
            (box_x + max_line_width + padding, y_start + total_text_height + padding),
        ],
        fill=(0, 0, 0, 180),
    )

    # 5. Each line centered horizontally, stacked downward.
    current_y = y_start
    for line in lines:
        bbox = draw.textbbox((0, 0), line, font=font)
        line_width = bbox[2] - bbox[0]
        line_x = (image_width - line_width) / 2
        draw.text((line_x, current_y), line, font=font, fill="white")
        current_y += line_height
    return image
# --- ANALYSIS FUNCTION ---
def multimodal_analysis(input_image):
    """Run the full caption -> annotate -> classify -> sentiment chain.

    Parameters
    ----------
    input_image : PIL.Image.Image or None
        Image from the Gradio input component.

    Returns
    -------
    tuple
        (annotated_image, classification_str, sentiment_str). When the
        input is None, returns (None, prompt text, "N/A"). Individual
        stage failures surface as the string "Error" in that slot.
    """
    if input_image is None:
        return None, "Upload image first", "N/A"
    # Draw on a copy so the user's upload (shown in the input widget)
    # stays unmodified.
    processed_image = input_image.copy()

    # 1. Caption -- everything downstream depends on it, so bail out on failure.
    try:
        caption = caption_pipeline(input_image)[0]['generated_text']
    except Exception:  # narrowed from bare except: don't swallow SystemExit/KeyboardInterrupt
        return processed_image, "Error", "Error"

    # 2. Burn the caption into the copy.
    final_img = add_caption_to_image(processed_image, caption)

    # 3. Object classification (best-effort; UI shows "Error" on failure).
    try:
        res = classification_pipeline(input_image)
        cls_str = f"{res[0]['label']} ({res[0]['score']:.2f})"
    except Exception:
        cls_str = "Error"

    # 4. Sentiment of the generated caption (best-effort).
    try:
        sent = sentiment_pipeline(caption)[0]['label']
    except Exception:
        sent = "Error"

    return final_img, cls_str, sent
# --- INTERFACE (Removed Theme to fix crash) ---
# Declaration order inside the Blocks context defines the page layout.
with gr.Blocks() as demo:
    gr.Markdown("# πŸ€– Multimodal AI Analyst")
    gr.Markdown("Select an example image below to see: **Image Captioning**, **Vision Classification**, and **NLP Sentiment Analysis** working together.")
    with gr.Row():
        # Left column: image upload plus the trigger button.
        with gr.Column():
            image_input = gr.Image(type="pil", label="Input Image")
            submit_btn = gr.Button("πŸ” Analyze Image", variant="primary")
        # Right column: annotated result image over the two text outputs.
        with gr.Column():
            output_image = gr.Image(label="AI Caption Result")
            with gr.Row():
                output_class = gr.Textbox(label="Object Class")
                output_sent = gr.Textbox(label="Caption Sentiment")
    # EXACT FILES FROM YOUR LIST
    # NOTE(review): paths are relative to the Space root -- assumes these PNGs
    # sit next to app.py in the repo; confirm they exist or Examples will break.
    examples = [
        ["Ashe Catcum with Pikachu.png"],
        ["Beautiful sunrise over ocean.png"],
        ["Cat on a couch.png"],
        ["Female Crying.png"],
        ["Lions Football team huddle.png"],
        ["michael jordan trophy.png"],
        ["Puppies playing in grass.png"],
        ["Red Ferrari.png"],
        ["Siamese cat.png"],
        ["Stormy dark sky lightning.png"]
    ]
    gr.Examples(examples=examples, inputs=image_input)
    # Wire the button: outputs map positionally to multimodal_analysis's
    # (image, classification, sentiment) return tuple.
    submit_btn.click(fn=multimodal_analysis, inputs=image_input, outputs=[output_image, output_class, output_sent])
# Start the Gradio server (blocking call).
demo.launch()