|
|
import gradio as gr |
|
|
from transformers import CLIPProcessor, CLIPModel |
|
|
from PIL import Image |
|
|
import torch |
|
|
|
|
|
|
|
|
# Pre-trained CLIP checkpoint to use for image-text similarity scoring.
model_name = "openai/clip-vit-base-patch16"


# Processor handles tokenizing the captions and resizing/normalizing the image
# into the tensor format the model expects.
processor = CLIPProcessor.from_pretrained(model_name)


# Loaded once at module import so each request reuses the same weights.
# NOTE(review): first call downloads the checkpoint from the Hugging Face hub —
# requires network access unless it is already cached locally.
model = CLIPModel.from_pretrained(model_name)
|
|
|
|
|
|
|
|
def match_image_with_descriptions(image, descriptions):
    """Score how well each caption matches the image using CLIP.

    Args:
        image: PIL image uploaded by the user (``None`` when nothing uploaded).
        descriptions: newline-separated captions, one per line. Gradio may
            pass ``None`` for an empty textbox, so that case is guarded too.

    Returns:
        dict mapping each caption to its softmax match probability in percent
        (rounded to 2 decimals), sorted from best to worst match, or a
        ``{"Error": ...}`` dict when the inputs are unusable.
    """
    # Fix: original called descriptions.strip() before checking for None,
    # which raised AttributeError when the textbox was left empty.
    # Also use `is None` rather than truthiness for the image check.
    if image is None or not descriptions or not descriptions.strip():
        return {"Error": "Please upload an image and enter descriptions."}

    # One caption per non-blank line.
    captions = [line.strip() for line in descriptions.splitlines() if line.strip()]

    # Softmax over a single caption is trivially 100%, so require two or more.
    if len(captions) < 2:
        return {"Error": "Please enter at least two descriptions."}

    # Tokenize all captions and preprocess the image in one batch;
    # padding=True aligns captions of different token lengths.
    inputs = processor(text=captions, images=image, return_tensors="pt", padding=True)

    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # logits_per_image has shape (1, num_captions); softmax turns the
    # similarity scores into a probability distribution over the captions.
    logits_per_image = outputs.logits_per_image
    probs = logits_per_image.softmax(dim=1)[0]

    result_dict = {
        caption: round(prob.item() * 100, 2)
        for caption, prob in zip(captions, probs)
    }

    # Highest-confidence caption first.
    return dict(sorted(result_dict.items(), key=lambda item: item[1], reverse=True))
|
|
|
|
|
|
|
|
# Gradio UI: one image upload plus a multi-line textbox of captions;
# the JSON output shows the percentage match score for each caption.
iface = gr.Interface(


    fn=match_image_with_descriptions,


    inputs=[


        # type="pil" hands the function a PIL.Image (or None if nothing uploaded).
        gr.Image(type="pil", label="Upload an Image"),


        gr.Textbox(lines=6, placeholder="Enter one description per line...", label="Descriptions")


    ],


    outputs=gr.JSON(label="Match Scores (Sorted by Confidence %)"),


    title="🧠 CLIP Image-Text Matcher (Sorted, %)",


    description="Upload an image and enter multiple captions (one per line). The AI will compare each one and show match scores in % — sorted from best to worst.",


)


# Starts the local web server (blocks until the process is stopped).
iface.launch()
|
|
|