Spaces:

ks415
/

CLIP-Demo

Sleeping

CLIP-Demo / app.py

Upload 2 files

19dd951 verified about 1 year ago

1.29 kB

	import gradio as gr
	import torch
	import clip
	from PIL import Image

	# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	def calculate_similarity(image, text, model_name):
	    """Return the CLIP cosine similarity between an image and a text.

	    Args:
	        image: Image to compare; the interface in this file supplies a PIL
	            image. ``None`` (no image provided) is rejected.
	        text: Text prompt to compare against the image.
	        model_name: Name of the CLIP model, forwarded to ``load_model``.
	            ``None`` or an empty value (no model selected) is rejected.

	    Returns:
	        The cosine similarity of the image and text embeddings as a plain
	        Python float.

	    Raises:
	        gr.Error: If the image or the model selection is missing, so the UI
	            shows a readable message instead of an opaque traceback.
	    """
	    # Validate up front: without these checks a missing input fails deep
	    # inside preprocess()/clip.load() with an unhelpful error.
	    if image is None:
	        raise gr.Error("画像を入力してください．")
	    if not model_name:
	        raise gr.Error("モデルを選択してください．")

	    model, preprocess = load_model(model_name)

	    # Preprocess the image and add a leading batch dimension.
	    image_input = preprocess(image).unsqueeze(0).to(device)

	    # Tokenize the text (as a batch of one).
	    text_tokens = clip.tokenize([text]).to(device)

	    # Compute the similarity; gradients are not needed for inference.
	    with torch.no_grad():
	        image_features = model.encode_image(image_input)
	        text_features = model.encode_text(text_tokens)
	        similarity = torch.cosine_similarity(image_features, text_features)

	    # The batch holds exactly one pair, so .item() extracts that single
	    # value as a Python float rather than a NumPy scalar.
	    return similarity.item()

	# Loaded CLIP models keyed by model name. Each entry keeps a full model in
	# memory, so this trades memory for not reloading weights on every request.
	_MODEL_CACHE = {}


	def load_model(model_name):
	    """Return the CLIP model and its preprocessing transform.

	    The result of ``clip.load`` is memoized per ``model_name`` so repeated
	    requests for the same model reuse the already-loaded weights.

	    Args:
	        model_name: Model identifier passed to ``clip.load``.

	    Returns:
	        The ``(model, preprocess)`` pair produced by ``clip.load`` for the
	        module-level ``device``.
	    """
	    if model_name not in _MODEL_CACHE:
	        _MODEL_CACHE[model_name] = clip.load(model_name, device=device)
	    return _MODEL_CACHE[model_name]

	# Gradio UI: an image, a text prompt and a CLIP model choice go in; the
	# similarity computed by calculate_similarity comes out as a number.
	iface = gr.Interface(
	    fn=calculate_similarity,
	    inputs=[
	        gr.Image(type="pil"),
	        gr.Textbox(lines=2, placeholder="A photo of a ..."),
	        gr.Radio(
	            ["ViT-B/32", "ViT-B/16", "ViT-L/14", "ViT-L/14@336px"],
	            # Preselect a model so submitting without touching the radio
	            # does not pass model_name=None to the callback.
	            value="ViT-B/32",
	            label="モデル選択",
	        ),
	    ],
	    outputs="number",
	    title="CLIPによる画像とテキストの類似度計算",
	    description="類似度を計算したい画像とテキストを入力し，使用するCLIPモデルを選択してください．",
	)

	# Start the web server for the interface.
	iface.launch()