# clip-vit-score / app.py
# Author: fajarluhung — commit 51e0330 ("Update app.py")
import os
import open_clip
from PIL import Image
import time
import numpy as np
import torch
# OpenCLIP ViT-B/32 (QuickGELU variant) paired with LAION-400M weights
# (epoch-32 checkpoint tag used by open_clip).
model_name = "ViT-B-32-quickgelu"
pretrained_path = "laion400m_e32"
# Builds the model and its matching image preprocessing transform; the
# pretrained weights are presumably fetched/cached on first run — confirm
# against the open_clip version pinned for this Space.
clip_model, _, clip_preprocess = open_clip.create_model_and_transforms(model_name, pretrained_path)
tokenizer = open_clip.get_tokenizer(model_name)
# Inference currently runs on CPU; uncomment the two lines below for GPU.
# device = torch.device("cuda:0")
# clip_model = clip_model.to(device)
def clip_inference(image_path, texts):
    """Zero-shot score an image against comma-separated text prompts.

    Args:
        image_path: Filesystem path to the input image.
        texts: Comma-separated class prompts, e.g. ``"a cat, a dog"``.
            Whitespace around each prompt is stripped.

    Returns:
        A list of single-entry dicts, one per prompt, mapping the prompt
        string to its CLIP cosine similarity scaled by 100. NOTE: these
        are scaled similarities, not probabilities — no softmax is
        applied (kept as-is to preserve the existing output contract).
    """
    start = time.time()
    # Image.open is lazy and keeps the file descriptor open; the context
    # manager ensures it is closed once preprocessing has consumed it.
    with Image.open(image_path) as img:
        images = clip_preprocess(img).unsqueeze(0)
    # images = images.to(device)
    labels = [element.strip() for element in texts.split(",")]
    # texts_token = tokenizer(labels).to(device)
    texts_token = tokenizer(labels)
    with torch.no_grad():
        image_features = clip_model.encode_image(images)
        image_features /= image_features.norm(dim=-1, keepdim=True)
        text_features = clip_model.encode_text(texts_token)
        text_features /= text_features.norm(dim=-1, keepdim=True)
        # (1, num_labels) matrix of scaled cosine similarities.
        text_probs = 100.0 * image_features @ text_features.T
    text_probs = text_probs.cpu().numpy().tolist()
    # One {prompt: score} dict per label, flattened across the (single)
    # image row to match the original output shape.
    results = [
        {label: confidence}
        for row in text_probs
        for confidence, label in zip(row, labels)
    ]
    print("classification time:", time.time() - start)
    return results
import gradio as gr  # NOTE(review): mid-file import — consider moving to the top of the file.

# Build the demo UI; .queue() enables request queuing on the Space.
block = gr.Blocks().queue()
with block:
    gr.Markdown("# Anomaly Detection Demo")
    gr.Markdown("### Using OVOD and OVIC")
    with gr.Tab("OVIC"):
        with gr.Row():
            with gr.Column():
                # NOTE(review): `source=` on gr.Image, `label=` on gr.Button,
                # and `gr.outputs.*` are legacy Gradio 3.x APIs removed in
                # Gradio 4 — verify the Space pins gradio<4 or migrate.
                clip_input_image = gr.Image(source='upload', type="filepath")
                clip_prompt= gr.Textbox(label="Classification Prompt")
                clip_run_button = gr.Button(label="Run")
            with gr.Column():
                clip_output = gr.outputs.Textbox(label="Output")
    # Wire the button to the CLIP inference function defined above:
    # (image path, prompt string) -> list of {prompt: score} dicts.
    clip_run_button.click(fn=clip_inference, inputs=[clip_input_image, clip_prompt], outputs=[clip_output])
block.launch()