Spaces:
Runtime error
Runtime error
| import os | |
| import json | |
| from typing import List | |
| import torch | |
| import torch.nn.functional as F | |
| import torchvision.transforms as T | |
| from PIL import Image | |
| from torchvision.transforms._transforms_video import NormalizeVideo | |
| from uniformer import uniformer_small, uniformer_base, uniformer_small_plus, uniformer_base_ls | |
| import gradio as gr | |
# Device on which to run the model
# Set to cuda to load on GPU
device = "cpu"

# Hugging Face serves raw file contents under /resolve/<revision>/; the
# /blob/<revision>/ URLs return the HTML file-viewer page instead of the file.
HF_REPO_URL = "https://huggingface.co/Andy1621/uniformer/resolve/main"


def _download(url, filename):
    """Fetch ``url`` into ``filename`` with wget, raising if wget fails.

    Args:
        url: Address of the file to download.
        filename: Local path the downloaded content is written to.

    Raises:
        RuntimeError: If wget exits with a non-zero status.
    """
    status = os.system(f'wget "{url}" -O "{filename}"')
    if status != 0:
        # wget -O creates the output file even when the transfer fails;
        # remove it so a truncated/empty file is not mistaken for real data.
        if os.path.exists(filename):
            os.remove(filename)
        raise RuntimeError(f"Failed to download {url} (wget exit status {status})")


_download(f"{HF_REPO_URL}/uniformer_small_in1k.pth", "uniformer_small_in1k.pth")

# Pick a pretrained model
model = uniformer_small()
state_dict = torch.load('uniformer_small_in1k.pth', map_location='cpu')
# NOTE(review): some checkpoints nest the weights under a "model" key; accept
# both layouts. Confirm which one this checkpoint file actually uses.
if isinstance(state_dict, dict) and "model" in state_dict:
    state_dict = state_dict["model"]
model.load_state_dict(state_dict)

# Set to eval mode and move to desired device
model = model.to(device)
model = model.eval()

_download(f"{HF_REPO_URL}/imagenet_class_index.json", "imagenet_class_index.json")
with open("imagenet_class_index.json", "r") as f:
    imagenet_classnames = json.load(f)

# Create an id to label name mapping; the second element of each JSON value
# is used as the display label.
imagenet_id_to_classname = {
    class_id: names[1] for class_id, names in imagenet_classnames.items()
}

# Example image offered in the demo UI.
_download(
    "https://upload.wikimedia.org/wikipedia/commons/thumb/c/c5/13-11-02-olb-by-RalfR-03.jpg/800px-13-11-02-olb-by-RalfR-03.jpg",
    "library.jpg",
)
def inference(img):
    """Classify an image and describe its five most likely labels.

    Args:
        img: PIL image supplied by the Gradio image input.

    Returns:
        A string of the form ``"Top 5 predicted labels: a, b, c, d, e"``,
        with labels looked up in ``imagenet_id_to_classname``.
    """
    image_transform = T.Compose(
        [
            T.Resize(224),
            T.CenterCrop(224),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    )
    # Normalize is configured with three-channel statistics, so force RGB to
    # keep grayscale / RGBA inputs from failing on a channel-count mismatch.
    image = image_transform(img.convert("RGB"))

    # The model expects inputs of shape: B x C x T x H x W
    # NOTE(review): this 5-D layout and the input_type keyword below are kept
    # as-is; confirm that the uniformer module's forward() accepts them.
    # The tensor must live on the same device the model was moved to.
    image = image[None, :, None, ...].to(device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        prediction = model(image, input_type="image")
    prediction = F.softmax(prediction, dim=1)

    pred_classes = prediction.topk(k=5).indices
    pred_class_names = [imagenet_id_to_classname[str(i.item())] for i in pred_classes[0]]
    return "Top 5 predicted labels: %s" % ", ".join(pred_class_names)
# Gradio components: a PIL image goes in, a text box comes out.
# NOTE(review): gr.inputs / gr.outputs and the launch() keyword arguments used
# below are version-dependent; confirm against the Gradio release this app pins.
inputs = gr.inputs.Image(type='pil')
outputs = gr.outputs.Textbox(label="Output")

# Text shown around the demo widget.
title = "UniFormer-S"
description = "Gradio demo for UniFormer: To use it, simply upload your image, or click one of the examples to load them. Read more at the links below."
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2201.09450' target='_blank'>UniFormer: Unifying Convolution and Self-attention for Visual Recognition</a> | <a href='https://github.com/Sense-X/UniFormer' target='_blank'>Github Repo</a></p>"

# Build the interface first, then start serving it.
demo = gr.Interface(
    inference,
    inputs,
    outputs,
    title=title,
    description=description,
    article=article,
    examples=[['library.jpg']],
)
demo.launch(enable_queue=True, cache_examples=True)