File size: 1,025 Bytes
b1a427a
 
 
 
 
 
 
 
 
 
 
 
 
 
31fd1a2
b1a427a
 
 
76b3196
b1a427a
 
80c2f4a
 
b1a427a
a09bc7c
 
 
b1a427a
a09bc7c
b1a427a
1be2d5d
b1a427a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import io

import gradio as gr
import requests
import torch
import torchvision.transforms as transforms
from PIL import Image

from loader import get_loader

# Per-channel normalisation statistics (the standard ImageNet mean/std values).
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

# Preprocessing pipeline shared by the dataset loader and the inference path:
# resize, take a 224x224 centre crop, convert to a tensor, then normalise.
_preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
]
transform = transforms.Compose(_preprocessing_steps)

# get_loader returns both a loader and the dataset; only `dataset` is used
# further down in this file (for its `vocab`).
train_loader, dataset = get_loader(
    root_folder='FlickrDataset/Images',
    annotation_file='captions.txt',
    transform=transform,
    num_workers=2,
)

# Checkpoint file holding the saved model weights.
filepath = "ImageCaptioningusingLSTM.pth"

from model import CNNtoRNN

# The output vocabulary size is taken from the dataset's vocabulary.
# NOTE(review): the remaining hyper-parameters presumably have to match the
# ones used when the checkpoint was saved - confirm against the training code.
model = CNNtoRNN(
    embed_size=256,
    hidden_size=256,
    vocab_size=len(dataset.vocab),
    num_layers=1,
)

# map_location='cpu' remaps the stored tensors onto the CPU while loading.
model.load_state_dict(torch.load(filepath, map_location='cpu'))

# Put the module in evaluation mode before serving predictions.
model.eval()

def launch(input):
    """Download the image at a URL and return a caption for it.

    Args:
        input: URL of the image to caption. The name shadows the builtin
            ``input`` but is kept so the function's signature is unchanged.

    Returns:
        The caption as one space-separated string, built from the model's
        output with its first and last elements dropped.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server responds with an HTTP error status.
    """
    # A timeout stops a stalled server from blocking the handler forever; the
    # context manager releases the connection once the body has been read.
    with requests.get(input, timeout=10) as response:
        # Fail early on 4xx/5xx instead of handing an error page to PIL.
        response.raise_for_status()
        # `response.content` has any content-encoding already decoded,
        # which the raw socket stream does not.
        picture = Image.open(io.BytesIO(response.content))

    # Add a leading batch dimension of size one.
    batch = transform(picture.convert('RGB')).unsqueeze(0)

    # Gradients are not needed for inference.
    with torch.no_grad():
        tokens = model.caption_image(batch, dataset.vocab)

    # NOTE(review): the dropped first/last items are presumably the
    # start/end-of-sentence markers - confirm against model.caption_image.
    return " ".join(tokens[1:-1])

# Text-in / text-out Gradio UI: the textbox value is passed to `launch` and
# the string it returns is displayed as the result.
iface = gr.Interface(
    fn=launch,
    inputs="text",
    outputs="text",
)

# Start the web app.
iface.launch()