Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -56,8 +56,6 @@ model2.config.id2label = mapping
|
|
| 56 |
model3 = models.resnet101(pretrained=False)
|
| 57 |
model3.fc = torch.nn.Linear(2048, len(genres))
|
| 58 |
|
| 59 |
-
device = torch.device('cpu')
|
| 60 |
-
|
| 61 |
|
| 62 |
# In[5]:
|
| 63 |
|
|
@@ -94,19 +92,19 @@ class Multimodal(torch.nn.Module):
|
|
| 94 |
|
| 95 |
model = Multimodal(model1, model2, model3)
|
| 96 |
model.load_state_dict(torch.load('multimodel.pt', map_location=torch.device('cpu')))
|
| 97 |
-
model.to(device)
|
| 98 |
model.eval()
|
| 99 |
-
|
| 100 |
-
model_gen.eval()
|
| 101 |
|
| 102 |
|
| 103 |
# In[8]:
|
| 104 |
|
| 105 |
|
| 106 |
-
def generate_plot(title: str, model: AutoModelForSeq2SeqLM, tokenizer: AutoTokenizer) -> str:
    """Generate a plot description for a movie title with a seq2seq model.

    The title is inserted into a fixed question prompt, the prompt is
    tokenized, and the model's first generated sequence is decoded back
    to text.

    Args:
        title: Movie title to ask about.
        model: Model exposing ``generate``; called with ``max_length=256``,
            ``do_sample=True`` and ``temperature=0.09``.
        tokenizer: Tokenizer used both to encode the prompt and to decode
            the generated sequence.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens skipped.
    """
    quote = 'What is the story of the movie {}?'

    # The tokenizer call returns an encoding object, not a tensor; only its
    # ``input_ids`` field is a valid positional input for ``generate``.
    input_ids = tokenizer(quote.format(title), return_tensors='pt').input_ids
    output = model.generate(input_ids, max_length=256, do_sample=True, temperature=0.09)
    return tokenizer.decode(output[0], skip_special_tokens=True)
|
| 112 |
|
|
@@ -117,24 +115,25 @@ def generate_plot(title: str, model: AutoModelForSeq2SeqLM, tokenizer: AutoToken
|
|
| 117 |
def inference(title, image,
|
| 118 |
tokenizer1=tokenizer1, tokenizer2=tokenizer2, tokenizer_gen=tokenizer_gen,
|
| 119 |
model_gen=model_gen, model=model,
|
| 120 |
-
genres=genres):
|
| 121 |
title_input = tokenizer1(title, return_tensors='pt', padding=True, truncation=True)
|
| 122 |
-
title_input_ids = title_input['input_ids']
|
| 123 |
-
title_attention_mask = title_input['attention_mask']
|
| 124 |
|
| 125 |
-
plot = generate_plot(title, model_gen, tokenizer_gen)
|
| 126 |
plot_input = tokenizer2(plot, return_tensors='pt', padding=True, truncation=True)
|
| 127 |
-
plot_input_ids = plot_input['input_ids']
|
| 128 |
-
plot_attention_mask = plot_input['attention_mask']
|
| 129 |
|
| 130 |
# If image is not uploaded
|
| 131 |
if image is None:
|
| 132 |
-
image_input = torch.zeros((1, 3, 224, 224))
|
| 133 |
|
| 134 |
else:
|
| 135 |
image_input = image.resize((224, 224))
|
| 136 |
image_input = v2.ToTensor()(image_input)
|
| 137 |
image_input = image_input.unsqueeze(0)
|
|
|
|
| 138 |
|
| 139 |
output = model(title_input_ids, title_attention_mask, plot_input_ids, plot_attention_mask, image_input)
|
| 140 |
output = torch.sigmoid(output)
|
|
@@ -163,3 +162,4 @@ app = gr.Interface(fn=inference, inputs=["text", "pil"], outputs="text", title="
|
|
| 163 |
|
| 164 |
|
| 165 |
app.launch(share=True)
|
|
|
|
|
|
| 56 |
model3 = models.resnet101(pretrained=False)
|
| 57 |
model3.fc = torch.nn.Linear(2048, len(genres))
|
| 58 |
|
|
|
|
|
|
|
| 59 |
|
| 60 |
# In[5]:
|
| 61 |
|
|
|
|
| 92 |
|
| 93 |
model = Multimodal(model1, model2, model3)
|
| 94 |
model.load_state_dict(torch.load('multimodel.pt', map_location=torch.device('cpu')))
|
|
|
|
| 95 |
model.eval()
|
| 96 |
+
device = torch.device('cpu')
|
|
|
|
| 97 |
|
| 98 |
|
| 99 |
# In[8]:
|
| 100 |
|
| 101 |
|
| 102 |
+
def generate_plot(title: str, model: AutoModelForSeq2SeqLM, tokenizer: AutoTokenizer, device) -> str:
    """Generate a plot description for a movie title with a seq2seq model.

    The title is inserted into a fixed question prompt, the prompt is
    tokenized and moved to ``device``, and the model's first generated
    sequence is decoded back to text.

    Args:
        title: Movie title to ask about.
        model: Model exposing ``to``, ``eval`` and ``generate``. It is moved
            to ``device`` and switched to eval mode on every call.
        tokenizer: Tokenizer used both to encode the prompt and to decode
            the generated sequence.
        device: Target passed to ``.to()`` for both the model and the
            prompt's input ids.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens skipped.
    """
    quote = 'What is the story of the movie {}?'

    # Operate on the model that was passed in rather than the module-level
    # ``model_gen`` global, so the function works for any caller-supplied model.
    model.to(device)
    model.eval()

    input_ids = tokenizer(quote.format(title), return_tensors='pt').input_ids.to(device)
    output = model.generate(input_ids, max_length=256, do_sample=True, temperature=0.09)
    return tokenizer.decode(output[0], skip_special_tokens=True)
|
| 110 |
|
|
|
|
| 115 |
def inference(title, image,
|
| 116 |
tokenizer1=tokenizer1, tokenizer2=tokenizer2, tokenizer_gen=tokenizer_gen,
|
| 117 |
model_gen=model_gen, model=model,
|
| 118 |
+
genres=genres, device=device):
|
| 119 |
title_input = tokenizer1(title, return_tensors='pt', padding=True, truncation=True)
|
| 120 |
+
title_input_ids = title_input['input_ids'].to(device)
|
| 121 |
+
title_attention_mask = title_input['attention_mask'].to(device)
|
| 122 |
|
| 123 |
+
plot = generate_plot(title, model_gen, tokenizer_gen, device)
|
| 124 |
plot_input = tokenizer2(plot, return_tensors='pt', padding=True, truncation=True)
|
| 125 |
+
plot_input_ids = plot_input['input_ids'].to(device)
|
| 126 |
+
plot_attention_mask = plot_input['attention_mask'].to(device)
|
| 127 |
|
| 128 |
# If image is not uploaded
|
| 129 |
if image is None:
|
| 130 |
+
image_input = torch.zeros((1, 3, 224, 224)).to(device)
|
| 131 |
|
| 132 |
else:
|
| 133 |
image_input = image.resize((224, 224))
|
| 134 |
image_input = v2.ToTensor()(image_input)
|
| 135 |
image_input = image_input.unsqueeze(0)
|
| 136 |
+
image_input = image_input.to(device)
|
| 137 |
|
| 138 |
output = model(title_input_ids, title_attention_mask, plot_input_ids, plot_attention_mask, image_input)
|
| 139 |
output = torch.sigmoid(output)
|
|
|
|
| 162 |
|
| 163 |
|
| 164 |
app.launch(share=True)
|
| 165 |
+
|