Spaces:
Runtime error
Runtime error
add prompt
Browse files
app.py
CHANGED
|
@@ -231,7 +231,7 @@ from transformers import AutoTokenizer
|
|
| 231 |
tokenizer = AutoTokenizer.from_pretrained("imthanhlv/gpt2news")
|
| 232 |
|
| 233 |
|
| 234 |
-
def inference(img, text, is_translation):
|
| 235 |
prefix_length = 10
|
| 236 |
model = ClipCaptionModel(prefix_length)
|
| 237 |
model_path = 'sat_019.pt'
|
|
@@ -247,7 +247,7 @@ def inference(img, text, is_translation):
|
|
| 247 |
with torch.no_grad():
|
| 248 |
prefix = clip_model.encode_text(text).to(device, dtype=torch.float32)
|
| 249 |
prefix_embed = model.clip_project(prefix).reshape(1, prefix_length, -1)
|
| 250 |
-
generated_text_prefix = generate_beam(model, tokenizer, embed=prefix_embed)[0]
|
| 251 |
|
| 252 |
else:
|
| 253 |
if img is None:
|
|
@@ -266,17 +266,18 @@ def inference(img, text, is_translation):
|
|
| 266 |
title = "CLIP Dual encoder"
|
| 267 |
description = "You can translate English to Vietnamese or generate Vietnamese caption from image"
|
| 268 |
examples=[
|
| 269 |
-
["examples/drug.jpg","", False],
|
| 270 |
-
["examples/harry.jpeg","", False],
|
| 271 |
-
["examples/OldTrafford.jpeg","", False],
|
| 272 |
-
["examples/coffee.jpg","", False],
|
| 273 |
-
["", "What is your name?", True]
|
| 274 |
]
|
| 275 |
|
| 276 |
inputs = [
|
| 277 |
gr.inputs.Image(type="file", label="Image to generate Vietnamese caption", optional=True),
|
| 278 |
gr.inputs.Textbox(lines=2, placeholder="English sentence for translation"),
|
| 279 |
-
gr.inputs.Checkbox()
|
|
|
|
| 280 |
]
|
| 281 |
|
| 282 |
gr.Interface(
|
|
|
|
| 231 |
tokenizer = AutoTokenizer.from_pretrained("imthanhlv/gpt2news")
|
| 232 |
|
| 233 |
|
| 234 |
+
def inference(img, text, is_translation, prompt=None):
|
| 235 |
prefix_length = 10
|
| 236 |
model = ClipCaptionModel(prefix_length)
|
| 237 |
model_path = 'sat_019.pt'
|
|
|
|
| 247 |
with torch.no_grad():
|
| 248 |
prefix = clip_model.encode_text(text).to(device, dtype=torch.float32)
|
| 249 |
prefix_embed = model.clip_project(prefix).reshape(1, prefix_length, -1)
|
| 250 |
+
generated_text_prefix = generate_beam(model, tokenizer, embed=prefix_embed, prompt=prompt)[0]
|
| 251 |
|
| 252 |
else:
|
| 253 |
if img is None:
|
|
|
|
| 266 |
title = "CLIP Dual encoder"
|
| 267 |
description = "You can translate English to Vietnamese or generate Vietnamese caption from image"
|
| 268 |
examples=[
|
| 269 |
+
["examples/drug.jpg","", False, "Một bức ảnh về"],
|
| 270 |
+
["examples/harry.jpeg","", False, "Một bức ảnh về"],
|
| 271 |
+
["examples/OldTrafford.jpeg","", False, "Một bức ảnh về"],
|
| 272 |
+
["examples/coffee.jpg","", False, "Một bức ảnh về"],
|
| 273 |
+
["", "What is your name?", True, "trong Tiếng Việt có nghĩa là"]
|
| 274 |
]
|
| 275 |
|
| 276 |
inputs = [
|
| 277 |
gr.inputs.Image(type="file", label="Image to generate Vietnamese caption", optional=True),
|
| 278 |
gr.inputs.Textbox(lines=2, placeholder="English sentence for translation"),
|
| 279 |
+
gr.inputs.Checkbox(),
|
| 280 |
+
gr.inputs.Textbox(lines=1, placeholder="Prompt [Optional]", optional=True)
|
| 281 |
]
|
| 282 |
|
| 283 |
gr.Interface(
|