Spaces:
Runtime error
Commit · 15ed10c
1 Parent(s): 5842ec8
added files
Browse files
app.py
CHANGED
|
@@ -61,7 +61,7 @@ model, image_processor, tokenizer = create_model_and_transforms(
|
|
| 61 |
|
| 62 |
checkpoint_path = hf_hub_download("openflamingo/OpenFlamingo-9B-vitl-mpt7b", "checkpoint.pt")
|
| 63 |
model.load_state_dict(torch.load(checkpoint_path), strict=False)
|
| 64 |
-
model.eval()
|
| 65 |
|
| 66 |
def generate(
|
| 67 |
idx,
|
|
@@ -152,7 +152,7 @@ def generate(
|
|
| 152 |
|
| 153 |
# with torch.cuda.amp.autocast(dtype=torch.bfloat16):
|
| 154 |
output = model.generate(
|
| 155 |
-
vision_x=vision_x
|
| 156 |
lang_x=input_ids.to("cuda"),
|
| 157 |
attention_mask=attention_mask.to("cuda"),
|
| 158 |
max_new_tokens=30,
|
|
|
|
| 61 |
|
| 62 |
checkpoint_path = hf_hub_download("openflamingo/OpenFlamingo-9B-vitl-mpt7b", "checkpoint.pt")
|
| 63 |
model.load_state_dict(torch.load(checkpoint_path), strict=False)
|
| 64 |
+
model.eval()
|
| 65 |
|
| 66 |
def generate(
|
| 67 |
idx,
|
|
|
|
| 152 |
|
| 153 |
# with torch.cuda.amp.autocast(dtype=torch.bfloat16):
|
| 154 |
output = model.generate(
|
| 155 |
+
vision_x=vision_x,
|
| 156 |
lang_x=input_ids.to("cuda"),
|
| 157 |
attention_mask=attention_mask.to("cuda"),
|
| 158 |
max_new_tokens=30,
|
open_flamingo/open_flamingo/src/factory.py
CHANGED
|
@@ -79,6 +79,7 @@ def create_model_and_transforms(
|
|
| 79 |
decoder_layers_attr_name = _infer_decoder_layers_attr_name(lang_encoder)
|
| 80 |
lang_encoder.set_decoder_layers_attr_name(decoder_layers_attr_name)
|
| 81 |
lang_encoder.resize_token_embeddings(len(text_tokenizer))
|
|
|
|
| 82 |
|
| 83 |
model = Flamingo(
|
| 84 |
vision_encoder,
|
|
|
|
| 79 |
decoder_layers_attr_name = _infer_decoder_layers_attr_name(lang_encoder)
|
| 80 |
lang_encoder.set_decoder_layers_attr_name(decoder_layers_attr_name)
|
| 81 |
lang_encoder.resize_token_embeddings(len(text_tokenizer))
|
| 82 |
+
lang_encoder.to(0)
|
| 83 |
|
| 84 |
model = Flamingo(
|
| 85 |
vision_encoder,
|
open_flamingo/open_flamingo/src/flamingo.py
CHANGED
|
@@ -212,7 +212,7 @@ class Flamingo(nn.Module):
|
|
| 212 |
with torch.no_grad():
|
| 213 |
vision_x = self.vision_encoder(vision_x)[1]
|
| 214 |
vision_x = rearrange(vision_x, "(b T F) v d -> b T F v d", b=b, T=T, F=F)
|
| 215 |
-
vision_x = self.perceiver(vision_x)
|
| 216 |
|
| 217 |
for layer in self.lang_encoder._get_decoder_layers():
|
| 218 |
layer.condition_vis_x(vision_x)
|
|
|
|
| 212 |
with torch.no_grad():
|
| 213 |
vision_x = self.vision_encoder(vision_x)[1]
|
| 214 |
vision_x = rearrange(vision_x, "(b T F) v d -> b T F v d", b=b, T=T, F=F)
|
| 215 |
+
vision_x = self.perceiver(vision_x).to(0)
|
| 216 |
|
| 217 |
for layer in self.lang_encoder._get_decoder_layers():
|
| 218 |
layer.condition_vis_x(vision_x)
|