Spaces:
Sleeping
Sleeping
Put the model on CPU, because there's no GPU
Browse files
app.py
CHANGED
|
@@ -15,7 +15,7 @@ processor = AutoProcessor.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3")
|
|
| 15 |
model = Idefics3ForConditionalGeneration.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3",
|
| 16 |
torch_dtype=torch.bfloat16,
|
| 17 |
#_attn_implementation="flash_attention_2",
|
| 18 |
-
trust_remote_code=True).to("cuda")
|
| 19 |
|
| 20 |
BAD_WORDS_IDS = processor.tokenizer(["<image>", "<fake_token_around_image>"], add_special_tokens=False).input_ids
|
| 21 |
EOS_WORDS_IDS = [processor.tokenizer.eos_token_id]
|
|
@@ -50,7 +50,9 @@ def model_inference(
|
|
| 50 |
|
| 51 |
prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)
|
| 52 |
inputs = processor(text=prompt, images=[images], return_tensors="pt")
|
| 53 |
-
inputs = {k: v.to("cuda") for k, v in inputs.items()}
|
|
|
|
|
|
|
| 54 |
|
| 55 |
generation_args = {
|
| 56 |
"max_new_tokens": max_new_tokens,
|
|
|
|
| 15 |
model = Idefics3ForConditionalGeneration.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3",
|
| 16 |
torch_dtype=torch.bfloat16,
|
| 17 |
#_attn_implementation="flash_attention_2",
|
| 18 |
+
trust_remote_code=True)#.to("cuda")
|
| 19 |
|
| 20 |
BAD_WORDS_IDS = processor.tokenizer(["<image>", "<fake_token_around_image>"], add_special_tokens=False).input_ids
|
| 21 |
EOS_WORDS_IDS = [processor.tokenizer.eos_token_id]
|
|
|
|
| 50 |
|
| 51 |
prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)
|
| 52 |
inputs = processor(text=prompt, images=[images], return_tensors="pt")
|
| 53 |
+
# inputs = {k: v.to("cuda") for k, v in inputs.items()}
|
| 54 |
+
inputs = {k: v for k, v in inputs.items()}
|
| 55 |
+
|
| 56 |
|
| 57 |
generation_args = {
|
| 58 |
"max_new_tokens": max_new_tokens,
|