sangioai commited on
Commit
f31c509
·
verified ·
1 Parent(s): e6b907d

Fix: seamless integration with 🤗 generation pipelines

Browse files

- Add **input_ids** parsing to the *generate* function, as commonly passed by 🤗 pipelines

Example:
```python
# Load model and tokenizer
# (original example bound the tokenizer to `tok` but used it as `tokenizer`
# below — that NameError is fixed here by using one consistent name)
tokenizer = AutoTokenizer.from_pretrained("apple/FastVLM-7B", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "apple/FastVLM-7B",
    # fp16 on GPU, fp32 on CPU — fp16 inference on CPU is slow/unsupported
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
    trust_remote_code=True,
)

# Build prompt
messages = [
    {"role": "user", "content": "Describe San Francisco"}
]

# Tokenize — returns a dict (input_ids, attention_mask) moved to the model device
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

# Generate (no gradients needed for inference)
with torch.no_grad():
    out = model.generate(
        **inputs,
        max_new_tokens=128,
    )

print(tokenizer.decode(out[0], skip_special_tokens=True))

```

Files changed (1) hide show
  1. llava_qwen.py +1 -0
llava_qwen.py CHANGED
@@ -2186,6 +2186,7 @@ class LlavaQwen2ForCausalLM(Qwen2ForCausalLM, LlavaMetaForCausalLM):
2186
  ) -> Union[GenerateOutput, torch.LongTensor]:
2187
  position_ids = kwargs.pop("position_ids", None)
2188
  attention_mask = kwargs.pop("attention_mask", None)
 
2189
  if "inputs_embeds" in kwargs:
2190
  raise NotImplementedError("`inputs_embeds` is not supported")
2191
 
 
2186
  ) -> Union[GenerateOutput, torch.LongTensor]:
2187
  position_ids = kwargs.pop("position_ids", None)
2188
  attention_mask = kwargs.pop("attention_mask", None)
2189
+ inputs = kwargs.pop("input_ids", inputs)
2190
  if "inputs_embeds" in kwargs:
2191
  raise NotImplementedError("`inputs_embeds` is not supported")
2192