Fix: seamless integration with 🤗 generation pipelines
Browse files- Add **input_ids** parsing into the *generate* function, commonly used in 🤗 pipelines
Example:
```python
# Load models
tokenizer = AutoTokenizer.from_pretrained("apple/FastVLM-7B", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
"apple/FastVLM-7B",
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
device_map="auto",
trust_remote_code=True,
)
# Build prompt
messages = [
{"role": "user", "content": "Descrube san francisco"}
]
# Tokenize
inputs = tokenizer.apply_chat_template(
messages,
add_generation_prompt=True,
tokenize=True,
return_dict=True,
return_tensors="pt",
).to(model.device)
# Generate
with torch.no_grad():
out = model.generate(
**inputs,
max_new_tokens=128,
)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```
- llava_qwen.py +1 -0
|
```diff
@@ -2186,6 +2186,7 @@ class LlavaQwen2ForCausalLM(Qwen2ForCausalLM, LlavaMetaForCausalLM):
     ) -> Union[GenerateOutput, torch.LongTensor]:
         position_ids = kwargs.pop("position_ids", None)
         attention_mask = kwargs.pop("attention_mask", None)
+        inputs = kwargs.pop("input_ids", inputs)
         if "inputs_embeds" in kwargs:
             raise NotImplementedError("`inputs_embeds` is not supported")
```