---
license: mit
---

```
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import requests
import torch 

model = AutoModelForCausalLM.from_pretrained(
    "anananan116/TinyVLM",
    trust_remote_code = True,
    torch_dtype=torch.float16,
    ).to('cuda').eval()
tokenizer = AutoTokenizer.from_pretrained("anananan116/TinyVLM")

# `<IMGPLH>` is the image placeholder which will be replaced by image embeddings. 
# the number of `<IMGPLH>` should be equal to the number of input images

prompt = "Here's an image:<IMGPLH>Describe this image."
image = Image.open(requests.get('https://github.com/anananan116/TinyVLM/blob/main/test.png?raw=true',stream=True).raw)
inputs = model.prepare_input_ids_for_generation([prompt], [image], tokenizer)

with torch.no_grad():
    outputs = model.generate(
        input_ids=inputs['input_ids'].to("cuda"), 
        attention_mask=inputs['attention_mask'].to("cuda"), 
        encoded_image = inputs["encoded_image"], 
        max_new_tokens=128, 
        do_sample=True
    )

output_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)
```