# MARC Collection — 2 items • Updated
MARC (Memory-Augmented RL Token Compression), accepted at ICLR 2026.
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
import torch

# Load the model in bfloat16 and let Accelerate place it across available devices.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "path/to/model",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained("path/to/model")

# Prepare the chat-style video input.
messages = [{
    "role": "user",
    "content": [
        {"type": "video", "video": "path/to/video.mp4"},
        {"type": "text", "text": "What is happening in this video?"}
    ]
}]

# Render the chat template to a prompt string and collect the video sources
# referenced in the messages. (The original snippet passed an undefined
# `videos` variable and fed raw `messages=` to the processor, which expects
# rendered text plus the media inputs.)
text = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
videos = [
    item["video"]
    for msg in messages
    for item in msg["content"]
    if item.get("type") == "video"
]

# Generate with compression enabled.
inputs = processor(
    text=[text],
    videos=videos,
    compress=True,  # Enable MARC token compression
    return_tensors="pt",
).to(model.device)  # follow the model's device instead of hard-coding "cuda"
outputs = model.generate(**inputs, compress=True, max_new_tokens=512)
response = processor.decode(outputs[0], skip_special_tokens=True)
See inference_script/inference_example.py for a complete example.