Instructions for using OS-Copilot/OS-Atlas-Pro-7B with libraries, inference providers, notebooks, and local apps. Follow the links below to get started.
- Libraries
- Transformers
How to use OS-Copilot/OS-Atlas-Pro-7B with Transformers:
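Both snippets below assume a recent transformers with PyTorch and image support installed. If needed (an assumption about your environment, not part of the model card):

```bash
pip install "transformers[torch]" pillow
```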
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="OS-Copilot/OS-Atlas-Pro-7B")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"},
        ],
    },
]
pipe(text=messages)
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("OS-Copilot/OS-Atlas-Pro-7B")
model = AutoModelForImageTextToText.from_pretrained("OS-Copilot/OS-Atlas-Pro-7B")

messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"},
        ],
    },
]

inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use OS-Copilot/OS-Atlas-Pro-7B with vLLM:
Install from pip and serve the model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "OS-Copilot/OS-Atlas-Pro-7B"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "OS-Copilot/OS-Atlas-Pro-7B",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker
```bash
docker model run hf.co/OS-Copilot/OS-Atlas-Pro-7B
```
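Because both vLLM and SGLang expose an OpenAI-compatible API, you can also call the server from Python with the openai client instead of curl. A minimal sketch, assuming the vLLM server from the step above is running on the default port 8000 (for the SGLang server below, point `base_url` at port 30000):

```python
# Minimal OpenAI-client sketch for the local vLLM server (assumption:
# started with `vllm serve "OS-Copilot/OS-Atlas-Pro-7B"` on port 8000;
# for SGLang use base_url="http://localhost:30000/v1").
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="OS-Copilot/OS-Atlas-Pro-7B",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image in one sentence."},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                    },
                },
            ],
        }
    ],
)
print(response.choices[0].message.content)
```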
- SGLang
How to use OS-Copilot/OS-Atlas-Pro-7B with SGLang:
Install from pip and serve the model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "OS-Copilot/OS-Atlas-Pro-7B" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "OS-Copilot/OS-Atlas-Pro-7B",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
    --model-path "OS-Copilot/OS-Atlas-Pro-7B" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "OS-Copilot/OS-Atlas-Pro-7B",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
- Docker Model Runner
How to use OS-Copilot/OS-Atlas-Pro-7B with Docker Model Runner:
```bash
docker model run hf.co/OS-Copilot/OS-Atlas-Pro-7B
```
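Docker Model Runner also serves an OpenAI-compatible API. A hedged sketch for calling it from the host, assuming TCP access is enabled on Docker's documented default port 12434 (the exact port and path depend on your Docker setup):

```bash
# Assumption: host TCP access to Model Runner is enabled, e.g. via
# Docker Desktop settings or `docker desktop enable model-runner --tcp 12434`.
curl -X POST "http://localhost:12434/engines/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "hf.co/OS-Copilot/OS-Atlas-Pro-7B",
        "messages": [
            {"role": "user", "content": "Describe yourself in one sentence."}
        ]
    }'
```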
[Error in repo inference] ValueError: size must contain 'shortest_edge' and 'longest_edge' keys.
When I run the inference example, it fails while loading the processor with the following error:
```text
ValueError                                Traceback (most recent call last)
Cell In[4], line 1
----> 1 processor = AutoProcessor.from_pretrained("OS-Copilot/OS-Atlas-Base-7B")

File /venv/main/lib/python3.12/site-packages/transformers/models/auto/processing_auto.py:376, in AutoProcessor.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
    372     return processor_class.from_pretrained(
    373         pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
    374     )
    375 elif processor_class is not None:
--> 376     return processor_class.from_pretrained(
    377         pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
    378     )
    379 # Last try: we use the PROCESSOR_MAPPING.
    380 elif type(config) in PROCESSOR_MAPPING:

File /venv/main/lib/python3.12/site-packages/transformers/processing_utils.py:1185, in ProcessorMixin.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, **kwargs)
   1182 if token is not None:
   1183     kwargs["token"] = token
-> 1185 args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
   1186 processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
   1187 return cls.from_args_and_dict(args, processor_dict, **kwargs)

File /venv/main/lib/python3.12/site-packages/transformers/processing_utils.py:1248, in ProcessorMixin._get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
   1245 else:
   1246     attribute_class = cls.get_possibly_dynamic_module(class_name)
-> 1248 args.append(attribute_class.from_pretrained(pretrained_model_name_or_path, **kwargs))
   1249 return args

File /venv/main/lib/python3.12/site-packages/transformers/models/auto/image_processing_auto.py:564, in AutoImageProcessor.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
    562     return image_processor_class.from_dict(config_dict, **kwargs)
    563 elif image_processor_class is not None:
--> 564     return image_processor_class.from_dict(config_dict, **kwargs)
    565 # Last try: we use the IMAGE_PROCESSOR_MAPPING.
    566 elif type(config) in IMAGE_PROCESSOR_MAPPING:

File /venv/main/lib/python3.12/site-packages/transformers/image_processing_base.py:422, in ImageProcessingMixin.from_dict(cls, image_processor_dict, **kwargs)
    419 if "crop_size" in kwargs and "crop_size" in image_processor_dict:
    420     image_processor_dict["crop_size"] = kwargs.pop("crop_size")
--> 422 image_processor = cls(**image_processor_dict)
    424 # Update image_processor with kwargs if needed
    425 to_remove = []

File /venv/main/lib/python3.12/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py:143, in Qwen2VLImageProcessor.__init__(self, do_resize, size, resample, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_convert_rgb, min_pixels, max_pixels, patch_size, temporal_patch_size, merge_size, **kwargs)
    141 super().__init__(**kwargs)
    142 if size is not None and ("shortest_edge" not in size or "longest_edge" not in size):
--> 143     raise ValueError("size must contain 'shortest_edge' and 'longest_edge' keys.")
    144 else:
    145     size = {"shortest_edge": 56 * 56, "longest_edge": 28 * 28 * 1280}

ValueError: size must contain 'shortest_edge' and 'longest_edge' keys.
```
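The traceback points at the cause: the repo's preprocessor_config.json ships a `size` entry without the `shortest_edge`/`longest_edge` keys that newer Qwen2-VL image processors require. A workaround sketch (an assumption, not an official fix) is to override `size` at load time, which works because `ImageProcessingMixin.from_dict` lets keyword arguments replace config values, as the traceback itself shows for `crop_size`:

```python
from transformers import AutoProcessor

# Workaround sketch (assumption): replace the repo's incompatible `size`
# entry with the keys the installed Qwen2VLImageProcessor expects. The
# values mirror the library defaults visible in the traceback.
processor = AutoProcessor.from_pretrained(
    "OS-Copilot/OS-Atlas-Base-7B",
    size={"shortest_edge": 56 * 56, "longest_edge": 28 * 28 * 1280},
)
```

Pinning transformers to the older version the model card was written against may also avoid the stricter key check.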