Update README.md
Browse files
README.md
CHANGED
|
@@ -166,47 +166,6 @@ decoded = processor.decode(generate_ids[0, inputs["input_ids"].shape[-1] :], ski
|
|
| 166 |
print(decoded)
|
| 167 |
```
|
| 168 |
|
| 169 |
-
### Requirements
|
| 170 |
-
|
| 171 |
-
To run STEP3-VL-10B efficiently, we recommend setting up a Python environment (>=3.10) with **vLLM**:
|
| 172 |
-
|
| 173 |
-
```bash
|
| 174 |
-
pip install "vllm>=0.6.3"
|
| 175 |
-
```
|
| 176 |
-
|
| 177 |
-
### vLLM Inference Example
|
| 178 |
-
|
| 179 |
-
Below is a minimal example to load the model and generate a response using vLLM's chat API.
|
| 180 |
-
|
| 181 |
-
```python
|
| 182 |
-
from vllm import LLM, SamplingParams
|
| 183 |
-
|
| 184 |
-
# 1. Load the model
|
| 185 |
-
# Ensure you have ~24GB VRAM for BF16 inference
|
| 186 |
-
llm = LLM(
|
| 187 |
-
model="stepfun-ai/Step3-VL-10B",
|
| 188 |
-
trust_remote_code=True,
|
| 189 |
-
gpu_memory_utilization=0.95
|
| 190 |
-
)
|
| 191 |
-
|
| 192 |
-
# 2. Prepare input (Supports local paths or URLs)
|
| 193 |
-
messages = [
|
| 194 |
-
{
|
| 195 |
-
"role": "user",
|
| 196 |
-
"content": [
|
| 197 |
-
{"type": "image", "image": "https://modelscope.oss-cn-beijing.aliyuncs.com/resource/demo.jpg"},
|
| 198 |
-
{"type": "text", "text": "Describe this image in detail."}
|
| 199 |
-
]
|
| 200 |
-
}
|
| 201 |
-
]
|
| 202 |
-
|
| 203 |
-
# 3. Generate
|
| 204 |
-
sampling_params = SamplingParams(temperature=0.1, max_tokens=1024)
|
| 205 |
-
outputs = llm.chat(messages=messages, sampling_params=sampling_params)
|
| 206 |
-
|
| 207 |
-
print(f"Output: {outputs[0].outputs[0].text}")
|
| 208 |
-
```
|
| 209 |
-
|
| 210 |
## 📜 Citation
|
| 211 |
|
| 212 |
If you find this project useful in your research, please cite our technical report:
|
|
|
|
| 166 |
print(decoded)
|
| 167 |
```
|
| 168 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
## 📜 Citation
|
| 170 |
|
| 171 |
If you find this project useful in your research, please cite our technical report:
|