Instructions to use internlm/Intern-S1-Pro with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use internlm/Intern-S1-Pro with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="internlm/Intern-S1-Pro", trust_remote_code=True)
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained("internlm/Intern-S1-Pro", trust_remote_code=True, dtype="auto")

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use internlm/Intern-S1-Pro with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "internlm/Intern-S1-Pro"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "internlm/Intern-S1-Pro",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Use Docker

docker model run hf.co/internlm/Intern-S1-Pro

SGLang

How to use internlm/Intern-S1-Pro with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "internlm/Intern-S1-Pro" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "internlm/Intern-S1-Pro",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "internlm/Intern-S1-Pro" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "internlm/Intern-S1-Pro",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Docker Model Runner
How to use internlm/Intern-S1-Pro with Docker Model Runner:
```
docker model run hf.co/internlm/Intern-S1-Pro
```

yehaochen committed on Feb 12

Commit

bbe1e5b

1 Parent(s): 262600b

[test] update time series test scripts

Browse files

Files changed (2) hide show

0092638_seism.npy +3 -0
test_inference_ts.py +78 -0

0092638_seism.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c2b94653c6964b630038897a27cb6d276ff866d9ecd1f6419358b9407f0df62e
+size 72128

test_inference_ts.py ADDED Viewed

	@@ -0,0 +1,78 @@

+# Time-series inference test script: loads the config, processor and model from
+# the directory containing this file, sends one time-series chat message, and
+# prints the decoded reply.
+from pathlib import Path
+import torch
+from transformers import AutoConfig, AutoModelForCausalLM, AutoProcessor
+model_path = Path(__file__).parent.resolve()
+print(f"Loading model from: {model_path}")
+# Load the model configuration
+config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
+print(f"Model config: {config.model_type}")
+print(f"Architecture: {config.architectures}")
+# Load the processor (tokenizer + image processor + ts processor)
+print("\nLoading processor...")
+processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
+# Load the model (bfloat16 precision and automatic device mapping)
+print("\nLoading model...")
+model = AutoModelForCausalLM.from_pretrained(
+    model_path,
+    dtype=torch.bfloat16,
+    device_map="auto",
+    # attn_implementation="flash_attention_2",  # time series does not support flash_attn yet; adding this line at load time raises an error
+    trust_remote_code=True
+)
+print(f"✓ Model loaded successfully!")
+print(f"Model type: {type(model).__name__}")
+print(f"Model device: {model.device}")
+# ============================================================================
+# Test 3: time-series conversation
+# ============================================================================
+print("\n" + "=" * 80)
+print("测试 3: 时序对话")
+print("=" * 80)
+# NOTE(review): the .npy path below is written relative ("./..."), while
+# model_path above is derived from this file's location. Presumably the path is
+# resolved against the current working directory, so the script would need to
+# be run from its own directory -- confirm against the processor implementation.
+messages = [
+    {
+        "role": "user",
+        "content": [
+            {"type": "time_series", "data": "./0092638_seism.npy", "sampling_rate": 100},
+            {"type": "text", "text": "Please determine whether an Earthquake event has occurred in the provided time-series data. If so, please specify the starting time point indices of the P-wave and S-wave in the event."},
+        ],
+    }
+]
+# Preprocess the time-series entries of the messages; the result is forwarded
+# as extra keyword arguments to apply_chat_template.
+time_series_inputs = processor.time_series_preprocessor(messages)
+multimodal_inputs = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", enable_thinking=False, **time_series_inputs).to(model.device, dtype=torch.bfloat16)
+print("\n生成时序回复...")
+with torch.inference_mode():
+    multimodal_generated_ids = model.generate(
+        **multimodal_inputs,
+        max_new_tokens=200,
+        do_sample=False,
+        temperature=1.0,
+    )
+# Extract the generated tokens (strip the input portion)
+multimodal_generated_ids_trimmed = [
+    out_ids[len(in_ids):] for in_ids, out_ids in zip(multimodal_inputs.input_ids, multimodal_generated_ids)
+]
+# Decode to text
+multimodal_output = processor.batch_decode(
+    multimodal_generated_ids_trimmed,
+    skip_special_tokens=True,
+    clean_up_tokenization_spaces=False
+)
+print("\n" + "-" * 80)
+print("时序输出：")
+print("-" * 80)
+print(multimodal_output[0])
+print("-" * 80)
+print("\n✅ 时序功能测试完成！")