Instructions to use TFJiangxiLab/TianJiangZhuGe_8B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use TFJiangxiLab/TianJiangZhuGe_8B with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="TFJiangxiLab/TianJiangZhuGe_8B", trust_remote_code=True)
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import TianJiangZhuGe
model = TianJiangZhuGe.from_pretrained("TFJiangxiLab/TianJiangZhuGe_8B", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use TFJiangxiLab/TianJiangZhuGe_8B with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "TFJiangxiLab/TianJiangZhuGe_8B"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "TFJiangxiLab/TianJiangZhuGe_8B",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
Use Docker
docker model run hf.co/TFJiangxiLab/TianJiangZhuGe_8B
- SGLang
How to use TFJiangxiLab/TianJiangZhuGe_8B with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "TFJiangxiLab/TianJiangZhuGe_8B" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "TFJiangxiLab/TianJiangZhuGe_8B",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "TFJiangxiLab/TianJiangZhuGe_8B" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "TFJiangxiLab/TianJiangZhuGe_8B",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
- Docker Model Runner
How to use TFJiangxiLab/TianJiangZhuGe_8B with Docker Model Runner:
docker model run hf.co/TFJiangxiLab/TianJiangZhuGe_8B
TianJiangZhuGe-8B: A Multimodal Large Language Model
Developed by
Advanced Computing Research Center, Tianfu Jiangxi Laboratory
Overview
TianJiangZhuGe-8B is a multimodal large language model (MLLM) capable of understanding both visual and textual information.
It is designed for image comprehension, visual question answering (VQA), and multi-turn multimodal dialogue in both Chinese and English.
The model architecture is inspired by the InternVL3.5 framework and integrates vision encoders and a large language model through efficient alignment and instruction-tuning phases.
Model Details
| Attribute | Description |
|---|---|
| Model Type | Vision-Language Multimodal Model |
| Architecture | InternVL3.5-style encoder-decoder |
| Languages | Chinese, English |
| Precision | bfloat16 |
| File Format | safetensors |
| Frameworks | PyTorch, ModelScope, Transformers |
| Primary Tasks | Image Question Answering, Optical Character Recognition, Multimodal Dialogue |
Quick Inference Example
import math
import numpy as np
import torch
import torchvision.transforms as T
from decord import VideoReader, cpu
from PIL import Image
from torchvision.transforms.functional import InterpolationMode
from modelscope import AutoModel, AutoTokenizer
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)
def build_transform(input_size):
    """Build the per-tile preprocessing pipeline.

    The returned ``T.Compose`` applies, in order: conversion to RGB (only
    when the image is not already RGB), a bicubic resize to
    ``input_size`` x ``input_size``, conversion to a tensor, and
    normalization with ``IMAGENET_MEAN`` / ``IMAGENET_STD``.
    """
    def _ensure_rgb(img):
        # Leave RGB images untouched; convert every other mode.
        if img.mode != 'RGB':
            return img.convert('RGB')
        return img

    steps = [
        T.Lambda(_ensure_rgb),
        T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
    return T.Compose(steps)
def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
    """Pick the tile grid whose aspect ratio is closest to ``aspect_ratio``.

    ``target_ratios`` is iterated in order; each entry is indexed as
    ``(ratio[0], ratio[1])`` and its aspect ratio is ``ratio[0] / ratio[1]``.
    A candidate strictly closer than the current best replaces it. A
    candidate that ties with the current best replaces it only when the
    image area (``width * height``) exceeds half of the area that grid would
    cover (``0.5 * image_size**2 * ratio[0] * ratio[1]``).

    Returns ``(1, 1)`` when no candidate is ever selected (for example when
    ``target_ratios`` is empty).
    """
    pixel_count = width * height
    half_tile_area = 0.5 * image_size * image_size

    chosen = (1, 1)
    smallest_gap = float('inf')
    for candidate in target_ratios:
        gap = abs(aspect_ratio - candidate[0] / candidate[1])
        if gap < smallest_gap:
            smallest_gap, chosen = gap, candidate
        elif gap == smallest_gap and pixel_count > half_tile_area * candidate[0] * candidate[1]:
            # Tie: take this grid only if the image is large enough for it.
            chosen = candidate
    return chosen
def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbnail=False):
    """Split ``image`` into a grid of ``image_size`` x ``image_size`` tiles.

    Candidate grids are every ``(i, j)`` pair whose tile count ``i * j`` lies
    in ``[min_num, max_num]``, sorted by tile count. The grid is selected by
    ``find_closest_aspect_ratio``; the image is resized to exactly fill that
    grid and cropped tile by tile in row-major order.

    When ``use_thumbnail`` is true and more than one tile was produced, a
    copy of the whole image resized to ``image_size`` x ``image_size`` is
    appended as the last element.

    Returns the list of tile images.
    """
    src_w, src_h = image.size
    aspect_ratio = src_w / src_h

    # Enumerate every grid shape with an admissible number of tiles.
    candidate_grids = set(
        (cols, rows)
        for n in range(min_num, max_num + 1)
        for cols in range(1, n + 1)
        for rows in range(1, n + 1)
        if min_num <= cols * rows <= max_num
    )
    candidate_grids = sorted(candidate_grids, key=lambda grid: grid[0] * grid[1])

    best_grid = find_closest_aspect_ratio(
        aspect_ratio, candidate_grids, src_w, src_h, image_size)

    target_width = image_size * best_grid[0]
    target_height = image_size * best_grid[1]
    blocks = best_grid[0] * best_grid[1]

    resized_img = image.resize((target_width, target_height))
    tiles_per_row = target_width // image_size

    processed_images = []
    for index in range(blocks):
        # Row-major walk over the grid: index -> (row, column).
        row, col = divmod(index, tiles_per_row)
        box = (
            col * image_size,
            row * image_size,
            (col + 1) * image_size,
            (row + 1) * image_size,
        )
        processed_images.append(resized_img.crop(box))
    assert len(processed_images) == blocks

    if use_thumbnail and len(processed_images) != 1:
        processed_images.append(image.resize((image_size, image_size)))
    return processed_images
def load_image(image_file, input_size=448, max_num=12):
    """Load an image and return its preprocessed tiles as one stacked tensor.

    The image is opened, converted to RGB, split by ``dynamic_preprocess``
    (with ``use_thumbnail=True``) and each resulting tile is passed through
    the transform from ``build_transform(input_size)``.

    Args:
        image_file: Anything accepted by ``PIL.Image.open``.
        input_size: Side length used for both tiling and the resize step.
        max_num: Upper bound on the number of tiles passed to
            ``dynamic_preprocess``.

    Returns:
        The per-tile tensors combined with ``torch.stack``.
    """
    # Open via a context manager so the underlying file handle is released
    # deterministically; ``convert`` returns an independent in-memory image.
    with Image.open(image_file) as source:
        image = source.convert('RGB')
    transform = build_transform(input_size=input_size)
    images = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num)
    pixel_values = torch.stack([transform(tile) for tile in images])
    return pixel_values
# -------------------------------------------
# Load model & tokenizer
# NOTE(review): this id differs from the "TFJiangxiLab/TianJiangZhuGe_8B" id
# used elsewhere on this page -- confirm which repository is intended.
path = "tianfu-lab/TianJiangZhuGe-8B"  # Replace with your HF repo name
model = AutoModel.from_pretrained(
    path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)
model = model.eval()
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

# Load example image, then cast it to bfloat16 and move it to the GPU
pixel_values = load_image("example_image.jpg", max_num=12)
pixel_values = pixel_values.to(torch.bfloat16).cuda()

# Ask a single question about the image and print the exchange
generation_config = {"max_new_tokens": 1024, "do_sample": True}
question = "<image>\nWhat is shown in this picture?"
response = model.chat(tokenizer, pixel_values, question, generation_config)
print(f"User: {question}\nAssistant: {response}")
Example Capabilities
| Task | Description |
|---|---|
| Image Captioning | Generate a natural language description for an image |
| Visual Question Answering (VQA) | Answer open-ended questions about images |
| Multimodal Dialogue | Conduct context-aware conversations conditioned on visual input |
| OCR-based Reasoning | Understand and reason over textual contents in images |
License
This model is released for research and educational purposes only.
Commercial use of the model or its derivatives is prohibited without explicit permission.
- Downloads last month
- 2