Instructions to use Portx/do_extractor_v1_20252102_16bit with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Portx/do_extractor_v1_20252102_16bit with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="Portx/do_extractor_v1_20252102_16bit")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("Portx/do_extractor_v1_20252102_16bit")
model = AutoModelForImageTextToText.from_pretrained("Portx/do_extractor_v1_20252102_16bit")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)
outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use Portx/do_extractor_v1_20252102_16bit with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "Portx/do_extractor_v1_20252102_16bit"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Portx/do_extractor_v1_20252102_16bit",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Describe this image in one sentence." },
          { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
        ]
      }
    ]
  }'

Use Docker
docker model run hf.co/Portx/do_extractor_v1_20252102_16bit
- SGLang
How to use Portx/do_extractor_v1_20252102_16bit with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "Portx/do_extractor_v1_20252102_16bit" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Portx/do_extractor_v1_20252102_16bit",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Describe this image in one sentence." },
          { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
        ]
      }
    ]
  }'

Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "Portx/do_extractor_v1_20252102_16bit" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Portx/do_extractor_v1_20252102_16bit",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Describe this image in one sentence." },
          { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
        ]
      }
    ]
  }'

- Docker Model Runner
How to use Portx/do_extractor_v1_20252102_16bit with Docker Model Runner:
docker model run hf.co/Portx/do_extractor_v1_20252102_16bit
File size: 7,112 Bytes
3a49a5c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 | import sys
from subprocess import run
# Dependencies are installed with pip at import time, before the imports below
# that rely on them. Each call uses check=True, so a failing pip command raises
# CalledProcessError and aborts the module import.
#run("pip install unsloth", shell=True, check=True)
# Remove any existing unsloth / unsloth_zoo first, then reinstall the latest
# versions without using the pip cache.
run("pip uninstall unsloth unsloth_zoo -y", shell=True, check=True)
run("pip install unsloth unsloth_zoo --no-cache-dir --upgrade", shell=True, check=True)
#run("pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git", shell=True, check=True)
run("pip install uvicorn fastapi python-multipart", shell=True, check=True)
run("pip install PyMuPDF pypdf", shell=True, check=True)
# These imports must stay below the pip calls above: several of the packages
# are only installed by them.
import torch
from unsloth.trainer import UnslothVisionDataCollator
from unsloth import FastVisionModel
from PIL import Image
import re
import json
from fastapi import FastAPI, HTTPException, Query, Request, File, UploadFile
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
import shutil
import os
import pymupdf
from pypdf import PdfReader
from enum import Enum
from pydantic import BaseModel, Field
from typing import Optional, Union, List, Dict, Any
# Make sure a ./static directory exists relative to the working directory.
if not os.path.exists('./static'): os.mkdir('./static')
import logging
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# Root logger: DEBUG level, "timestamp - level - message" format.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
# CUDA when torch reports it is available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Base model passed to FastVisionModel.from_pretrained in EndpointHandler.__init__.
model_id = "unsloth/Llama-3.2-11B-Vision-Instruct"
# Adapter repository loaded on top of the base model via load_adapter.
adapter_id = "Portx/do_extractor_v1_20252001_adapters"
class PromptSet:
    """Prompt texts used when querying the vision model about an order document."""

    # Full extraction prompt: lists 10 key names (container_number ...
    # reference_number), asks for JSON output, and tells the model to emit '-'
    # for anything that is missing. Selected by EndpointHandler.__call__ when
    # the request text is "0".
    main_order_information_prompt = """
You are an expert in analyzing and extracting information from freight, shipment, or delivery orders. Please carefully read the provided order file and extract the following 10 key pieces of information. Ensure that the key names are exactly as listed below. Do not create any additional key names other than these. If any information is missing or unavailable, output '-'.
#Key names and their descriptions:
1. container_number: The container number/no of the shipment (e.g., TRKU2038448, MSDU8549321). This should be an 11-character container number, with no additional format. If not available, output '-'.
2. bill_of_lading: The Bill of Lading number, which could include formats such as B/L No., AWS No., BL No., or ocean Bill of Lading (e.g., AXVJMER000008166, TRKU-10152009, HLCU ALY241000275). If not available, output '-'.
3. importing_carrier: The importing or ocean carrier, which may include SCAC codes, carrier's local agents, or sea line codes. If not available, output '-'.
4. origin_address: The address for picking up the container, such as the origin address, pickup location, terminal, or port of discharge. Exclude loading location information. (e.g., "PORT LIBERTY NY CONTAINER TERMINAL 300 WESTERN AVE"). If not available, output '-'.
5. destination_address: The address where the container is to be delivered, typically a company name or a specific delivery location (e.g., "AERO RECEIVING EAST, 2 BRICK PLANT ROAD, SOUTH RIVER, NJ"). If not available, output '-'.
6. container_weight: The weight of the container (in numeric format, e.g., 58,201.44). If there are multiple weights, output the highest value. If not available, output '-'.
7. container_weight_unit: The unit of measurement for the container's weight (e.g., LBS, KGS, KG, LB). If not available, output '-'.
8. container_type: The type/size of the container (e.g., 40HC, 20GP FCL). If not available, output '-'.
9. po_number: The purchase order number or customer’s PO (e.g., PO Number, customer’s PO, consol). If not available, output '-'.
10. reference_number: The reference number, file number, or any internal reference (e.g., reference number, our ref no.). If not available, output '-'.
#Output:
{container_number: ...,
bill_of_lading: ..,
importing_carrier: ...,
origin_address: ...,
destination_address: ...,
container_weight: ...,
container_weight_unit: ...,
container_type: ...,
po_number: ...,
reference_number: ...
}
Guidelines:
- Very important: do not make up anything. If the information of a required field is not available, output '-' for it.
- Output in JSON format. The JSON should contain the above 10 keys.
"""
    # Asks for every container number as a JSON array. Selected by
    # EndpointHandler.__call__ when the request text is "1".
    order_list_prompt = "How much container are there? Give to me all container numbers only in a json array?"
    # Per-container follow-up question containing two `{query}` placeholders.
    # NOTE(review): presumably filled with str.format(query=<container number>)
    # by a caller; no use of this attribute is visible in this file section.
    multiple_container_information_prompt = "Give to me container weight, container weight unit,the container size (with type) of {query} in the same line with container_number:{query}.You must response only in a JSON format. Example output is must be 'container_number': 'OOCU6979480', 'container_type': '40HC or DV', 'weight': '46,737.52', 'weight_unit': 'LB'"
class RegexSet:
    """Regex helpers that pull structured values out of raw model responses.

    Both helpers are static: they work when called on the class
    (``RegexSet.get_all_container_array(...)``) or on an instance.
    """

    @staticmethod
    def get_all_container_array(input_response):
        """Return the items of the first ``[...]`` group and how many there are.

        The text between the first pair of square brackets is split on
        ``', '``; items are returned exactly as written (surrounding quotes,
        if any, are not stripped).

        Args:
            input_response: Text expected to contain a bracketed list.

        Returns:
            ``(items, count)`` where ``items`` is a list of strings.
            When no non-empty bracket group exists, or the input is not a
            string, the fallback ``('[]', 0)`` is returned. The first element
            of the fallback is the *string* ``'[]'``, not an empty list; this
            is kept for compatibility with existing callers.
        """
        try:
            matches = re.findall(r'\[([^\]]+)\]', input_response)
            final_response = matches[0].split(', ')
        except (TypeError, IndexError):
            # TypeError: input is not str; IndexError: no bracket group found.
            return '[]', 0
        return final_response, len(final_response)

    @staticmethod
    def convert_one_order_information(input_response):
        """Collect ``'key': 'value'`` pairs from a response into a dict.

        Only single-quoted keys and values separated by a colon and exactly
        one whitespace character are recognised. If a key appears more than
        once, the last occurrence wins.

        Args:
            input_response: Text containing single-quoted key/value pairs.

        Returns:
            A dict of the pairs found (empty when there are none), or the
            string ``'-'`` when the input is not a string.
        """
        try:
            matches = re.findall(r"'([^']+)':\s'([^']+)'", input_response)
        except TypeError:
            return '-'
        return dict(matches)
class Utils:
    """Small I/O helpers."""

    @staticmethod
    def base64_to_jpg(base64_string):
        """Decode a base64 string and write the bytes to ``./do_img.jpg``.

        Args:
            base64_string: Base64-encoded file content (str or bytes).

        Returns:
            The path of the written file (``"./do_img.jpg"``), so the result
            can be handed directly to an image loader.

        Raises:
            binascii.Error: If the input is not valid base64.
        """
        # Imported here because the module's import block does not provide it.
        import base64

        output_path = "./do_img.jpg"
        image_data = base64.b64decode(base64_string)
        with open(output_path, 'wb') as f:
            f.write(image_data)
        return output_path
class EndpointHandler:
    """Callable request handler wrapping the vision model and its adapter."""

    def __init__(self, path=""):
        """Load the base model in 4-bit and attach the adapter.

        Args:
            path: Accepted for interface compatibility; not used.
        """
        self.model, self.tokenizer = FastVisionModel.from_pretrained(
            model_id,
            token=os.getenv('HF_TOKEN'),
            quantization_config=BitsAndBytesConfig(load_in_4bit=True),
        )
        self.model.load_adapter(adapter_id)

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Run one image + prompt request through the model.

        Args:
            data: Request payload. ``data["inputs"]["text"]`` selects the
                prompt: ``"0"`` uses ``PromptSet.main_order_information_prompt``,
                ``"1"`` uses ``PromptSet.order_list_prompt``, anything else is
                sent verbatim. ``data["inputs"]["image"]`` is the
                base64-encoded image.

        Returns:
            ``{"predictions": output}`` where ``output`` is whatever
            ``self.model.generate`` returned.

        Raises:
            KeyError: If ``inputs``, ``text`` or ``image`` is missing.
            binascii.Error: If the image is not valid base64.
        """
        import base64
        import io

        FastVisionModel.for_inference(self.model)
        prompt, base64_image = data["inputs"]["text"], data["inputs"]["image"]
        if prompt == "0":
            final_prompt = PromptSet.main_order_information_prompt
        elif prompt == "1":
            final_prompt = PromptSet.order_list_prompt
        else:
            final_prompt = prompt

        # Decode in memory instead of going through a fixed file on disk, so
        # concurrent requests cannot overwrite each other's image.
        image = Image.open(io.BytesIO(base64.b64decode(base64_image)))

        messages = [
            {"role": "user", "content": [
                {"type": "image"},
                {"type": "text", "text": final_prompt},
            ]},
        ]
        # add_generation_prompt=True makes the template end with the assistant
        # header, so generation starts the reply rather than continuing the
        # user turn.
        input_text = self.tokenizer.apply_chat_template(messages, add_generation_prompt=True)
        # Use the module-level `device` rather than a hard-coded "cuda".
        inputs = self.tokenizer(
            image,
            input_text,
            add_special_tokens=False,
            return_tensors="pt",
        ).to(device)
        output = self.model.generate(
            **inputs,
            max_new_tokens=512,
            use_cache=True,
            temperature=1.5,
            min_p=0.9,
        )
        #final_output = self.tokenizer.decode(output[0][len(inputs['input_ids'][0]):], skip_special_tokens=True)
        #response = RegexSet.convert_one_order_information(input_response=final_output)
        # NOTE(review): `output` is the raw result of generate() (prompt and
        # generated token ids), not decoded text. Confirm the serving layer
        # can serialize it, or decode as in the commented-out lines above.
        return {"predictions": output}