Alexandre-Numind committed on
Commit
4c05205
·
verified ·
1 Parent(s): 30bac12

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +40 -10
README.md CHANGED
@@ -110,18 +110,48 @@ print(processor.decode(out[0].split("<answer>")[1].split("</answer>")[0], skip_s
110
  ## VLLM:
111
  ```python
112
  from PIL import Image
113
- from vllm import LLM, SamplingParameters
114
  from transformers import AutoProcessor
115
 
116
- model_id = "NM-dev/NuMarkdown-Qwen2.5-VL"
117
- llm = LLM(model=model_id, trust_remote_code=True, dtype="bfloat16")
118
- proc = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
- img = Image.open("invoice_scan.png")
121
- prompt = proc(text="Convert this to Markdown with reasoning.", image=img,
122
- return_tensors="np") # numpy arrays for vLLM
 
123
 
124
- params = SamplingParameters(max_tokens=1024, temperature=0.8, top_p=0.95)
125
- result = llm.generate([{"prompt": prompt}], params)[0].outputs[0].text.split("<answer>")[1].split("</answer>")[0]
126
- print(result)
127
  ```
 
110
  ## VLLM:
111
  ```python
112
  from PIL import Image
113
+ from vllm import LLM, SamplingParams
114
  from transformers import AutoProcessor
115
 
116
+ model_id = "NM-dev/Qwen7B-m-5"
117
+
118
+ llm = LLM(
119
+ model=model_id,
120
+ tokenizer=model_id,
121
+ dtype="bfloat16",
122
+ gpu_memory_utilization=0.85,
123
+ max_num_seqs=256,
124
+ enforce_eager=True,
125
+ trust_remote_code=True
126
+ )
127
+
128
+ sampling_params = SamplingParams(
129
+ temperature=0.8,
130
+ max_tokens=5000,
131
+ )
132
+ processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
133
+
134
+ inputs = []
135
+ messages = [{
136
+ "role": "user",
137
+ "content": [
138
+ {"type": "image"},
139
+ # {"type": "text", "text": guideline},
140
+ ]
141
+ }]
142
+
143
+ prompt = processor.apply_chat_template(
144
+ messages,
145
+ tokenize=False,
146
+ add_generation_prompt=True,
147
+ )
148
+ image = Image.open("invoice.png").convert("RGB")
149
 
150
+ inputs.append({
151
+ "prompt": prompt,
152
+ "multi_modal_data": {"image": image}
153
+ })
154
 
155
+ outs = llm.generate(inputs, sampling_params)
156
+ preds = [o.outputs[0].text.strip().split("<answer>")[1].split("</answer>")[0] for o in outs]
 
157
  ```