| |
import os
import re
from typing import Literal

# Pin inference to GPU 0. Must be set before torch/swift are imported
# (they are deferred to the __main__ guard) so CUDA honors the restriction.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
|
|
|
|
def draw_bbox_qwen2_vl(image, response, norm_bbox: Literal['norm1000', 'none']):
    """Extract Qwen2-VL grounding tags from *response* and draw them on *image*.

    The model emits spans of the form
    ``<|object_ref_start|>NAME<|object_ref_end|><|box_start|>(x1,y1),(x2,y2)<|box_end|>``;
    each match contributes one object reference and one 4-value box (as the raw
    matched strings), both forwarded to ``draw_bbox``.
    """
    pattern = (r'<\|object_ref_start\|>(.*?)<\|object_ref_end\|>'
               r'<\|box_start\|>\((\d+),(\d+)\),\((\d+),(\d+)\)<\|box_end\|>')
    parsed = re.findall(pattern, response)
    refs = [groups[0] for groups in parsed]
    boxes = [list(groups[1:]) for groups in parsed]
    draw_bbox(image, refs, boxes, norm_bbox=norm_bbox)
|
|
|
|
def infer_grounding():
    """Run a grounding (object-detection) LoRA demo end to end.

    Downloads the 'swift/test_grounding' adapter, runs inference on a sample
    image with the adapter's base model, parses the returned bounding boxes
    and writes the annotated image to ``bbox.png``.
    """
    from swift.llm import PtEngine, RequestConfig, BaseArguments, InferRequest, safe_snapshot_download

    # Fetch the adapter checkpoint and recover its training-time arguments
    # (base model id, bbox normalization scheme).
    ckpt_dir = safe_snapshot_download('swift/test_grounding')
    train_args = BaseArguments.from_pretrained(ckpt_dir)

    sample = load_image('http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/animal.png')
    request = InferRequest(messages=[{'role': 'user', 'content': 'Task: Object Detection'}], images=[sample])
    config = RequestConfig(max_tokens=512, temperature=0)  # greedy decoding for reproducible boxes

    engine = PtEngine(train_args.model, adapters=[ckpt_dir])
    answer = engine.infer([request], config)[0].choices[0].message.content
    print(f'lora-response: {answer}')

    out_file = 'bbox.png'
    draw_bbox_qwen2_vl(sample, answer, norm_bbox=train_args.norm_bbox)
    print(f'output_path: {out_file}')
    sample.save(out_file)
|
|
|
|
if __name__ == '__main__':
    # NOTE(review): swift.llm is imported here rather than at the top,
    # presumably so CUDA_VISIBLE_DEVICES (set at module top) is in place
    # before torch/swift initialize — confirm before reordering.
    from swift.llm import draw_bbox, load_image
    infer_grounding()
|
|