1f committed on
Commit e2cfb48 · verified · 1 Parent(s): ff53362

Add files using upload-large-folder tool

Files changed (20):
  1. r1-a/response_generation/minicpm/MiniCPM-o/.github/ISSUE_TEMPLATE/bug_report.yaml +89 -0
  2. r1-a/response_generation/minicpm/MiniCPM-o/.github/ISSUE_TEMPLATE/config.yaml +1 -0
  3. r1-a/response_generation/minicpm/MiniCPM-o/.github/ISSUE_TEMPLATE/feature_request.yaml +78 -0
  4. r1-a/response_generation/minicpm/MiniCPM-o/.github/ISSUE_TEMPLATE/llamacpp.yaml +78 -0
  5. r1-a/response_generation/minicpm/MiniCPM-o/.github/ISSUE_TEMPLATE/ollama.yaml +78 -0
  6. r1-a/response_generation/minicpm/MiniCPM-o/.github/ISSUE_TEMPLATE/vllm.yaml +78 -0
  7. r1-a/response_generation/minicpm/MiniCPM-o/.vscode/settings.json +5 -0
  8. r1-a/response_generation/minicpm/MiniCPM-o/assets/discord.png +0 -0
  9. r1-a/response_generation/minicpm/MiniCPM-o/assets/logo.html +3 -0
  10. r1-a/response_generation/minicpm/MiniCPM-o/assets/minicpm-llama-v-2-5_languages.md +176 -0
  11. r1-a/response_generation/minicpm/MiniCPM-o/assets/minicpm-o-group.jpeg +0 -0
  12. r1-a/response_generation/minicpm/MiniCPM-o/assets/minicpm-v24.png +0 -0
  13. r1-a/response_generation/minicpm/MiniCPM-o/assets/minicpm-v25.png +0 -0
  14. r1-a/response_generation/minicpm/MiniCPM-o/assets/minicpm-v26.png +0 -0
  15. r1-a/response_generation/minicpm/MiniCPM-o/assets/minicpmv-omnilmm.png +0 -0
  16. r1-a/response_generation/minicpm/MiniCPM-o/assets/minicpmv.png +0 -0
  17. r1-a/response_generation/minicpm/MiniCPM-o/assets/modelbest.png +0 -0
  18. r1-a/response_generation/minicpm/MiniCPM-o/chat.py +293 -0
  19. r1-a/response_generation/minicpm/MiniCPM-o/requirements.txt +35 -0
  20. r1-a/response_generation/minicpm/MiniCPM-o/requirements_o2.6.txt +22 -0
r1-a/response_generation/minicpm/MiniCPM-o/.github/ISSUE_TEMPLATE/bug_report.yaml ADDED
@@ -0,0 +1,89 @@
+ name: 🐞 Bug
+ description: 提交错误报告 | File a bug/issue
+ title: "[BUG] <title>"
+ labels: []
+
+ body:
+   - type: checkboxes
+     attributes:
+       label: 是否已有关于该错误的issue或讨论? | Is there an existing issue / discussion for this?
+       description: |
+         请先搜索您遇到的错误是否在已有的issues或讨论中提到过。
+         Please search to see if an issue / discussion already exists for the bug you encountered.
+         [Issues](https://github.com/OpenBMB/MiniCPM-V/issues)
+         [Discussions](https://github.com/OpenBMB/MiniCPM-V/discussions)
+       options:
+         - label: 我已经搜索过已有的issues和讨论 | I have searched the existing issues / discussions
+           required: true
+   - type: checkboxes
+     attributes:
+       label: 该问题是否在FAQ中有解答? | Is there an existing answer for this in FAQ?
+       description: |
+         请先搜索您遇到的错误是否已在FAQ中有相关解答。
+         Please search to see if an answer already exists in FAQ for the bug you encountered.
+         [FAQ-en](https://github.com/OpenBMB/MiniCPM-V/blob/main/FAQ.md)
+         [FAQ-zh](https://github.com/OpenBMB/MiniCPM-V/blob/main/FAQ_zh.md)
+       options:
+         - label: 我已经搜索过FAQ | I have searched FAQ
+           required: true
+   - type: textarea
+     attributes:
+       label: 当前行为 | Current Behavior
+       description: |
+         准确描述遇到的行为。
+         A concise description of what you're experiencing.
+     validations:
+       required: false
+   - type: textarea
+     attributes:
+       label: 期望行为 | Expected Behavior
+       description: |
+         准确描述预期的行为。
+         A concise description of what you expected to happen.
+     validations:
+       required: false
+   - type: textarea
+     attributes:
+       label: 复现方法 | Steps To Reproduce
+       description: |
+         复现当前行为的详细步骤。
+         Steps to reproduce the behavior.
+       placeholder: |
+         1. In this environment...
+         2. With this config...
+         3. Run '...'
+         4. See error...
+     validations:
+       required: false
+   - type: textarea
+     attributes:
+       label: 运行环境 | Environment
+       description: |
+         examples:
+           - **OS**: Ubuntu 20.04
+           - **Python**: 3.8
+           - **Transformers**: 4.31.0
+           - **PyTorch**: 2.0.1
+           - **CUDA**: 11.4
+       value: |
+         - OS:
+         - Python:
+         - Transformers:
+         - PyTorch:
+         - CUDA (`python -c 'import torch; print(torch.version.cuda)'`):
+       render: Markdown
+     validations:
+       required: false
+   - type: textarea
+     attributes:
+       label: 备注 | Anything else?
+       description: |
+         您可以在这里补充其他关于该问题背景信息的描述、链接或引用等。
+
+         您可以通过点击高亮此区域然后拖动文件的方式上传图片或日志文件。
+
+         Links? References? Anything that will give us more context about the issue you are encountering!
+
+         Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in.
+     validations:
+       required: false
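The Environment field above asks reporters for OS, Python, Transformers, PyTorch, and CUDA versions. A small sketch of gathering those answers automatically (the `env_report` helper is illustrative, not part of the repo; package lookups are guarded since not everything may be installed):

```python
import platform
import sys
from importlib.metadata import version, PackageNotFoundError


def env_report(packages=('transformers', 'torch')):
    """Collect the version info the bug template's Environment field asks for."""
    lines = [
        f"- OS: {platform.system()} {platform.release()}",
        f"- Python: {sys.version.split()[0]}",
    ]
    for pkg in packages:
        try:
            lines.append(f"- {pkg}: {version(pkg)}")
        except PackageNotFoundError:
            lines.append(f"- {pkg}: not installed")
    return "\n".join(lines)


print(env_report())
```

The output can be pasted straight into the `value:` skeleton of the template.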
r1-a/response_generation/minicpm/MiniCPM-o/.github/ISSUE_TEMPLATE/config.yaml ADDED
@@ -0,0 +1 @@
+ blank_issues_enabled: true
r1-a/response_generation/minicpm/MiniCPM-o/.github/ISSUE_TEMPLATE/feature_request.yaml ADDED
@@ -0,0 +1,78 @@
+ name: "💡 Feature Request"
+ description: 创建新功能请求 | Create a new ticket for a new feature request
+ title: "💡 [REQUEST] - <title>"
+ labels: [
+   "question"
+ ]
+ body:
+   - type: input
+     id: start_date
+     attributes:
+       label: "起始日期 | Start Date"
+       description: |
+         起始开发日期
+         Start of development
+       placeholder: "month/day/year"
+     validations:
+       required: false
+   - type: textarea
+     id: implementation_pr
+     attributes:
+       label: "实现PR | Implementation PR"
+       description: |
+         实现该功能的Pull request
+         Pull request used
+       placeholder: "#Pull Request ID"
+     validations:
+       required: false
+   - type: textarea
+     id: reference_issues
+     attributes:
+       label: "相关Issues | Reference Issues"
+       description: |
+         与该功能相关的issues
+         Common issues
+       placeholder: "#Issues IDs"
+     validations:
+       required: false
+   - type: textarea
+     id: summary
+     attributes:
+       label: "摘要 | Summary"
+       description: |
+         简要描述新功能的特点
+         Provide a brief explanation of the feature
+       placeholder: |
+         Describe in a few lines your feature request
+     validations:
+       required: true
+   - type: textarea
+     id: basic_example
+     attributes:
+       label: "基本示例 | Basic Example"
+       description: Indicate here some basic examples of your feature.
+       placeholder: A few specific words about your feature request.
+     validations:
+       required: true
+   - type: textarea
+     id: drawbacks
+     attributes:
+       label: "缺陷 | Drawbacks"
+       description: |
+         该新功能有哪些缺陷/可能造成哪些影响?
+         What are the drawbacks/impacts of your feature request?
+       placeholder: |
+         Identify the drawbacks and impacts while being neutral on your feature request
+     validations:
+       required: true
+   - type: textarea
+     id: unresolved_question
+     attributes:
+       label: "未解决问题 | Unresolved questions"
+       description: |
+         有哪些尚未解决的问题?
+         What questions still remain unresolved?
+       placeholder: |
+         Identify any unresolved issues.
+     validations:
+       required: false
r1-a/response_generation/minicpm/MiniCPM-o/.github/ISSUE_TEMPLATE/llamacpp.yaml ADDED
@@ -0,0 +1,78 @@
+ name: "llamacpp issue"
+ description: 创建新功能请求 | Create a new ticket for a new feature request
+ title: "[llamacpp] - <title>"
+ labels: [
+   "question"
+ ]
+ body:
+   - type: input
+     id: start_date
+     attributes:
+       label: "起始日期 | Start Date"
+       description: |
+         起始开发日期
+         Start of development
+       placeholder: "month/day/year"
+     validations:
+       required: false
+   - type: textarea
+     id: implementation_pr
+     attributes:
+       label: "实现PR | Implementation PR"
+       description: |
+         实现该功能的Pull request
+         Pull request used
+       placeholder: "#Pull Request ID"
+     validations:
+       required: false
+   - type: textarea
+     id: reference_issues
+     attributes:
+       label: "相关Issues | Reference Issues"
+       description: |
+         与该功能相关的issues
+         Common issues
+       placeholder: "#Issues IDs"
+     validations:
+       required: false
+   - type: textarea
+     id: summary
+     attributes:
+       label: "摘要 | Summary"
+       description: |
+         简要描述新功能的特点
+         Provide a brief explanation of the feature
+       placeholder: |
+         Describe in a few lines your feature request
+     validations:
+       required: true
+   - type: textarea
+     id: basic_example
+     attributes:
+       label: "基本示例 | Basic Example"
+       description: Indicate here some basic examples of your feature.
+       placeholder: A few specific words about your feature request.
+     validations:
+       required: true
+   - type: textarea
+     id: drawbacks
+     attributes:
+       label: "缺陷 | Drawbacks"
+       description: |
+         该新功能有哪些缺陷/可能造成哪些影响?
+         What are the drawbacks/impacts of your feature request?
+       placeholder: |
+         Identify the drawbacks and impacts while being neutral on your feature request
+     validations:
+       required: true
+   - type: textarea
+     id: unresolved_question
+     attributes:
+       label: "未解决问题 | Unresolved questions"
+       description: |
+         有哪些尚未解决的问题?
+         What questions still remain unresolved?
+       placeholder: |
+         Identify any unresolved issues.
+     validations:
+       required: false
r1-a/response_generation/minicpm/MiniCPM-o/.github/ISSUE_TEMPLATE/ollama.yaml ADDED
@@ -0,0 +1,78 @@
+ name: "ollama issue"
+ description: 创建新功能请求 | Create a new ticket for a new feature request
+ title: "[ollama] - <title>"
+ labels: [
+   "question"
+ ]
+ body:
+   - type: input
+     id: start_date
+     attributes:
+       label: "起始日期 | Start Date"
+       description: |
+         起始开发日期
+         Start of development
+       placeholder: "month/day/year"
+     validations:
+       required: false
+   - type: textarea
+     id: implementation_pr
+     attributes:
+       label: "实现PR | Implementation PR"
+       description: |
+         实现该功能的Pull request
+         Pull request used
+       placeholder: "#Pull Request ID"
+     validations:
+       required: false
+   - type: textarea
+     id: reference_issues
+     attributes:
+       label: "相关Issues | Reference Issues"
+       description: |
+         与该功能相关的issues
+         Common issues
+       placeholder: "#Issues IDs"
+     validations:
+       required: false
+   - type: textarea
+     id: summary
+     attributes:
+       label: "摘要 | Summary"
+       description: |
+         简要描述新功能的特点
+         Provide a brief explanation of the feature
+       placeholder: |
+         Describe in a few lines your feature request
+     validations:
+       required: true
+   - type: textarea
+     id: basic_example
+     attributes:
+       label: "基本示例 | Basic Example"
+       description: Indicate here some basic examples of your feature.
+       placeholder: A few specific words about your feature request.
+     validations:
+       required: true
+   - type: textarea
+     id: drawbacks
+     attributes:
+       label: "缺陷 | Drawbacks"
+       description: |
+         该新功能有哪些缺陷/可能造成哪些影响?
+         What are the drawbacks/impacts of your feature request?
+       placeholder: |
+         Identify the drawbacks and impacts while being neutral on your feature request
+     validations:
+       required: true
+   - type: textarea
+     id: unresolved_question
+     attributes:
+       label: "未解决问题 | Unresolved questions"
+       description: |
+         有哪些尚未解决的问题?
+         What questions still remain unresolved?
+       placeholder: |
+         Identify any unresolved issues.
+     validations:
+       required: false
r1-a/response_generation/minicpm/MiniCPM-o/.github/ISSUE_TEMPLATE/vllm.yaml ADDED
@@ -0,0 +1,78 @@
+ name: "vllm issue"
+ description: 创建新功能请求 | Create a new ticket for a new feature request
+ title: "[vllm] - <title>"
+ labels: [
+   "question"
+ ]
+ body:
+   - type: input
+     id: start_date
+     attributes:
+       label: "起始日期 | Start Date"
+       description: |
+         起始开发日期
+         Start of development
+       placeholder: "month/day/year"
+     validations:
+       required: false
+   - type: textarea
+     id: implementation_pr
+     attributes:
+       label: "实现PR | Implementation PR"
+       description: |
+         实现该功能的Pull request
+         Pull request used
+       placeholder: "#Pull Request ID"
+     validations:
+       required: false
+   - type: textarea
+     id: reference_issues
+     attributes:
+       label: "相关Issues | Reference Issues"
+       description: |
+         与该功能相关的issues
+         Common issues
+       placeholder: "#Issues IDs"
+     validations:
+       required: false
+   - type: textarea
+     id: summary
+     attributes:
+       label: "摘要 | Summary"
+       description: |
+         简要描述新功能的特点
+         Provide a brief explanation of the feature
+       placeholder: |
+         Describe in a few lines your feature request
+     validations:
+       required: true
+   - type: textarea
+     id: basic_example
+     attributes:
+       label: "基本示例 | Basic Example"
+       description: Indicate here some basic examples of your feature.
+       placeholder: A few specific words about your feature request.
+     validations:
+       required: true
+   - type: textarea
+     id: drawbacks
+     attributes:
+       label: "缺陷 | Drawbacks"
+       description: |
+         该新功能有哪些缺陷/可能造成哪些影响?
+         What are the drawbacks/impacts of your feature request?
+       placeholder: |
+         Identify the drawbacks and impacts while being neutral on your feature request
+     validations:
+       required: true
+   - type: textarea
+     id: unresolved_question
+     attributes:
+       label: "未解决问题 | Unresolved questions"
+       description: |
+         有哪些尚未解决的问题?
+         What questions still remain unresolved?
+       placeholder: |
+         Identify any unresolved issues.
+     validations:
+       required: false
r1-a/response_generation/minicpm/MiniCPM-o/.vscode/settings.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "githubPullRequests.ignoredPullRequestBranches": [
+         "main"
+     ]
+ }
r1-a/response_generation/minicpm/MiniCPM-o/assets/discord.png ADDED
r1-a/response_generation/minicpm/MiniCPM-o/assets/logo.html ADDED
@@ -0,0 +1,3 @@
+ <span style="color:#56A7DA; font-size: 10em; font-weight: bold;">
+   MiniCPM-<span>o</span>
+ </span>
r1-a/response_generation/minicpm/MiniCPM-o/assets/minicpm-llama-v-2-5_languages.md ADDED
@@ -0,0 +1,176 @@
+ - English
+ - 中文
+ - 한국어
+ - 日本語
+ - Deutsch
+ - Français
+ - Português
+ - Español
+ - မြန်မာဘာသာ
+ - ไทย
+ - Tiếng Việt
+ - Türkçe
+ - ܣܘܪܝܝܐ
+ - العربية
+ - हिन्दी
+ - বাংলা
+ - नेपाली
+ - Türkmençe
+ - Тоҷикӣ
+ - Кыргызча
+ - Русский
+ - Українська
+ - Беларуская
+ - ქართული
+ - Azərbaycanca
+ - Հայերեն
+ - Polski
+ - Lietuvių
+ - Eesti
+ - Latviešu
+ - Čeština
+ - Slovenčina
+ - Magyar
+ - Slovenščina
+ - Hrvatski
+ - Bosanski
+ - Crnogorski
+ - Српски
+ - Shqip
+ - Română
+ - Български
+ - Македонски
+
+ ## 支持语言
+
+ 英语
+ 中文
+ 韩语
+ 日语
+ 德语
+ 法语
+ 葡萄牙语
+ 西班牙语
+ 缅甸语
+ 泰语
+ 越南语
+ 土耳其语
+ 叙利亚语
+ 阿拉伯语
+ 印地语
+ 孟加拉语
+ 尼泊尔语
+ 土库曼语
+ 塔吉克语
+ 吉尔吉斯语
+ 俄语
+ 乌克兰语
+ 白俄罗斯语
+ 格鲁吉亚语
+ 阿塞拜疆语
+ 亚美尼亚语
+ 波兰语
+ 立陶宛语
+ 爱沙尼亚语
+ 拉脱维亚语
+ 捷克语
+ 斯洛伐克语
+ 匈牙利语
+ 斯洛文尼亚语
+ 克罗地亚语
+ 波斯尼亚语
+ 黑山语
+ 塞尔维亚语
+ 阿尔巴尼亚语
+ 罗马尼亚语
+ 保加利亚语
+ 马其顿语
+
+ ## Supported Languages
+
+ English
+ Chinese
+ Korean
+ Japanese
+ German
+ French
+ Portuguese
+ Spanish
+ Burmese
+ Thai
+ Vietnamese
+ Turkish
+ Syriac
+ Arabic
+ Hindi
+ Bengali
+ Nepali
+ Turkmen
+ Tajik
+ Kyrgyz
+ Russian
+ Ukrainian
+ Belarusian
+ Georgian
+ Azerbaijani
+ Armenian
+ Polish
+ Lithuanian
+ Estonian
+ Latvian
+ Czech
+ Slovak
+ Hungarian
+ Slovenian
+ Croatian
+ Bosnian
+ Montenegrin
+ Serbian
+ Albanian
+ Romanian
+ Bulgarian
+ Macedonian
r1-a/response_generation/minicpm/MiniCPM-o/assets/minicpm-o-group.jpeg ADDED
r1-a/response_generation/minicpm/MiniCPM-o/assets/minicpm-v24.png ADDED
r1-a/response_generation/minicpm/MiniCPM-o/assets/minicpm-v25.png ADDED
r1-a/response_generation/minicpm/MiniCPM-o/assets/minicpm-v26.png ADDED
r1-a/response_generation/minicpm/MiniCPM-o/assets/minicpmv-omnilmm.png ADDED
r1-a/response_generation/minicpm/MiniCPM-o/assets/minicpmv.png ADDED
r1-a/response_generation/minicpm/MiniCPM-o/assets/modelbest.png ADDED
r1-a/response_generation/minicpm/MiniCPM-o/chat.py ADDED
@@ -0,0 +1,293 @@
+ import os
+ import torch
+ import json
+ from PIL import Image
+ import base64
+ import io
+ from accelerate import load_checkpoint_and_dispatch, init_empty_weights
+ from transformers import AutoTokenizer, AutoModel
+
+ from omnilmm.utils import disable_torch_init
+ from omnilmm.model.omnilmm import OmniLMMForCausalLM
+ from omnilmm.model.utils import build_transform
+ from omnilmm.train.train_utils import omni_preprocess
+
+ DEFAULT_IMAGE_TOKEN = "<image>"
+ DEFAULT_IMAGE_PATCH_TOKEN = "<im_patch>"
+ DEFAULT_IM_START_TOKEN = "<im_start>"
+ DEFAULT_IM_END_TOKEN = "<im_end>"
+
+
+ def init_omni_lmm(model_path):
+     torch.backends.cuda.matmul.allow_tf32 = True
+     disable_torch_init()
+     model_name = os.path.expanduser(model_path)
+     print(f'Load omni_lmm model and tokenizer from {model_name}')
+     tokenizer = AutoTokenizer.from_pretrained(
+         model_name, model_max_length=2048)
+
+     if False:
+         # model on multiple devices for small size gpu memory (Nvidia 3090 24G x2)
+         with init_empty_weights():
+             model = OmniLMMForCausalLM.from_pretrained(model_name, tune_clip=True, torch_dtype=torch.bfloat16)
+         model = load_checkpoint_and_dispatch(
+             model, model_name, dtype=torch.bfloat16, device_map="auto",
+             no_split_module_classes=['Eva', 'MistralDecoderLayer', 'ModuleList', 'Resampler'])
+     else:
+         model = OmniLMMForCausalLM.from_pretrained(
+             model_name, tune_clip=True, torch_dtype=torch.bfloat16
+         ).to(device='cuda', dtype=torch.bfloat16)
+
+     image_processor = build_transform(
+         is_train=False, input_size=model.model.config.image_size, std_mode='OPENAI_CLIP')
+
+     mm_use_im_start_end = getattr(model.config, "mm_use_im_start_end", False)
+     assert mm_use_im_start_end
+
+     tokenizer.add_tokens([DEFAULT_IMAGE_PATCH_TOKEN, DEFAULT_IM_START_TOKEN,
+                           DEFAULT_IM_END_TOKEN], special_tokens=True)
+
+     vision_config = model.model.vision_config
+     vision_config.im_patch_token = tokenizer.convert_tokens_to_ids(
+         [DEFAULT_IMAGE_PATCH_TOKEN])[0]
+     vision_config.use_im_start_end = mm_use_im_start_end
+     vision_config.im_start_token, vision_config.im_end_token = tokenizer.convert_tokens_to_ids(
+         [DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN])
+     image_token_len = model.model.config.num_query
+
+     return model, image_processor, image_token_len, tokenizer
+
+
+ def expand_question_into_multimodal(question_text, image_token_len, im_st_token, im_ed_token, im_patch_token):
+     if '<image>' in question_text[0]['content']:
+         question_text[0]['content'] = question_text[0]['content'].replace(
+             '<image>', im_st_token + im_patch_token * image_token_len + im_ed_token)
+     else:
+         question_text[0]['content'] = im_st_token + im_patch_token * \
+             image_token_len + im_ed_token + '\n' + question_text[0]['content']
+     return question_text
+
+
+ def wrap_question_for_omni_lmm(question, image_token_len, tokenizer):
+     question = expand_question_into_multimodal(
+         question, image_token_len, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN, DEFAULT_IMAGE_PATCH_TOKEN)
+
+     conversation = question
+     data_dict = omni_preprocess(sources=[conversation],
+                                 tokenizer=tokenizer,
+                                 generation=True)
+
+     data_dict = dict(input_ids=data_dict["input_ids"][0],
+                      labels=data_dict["labels"][0])
+     return data_dict
+
+
+ class OmniLMM12B:
+     def __init__(self, model_path) -> None:
+         model, img_processor, image_token_len, tokenizer = init_omni_lmm(model_path)
+         self.model = model
+         self.image_token_len = image_token_len
+         self.image_transform = img_processor
+         self.tokenizer = tokenizer
+         self.model.eval()
+
+     def decode(self, image, input_ids):
+         with torch.inference_mode():
+             output = self.model.generate_vllm(
+                 input_ids=input_ids.unsqueeze(0).cuda(),
+                 images=image.unsqueeze(0).half().cuda(),
+                 temperature=0.6,
+                 max_new_tokens=1024,
+                 do_sample=True,
+                 output_scores=True,
+                 return_dict_in_generate=True,
+                 repetition_penalty=1.1,
+                 top_k=30,
+                 top_p=0.9,
+             )
+
+             response = self.tokenizer.decode(
+                 output.sequences[0], skip_special_tokens=True)
+             response = response.strip()
+             return response
+
+     def chat(self, input):
+         try:
+             image = Image.open(io.BytesIO(base64.b64decode(input['image']))).convert('RGB')
+         except Exception:
+             return "Image decode error"
+
+         msgs = json.loads(input['question'])
+         input_ids = wrap_question_for_omni_lmm(
+             msgs, self.image_token_len, self.tokenizer)['input_ids']
+         input_ids = torch.as_tensor(input_ids)
+         image = self.image_transform(image)
+
+         out = self.decode(image, input_ids)
+
+         return out
+
+
+ def img2base64(file_name):
+     with open(file_name, 'rb') as f:
+         encoded_string = base64.b64encode(f.read())
+         return encoded_string
+
+
+ class MiniCPMV:
+     def __init__(self, model_path) -> None:
+         self.model = AutoModel.from_pretrained(model_path, trust_remote_code=True).to(dtype=torch.bfloat16)
+         self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+         self.model.eval().cuda()
+
+     def chat(self, input):
+         try:
+             image = Image.open(io.BytesIO(base64.b64decode(input['image']))).convert('RGB')
+         except Exception:
+             return "Image decode error"
+
+         msgs = json.loads(input['question'])
+
+         answer, context, _ = self.model.chat(
+             image=image,
+             msgs=msgs,
+             context=None,
+             tokenizer=self.tokenizer,
+             sampling=True,
+             temperature=0.7
+         )
+         return answer
+
+
+ class MiniCPMV2_5:
+     def __init__(self, model_path) -> None:
+         self.model = AutoModel.from_pretrained(model_path, trust_remote_code=True).to(dtype=torch.float16)
+         self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+         self.model.eval().cuda()
+
+     def chat(self, input):
+         try:
+             image = Image.open(io.BytesIO(base64.b64decode(input['image']))).convert('RGB')
+         except Exception:
+             return "Image decode error"
+
+         msgs = json.loads(input['question'])
+
+         answer = self.model.chat(
+             image=image,
+             msgs=msgs,
+             tokenizer=self.tokenizer,
+             sampling=True,
+             temperature=0.7
+         )
+         return answer
+
+
+ class MiniCPMV2_6:
+     def __init__(self, model_path, multi_gpus=False) -> None:
+         print('torch_version:', torch.__version__)
+         if multi_gpus:  # inference on multiple gpus
+             from accelerate import load_checkpoint_and_dispatch, init_empty_weights, infer_auto_device_map
+             with init_empty_weights():
+                 model = AutoModel.from_pretrained(model_path, trust_remote_code=True,
+                                                   attn_implementation='sdpa', torch_dtype=torch.bfloat16)
+
+             device_map = infer_auto_device_map(
+                 model, max_memory={0: "10GB", 1: "10GB"},
+                 no_split_module_classes=['SiglipVisionTransformer', 'Qwen2DecoderLayer'])
+             device_id = device_map["llm.model.embed_tokens"]
+             device_map["llm.lm_head"] = device_id  # first and last layers of the llm should be on the same device
+             device_map["vpm"] = device_id
+             device_map["resampler"] = device_id
+             device_id2 = device_map["llm.model.layers.26"]
+             for i in range(8, 17):  # move layers 8-16 alongside layer 26
+                 device_map[f"llm.model.layers.{i}"] = device_id2
+             print(device_map)
+
+             self.model = load_checkpoint_and_dispatch(model, model_path, dtype=torch.bfloat16, device_map=device_map)
+             self.model.eval()
+         else:
+             self.model = AutoModel.from_pretrained(model_path, trust_remote_code=True,
+                                                    attn_implementation='sdpa', torch_dtype=torch.bfloat16)
+             self.model.eval().cuda()
+
+         self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+
+     def chat(self, input):
+         image = None
+         if "image" in input and len(input["image"]) > 10:  # legacy API
+             try:
+                 image = Image.open(io.BytesIO(base64.b64decode(input['image']))).convert('RGB')
+             except Exception:
+                 return "Image decode error"
+
+         msgs = json.loads(input["question"])
+
+         for msg in msgs:
+             contents = msg.pop('content')  # support str or List[Dict]
+             if isinstance(contents, str):
+                 contents = [contents]
+
+             new_cnts = []
+             for c in contents:
+                 if isinstance(c, dict):
+                     if c['type'] == 'text':
+                         c = c['pairs']
+                     elif c['type'] == 'image':
+                         c = Image.open(io.BytesIO(base64.b64decode(c["pairs"]))).convert('RGB')
+                     else:
+                         raise ValueError("content type only support text and image.")
+                 new_cnts.append(c)
+             msg['content'] = new_cnts
+         print(f'msgs: {str(msgs)}')
+
+         answer = self.model.chat(
+             image=image,
+             msgs=msgs,
+             tokenizer=self.tokenizer,
+         )
+         return answer
+
+
+ class MiniCPMVChat:
+     def __init__(self, model_path, multi_gpus=False) -> None:
+         if '12B' in model_path:
+             self.model = OmniLMM12B(model_path)
+         elif 'MiniCPM-Llama3-V' in model_path:
+             self.model = MiniCPMV2_5(model_path)
+         elif 'MiniCPM-V-2_6' in model_path:
+             self.model = MiniCPMV2_6(model_path, multi_gpus)
+         else:
+             self.model = MiniCPMV(model_path)
+
+     def chat(self, input):
+         return self.model.chat(input)
+
+
+ if __name__ == '__main__':
+     model_path = 'openbmb/OmniLMM-12B'
+     chat_model = MiniCPMVChat(model_path)
+
+     im_64 = img2base64('./assets/worldmap_ck.jpg')
+
+     # first round chat
+     msgs = [{"role": "user", "content": "What is interesting about this image?"}]
+     input = {"image": im_64, "question": json.dumps(msgs, ensure_ascii=True)}
+     answer = chat_model.chat(input)
+     print(msgs[-1]["content"] + '\n', answer)
+
+     # second round chat
+     msgs.append({"role": "assistant", "content": answer})
+     msgs.append({"role": "user", "content": "Where is China in the image?"})
+     input = {"image": im_64, "question": json.dumps(msgs, ensure_ascii=True)}
+     answer = chat_model.chat(input)
+     print(msgs[-1]["content"] + '\n', answer)
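The `input` dict that `MiniCPMVChat.chat` consumes pairs a base64-encoded image with a JSON-encoded message list, as the `__main__` block in chat.py shows. A minimal round-trip sketch of that payload format, runnable without any model (the PNG bytes are a stand-in, not a real asset):

```python
import base64
import json

# Client side: build the payload the way chat.py's __main__ block does.
fake_image_bytes = b"\x89PNG\r\n\x1a\nnot-a-real-image"  # stand-in for a file's raw bytes
im_64 = base64.b64encode(fake_image_bytes)               # img2base64 returns bytes like this

msgs = [{"role": "user", "content": "What is interesting about this image?"}]
payload = {"image": im_64, "question": json.dumps(msgs, ensure_ascii=True)}

# Server side: chat() base64-decodes the image field and json-loads the question.
decoded_bytes = base64.b64decode(payload["image"])
decoded_msgs = json.loads(payload["question"])

assert decoded_bytes == fake_image_bytes
assert decoded_msgs[0]["role"] == "user"
```

Multi-turn chat works by appending the assistant's answer and the next user message to `msgs` before re-encoding, exactly as in the second round of the `__main__` example.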
r1-a/response_generation/minicpm/MiniCPM-o/requirements.txt ADDED
@@ -0,0 +1,35 @@
+ packaging==23.2
+ addict==2.4.0
+ editdistance==0.6.2
+ einops==0.7.0
+ fairscale==0.4.0
+ jsonlines==4.0.0
+ markdown2==2.4.10
+ matplotlib==3.7.4
+ more_itertools==10.1.0
+ nltk==3.8.1
+ numpy==1.24.4
+ opencv_python_headless==4.5.5.64
+ openpyxl==3.1.2
+ Pillow==10.1.0
+ sacrebleu==2.3.2
+ seaborn==0.13.0
+ shortuuid==1.0.11
+ #spacy==3.7.2
+ timm==0.9.10
+ torch==2.1.2
+ torchvision==0.16.2
+ tqdm==4.66.1
+ protobuf==4.25.0
+ transformers==4.40.0
+ typing_extensions==4.8.0
+ uvicorn==0.24.0.post1
+ #xformers==0.0.22.post7
+ #flash_attn==2.3.4
+ sentencepiece==0.1.99
+ accelerate==0.30.1
+ socksio==1.0.0
+ gradio==4.41.0
+ gradio_client
+ http://thunlp.oss-cn-qingdao.aliyuncs.com/multi_modal/never_delete/modelscope_studio-0.4.0.9-py3-none-any.whl
+ decord
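Both requirements files mix strict `==` pins with unpinned names, commented-out entries, and a direct wheel URL. A small sketch of classifying such lines, e.g. to audit which dependencies are actually pinned (the `classify_requirement` helper is illustrative, not part of the repo):

```python
import re


def classify_requirement(line):
    """Classify one requirements.txt line as pinned, loose, a URL, or skippable."""
    stripped = line.strip()
    if not stripped or stripped.startswith('#'):
        return ('skip', None)                      # blank or commented out
    if stripped.startswith(('http://', 'https://')):
        return ('url', stripped)                   # direct wheel URL
    m = re.match(r'^([A-Za-z0-9_.\-]+)==(\S+)$', stripped)
    if m:
        return ('pinned', (m.group(1), m.group(2)))
    return ('loose', stripped)                     # unpinned package name


assert classify_requirement('torch==2.1.2') == ('pinned', ('torch', '2.1.2'))
assert classify_requirement('decord') == ('loose', 'decord')
assert classify_requirement('#spacy==3.7.2') == ('skip', None)
```

Running this over the two files above would flag `decord`, `moviepy`, `fastapi`, `uvicorn`, and `gradio_client` as the only unpinned entries.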
r1-a/response_generation/minicpm/MiniCPM-o/requirements_o2.6.txt ADDED
@@ -0,0 +1,22 @@
+ Pillow==10.1.0
+ torch==2.3.1
+ torchaudio==2.3.1
+ torchvision==0.18.1
+ transformers==4.44.2
+ sentencepiece==0.2.0
+ vector-quantize-pytorch==1.18.5
+ vocos==0.1.0
+ accelerate==1.2.1
+ timm==0.9.10
+ soundfile==0.12.1
+ librosa==0.9.0
+ decord
+ moviepy
+
+ # for web demo
+ aiofiles==23.2.1
+ onnxruntime==1.20.1
+ fastapi
+ uvicorn
+ gradio==4.44.1
+ http://thunlp.oss-cn-qingdao.aliyuncs.com/multi_modal/never_delete/modelscope_studio-0.4.0.9-py3-none-any.whl