HoneyTian commited on
Commit
a984ba9
·
1 Parent(s): a0ec039
Files changed (37) hide show
  1. data/dataset/agent-bigseller-id-60-choice.jsonl +0 -0
  2. data/dataset/agent-lingoace-zh-375-v2-choice.jsonl +3 -0
  3. data/dataset/agent-nxcloud-zh-375-v2-choice.jsonl +3 -0
  4. data/eval_data/aliyun_choice/aliyun/qwen-plus-2025-12-01/shenzhen_sase/aliyun_api_key/20251208_120245/agent-lingoace-zh-400-choice.jsonl +3 -0
  5. data/eval_data/aliyun_choice/aliyun/qwen3-max-2025-09-23/shenzhen_sase/aliyun_api_key/20251208_105202/agent-lingoace-zh-400-choice.jsonl +3 -0
  6. data/eval_data/aliyun_choice/aliyun/qwen3-max-2025-09-23/shenzhen_sase/aliyun_api_key/20251208_133147/agent-lingoace-zh-400-choice.jsonl +3 -0
  7. data/eval_data/aliyun_choice/aliyun/qwen3-max-preview/shenzhen_sase/aliyun_api_key/20251208_113004/agent-lingoace-zh-400-choice.jsonl +3 -0
  8. data/eval_data/aliyun_nxcloud_v2_choice/aliyun/qwen3-max-2025-09-23/shenzhen_sase/aliyun_api_key/20251208_102934/agent-nxcloud-zh-375-v2-choice.jsonl +3 -0
  9. data/eval_data/aliyun_nxcloud_v2_choice/aliyun/qwen3-max-preview/shenzhen_sase/aliyun_api_key/20251208_110422/agent-nxcloud-zh-375-v2-choice.jsonl +3 -0
  10. data/eval_data/aws/aws/amazon.nova-lite-v1-0/shenzhen_sase/aws_us_east/20250916_133511/agent-bingoplus-ph-90-choice.jsonl +3 -0
  11. data/eval_data/aws/aws/amazon.nova-lite-v1-0/shenzhen_sase/aws_us_east/20250916_154723/agent-lingoace-zh-400-choice.jsonl +3 -0
  12. data/eval_data/aws/aws/amazon.nova-micro-v1-0/shenzhen_sase/aws_us_east/20250916_140957/agent-bingoplus-ph-90-choice.jsonl +3 -0
  13. data/eval_data/aws/aws/amazon.nova-micro-v1-0/shenzhen_sase/aws_us_east/20250916_170731/agent-lingoace-zh-400-choice.jsonl +3 -0
  14. data/eval_data/aws/aws/amazon.nova-pro-v1-0/shenzhen_sase/aws_us_east/20250916_114857/agent-bingoplus-ph-90-choice.jsonl +3 -0
  15. data/eval_data/aws/aws/amazon.nova-pro-v1-0/shenzhen_sase/aws_us_east/20250916_142846/agent-lingoace-zh-400-choice.jsonl +3 -0
  16. data/eval_data/google_anthropic/anthropic/claude-3-5-haiku@20241022/shenzhen_sase/google_nxcloud_312303/20250910_100415/agent-lingoace-zh-400-choice.jsonl +3 -0
  17. data/eval_data/google_anthropic/anthropic/claude-3-5-sonnet-v2@20241022/shenzhen_sase/google_nxcloud_312303/20250910_100113/agent-lingoace-zh-400-choice.jsonl +0 -0
  18. data/eval_data/google_anthropic/anthropic/claude-3-5-sonnet@20240620/shenzhen_sase/google_nxcloud_312303/20250910_100441/agent-lingoace-zh-400-choice.jsonl +0 -0
  19. data/eval_data/google_anthropic/anthropic/claude-3-7-sonnet@20250219/shenzhen_sase/google_nxcloud_312303/20250910_100042/agent-lingoace-zh-400-choice.jsonl +3 -0
  20. data/eval_data/google_anthropic/anthropic/claude-3-haiku@20240307/shenzhen_sase/google_nxcloud_312303/20250910_100501/agent-lingoace-zh-400-choice.jsonl +0 -0
  21. data/eval_data/google_anthropic/anthropic/claude-3-opus@20240229/shenzhen_sase/google_nxcloud_312303/20250910_100451/agent-lingoace-zh-400-choice.jsonl +0 -0
  22. data/eval_data/google_anthropic/anthropic/claude-opus-4-1@20250805/shenzhen_sase/google_nxcloud_312303/20250910_095955/agent-lingoace-zh-400-choice.jsonl +3 -0
  23. examples/ali_communication/make_dataset.py +85 -0
  24. examples/kms/get_aliyun_dev_apikey.py +41 -0
  25. examples/make_dataset/make_choice.py +70 -0
  26. examples/make_dataset/make_choice_lingoace_v2.py +141 -0
  27. examples/make_raw_dataset/step_1_make_hk_dataset_by_log.py +2 -1
  28. examples/make_raw_dataset/step_3_filter_by_keywords.py +5 -0
  29. examples/make_raw_dataset/step_6_filter_by_choice.py +1 -1
  30. llm_eval_script/aliyun_choice.py +173 -0
  31. llm_eval_script/aliyun_nxcloud_v2_choice.py +233 -0
  32. llm_eval_script/aws.py +227 -0
  33. llm_eval_script/google_anthropic.py +10 -1
  34. main.py +3 -0
  35. requirements.txt +4 -0
  36. toolbox/aliyun_kms/__init__.py +6 -0
  37. toolbox/aliyun_kms/aliyun_kms.py +122 -0
data/dataset/agent-bigseller-id-60-choice.jsonl ADDED
File without changes
data/dataset/agent-lingoace-zh-375-v2-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a036052d750daf27450c520c1f5c7257077783a31d3a3fb43bf0e228ab22e80
3
+ size 1239647
data/dataset/agent-nxcloud-zh-375-v2-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4099cf306583c063b4bf69a485187ade46f45410843561154a3d8d50001b0bd3
3
+ size 1238502
data/eval_data/aliyun_choice/aliyun/qwen-plus-2025-12-01/shenzhen_sase/aliyun_api_key/20251208_120245/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bda5b0d743a600262716a089ec6eba8b21d6eb4bca2286443948f86035954f9b
3
+ size 1233921
data/eval_data/aliyun_choice/aliyun/qwen3-max-2025-09-23/shenzhen_sase/aliyun_api_key/20251208_105202/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e02b6dbf7cde1d9ee61289bb43af9082d695b38c966474e3b4e1015c54bdd7b2
3
+ size 1211356
data/eval_data/aliyun_choice/aliyun/qwen3-max-2025-09-23/shenzhen_sase/aliyun_api_key/20251208_133147/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b39f27725a32fc91d5cdfa594ea2e67d2f6d1f533b7b6718faab62b93fa21e34
3
+ size 1233777
data/eval_data/aliyun_choice/aliyun/qwen3-max-preview/shenzhen_sase/aliyun_api_key/20251208_113004/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31ec4900d031c7f0fa507b38eb226d0e3c255e4e921669d4776d93685d78f0cc
3
+ size 1211172
data/eval_data/aliyun_nxcloud_v2_choice/aliyun/qwen3-max-2025-09-23/shenzhen_sase/aliyun_api_key/20251208_102934/agent-nxcloud-zh-375-v2-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e83e4cdcecfdd3b3444e5f2852d2f6dbd6db34ecd4e1a96a4ff1355185986869
3
+ size 1081363
data/eval_data/aliyun_nxcloud_v2_choice/aliyun/qwen3-max-preview/shenzhen_sase/aliyun_api_key/20251208_110422/agent-nxcloud-zh-375-v2-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f99aead6576f9f2409be441d891112c91d9b064a601523e434c7e852ed614a4a
3
+ size 1081322
data/eval_data/aws/aws/amazon.nova-lite-v1-0/shenzhen_sase/aws_us_east/20250916_133511/agent-bingoplus-ph-90-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7dbdf8e6f5ebc2cc0fd3f32f297efb627c559e12656f603327ddf78f09a0c01
3
+ size 258621
data/eval_data/aws/aws/amazon.nova-lite-v1-0/shenzhen_sase/aws_us_east/20250916_154723/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3699747d0b0888affdf832c462ae58b9aa06d080729ba3ba174125f873cd412f
3
+ size 1204448
data/eval_data/aws/aws/amazon.nova-micro-v1-0/shenzhen_sase/aws_us_east/20250916_140957/agent-bingoplus-ph-90-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dadd78180802b49a905a850317eb54f0e183685dfbc6d4eee8e5ab5a7e50677
3
+ size 258563
data/eval_data/aws/aws/amazon.nova-micro-v1-0/shenzhen_sase/aws_us_east/20250916_170731/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c54b42d8739211d1aa7da75041c3a754f4c88ca4b2f55e26908adb3a31f48565
3
+ size 1205834
data/eval_data/aws/aws/amazon.nova-pro-v1-0/shenzhen_sase/aws_us_east/20250916_114857/agent-bingoplus-ph-90-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:951c1df603efe55563fbe2a824233805192adbcf1592b41b9b975070b406356b
3
+ size 258625
data/eval_data/aws/aws/amazon.nova-pro-v1-0/shenzhen_sase/aws_us_east/20250916_142846/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed19fe093a4d6457297bef741fdf283313c462544ed014fe40e58c6331e5700e
3
+ size 1204189
data/eval_data/google_anthropic/anthropic/claude-3-5-haiku@20241022/shenzhen_sase/google_nxcloud_312303/20250910_100415/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ed17f71bbde50f38be38a32f3caea001df9f0e2e22f2f189541f5ae1dbaa4b0
3
+ size 9134
data/eval_data/google_anthropic/anthropic/claude-3-5-sonnet-v2@20241022/shenzhen_sase/google_nxcloud_312303/20250910_100113/agent-lingoace-zh-400-choice.jsonl ADDED
File without changes
data/eval_data/google_anthropic/anthropic/claude-3-5-sonnet@20240620/shenzhen_sase/google_nxcloud_312303/20250910_100441/agent-lingoace-zh-400-choice.jsonl ADDED
File without changes
data/eval_data/google_anthropic/anthropic/claude-3-7-sonnet@20250219/shenzhen_sase/google_nxcloud_312303/20250910_100042/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:017dd25dc264762cc26eb201149b9bc7a18f81381174b92b0d619665655cb93e
3
+ size 26705
data/eval_data/google_anthropic/anthropic/claude-3-haiku@20240307/shenzhen_sase/google_nxcloud_312303/20250910_100501/agent-lingoace-zh-400-choice.jsonl ADDED
File without changes
data/eval_data/google_anthropic/anthropic/claude-3-opus@20240229/shenzhen_sase/google_nxcloud_312303/20250910_100451/agent-lingoace-zh-400-choice.jsonl ADDED
File without changes
data/eval_data/google_anthropic/anthropic/claude-opus-4-1@20250805/shenzhen_sase/google_nxcloud_312303/20250910_095955/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a079d7731f824df3d2bd5e758d58c31b41b6412c5253cb4e8f21493724a85571
3
+ size 14137
examples/ali_communication/make_dataset.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ import json
5
+
6
+ import pandas as pd
7
+
8
+ from project_settings import environment, project_path
9
+
10
+
11
+ def get_args():
12
+ parser = argparse.ArgumentParser()
13
+ parser.add_argument(
14
+ "--filename",
15
+ default="evaluation_results_max7.xlsx",
16
+ type=str
17
+ )
18
+ parser.add_argument(
19
+ "--dataset",
20
+ default=(project_path / "data/dataset/agent-lingoace-zh-400-choice.jsonl").as_posix(),
21
+ type=str
22
+ )
23
+ args = parser.parse_args()
24
+ return args
25
+
26
+
27
+ def main():
28
+ args = get_args()
29
+
30
+ dataset = dict()
31
+ with open(args.dataset, "r", encoding="utf-8") as f:
32
+ for row in f:
33
+ row = json.loads(row)
34
+ idx = row["idx"]
35
+ prompt = row["prompt"]
36
+ response = row["response"]
37
+ dataset[idx] = row
38
+
39
+ result = list()
40
+ df = pd.read_excel(args.filename)
41
+ for i, row in df.iterrows():
42
+ # print(row)
43
+ idx = row["idx"]
44
+ conversation = row["conversation"]
45
+ expected = row["expected"]
46
+ actual_label = row["actual_label"]
47
+ actual_reason = row["actual_reason"]
48
+ correct = row["correct"]
49
+ note = row["note"]
50
+
51
+ if correct is False:
52
+ print(idx)
53
+ print(conversation)
54
+ print(expected, actual_label)
55
+ print(actual_reason)
56
+ print(note)
57
+ print("+" * 150)
58
+
59
+ dataset_ = dataset[idx]
60
+ prompt = dataset_["prompt"]
61
+ response = dataset_["response"]
62
+ print(prompt)
63
+ print(response)
64
+ print("-" * 150)
65
+
66
+ result.append({
67
+ "idx": idx,
68
+ "conversation": conversation,
69
+ "expected": expected,
70
+ "actual_label": actual_label,
71
+ "actual_reason": actual_reason,
72
+ "note": note,
73
+ "prompt": prompt,
74
+ "response": response,
75
+ "op": None,
76
+ "remark": None,
77
+
78
+ })
79
+ result = pd.DataFrame(result)
80
+ result.to_excel("result.xlsx", index=False)
81
+ return
82
+
83
+
84
+ if __name__ == "__main__":
85
+ main()
examples/kms/get_aliyun_dev_apikey.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+
5
+ from project_settings import environment, project_path
6
+ from toolbox.aliyun_kms.aliyun_kms import AliyunKMS
7
+
8
+
9
+ def get_args():
10
+ parser = argparse.ArgumentParser()
11
+ parser.add_argument(
12
+ "--secret_name",
13
+ default="aliyun-chn-llm-dev",
14
+ type=str
15
+ )
16
+ args = parser.parse_args()
17
+ return args
18
+
19
+
20
+
21
+ def main():
22
+ args = get_args()
23
+
24
+ access_key_id = environment.get("ALIBABA_CLOUD_ACCESS_KEY_ID")
25
+ access_key_secret = environment.get("ALIBABA_CLOUD_ACCESS_KEY_SECRET")
26
+
27
+ kms_manager = AliyunKMS(
28
+ access_key_id=access_key_id,
29
+ access_key_secret=access_key_secret,
30
+ )
31
+
32
+ js = kms_manager.get_secret_value(args.secret_name)
33
+ secret_data = js["body"]["SecretData"]
34
+
35
+ # sk-6728fced6fd848149ebbb7c3899cc043
36
+ print(secret_data)
37
+ return
38
+
39
+
40
+ if __name__ == "__main__":
41
+ main()
examples/make_dataset/make_choice.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ import json
5
+ import os
6
+ from pathlib import Path
7
+ import sys
8
+ import time
9
+
10
+ pwd = os.path.abspath(os.path.dirname(__file__))
11
+ sys.path.append(os.path.join(pwd, "../../"))
12
+
13
+ from project_settings import environment, project_path
14
+
15
+
16
+ def get_args():
17
+ parser = argparse.ArgumentParser()
18
+ parser.add_argument(
19
+ "--raw_dataset",
20
+ default=(project_path / "data/raw_dataset/agent-bigseller-id-60-choice").as_posix(),
21
+ type=str
22
+ )
23
+ parser.add_argument(
24
+ "--dataset",
25
+ default=(project_path / "data/dataset/agent-bigseller-id-60-choice.jsonl").as_posix(),
26
+ type=str
27
+ )
28
+ args = parser.parse_args()
29
+ return args
30
+
31
+
32
+ def main():
33
+ args = get_args()
34
+
35
+ raw_dataset = Path(args.raw_dataset)
36
+ dataset = Path(args.dataset)
37
+ dataset.parent.mkdir(parents=True, exist_ok=True)
38
+
39
+ with open(dataset.as_posix(), "w", encoding="utf-8") as fout:
40
+ for sample_dir in raw_dataset.glob("*"):
41
+ idx = sample_dir.parts[-1]
42
+ system_prompt_file = sample_dir / "system_prompt.txt"
43
+ user_prompt_file = sample_dir / "user_prompt.txt"
44
+ response_file = sample_dir / "response.txt"
45
+
46
+ with open(system_prompt_file.as_posix(), "r", encoding="utf-8") as f:
47
+ system_prompt = f.read()
48
+ with open(user_prompt_file.as_posix(), "r", encoding="utf-8") as f:
49
+ user_prompt = f.read()
50
+ with open(response_file.as_posix(), "r", encoding="utf-8") as f:
51
+ response = f.read()
52
+
53
+ prompt = f"""{system_prompt}\n\n{user_prompt}""".strip()
54
+
55
+ print(f"{prompt}\n\n{response}")
56
+ print("-" * 150)
57
+
58
+ row_ = {
59
+ "idx": idx,
60
+ "prompt": prompt,
61
+ "response": response,
62
+ }
63
+ row_ = json.dumps(row_, ensure_ascii=False)
64
+ fout.write(f"{row_}\n")
65
+
66
+ return
67
+
68
+
69
+ if __name__ == "__main__":
70
+ main()
examples/make_dataset/make_choice_lingoace_v2.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ import json
5
+ import os
6
+ from pathlib import Path
7
+ import re
8
+ import sys
9
+ import time
10
+
11
+ pwd = os.path.abspath(os.path.dirname(__file__))
12
+ sys.path.append(os.path.join(pwd, "../../"))
13
+
14
+ from project_settings import environment, project_path
15
+
16
+
17
+ def get_args():
18
+ parser = argparse.ArgumentParser()
19
+ parser.add_argument(
20
+ "--raw_dataset",
21
+ default=(project_path / "data/raw_dataset/finished/agent-lingoace-zh-375-choice-v2").as_posix(),
22
+ type=str
23
+ )
24
+ parser.add_argument(
25
+ "--dataset",
26
+ default=(project_path / "data/dataset/agent-lingoace-zh-375-choice-v2.jsonl").as_posix(),
27
+ type=str
28
+ )
29
+ args = parser.parse_args()
30
+ return args
31
+
32
+
33
+ def main():
34
+ args = get_args()
35
+
36
+ raw_dataset = Path(args.raw_dataset)
37
+ dataset = Path(args.dataset)
38
+ dataset.parent.mkdir(parents=True, exist_ok=True)
39
+
40
+ with open(dataset.as_posix(), "w", encoding="utf-8") as fout:
41
+ for sample_dir in raw_dataset.glob("*"):
42
+ idx = sample_dir.parts[-1]
43
+ system_prompt_file = sample_dir / "system_prompt.txt"
44
+ user_prompt_file = sample_dir / "user_prompt.txt"
45
+ response_file = sample_dir / "response.txt"
46
+
47
+ with open(system_prompt_file.as_posix(), "r", encoding="utf-8") as f:
48
+ system_prompt = f.read()
49
+ with open(user_prompt_file.as_posix(), "r", encoding="utf-8") as f:
50
+ user_prompt = f.read()
51
+ with open(response_file.as_posix(), "r", encoding="utf-8") as f:
52
+ response = f.read()
53
+
54
+ # conversation
55
+ pattern = r"\*Conversation starts\*(.*)\*Conversation ends\*"
56
+ match = re.search(pattern, user_prompt, flags=re.DOTALL)
57
+ if match is None:
58
+ raise AssertionError
59
+ conversation = match.group(1)
60
+ pattern = r'(client:|customer service:)([^\n]*)'
61
+ matches = re.findall(pattern, conversation)
62
+ conversation_ = list()
63
+ for speaker, content in matches:
64
+ if speaker == "customer service:":
65
+ speaker = "assistant"
66
+ elif speaker == "client:":
67
+ speaker = "user"
68
+ else:
69
+ raise AssertionError(speaker)
70
+ conversation_.append({
71
+ "role": speaker,
72
+ "content": content,
73
+ })
74
+
75
+ # examples
76
+ pattern = r"\*Conversation ends\*(.*)\*\*Output\*\*"
77
+ match = re.search(pattern, user_prompt, flags=re.DOTALL)
78
+ if match is not None:
79
+ examples = match.group(0)
80
+ else:
81
+ examples = ""
82
+
83
+ examples_ = list()
84
+ pattern = re.compile(r'(?m)^\[(用户|你)\]:\s*"([^"]*)"\s*$|^输出:\s*(\S+)\s*$|^解释:\s*(.+)\s*$')
85
+ example_conversation_ = list()
86
+ outputs = dict()
87
+ for m in pattern.finditer(examples):
88
+ speaker, content, out, explanation = m.group(1), m.group(2), m.group(3), m.group(4)
89
+ if speaker:
90
+ if speaker == "你":
91
+ # speaker = "customer service"
92
+ speaker = "assistant"
93
+ elif speaker == "用户":
94
+ # speaker = "client"
95
+ speaker = "user"
96
+ else:
97
+ raise AssertionError
98
+ conversation_turn = {"role": speaker, "content": content}
99
+ example_conversation_.append(conversation_turn)
100
+ elif out:
101
+ outputs["output"] = out
102
+ elif explanation:
103
+ outputs["explanation"] = explanation
104
+ examples_.append({
105
+ "conversation": example_conversation_,
106
+ "outputs": outputs,
107
+ })
108
+ example_conversation_ = list()
109
+ outputs = dict()
110
+
111
+ splits = user_prompt.split("**Output**")
112
+ choice = splits[1].strip()
113
+ pattern = r'If (.*?)output ([A-F])'
114
+ matches = re.findall(pattern, choice, re.DOTALL)
115
+ choices_ = list()
116
+ for condition, output_letter in matches:
117
+ condition_ = f"If {condition[:-2]}"
118
+ choice_letter = output_letter
119
+ row = {
120
+ "condition": condition_,
121
+ "choice_letter": choice_letter,
122
+ }
123
+ choices_.append(row)
124
+
125
+ row = {
126
+ "idx": idx,
127
+ "system_prompt": system_prompt,
128
+ "conversation": conversation_,
129
+ "examples": examples_,
130
+ "choices": choices_,
131
+ "response": response,
132
+ }
133
+ row = json.dumps(row, ensure_ascii=False)
134
+ fout.write(f"{row}\n")
135
+ fout.flush()
136
+
137
+ return
138
+
139
+
140
+ if __name__ == "__main__":
141
+ main()
examples/make_raw_dataset/step_1_make_hk_dataset_by_log.py CHANGED
@@ -133,7 +133,8 @@ def main():
133
  row = extract(row)
134
  except Exception as e:
135
  print(row)
136
- raise e
 
137
  call_id = row["call_id"]
138
  system_prompt = row.get("system_prompt")
139
  conversation = row.get("conversation")
 
133
  row = extract(row)
134
  except Exception as e:
135
  print(row)
136
+ # raise e
137
+ continue
138
  call_id = row["call_id"]
139
  system_prompt = row.get("system_prompt")
140
  conversation = row.get("conversation")
examples/make_raw_dataset/step_3_filter_by_keywords.py CHANGED
@@ -69,6 +69,11 @@ def main():
69
  (["作为VIP客户"], "vip"),
70
  (["FedEx"], "fedex"),
71
  (["Chinese laser cutting"], "laser"),
 
 
 
 
 
72
  ]
73
 
74
  flag = False
 
69
  (["作为VIP客户"], "vip"),
70
  (["FedEx"], "fedex"),
71
  (["Chinese laser cutting"], "laser"),
72
+ (["Bigseller"], "bigseller"),
73
+ (["BigSeller"], "bigseller"),
74
+ (["ERP"], "bigseller"),
75
+ (["product"], "promote"),
76
+ (["川芎红花苗灸液"], "promote"),
77
  ]
78
 
79
  flag = False
examples/make_raw_dataset/step_6_filter_by_choice.py CHANGED
@@ -12,7 +12,7 @@ def get_args():
12
  parser = argparse.ArgumentParser()
13
  parser.add_argument(
14
  "--data_dir",
15
- default=(project_path / "data/llm-log-hk/extract-dataset/choice-nxpay").as_posix(),
16
  type=str
17
  )
18
  args = parser.parse_args()
 
12
  parser = argparse.ArgumentParser()
13
  parser.add_argument(
14
  "--data_dir",
15
+ default=(project_path / "data/llm-log-hk/extract-dataset/choice-promote").as_posix(),
16
  type=str
17
  )
18
  args = parser.parse_args()
llm_eval_script/aliyun_choice.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ from datetime import datetime
5
+ import json
6
+ import os
7
+ from pathlib import Path
8
+ import sys
9
+ import time
10
+ from zoneinfo import ZoneInfo # Python 3.9+ 自带,无需安装
11
+
12
+ pwd = os.path.abspath(os.path.dirname(__file__))
13
+ sys.path.append(os.path.join(pwd, "../"))
14
+
15
+ from openai import OpenAI
16
+
17
+ from project_settings import environment, project_path
18
+
19
+
20
+ def get_args():
21
+ parser = argparse.ArgumentParser()
22
+ parser.add_argument(
23
+ "--model_name",
24
+ default="qwen3-max-2025-09-23",
25
+ # default="qwen3-max-preview",
26
+ # default="qwen-plus-2025-12-01",
27
+ type=str
28
+ )
29
+ parser.add_argument(
30
+ "--eval_dataset_name",
31
+ default="agent-lingoace-zh-400-choice.jsonl",
32
+ type=str
33
+ )
34
+ parser.add_argument(
35
+ "--eval_dataset_dir",
36
+ default=(project_path / "data/dataset").as_posix(),
37
+ type=str
38
+ )
39
+ parser.add_argument(
40
+ "--eval_data_dir",
41
+ default=(project_path / "data/eval_data").as_posix(),
42
+ type=str
43
+ )
44
+ parser.add_argument(
45
+ "--client",
46
+ default="shenzhen_sase",
47
+ type=str
48
+ )
49
+ parser.add_argument(
50
+ "--service",
51
+ default="aliyun_api_key",
52
+ type=str
53
+ )
54
+ parser.add_argument(
55
+ "--create_time_str",
56
+ default="null",
57
+ # default="20250812_092418",
58
+ type=str
59
+ )
60
+ parser.add_argument(
61
+ "--interval",
62
+ default=1,
63
+ type=int
64
+ )
65
+ args = parser.parse_args()
66
+ return args
67
+
68
+
69
+ def main():
70
+ args = get_args()
71
+
72
+ eval_dataset_dir = Path(args.eval_dataset_dir)
73
+ eval_dataset_dir.mkdir(parents=True, exist_ok=True)
74
+ eval_data_dir = Path(args.eval_data_dir)
75
+ eval_data_dir.mkdir(parents=True, exist_ok=True)
76
+
77
+ if args.create_time_str == "null":
78
+ tz = ZoneInfo("Asia/Shanghai")
79
+ now = datetime.now(tz)
80
+ create_time_str = now.strftime("%Y%m%d_%H%M%S")
81
+ # create_time_str = "20250724_090615"
82
+ else:
83
+ create_time_str = args.create_time_str
84
+
85
+ eval_dataset = eval_dataset_dir / args.eval_dataset_name
86
+
87
+ model_name_ = args.model_name.replace("/", "#")
88
+ output_file = eval_data_dir / f"aliyun_choice/aliyun/{model_name_}/{args.client}/{args.service}/{create_time_str}/{args.eval_dataset_name}"
89
+ output_file.parent.mkdir(parents=True, exist_ok=True)
90
+
91
+ api_key = environment.get(args.service, dtype=str)
92
+ client = OpenAI(
93
+ base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
94
+ # Read your Ark API Key from the environment variable.
95
+ api_key=api_key
96
+ )
97
+
98
+ total = 0
99
+ total_correct = 0
100
+
101
+ # finished
102
+ finished_idx_set = set()
103
+ if os.path.exists(output_file.as_posix()):
104
+ with open(output_file.as_posix(), "r", encoding="utf-8") as f:
105
+ for row in f:
106
+ row = json.loads(row)
107
+ idx = row["idx"]
108
+ total = row["total"]
109
+ total_correct = row["total_correct"]
110
+ finished_idx_set.add(idx)
111
+ print(f"finished count: {len(finished_idx_set)}")
112
+
113
+ with open(eval_dataset.as_posix(), "r", encoding="utf-8") as fin, open(output_file.as_posix(), "a+", encoding="utf-8") as fout:
114
+ for row in fin:
115
+ row = json.loads(row)
116
+ idx = row["idx"]
117
+ prompt = row["prompt"]
118
+ response = row["response"]
119
+
120
+ if idx in finished_idx_set:
121
+ continue
122
+ finished_idx_set.add(idx)
123
+
124
+ try:
125
+ time.sleep(args.interval)
126
+ print(f"sleep: {args.interval}")
127
+ time_begin = time.time()
128
+ completion = client.chat.completions.create(
129
+ model=args.model_name,
130
+ messages=[
131
+ {"role": "user", "content": prompt},
132
+ ],
133
+ # 由于 enable_thinking 非 OpenAI 标准参数,需要通过 extra_body 传入
134
+ extra_body={"enable_thinking": False},
135
+ stream=False,
136
+ )
137
+ time_cost = time.time() - time_begin
138
+ print(f"time_cost: {time_cost}")
139
+ except Exception as e:
140
+ print(f"request failed, error type: {type(e)}, error text: {str(e)}")
141
+ continue
142
+
143
+ # print(f"completion: {completion}")
144
+ prediction = completion.choices[0].message.content
145
+ rid = completion.id
146
+
147
+ correct = 1 if prediction == response else 0
148
+
149
+ total += 1
150
+ total_correct += correct
151
+ score = total_correct / total
152
+
153
+ row_ = {
154
+ "idx": idx,
155
+ "rid": rid,
156
+ "prompt": prompt,
157
+ "response": response,
158
+ "prediction": prediction,
159
+ "correct": correct,
160
+ "total": total,
161
+ "total_correct": total_correct,
162
+ "score": score,
163
+ "time_cost": time_cost,
164
+ }
165
+ row_ = json.dumps(row_, ensure_ascii=False)
166
+ fout.write(f"{row_}\n")
167
+ fout.flush()
168
+
169
+ return
170
+
171
+
172
+ if __name__ == "__main__":
173
+ main()
llm_eval_script/aliyun_nxcloud_v2_choice.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ https://help.aliyun.com/zh/model-studio/qwen-api-reference
5
+ https://help.aliyun.com/zh/model-studio/models
6
+ https://help.aliyun.com/zh/model-studio/models?spm=a2c4g.11186623.0.i4#d4ccf72f23jh9
7
+
8
+ https://help.aliyun.com/zh/model-studio/text-generation?spm=a2c4g.11186623.0.0.6b772e068nnT1J#24e54b27d4agt
9
+
10
+ Deep-Thinking
11
+ https://help.aliyun.com/zh/model-studio/deep-thinking?spm=a2c4g.11186623.0.0.56076f58IJd4mP
12
+
13
+ """
14
+ import argparse
15
+ from datetime import datetime
16
+ import json
17
+ import os
18
+ from pathlib import Path
19
+ import sys
20
+ import time
21
+ from zoneinfo import ZoneInfo # Python 3.9+ 自带,无需安装
22
+
23
+ pwd = os.path.abspath(os.path.dirname(__file__))
24
+ sys.path.append(os.path.join(pwd, "../"))
25
+
26
+ from openai import OpenAI
27
+
28
+ from project_settings import environment, project_path
29
+
30
+
31
+ def get_args():
32
+ parser = argparse.ArgumentParser()
33
+ parser.add_argument(
34
+ "--model_name",
35
+ # default="qwen3-max-2025-09-23",
36
+ default="qwen3-max-preview",
37
+ # default="qwen-plus-2025-12-01",
38
+ type=str
39
+ )
40
+ parser.add_argument(
41
+ "--eval_dataset_name",
42
+ default="agent-nxcloud-zh-375-choice-v2.jsonl",
43
+ type=str
44
+ )
45
+ parser.add_argument(
46
+ "--eval_dataset_dir",
47
+ default=(project_path / "data/dataset").as_posix(),
48
+ type=str
49
+ )
50
+ parser.add_argument(
51
+ "--eval_data_dir",
52
+ default=(project_path / "data/eval_data").as_posix(),
53
+ type=str
54
+ )
55
+ parser.add_argument(
56
+ "--client",
57
+ default="shenzhen_sase",
58
+ type=str
59
+ )
60
+ parser.add_argument(
61
+ "--service",
62
+ default="aliyun_api_key",
63
+ type=str
64
+ )
65
+ parser.add_argument(
66
+ "--create_time_str",
67
+ default="null",
68
+ # default="20250812_092418",
69
+ type=str
70
+ )
71
+ parser.add_argument(
72
+ "--interval",
73
+ default=1,
74
+ type=int
75
+ )
76
+ args = parser.parse_args()
77
+ return args
78
+
79
+
80
+ def conversation_to_str(conversation: list):
81
+ conversation_str = ""
82
+ for turn in conversation:
83
+ role = turn["role"]
84
+ content = turn["content"]
85
+ row_ = f"{role}: {content}\n"
86
+ conversation_str += row_
87
+
88
+ return conversation_str
89
+
90
+
91
+ def main():
92
+ args = get_args()
93
+
94
+ eval_dataset_dir = Path(args.eval_dataset_dir)
95
+ eval_dataset_dir.mkdir(parents=True, exist_ok=True)
96
+ eval_data_dir = Path(args.eval_data_dir)
97
+ eval_data_dir.mkdir(parents=True, exist_ok=True)
98
+
99
+ if args.create_time_str == "null":
100
+ tz = ZoneInfo("Asia/Shanghai")
101
+ now = datetime.now(tz)
102
+ create_time_str = now.strftime("%Y%m%d_%H%M%S")
103
+ # create_time_str = "20250724_090615"
104
+ else:
105
+ create_time_str = args.create_time_str
106
+
107
+ eval_dataset = eval_dataset_dir / args.eval_dataset_name
108
+
109
+ model_name_ = args.model_name.replace("/", "#")
110
+ output_file = eval_data_dir / f"aliyun_nxcloud_v2_choice/aliyun/{model_name_}/{args.client}/{args.service}/{create_time_str}/{args.eval_dataset_name}"
111
+ output_file.parent.mkdir(parents=True, exist_ok=True)
112
+
113
+ api_key = environment.get(args.service, dtype=str)
114
+ client = OpenAI(
115
+ base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
116
+ # Read your Ark API Key from the environment variable.
117
+ api_key=api_key
118
+ )
119
+
120
+ total = 0
121
+ total_correct = 0
122
+
123
+ # finished
124
+ finished_idx_set = set()
125
+ if os.path.exists(output_file.as_posix()):
126
+ with open(output_file.as_posix(), "r", encoding="utf-8") as f:
127
+ for row in f:
128
+ row = json.loads(row)
129
+ idx = row["idx"]
130
+ total = row["total"]
131
+ total_correct = row["total_correct"]
132
+ finished_idx_set.add(idx)
133
+ print(f"finished count: {len(finished_idx_set)}")
134
+
135
+ with open(eval_dataset.as_posix(), "r", encoding="utf-8") as fin, open(output_file.as_posix(), "a+", encoding="utf-8") as fout:
136
+ for row in fin:
137
+ row = json.loads(row)
138
+ idx = row["idx"]
139
+ system_prompt = row["system_prompt"]
140
+ conversation = row["conversation"]
141
+ examples = row["examples"]
142
+ choices = row["choices"]
143
+ response = row["response"]
144
+
145
+ if idx in finished_idx_set:
146
+ continue
147
+
148
+ # conversation
149
+ conversation_str = conversation_to_str(conversation)
150
+
151
+ examples_str = ""
152
+ for example in examples:
153
+ conversation_ = example["conversation"]
154
+ outputs = example["outputs"]
155
+ output = outputs["output"]
156
+ explanation = outputs["explanation"]
157
+
158
+ examples_str += conversation_to_str(conversation_)
159
+ examples_str += f"Output: {output}\n"
160
+ examples_str += f"Explanation: {explanation}\n\n"
161
+
162
+ # print(examples_str)
163
+
164
+ choices_str = ""
165
+ for choice in choices:
166
+ condition = choice["condition"]
167
+ choice_letter = choice["choice_letter"]
168
+
169
+ row_ = f"{condition}, output: {choice_letter}\n"
170
+ choices_str += row_
171
+ choices_str += "\nRemember to output ONLY the corresponding letter.\nYour output is:"
172
+
173
+ # prompt = f"{system_prompt}\n\n**Output**\n{choices_}\n**Examples**\n{examples_}"
174
+ prompt1 = f"{system_prompt}\n\n**Examples**\n{examples_str}"
175
+ prompt2 = f"**Conversation**\n{conversation_str}\n\n**Output**\n{choices_str}"
176
+ print(prompt1)
177
+ print(prompt2)
178
+
179
+ messages = list()
180
+ messages.append(
181
+ {"role": "system", "content": prompt1},
182
+ )
183
+ messages.append(
184
+ {"role": "user", "content": prompt2},
185
+ )
186
+ print(f"messages: {json.dumps(messages, ensure_ascii=False, indent=4)}")
187
+
188
+ try:
189
+ time.sleep(args.interval)
190
+ print(f"sleep: {args.interval}")
191
+ time_begin = time.time()
192
+ completion = client.chat.completions.create(
193
+ model=args.model_name,
194
+ messages=messages,
195
+ # 由于 enable_thinking 非 OpenAI 标准参数,需要通过 extra_body 传入
196
+ extra_body={"enable_thinking": False},
197
+ stream=False,
198
+ )
199
+ time_cost = time.time() - time_begin
200
+ print(f"time_cost: {time_cost}")
201
+ except Exception as e:
202
+ print(f"request failed, error type: {type(e)}, error text: {str(e)}")
203
+ continue
204
+
205
+ # print(f"completion: {completion}")
206
+ prediction = completion.choices[0].message.content
207
+
208
+ correct = 1 if prediction == response else 0
209
+
210
+ total += 1
211
+ total_correct += correct
212
+ score = total_correct / total
213
+
214
+ row_ = {
215
+ "idx": idx,
216
+ "messages": messages,
217
+ "response": response,
218
+ "prediction": prediction,
219
+ "correct": correct,
220
+ "total": total,
221
+ "total_correct": total_correct,
222
+ "score": score,
223
+ "time_cost": time_cost,
224
+ }
225
+ row_ = json.dumps(row_, ensure_ascii=False)
226
+ fout.write(f"{row_}\n")
227
+ fout.flush()
228
+
229
+ return
230
+
231
+
232
+ if __name__ == "__main__":
233
+ main()
llm_eval_script/aws.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html
5
+
6
+ https://docs.aws.amazon.com/nova/latest/userguide/using-invoke-api.html?utm_source=chatgpt.com
7
+ """
8
+ import argparse
9
+ from datetime import datetime
10
+ import json
11
+ import os
12
+ from pathlib import Path
13
+ import sys
14
+ import time
15
+ from zoneinfo import ZoneInfo # Python 3.9+ 自带,无需安装
16
+
17
+ pwd = os.path.abspath(os.path.dirname(__file__))
18
+ sys.path.append(os.path.join(pwd, "../"))
19
+
20
+ import boto3
21
+
22
+ from project_settings import environment, project_path
23
+
24
+
25
def get_args():
    """Build and parse the CLI arguments for the AWS Bedrock eval script.

    Typical usage::

        python3 aws.py --model_name amazon.nova-micro-v1:0 \\
            --eval_dataset_name agent-lingoace-zh-400-choice.jsonl \\
            --client "us_west(47.88.76.239)" \\
            --create_time_str 20250723-interval-10 \\
            --interval 10

    Returns:
        argparse.Namespace with model/dataset/output-path/throttling settings.
    """
    parser = argparse.ArgumentParser()

    # (flag, default, type) triples; behaviorally identical to one
    # parser.add_argument(...) call per entry.
    # Other model ids that have been evaluated with this script:
    #   ai21.jamba-1-5-large-v1:0 / ai21.jamba-1-5-mini-v1:0
    #   amazon.nova-canvas-v1:0 / amazon.nova-premier-v1:0
    #   amazon.nova-pro-v1:0 / amazon.nova-lite-v1:0
    #   amazon.nova-reel-v1:0 / amazon.nova-reel-v1:1 / amazon.nova-sonic-v1:0
    # Other datasets:
    #   agent-bingoplus-ph-90-choice.jsonl / arc-easy-1000-choice.jsonl
    argument_specs = (
        ("--model_name", "amazon.nova-micro-v1:0", str),
        ("--eval_dataset_name", "agent-lingoace-zh-400-choice.jsonl", str),
        ("--eval_dataset_dir", (project_path / "data/dataset").as_posix(), str),
        ("--eval_data_dir", (project_path / "data/eval_data").as_posix(), str),
        ("--client", "shenzhen_sase", str),
        ("--service", "aws_us_east", str),
        ("--create_time_str", "null", str),
        ("--interval", 10, int),
    )
    for flag, default_value, value_type in argument_specs:
        parser.add_argument(flag, default=default_value, type=value_type)

    return parser.parse_args()
96
+
97
+
98
def main():
    """Evaluate an AWS Bedrock model on a JSONL multiple-choice dataset.

    Reads `{idx, prompt, response}` rows from the eval dataset, sends each
    prompt to Bedrock via ``invoke_model`` and scores exact-match accuracy
    between the model output and the reference ``response``. Results are
    appended to a per-run JSONL file; an interrupted run can be resumed
    because already-written ``idx`` values are skipped.
    """
    args = get_args()

    # Credentials for the chosen service are stored as a JSON blob in the
    # project's environment store.
    service = environment.get(key=args.service, dtype=json.loads)
    aws_access_key_id = service["AWS_ACCESS_KEY_ID"]
    aws_secret_access_key = service["AWS_SECRET_ACCESS_KEY"]
    aws_default_region = service["AWS_DEFAULT_REGION"]

    # Export so boto3's default credential chain picks them up.
    os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
    os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
    os.environ["AWS_DEFAULT_REGION"] = aws_default_region

    client = boto3.client(
        service_name="bedrock-runtime",
        region_name=aws_default_region
    )

    eval_dataset_dir = Path(args.eval_dataset_dir)
    eval_dataset_dir.mkdir(parents=True, exist_ok=True)
    eval_data_dir = Path(args.eval_data_dir)
    eval_data_dir.mkdir(parents=True, exist_ok=True)

    # "null" means "start a fresh run": timestamp (Asia/Shanghai) becomes part
    # of the output path. Passing an existing create_time_str resumes that run.
    if args.create_time_str == "null":
        tz = ZoneInfo("Asia/Shanghai")
        now = datetime.now(tz)
        create_time_str = now.strftime("%Y%m%d_%H%M%S")
    else:
        create_time_str = args.create_time_str

    eval_dataset = eval_dataset_dir / args.eval_dataset_name

    # Sanitize the model id so it is safe to use as a directory name.
    model_name_ = args.model_name
    model_name_ = model_name_.replace("/", "#")
    model_name_ = model_name_.replace(":", "-")

    output_file = eval_data_dir / f"aws/aws/{model_name_}/{args.client}/{args.service}/{create_time_str}/{args.eval_dataset_name}"
    output_file.parent.mkdir(parents=True, exist_ok=True)

    total = 0
    total_correct = 0

    # Resume support: re-read any existing output file to recover the set of
    # finished idx values and the running totals (the last row read wins,
    # which matches the final line of an append-only JSONL file).
    finished_idx_set = set()
    if os.path.exists(output_file.as_posix()):
        with open(output_file.as_posix(), "r", encoding="utf-8") as f:
            for row in f:
                row = json.loads(row)
                idx = row["idx"]
                total = row["total"]
                total_correct = row["total_correct"]
                finished_idx_set.add(idx)
    print(f"finished count: {len(finished_idx_set)}")

    # Output is opened in append mode so resumed runs extend the same file.
    with open(eval_dataset.as_posix(), "r", encoding="utf-8") as fin, open(output_file.as_posix(), "a+", encoding="utf-8") as fout:
        for row in fin:
            row = json.loads(row)
            idx = row["idx"]
            prompt = row["prompt"]
            response = row["response"]

            if idx in finished_idx_set:
                continue
            # Also guards against duplicate idx values inside the dataset.
            finished_idx_set.add(idx)

            # Amazon Nova invoke-API request body (schemaVersion messages-v1).
            body = {
                "schemaVersion": "messages-v1",
                "messages": [
                    {
                        "role": "user",
                        "content": [{"text": prompt}]
                    }
                ],
                "inferenceConfig": {
                    # maxTokens=1: the eval presumably expects a single choice
                    # letter as the whole completion — confirm against the
                    # dataset's prompt template.
                    "maxTokens": 1,
                    "temperature": 0.5,
                    "topP": 0.95,
                    # optional: topK could be supplied via an extra field
                }
            }

            try:
                # Fixed sleep before every request to stay under rate limits.
                time.sleep(args.interval)
                print(f"sleep: {args.interval}")
                time_begin = time.time()
                llm_response = client.invoke_model(
                    modelId=args.model_name,
                    body=json.dumps(body),
                    contentType="application/json"
                )

                llm_response = json.loads(llm_response["body"].read())
                time_cost = time.time() - time_begin
                print(f"time_cost: {time_cost}")

            except Exception as e:
                # Best-effort: log and skip this row; it stays unfinished and
                # will be retried on the next (resumed) run.
                print(f"request failed, error type: {type(e)}, error text: {str(e)}")
                continue

            llm_response = llm_response["output"]["message"]
            prediction = llm_response["content"][0]["text"]

            # Exact string match against the reference answer.
            correct = 1 if prediction == response else 0

            total += 1
            total_correct += correct
            score = total_correct / total

            # Each output row carries the running totals so a resume can
            # recover state from the last line alone.
            row_ = {
                "idx": idx,
                "prompt": prompt,
                "response": response,
                "prediction": prediction,
                "correct": correct,
                "total": total,
                "total_correct": total_correct,
                "score": score,
                "time_cost": time_cost,
            }
            row_ = json.dumps(row_, ensure_ascii=False)
            fout.write(f"{row_}\n")
            fout.flush()

    return


if __name__ == "__main__":
    main()
llm_eval_script/google_anthropic.py CHANGED
@@ -27,8 +27,17 @@ def get_args():
27
  parser = argparse.ArgumentParser()
28
  parser.add_argument(
29
  "--model_name",
30
- default="claude-opus-4@20250514",
 
31
  # default="claude-sonnet-4@20250514",
 
 
 
 
 
 
 
 
32
  type=str
33
  )
34
  parser.add_argument(
 
27
  parser = argparse.ArgumentParser()
28
  parser.add_argument(
29
  "--model_name",
30
+ # default="claude-opus-4-1@20250805",
31
+ # default="claude-opus-4@20250514",
32
  # default="claude-sonnet-4@20250514",
33
+ # default="claude-3-7-sonnet@20250219",
34
+ # default="claude-3-5-haiku@20241022",
35
+
36
+ # default="claude-3-5-sonnet-v2@20241022",
37
+ # default="claude-3-opus@20240229",
38
+
39
+ # default="claude-3-5-sonnet@20240620",
40
+ default="claude-3-haiku@20240307",
41
  type=str
42
  )
43
  parser.add_argument(
main.py CHANGED
@@ -146,6 +146,7 @@ def load_board():
146
  if total == 0:
147
  continue
148
  score = np.mean(score_list)
 
149
  time_cost_mean = np.mean(time_cost_list)
150
  time_cost_var = np.var(time_cost_list)
151
 
@@ -158,6 +159,7 @@ def load_board():
158
  "model_name": model_name,
159
  "dataset": dataset,
160
  "score": round(score, 4),
 
161
  "time_cost(mean)": round(time_cost_mean, 4),
162
  "time_cost(var)": round(time_cost_var, 4),
163
  "time_cost(75%)": round(time_cost_p75, 4),
@@ -238,6 +240,7 @@ def when_click_view_chat_button(filename: str):
238
 
239
  board_columns_choices = [
240
  "company", "model_name", "dataset", "score",
 
241
  "time_cost(mean)",
242
  "time_cost(var)",
243
  "time_cost(75%)", "time_cost(95%)", "time_cost(99%)",
 
146
  if total == 0:
147
  continue
148
  score = np.mean(score_list)
149
+ time_cost_min = np.min(time_cost_list)
150
  time_cost_mean = np.mean(time_cost_list)
151
  time_cost_var = np.var(time_cost_list)
152
 
 
159
  "model_name": model_name,
160
  "dataset": dataset,
161
  "score": round(score, 4),
162
+ "time_cost(min)": round(time_cost_min, 4),
163
  "time_cost(mean)": round(time_cost_mean, 4),
164
  "time_cost(var)": round(time_cost_var, 4),
165
  "time_cost(75%)": round(time_cost_p75, 4),
 
240
 
241
  board_columns_choices = [
242
  "company", "model_name", "dataset", "score",
243
+ "time_cost(min)",
244
  "time_cost(mean)",
245
  "time_cost(var)",
246
  "time_cost(75%)", "time_cost(95%)", "time_cost(99%)",
requirements.txt CHANGED
@@ -9,3 +9,7 @@ smithy-aws-core>=0.0.1
9
  aws_sdk_bedrock_runtime
10
  boto3
11
  anthropic
 
 
 
 
 
9
  aws_sdk_bedrock_runtime
10
  boto3
11
  anthropic
12
+ alibabacloud_kms20160120
13
+ alibabacloud_credentials
14
+ alibabacloud_tea_openapi
15
+ alibabacloud_tea_util
toolbox/aliyun_kms/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Package marker for ``toolbox.aliyun_kms``."""


if __name__ == "__main__":
    pass
toolbox/aliyun_kms/aliyun_kms.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ import json
5
+ import os
6
+
7
+ from alibabacloud_kms20160120.client import Client as Kms20160120Client
8
+ from alibabacloud_credentials.client import Client as CredentialClient
9
+ from alibabacloud_kms20160120.models import GetSecretValueResponse
10
+ from alibabacloud_tea_openapi import models as open_api_models
11
+ from alibabacloud_kms20160120 import models as kms_20160120_models
12
+ from alibabacloud_tea_util import models as util_models
13
+ from alibabacloud_tea_util.client import Client as UtilClient
14
+ from alibabacloud_credentials.models import Config
15
+
16
+
17
class AliyunKMS(object):
    """
    Thin wrapper around the Alibaba Cloud KMS (API version 2016-01-20)
    secrets endpoints: create a secret and read a secret value, with both
    sync and async read variants.

    https://help.aliyun.com/zh/sdk/developer-reference/v2-manage-python-access-credentials
    """
    def __init__(self,
                 access_key_id: str = None,
                 access_key_secret: str = None,
                 endpoint: str = "kms.ap-southeast-1.aliyuncs.com",
                 ):
        # Static access-key credentials; if None, behavior depends on the
        # credential client's own fallback chain — confirm before relying on it.
        self.access_key_id = access_key_id
        self.access_key_secret = access_key_secret
        self.endpoint = endpoint

        # One shared KMS client per wrapper instance.
        self.client = self.get_client()

    def get_client(self):
        """Build a KMS client authenticated with the stored access-key pair."""
        credential = CredentialClient(
            config=Config(
                type="access_key",
                access_key_id=self.access_key_id,
                access_key_secret=self.access_key_secret,
            )
        )
        config = open_api_models.Config(
            credential=credential
        )
        # For the list of regional endpoints see https://api.aliyun.com/product/Kms
        config.endpoint = self.endpoint

        client = Kms20160120Client(config)
        return client

    def create_secret(self, secret_name: str, secret_data: str, version_id: str):
        """
        Create a secret with an initial version.

        https://next.api.aliyun.com/api/Kms/2016-01-20/CreateSecret
        """
        create_secret_request = kms_20160120_models.CreateSecretRequest(
            secret_name=secret_name,
            secret_data=secret_data,
            version_id=version_id,
        )
        runtime = util_models.RuntimeOptions()

        # NOTE(review): returns the raw SDK response object, unlike
        # get_secret_value which returns a plain dict via to_map().
        result = self.client.create_secret_with_options(create_secret_request, runtime)
        return result

    def get_secret_value(self, secret_name: str, version_id: str = None):
        """
        Fetch a secret value (latest version when version_id is None) and
        return the response as a plain dict.

        https://next.api.aliyun.com/api/Kms/2016-01-20/GetSecretValue
        """
        get_secret_value_request = kms_20160120_models.GetSecretValueRequest(
            secret_name=secret_name,
            version_id=version_id,
        )
        runtime = util_models.RuntimeOptions()

        response: GetSecretValueResponse = self.client.get_secret_value_with_options(get_secret_value_request, runtime)
        js = response.to_map()
        return js

    async def async_get_secret_value(self, secret_name: str, version_id: str = None):
        """
        Async variant of :meth:`get_secret_value`.

        https://next.api.aliyun.com/api/Kms/2016-01-20/GetSecretValue
        """
        get_secret_value_request = kms_20160120_models.GetSecretValueRequest(
            secret_name=secret_name,
            version_id=version_id,
        )
        runtime = util_models.RuntimeOptions()

        response: GetSecretValueResponse = await self.client.get_secret_value_with_options_async(get_secret_value_request, runtime)
        js = response.to_map()
        return js
90
+
91
+
92
def main():
    """Smoke-test the AliyunKMS wrapper by fetching one secret.

    Loads the Alibaba Cloud access-key pair from the local ``settings``
    environment store, builds an :class:`AliyunKMS` client and prints the
    response of a ``GetSecretValue`` call for a known secret name.
    """
    from settings import environment

    access_key_id = environment.get(key="ALIBABA_CLOUD_ACCESS_KEY_ID", dtype=str)
    access_key_secret = environment.get(key="ALIBABA_CLOUD_ACCESS_KEY_SECRET", dtype=str)
    # Security: never echo the raw secret to stdout/logs; print a masked
    # preview so the operator can still confirm which credential is in use.
    print(f"access_key_id: {access_key_id}")
    masked_secret = f"{access_key_secret[:4]}****" if access_key_secret else "<empty>"
    print(f"access_key_secret: {masked_secret}")

    manager = AliyunKMS(
        access_key_id=access_key_id,
        access_key_secret=access_key_secret,
    )

    # version_id omitted -> the latest secret version is returned.
    result = manager.get_secret_value(
        secret_name="aliyun-nxai123-oss-dev",
    )
    print(result)
    return


if __name__ == "__main__":
    main()