# # from openai import OpenAI
# # def test_local_llm(port: int, prompt: str, model_name: str = "qwen"):
# # # 初始化本地模型客户端
# # client = OpenAI(
# # api_key="EMPTY", # 本地模型通常不校验API Key
# # base_url=f"http://localhost:{port}/v1" # 拼接你的端口
# # )
# # try:
# # # 发起 chat completion 请求
# # response = client.chat.completions.create(
# # model=model_name,
# # messages=[{"role": "user", "content": prompt}],
# # max_tokens=1024,
# # temperature=0 # 可调节
# # )
# # # 提取返回结果
# # generated_text = response.choices[0].message.content.strip()
# # print("生成结果:")
# # print(generated_text)
# # except Exception as e:
# # print(f"生成失败:{e}")
# # # 示例调用
# # if __name__ == "__main__":
# # test_prompt = f'''Swiss-Prot description:
# # MDNTIPGGINITILIPNLMIIIFGLVGLTGNGIVFWLLGFCLHRNAFSVYILNLALADFFFLLGHIIDSILLLLNVFYPITFLLCFYTIMMVLYIAGLSMLSAISTERCLSVLCPIWYHCHRPEHTSTVMCAVIWVLSLLICILNSYFCGFLNTQYKNENGCLALNFFTAAYLMFLFVVLCLSSLALVARLFCGTGQIKLTRLYVTIILSILVFLLCGLPFGIHWFLLFKIKDDFHVFDLGFYLASVVLTAINSCANPIIYFFVGSFRHRLKHQTLKMVLQNALQDTPETAKIMVEMSRSKSEP
# # Describe the given protein.'''
# # test_local_llm(port=8000, prompt=test_prompt)
# # import json
# # from openai import OpenAI
# # from tqdm import tqdm
# # def test_local_llm_batch(input_file: str, output_file: str, port: int, model_name: str = "qwen"):
# # # 初始化本地模型客户端
# # client = OpenAI(
# # api_key="EMPTY", # 本地服务通常不校验
# # base_url=f"http://localhost:{port}/v1"
# # )
# # results = []
# # # 逐行读取 JSONL 文件
# # with open(input_file, "r", encoding="utf-8") as f:
# # lines = f.readlines()
# # for line in tqdm(lines, desc="生成中"):
# # item = json.loads(line)
# # question = item.get("question", "").strip()
# # reference_answer = item.get("answer", "").strip()
# # try:
# # response = client.chat.completions.create(
# # model=model_name,
# # messages=[{"role": "user", "content": question}],
# # max_tokens=1024,
# # temperature=0
# # )
# # generated_answer = response.choices[0].message.content.strip()
# # except Exception as e:
# # print(f"生成失败:{e}")
# # generated_answer = ""
# # # 保存原始和生成结果
# # results.append({
# # "reference_answer": reference_answer,
# # "generated_answer": generated_answer
# # })
# # # 写入输出 JSONL 文件
# # with open(output_file, "w", encoding="utf-8") as f:
# # for item in results:
# # f.write(json.dumps(item, ensure_ascii=False) + "\n")
# # print(f"生成完成,结果已保存至 {output_file}")
# # # 示例调用
# # if __name__ == "__main__":
# # input_path = "/nas/shared/kilab/wangyujia/DeepLocBinary_test.jsonl" # 输入JSONL路径
# # output_path = "/nas/shared/kilab/wangyujia/BIO/ablation/DeepLocBinary_test.jsonl" # 输出结果路径
# # test_local_llm_batch(input_file=input_path, output_file=output_path, port=8000)
# # import json
# # # 路径替换为你的 jsonl 文件路径
# # file_path = '/nas/shared/kilab/wangyujia/BIO/ablation/material_production_test.jsonl'
# # total = 0
# # correct = 0
# # with open(file_path, 'r', encoding='utf-8') as f:
# # for line in f:
# # data = json.loads(line)
# # total += 1
# # if data['reference_answer'] == data['generated_answer']:
# # correct += 1
# # accuracy = correct / total if total > 0 else 0
# # print(f'准确率: {accuracy:.4f} ({correct}/{total})')
# import csv
# import json
# from openai import OpenAI
# from tqdm import tqdm
# def csv_to_jsonl(csv_file: str, jsonl_file: str):
# """
# 从CSV中提取aa_seq和label,构造JSONL文件作为LLM输入。
# """
# data = []
# # 根据label具体含义修改
# with open(csv_file, "r", encoding="utf-8") as f:
# reader = csv.DictReader(f)
# for row in reader:
# aa_seq = row["aa_seq"].strip()
# label = row["label"].strip()
# #smiles=row["smiles"].strip()
# # 构造prompt
# prompt = f"""
# 【Task】Predict the thermostability value of the given protein {aa_seq}.
# 【Background】Thermostability refers to the ability of a molecule to resist irreversible chemical or physical changes at high temperatures, such as decomposition or aggregation.
# 【Output Format】Provide the predicted thermostability as a numeric value (e.g., melting temperature in °C). Wrap your answer in <answer></answer> tags.
# """
# # 构造JSONL项
# item = {
# "question": prompt,
# "answer": label
# }
# data.append(item)
# data=data[:1500]
# # 写入JSONL文件
# with open(jsonl_file, "w", encoding="utf-8") as f:
# for item in data:
# f.write(json.dumps(item, ensure_ascii=False) + "\n")
# print(f"[INFO] JSONL文件已保存至: {jsonl_file}")
# def test_local_llm_batch(input_file: str, output_file: str, port: int, model_name: str = "qwen"):
# """
# 本地LLM批量推理,读取JSONL,生成并保存结果。
# """
# client = OpenAI(
# api_key="EMPTY", # 本地部署模型时通常不验证API Key
# base_url=f"http://localhost:{port}/v1"
# )
# results = []
# with open(input_file, "r", encoding="utf-8") as f:
# lines = f.readlines()
# for line in tqdm(lines, desc="模型推理中"):
# item = json.loads(line)
# question = item.get("question", "").strip()
# reference_answer = item.get("answer", "").strip()
# # prompt=f'''Based on the input, directly provide the predicted numerical value(s) and place the result inside <answer>...</answer>.
# # Format: <answer>your predicted value</answer>. Do not include any explanation or analysis—only the number(s).'''
# prompt = '''Based on the input, directly provide the predicted value, which must be either 0 or 1.
# Place your answer inside <answer>...</answer>.
# Format: <answer>0</answer> or <answer>1</answer>. Do not include any explanation or analysis—only the number.'''
# question+=prompt
# try:
# response = client.chat.completions.create(
# model=model_name,
# messages=[{"role": "user", "content": question}],
# max_tokens=1024,
# temperature=0
# )
# generated_answer = response.choices[0].message.content.strip()
# except Exception as e:
# print(f"[ERROR] 推理失败:{e}")
# generated_answer = ""
# results.append({
# "reference_answer": reference_answer,
# "generated_answer": generated_answer
# })
# with open(output_file, "w", encoding="utf-8") as f:
# for item in results:
# f.write(json.dumps(item, ensure_ascii=False) + "\n")
# print(f"[INFO] 推理完成,结果保存至: {output_file}")
# # ========= 示例调用入口 =========
# if __name__ == "__main__":
# #csv_path = "/oss/wangyujia/ProtT3/ProtT3/data/sft/dataset/temperature_stability/test.csv" # 替换为你的CSV路径
# jsonl_input_path = "/nas/shared/kilab/wangyujia/temperature_stability.jsonl"
# jsonl_output_path = "/nas/shared/kilab/wangyujia/BIO/ablation/temperature_stability.jsonl"
# # 步骤1:CSV → JSONL
# #csv_to_jsonl(csv_path, jsonl_input_path)
# # 步骤2:JSONL → 推理 → 保存结果
# test_local_llm_batch(input_file=jsonl_input_path, output_file=jsonl_output_path, port=8000)
# # prompt = f"""
# # 【Protein sequence (1-letter amino acid codes)】:{aa_seq}【Ligand structure (SMILES)】:{smiles}
# # Task: Evaluate the inhibitory effect of the ligand on the given protein.
# # Based on the provided protein and ligand, predict the inhibitory strength by classifying the IC50 level.as a numeric value,Wrap your answer in <answer></answer> tags.
# # """
from openai import OpenAI


def main() -> None:
    """Query a locally served, OpenAI-compatible LLM once and print its reply.

    Sends a single subcellular-localization question (protein sequence plus
    a fixed list of answer options) to a server listening on
    ``http://localhost:8000/v1`` and prints the model's full answer.

    Assumes an OpenAI-compatible endpoint (e.g. vLLM) is already running on
    that port; such local deployments typically do not validate the API key.
    """
    # Initialize the client for the local service.
    client = OpenAI(
        api_key="EMPTY",  # local deployments usually require no key
        base_url="http://localhost:8000/v1"  # replace with your own service address
    )

    # Build the question prompt. NOTE: the original used an f-string with no
    # placeholders (lint F541); a plain string literal is byte-identical.
    # The triple-quoted continuation lines stay at column 0 so the string
    # content is unchanged.
    prompt = '''protein sequence:MVKVKSKNSVIKLLSTAASGYSRYISIKKGAPLVTQVRYDPVVKRHVLFKEAKKRKVAERKPLDFLRTAK. According to the protein information provided , predict the most likely subcellular localization from the following options:\nOptions: 0. \"Nucleus, U\" \n 1. \"Cytoplasm, S\" \n 2. \"Extracellular, S\" \n 3. \"Mitochondrion, U\" \n 4. \"Cell membrane, M\" \n 5. \"Endoplasmic reticulum, M\" \n 6. \"Plastid, S\" \n 7. \"Golgi apparatus, M\" \n 8. \"Lysosome/Vacuole, M\" \n9. \"Peroxisome, U\"\n\n
Think briefly about the question and then give the correct answer inside <answer></answer>
'''

    # Send the request and generate the answer.
    response = client.chat.completions.create(
        model="qwen",  # replace with your model name
        messages=[{"role": "user", "content": prompt}],
        max_tokens=512,
        temperature=0.7
    )

    # Print the model's complete answer.
    print(response.choices[0].message.content)


if __name__ == "__main__":
    main()