| # # from openai import OpenAI | |
| # # def test_local_llm(port: int, prompt: str, model_name: str = "qwen"): | |
| # # # 初始化本地模型客户端 | |
| # # client = OpenAI( | |
| # # api_key="EMPTY", # 本地模型通常不校验API Key | |
| # # base_url=f"http://localhost:{port}/v1" # 拼接你的端口 | |
| # # ) | |
| # # try: | |
| # # # 发起 chat completion 请求 | |
| # # response = client.chat.completions.create( | |
| # # model=model_name, | |
| # # messages=[{"role": "user", "content": prompt}], | |
| # # max_tokens=1024, | |
| # # temperature=0 # 可调节 | |
| # # ) | |
| # # # 提取返回结果 | |
| # # generated_text = response.choices[0].message.content.strip() | |
| # # print("生成结果:") | |
| # # print(generated_text) | |
| # # except Exception as e: | |
| # # print(f"生成失败:{e}") | |
| # # # 示例调用 | |
| # # if __name__ == "__main__": | |
| # # test_prompt = f'''Swiss-Prot description: | |
| # # MDNTIPGGINITILIPNLMIIIFGLVGLTGNGIVFWLLGFCLHRNAFSVYILNLALADFFFLLGHIIDSILLLLNVFYPITFLLCFYTIMMVLYIAGLSMLSAISTERCLSVLCPIWYHCHRPEHTSTVMCAVIWVLSLLICILNSYFCGFLNTQYKNENGCLALNFFTAAYLMFLFVVLCLSSLALVARLFCGTGQIKLTRLYVTIILSILVFLLCGLPFGIHWFLLFKIKDDFHVFDLGFYLASVVLTAINSCANPIIYFFVGSFRHRLKHQTLKMVLQNALQDTPETAKIMVEMSRSKSEP | |
| # # Describe the given protein.''' | |
| # # test_local_llm(port=8000, prompt=test_prompt) | |
| # # import json | |
| # # from openai import OpenAI | |
| # # from tqdm import tqdm | |
| # # def test_local_llm_batch(input_file: str, output_file: str, port: int, model_name: str = "qwen"): | |
| # # # 初始化本地模型客户端 | |
| # # client = OpenAI( | |
| # # api_key="EMPTY", # 本地服务通常不校验 | |
| # # base_url=f"http://localhost:{port}/v1" | |
| # # ) | |
| # # results = [] | |
| # # # 逐行读取 JSONL 文件 | |
| # # with open(input_file, "r", encoding="utf-8") as f: | |
| # # lines = f.readlines() | |
| # # for line in tqdm(lines, desc="生成中"): | |
| # # item = json.loads(line) | |
| # # question = item.get("question", "").strip() | |
| # # reference_answer = item.get("answer", "").strip() | |
| # # try: | |
| # # response = client.chat.completions.create( | |
| # # model=model_name, | |
| # # messages=[{"role": "user", "content": question}], | |
| # # max_tokens=1024, | |
| # # temperature=0 | |
| # # ) | |
| # # generated_answer = response.choices[0].message.content.strip() | |
| # # except Exception as e: | |
| # # print(f"生成失败:{e}") | |
| # # generated_answer = "" | |
| # # # 保存原始和生成结果 | |
| # # results.append({ | |
| # # "reference_answer": reference_answer, | |
| # # "generated_answer": generated_answer | |
| # # }) | |
| # # # 写入输出 JSONL 文件 | |
| # # with open(output_file, "w", encoding="utf-8") as f: | |
| # # for item in results: | |
| # # f.write(json.dumps(item, ensure_ascii=False) + "\n") | |
| # # print(f"生成完成,结果已保存至 {output_file}") | |
| # # # 示例调用 | |
| # # if __name__ == "__main__": | |
| # # input_path = "/nas/shared/kilab/wangyujia/DeepLocBinary_test.jsonl" # 输入JSONL路径 | |
| # # output_path = "/nas/shared/kilab/wangyujia/BIO/ablation/DeepLocBinary_test.jsonl" # 输出结果路径 | |
| # # test_local_llm_batch(input_file=input_path, output_file=output_path, port=8000) | |
| # # import json | |
| # # # 路径替换为你的 jsonl 文件路径 | |
| # # file_path = '/nas/shared/kilab/wangyujia/BIO/ablation/material_production_test.jsonl' | |
| # # total = 0 | |
| # # correct = 0 | |
| # # with open(file_path, 'r', encoding='utf-8') as f: | |
| # # for line in f: | |
| # # data = json.loads(line) | |
| # # total += 1 | |
| # # if data['reference_answer'] == data['generated_answer']: | |
| # # correct += 1 | |
| # # accuracy = correct / total if total > 0 else 0 | |
| # # print(f'准确率: {accuracy:.4f} ({correct}/{total})') | |
| # import csv | |
| # import json | |
| # from openai import OpenAI | |
| # from tqdm import tqdm | |
| # def csv_to_jsonl(csv_file: str, jsonl_file: str): | |
| # """ | |
| # 从CSV中提取aa_seq和label,构造JSONL文件作为LLM输入。 | |
| # """ | |
| # data = [] | |
| # # 根据label具体含义修改 | |
| # with open(csv_file, "r", encoding="utf-8") as f: | |
| # reader = csv.DictReader(f) | |
| # for row in reader: | |
| # aa_seq = row["aa_seq"].strip() | |
| # label = row["label"].strip() | |
| # #smiles=row["smiles"].strip() | |
| # # 构造prompt | |
| # prompt = f""" | |
| # 【Task】Predict the thermostability value of the given protein {aa_seq}. | |
| # 【Background】Thermostability refers to the ability of a molecule to resist irreversible chemical or physical changes at high temperatures, such as decomposition or aggregation. | |
| # 【Output Format】Provide the predicted thermostability as a numeric value (e.g., melting temperature in °C). Wrap your answer in <answer></answer> tags. | |
| # """ | |
| # # 构造JSONL项 | |
| # item = { | |
| # "question": prompt, | |
| # "answer": label | |
| # } | |
| # data.append(item) | |
| # data=data[:1500] | |
| # # 写入JSONL文件 | |
| # with open(jsonl_file, "w", encoding="utf-8") as f: | |
| # for item in data: | |
| # f.write(json.dumps(item, ensure_ascii=False) + "\n") | |
| # print(f"[INFO] JSONL文件已保存至: {jsonl_file}") | |
| # def test_local_llm_batch(input_file: str, output_file: str, port: int, model_name: str = "qwen"): | |
| # """ | |
| # 本地LLM批量推理,读取JSONL,生成并保存结果。 | |
| # """ | |
| # client = OpenAI( | |
| # api_key="EMPTY", # 本地部署模型时通常不验证API Key | |
| # base_url=f"http://localhost:{port}/v1" | |
| # ) | |
| # results = [] | |
| # with open(input_file, "r", encoding="utf-8") as f: | |
| # lines = f.readlines() | |
| # for line in tqdm(lines, desc="模型推理中"): | |
| # item = json.loads(line) | |
| # question = item.get("question", "").strip() | |
| # reference_answer = item.get("answer", "").strip() | |
| # # prompt=f'''Based on the input, directly provide the predicted numerical value(s) and place the result inside <answer>...</answer>. | |
| # # Format: <answer>your predicted value</answer>. Do not include any explanation or analysis—only the number(s).''' | |
| # prompt = '''Based on the input, directly provide the predicted value, which must be either 0 or 1. | |
| # Place your answer inside <answer>...</answer>. | |
| # Format: <answer>0</answer> or <answer>1</answer>. Do not include any explanation or analysis—only the number.''' | |
| # question+=prompt | |
| # try: | |
| # response = client.chat.completions.create( | |
| # model=model_name, | |
| # messages=[{"role": "user", "content": question}], | |
| # max_tokens=1024, | |
| # temperature=0 | |
| # ) | |
| # generated_answer = response.choices[0].message.content.strip() | |
| # except Exception as e: | |
| # print(f"[ERROR] 推理失败:{e}") | |
| # generated_answer = "" | |
| # results.append({ | |
| # "reference_answer": reference_answer, | |
| # "generated_answer": generated_answer | |
| # }) | |
| # with open(output_file, "w", encoding="utf-8") as f: | |
| # for item in results: | |
| # f.write(json.dumps(item, ensure_ascii=False) + "\n") | |
| # print(f"[INFO] 推理完成,结果保存至: {output_file}") | |
| # # ========= 示例调用入口 ========= | |
| # if __name__ == "__main__": | |
| # #csv_path = "/oss/wangyujia/ProtT3/ProtT3/data/sft/dataset/temperature_stability/test.csv" # 替换为你的CSV路径 | |
| # jsonl_input_path = "/nas/shared/kilab/wangyujia/temperature_stability.jsonl" | |
| # jsonl_output_path = "/nas/shared/kilab/wangyujia/BIO/ablation/temperature_stability.jsonl" | |
| # # 步骤1:CSV → JSONL | |
| # #csv_to_jsonl(csv_path, jsonl_input_path) | |
| # # 步骤2:JSONL → 推理 → 保存结果 | |
| # test_local_llm_batch(input_file=jsonl_input_path, output_file=jsonl_output_path, port=8000) | |
| # # prompt = f""" | |
| # # 【Protein sequence (1-letter amino acid codes)】:{aa_seq}【Ligand structure (SMILES)】:{smiles} | |
| # # Task: Evaluate the inhibitory effect of the ligand on the given protein. | |
| # # Based on the provided protein and ligand, predict the inhibitory strength by classifying the IC50 level.as a numeric value,Wrap your answer in <answer></answer> tags. | |
| # # """ | |
from openai import OpenAI


def query_local_llm(
    prompt: str,
    *,
    base_url: str = "http://localhost:8000/v1",
    model_name: str = "qwen",
    max_tokens: int = 512,
    temperature: float = 0.7,
) -> str:
    """Send one chat prompt to a locally served OpenAI-compatible LLM.

    Args:
        prompt: User message to send to the model.
        base_url: Address of the local OpenAI-compatible endpoint
            (replace with your own service address).
        model_name: Model identifier expected by the local server.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature forwarded to the server.

    Returns:
        The generated reply text, or "" if the request fails.
    """
    # Locally deployed servers typically do not validate the API key,
    # but the client requires a non-empty value.
    client = OpenAI(api_key="EMPTY", base_url=base_url)
    try:
        response = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
        )
    except Exception as e:
        # Connection refused / timeout / server error: report and degrade
        # gracefully instead of crashing with an unhandled traceback.
        print(f"[ERROR] request failed: {e}")
        return ""
    return response.choices[0].message.content


if __name__ == "__main__":
    # Subcellular-localization classification prompt (options 0-9).
    # NOTE: deliberately a plain string, not an f-string — the original
    # f-prefix had no placeholders, and literal braces in a future edit
    # would have raised or interpolated silently.
    prompt = '''protein sequence:MVKVKSKNSVIKLLSTAASGYSRYISIKKGAPLVTQVRYDPVVKRHVLFKEAKKRKVAERKPLDFLRTAK. According to the protein information provided , predict the most likely subcellular localization from the following options:\nOptions: 0. \"Nucleus, U\" \n 1. \"Cytoplasm, S\" \n 2. \"Extracellular, S\" \n 3. \"Mitochondrion, U\" \n 4. \"Cell membrane, M\" \n 5. \"Endoplasmic reticulum, M\" \n 6. \"Plastid, S\" \n 7. \"Golgi apparatus, M\" \n 8. \"Lysosome/Vacuole, M\" \n9. \"Peroxisome, U\"\n\n
Think briefly about the question and then give the correct answer inside <answer></answer>
'''
    # Print the model's full answer.
    print(query_local_llm(prompt))