# # from openai import OpenAI
# # def test_local_llm(port: int, prompt: str, model_name: str = "qwen"):
# # # 初始化本地模型客户端
# # client = OpenAI(
# # api_key="EMPTY", # 本地模型通常不校验API Key
# # base_url=f"http://localhost:{port}/v1" # 拼接你的端口
# # )
# # try:
# # # 发起 chat completion 请求
# # response = client.chat.completions.create(
# # model=model_name,
# # messages=[{"role": "user", "content": prompt}],
# # max_tokens=1024,
# # temperature=0 # 可调节
# # )
# # # 提取返回结果
# # generated_text = response.choices[0].message.content.strip()
# # print("生成结果:")
# # print(generated_text)
# # except Exception as e:
# # print(f"生成失败:{e}")
# # # 示例调用
# # if __name__ == "__main__":
# # test_prompt = f'''Swiss-Prot description:
# # MDNTIPGGINITILIPNLMIIIFGLVGLTGNGIVFWLLGFCLHRNAFSVYILNLALADFFFLLGHIIDSILLLLNVFYPITFLLCFYTIMMVLYIAGLSMLSAISTERCLSVLCPIWYHCHRPEHTSTVMCAVIWVLSLLICILNSYFCGFLNTQYKNENGCLALNFFTAAYLMFLFVVLCLSSLALVARLFCGTGQIKLTRLYVTIILSILVFLLCGLPFGIHWFLLFKIKDDFHVFDLGFYLASVVLTAINSCANPIIYFFVGSFRHRLKHQTLKMVLQNALQDTPETAKIMVEMSRSKSEP
# # Describe the given protein.'''
# # test_local_llm(port=8000, prompt=test_prompt)
# # import json
# # from openai import OpenAI
# # from tqdm import tqdm
# # def test_local_llm_batch(input_file: str, output_file: str, port: int, model_name: str = "qwen"):
# # # 初始化本地模型客户端
# # client = OpenAI(
# # api_key="EMPTY", # 本地服务通常不校验
# # base_url=f"http://localhost:{port}/v1"
# # )
# # results = []
# # # 逐行读取 JSONL 文件
# # with open(input_file, "r", encoding="utf-8") as f:
# # lines = f.readlines()
# # for line in tqdm(lines, desc="生成中"):
# # item = json.loads(line)
# # question = item.get("question", "").strip()
# # reference_answer = item.get("answer", "").strip()
# # try:
# # response = client.chat.completions.create(
# # model=model_name,
# # messages=[{"role": "user", "content": question}],
# # max_tokens=1024,
# # temperature=0
# # )
# # generated_answer = response.choices[0].message.content.strip()
# # except Exception as e:
# # print(f"生成失败:{e}")
# # generated_answer = ""
# # # 保存原始和生成结果
# # results.append({
# # "reference_answer": reference_answer,
# # "generated_answer": generated_answer
# # })
# # # 写入输出 JSONL 文件
# # with open(output_file, "w", encoding="utf-8") as f:
# # for item in results:
# # f.write(json.dumps(item, ensure_ascii=False) + "\n")
# # print(f"生成完成,结果已保存至 {output_file}")
# # # 示例调用
# # if __name__ == "__main__":
# # input_path = "/nas/shared/kilab/wangyujia/DeepLocBinary_test.jsonl" # 输入JSONL路径
# # output_path = "/nas/shared/kilab/wangyujia/BIO/ablation/DeepLocBinary_test.jsonl" # 输出结果路径
# # test_local_llm_batch(input_file=input_path, output_file=output_path, port=8000)
# # import json
# # # 路径替换为你的 jsonl 文件路径
# # file_path = '/nas/shared/kilab/wangyujia/BIO/ablation/material_production_test.jsonl'
# # total = 0
# # correct = 0
# # with open(file_path, 'r', encoding='utf-8') as f:
# # for line in f:
# # data = json.loads(line)
# # total += 1
# # if data['reference_answer'] == data['generated_answer']:
# # correct += 1
# # accuracy = correct / total if total > 0 else 0
# # print(f'准确率: {accuracy:.4f} ({correct}/{total})')
# import csv
# import json
# from openai import OpenAI
# from tqdm import tqdm
# def csv_to_jsonl(csv_file: str, jsonl_file: str):
# """
# 从CSV中提取aa_seq和label,构造JSONL文件作为LLM输入。
# """
# data = []
# # 根据label具体含义修改
# with open(csv_file, "r", encoding="utf-8") as f:
# reader = csv.DictReader(f)
# for row in reader:
# aa_seq = row["aa_seq"].strip()
# label = row["label"].strip()
# #smiles=row["smiles"].strip()
# # 构造prompt
# prompt = f"""
# 【Task】Predict the thermostability value of the given protein {aa_seq}.
# 【Background】Thermostability refers to the ability of a molecule to resist irreversible chemical or physical changes at high temperatures, such as decomposition or aggregation.
# 【Output Format】Provide the predicted thermostability as a numeric value (e.g., melting temperature in °C). Wrap your answer in <answer></answer> tags.
# """
# # 构造JSONL项
# item = {
# "question": prompt,
# "answer": label
# }
# data.append(item)
# data=data[:1500]
# # 写入JSONL文件
# with open(jsonl_file, "w", encoding="utf-8") as f:
# for item in data:
# f.write(json.dumps(item, ensure_ascii=False) + "\n")
# print(f"[INFO] JSONL文件已保存至: {jsonl_file}")
# def test_local_llm_batch(input_file: str, output_file: str, port: int, model_name: str = "qwen"):
# """
# 本地LLM批量推理,读取JSONL,生成并保存结果。
# """
# client = OpenAI(
# api_key="EMPTY", # 本地部署模型时通常不验证API Key
# base_url=f"http://localhost:{port}/v1"
# )
# results = []
# with open(input_file, "r", encoding="utf-8") as f:
# lines = f.readlines()
# for line in tqdm(lines, desc="模型推理中"):
# item = json.loads(line)
# question = item.get("question", "").strip()
# reference_answer = item.get("answer", "").strip()
# # prompt=f'''Based on the input, directly provide the predicted numerical value(s) and place the result inside <answer>...</answer>.
# # Format: <answer>your predicted value</answer>. Do not include any explanation or analysis—only the number(s).'''
# prompt = '''Based on the input, directly provide the predicted value, which must be either 0 or 1.
# Place your answer inside <answer>...</answer>.
# Format: <answer>0</answer> or <answer>1</answer>. Do not include any explanation or analysis—only the number.'''
# question+=prompt
# try:
# response = client.chat.completions.create(
# model=model_name,
# messages=[{"role": "user", "content": question}],
# max_tokens=1024,
# temperature=0
# )
# generated_answer = response.choices[0].message.content.strip()
# except Exception as e:
# print(f"[ERROR] 推理失败:{e}")
# generated_answer = ""
# results.append({
# "reference_answer": reference_answer,
# "generated_answer": generated_answer
# })
# with open(output_file, "w", encoding="utf-8") as f:
# for item in results:
# f.write(json.dumps(item, ensure_ascii=False) + "\n")
# print(f"[INFO] 推理完成,结果保存至: {output_file}")
# # ========= 示例调用入口 =========
# if __name__ == "__main__":
# #csv_path = "/oss/wangyujia/ProtT3/ProtT3/data/sft/dataset/temperature_stability/test.csv" # 替换为你的CSV路径
# jsonl_input_path = "/nas/shared/kilab/wangyujia/temperature_stability.jsonl"
# jsonl_output_path = "/nas/shared/kilab/wangyujia/BIO/ablation/temperature_stability.jsonl"
# # 步骤1:CSV → JSONL
# #csv_to_jsonl(csv_path, jsonl_input_path)
# # 步骤2:JSONL → 推理 → 保存结果
# test_local_llm_batch(input_file=jsonl_input_path, output_file=jsonl_output_path, port=8000)
# # prompt = f"""
# # 【Protein sequence (1-letter amino acid codes)】:{aa_seq}【Ligand structure (SMILES)】:{smiles}
# # Task: Evaluate the inhibitory effect of the ligand on the given protein.
# # Based on the provided protein and ligand, predict the inhibitory strength by classifying the IC50 level.as a numeric value,Wrap your answer in <answer></answer> tags.
# # """
from openai import OpenAI


def main() -> None:
    """Query a locally served, OpenAI-compatible LLM once and print its reply.

    Sends a single subcellular-localization question (protein sequence plus
    a fixed list of answer options) to a server listening on
    ``http://localhost:8000/v1`` and prints the model's full answer.

    Assumes an OpenAI-compatible endpoint (e.g. vLLM) is already running on
    that port; such local deployments typically do not validate the API key.
    """
    # Initialize the client for the local service.
    client = OpenAI(
        api_key="EMPTY",  # local deployments usually require no key
        base_url="http://localhost:8000/v1"  # replace with your own service address
    )

    # Build the question prompt. NOTE: the original used an f-string with no
    # placeholders (lint F541); a plain string literal is byte-identical.
    # The triple-quoted continuation lines stay at column 0 so the string
    # content is unchanged.
    prompt = '''protein sequence:MVKVKSKNSVIKLLSTAASGYSRYISIKKGAPLVTQVRYDPVVKRHVLFKEAKKRKVAERKPLDFLRTAK. According to the protein information provided , predict the most likely subcellular localization from the following options:\nOptions: 0. \"Nucleus, U\" \n 1. \"Cytoplasm, S\" \n 2. \"Extracellular, S\" \n 3. \"Mitochondrion, U\" \n 4. \"Cell membrane, M\" \n 5. \"Endoplasmic reticulum, M\" \n 6. \"Plastid, S\" \n 7. \"Golgi apparatus, M\" \n 8. \"Lysosome/Vacuole, M\" \n9. \"Peroxisome, U\"\n\n
Think briefly about the question and then give the correct answer inside <answer></answer>
'''

    # Send the request and generate the answer.
    response = client.chat.completions.create(
        model="qwen",  # replace with your model name
        messages=[{"role": "user", "content": prompt}],
        max_tokens=512,
        temperature=0.7
    )

    # Print the model's complete answer.
    print(response.choices[0].message.content)


if __name__ == "__main__":
    main()