"""Smoke-test a locally hosted, OpenAI-compatible LLM endpoint.

Sends one protein subcellular-localization prompt to the model server on
``localhost:8000`` (e.g. a vLLM deployment) and prints the raw reply.

NOTE(review): this file previously carried several generations of
commented-out experiment code (single-prompt tests, batch JSONL inference,
CSV->JSONL conversion, accuracy scoring) that had been mangled onto a few
physical lines; stray fragments escaped their comments and produced a
SyntaxError. That dead code is removed here — recover it from version
control if it is still needed.
"""

from openai import OpenAI

# Default endpoint/model of the local server; override via the function args.
DEFAULT_BASE_URL = "http://localhost:8000/v1"
DEFAULT_MODEL = "qwen"

# Question posed to the model — text kept byte-for-byte from the original
# script (the literal intentionally contains one real newline).
PROMPT = '''protein sequence:MVKVKSKNSVIKLLSTAASGYSRYISIKKGAPLVTQVRYDPVVKRHVLFKEAKKRKVAERKPLDFLRTAK. According to the protein information provided , predict the most likely subcellular localization from the following options:\nOptions: 0. \"Nucleus, U\" \n 1. \"Cytoplasm, S\" \n 2. \"Extracellular, S\" \n 3. 
\"Mitochondrion, U\" \n 4. \"Cell membrane, M\" \n 5. \"Endoplasmic reticulum, M\" \n 6. \"Plastid, S\" \n 7. \"Golgi apparatus, M\" \n 8. \"Lysosome/Vacuole, M\" \n9. \"Peroxisome, U\"\n\n Think briefly about the question and then give the correct answer inside '''


def query_local_llm(prompt: str,
                    base_url: str = DEFAULT_BASE_URL,
                    model_name: str = DEFAULT_MODEL,
                    max_tokens: int = 512,
                    temperature: float = 0.7) -> str:
    """Send *prompt* as a single-turn chat completion and return the reply text.

    Args:
        prompt: User message to send to the model.
        base_url: OpenAI-compatible endpoint of the local server.
        model_name: Served model identifier.
        max_tokens: Generation cap (original hard-coded value: 512).
        temperature: Sampling temperature (original hard-coded value: 0.7).

    Returns:
        The assistant message content of the first choice.

    Raises:
        openai.OpenAIError: if the request to the local server fails.
    """
    # Local deployments typically do not validate the API key, hence "EMPTY".
    client = OpenAI(api_key="EMPTY", base_url=base_url)
    response = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    return response.choices[0].message.content


if __name__ == "__main__":
    # Guarded so importing this module no longer fires a network request.
    print(query_local_llm(PROMPT))