File size: 9,641 Bytes
f2364fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# # from openai import OpenAI

# # def test_local_llm(port: int, prompt: str, model_name: str = "qwen"):
# #     # 初始化本地模型客户端
# #     client = OpenAI(
# #         api_key="EMPTY",  # 本地模型通常不校验API Key
# #         base_url=f"http://localhost:{port}/v1"  # 拼接你的端口
# #     )

# #     try:
# #         # 发起 chat completion 请求
# #         response = client.chat.completions.create(
# #             model=model_name,
# #             messages=[{"role": "user", "content": prompt}],
# #             max_tokens=1024,
# #             temperature=0  # 可调节
# #         )

# #         # 提取返回结果
# #         generated_text = response.choices[0].message.content.strip()
# #         print("生成结果:")
# #         print(generated_text)
# #     except Exception as e:
# #         print(f"生成失败:{e}")

# # # 示例调用
# # if __name__ == "__main__":
# #     test_prompt = f'''Swiss-Prot description: 
# #     MDNTIPGGINITILIPNLMIIIFGLVGLTGNGIVFWLLGFCLHRNAFSVYILNLALADFFFLLGHIIDSILLLLNVFYPITFLLCFYTIMMVLYIAGLSMLSAISTERCLSVLCPIWYHCHRPEHTSTVMCAVIWVLSLLICILNSYFCGFLNTQYKNENGCLALNFFTAAYLMFLFVVLCLSSLALVARLFCGTGQIKLTRLYVTIILSILVFLLCGLPFGIHWFLLFKIKDDFHVFDLGFYLASVVLTAINSCANPIIYFFVGSFRHRLKHQTLKMVLQNALQDTPETAKIMVEMSRSKSEP
# #     Describe the given protein.'''
# #     test_local_llm(port=8000, prompt=test_prompt)

# # import json
# # from openai import OpenAI
# # from tqdm import tqdm

# # def test_local_llm_batch(input_file: str, output_file: str, port: int, model_name: str = "qwen"):
# #     # 初始化本地模型客户端
# #     client = OpenAI(
# #         api_key="EMPTY",  # 本地服务通常不校验
# #         base_url=f"http://localhost:{port}/v1"
# #     )

# #     results = []

# #     # 逐行读取 JSONL 文件
# #     with open(input_file, "r", encoding="utf-8") as f:
# #         lines = f.readlines()

# #     for line in tqdm(lines, desc="生成中"):
# #         item = json.loads(line)
# #         question = item.get("question", "").strip()
# #         reference_answer = item.get("answer", "").strip()

# #         try:
# #             response = client.chat.completions.create(
# #                 model=model_name,
# #                 messages=[{"role": "user", "content": question}],
# #                 max_tokens=1024,
# #                 temperature=0
# #             )
# #             generated_answer = response.choices[0].message.content.strip()
# #         except Exception as e:
# #             print(f"生成失败:{e}")
# #             generated_answer = ""

# #         # 保存原始和生成结果
# #         results.append({
# #             "reference_answer": reference_answer,
# #             "generated_answer": generated_answer
# #         })

# #     # 写入输出 JSONL 文件
# #     with open(output_file, "w", encoding="utf-8") as f:
# #         for item in results:
# #             f.write(json.dumps(item, ensure_ascii=False) + "\n")

# #     print(f"生成完成,结果已保存至 {output_file}")

# # # 示例调用
# # if __name__ == "__main__":
# #     input_path = "/nas/shared/kilab/wangyujia/DeepLocBinary_test.jsonl"   # 输入JSONL路径
# #     output_path = "/nas/shared/kilab/wangyujia/BIO/ablation/DeepLocBinary_test.jsonl"  # 输出结果路径
# #     test_local_llm_batch(input_file=input_path, output_file=output_path, port=8000)


# # import json

# # # 路径替换为你的 jsonl 文件路径
# # file_path = '/nas/shared/kilab/wangyujia/BIO/ablation/material_production_test.jsonl'

# # total = 0
# # correct = 0

# # with open(file_path, 'r', encoding='utf-8') as f:
# #     for line in f:
# #         data = json.loads(line)
# #         total += 1
# #         if data['reference_answer'] == data['generated_answer']:
# #             correct += 1

# # accuracy = correct / total if total > 0 else 0
# # print(f'准确率: {accuracy:.4f} ({correct}/{total})')


# import csv
# import json
# from openai import OpenAI
# from tqdm import tqdm

# def csv_to_jsonl(csv_file: str, jsonl_file: str):
#     """
#     从CSV中提取aa_seq和label,构造JSONL文件作为LLM输入。
#     """
#     data = []
#       # 根据label具体含义修改

#     with open(csv_file, "r", encoding="utf-8") as f:
#         reader = csv.DictReader(f)
#         for row in reader:
#             aa_seq = row["aa_seq"].strip()
#             label = row["label"].strip()
#             #smiles=row["smiles"].strip()

#             # 构造prompt
            
#             prompt = f"""
# 【Task】Predict the thermostability value of the given protein {aa_seq}.
# 【Background】Thermostability refers to the ability of a molecule to resist irreversible chemical or physical changes at high temperatures, such as decomposition or aggregation.
# 【Output Format】Provide the predicted thermostability as a numeric value (e.g., melting temperature in °C). Wrap your answer in <answer></answer> tags.  

# """
#             # 构造JSONL项
#             item = {
#                 "question": prompt,
#                 "answer": label
#             }
#             data.append(item)
#     data=data[:1500]
#     # 写入JSONL文件
#     with open(jsonl_file, "w", encoding="utf-8") as f:
#         for item in data:
#             f.write(json.dumps(item, ensure_ascii=False) + "\n")

#     print(f"[INFO] JSONL文件已保存至: {jsonl_file}")


# def test_local_llm_batch(input_file: str, output_file: str, port: int, model_name: str = "qwen"):
#     """
#     本地LLM批量推理,读取JSONL,生成并保存结果。
#     """
#     client = OpenAI(
#         api_key="EMPTY",  # 本地部署模型时通常不验证API Key
#         base_url=f"http://localhost:{port}/v1"
#     )

#     results = []

#     with open(input_file, "r", encoding="utf-8") as f:
#         lines = f.readlines()

#     for line in tqdm(lines, desc="模型推理中"):
#         item = json.loads(line)
#         question = item.get("question", "").strip()
#         reference_answer = item.get("answer", "").strip()
# #         prompt=f'''Based on the input, directly provide the predicted numerical value(s) and place the result inside <answer>...</answer>.
# # Format: <answer>your predicted value</answer>. Do not include any explanation or analysis—only the number(s).'''
#         prompt = '''Based on the input, directly provide the predicted value, which must be either 0 or 1. 
# Place your answer inside <answer>...</answer>. 
# Format: <answer>0</answer> or <answer>1</answer>. Do not include any explanation or analysis—only the number.'''

#         question+=prompt
#         try:
#             response = client.chat.completions.create(
#                 model=model_name,
#                 messages=[{"role": "user", "content": question}],
#                 max_tokens=1024,
#                 temperature=0
#             )
#             generated_answer = response.choices[0].message.content.strip()
#         except Exception as e:
#             print(f"[ERROR] 推理失败:{e}")
#             generated_answer = ""

#         results.append({
#             "reference_answer": reference_answer,
#             "generated_answer": generated_answer
#         })

#     with open(output_file, "w", encoding="utf-8") as f:
#         for item in results:
#             f.write(json.dumps(item, ensure_ascii=False) + "\n")

#     print(f"[INFO] 推理完成,结果保存至: {output_file}")


# # ========= 示例调用入口 =========
# if __name__ == "__main__":
#     #csv_path = "/oss/wangyujia/ProtT3/ProtT3/data/sft/dataset/temperature_stability/test.csv"  # 替换为你的CSV路径
#     jsonl_input_path = "/nas/shared/kilab/wangyujia/temperature_stability.jsonl"
#     jsonl_output_path = "/nas/shared/kilab/wangyujia/BIO/ablation/temperature_stability.jsonl"

#     # 步骤1:CSV → JSONL
#     #csv_to_jsonl(csv_path, jsonl_input_path)

#     # 步骤2:JSONL → 推理 → 保存结果
#     test_local_llm_batch(input_file=jsonl_input_path, output_file=jsonl_output_path, port=8000)


# # prompt = f"""
# #                 【Protein sequence (1-letter amino acid codes)】:{aa_seq}【Ligand structure (SMILES)】:{smiles}
# #                 Task: Evaluate the inhibitory effect of the ligand on the given protein.
# #                 Based on the provided protein and ligand, predict the inhibitory strength by classifying the IC50 level as a numeric value. Wrap your answer in <answer></answer> tags. 
# #                 """



from openai import OpenAI


def query_local_llm(
    prompt: str,
    port: int = 8000,
    model_name: str = "qwen",
    max_tokens: int = 512,
    temperature: float = 0.7,
) -> str:
    """Send one chat-completion request to a locally hosted OpenAI-compatible server.

    Args:
        prompt: User message to send as a single-turn conversation.
        port: Local port of the inference server; the client targets
            ``http://localhost:{port}/v1``.
        model_name: Model identifier expected by the server.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.

    Returns:
        The generated message content from the first choice.

    Raises:
        openai.OpenAIError: If the request to the local server fails.
    """
    client = OpenAI(
        api_key="EMPTY",  # local deployments typically do not validate the API key
        base_url=f"http://localhost:{port}/v1",
    )
    response = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    return response.choices[0].message.content


def main() -> None:
    """Build the subcellular-localization prompt and print the model's answer."""
    # NOTE: plain string literal — the original used an f-string with no
    # placeholders; the rendered text is byte-identical.
    prompt = '''protein sequence:MVKVKSKNSVIKLLSTAASGYSRYISIKKGAPLVTQVRYDPVVKRHVLFKEAKKRKVAERKPLDFLRTAK. According to the protein information provided , predict the most likely subcellular localization from the following options:\nOptions: 0. \"Nucleus, U\" \n 1. \"Cytoplasm, S\"  \n 2. \"Extracellular, S\"  \n 3. \"Mitochondrion, U\"  \n 4. \"Cell membrane, M\"  \n 5. \"Endoplasmic reticulum, M\"  \n 6. \"Plastid, S\"  \n 7. \"Golgi apparatus, M\"  \n  8. \"Lysosome/Vacuole, M\"  \n9. \"Peroxisome, U\"\n\n
Think briefly about the question and then give the correct answer inside <answer></answer>
'''
    # Guarding the entry point prevents the network call from firing on import,
    # matching the style of the earlier (commented-out) iterations in this file.
    print(query_local_llm(prompt))


if __name__ == "__main__":
    main()