Junaidb committed on
Commit
dacf721
·
verified ·
1 Parent(s): 073ab01

Create biological_context_language.py

Browse files
Files changed (1) hide show
  1. biological_context_language.py +491 -0
biological_context_language.py ADDED
@@ -0,0 +1,491 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from groq import Groq
2
+ from jsonschema import validate , ValidationError
3
+ import json
4
+ import time
5
+ from databaseengine import DatabaseEngine
6
+
7
+
8
# Module-wide persistence handle; the planner below uses it to store and read
# conversation history and executed-operation memory.
de = DatabaseEngine()

# SECURITY: credentials must never live in source control. With no ``api_key``
# argument the Groq client reads the key from the GROQ_API_KEY environment
# variable. The key that used to be hard-coded on this line was published with
# the commit and must be treated as compromised and revoked.
client = Groq()
13
+
14
+
15
+
16
# Step template for the ``retrieve_uniprot_sequence`` operation.
# Braces are doubled so that the text survives ``str.format`` rendering; call
# ``uniprot_sequence.format()`` to obtain the single-brace JSON form.
# The trailing comma that used to follow "biological_inputs" was removed: it
# made the example invalid JSON.
uniprot_sequence = '''
FORMAT FOR retrieve_uniprot_sequence:
{{
"operation": "retrieve_uniprot_sequence",
"biological_inputs": {{
"gene_symbol": "HER2"
}}
}}
'''
25
+
26
# Per-operation step templates that are embedded in the planner prompts.
# Each section is "FORMAT FOR <operation>:" followed by one JSON object whose
# braces are doubled (``str.format`` escaping); ``.format()`` yields plain JSON.
#
# Fixes relative to the previous revision:
#   * added the commas that were missing in the predict_structure and
#     analyze_epitopes templates, and dropped the trailing comma in
#     domain_determination, so every template is valid JSON;
#   * observe_orient_decide_act_loop now depends on "fetch_nanobody_template",
#     the operation actually declared below (it used to name the non-existent
#     "fetch_template_nanobody");
#   * closed the "<fill ...>" placeholder in raw_prompt.
BCL_TASK_FORMAT_FOR_EXP_V2 = """
FORMAT FOR introduce_point_mutation:
{{
"operation": "introduce_point_mutation",
"biological_inputs": {{
"wildtype_sequence": "",
"mutation": "S310F"
}},
"depends": "retrieve_uniprot_sequence"
}}
FORMAT FOR predict_structure:
{{
"operation":"predict_structure",
"biological_inputs":{{
"sequence_for_structure":""
}},
"depends": "domain_determination"
}}
FORMAT FOR analyze_epitopes:
{{
"operation":"analyze_epitopes",
"biological_inputs":{{
"structure":""
}},
"depends": "predict_structure"
}}
FORMAT FOR domain_determination:
{{

"operation":"domain_determination",

"biological_inputs": {{
"sequence":""

}},
"depends":"introduce_point_mutation"

}}
FORMAT FOR fetch_nanobody_template:
{{
"operation":"fetch_nanobody_template",
"biological_inputs":{{

"nanobody":""
}},
"depends":"None"

}}
FORMAT FOR observe_orient_decide_act_loop:
{{

"operation": "observe_orient_decide_act_loop",
"biological_inputs": {{
"sequence": "",
"raw_prompt": "<fill this with the actual high level bio query received from the user>"
}},
"depends": "fetch_nanobody_template"
}}
FORMAT FOR nanobody_template_mutator:
{{

"operation":"nanobody_template_mutator",
"biological_inputs":{{

"sequence":""
}},
"depends":"observe_orient_decide_act_loop"

}}
FORMAT FOR engineer_nanobody:
{{
"operation":"engineer_nanobody",

"biological_inputs":{{
"template_sequence":""
}},
"depends":"nanobody_template_mutator"

}}
"""
106
+
107
# Earlier variant of the step templates in which "depends" is a free-text
# placeholder instead of a fixed operation name.
# Braces are doubled (``str.format`` escaping); ``.format()`` yields plain JSON.
# Fixed here: the commas missing from the predict_structure and
# analyze_epitopes templates and the trailing comma in domain_determination,
# all of which made the examples invalid JSON.
BCL_TASK_FORMAT_FOR_EXP = """
FORMAT FOR introduce_point_mutation:
{{
"operation": "introduce_point_mutation",
"biological_inputs": {{
"wildtype_sequence": "",
"mutation": "S310F"
}},
"depends": "name of the operation (operation key) it depends on"
}}
FORMAT FOR predict_structure:
{{
"operation":"predict_structure",
"biological_inputs":{{
"sequence":""
}},
"depends": "name of the operation (operation key) it depends on"
}}
FORMAT FOR analyze_epitopes:
{{
"operation":"analyze_epitopes",
"biological_inputs":{{
"structure":""
}},
"depends": "name of the operation (operation key) it depends on"
}}
FORMAT FOR domain_determination:
{{

"operation":"domain_determination",

"biological_inputs": {{
"sequence":""

}},
"depends":"name of the (operation key) it depends on"

}}
"""
146
+
147
# Operation names rendered into the "SUPPORTED OPERATIONS" section of the
# planner prompts.
supported_experiments = [
    "introduce_point_mutation",
    "predict_structure",
    "analyze_epitopes",
    "cdr_identification",
    "cdr_docking_with_epitopes",
    "domain_determination",
]
156
+
157
+
158
# JSON shape the constraint extractor must return. Braces are doubled
# (``str.format`` escaping); ``CONSTRAINT_FORMAT.format()`` gives the
# single-brace form that is shown to the model.
CONSTRAINT_FORMAT = """
{{
"expression_system": string | null,
"avoid_aggregation": true | false | null,
"solubility_score_min": float (0.0–1.0) | null,
"yield_level": "low" | "medium" | "high" | null,
"codon_optimization": string | null,
"expression_temperature": string | null
}}
"""

# Keys rendered into the "SUPPORTED CONSTRAINTS" section of the prompt below.
supported_constraints = [
    "expression_system",
    "avoid_aggregation",
    "solubility_score_min",
    "yield_level",
    "codon_optimization",
    "expression_temperature",
]

# Module-level slot for the operations already executed; BCL_PLANNER rebinds it.
EXECUTED_WORKFLOW = None


# System prompt for the constraint-extraction call.
# Fixes relative to the previous revision:
#   * CONSTRAINT_FORMAT is rendered with ``.format()`` so the OUTPUT FORMAT
#     section shows single braces, matching both the RULES and the examples
#     (it used to be pasted in with the doubled ``{{ }}`` escapes intact);
#   * the yield inference rule now says "medium", the value the schema allows,
#     instead of "moderate".
PROMPT_FOR_CONSTRAINTS_V2 = f"""
ROLE:
You are a manufacturability constraint extractor for biological AI systems.
TASK:
Extract technical constraints from casual biological descriptions. Parse ANY phrasing - formal requests, casual mentions, or implied requirements.
OUTPUT FORMAT:
{CONSTRAINT_FORMAT.format()}
RULES:
❌ Do not include explanations, comments, markdown, or extra text.
✅ Output only a valid JSON object using proper, correct JSON syntax with single curly braces.
🚫 No markdown code blocks (no ```).
⚠️ Only include valid keys listed below. Use `null` where no constraint is mentioned or implied.
PARSING STRATEGY:
🔍 SCAN for biological keywords and casual mentions:
- Expression systems: "E.coli", "yeast", "mammalian", "bacterial", "expressible in X"
- Yield indicators: "high", "low", "boost", "maximize", "poor yield"
- Solubility clues: "soluble", "aggregation", "misfolding", "inclusion bodies"
- Temperature hints: specific temps (16C), "cold", "low temp", "room temperature"
- Optimization cues: "optimize codons", "codon usage", "expression optimization"
🧠 INFERENCE RULES:
- Any expression system mention → also set codon_optimization to same value
- Aggregation/misfolding concerns → avoid_aggregation: true
- Temperature specifications → extract numeric value
- Yield descriptors → map to "high"/"medium"/"low"
- Solubility percentages → convert to decimal (80% → 0.8)
✅ SUPPORTED CONSTRAINTS:
{supported_constraints}
🧪 MINIMAL EXAMPLES:
"expressible in E.coli" → {{"expression_system": "E.coli", "codon_optimization": "E.coli", "avoid_aggregation": null, "solubility_score_min": null, "yield_level": null, "expression_temperature": null}}
"prevent aggregation" → {{"expression_system": null, "avoid_aggregation": true, "solubility_score_min": null, "yield_level": null, "codon_optimization": null, "expression_temperature": null}}
"80% soluble" → {{"expression_system": null, "avoid_aggregation": null, "solubility_score_min": 0.8, "yield_level": null, "codon_optimization": null, "expression_temperature": null}}
Now extract from:
"""
214
+
215
+
216
+
217
+
218
+
219
+
220
+
221
+
222
# System prompt used by BCL_PLANNER.
# Fixes relative to the previous revision:
#   * the step templates are rendered with ``.format()`` so the model sees
#     single-brace JSON, consistent with the "use single curly braces" rule
#     (the templates are plain strings, so their ``{{ }}`` escapes used to be
#     pasted in verbatim);
#   * the example output no longer opens with a stray extra "{", which left
#     the example array unbalanced.
# NOTE: ``.format()`` relies on the template containing only doubled braces.
PROMPT_FOR_PLANNER = f"""
ROLE:
You are a biological AI workflow planner.
You help convert high-level experimental goals into step-by-step computational workflows that can be executed in a virtual biology lab.
INPUT:
A user's biological intent or problem description, in natural language.
GOAL:
Respond with a list of ordered workflow steps, where each step is a JSON object with:
"operation": a task from the supported operations list (see below)
"biological_inputs": required fields
"depends": the operation on which the current operation depends on
Format your output strictly (required) as:
{BCL_TASK_FORMAT_FOR_EXP_V2.format()}
RULES:
❌ Do not include explanations, comments, markdown, or extra text.
✅ Output only a valid JSON array using proper , correct JSON syntax, use single curly braces.
🚫 No markdown code blocks (no ```).
⚠️ Only include operations listed in the SUPPORTED OPERATIONS section.
⚠️ If the user's input cannot be mapped to any of the supported operations, respond exactly as:
{{
"decision": "reject"
}}

✅ SUPPORTED OPERATIONS:
{supported_experiments}
🧪 EXAMPLE INPUT PROMPT (User)
"Design a nanobody that targets the HER2 S310F mutant."
✅ EXAMPLE OUTPUT (Planner Response)
[
{{
"operation":"introduce_point_mutation",
"biological_inputs": {{
"wildtype_sequence": "",
"mutation": "S310F"
}},
"depends": "retrieve_uniprot_sequence"
}}
]
"""
263
+
264
+
265
+
266
+
267
+ #Use prior step outputs as inputs where needed.
268
+
269
+
270
+
271
+
272
# Planner prompt variant that also lists the operations already executed.
# Fix relative to the previous revision: the step templates are rendered with
# ``.format()`` so the model sees single-brace JSON, consistent with the
# "use single curly braces" rule (the templates are plain strings, so their
# ``{{ }}`` escapes used to be pasted in verbatim).
# NOTE(review): this f-string is evaluated once, at import time, so
# EXECUTED OPERATIONS is frozen to whatever EXECUTED_WORKFLOW holds at that
# moment; later rebinding of the global does not change this text.
# NOTE: ``.format()`` relies on the template containing only doubled braces.
PROMPT_FOR_PLANNER_V2 = f"""
ROLE:
You are a biological AI workflow planner.
You help convert high-level experimental goals into step-by-step computational workflows that can be executed in a virtual biology lab.
INPUT:
A user's biological intent or problem description, in natural language.
GOAL:
Respond with a list of ordered workflow steps, where each step is a JSON object with:
"operation": a task from the supported operations list (see below)
"biological_inputs": required fields
"depends": the operation on which the current operation depends on
EXECUTED OPERATIONS:
{EXECUTED_WORKFLOW}
INSTRUCTION:
🔁 Before generating the workflow, check the EXECUTED OPERATIONS.
✅ Do not include any step in your response if it is already present in EXECUTED OPERATIONS with all required biological inputs.
✅ Generate the minimal necessary workflow to accomplish the user’s intent, continuing from the most recent executed step.
Format your output strictly (required) as:
{BCL_TASK_FORMAT_FOR_EXP_V2.format()}
RULES:
❌ Do not include explanations, comments, markdown, or extra text.
✅ Output only a valid JSON array using proper, correct JSON syntax, use single curly braces.
🚫 No markdown code blocks (no ```).
⚠️ Only include operations listed in the SUPPORTED OPERATIONS section.
⚠️ If the user's input cannot be mapped to any of the supported operations, respond exactly as:
{{
"decision": "reject"
}}
✅ SUPPORTED OPERATIONS:
{supported_experiments}
🧪 EXAMPLE INPUT PROMPT (User)
"Design a nanobody that targets the HER2 S310F mutant."
✅ EXAMPLE OUTPUT (Planner Response)
[
{{
"operation":"introduce_point_mutation",
"biological_inputs": {{
"wildtype_sequence": "",
"mutation": "S310F"
}},
"depends": "retrieve_uniprot_sequence"
}}
]
"""
316
+
317
+
318
+
319
+
320
class xFORCE_BIOLOGICAL_CONTEXT_LANGUAGE():
    """Convert a natural-language biology request into a BCL workflow description.

    All model calls go through the module-level Groq ``client``; conversation
    history and executed-operation memory go through the module-level ``de``.
    """

    def __init__(self):
        pass

    def _chat(self, messages):
        """Send ``messages`` to the chat model and return the reply text.

        Private helper shared by the planner and constraint calls so both use
        the same model and generation settings.
        """
        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=messages,
            stream=False,
            max_completion_tokens=5000,
        )
        return response.choices[0].message.content

    def _BCL_CONSTRAINTS(self, userinput):
        """Ask the model to extract manufacturability constraints.

        Args:
            userinput: The user's request text.

        Returns:
            The model's reply as a raw string; it is not parsed or validated
            here.
        """
        return self._chat([
            {"role": "system", "content": PROMPT_FOR_CONSTRAINTS_V2},
            {"role": "user", "content": userinput},
        ])

    def BCL_PLANNER(self, userinput, id):
        """Plan a workflow for ``userinput`` within conversation ``id``.

        The user message and the model's reply are both persisted through
        ``de`` before the reply is interpreted.

        Args:
            userinput: The user's request text.
            id: Identifier stored as ``bcl_id`` for both the operation memory
                and the conversation. The name shadows the builtin but is kept
                so that keyword callers keep working.

        Returns:
            The raw reply string when the model answered with a JSON object
            containing a ``"decision"`` key; otherwise a dict with
            ``"experiments"`` (the parsed reply), ``"constraints_mode"`` (an
            empty string) and ``"constraints"`` (the raw constraint reply).

        Raises:
            json.JSONDecodeError: If the planner reply is not valid JSON. The
                reply has already been stored in the conversation by then.
        """
        global EXECUTED_WORKFLOW

        # The explicit ==True / ==False tests are kept on purpose: what
        # DatabaseEngine returns is not visible from this module, and a value
        # matching neither must keep skipping both branches.
        ops_status = de.CheckEmptyOps(id)
        if ops_status == True:  # noqa: E712
            # NOTE(review): EXECUTED_WORKFLOW is a module global, so a value
            # loaded by an earlier call for a different id is what gets
            # stored here - confirm this is intended.
            de.InsertMemory({
                "bcl_id": id,
                "executed_operations": EXECUTED_WORKFLOW,
                "executed_operations_results": None,
            })
        elif ops_status == False:  # noqa: E712
            EXECUTED_WORKFLOW = de.FetchMemory(id).get("executed_operations")

        system_message = {"role": "system", "content": PROMPT_FOR_PLANNER}
        user_message = {"role": "user", "content": userinput}
        g_messages = [system_message]

        status = de.CheckEmpty(id)
        if status == True:  # noqa: E712
            # First message of this conversation: create the record.
            de.Insert_Conversation({"bcl_id": id, "messages": [user_message]})
            g_messages.append(user_message)
        elif status == False:  # noqa: E712
            # Existing conversation: store the new message, then replay the
            # stored history after the system prompt.
            de.Update_Conversation(id, [user_message])
            g_messages.extend(de.FetchConversation(id).get("messages"))

        if len(g_messages) > 8:
            # Keep the request small: system prompt plus the four most recent
            # messages only.
            g_messages = [system_message] + g_messages[-4:]

        response_message = self._chat(g_messages)

        # Persist the assistant reply before interpreting it.
        de.Update_Conversation(
            id, [{"role": "assistant", "content": response_message}]
        )

        # Parse the reply once and reuse the result.
        plan = json.loads(response_message)
        if isinstance(plan, dict) and "decision" in plan:
            return response_message

        # Pause before the second model request.
        time.sleep(5)
        constraints = self._BCL_CONSTRAINTS(userinput)
        print(constraints)

        return {
            "experiments": plan,
            "constraints_mode": "",
            "constraints": constraints,
        }
491
+