Spaces:

thexForce
/

originbio-component1

Sleeping

App Files Files Community

Junaidb committed on Dec 29, 2025

Commit

dacf721

verified ·

1 Parent(s): 073ab01

Create biological_context_language.py

Browse files

Files changed (1) hide show

biological_context_language.py +491 -0

biological_context_language.py ADDED Viewed

	@@ -0,0 +1,491 @@

import json
import os
import time

from groq import Groq
from jsonschema import validate, ValidationError

from databaseengine import DatabaseEngine
# Module-wide handles shared by every call in this file.
de = DatabaseEngine()

# The Groq key is read from the GROQ_API_KEY environment variable so that no
# credential is stored in the repository.
# NOTE(review): the key that used to be hard-coded on this line was published
# with the commit; it should be revoked and rotated.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# Example task object for the `retrieve_uniprot_sequence` operation.
# This is a plain (non-f) string, so braces are written once, and the example
# is kept strictly valid JSON (no trailing comma) because it is meant to be
# copied by a model that must emit JSON.
# NOTE(review): if another module calls `.format()` on this text the braces
# must be doubled again there -- no such caller is visible in this file.
uniprot_sequence = '''
 FORMAT FOR retrieve_uniprot_sequence:
     {
        "operation": "retrieve_uniprot_sequence",
        "biological_inputs": {
          "gene_symbol": "HER2"
        }
     }
'''
# Per-operation task templates shown to the planner model.
#
# This is a plain string that the planner prompts embed as an interpolated
# VALUE, so its text reaches the model verbatim. Braces are therefore written
# once (doubled braces are only an escape inside f-string / str.format
# literals), and every template is valid JSON so the model is not shown
# malformed examples. Each "depends" names the operation whose output feeds
# the step; the observe/orient/decide/act step depends on
# "fetch_nanobody_template", matching the operation name defined below.
BCL_TASK_FORMAT_FOR_EXP_V2 = """
FORMAT FOR introduce_point_mutation:
    {
        "operation": "introduce_point_mutation",
        "biological_inputs": {
          "wildtype_sequence": "",
          "mutation": "S310F"
        },
        "depends": "retrieve_uniprot_sequence"
    }
FORMAT FOR predict_structure:
    {
        "operation":"predict_structure",
        "biological_inputs":{
            "sequence_for_structure":""
        },
        "depends": "domain_determination"
    }
FORMAT FOR analyze_epitopes:
    {
        "operation":"analyze_epitopes",
        "biological_inputs":{
            "structure":""
        },
        "depends": "predict_structure"
    }
FORMAT FOR domain_determination:
    {
        "operation":"domain_determination",
        "biological_inputs": {
            "sequence":""
        },
        "depends":"introduce_point_mutation"
    }
FORMAT FOR fetch_nanobody_template:
    {
    "operation":"fetch_nanobody_template",
    "biological_inputs":{
        "nanobody":""
    },
    "depends":"None"
    }
FORMAT FOR observe_orient_decide_act_loop:
    {
    "operation": "observe_orient_decide_act_loop",
       "biological_inputs": {
           "sequence": "",
           "raw_prompt": "<fill this with the actual high level bio query received from the user>"
            },
    "depends": "fetch_nanobody_template"
    }
FORMAT FOR nanobody_template_mutator:
    {
    "operation":"nanobody_template_mutator",
    "biological_inputs":{
        "sequence":""
    },
    "depends":"observe_orient_decide_act_loop"
    }
FORMAT FOR engineer_nanobody:
    {
    "operation":"engineer_nanobody",
    "biological_inputs":{
        "template_sequence":""
    },
    "depends":"nanobody_template_mutator"
    }
"""
# Earlier, generic variant of the task templates: "depends" is a placeholder
# sentence instead of a concrete operation name.
#
# Plain string, so braces are written once, and each template is valid JSON
# (commas between members, no trailing comma).
BCL_TASK_FORMAT_FOR_EXP = """
FORMAT FOR introduce_point_mutation:
    {
        "operation": "introduce_point_mutation",
        "biological_inputs": {
          "wildtype_sequence": "",
          "mutation": "S310F"
        },
        "depends": "name of the operation (operation key)  it depends on"
    }
FORMAT FOR predict_structure:
    {
        "operation":"predict_structure",
        "biological_inputs":{
            "sequence":""
        },
        "depends": "name of the operation (operation key)  it depends on"
    }
FORMAT FOR analyze_epitopes:
    {
        "operation":"analyze_epitopes",
        "biological_inputs":{
            "structure":""
        },
        "depends": "name of the operation (operation key)  it depends on"
    }
FORMAT FOR domain_determination:
    {
        "operation":"domain_determination",
        "biological_inputs": {
            "sequence":""
        },
        "depends":"name of the (operation key) it depends on"
    }
"""
# Operation names rendered into the planner prompts under
# "SUPPORTED OPERATIONS".
# NOTE(review): this list and the templates in BCL_TASK_FORMAT_FOR_EXP_V2 name
# different sets of operations (the two cdr_* entries have no template, and
# the nanobody templates are not listed here) -- confirm which is authoritative.
supported_experiments = [
    "introduce_point_mutation",
    "predict_structure",
    "analyze_epitopes",
    "cdr_identification",
    "cdr_docking_with_epitopes",
    "domain_determination",
]
# Shape of the JSON object the constraint extractor must return.
# Plain string embedded as a value in PROMPT_FOR_CONSTRAINTS_V2, so its braces
# are written once; doubled braces would be shown to the model literally.
CONSTRAINT_FORMAT = """
{
  "expression_system": string | null,
  "avoid_aggregation": true | false | null,
  "solubility_score_min": float (0.0–1.0) | null,
  "yield_level": "low" | "medium" | "high" | null,
  "codon_optimization": string | null,
  "expression_temperature": string | null
}
"""
# Keys the constraint extractor is allowed to emit; rendered into
# PROMPT_FOR_CONSTRAINTS_V2 under "SUPPORTED CONSTRAINTS".
supported_constraints = [
    "expression_system",
    "avoid_aggregation",
    "solubility_score_min",
    "yield_level",
    "codon_optimization",
    "expression_temperature",
]

# Module-level slot for the operations already executed in a session;
# BCL_PLANNER rebinds it through a `global` statement.
EXECUTED_WORKFLOW = None
# System prompt for the constraint-extraction call (_BCL_CONSTRAINTS).
# f-string: CONSTRAINT_FORMAT and supported_constraints are interpolated once
# at import; the doubled braces in the examples render as single braces.
# The yield rule uses "medium" so that it agrees with the only values the
# output schema accepts for yield_level ("low" | "medium" | "high").
PROMPT_FOR_CONSTRAINTS_V2 = f"""
ROLE:
You are a manufacturability constraint extractor for biological AI systems.
TASK:
Extract technical constraints from casual biological descriptions. Parse ANY phrasing - formal requests, casual mentions, or implied requirements.
OUTPUT FORMAT:
{CONSTRAINT_FORMAT}
RULES:
❌ Do not include explanations, comments, markdown, or extra text.
✅ Output only a valid JSON object using proper, correct JSON syntax with single curly braces.
🚫 No markdown code blocks (no ```).
⚠️ Only include valid keys listed below. Use `null` where no constraint is mentioned or implied.
PARSING STRATEGY:
🔍 SCAN for biological keywords and casual mentions:
- Expression systems: "E.coli", "yeast", "mammalian", "bacterial", "expressible in X"
- Yield indicators: "high", "low", "boost", "maximize", "poor yield"
- Solubility clues: "soluble", "aggregation", "misfolding", "inclusion bodies"
- Temperature hints: specific temps (16C), "cold", "low temp", "room temperature"
- Optimization cues: "optimize codons", "codon usage", "expression optimization"
🧠 INFERENCE RULES:
- Any expression system mention → also set codon_optimization to same value
- Aggregation/misfolding concerns → avoid_aggregation: true
- Temperature specifications → extract numeric value
- Yield descriptors → map to "high"/"medium"/"low"
- Solubility percentages → convert to decimal (80% → 0.8)
✅ SUPPORTED CONSTRAINTS:
{supported_constraints}
🧪 MINIMAL EXAMPLES:
"expressible in E.coli" → {{"expression_system": "E.coli", "codon_optimization": "E.coli", "avoid_aggregation": null, "solubility_score_min": null, "yield_level": null, "expression_temperature": null}}
"prevent aggregation" → {{"expression_system": null, "avoid_aggregation": true, "solubility_score_min": null, "yield_level": null, "codon_optimization": null, "expression_temperature": null}}
"80% soluble" → {{"expression_system": null, "avoid_aggregation": null, "solubility_score_min": 0.8, "yield_level": null, "codon_optimization": null, "expression_temperature": null}}
Now extract from:
"""
# System prompt used by BCL_PLANNER.
# f-string: BCL_TASK_FORMAT_FOR_EXP_V2 and supported_experiments are
# interpolated once at import; doubled braces render as single braces.
# The example output is a well-formed one-element JSON array, so the sample the
# model imitates obeys the "valid JSON array" rule stated above it.
PROMPT_FOR_PLANNER = f"""
ROLE:
You are a biological AI workflow planner.
You help convert high-level experimental goals into step-by-step computational workflows that can be executed in a virtual biology lab.
INPUT:
A user's biological intent or problem description, in natural language.
GOAL:
Respond with a list of ordered workflow steps, where each step is a JSON object with:
"operation": a task from the supported operations list (see below)
"biological_inputs": required fields
"depends": the operation on which the current operation depends on
Format your output strictly (required) as:
{BCL_TASK_FORMAT_FOR_EXP_V2}
RULES:
❌ Do not include explanations, comments, markdown, or extra text.
✅ Output only a valid JSON array using proper , correct JSON syntax, use single curly braces.
🚫 No markdown code blocks (no ```).
⚠️ Only include operations  listed in the SUPPORTED OPERATIONS section.
⚠️ If the user's input cannot be mapped to any of the supported operations, respond exactly as:
    {{
      "decision": "reject"
    }}
✅ SUPPORTED OPERATIONS:
{supported_experiments}
🧪 EXAMPLE INPUT PROMPT (User)
"Design a nanobody that targets the HER2 S310F mutant."
✅ EXAMPLE OUTPUT (Planner Response)
[
  {{
    "operation":"introduce_point_mutation",
    "biological_inputs": {{
      "wildtype_sequence": "",
      "mutation": "S310F"
    }},
    "depends": "retrieve_uniprot_sequence"
  }}
]
"""
# Use prior step outputs as inputs where needed.
# str.format template behind PROMPT_FOR_PLANNER_V2. Literal braces are doubled;
# the three named fields are filled in by render_planner_prompt_v2().
_PROMPT_FOR_PLANNER_V2_TEMPLATE = """
ROLE:
You are a biological AI workflow planner.
You help convert high-level experimental goals into step-by-step computational workflows that can be executed in a virtual biology lab.
INPUT:
A user's biological intent or problem description, in natural language.
GOAL:
Respond with a list of ordered workflow steps, where each step is a JSON object with:
"operation": a task from the supported operations list (see below)
"biological_inputs": required fields
"depends": the operation on which the current operation depends on
EXECUTED OPERATIONS:
{executed_workflow}
INSTRUCTION:
🔁 Before generating the workflow, check the EXECUTED OPERATIONS.
✅ Do not include any step in your response if it is already present in EXECUTED OPERATIONS with all required biological inputs.
✅ Generate the minimal necessary workflow to accomplish the user’s intent, continuing from the most recent executed step.
Format your output strictly (required) as:
{task_format}
RULES:
❌ Do not include explanations, comments, markdown, or extra text.
✅ Output only a valid JSON array using proper, correct JSON syntax, use single curly braces.
🚫 No markdown code blocks (no ```).
⚠️ Only include operations listed in the SUPPORTED OPERATIONS section.
⚠️ If the user's input cannot be mapped to any of the supported operations, respond exactly as:
    {{
      "decision": "reject"
    }}
✅ SUPPORTED OPERATIONS:
{supported_operations}
🧪 EXAMPLE INPUT PROMPT (User)
"Design a nanobody that targets the HER2 S310F mutant."
✅ EXAMPLE OUTPUT (Planner Response)
[
  {{
    "operation":"introduce_point_mutation",
    "biological_inputs": {{
      "wildtype_sequence": "",
      "mutation": "S310F"
    }},
    "depends": "retrieve_uniprot_sequence"
  }}
]
"""


def render_planner_prompt_v2(executed_workflow=None):
    """Return the V2 planner prompt with *executed_workflow* filled in.

    The module constant PROMPT_FOR_PLANNER_V2 is built once at import, when
    EXECUTED_WORKFLOW is still its initial value, so its "EXECUTED OPERATIONS"
    section can never show operations loaded later. Call this function at
    request time to render the section from current state.

    Args:
        executed_workflow: Value shown under "EXECUTED OPERATIONS"; it is
            rendered with ``str()``.

    Returns:
        The complete system-prompt text.
    """
    return _PROMPT_FOR_PLANNER_V2_TEMPLATE.format(
        executed_workflow=executed_workflow,
        task_format=BCL_TASK_FORMAT_FOR_EXP_V2,
        supported_operations=supported_experiments,
    )


# Import-time snapshot kept for existing importers; same text as before.
PROMPT_FOR_PLANNER_V2 = render_planner_prompt_v2(EXECUTED_WORKFLOW)
class xFORCE_BIOLOGICAL_CONTEXT_LANGUAGE():
    """Turn a free-text biology request into a BCL plan via Groq chat calls.

    The class holds no instance state. It uses the module-level ``client``
    (Groq) and ``de`` (DatabaseEngine) handles and rebinds the module global
    ``EXECUTED_WORKFLOW``.
    """

    # Chat model and completion budget used for every Groq request.
    _MODEL = "llama-3.3-70b-versatile"
    _MAX_COMPLETION_TOKENS = 5000
    # When system prompt + stored history exceed _HISTORY_LIMIT messages, only
    # the newest _HISTORY_KEEP messages are sent after the system prompt.
    _HISTORY_LIMIT = 8
    _HISTORY_KEEP = 4
    # Seconds to wait between the planner call and the constraint call.
    # NOTE(review): presumably spacing for API rate limits -- confirm.
    _PAUSE_BEFORE_CONSTRAINTS = 5

    def __init__(self):
        pass

    def _chat(self, messages):
        """Send *messages* to the chat-completions endpoint; return reply text."""
        response = client.chat.completions.create(
            model=self._MODEL,
            messages=messages,
            stream=False,
            max_completion_tokens=self._MAX_COMPLETION_TOKENS,
        )
        return response.choices[0].message.content

    @staticmethod
    def _strip_code_fence(reply):
        """Return *reply* without a surrounding Markdown code fence.

        The prompts forbid fences, but a model may still wrap its JSON in
        them. A reply that has no leading fence (or is not a string) is
        returned unchanged, so well-behaved replies are passed through as-is.
        """
        if not isinstance(reply, str):
            return reply
        stripped = reply.strip()
        if not stripped.startswith("```"):
            return reply
        # Drop the opening fence line (it may carry a language tag) ...
        _, _, remainder = stripped.partition("\n")
        # ... and everything from the closing fence onwards, when present.
        body, _, _ = remainder.rpartition("```")
        return (body or remainder).strip()

    def _BCL_CONSTRAINTS(self, userinput):
        """Ask the model for the manufacturability constraints in *userinput*.

        Returns:
            The model's reply text. It is returned as received; this method
            neither parses nor validates it.
        """
        return self._chat([
            {"role": "system", "content": PROMPT_FOR_CONSTRAINTS_V2},
            {"role": "user", "content": userinput},
        ])

    def BCL_PLANNER(self, userinput, id):
        """Plan a workflow for *userinput* within the session *id*.

        Steps, in order: make sure a memory record exists for *id* (loading its
        executed operations into ``EXECUTED_WORKFLOW`` when one does), store
        the user message in the conversation, ask the planner model, store its
        reply, then interpret the reply.

        Args:
            userinput: The user's natural-language request.
            id: Session key, passed to ``de`` as ``bcl_id``. The name shadows
                the builtin but is kept because callers may pass it by keyword.

        Returns:
            * the reply text, when the model answered with a JSON object that
              has a ``"decision"`` key (the rejection form in the prompt);
            * otherwise a dict with ``"experiments"`` (the parsed plan),
              ``"constraints_mode"`` (empty string) and ``"constraints"``
              (reply text of the constraint call, not parsed).

        Raises:
            json.JSONDecodeError: The planner reply is not JSON, even after a
                surrounding code fence has been removed.
        """
        global EXECUTED_WORKFLOW

        # The checks below compare against True / False explicitly: the return
        # contract of the DatabaseEngine methods is not visible here, and any
        # other value has always meant "do nothing".
        ops_status = de.CheckEmptyOps(id)
        if ops_status == True:
            # A session without memory starts with no executed operations. The
            # global may still hold another session's operations from an
            # earlier call, so it is reset instead of being copied into the
            # new record.
            EXECUTED_WORKFLOW = None
            de.InsertMemory({
                "bcl_id": id,
                "executed_operations": None,
                "executed_operations_results": None,
            })
        elif ops_status == False:
            EXECUTED_WORKFLOW = de.FetchMemory(id).get("executed_operations")

        status = de.CheckEmpty(id)
        system_message = {"role": "system", "content": PROMPT_FOR_PLANNER}
        user_message = {"role": "user", "content": userinput}
        g_messages = [system_message]

        if status == True:
            # First message of the conversation: create the record. A copy is
            # stored so the dict sent to the model is not shared with `de`.
            de.Insert_Conversation({
                "bcl_id": id,
                "messages": [dict(user_message)],
            })
            g_messages.append(user_message)
        elif status == False:
            # Append first, then read back, so the fetched history already
            # ends with the current user message.
            de.Update_Conversation(id, [dict(user_message)])
            history = de.FetchConversation(id).get("messages") or []
            g_messages.extend(history)
            if len(g_messages) > self._HISTORY_LIMIT:
                g_messages = [system_message] + g_messages[-self._HISTORY_KEEP:]

        response_message = self._chat(g_messages)

        # The reply is recorded before it is interpreted, so the conversation
        # keeps it even when parsing below fails.
        de.Update_Conversation(
            id, [{"role": "assistant", "content": response_message}]
        )

        # Parse once and reuse the result.
        cleaned = self._strip_code_fence(response_message)
        plan = json.loads(cleaned)
        if isinstance(plan, dict) and "decision" in plan:
            return cleaned

        time.sleep(self._PAUSE_BEFORE_CONSTRAINTS)
        constraints = self._BCL_CONSTRAINTS(userinput)
        print(constraints)
        return {
            "experiments": plan,
            "constraints_mode": "",
            "constraints": constraints,
        }