Spaces:
Sleeping
Sleeping
| from groq import Groq | |
| from jsonschema import validate , ValidationError | |
| import json | |
| import time | |
| from databaseengine import DatabaseEngine | |
# Shared persistence handle; the planner below uses it for conversation
# history and executed-operation memory.
de = DatabaseEngine()

# SECURITY: a live Groq API key used to be hard-coded on this line. It has been
# removed from source; the previously committed key must be treated as leaked
# and revoked. With no ``api_key`` argument the Groq client reads the key from
# the GROQ_API_KEY environment variable and raises at construction time if it
# is missing.
client = Groq()
# Task template for the UniProt retrieval step.
# The braces are doubled like in the other task templates of this file so that
# any existing consumer keeps seeing the same escaping.
# NOTE(review): this is a plain (non-f) string, so the braces stay doubled
# unless the consumer collapses them; no consumer is visible in this chunk.
# Fix: the trailing comma after the "biological_inputs" object was removed so
# the template is valid JSON once the braces are collapsed.
uniprot_sequence = '''
FORMAT FOR retrieve_uniprot_sequence:
{{
"operation": "retrieve_uniprot_sequence",
"biological_inputs": {{
"gene_symbol": "HER2"
}}
}}
'''
# Per-operation JSON templates that are embedded in the planner prompts.
# Braces are kept doubled (the escaping the original author used) so existing
# consumers of this constant are unaffected.
# Fixes relative to the previous text:
#   * added the commas that were missing between members in the
#     predict_structure and analyze_epitopes templates;
#   * removed the trailing comma inside domain_determination's inputs;
#   * closed the "<...>" placeholder in observe_orient_decide_act_loop;
#   * observe_orient_decide_act_loop now depends on "fetch_nanobody_template",
#     the operation name actually defined below (it used to reference the
#     non-existent "fetch_template_nanobody").
BCL_TASK_FORMAT_FOR_EXP_V2 = """
FORMAT FOR introduce_point_mutation:
{{
"operation": "introduce_point_mutation",
"biological_inputs": {{
"wildtype_sequence": "",
"mutation": "S310F"
}},
"depends": "retrieve_uniprot_sequence"
}}
FORMAT FOR predict_structure:
{{
"operation":"predict_structure",
"biological_inputs":{{
"sequence_for_structure":""
}},
"depends": "domain_determination"
}}
FORMAT FOR analyze_epitopes:
{{
"operation":"analyze_epitopes",
"biological_inputs":{{
"structure":""
}},
"depends": "predict_structure"
}}
FORMAT FOR domain_determination:
{{
"operation":"domain_determination",
"biological_inputs": {{
"sequence":""
}},
"depends":"introduce_point_mutation"
}}
FORMAT FOR fetch_nanobody_template:
{{
"operation":"fetch_nanobody_template",
"biological_inputs":{{
"nanobody":""
}},
"depends":"None"
}}
FORMAT FOR observe_orient_decide_act_loop:
{{
"operation": "observe_orient_decide_act_loop",
"biological_inputs": {{
"sequence": "",
"raw_prompt": "<fill this with the actual high level bio query received from the user>"
}},
"depends": "fetch_nanobody_template"
}}
FORMAT FOR nanobody_template_mutator:
{{
"operation":"nanobody_template_mutator",
"biological_inputs":{{
"sequence":""
}},
"depends":"observe_orient_decide_act_loop"
}}
FORMAT FOR engineer_nanobody:
{{
"operation":"engineer_nanobody",
"biological_inputs":{{
"template_sequence":""
}},
"depends":"nanobody_template_mutator"
}}
"""
# Earlier variant of the per-operation templates in which "depends" is a
# free-text placeholder instead of a concrete operation name.
# Braces are kept doubled (the escaping the original author used).
# Fixes relative to the previous text: added the commas that were missing
# between members in predict_structure and analyze_epitopes, and removed the
# trailing comma inside domain_determination's inputs, so every template is
# valid JSON once the braces are collapsed.
BCL_TASK_FORMAT_FOR_EXP = """
FORMAT FOR introduce_point_mutation:
{{
"operation": "introduce_point_mutation",
"biological_inputs": {{
"wildtype_sequence": "",
"mutation": "S310F"
}},
"depends": "name of the operation (operation key) it depends on"
}}
FORMAT FOR predict_structure:
{{
"operation":"predict_structure",
"biological_inputs":{{
"sequence":""
}},
"depends": "name of the operation (operation key) it depends on"
}}
FORMAT FOR analyze_epitopes:
{{
"operation":"analyze_epitopes",
"biological_inputs":{{
"structure":""
}},
"depends": "name of the operation (operation key) it depends on"
}}
FORMAT FOR domain_determination:
{{
"operation":"domain_determination",
"biological_inputs": {{
"sequence":""
}},
"depends":"name of the (operation key) it depends on"
}}
"""
# Operation names rendered into the "SUPPORTED OPERATIONS" section of the
# planner prompts.
# NOTE(review): several operations that have templates in
# BCL_TASK_FORMAT_FOR_EXP_V2 (e.g. fetch_nanobody_template, engineer_nanobody)
# are not listed here, and two listed entries have no template — confirm
# whether that is intentional.
supported_experiments = [
    "introduce_point_mutation",
    "predict_structure",
    "analyze_epitopes",
    "cdr_identification",
    "cdr_docking_with_epitopes",
    "domain_determination",
]
# Shape of the JSON object the constraint extractor is asked to return; it is
# a schema sketch for the model, not parseable JSON. Braces are doubled in the
# literal.
CONSTRAINT_FORMAT = """
{{
"expression_system": string | null,
"avoid_aggregation": true | false | null,
"solubility_score_min": float (0.0–1.0) | null,
"yield_level": "low" | "medium" | "high" | null,
"codon_optimization": string | null,
"expression_temperature": string | null
}}
"""

# Keys the constraint extractor is allowed to emit; mirrors the keys of
# CONSTRAINT_FORMAT above.
supported_constraints = [
    "expression_system",
    "avoid_aggregation",
    "solubility_score_min",
    "yield_level",
    "codon_optimization",
    "expression_temperature",
]

# Module-level record of already executed operations; starts out empty (None).
EXECUTED_WORKFLOW = None
# System prompt for the manufacturability-constraint extractor.
#
# CONSTRAINT_FORMAT is a plain string, so the f-string below would insert its
# doubled braces verbatim and the model would be shown "{{ ... }}" even though
# the RULES section demands single braces. Collapse them before interpolation.
_CONSTRAINT_FORMAT_RENDERED = CONSTRAINT_FORMAT.replace("{{", "{").replace("}}", "}")

# Also fixed: the yield mapping used to say "moderate", a value the output
# schema does not allow ("low" | "medium" | "high").
PROMPT_FOR_CONSTRAINTS_V2 = f"""
ROLE:
You are a manufacturability constraint extractor for biological AI systems.
TASK:
Extract technical constraints from casual biological descriptions. Parse ANY phrasing - formal requests, casual mentions, or implied requirements.
OUTPUT FORMAT:
{_CONSTRAINT_FORMAT_RENDERED}
RULES:
❌ Do not include explanations, comments, markdown, or extra text.
✅ Output only a valid JSON object using proper, correct JSON syntax with single curly braces.
🚫 No markdown code blocks (no ```).
⚠️ Only include valid keys listed below. Use `null` where no constraint is mentioned or implied.
PARSING STRATEGY:
🔍 SCAN for biological keywords and casual mentions:
- Expression systems: "E.coli", "yeast", "mammalian", "bacterial", "expressible in X"
- Yield indicators: "high", "low", "boost", "maximize", "poor yield"
- Solubility clues: "soluble", "aggregation", "misfolding", "inclusion bodies"
- Temperature hints: specific temps (16C), "cold", "low temp", "room temperature"
- Optimization cues: "optimize codons", "codon usage", "expression optimization"
🧠 INFERENCE RULES:
- Any expression system mention → also set codon_optimization to same value
- Aggregation/misfolding concerns → avoid_aggregation: true
- Temperature specifications → extract numeric value
- Yield descriptors → map to "high"/"medium"/"low"
- Solubility percentages → convert to decimal (80% → 0.8)
✅ SUPPORTED CONSTRAINTS:
{supported_constraints}
🧪 MINIMAL EXAMPLES:
"expressible in E.coli" → {{"expression_system": "E.coli", "codon_optimization": "E.coli", "avoid_aggregation": null, "solubility_score_min": null, "yield_level": null, "expression_temperature": null}}
"prevent aggregation" → {{"expression_system": null, "avoid_aggregation": true, "solubility_score_min": null, "yield_level": null, "codon_optimization": null, "expression_temperature": null}}
"80% soluble" → {{"expression_system": null, "avoid_aggregation": null, "solubility_score_min": 0.8, "yield_level": null, "codon_optimization": null, "expression_temperature": null}}
Now extract from:
"""
# System prompt for the workflow planner (the variant BCL_PLANNER sends).
#
# BCL_TASK_FORMAT_FOR_EXP_V2 is a plain string, so the f-string below would
# insert its doubled braces verbatim and the model would be shown "{{ ... }}"
# even though the RULES section demands single braces. Collapse them before
# interpolation.
_PLANNER_TASK_FORMAT = BCL_TASK_FORMAT_FOR_EXP_V2.replace("{{", "{").replace("}}", "}")

# Also fixed: the example output used to open two objects but close only one,
# so the example itself was not valid JSON.
# NOTE(review): the example depends on "retrieve_uniprot_sequence", which is
# not in the SUPPORTED OPERATIONS list rendered below — confirm intent.
PROMPT_FOR_PLANNER = f"""
ROLE:
You are a biological AI workflow planner.
You help convert high-level experimental goals into step-by-step computational workflows that can be executed in a virtual biology lab.
INPUT:
A user's biological intent or problem description, in natural language.
GOAL:
Respond with a list of ordered workflow steps, where each step is a JSON object with:
"operation": a task from the supported operations list (see below)
"biological_inputs": required fields
"depends": the operation on which the current operation depends on
Format your output strictly (required) as:
{_PLANNER_TASK_FORMAT}
RULES:
❌ Do not include explanations, comments, markdown, or extra text.
✅ Output only a valid JSON array using proper, correct JSON syntax, use single curly braces.
🚫 No markdown code blocks (no ```).
⚠️ Only include operations listed in the SUPPORTED OPERATIONS section.
⚠️ If the user's input cannot be mapped to any of the supported operations, respond exactly as:
{{
"decision": "reject"
}}
✅ SUPPORTED OPERATIONS:
{supported_experiments}
🧪 EXAMPLE INPUT PROMPT (User)
"Design a nanobody that targets the HER2 S310F mutant."
✅ EXAMPLE OUTPUT (Planner Response)
[
{{
"operation":"introduce_point_mutation",
"biological_inputs": {{
"wildtype_sequence": "",
"mutation": "S310F"
}},
"depends": "retrieve_uniprot_sequence"
}}
]
"""
| #Use prior step outputs as inputs where needed. | |
# Planner prompt variant that additionally lists already executed operations.
#
# BCL_TASK_FORMAT_FOR_EXP_V2 is a plain string, so the f-string below would
# insert its doubled braces verbatim and the model would be shown "{{ ... }}"
# even though the RULES section demands single braces. Collapse them before
# interpolation.
_PLANNER_V2_TASK_FORMAT = BCL_TASK_FORMAT_FOR_EXP_V2.replace("{{", "{").replace("}}", "}")

# NOTE(review): this f-string is evaluated once, at import time, when
# EXECUTED_WORKFLOW is still None, so the "EXECUTED OPERATIONS" section always
# reads "None"; later assignments to the global do not change this text.
# Rendering it per request would require turning the constant into a function,
# which is left to a follow-up because it changes the module interface.
PROMPT_FOR_PLANNER_V2 = f"""
ROLE:
You are a biological AI workflow planner.
You help convert high-level experimental goals into step-by-step computational workflows that can be executed in a virtual biology lab.
INPUT:
A user's biological intent or problem description, in natural language.
GOAL:
Respond with a list of ordered workflow steps, where each step is a JSON object with:
"operation": a task from the supported operations list (see below)
"biological_inputs": required fields
"depends": the operation on which the current operation depends on
EXECUTED OPERATIONS:
{EXECUTED_WORKFLOW}
INSTRUCTION:
🔁 Before generating the workflow, check the EXECUTED OPERATIONS.
✅ Do not include any step in your response if it is already present in EXECUTED OPERATIONS with all required biological inputs.
✅ Generate the minimal necessary workflow to accomplish the user’s intent, continuing from the most recent executed step.
Format your output strictly (required) as:
{_PLANNER_V2_TASK_FORMAT}
RULES:
❌ Do not include explanations, comments, markdown, or extra text.
✅ Output only a valid JSON array using proper, correct JSON syntax, use single curly braces.
🚫 No markdown code blocks (no ```).
⚠️ Only include operations listed in the SUPPORTED OPERATIONS section.
⚠️ If the user's input cannot be mapped to any of the supported operations, respond exactly as:
{{
"decision": "reject"
}}
✅ SUPPORTED OPERATIONS:
{supported_experiments}
🧪 EXAMPLE INPUT PROMPT (User)
"Design a nanobody that targets the HER2 S310F mutant."
✅ EXAMPLE OUTPUT (Planner Response)
[
{{
"operation":"introduce_point_mutation",
"biological_inputs": {{
"wildtype_sequence": "",
"mutation": "S310F"
}},
"depends": "retrieve_uniprot_sequence"
}}
]
"""
class xFORCE_BIOLOGICAL_CONTEXT_LANGUAGE():
    """Turn a free-text biology request into a BCL plan using Groq chat completions.

    The class keeps no instance state. It relies on the module-level ``client``
    (Groq) and ``de`` (DatabaseEngine) objects and on the prompt constants
    defined in this module.
    """

    def __init__(self):
        pass

    def _BCL_CONSTRAINTS(self, userinput):
        """Ask the model to extract manufacturability constraints from ``userinput``.

        Args:
            userinput: The user's request text, sent as the user message.

        Returns:
            The raw content of the first completion choice. It is returned
            as-is and is not parsed as JSON here.
        """
        messages = [
            {"role": "system", "content": PROMPT_FOR_CONSTRAINTS_V2},
            {"role": "user", "content": userinput},
        ]
        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=messages,
            stream=False,
            max_completion_tokens=5000,
        )
        return response.choices[0].message.content

    def BCL_PLANNER(self, userinput, id):
        """Plan a workflow for ``userinput`` within conversation ``id``.

        Steps: sync the executed-operations memory for ``id``, append the user
        message to the stored conversation, send the system prompt plus recent
        history to the model, store the assistant reply, then either pass a
        rejection through or attach extracted constraints to the plan.

        Args:
            userinput: The user's request text.
            id: Conversation/BCL identifier handed to the database helpers.
                (The name shadows the builtin but is kept for callers.)

        Returns:
            The raw reply string when the model answered with an object that
            contains a ``"decision"`` key; otherwise a dict with the keys
            ``"experiments"`` (parsed reply), ``"constraints_mode"`` (empty
            string) and ``"constraints"`` (raw constraint-extractor reply).

        Raises:
            json.JSONDecodeError: If the model reply is not valid JSON.
        """
        global EXECUTED_WORKFLOW

        # Explicit == comparisons are kept on purpose: the helper's return type
        # is not visible here, and a value that is neither True nor False must
        # keep skipping both branches as before.
        ops_status = de.CheckEmptyOps(id)
        if ops_status == True:  # noqa: E712
            de.InsertMemory({
                "bcl_id": id,
                "executed_operations": EXECUTED_WORKFLOW,
                "executed_operations_results": None,
            })
        elif ops_status == False:  # noqa: E712
            executed_ops = de.FetchMemory(id)
            EXECUTED_WORKFLOW = executed_ops.get("executed_operations")
        # NOTE(review): EXECUTED_WORKFLOW is refreshed above, but the system
        # prompt used below (PROMPT_FOR_PLANNER) does not include it, so the
        # executed operations never reach the model — confirm whether the V2
        # prompt was meant to be used here.

        status = de.CheckEmpty(id)
        actual_preserved_message = {"role": "system", "content": PROMPT_FOR_PLANNER}
        g_messages = [actual_preserved_message]

        if status == True:  # noqa: E712
            # First message of this conversation: create the record.
            de.Insert_Conversation({
                "bcl_id": id,
                "messages": [
                    {"role": "user", "content": userinput},
                ],
            })
            g_messages.append({"role": "user", "content": userinput})
        elif status == False:  # noqa: E712
            # Existing conversation: persist the new message, then replay the
            # stored history after the system prompt.
            de.Update_Conversation(id, [{"role": "user", "content": userinput}])
            history = de.FetchConversation(id).get("messages")
            g_messages.extend(history)

        if len(g_messages) > 8:
            # Bound the context: keep the system prompt plus the 4 newest
            # messages. (A summarisation pass over the dropped messages used to
            # be sketched here as dead, string-quoted code; it was removed.)
            g_messages = [actual_preserved_message, *g_messages[-4:]]

        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=g_messages,
            stream=False,
            max_completion_tokens=5000,
        )
        response_message = response.choices[0].message.content

        # Persist the assistant reply before interpreting it, as before.
        de.Update_Conversation(id, [{"role": "assistant", "content": response_message}])

        # Parse the reply once; it used to be parsed up to three times.
        plan = json.loads(response_message)
        if isinstance(plan, dict) and "decision" in plan:
            # Rejections are handed back as the raw reply string.
            return response_message

        # Pause before the second model call — presumably to stay under the
        # provider's rate limit; TODO confirm.
        time.sleep(5)
        constraints = self._BCL_CONSTRAINTS(userinput)
        print(constraints)
        BCL_SCHEMA = {
            "experiments": plan,
            "constraints_mode": "",
            "constraints": constraints,
        }
        return BCL_SCHEMA