# biological_context_language.py
# BCL component: plans biological workflows and extracts manufacturability
# constraints via the Groq LLM.
import json
import os
import time

from groq import Groq
# NOTE(review): validate/ValidationError are not used in this file as seen;
# kept in case another part of the project relies on this module's namespace.
from jsonschema import validate, ValidationError

from databaseengine import DatabaseEngine

# Shared database handle for conversation and executed-operation persistence.
de = DatabaseEngine()

# SECURITY: a live Groq API key was previously hard-coded here. That key must
# be rotated; the client now reads the key from the GROQ_API_KEY environment
# variable instead of embedding a secret in source control.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# Few-shot format example for the retrieve_uniprot_sequence operation.
# Fix: removed the trailing comma after the biological_inputs object, which
# made the example invalid JSON (and invited the model to copy the mistake).
# NOTE(review): this is a plain (non-f) string, so the doubled braces are
# emitted verbatim wherever it is used — confirm downstream consumers expect
# that, since the planner rules demand single curly braces.
uniprot_sequence='''
FORMAT FOR retrieve_uniprot_sequence:
{{
"operation": "retrieve_uniprot_sequence",
"biological_inputs": {{
"gene_symbol": "HER2"
}}
}}
'''
# Few-shot format examples handed to the planner LLM (V2, with concrete
# dependency names). Fixes relative to the previous revision:
#   - added missing commas after "biological_inputs" objects and after the
#     analyze_epitopes "operation" key (the examples were invalid JSON),
#   - removed the trailing comma inside domain_determination's inputs,
#   - closed the "<...>" placeholder in raw_prompt,
#   - corrected depends "fetch_template_nanobody" to the actual operation
#     name "fetch_nanobody_template".
BCL_TASK_FORMAT_FOR_EXP_V2="""
FORMAT FOR introduce_point_mutation:
{{
"operation": "introduce_point_mutation",
"biological_inputs": {{
"wildtype_sequence": "",
"mutation": "S310F"
}},
"depends": "retrieve_uniprot_sequence"
}}
FORMAT FOR predict_structure:
{{
"operation":"predict_structure",
"biological_inputs":{{
"sequence_for_structure":""
}},
"depends": "domain_determination"
}}
FORMAT FOR analyze_epitopes:
{{
"operation":"analyze_epitopes",
"biological_inputs":{{
"structure":""
}},
"depends": "predict_structure"
}}
FORMAT FOR domain_determination:
{{
"operation":"domain_determination",
"biological_inputs": {{
"sequence":""
}},
"depends":"introduce_point_mutation"
}}
FORMAT FOR fetch_nanobody_template:
{{
"operation":"fetch_nanobody_template",
"biological_inputs":{{
"nanobody":""
}},
"depends":"None"
}}
FORMAT FOR observe_orient_decide_act_loop:
{{
"operation": "observe_orient_decide_act_loop",
"biological_inputs": {{
"sequence": "",
"raw_prompt": "<fill this with the actual high level bio query received from the user>"
}},
"depends": "fetch_nanobody_template"
}}
FORMAT FOR nanobody_template_mutator:
{{
"operation":"nanobody_template_mutator",
"biological_inputs":{{
"sequence":""
}},
"depends":"observe_orient_decide_act_loop"
}}
FORMAT FOR engineer_nanobody:
{{
"operation":"engineer_nanobody",
"biological_inputs":{{
"template_sequence":""
}},
"depends":"nanobody_template_mutator"
}}
"""
# Legacy few-shot format examples (generic "depends" descriptions).
# Fixes relative to the previous revision: added the commas that were missing
# after the "biological_inputs" objects and after analyze_epitopes'
# "operation" key, and removed the trailing comma inside
# domain_determination's inputs — all of which made the examples invalid JSON.
BCL_TASK_FORMAT_FOR_EXP="""
FORMAT FOR introduce_point_mutation:
{{
"operation": "introduce_point_mutation",
"biological_inputs": {{
"wildtype_sequence": "",
"mutation": "S310F"
}},
"depends": "name of the operation (operation key) it depends on"
}}
FORMAT FOR predict_structure:
{{
"operation":"predict_structure",
"biological_inputs":{{
"sequence":""
}},
"depends": "name of the operation (operation key) it depends on"
}}
FORMAT FOR analyze_epitopes:
{{
"operation":"analyze_epitopes",
"biological_inputs":{{
"structure":""
}},
"depends": "name of the operation (operation key) it depends on"
}}
FORMAT FOR domain_determination:
{{
"operation":"domain_determination",
"biological_inputs": {{
"sequence":""
}},
"depends":"name of the (operation key) it depends on"
}}
"""
# Operation names the planner prompt advertises as allowed output.
# NOTE(review): the V2 format examples also document retrieve_uniprot_sequence,
# fetch_nanobody_template, observe_orient_decide_act_loop,
# nanobody_template_mutator and engineer_nanobody, which are absent here —
# confirm whether they should be added to this whitelist.
supported_experiments=[
"introduce_point_mutation",
"predict_structure",
"analyze_epitopes",
"cdr_identification",
"cdr_docking_with_epitopes",
"domain_determination"
]
# JSON shape the constraint extractor must emit. Plain (non-f) string:
# f-string interpolation inserts this text verbatim, so the doubled braces
# survive unchanged in the final PROMPT_FOR_CONSTRAINTS_V2 prompt.
CONSTRAINT_FORMAT="""
{{
"expression_system": string | null,
"avoid_aggregation": true | false | null,
"solubility_score_min": float (0.0–1.0) | null,
"yield_level": "low" | "medium" | "high" | null,
"codon_optimization": string | null,
"expression_temperature": string | null
}}
"""
# Constraint keys the extractor may populate; mirrors CONSTRAINT_FORMAT above.
supported_constraints=[
"expression_system",
"avoid_aggregation",
"solubility_score_min",
"yield_level",
"codon_optimization",
"expression_temperature"
]
# Module-level cache of previously executed workflow steps; reassigned inside
# BCL_PLANNER from DB memory. NOTE(review): PROMPT_FOR_PLANNER_V2 interpolates
# this value once at import time (i.e. None), so later reassignments are never
# reflected in that prompt constant — confirm whether that is intended.
EXECUTED_WORKFLOW=None
# System prompt for the manufacturability-constraint extractor.
# Fix: the yield inference rule said to map descriptors to
# "high"/"moderate"/"low", contradicting CONSTRAINT_FORMAT (and the examples),
# which only allow "low" | "medium" | "high" — corrected to "medium".
PROMPT_FOR_CONSTRAINTS_V2=f"""
ROLE:
You are a manufacturability constraint extractor for biological AI systems.
TASK:
Extract technical constraints from casual biological descriptions. Parse ANY phrasing - formal requests, casual mentions, or implied requirements.
OUTPUT FORMAT:
{CONSTRAINT_FORMAT}
RULES:
❌ Do not include explanations, comments, markdown, or extra text.
✅ Output only a valid JSON object using proper, correct JSON syntax with single curly braces.
🚫 No markdown code blocks (no ```).
⚠️ Only include valid keys listed below. Use `null` where no constraint is mentioned or implied.
PARSING STRATEGY:
🔍 SCAN for biological keywords and casual mentions:
- Expression systems: "E.coli", "yeast", "mammalian", "bacterial", "expressible in X"
- Yield indicators: "high", "low", "boost", "maximize", "poor yield"
- Solubility clues: "soluble", "aggregation", "misfolding", "inclusion bodies"
- Temperature hints: specific temps (16C), "cold", "low temp", "room temperature"
- Optimization cues: "optimize codons", "codon usage", "expression optimization"
🧠 INFERENCE RULES:
- Any expression system mention → also set codon_optimization to same value
- Aggregation/misfolding concerns → avoid_aggregation: true
- Temperature specifications → extract numeric value
- Yield descriptors → map to "high"/"medium"/"low"
- Solubility percentages → convert to decimal (80% → 0.8)
✅ SUPPORTED CONSTRAINTS:
{supported_constraints}
🧪 MINIMAL EXAMPLES:
"expressible in E.coli" → {{"expression_system": "E.coli", "codon_optimization": "E.coli", "avoid_aggregation": null, "solubility_score_min": null, "yield_level": null, "expression_temperature": null}}
"prevent aggregation" → {{"expression_system": null, "avoid_aggregation": true, "solubility_score_min": null, "yield_level": null, "codon_optimization": null, "expression_temperature": null}}
"80% soluble" → {{"expression_system": null, "avoid_aggregation": null, "solubility_score_min": 0.8, "yield_level": null, "codon_optimization": null, "expression_temperature": null}}
Now extract from:
"""
# System prompt for the workflow planner.
# Fix: the example output previously opened two braces ("{{" twice) but only
# closed one inside the array, rendering as "[ { { ..." — an unbalanced,
# malformed example. Removed the duplicate so the example matches the
# single-object shape used by PROMPT_FOR_PLANNER_V2's example.
PROMPT_FOR_PLANNER=f"""
ROLE:
You are a biological AI workflow planner.
You help convert high-level experimental goals into step-by-step computational workflows that can be executed in a virtual biology lab.
INPUT:
A user's biological intent or problem description, in natural language.
GOAL:
Respond with a list of ordered workflow steps, where each step is a JSON object with:
"operation": a task from the supported operations list (see below)
"biological_inputs": required fields
"depends": the operation on which the current operation depends on
Format your output strictly (required) as:
{BCL_TASK_FORMAT_FOR_EXP_V2}
RULES:
❌ Do not include explanations, comments, markdown, or extra text.
✅ Output only a valid JSON array using proper , correct JSON syntax, use single curly braces.
🚫 No markdown code blocks (no ```).
⚠️ Only include operations listed in the SUPPORTED OPERATIONS section.
⚠️ If the user's input cannot be mapped to any of the supported operations, respond exactly as:
{{
"decision": "reject"
}}
✅ SUPPORTED OPERATIONS:
{supported_experiments}
🧪 EXAMPLE INPUT PROMPT (User)
"Design a nanobody that targets the HER2 S310F mutant."
✅ EXAMPLE OUTPUT (Planner Response)
[
{{
"operation":"introduce_point_mutation",
"biological_inputs": {{
"wildtype_sequence": "",
"mutation": "S310F"
}},
"depends": "retrieve_uniprot_sequence"
}}
]
"""
#Use prior step outputs as inputs where needed.
# Incremental-planning variant of the planner prompt: tells the model to skip
# steps already present in EXECUTED OPERATIONS.
# NOTE(review): as an f-string this interpolates EXECUTED_WORKFLOW once at
# module import time (i.e. None); reassignments made later in BCL_PLANNER are
# never reflected here — confirm whether this prompt should instead be rebuilt
# per call. Also unused by the visible code (BCL_PLANNER uses
# PROMPT_FOR_PLANNER).
PROMPT_FOR_PLANNER_V2=f"""
ROLE:
You are a biological AI workflow planner.
You help convert high-level experimental goals into step-by-step computational workflows that can be executed in a virtual biology lab.
INPUT:
A user's biological intent or problem description, in natural language.
GOAL:
Respond with a list of ordered workflow steps, where each step is a JSON object with:
"operation": a task from the supported operations list (see below)
"biological_inputs": required fields
"depends": the operation on which the current operation depends on
EXECUTED OPERATIONS:
{EXECUTED_WORKFLOW}
INSTRUCTION:
🔁 Before generating the workflow, check the EXECUTED OPERATIONS.
✅ Do not include any step in your response if it is already present in EXECUTED OPERATIONS with all required biological inputs.
✅ Generate the minimal necessary workflow to accomplish the user’s intent, continuing from the most recent executed step.
Format your output strictly (required) as:
{BCL_TASK_FORMAT_FOR_EXP_V2}
RULES:
❌ Do not include explanations, comments, markdown, or extra text.
✅ Output only a valid JSON array using proper, correct JSON syntax, use single curly braces.
🚫 No markdown code blocks (no ```).
⚠️ Only include operations listed in the SUPPORTED OPERATIONS section.
⚠️ If the user's input cannot be mapped to any of the supported operations, respond exactly as:
{{
"decision": "reject"
}}
✅ SUPPORTED OPERATIONS:
{supported_experiments}
🧪 EXAMPLE INPUT PROMPT (User)
"Design a nanobody that targets the HER2 S310F mutant."
✅ EXAMPLE OUTPUT (Planner Response)
[
{{
"operation":"introduce_point_mutation",
"biological_inputs": {{
"wildtype_sequence": "",
"mutation": "S310F"
}},
"depends": "retrieve_uniprot_sequence"
}}
]
"""
class xFORCE_BIOLOGICAL_CONTEXT_LANGUAGE():
    """Biological Context Language (BCL) front-end.

    Converts a natural-language biology request into a structured workflow
    schema by prompting the Groq LLM, persisting conversation history and
    executed-operation memory through the module-level DatabaseEngine ``de``.
    """

    def __init__(self):
        pass

    def _BCL_CONSTRAINTS(self, userinput):
        """Extract manufacturability constraints from ``userinput``.

        Returns the raw model response text (expected to be a JSON object
        string; it is not parsed or validated here).
        """
        messages = [
            {"role": "system", "content": PROMPT_FOR_CONSTRAINTS_V2},
            {"role": "user", "content": userinput},
        ]
        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=messages,
            stream=False,
            max_completion_tokens=5000,
        )
        return response.choices[0].message.content

    def BCL_PLANNER(self, userinput, id):
        """Plan a workflow for ``userinput`` within conversation ``id``.

        Returns either the planner's raw reject response (a JSON string
        containing a "decision" key) or a dict with "experiments",
        "constraints_mode" and "constraints" keys.

        Raises json.JSONDecodeError if the model reply is not valid JSON.
        NOTE: ``id`` shadows the builtin but is kept unchanged for caller
        compatibility.
        """
        global EXECUTED_WORKFLOW
        # Seed or reload the executed-operations memory for this session.
        ops_status = de.CheckEmptyOps(id)
        if ops_status == True:
            de.InsertMemory({
                "bcl_id": id,
                "executed_operations": EXECUTED_WORKFLOW,
                "executed_operations_results": None,
            })
        elif ops_status == False:
            executed_ops = de.FetchMemory(id)
            EXECUTED_WORKFLOW = executed_ops.get("executed_operations")

        status = de.CheckEmpty(id)
        actual_preserved_message = {"role": "system", "content": PROMPT_FOR_PLANNER}
        g_messages = [actual_preserved_message]
        if status == True:
            # First turn of this conversation: persist it and use it directly.
            de.Insert_Conversation({
                "bcl_id": id,
                "messages": [
                    {"role": "user", "content": userinput}
                ],
            })
            g_messages.append({"role": "user", "content": userinput})
        elif status == False:
            # Append the new turn, then rebuild LLM context from stored history.
            de.Update_Conversation(id, [{"role": "user", "content": userinput}])
            history = de.FetchConversation(id)
            for message in history.get("messages"):
                g_messages.append(message)
            if len(g_messages) > 8:
                # Bound prompt size: keep only the 4 most recent turns,
                # re-anchored on the system prompt.
                g_messages = g_messages[-4:]
                g_messages.insert(0, actual_preserved_message)

        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=g_messages,
            stream=False,
            max_completion_tokens=5000,
        )
        response_message = response.choices[0].message.content

        # Persist the assistant reply so follow-up turns see it.
        de.Update_Conversation(id, [{"role": "assistant", "content": response_message}])

        # Parse once (previous revision parsed the same string up to 3 times).
        parsed = json.loads(response_message)
        if isinstance(parsed, dict) and "decision" in parsed:
            # Planner rejected the request; pass the raw reply through.
            return response_message
        time.sleep(5)  # crude pause between the two Groq calls (rate limiting)
        constraints = self._BCL_CONSTRAINTS(userinput)
        print(constraints)
        BCL_SCHEMA = {
            "experiments": parsed,
            "constraints_mode": "",
            # NOTE(review): stored as the raw model string, not parsed JSON —
            # confirm downstream consumers expect a string here.
            "constraints": constraints,
        }
        return BCL_SCHEMA