# qulab-infinite / experiment_workflows.py
# (Repository header, preserved as comments so the module parses:
#  commit 91994bf — "QuLab MCP Server: Complete Experiment Taxonomy Deployment",
#  author: workofarttattoo)
"""
Copyright (c) 2025 Joshua Hendricks Cole (DBA: Corporation of Light). All Rights Reserved. PATENT PENDING.
Experiment Workflows - Complex Workflow Composition System
Chains together multiple experiments in sequence with data flow and dependencies.
Supports chemical synthesis workflows, analytical pipelines, and multi-step processes.
"""
from dataclasses import dataclass, field
from typing import Dict, List, Any, Optional, Callable, Union
from datetime import datetime, timedelta
import asyncio
from enum import Enum
import json
from experiment_taxonomy import experiment_taxonomy, ExperimentTemplate
from experiment_protocols import experiment_protocols
class WorkflowStepType(Enum):
    """Kinds of steps a workflow may contain.

    NOTE(review): WorkflowExecutor currently implements only
    EXPERIMENT_EXECUTION and DATA_TRANSFORMATION; other types are reported
    as unsupported at execution time.
    """
    EXPERIMENT_EXECUTION = "experiment_execution"  # Run an experiment from the taxonomy via the MCP server
    DATA_TRANSFORMATION = "data_transformation"  # Apply a Python transformation function to step parameters
    CONDITIONAL_BRANCHING = "conditional_branching"  # Declared but not yet executed by WorkflowExecutor
    PARALLEL_EXECUTION = "parallel_execution"  # Declared but not yet executed by WorkflowExecutor
    QUALITY_CHECK = "quality_check"  # Declared but not yet executed by WorkflowExecutor
class DataFlow(Enum):
    """How a step receives data from the steps it depends on.

    NOTE(review): ACCUMULATION is declared but not handled by
    WorkflowExecutor._prepare_step_parameters.
    """
    DIRECT_PASS = "direct_pass"  # Pass result directly to next step
    PARAMETER_MAPPING = "parameter_mapping"  # Map specific parameters
    TRANSFORMATION = "transformation"  # Apply transformation function
    ACCUMULATION = "accumulation"  # Accumulate results across steps
@dataclass
class WorkflowStep:
    """Individual step in a workflow."""
    step_id: str  # Unique identifier within the workflow
    name: str  # Human-readable step name
    step_type: WorkflowStepType  # What kind of work this step performs
    experiment_id: Optional[str] = None  # Taxonomy experiment to run (EXPERIMENT_EXECUTION steps)
    parameters: Dict[str, Any] = field(default_factory=dict)  # Static parameters supplied to the step
    data_flow: DataFlow = DataFlow.DIRECT_PASS  # How dependency outputs feed into this step
    transformation_function: Optional[Callable] = None  # Used by TRANSFORMATION flow and DATA_TRANSFORMATION steps
    dependencies: List[str] = field(default_factory=list)  # Step IDs this depends on
    timeout_minutes: Optional[int] = None  # NOTE(review): not enforced by WorkflowExecutor — confirm intent
    retry_count: int = 0  # NOTE(review): retries are not implemented by WorkflowExecutor
    critical_step: bool = False  # If this fails, abort workflow
@dataclass
class WorkflowResult:
    """Result of a workflow execution."""
    workflow_id: str  # Timestamped run identifier (workflow id + start time)
    success: bool  # True only when no step failed
    total_steps: int  # Number of steps defined in the workflow
    completed_steps: int  # Steps that reported success
    failed_steps: int  # Steps that reported failure
    execution_time_seconds: float  # Wall-clock duration of the run
    results: Dict[str, Any] = field(default_factory=dict)  # Aggregated outputs (built by the executor)
    errors: List[str] = field(default_factory=list)  # Human-readable error messages, in occurrence order
    step_results: List[Dict[str, Any]] = field(default_factory=list)  # Per-step outcome records, in execution order
    timestamp: datetime = field(default_factory=datetime.now)  # When this result object was created
@dataclass
class ExperimentWorkflow:
    """A multi-step workflow that chains experiments with data dependencies."""
    workflow_id: str
    name: str
    description: str
    category: str  # e.g., "organic_synthesis", "drug_discovery", "material_characterization"
    steps: List[WorkflowStep] = field(default_factory=list)
    input_parameters: Dict[str, Any] = field(default_factory=dict)
    output_schema: Dict[str, Any] = field(default_factory=dict)
    estimated_duration: Optional[timedelta] = None
    required_equipment: List[str] = field(default_factory=list)
    safety_requirements: List[str] = field(default_factory=list)
    quality_checks: List[str] = field(default_factory=list)

    def validate_workflow(self) -> List[str]:
        """Check workflow structure; return a list of problems (empty == valid)."""
        problems: List[str] = []
        known_ids = [s.step_id for s in self.steps]
        # Duplicate IDs make dependency resolution ambiguous.
        if len(set(known_ids)) != len(known_ids):
            problems.append("Duplicate step IDs found")
        # Every dependency must name a step that exists in this workflow.
        for s in self.steps:
            problems.extend(
                f"Step {s.step_id} depends on unknown step {dep}"
                for dep in s.dependencies
                if dep not in known_ids
            )
        if self._has_circular_dependencies():
            problems.append("Circular dependencies detected")
        # Experiment-backed steps must reference a registered experiment.
        for s in self.steps:
            if s.experiment_id and not experiment_taxonomy.get_experiment(s.experiment_id):
                problems.append(f"Unknown experiment ID: {s.experiment_id}")
        return problems

    def _first_match_index(self) -> Dict[str, "WorkflowStep"]:
        # Map each step_id to its FIRST occurrence (mirrors a linear first-match scan,
        # which matters when duplicate IDs slip through).
        index: Dict[str, WorkflowStep] = {}
        for s in self.steps:
            index.setdefault(s.step_id, s)
        return index

    def _has_circular_dependencies(self) -> bool:
        """Detect dependency cycles with a depth-first search."""
        by_id = self._first_match_index()
        done = set()     # fully explored, known cycle-free
        in_path = set()  # nodes on the current DFS path

        def dfs(node: str) -> bool:
            if node in in_path:
                return True  # back-edge: we returned to the active path
            if node in done:
                return False
            in_path.add(node)
            current = by_id.get(node)
            if current is not None:
                if any(dfs(dep) for dep in current.dependencies):
                    return True
            in_path.discard(node)
            done.add(node)
            return False

        return any(dfs(s.step_id) for s in self.steps)

    def get_execution_order(self) -> List[str]:
        """Return step IDs topologically sorted so dependencies come first.

        Raises:
            ValueError: if a circular dependency is encountered.
        """
        by_id = self._first_match_index()
        ordered: List[str] = []
        done = set()
        in_path = set()

        def expand(node: str) -> None:
            if node in in_path:
                raise ValueError(f"Circular dependency involving {node}")
            if node in done:
                return
            in_path.add(node)
            current = by_id.get(node)
            if current is not None:
                for dep in current.dependencies:
                    expand(dep)
            in_path.discard(node)
            done.add(node)
            ordered.append(node)

        for s in self.steps:
            if s.step_id not in done:
                expand(s.step_id)
        return ordered
class WorkflowExecutor:
    """
    Executes complex workflows by orchestrating multiple experiments.
    Handles data flow, error recovery, and parallel execution.

    NOTE(review): steps are executed sequentially in topological order;
    PARALLEL_EXECUTION / CONDITIONAL_BRANCHING / QUALITY_CHECK step types are
    rejected as unsupported by _execute_workflow_step.
    """
    def __init__(self, mcp_server=None):
        # Optional MCP server used to run EXPERIMENT_EXECUTION steps; when None,
        # such steps fail with an explanatory error instead of raising.
        self.mcp_server = mcp_server
        # Results of every started run, keyed by the timestamped run ID.
        self.active_workflows: Dict[str, WorkflowResult] = {}
    async def execute_workflow(self, workflow: ExperimentWorkflow,
                              input_data: Optional[Dict[str, Any]] = None) -> WorkflowResult:
        """Execute a complete workflow.

        Validates the workflow, runs its steps in dependency order, records
        per-step outcomes, and aggregates final outputs.

        Args:
            workflow: The workflow definition to run.
            input_data: Extra parameters merged into every step's parameters
                last, so they override data-flow-derived values.

        Returns:
            A WorkflowResult; success is True only when no step failed.
        """
        start_time = datetime.now()
        # Timestamped run ID so repeated executions of the same workflow are distinct.
        workflow_id = f"{workflow.workflow_id}_{start_time.strftime('%Y%m%d_%H%M%S')}"
        result = WorkflowResult(
            workflow_id=workflow_id,
            success=False,
            total_steps=len(workflow.steps),
            completed_steps=0,
            failed_steps=0,
            execution_time_seconds=0,
            results={},
            errors=[]
        )
        self.active_workflows[workflow_id] = result
        try:
            # Validate structure before doing any work; bail out with the errors.
            validation_errors = workflow.validate_workflow()
            if validation_errors:
                result.errors.extend(validation_errors)
                result.execution_time_seconds = (datetime.now() - start_time).total_seconds()
                return result
            # Topological order guarantees dependencies run before dependents.
            execution_order = workflow.get_execution_order()
            step_results = {}
            # Execute steps sequentially in that order.
            for step_id in execution_order:
                step = next((s for s in workflow.steps if s.step_id == step_id), None)
                if not step:
                    continue
                # Merge static params, dependency outputs, and caller input.
                step_params = self._prepare_step_parameters(step, step_results, input_data or {})
                # Execute step (never raises; failures come back in the dict).
                step_result = await self._execute_workflow_step(step, step_params)
                step_results[step_id] = step_result
                result.step_results.append({
                    'step_id': step_id,
                    'success': step_result.get('success', False),
                    'result': step_result,
                    'execution_time': step_result.get('execution_time', 0)
                })
                if step_result.get('success'):
                    result.completed_steps += 1
                else:
                    result.failed_steps += 1
                    result.errors.append(f"Step {step_id} failed: {step_result.get('error', 'Unknown error')}")
                    # Abort if critical step fails; non-critical failures let the
                    # workflow continue (dependent steps may then lack inputs).
                    if step.critical_step:
                        break
            # Aggregate results; success requires zero failed steps.
            result.success = result.failed_steps == 0
            result.results = self._aggregate_workflow_results(workflow, step_results)
        except Exception as e:
            # Unexpected failure: record it; result.results stays empty.
            result.errors.append(f"Workflow execution error: {str(e)}")
        result.execution_time_seconds = (datetime.now() - start_time).total_seconds()
        return result
    def _prepare_step_parameters(self, step: WorkflowStep,
                                previous_results: Dict[str, Any],
                                input_data: Dict[str, Any]) -> Dict[str, Any]:
        """Build the parameter dict for a step.

        Combines the step's static parameters, its dependencies' results
        (interpreted per the step's data_flow mode), and input_data (merged
        last, so caller-supplied values take precedence).

        NOTE(review): DataFlow.ACCUMULATION has no branch here and currently
        behaves like no data flow at all.
        """
        params = step.parameters.copy()
        if step.data_flow == DataFlow.DIRECT_PASS:
            # Pass results from dependencies directly: copy the 'product' or
            # 'output' entry of each dependency's experiment results into
            # params['input_data'] (later dependencies overwrite earlier ones).
            for dep in step.dependencies:
                if dep in previous_results:
                    dep_result = previous_results[dep]
                    if 'result' in dep_result and 'results' in dep_result['result']:
                        # Extract product or key output from dependency
                        exp_results = dep_result['result']['results']
                        if 'product' in exp_results:
                            params['input_data'] = exp_results['product']
                        elif 'output' in exp_results:
                            params['input_data'] = exp_results['output']
        elif step.data_flow == DataFlow.PARAMETER_MAPPING:
            # Map specific parameters from dependencies
            for dep in step.dependencies:
                if dep in previous_results:
                    dep_result = previous_results[dep]
                    # Custom parameter mapping logic here
                    # NOTE(review): not implemented — PARAMETER_MAPPING is
                    # currently a no-op placeholder.
                    pass
        elif step.data_flow == DataFlow.TRANSFORMATION:
            # Apply transformation function to the full map of previous results;
            # its returned dict is merged into the parameters.
            if step.transformation_function:
                transformed_data = step.transformation_function(previous_results)
                params.update(transformed_data)
        # Add input data last so caller-supplied values win on key conflicts.
        params.update(input_data)
        return params
    async def _execute_workflow_step(self, step: WorkflowStep,
                                    parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Execute a single workflow step.

        Returns a dict with 'success' (bool), 'execution_time' (seconds), and
        'result' and/or 'error'. Never raises: exceptions are captured into the
        returned dict.
        """
        start_time = datetime.now()
        try:
            if step.step_type == WorkflowStepType.EXPERIMENT_EXECUTION:
                # Execute experiment via MCP server
                if self.mcp_server and step.experiment_id:
                    # Local import avoids a module-level circular dependency
                    # with the MCP server module.
                    from qulab_mcp_server import MCPRequest
                    request = MCPRequest(
                        tool=f"experiment.{step.experiment_id}",
                        parameters=parameters,
                        request_id=f"workflow_{step.step_id}_{datetime.now().strftime('%H%M%S')}"
                    )
                    response = await self.mcp_server.execute_tool(request)
                    execution_time = (datetime.now() - start_time).total_seconds()
                    return {
                        'success': response.status == 'success',
                        'result': response.result if response.result else {},
                        'error': response.error,
                        'execution_time': execution_time
                    }
                else:
                    return {
                        'success': False,
                        'error': 'MCP server not available or experiment ID missing',
                        'execution_time': (datetime.now() - start_time).total_seconds()
                    }
            elif step.step_type == WorkflowStepType.DATA_TRANSFORMATION:
                # Apply data transformation directly to the prepared parameters.
                if step.transformation_function:
                    result = step.transformation_function(parameters)
                    return {
                        'success': True,
                        'result': result,
                        'execution_time': (datetime.now() - start_time).total_seconds()
                    }
                else:
                    return {
                        'success': False,
                        'error': 'No transformation function provided',
                        'execution_time': (datetime.now() - start_time).total_seconds()
                    }
            else:
                # Remaining WorkflowStepType values are not implemented yet.
                return {
                    'success': False,
                    'error': f'Unsupported step type: {step.step_type}',
                    'execution_time': (datetime.now() - start_time).total_seconds()
                }
        except Exception as e:
            return {
                'success': False,
                'error': str(e),
                'execution_time': (datetime.now() - start_time).total_seconds()
            }
    def _aggregate_workflow_results(self, workflow: ExperimentWorkflow,
                                   step_results: Dict[str, Any]) -> Dict[str, Any]:
        """Aggregate results from all workflow steps.

        Collects the experiment results of terminal steps (steps that no other
        step depends on) into 'final_outputs', plus a step-count summary.
        NOTE(review): 'quality_metrics' is initialized but never populated here.
        """
        aggregated = {
            'workflow_name': workflow.name,
            'category': workflow.category,
            'final_outputs': {},
            'quality_metrics': {},
            'execution_summary': {
                'total_steps': len(workflow.steps),
                'successful_steps': sum(1 for r in step_results.values() if r.get('success')),
                'failed_steps': sum(1 for r in step_results.values() if not r.get('success'))
            }
        }
        # Terminal steps: those whose step_id appears in no other step's dependencies.
        final_step_ids = [s.step_id for s in workflow.steps if not any(s.step_id in other.dependencies for other in workflow.steps)]
        for step_id in final_step_ids:
            if step_id in step_results:
                step_result = step_results[step_id]
                if step_result.get('success') and 'result' in step_result:
                    exp_result = step_result['result'].get('results', {})
                    aggregated['final_outputs'][step_id] = exp_result
        return aggregated
class WorkflowTemplates:
    """Factory methods producing pre-built workflows for common scientific processes."""

    @staticmethod
    def create_organic_synthesis_workflow(target_molecule: str) -> ExperimentWorkflow:
        """Build a four-stage synthesis workflow: plan -> react -> purify -> characterize."""
        planning = WorkflowStep(
            step_id="retrosynthesis",
            name="Retrosynthetic Analysis",
            step_type=WorkflowStepType.EXPERIMENT_EXECUTION,
            experiment_id="organic_synthesis",
            parameters={"target": target_molecule, "strategy": "retrosynthesis"},
            critical_step=True,  # without a plan, nothing downstream makes sense
        )
        reaction = WorkflowStep(
            step_id="step1_reaction",
            name="First Synthetic Step",
            step_type=WorkflowStepType.EXPERIMENT_EXECUTION,
            experiment_id="aldol_condensation",
            parameters={"temperature": 0, "solvent": "ethanol"},
            dependencies=["retrosynthesis"],
            data_flow=DataFlow.PARAMETER_MAPPING,
        )
        cleanup = WorkflowStep(
            step_id="purification",
            name="Product Purification",
            step_type=WorkflowStepType.EXPERIMENT_EXECUTION,
            experiment_id="recrystallization",
            dependencies=["step1_reaction"],
            data_flow=DataFlow.DIRECT_PASS,
        )
        analysis = WorkflowStep(
            step_id="characterization",
            name="Product Characterization",
            step_type=WorkflowStepType.EXPERIMENT_EXECUTION,
            experiment_id="nmr_spectroscopy",
            dependencies=["purification"],
            data_flow=DataFlow.DIRECT_PASS,
        )
        return ExperimentWorkflow(
            workflow_id="organic_synthesis_workflow",
            name=f"Synthesis of {target_molecule}",
            description=f"Multi-step synthesis workflow for {target_molecule}",
            category="organic_synthesis",
            steps=[planning, reaction, cleanup, analysis],
            estimated_duration=timedelta(hours=8),
            required_equipment=["reaction_vessel", "spectrometer", "chromatography_system"],
            safety_requirements=["chemical_handling", "instrument_safety"],
            quality_checks=["purity_check", "yield_calculation", "spectral_analysis"],
        )

    @staticmethod
    def create_drug_discovery_workflow() -> ExperimentWorkflow:
        """Build a drug discovery pipeline: screening -> hits -> leads -> ADMET."""
        screening = WorkflowStep(
            step_id="virtual_screening",
            name="Virtual Screening",
            step_type=WorkflowStepType.EXPERIMENT_EXECUTION,
            experiment_id="molecular_docking",
            parameters={"library_size": 1000000, "target_protein": "PDB_ID"},
        )
        hits = WorkflowStep(
            step_id="hit_identification",
            name="Hit Identification",
            step_type=WorkflowStepType.EXPERIMENT_EXECUTION,
            experiment_id="binding_assay",
            dependencies=["virtual_screening"],
            data_flow=DataFlow.TRANSFORMATION,
        )
        leads = WorkflowStep(
            step_id="lead_optimization",
            name="Lead Optimization",
            step_type=WorkflowStepType.EXPERIMENT_EXECUTION,
            experiment_id="structure_activity_relationship",
            dependencies=["hit_identification"],
            data_flow=DataFlow.PARAMETER_MAPPING,
        )
        admet = WorkflowStep(
            step_id="admet_profiling",
            name="ADMET Profiling",
            step_type=WorkflowStepType.EXPERIMENT_EXECUTION,
            experiment_id="pharmacokinetic_modeling",
            dependencies=["lead_optimization"],
            data_flow=DataFlow.DIRECT_PASS,
        )
        return ExperimentWorkflow(
            workflow_id="drug_discovery_workflow",
            name="High-Throughput Drug Discovery",
            description="Complete drug discovery pipeline from virtual screening to optimization",
            category="drug_discovery",
            steps=[screening, hits, leads, admet],
            estimated_duration=timedelta(days=30),
            required_equipment=["computational_cluster", "high_throughput_screening", "analytical_instruments"],
            safety_requirements=["biological_safety", "chemical_handling"],
            quality_checks=["potency_assay", "selectivity_test", "toxicity_screening"],
        )
# Global workflow executor instance, created without an MCP server; experiment
# steps will report failure until one is attached.
workflow_executor = WorkflowExecutor()
if __name__ == '__main__':
    # Demo: build a sample synthesis workflow, validate it, and show its plan.
    demo = WorkflowTemplates.create_organic_synthesis_workflow("ibuprofen")
    print(f"Created workflow: {demo.name}")
    print(f"Steps: {len(demo.steps)}")
    print(f"Estimated duration: {demo.estimated_duration}")

    # Structural validation (duplicate IDs, unknown deps, cycles, experiment IDs).
    issues = demo.validate_workflow()
    print(f"Validation errors: {issues}" if issues else "Workflow validation passed")

    # Dependency-respecting execution order.
    print(f"Execution order: {demo.get_execution_order()}")

    print("\nWorkflow steps:")
    for item in demo.steps:
        print(f" - {item.step_id}: {item.name} ({item.step_type.value})")
        if item.dependencies:
            print(f" Dependencies: {item.dependencies}")