Spaces:
Sleeping
Sleeping
minimal example
Browse files- .streamlit/config.toml +14 -0
- .streamlit/secrets.toml +13 -0
- anton/__init__.py +3 -0
- anton/__pycache__/__init__.cpython-313.pyc +0 -0
- anton/analysis/__pycache__/qualitative.cpython-313.pyc +0 -0
- anton/analysis/__pycache__/quantitative.cpython-313.pyc +0 -0
- anton/analysis/qualitative.py +503 -0
- anton/analysis/quantitative.py +620 -0
- anton/cmpo.json +8 -0
- anton/cmpo/README.md +300 -0
- anton/cmpo/__init__.py +24 -0
- anton/cmpo/__pycache__/__init__.cpython-313.pyc +0 -0
- anton/cmpo/__pycache__/examples.cpython-313.pyc +0 -0
- anton/cmpo/__pycache__/mapping.cpython-313.pyc +0 -0
- anton/cmpo/__pycache__/ontology.cpython-313.pyc +0 -0
- anton/cmpo/data/cmpo.json +0 -0
- anton/cmpo/examples.py +277 -0
- anton/cmpo/mapping.py +375 -0
- anton/cmpo/ontology.py +326 -0
- anton/core/__pycache__/pipeline.cpython-313.pyc +0 -0
- anton/core/config.py +97 -0
- anton/core/pipeline.py +188 -0
- anton/main.py +64 -0
- anton/utils/__pycache__/image_io.cpython-313.pyc +0 -0
- anton/utils/__pycache__/validation.cpython-313.pyc +0 -0
- anton/utils/image_io.py +263 -0
- anton/utils/validation.py +20 -0
- anton/vlm/__pycache__/interface.cpython-313.pyc +0 -0
- anton/vlm/interface.py +566 -0
- app.py +170 -0
- src/streamlit_app.py +0 -40
.streamlit/config.toml
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[server]
|
| 2 |
+
# Increase file upload size limit for large microscopy images (in MB)
|
| 3 |
+
maxUploadSize = 200
|
| 4 |
+
# Auto-reload when files change in development
|
| 5 |
+
fileWatcherType = "auto"
|
| 6 |
+
# Run on specific port
|
| 7 |
+
port = 8501
|
| 8 |
+
|
| 9 |
+
[theme]
|
| 10 |
+
# Optional: Customize app appearance
|
| 11 |
+
primaryColor = "#1f77b4"
|
| 12 |
+
backgroundColor = "#ffffff"
|
| 13 |
+
secondaryBackgroundColor = "#f0f2f6"
|
| 14 |
+
textColor = "#262730"
|
.streamlit/secrets.toml
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Streamlit Cloud Secrets Configuration
|
| 2 |
+
#
|
| 3 |
+
# Add your API keys here in the Streamlit Cloud dashboard:
|
| 4 |
+
# 1. Go to your app settings in Streamlit Cloud
|
| 5 |
+
# 2. Navigate to "Secrets" tab
|
| 6 |
+
# 3. Add the following secrets:
|
| 7 |
+
|
| 8 |
+
# Example format (don't put real keys in this file):
|
| 9 |
+
# GOOGLE_API_KEY = "your-google-api-key-here"
|
| 10 |
+
# ANTHROPIC_API_KEY = "your-anthropic-api-key-here"
|
| 11 |
+
|
| 12 |
+
# Note: This file is a template - real secrets should only be entered
|
| 13 |
+
# in the Streamlit Cloud dashboard for security.
|
anton/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Anton: VLM-driven microscopy phenotype analysis framework."""
|
| 2 |
+
|
| 3 |
+
__version__ = "0.2.0"
|
anton/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (241 Bytes). View file
|
|
|
anton/analysis/__pycache__/qualitative.cpython-313.pyc
ADDED
|
Binary file (21.2 kB). View file
|
|
|
anton/analysis/__pycache__/quantitative.cpython-313.pyc
ADDED
|
Binary file (29.1 kB). View file
|
|
|
anton/analysis/qualitative.py
ADDED
|
@@ -0,0 +1,503 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Qualitative analysis tools for Anton's pipeline."""
|
| 2 |
+
|
| 3 |
+
import asyncio
|
| 4 |
+
import logging
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Dict, List, Optional, Any
|
| 7 |
+
|
| 8 |
+
logger = logging.getLogger(__name__)
|
| 9 |
+
|
| 10 |
+
class QualitativeAnalyzer:
    """Multi-stage qualitative (VLM-driven) analysis with CMPO ontology mapping.

    Runs four stages over a microscopy image: global scene understanding,
    segmentation guidance, per-region feature extraction, and population-level
    insight generation.  The textual output of each stage is mapped to CMPO
    phenotype terms and then validated by the VLM where possible.
    """

    def __init__(self, vlm_interface, cmpo_mapper):
        """Store the collaborators used by every analysis stage.

        Args:
            vlm_interface: Async VLM client exposing analyze_global_scene,
                detect_objects_and_guide and generate_population_insights.
            cmpo_mapper: Ontology mapper handed to map_to_cmpo().
        """
        self.vlm = vlm_interface
        self.cmpo_mapper = cmpo_mapper
        # Per-region feature cache keyed by region label (see _cache_features).
        self.cache = {}

    async def extract_qualitative_features(self, image_path, regions, config):
        """Main qualitative analysis pipeline with multi-stage CMPO integration.

        Args:
            image_path: Path to the image being analyzed.
            regions: Segmented region objects (skimage regionprops-like).
            config: Dict with optional 'channels' and 'batch_size' keys.

        Returns:
            Dict with per-stage results, per-stage CMPO mappings and a
            combined 'cmpo_summary'.
        """
        # Stage 1: Global scene understanding + CMPO mapping
        global_context = await self.vlm.analyze_global_scene(image_path, config.get('channels'))
        global_cmpo = await self._map_global_context_to_cmpo(global_context)

        # Stage 2: Object-level guidance (if needed)
        segmentation_guidance = await self._get_segmentation_guidance(image_path, global_context)

        # Stage 3: Feature extraction from regions + CMPO mapping
        region_features = await self._analyze_region_features(regions, config)
        region_cmpo = await self._map_region_features_to_cmpo(region_features)

        # Stage 4: Population-level insights + CMPO mapping
        population_insights = await self._generate_population_insights(region_features, global_context)
        population_cmpo = await self._map_population_insights_to_cmpo(population_insights)

        return {
            'global_context': global_context,
            'global_cmpo': global_cmpo,
            'segmentation_guidance': segmentation_guidance,
            'region_features': region_features,
            'region_cmpo': region_cmpo,
            'population_insights': population_insights,
            'population_cmpo': population_cmpo,
            'cmpo_summary': self._create_cmpo_summary(global_cmpo, region_cmpo, population_cmpo)
        }

    async def _get_segmentation_guidance(self, image_path, global_context):
        """Get guidance for segmentation based on global context.

        Falls back to a conservative threshold-based recommendation if the
        VLM call fails.
        """
        try:
            # Use VLM to provide segmentation guidance based on global context
            guidance = await self.vlm.detect_objects_and_guide(image_path, global_context)

            return {
                'recommended_method': guidance.get('segmentation_guidance', 'threshold'),
                'object_types': [obj.get('type', 'unknown') for obj in guidance.get('detected_objects', [])],
                # NOTE(review): this field carries the VLM's object-count
                # estimate, not a 0-1 confidence like the fallback below —
                # callers comparing the two paths should confirm intent.
                'confidence': guidance.get('object_count_estimate', 0),
                'guidance_details': guidance
            }
        except Exception as e:
            logger.error(f"Segmentation guidance failed: {e}")
            return {
                'recommended_method': 'threshold',
                'object_types': ['cell'],
                'confidence': 0.5,
                'guidance_details': {}
            }

    async def _analyze_region_features(self, regions, config):
        """Analyze individual regions for texture-based features.

        Regions are processed in batches of config['batch_size'] (default 10)
        and results are cached by region label.
        """
        batch_size = config.get('batch_size', 10)
        features = []

        # Process regions in batches for efficiency
        for i in range(0, len(regions), batch_size):
            batch = regions[i:i + batch_size]
            batch_patches = [self._extract_patch(region) for region in batch]

            # Convert patches to VLM-analyzable format and analyze
            batch_features = []
            for patch in batch_patches:
                # For now, create mock feature analysis since we don't have actual image patches
                feature = {
                    'patch_id': patch.get('patch_id', 0),
                    'features': self._extract_texture_features_from_patch(patch),
                    'confidence': 0.7,
                    'type': 'region_analysis',
                    'properties': patch.get('properties', {})
                }
                batch_features.append(feature)

            features.extend(batch_features)

            # Cache results to avoid re-analysis
            self._cache_features(batch, batch_features)

        return features

    def _extract_patch(self, region, padding=10):
        """Extract a patch descriptor from a region.

        Args:
            region: regionprops-like object with bbox/label/area/centroid.
            padding: Pixels of context added around the bounding box.

        Returns:
            Dict describing the patch; 'patch_data' is always None for now
            (the actual pixel data is not carried through this path).
        """
        try:
            if not hasattr(region, 'bbox') or not hasattr(region, 'image'):
                # If region doesn't have proper properties, return a mock patch
                return {
                    'patch_id': getattr(region, 'label', 0),
                    'bbox': getattr(region, 'bbox', (0, 0, 50, 50)),
                    'area': getattr(region, 'area', 100),
                    'centroid': getattr(region, 'centroid', (25, 25)),
                    'patch_data': None  # Would normally contain image data
                }

            # Extract bounding box with padding; only the top-left corner is
            # clamped to >= 0 — the bottom-right may exceed the image bounds
            # since no image shape is available here.
            minr, minc, maxr, maxc = region.bbox
            minr = max(0, minr - padding)
            minc = max(0, minc - padding)

            # Create patch info
            patch_info = {
                'patch_id': region.label,
                'bbox': (minr, minc, maxr + padding, maxc + padding),
                'area': region.area,
                'centroid': region.centroid,
                'patch_data': None,  # Could store actual image patch here
                'properties': {
                    'eccentricity': getattr(region, 'eccentricity', 0),
                    'solidity': getattr(region, 'solidity', 0),
                    'extent': getattr(region, 'extent', 0)
                }
            }

            return patch_info

        except Exception as e:
            logger.error(f"Patch extraction failed: {e}")
            return {
                'patch_id': 0,
                'bbox': (0, 0, 50, 50),
                'area': 100,
                'centroid': (25, 25),
                'patch_data': None
            }

    def _cache_features(self, regions, features):
        """Cache features for regions (keyed by label) to avoid re-analysis."""
        for region, feature in zip(regions, features):
            self.cache[region.label] = feature

    async def _generate_population_insights(self, region_features, global_context):
        """Generate insights at the population level.

        Combines a quantitative summary of the region features with
        VLM-generated insights; returns an error-bearing fallback dict on
        failure.
        """
        try:
            # Aggregate feature data for population analysis
            population_data = {
                'total_regions': len(region_features),
                'feature_distribution': self._analyze_feature_distribution(region_features),
                'global_context': global_context
            }

            # Use VLM to generate population-level insights
            insights = await self.vlm.generate_population_insights(region_features)

            # Combine with quantitative summary
            population_summary = {
                'total_objects': population_data['total_regions'],
                'feature_summary': population_data['feature_distribution'],
                'vlm_insights': insights,
                'quality_metrics': {
                    'confidence_mean': self._calculate_mean_confidence(region_features),
                    'feature_diversity': len(set([f.get('type', 'unknown') for f in region_features]))
                }
            }

            return population_summary

        except Exception as e:
            logger.error(f"Population insights generation failed: {e}")
            return {
                'total_objects': len(region_features),
                'summary': f'Detected {len(region_features)} regions',
                'error': str(e)
            }

    async def _map_global_context_to_cmpo(self, global_context):
        """Map global scene context to population-level and general CMPO terms."""
        try:
            from ..cmpo.mapping import map_to_cmpo, validate_mappings_with_vlm

            if not global_context or not isinstance(global_context, dict):
                return []

            # Extract description for mapping
            description = global_context.get('description', '')
            if not description:
                return []

            # Stage 1: Ontology-aware mapping
            mappings = map_to_cmpo(description, self.cmpo_mapper, context='cell_population')

            # Stage 2: VLM biological reasoning validation (always apply)
            if mappings:
                try:
                    validated_mappings = await validate_mappings_with_vlm(
                        description, mappings, self.vlm, max_candidates=5
                    )
                    mappings = validated_mappings if validated_mappings else mappings
                    logger.info(f"VLM biological reasoning applied to global context mappings")
                except Exception as vlm_error:
                    logger.warning(f"VLM validation failed, using ontology mappings: {vlm_error}")

            # Add stage information
            for mapping in mappings:
                mapping['stage'] = 'global_context'
                mapping['source'] = 'global_scene_analysis'
                mapping['validated'] = True  # Mark as VLM-validated

            logger.info(f"Global context mapped to {len(mappings)} CMPO terms")
            return mappings

        except Exception as e:
            logger.error(f"Global context CMPO mapping failed: {e}")
            return []

    async def _map_region_features_to_cmpo(self, region_features):
        """Map individual region features to cellular phenotype CMPO terms."""
        try:
            # BUG FIX: validate_mappings_with_vlm was called below but never
            # imported in this scope; the NameError was silently swallowed by
            # the inner except, so VLM validation never ran for regions.
            from ..cmpo.mapping import map_to_cmpo, validate_mappings_with_vlm

            cmpo_mappings = []

            for i, feature in enumerate(region_features):
                if isinstance(feature, dict):
                    # Extract meaningful descriptions from region features
                    descriptions = self._extract_region_descriptions(feature)

                    for desc_type, description in descriptions.items():
                        if description:
                            # Stage 1: Map with cellular phenotype context
                            mappings = map_to_cmpo(description, self.cmpo_mapper, context='cellular_phenotype')

                            # Stage 2: VLM biological reasoning validation (always apply)
                            if mappings:
                                try:
                                    validated_mappings = await validate_mappings_with_vlm(
                                        description, mappings, self.vlm, max_candidates=3
                                    )
                                    mappings = validated_mappings if validated_mappings else mappings
                                except Exception as vlm_error:
                                    logger.warning(f"VLM validation failed for region {i}: {vlm_error}")

                            # Add region and stage information
                            for mapping in mappings:
                                mapping['stage'] = 'region_features'
                                mapping['source'] = f'region_{i}_{desc_type}'
                                mapping['region_id'] = i
                                mapping['validated'] = True

                            cmpo_mappings.extend(mappings)

            logger.info(f"Region features mapped to {len(cmpo_mappings)} CMPO terms")
            return cmpo_mappings

        except Exception as e:
            logger.error(f"Region features CMPO mapping failed: {e}")
            return []

    async def _map_population_insights_to_cmpo(self, population_insights):
        """Map population-level insights to cell population phenotype CMPO terms."""
        try:
            # BUG FIX: same missing import as _map_region_features_to_cmpo —
            # validate_mappings_with_vlm must be imported here for the
            # validation stage to actually run.
            from ..cmpo.mapping import map_to_cmpo, validate_mappings_with_vlm

            if not population_insights or not isinstance(population_insights, dict):
                return []

            cmpo_mappings = []

            # Map different aspects of population insights
            insight_aspects = {
                'summary': population_insights.get('summary', ''),
                'phenotypes': ', '.join(population_insights.get('phenotypes', [])),
                'characteristics': population_insights.get('characteristics', ''),
                'technical_notes': population_insights.get('technical_notes', '')
            }

            for aspect_type, description in insight_aspects.items():
                if description:
                    # Stage 1: Map with appropriate context
                    context = 'cell_population' if aspect_type in ['summary', 'characteristics'] else 'cellular_phenotype'
                    mappings = map_to_cmpo(description, self.cmpo_mapper, context=context)

                    # Stage 2: VLM biological reasoning validation (always apply)
                    if mappings:
                        try:
                            validated_mappings = await validate_mappings_with_vlm(
                                description, mappings, self.vlm, max_candidates=3
                            )
                            mappings = validated_mappings if validated_mappings else mappings
                        except Exception as vlm_error:
                            logger.warning(f"VLM validation failed for population {aspect_type}: {vlm_error}")

                    # Add population and stage information
                    for mapping in mappings:
                        mapping['stage'] = 'population_insights'
                        mapping['source'] = f'population_{aspect_type}'
                        mapping['validated'] = True

                    cmpo_mappings.extend(mappings)

            logger.info(f"Population insights mapped to {len(cmpo_mappings)} CMPO terms")
            return cmpo_mappings

        except Exception as e:
            logger.error(f"Population insights CMPO mapping failed: {e}")
            return []

    def _extract_region_descriptions(self, feature):
        """Extract meaningful descriptions from region features for CMPO mapping.

        Returns a dict of description-type -> text, harvested from the
        feature's 'properties', 'type' and 'features' entries.
        """
        descriptions = {}

        # Extract different types of descriptive information
        if 'properties' in feature:
            props = feature['properties']

            # Morphological descriptions
            if 'morphology' in props:
                descriptions['morphology'] = props['morphology']

            # Phenotypic characteristics
            if 'phenotype' in props:
                descriptions['phenotype'] = props['phenotype']

            # General characteristics
            if 'characteristics' in props:
                descriptions['characteristics'] = props['characteristics']

        # Extract from feature type/classification
        if 'type' in feature:
            descriptions['cell_type'] = f"{feature['type']} cell"

        # Extract from confidence-based features
        if 'features' in feature:
            feat_list = feature['features']
            if isinstance(feat_list, list) and feat_list:
                descriptions['features'] = ', '.join(str(f) for f in feat_list[:3])  # Top 3 features

        return descriptions

    def _create_cmpo_summary(self, global_cmpo, region_cmpo, population_cmpo):
        """Create a comprehensive CMPO summary across all stages.

        Deduplicates by 'CMPO_ID', keeps the highest confidence seen across
        stages, tracks per-stage sources, and reports the top five terms.
        """
        try:
            all_mappings = []

            # Collect all mappings
            if global_cmpo:
                all_mappings.extend(global_cmpo)
            if region_cmpo:
                all_mappings.extend(region_cmpo)
            if population_cmpo:
                all_mappings.extend(population_cmpo)

            if not all_mappings:
                return {'summary': 'No CMPO mappings found', 'mappings': []}

            # Group by CMPO ID to avoid duplicates.
            # NOTE(review): this keys on 'CMPO_ID' while _deduplicate_mappings
            # keys on 'cmpo_id' — confirm which casing map_to_cmpo emits.
            unique_mappings = {}
            for mapping in all_mappings:
                cmpo_id = mapping.get('CMPO_ID')
                if cmpo_id:
                    if cmpo_id not in unique_mappings:
                        unique_mappings[cmpo_id] = mapping.copy()
                        unique_mappings[cmpo_id]['sources'] = []

                    # Track which stages contributed to this mapping
                    source_info = {
                        'stage': mapping.get('stage'),
                        'source': mapping.get('source'),
                        'confidence': mapping.get('confidence', 0)
                    }
                    unique_mappings[cmpo_id]['sources'].append(source_info)

                    # Update confidence to highest across stages
                    current_conf = unique_mappings[cmpo_id].get('confidence', 0)
                    new_conf = mapping.get('confidence', 0)
                    if new_conf > current_conf:
                        unique_mappings[cmpo_id]['confidence'] = new_conf

            # Sort by confidence
            sorted_mappings = sorted(unique_mappings.values(),
                                     key=lambda x: x.get('confidence', 0), reverse=True)

            # Create summary statistics
            stage_counts = {}
            for mapping in all_mappings:
                stage = mapping.get('stage', 'unknown')
                stage_counts[stage] = stage_counts.get(stage, 0) + 1

            summary = {
                'total_unique_terms': len(unique_mappings),
                'total_mappings': len(all_mappings),
                'stage_breakdown': stage_counts,
                'top_terms': [
                    {
                        'term': mapping.get('term_name'),
                        'cmpo_id': mapping.get('CMPO_ID'),
                        'confidence': mapping.get('confidence', 0),
                        'stages': [s['stage'] for s in mapping.get('sources', [])]
                    }
                    for mapping in sorted_mappings[:5]
                ],
                'mappings': sorted_mappings
            }

            return summary

        except Exception as e:
            logger.error(f"CMPO summary creation failed: {e}")
            return {'summary': f'Error creating CMPO summary: {str(e)}', 'mappings': []}

    def _extract_mappable_features(self, feature):
        """Extract features that can be mapped to CMPO terms (legacy function)."""
        mappable = {}

        # Extract common feature types
        if 'features' in feature:
            for feat in feature['features']:
                mappable[feat] = feature.get('confidence', 0.5)

        if 'type' in feature:
            mappable[feature['type']] = feature.get('confidence', 0.5)

        # Extract morphological features if present
        for key in ['shape', 'texture', 'intensity', 'size']:
            if key in feature:
                mappable[key] = feature[key]

        return mappable

    def _deduplicate_mappings(self, mappings):
        """Remove duplicate CMPO mappings (first occurrence wins) and sort by confidence."""
        seen = set()
        unique = []

        for mapping in mappings:
            if isinstance(mapping, dict):
                cmpo_id = mapping.get('cmpo_id', '')
                if cmpo_id and cmpo_id not in seen:
                    seen.add(cmpo_id)
                    unique.append(mapping)

        # Sort by confidence score
        return sorted(unique, key=lambda x: x.get('confidence', 0), reverse=True)

    def _analyze_feature_distribution(self, features):
        """Count occurrences of each feature 'type' across regions (non-dict items ignored)."""
        distribution = {}

        for feature in features:
            if isinstance(feature, dict):
                feat_type = feature.get('type', 'unknown')
                if feat_type in distribution:
                    distribution[feat_type] += 1
                else:
                    distribution[feat_type] = 1

        return distribution

    def _calculate_mean_confidence(self, features):
        """Calculate mean confidence across all features (0.0 when none carry one)."""
        confidences = []

        for feature in features:
            if isinstance(feature, dict) and 'confidence' in feature:
                confidences.append(feature['confidence'])

        return sum(confidences) / len(confidences) if confidences else 0.0

    def _extract_texture_features_from_patch(self, patch):
        """Derive coarse descriptor tags (shape, boundary, size) from patch properties."""
        features = []

        # Extract features based on patch properties
        properties = patch.get('properties', {})
        area = patch.get('area', 0)

        # Classify based on morphological properties
        if properties.get('eccentricity', 0) > 0.8:
            features.append('elongated')
        elif properties.get('eccentricity', 0) < 0.3:
            features.append('round')
        else:
            features.append('oval')

        if properties.get('solidity', 0) > 0.9:
            features.append('smooth_boundary')
        elif properties.get('solidity', 0) < 0.7:
            features.append('irregular_boundary')

        if area > 2000:
            features.append('large')
        elif area < 500:
            features.append('small')
        else:
            features.append('medium')

        # Add texture descriptors (would normally come from image analysis)
        features.extend(['textured', 'cellular'])

        return features
|
anton/analysis/quantitative.py
ADDED
|
@@ -0,0 +1,620 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Quantitative analysis tools for Anton's pipeline."""
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import cv2
|
| 5 |
+
from skimage import measure, morphology, filters, segmentation, feature
|
| 6 |
+
from scipy import ndimage
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from enum import Enum
|
| 9 |
+
from typing import List, Dict, Union, Optional, Tuple
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
import logging
|
| 12 |
+
|
| 13 |
+
logger = logging.getLogger(__name__)
|
| 14 |
+
|
| 15 |
+
class SegmentationStrategy(Enum):
    """Available object-segmentation methods for QuantitativeAnalyzer.

    Each member's value is the lowercase string name used in configuration
    and VLM guidance (e.g. 'recommended_method': 'threshold').
    """

    THRESHOLD = "threshold"  # intensity thresholding (default/fallback)
    WATERSHED = "watershed"
    EDGE = "edge"
    CELLPOSE = "cellpose"  # deep-learning segmenter — availability depends on install
    STARDIST = "stardist"  # deep-learning segmenter — availability depends on install
|
| 21 |
+
|
| 22 |
+
class QuantitativeAnalyzer:
    """Traditional computer vision analysis tools for microscopy images."""

    def __init__(self, config: Optional[Dict] = None):
        """Initialize the quantitative analyzer.

        Args:
            config: Configuration dictionary with analysis parameters.
                Keys read elsewhere in this class include
                'min_object_area' and 'max_object_area' (region size
                filters, in pixels).
        """
        self.config = config or {}
        # Dispatch table mapping each strategy to its bound implementation.
        # CELLPOSE and STARDIST are placeholders that delegate to threshold.
        self.segmentation_methods = {
            SegmentationStrategy.THRESHOLD: self._threshold_segmentation,
            SegmentationStrategy.WATERSHED: self._watershed_segmentation,
            SegmentationStrategy.EDGE: self._edge_segmentation,
            SegmentationStrategy.CELLPOSE: self._cellpose_segmentation,
            SegmentationStrategy.STARDIST: self._stardist_segmentation,
        }
| 40 |
+
    def extract_quantitative_features(self, image_path: Union[str, Path],
                                      channels: Optional[List[int]] = None,
                                      method: SegmentationStrategy = SegmentationStrategy.THRESHOLD) -> Dict:
        """Main quantitative analysis pipeline.

        Loads an image, segments objects with the requested strategy, then
        extracts per-object morphological, intensity, texture, and spatial
        features plus image-level summary statistics.

        Args:
            image_path: Path to the image file
            channels: List of channels to analyze (only the first listed
                channel is used for segmentation — see _preprocess_image)
            method: Segmentation method to use

        Returns:
            Dictionary containing extracted features and analysis results:
            'regions' (skimage RegionProperties), four feature DataFrames,
            'summary_stats', 'num_objects', and 'method_used'.

        Raises:
            Exception: re-raises any failure from loading, segmentation,
                or feature extraction (after logging it).
        """
        try:
            # Load and preprocess image.
            # NOTE(review): local import — presumably to avoid an import
            # cycle with anton.utils; confirm before hoisting to top level.
            from ..utils.image_io import ImageLoader
            loader = ImageLoader()
            image = loader.load(image_path)

            # Preprocess image (channel selection / grayscale / uint8)
            preprocessed = self._preprocess_image(image, channels)

            # Segment objects (nuclei, cells, etc.)
            regions = self._segment_objects(preprocessed, method)

            if not regions:
                logger.warning(f"No regions found in image {image_path}")
                return self._empty_results()

            # Extract different types of features.
            # Feature extractors receive the ORIGINAL image, not the
            # preprocessed one; each converts to grayscale as needed.
            morphological_features = self._extract_morphological_features(image, regions)
            intensity_features = self._extract_intensity_features(image, regions)
            texture_features = self._extract_texture_features(image, regions)
            spatial_features = self._extract_spatial_features(image, regions)

            # Compute summary statistics
            summary_stats = self._compute_summary_stats(morphological_features, intensity_features)

            return {
                'regions': regions,
                'morphological': morphological_features,
                'intensity': intensity_features,
                'texture': texture_features,
                'spatial': spatial_features,
                'summary_stats': summary_stats,
                'num_objects': len(regions),
                'method_used': method.value
            }

        except Exception as e:
            logger.error(f"Quantitative analysis failed for {image_path}: {e}")
            raise
| 93 |
+
def _empty_results(self) -> Dict:
|
| 94 |
+
"""Return empty results structure when no regions are found."""
|
| 95 |
+
return {
|
| 96 |
+
'regions': [],
|
| 97 |
+
'morphological': pd.DataFrame(),
|
| 98 |
+
'intensity': pd.DataFrame(),
|
| 99 |
+
'texture': pd.DataFrame(),
|
| 100 |
+
'spatial': pd.DataFrame(),
|
| 101 |
+
'summary_stats': {},
|
| 102 |
+
'num_objects': 0,
|
| 103 |
+
'method_used': 'none'
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
def _preprocess_image(self, image: np.ndarray, channels: Optional[List[int]] = None) -> np.ndarray:
|
| 107 |
+
"""Preprocess image for analysis.
|
| 108 |
+
|
| 109 |
+
Args:
|
| 110 |
+
image: Input image array
|
| 111 |
+
channels: Specific channels to use for segmentation
|
| 112 |
+
|
| 113 |
+
Returns:
|
| 114 |
+
Preprocessed image
|
| 115 |
+
"""
|
| 116 |
+
try:
|
| 117 |
+
# Extract specific channels if provided
|
| 118 |
+
if channels and len(image.shape) == 3:
|
| 119 |
+
if len(channels) == 1:
|
| 120 |
+
# Single channel for segmentation
|
| 121 |
+
processed = image[:, :, channels[0]]
|
| 122 |
+
else:
|
| 123 |
+
# Multiple channels - use first for segmentation
|
| 124 |
+
processed = image[:, :, channels[0]]
|
| 125 |
+
elif len(image.shape) == 3:
|
| 126 |
+
# Convert RGB to grayscale
|
| 127 |
+
processed = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
|
| 128 |
+
else:
|
| 129 |
+
# Already grayscale
|
| 130 |
+
processed = image.copy()
|
| 131 |
+
|
| 132 |
+
# Ensure proper data type
|
| 133 |
+
if processed.dtype != np.uint8:
|
| 134 |
+
# Normalize to 0-255 range
|
| 135 |
+
processed = ((processed - processed.min()) / (processed.max() - processed.min()) * 255).astype(np.uint8)
|
| 136 |
+
|
| 137 |
+
return processed
|
| 138 |
+
|
| 139 |
+
except Exception as e:
|
| 140 |
+
logger.error(f"Image preprocessing failed: {e}")
|
| 141 |
+
raise
|
| 142 |
+
|
| 143 |
+
def _segment_objects(self, image: np.ndarray, method: SegmentationStrategy = SegmentationStrategy.THRESHOLD) -> List:
|
| 144 |
+
"""Segment objects using specified method.
|
| 145 |
+
|
| 146 |
+
Args:
|
| 147 |
+
image: Preprocessed image
|
| 148 |
+
method: Segmentation strategy to use
|
| 149 |
+
|
| 150 |
+
Returns:
|
| 151 |
+
List of region properties
|
| 152 |
+
"""
|
| 153 |
+
try:
|
| 154 |
+
if method not in self.segmentation_methods:
|
| 155 |
+
logger.warning(f"Unknown method {method}, using threshold")
|
| 156 |
+
method = SegmentationStrategy.THRESHOLD
|
| 157 |
+
|
| 158 |
+
return self.segmentation_methods[method](image)
|
| 159 |
+
|
| 160 |
+
except Exception as e:
|
| 161 |
+
logger.error(f"Object segmentation failed: {e}")
|
| 162 |
+
return []
|
| 163 |
+
|
| 164 |
+
    def _threshold_segmentation(self, image: np.ndarray) -> List:
        """Simple threshold-based segmentation using Otsu's method.

        Pipeline: Gaussian blur -> Otsu binarization -> morphological
        open/close cleanup -> connected-component labelling -> area filter.

        Args:
            image: Grayscale input image (uint8 expected by cv2.threshold)

        Returns:
            List of skimage RegionProperties, filtered to
            [min_object_area, max_object_area] (config keys; defaults 50
            and 10000 pixels). Empty list on failure.
        """
        try:
            # Apply Gaussian blur to reduce noise
            blurred = cv2.GaussianBlur(image, (5, 5), 0)

            # Apply Otsu's threshold (threshold value chosen automatically)
            _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

            # Clean up with morphological operations:
            # open removes speckle noise, close fills small holes.
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
            cleaned = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
            cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, kernel, iterations=1)

            # Label connected components
            labeled = measure.label(cleaned)
            regions = measure.regionprops(labeled, intensity_image=image)

            # Filter by size
            min_area = self.config.get('min_object_area', 50)
            max_area = self.config.get('max_object_area', 10000)

            filtered_regions = [r for r in regions if min_area <= r.area <= max_area]

            logger.info(f"Threshold segmentation found {len(filtered_regions)} objects")
            return filtered_regions

        except Exception as e:
            logger.error(f"Threshold segmentation failed: {e}")
            return []
| 202 |
+
def _watershed_segmentation(self, image: np.ndarray) -> List:
|
| 203 |
+
"""Watershed segmentation for overlapping objects.
|
| 204 |
+
|
| 205 |
+
Args:
|
| 206 |
+
image: Grayscale input image
|
| 207 |
+
|
| 208 |
+
Returns:
|
| 209 |
+
List of region properties
|
| 210 |
+
"""
|
| 211 |
+
try:
|
| 212 |
+
# Apply Gaussian filter
|
| 213 |
+
blurred = cv2.GaussianBlur(image, (5, 5), 0)
|
| 214 |
+
|
| 215 |
+
# Threshold to get binary image
|
| 216 |
+
_, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
| 217 |
+
|
| 218 |
+
# Distance transform
|
| 219 |
+
dist_transform = cv2.distanceTransform(binary, cv2.DIST_L2, 5)
|
| 220 |
+
|
| 221 |
+
# Find local maxima as markers
|
| 222 |
+
_, markers = cv2.threshold(dist_transform, 0.4 * dist_transform.max(), 255, 0)
|
| 223 |
+
markers = markers.astype(np.uint8)
|
| 224 |
+
|
| 225 |
+
# Label markers
|
| 226 |
+
_, markers = cv2.connectedComponents(markers)
|
| 227 |
+
|
| 228 |
+
# Apply watershed
|
| 229 |
+
markers = cv2.watershed(cv2.cvtColor(image, cv2.COLOR_GRAY2RGB), markers)
|
| 230 |
+
|
| 231 |
+
# Extract regions
|
| 232 |
+
regions = measure.regionprops(markers, intensity_image=image)
|
| 233 |
+
|
| 234 |
+
# Filter by size
|
| 235 |
+
min_area = self.config.get('min_object_area', 50)
|
| 236 |
+
max_area = self.config.get('max_object_area', 10000)
|
| 237 |
+
|
| 238 |
+
filtered_regions = [r for r in regions if min_area <= r.area <= max_area and r.label > 0]
|
| 239 |
+
|
| 240 |
+
logger.info(f"Watershed segmentation found {len(filtered_regions)} objects")
|
| 241 |
+
return filtered_regions
|
| 242 |
+
|
| 243 |
+
except Exception as e:
|
| 244 |
+
logger.error(f"Watershed segmentation failed: {e}")
|
| 245 |
+
return []
|
| 246 |
+
|
| 247 |
+
    def _edge_segmentation(self, image: np.ndarray) -> List:
        """Edge-based segmentation using Canny edge detection.

        Pipeline: Gaussian blur -> Canny edges -> morphological closing to
        bridge edge gaps -> hole filling -> connected-component labelling
        -> area filter.

        Args:
            image: Grayscale input image (uint8 expected by cv2.Canny)

        Returns:
            List of skimage RegionProperties, filtered to
            [min_object_area, max_object_area] (config keys; defaults 50
            and 10000 pixels). Empty list on failure.
        """
        try:
            # Apply Gaussian blur
            blurred = cv2.GaussianBlur(image, (5, 5), 0)

            # Canny edge detection (fixed hysteresis thresholds 50/150)
            edges = cv2.Canny(blurred, 50, 150)

            # Close gaps in edges so contours become closed boundaries
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
            closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=2)

            # Fill holes to turn closed contours into solid regions
            filled = ndimage.binary_fill_holes(closed).astype(np.uint8) * 255

            # Label connected components
            labeled = measure.label(filled)
            regions = measure.regionprops(labeled, intensity_image=image)

            # Filter by size
            min_area = self.config.get('min_object_area', 50)
            max_area = self.config.get('max_object_area', 10000)

            filtered_regions = [r for r in regions if min_area <= r.area <= max_area]

            logger.info(f"Edge segmentation found {len(filtered_regions)} objects")
            return filtered_regions

        except Exception as e:
            logger.error(f"Edge segmentation failed: {e}")
            return []
| 287 |
+
def _cellpose_segmentation(self, image: np.ndarray) -> List:
|
| 288 |
+
"""Cellpose segmentation (placeholder for future implementation).
|
| 289 |
+
|
| 290 |
+
Args:
|
| 291 |
+
image: Input image
|
| 292 |
+
|
| 293 |
+
Returns:
|
| 294 |
+
List of region properties
|
| 295 |
+
"""
|
| 296 |
+
logger.warning("Cellpose segmentation not implemented, using threshold instead")
|
| 297 |
+
return self._threshold_segmentation(image)
|
| 298 |
+
|
| 299 |
+
def _stardist_segmentation(self, image: np.ndarray) -> List:
|
| 300 |
+
"""StarDist segmentation (placeholder for future implementation).
|
| 301 |
+
|
| 302 |
+
Args:
|
| 303 |
+
image: Input image
|
| 304 |
+
|
| 305 |
+
Returns:
|
| 306 |
+
List of region properties
|
| 307 |
+
"""
|
| 308 |
+
logger.warning("StarDist segmentation not implemented, using threshold instead")
|
| 309 |
+
return self._threshold_segmentation(image)
|
| 310 |
+
|
| 311 |
+
    def _extract_morphological_features(self, image: np.ndarray, regions: List) -> pd.DataFrame:
        """Extract morphological (shape) features from segmented regions.

        Args:
            image: Original image (unused here; kept for a uniform
                extractor signature)
            regions: List of skimage RegionProperties

        Returns:
            DataFrame with one row per region: skimage shape properties
            plus derived 'compactness' (4*pi*A/P^2, 1.0 for a perfect
            circle) and 'aspect_ratio'. Empty DataFrame on failure.
        """
        try:
            features = []

            for i, region in enumerate(regions):
                feature_dict = {
                    'object_id': i,
                    'area': region.area,
                    'perimeter': region.perimeter,
                    # region.centroid is (row, col) -> (y, x)
                    'centroid_x': region.centroid[1],
                    'centroid_y': region.centroid[0],
                    'eccentricity': region.eccentricity,
                    'solidity': region.solidity,
                    'extent': region.extent,
                    'orientation': region.orientation,
                    'major_axis_length': region.major_axis_length,
                    'minor_axis_length': region.minor_axis_length,
                    'equivalent_diameter': region.equivalent_diameter,
                    'convex_area': region.convex_area,
                    'filled_area': region.filled_area,
                    'euler_number': region.euler_number
                }

                # Derived features (guard against degenerate regions)
                if region.perimeter > 0:
                    feature_dict['compactness'] = (4 * np.pi * region.area) / (region.perimeter ** 2)
                else:
                    feature_dict['compactness'] = 0

                if region.minor_axis_length > 0:
                    feature_dict['aspect_ratio'] = region.major_axis_length / region.minor_axis_length
                else:
                    feature_dict['aspect_ratio'] = 1

                features.append(feature_dict)

            return pd.DataFrame(features)

        except Exception as e:
            logger.error(f"Morphological feature extraction failed: {e}")
            return pd.DataFrame()
| 362 |
+
    def _extract_intensity_features(self, image: np.ndarray, regions: List) -> pd.DataFrame:
        """Extract intensity-based features from segmented regions.

        Args:
            image: Original image; converted to grayscale here if RGB
            regions: List of skimage RegionProperties

        Returns:
            DataFrame with one row per region: basic intensity statistics,
            percentiles, and the intensity-weighted centroid. Empty
            DataFrame on failure.
        """
        try:
            features = []

            # Convert to grayscale if needed
            if len(image.shape) == 3:
                gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
            else:
                gray_image = image

            for i, region in enumerate(regions):
                # Get pixel intensities for this region.
                # coords is an (N, 2) array of (row, col) pixel positions.
                coords = region.coords
                intensities = gray_image[coords[:, 0], coords[:, 1]]

                feature_dict = {
                    'object_id': i,
                    'mean_intensity': np.mean(intensities),
                    'median_intensity': np.median(intensities),
                    'std_intensity': np.std(intensities),
                    'min_intensity': np.min(intensities),
                    'max_intensity': np.max(intensities),
                    'intensity_range': np.max(intensities) - np.min(intensities),
                    'integrated_intensity': np.sum(intensities),
                    # weighted_centroid is (row, col) -> (y, x)
                    'weighted_centroid_x': region.weighted_centroid[1],
                    'weighted_centroid_y': region.weighted_centroid[0]
                }

                # Additional percentiles
                feature_dict['intensity_p25'] = np.percentile(intensities, 25)
                feature_dict['intensity_p75'] = np.percentile(intensities, 75)
                feature_dict['intensity_iqr'] = feature_dict['intensity_p75'] - feature_dict['intensity_p25']

                features.append(feature_dict)

            return pd.DataFrame(features)

        except Exception as e:
            logger.error(f"Intensity feature extraction failed: {e}")
            return pd.DataFrame()
| 412 |
+
    def _extract_texture_features(self, image: np.ndarray, regions: List) -> pd.DataFrame:
        """Extract texture features from segmented regions.

        Computes simple statistical texture measures (contrast, variance,
        skewness, kurtosis, energy) over each region's masked pixels plus
        a simplified Local Binary Pattern variance over the bounding box.

        Args:
            image: Original image; converted to grayscale here if RGB
            regions: List of skimage RegionProperties

        Returns:
            DataFrame with one row per region. Empty DataFrame on failure.
        """
        try:
            features = []

            # Convert to grayscale if needed
            if len(image.shape) == 3:
                gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
            else:
                gray_image = image

            for i, region in enumerate(regions):
                # Extract region of interest (bounding box crop)
                minr, minc, maxr, maxc = region.bbox
                roi = gray_image[minr:maxr, minc:maxc]
                mask = np.zeros_like(roi, dtype=bool)

                # Create mask for this region: shift absolute pixel
                # coordinates into ROI-local coordinates, dropping any that
                # fall outside the crop (defensive bounds check).
                coords = region.coords
                local_coords = coords - [minr, minc]
                valid_coords = ((local_coords[:, 0] >= 0) & (local_coords[:, 0] < roi.shape[0]) &
                               (local_coords[:, 1] >= 0) & (local_coords[:, 1] < roi.shape[1]))
                if np.any(valid_coords):
                    mask[local_coords[valid_coords, 0], local_coords[valid_coords, 1]] = True

                # Basic texture measures over the masked pixels
                # (falls back to the whole ROI if the mask is empty).
                roi_masked = roi[mask] if np.any(mask) else roi.flatten()

                feature_dict = {
                    'object_id': i,
                    'texture_contrast': np.std(roi_masked) if len(roi_masked) > 1 else 0,
                    'texture_variance': np.var(roi_masked) if len(roi_masked) > 1 else 0,
                    'texture_skewness': self._compute_skewness(roi_masked),
                    'texture_kurtosis': self._compute_kurtosis(roi_masked),
                    'texture_energy': np.sum(roi_masked ** 2) if len(roi_masked) > 0 else 0
                }

                # Local Binary Pattern (simplified), over the full bbox crop
                if roi.size > 0:
                    lbp_var = self._compute_lbp_variance(roi)
                    feature_dict['lbp_variance'] = lbp_var
                else:
                    feature_dict['lbp_variance'] = 0

                features.append(feature_dict)

            return pd.DataFrame(features)

        except Exception as e:
            logger.error(f"Texture feature extraction failed: {e}")
            return pd.DataFrame()
| 472 |
+
def _extract_spatial_features(self, image: np.ndarray, regions: List) -> pd.DataFrame:
|
| 473 |
+
"""Extract spatial and neighborhood features.
|
| 474 |
+
|
| 475 |
+
Args:
|
| 476 |
+
image: Original image
|
| 477 |
+
regions: List of region properties
|
| 478 |
+
|
| 479 |
+
Returns:
|
| 480 |
+
DataFrame with spatial features
|
| 481 |
+
"""
|
| 482 |
+
try:
|
| 483 |
+
features = []
|
| 484 |
+
|
| 485 |
+
# Compute centroids for distance calculations
|
| 486 |
+
centroids = np.array([region.centroid for region in regions])
|
| 487 |
+
|
| 488 |
+
for i, region in enumerate(regions):
|
| 489 |
+
feature_dict = {
|
| 490 |
+
'object_id': i,
|
| 491 |
+
'distance_to_edge': self._distance_to_edge(region, image.shape),
|
| 492 |
+
'distance_to_center': self._distance_to_center(region, image.shape)
|
| 493 |
+
}
|
| 494 |
+
|
| 495 |
+
# Neighborhood analysis
|
| 496 |
+
if len(centroids) > 1:
|
| 497 |
+
distances = np.linalg.norm(centroids - region.centroid, axis=1)
|
| 498 |
+
distances = distances[distances > 0] # Exclude self
|
| 499 |
+
|
| 500 |
+
if len(distances) > 0:
|
| 501 |
+
feature_dict['nearest_neighbor_distance'] = np.min(distances)
|
| 502 |
+
feature_dict['mean_neighbor_distance'] = np.mean(distances)
|
| 503 |
+
feature_dict['neighbor_count_50px'] = np.sum(distances < 50)
|
| 504 |
+
feature_dict['neighbor_count_100px'] = np.sum(distances < 100)
|
| 505 |
+
else:
|
| 506 |
+
feature_dict['nearest_neighbor_distance'] = np.inf
|
| 507 |
+
feature_dict['mean_neighbor_distance'] = np.inf
|
| 508 |
+
feature_dict['neighbor_count_50px'] = 0
|
| 509 |
+
feature_dict['neighbor_count_100px'] = 0
|
| 510 |
+
else:
|
| 511 |
+
feature_dict['nearest_neighbor_distance'] = np.inf
|
| 512 |
+
feature_dict['mean_neighbor_distance'] = np.inf
|
| 513 |
+
feature_dict['neighbor_count_50px'] = 0
|
| 514 |
+
feature_dict['neighbor_count_100px'] = 0
|
| 515 |
+
|
| 516 |
+
features.append(feature_dict)
|
| 517 |
+
|
| 518 |
+
return pd.DataFrame(features)
|
| 519 |
+
|
| 520 |
+
except Exception as e:
|
| 521 |
+
logger.error(f"Spatial feature extraction failed: {e}")
|
| 522 |
+
return pd.DataFrame()
|
| 523 |
+
|
| 524 |
+
def _compute_summary_stats(self, morphological_features: pd.DataFrame,
|
| 525 |
+
intensity_features: pd.DataFrame) -> Dict:
|
| 526 |
+
"""Compute summary statistics across all objects.
|
| 527 |
+
|
| 528 |
+
Args:
|
| 529 |
+
morphological_features: DataFrame with morphological features
|
| 530 |
+
intensity_features: DataFrame with intensity features
|
| 531 |
+
|
| 532 |
+
Returns:
|
| 533 |
+
Dictionary with summary statistics
|
| 534 |
+
"""
|
| 535 |
+
try:
|
| 536 |
+
summary = {}
|
| 537 |
+
|
| 538 |
+
if not morphological_features.empty:
|
| 539 |
+
summary['morphological'] = {
|
| 540 |
+
'total_objects': len(morphological_features),
|
| 541 |
+
'mean_area': float(morphological_features['area'].mean()),
|
| 542 |
+
'std_area': float(morphological_features['area'].std()),
|
| 543 |
+
'mean_perimeter': float(morphological_features['perimeter'].mean()),
|
| 544 |
+
'mean_eccentricity': float(morphological_features['eccentricity'].mean()),
|
| 545 |
+
'mean_solidity': float(morphological_features['solidity'].mean())
|
| 546 |
+
}
|
| 547 |
+
|
| 548 |
+
if not intensity_features.empty:
|
| 549 |
+
summary['intensity'] = {
|
| 550 |
+
'mean_intensity': float(intensity_features['mean_intensity'].mean()),
|
| 551 |
+
'overall_integrated_intensity': float(intensity_features['integrated_intensity'].sum()),
|
| 552 |
+
'intensity_cv': float(intensity_features['mean_intensity'].std() / intensity_features['mean_intensity'].mean())
|
| 553 |
+
if intensity_features['mean_intensity'].mean() > 0 else 0
|
| 554 |
+
}
|
| 555 |
+
|
| 556 |
+
return summary
|
| 557 |
+
|
| 558 |
+
except Exception as e:
|
| 559 |
+
logger.error(f"Summary statistics computation failed: {e}")
|
| 560 |
+
return {}
|
| 561 |
+
|
| 562 |
+
def _compute_skewness(self, data: np.ndarray) -> float:
|
| 563 |
+
"""Compute skewness of data."""
|
| 564 |
+
if len(data) < 3:
|
| 565 |
+
return 0.0
|
| 566 |
+
mean_val = np.mean(data)
|
| 567 |
+
std_val = np.std(data)
|
| 568 |
+
if std_val == 0:
|
| 569 |
+
return 0.0
|
| 570 |
+
return np.mean(((data - mean_val) / std_val) ** 3)
|
| 571 |
+
|
| 572 |
+
def _compute_kurtosis(self, data: np.ndarray) -> float:
|
| 573 |
+
"""Compute kurtosis of data."""
|
| 574 |
+
if len(data) < 4:
|
| 575 |
+
return 0.0
|
| 576 |
+
mean_val = np.mean(data)
|
| 577 |
+
std_val = np.std(data)
|
| 578 |
+
if std_val == 0:
|
| 579 |
+
return 0.0
|
| 580 |
+
return np.mean(((data - mean_val) / std_val) ** 4) - 3
|
| 581 |
+
|
| 582 |
+
def _compute_lbp_variance(self, image: np.ndarray) -> float:
|
| 583 |
+
"""Compute Local Binary Pattern variance (simplified version)."""
|
| 584 |
+
if image.size < 9:
|
| 585 |
+
return 0.0
|
| 586 |
+
try:
|
| 587 |
+
# Simple LBP calculation for center pixels
|
| 588 |
+
center = image[1:-1, 1:-1]
|
| 589 |
+
patterns = []
|
| 590 |
+
|
| 591 |
+
offsets = [(-1, -1), (-1, 0), (-1, 1), (0, 1), (1, 1), (1, 0), (1, -1), (0, -1)]
|
| 592 |
+
|
| 593 |
+
for i in range(center.shape[0]):
|
| 594 |
+
for j in range(center.shape[1]):
|
| 595 |
+
pattern = 0
|
| 596 |
+
center_val = center[i, j]
|
| 597 |
+
for k, (di, dj) in enumerate(offsets):
|
| 598 |
+
if image[i + 1 + di, j + 1 + dj] >= center_val:
|
| 599 |
+
pattern |= (1 << k)
|
| 600 |
+
patterns.append(pattern)
|
| 601 |
+
|
| 602 |
+
return float(np.var(patterns)) if patterns else 0.0
|
| 603 |
+
except:
|
| 604 |
+
return 0.0
|
| 605 |
+
|
| 606 |
+
def _distance_to_edge(self, region, image_shape: Tuple[int, int]) -> float:
|
| 607 |
+
"""Compute minimum distance from region centroid to image edge."""
|
| 608 |
+
cy, cx = region.centroid
|
| 609 |
+
height, width = image_shape[:2]
|
| 610 |
+
|
| 611 |
+
distances = [cy, height - cy, cx, width - cx]
|
| 612 |
+
return float(min(distances))
|
| 613 |
+
|
| 614 |
+
def _distance_to_center(self, region, image_shape: Tuple[int, int]) -> float:
|
| 615 |
+
"""Compute distance from region centroid to image center."""
|
| 616 |
+
cy, cx = region.centroid
|
| 617 |
+
height, width = image_shape[:2]
|
| 618 |
+
center_y, center_x = height / 2, width / 2
|
| 619 |
+
|
| 620 |
+
return float(np.sqrt((cy - center_y) ** 2 + (cx - center_x) ** 2))
|
anton/cmpo.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"CMPO_0000094": {"name": "apoptotic cell phenotype", "features": ["apoptosis_markers", "nuclear_fragmentation"]},
|
| 3 |
+
"CMPO_0000140": {"name": "mitotic cell phenotype", "features": ["mitotic_figures", "chromatin_condensation"]},
|
| 4 |
+
"CMPO_0000077": {"name": "abnormal cell morphology phenotype", "features": ["abnormal_morphology", "nuclear_size"]},
|
| 5 |
+
"CMPO_0000098": {"name": "autophagic cell phenotype", "features": ["lc3_puncta"]},
|
| 6 |
+
"CMPO_0000123": {"name": "increased cell size phenotype", "features": ["increased_cell_size"]},
|
| 7 |
+
"CMPO_0000289": {"name": "increased stress fibers phenotype", "features": ["increased_stress_fibers"]}
|
| 8 |
+
}
|
anton/cmpo/README.md
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CMPO Integration Module
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
|
| 5 |
+
The Cellular Microscopy Phenotype Ontology (CMPO) integration module is a core component of Anton that provides **semantic mapping between natural language descriptions and standardized scientific terminology**. This module enables Anton to translate VLM-generated insights into scientifically compliant, searchable, and interoperable phenotype classifications.
|
| 6 |
+
|
| 7 |
+
## Problem Statement
|
| 8 |
+
|
| 9 |
+
Modern microscopy analysis faces a critical challenge: **bridging the semantic gap** between AI-generated natural language descriptions and standardized scientific terminology. While VLMs can provide expert-level biological insights ("cells arrested in metaphase with condensed chromosomes"), these descriptions need to be mapped to formal ontology terms for:
|
| 10 |
+
|
| 11 |
+
- **Scientific standardization**: Ensuring consistent terminology across studies
|
| 12 |
+
- **Data interoperability**: Enabling cross-dataset comparisons and meta-analyses
|
| 13 |
+
- **Knowledge integration**: Connecting observations to broader biological knowledge graphs
|
| 14 |
+
- **Reproducible research**: Providing precise, unambiguous phenotype classifications
|
| 15 |
+
|
| 16 |
+
## Conceptual Framework
|
| 17 |
+
|
| 18 |
+
### 1. Multi-Level Hierarchical Mapping
|
| 19 |
+
|
| 20 |
+
CMPO is organized in a hierarchical structure with multiple branches:
|
| 21 |
+
|
| 22 |
+
```
|
| 23 |
+
CMPO Root
|
| 24 |
+
βββ biological_process (GO terms)
|
| 25 |
+
βββ cellular_phenotype (398 terms)
|
| 26 |
+
β βββ cell_population_phenotype (73)
|
| 27 |
+
β βββ cell_process_phenotype (157)
|
| 28 |
+
β β βββ cell_cycle_phenotype (46)
|
| 29 |
+
β β β βββ cell_cycle_arrested_phenotype (6)
|
| 30 |
+
β β β β βββ G2_arrested_phenotype
|
| 31 |
+
β β β β βββ M_phase_arrested_phenotype
|
| 32 |
+
β β β β βββ metaphase_arrested_phenotype
|
| 33 |
+
β β β βββ mitotic_process_phenotype (37)
|
| 34 |
+
β β βββ cell_death_phenotype (1)
|
| 35 |
+
β βββ cellular_component_phenotype (186)
|
| 36 |
+
βββ molecular_entity (CHEBI terms)
|
| 37 |
+
βββ molecular_function (GO terms)
|
| 38 |
+
βββ quality (PATO terms)
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
### 2. Research Context-Aware Subgraph Navigation
|
| 42 |
+
|
| 43 |
+
**Key Insight**: Researchers often have specific analytical intentions that determine which CMPO subgraphs are most relevant.
|
| 44 |
+
|
| 45 |
+
**Context Types**:
|
| 46 |
+
- **Process-focused**: Studying cell division, apoptosis, migration → `cell_process_phenotype` subgraph
|
| 47 |
+
- **Component-focused**: Analyzing organelles, structures → `cellular_component_phenotype` subgraph
|
| 48 |
+
- **Multi-intent**: Cell cycle AND mitochondrial analysis → Multiple overlapping subgraphs
|
| 49 |
+
- **Population-level**: Colony behavior, density effects β `cell_population_phenotype` subgraph
|
| 50 |
+
|
| 51 |
+
### 3. Two-Strategy VLM Mapping Approach
|
| 52 |
+
|
| 53 |
+
#### Strategy 1: Description → CMPO Mapping
|
| 54 |
+
```
|
| 55 |
+
VLM Analysis: "Cells show metaphase arrest with hypercondensed chromosomes"
|
| 56 |
+
↓
|
| 57 |
+
Semantic Parsing: Extract ['metaphase', 'arrest', 'chromosomes', 'condensed']
|
| 58 |
+
↓
|
| 59 |
+
CMPO Mapping: → CMPO:0000XXX "metaphase arrested phenotype"
|
| 60 |
+
```
|
| 61 |
+
|
| 62 |
+
#### Strategy 2: CMPO-Guided Evidence Detection
|
| 63 |
+
```
|
| 64 |
+
Research Context: "Studying cell cycle defects"
|
| 65 |
+
↓
|
| 66 |
+
Subgraph Selection: Focus on cell_cycle_phenotype branch
|
| 67 |
+
↓
|
| 68 |
+
VLM Query: "Do you see evidence of: metaphase arrest, anaphase defects, etc.?"
|
| 69 |
+
↓
|
| 70 |
+
Targeted Classification: Direct mapping to specific terms
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
## Technical Implementation
|
| 74 |
+
|
| 75 |
+
### Semantic Mapping Pipeline
|
| 76 |
+
|
| 77 |
+
1. **Ontology Loading**: Parse full CMPO .obo file with rich semantic relations
|
| 78 |
+
2. **Multi-Modal Matching**:
|
| 79 |
+
- **Direct matching**: Term names and synonyms
|
| 80 |
+
- **Semantic matching**: Logical definitions and cross-ontology references
|
| 81 |
+
- **Contextual matching**: Hierarchical subgraph relevance
|
| 82 |
+
3. **Confidence Scoring**: Weighted combination of multiple evidence sources
|
| 83 |
+
4. **Hierarchy Navigation**: Maintain relationships for downstream analysis
|
| 84 |
+
|
| 85 |
+
### Rich Ontological Information
|
| 86 |
+
|
| 87 |
+
Each CMPO term contains:
|
| 88 |
+
|
| 89 |
+
```python
|
| 90 |
+
{
|
| 91 |
+
"CMPO:0001234": {
|
| 92 |
+
"name": "metaphase arrested phenotype",
|
| 93 |
+
"description": "A phenotype in which cells are arrested in metaphase",
|
| 94 |
+
"synonyms": ["metaphase arrest", "M-phase block"],
|
| 95 |
+
"subclass_of": ["cell_cycle_arrested_phenotype", "mitotic_phenotype"],
|
| 96 |
+
"equivalent_to": "has_part(arrested and characteristic_of(mitotic_metaphase))",
|
| 97 |
+
"xrefs": ["GO:0000819"], # Cross-ontology links
|
| 98 |
+
"subset": ["cmpo_core"]
|
| 99 |
+
}
|
| 100 |
+
}
|
| 101 |
+
```
|
| 102 |
+
|
| 103 |
+
### Two-Stage Mapping Pipeline
|
| 104 |
+
|
| 105 |
+
```python
|
| 106 |
+
async def map_to_cmpo_enhanced(description, cmpo_ontology, vlm_interface, context=None):
|
| 107 |
+
# Stage 1: Ontology-Aware Candidate Generation
|
| 108 |
+
candidates = ontology_aware_mapping(description, cmpo_ontology, context)
|
| 109 |
+
|
| 110 |
+
# Stage 2: VLM Biological Reasoning & Pruning
|
| 111 |
+
if len(candidates) > 1:
|
| 112 |
+
validated_mappings = await vlm_biological_validation(description, candidates, vlm_interface)
|
| 113 |
+
return validated_mappings
|
| 114 |
+
else:
|
| 115 |
+
return candidates
|
| 116 |
+
|
| 117 |
+
def ontology_aware_mapping(description, cmpo_ontology, context=None):
|
| 118 |
+
# 1. Enhanced token extraction with exact matching priority
|
| 119 |
+
exact_tokens = extract_exact_biological_matches(description)
|
| 120 |
+
fuzzy_tokens = extract_fuzzy_biological_tokens(description)
|
| 121 |
+
|
| 122 |
+
# 2. Hierarchical scoring
|
| 123 |
+
for term_id, term_data in cmpo_ontology.ontology.items():
|
| 124 |
+
score = 0
|
| 125 |
+
|
| 126 |
+
# Exact token matches (highest weight)
|
| 127 |
+
exact_score = calculate_exact_matches(exact_tokens, term_data) * 1.0
|
| 128 |
+
|
| 129 |
+
# Hierarchical specificity (deeper = more specific = higher score)
|
| 130 |
+
specificity_score = calculate_hierarchy_depth(term_id, cmpo_ontology) * 0.3
|
| 131 |
+
|
| 132 |
+
# Ontological distance (closer = more related = higher score)
|
| 133 |
+
distance_score = calculate_ontological_distance(term_id, context_terms) * 0.2
|
| 134 |
+
|
| 135 |
+
# Fuzzy similarity (lowest weight)
|
| 136 |
+
fuzzy_score = calculate_fuzzy_similarity(fuzzy_tokens, term_data) * 0.1
|
| 137 |
+
|
| 138 |
+
total_score = exact_score + specificity_score + distance_score + fuzzy_score
|
| 139 |
+
|
| 140 |
+
return ranked_candidates
|
| 141 |
+
|
| 142 |
+
async def vlm_biological_validation(description, candidates, vlm_interface):
|
| 143 |
+
validation_prompt = f"""
|
| 144 |
+
Original biological description: "{description}"
|
| 145 |
+
|
| 146 |
+
Candidate CMPO term mappings:
|
| 147 |
+
{format_candidates_for_review(candidates)}
|
| 148 |
+
|
| 149 |
+
Task: Evaluate biological plausibility and ranking of these mappings.
|
| 150 |
+
|
| 151 |
+
Consider:
|
| 152 |
+
- Biological consistency and logical compatibility
|
| 153 |
+
- Temporal/spatial relationships in biological processes
|
| 154 |
+
- Phenotypic co-occurrence patterns
|
| 155 |
+
- Mechanistic plausibility
|
| 156 |
+
- Specificity vs generality trade-offs
|
| 157 |
+
|
| 158 |
+
Provide:
|
| 159 |
+
1. Biologically valid mappings (with confidence 0-1)
|
| 160 |
+
2. Brief scientific reasoning for each acceptance/rejection
|
| 161 |
+
3. Final ranked list
|
| 162 |
+
|
| 163 |
+
Focus on biological accuracy over textual similarity.
|
| 164 |
+
"""
|
| 165 |
+
|
| 166 |
+
reasoning_result = await vlm_interface.reason_about_mappings(validation_prompt)
|
| 167 |
+
return parse_and_apply_biological_reasoning(candidates, reasoning_result)
|
| 168 |
+
```
|
| 169 |
+
|
| 170 |
+
## Usage Examples
|
| 171 |
+
|
| 172 |
+
### Basic Mapping
|
| 173 |
+
```python
|
| 174 |
+
from anton.cmpo import CMPOOntology, map_to_cmpo
|
| 175 |
+
|
| 176 |
+
cmpo = CMPOOntology()
|
| 177 |
+
results = map_to_cmpo("cells arrested in metaphase with condensed chromosomes", cmpo)
|
| 178 |
+
|
| 179 |
+
# Output:
|
| 180 |
+
# [
|
| 181 |
+
# {
|
| 182 |
+
# "CMPO_ID": "CMPO:0001234",
|
| 183 |
+
# "term_name": "metaphase arrested phenotype",
|
| 184 |
+
# "confidence": 0.92,
|
| 185 |
+
# "supporting_evidence": "Direct match: metaphase; Semantic: arrested + mitotic",
|
| 186 |
+
# "hierarchy_path": ["metaphase arrested phenotype", "cell cycle arrested phenotype", "cell cycle phenotype"]
|
| 187 |
+
# }
|
| 188 |
+
# ]
|
| 189 |
+
```
|
| 190 |
+
|
| 191 |
+
### Context-Aware Mapping
|
| 192 |
+
```python
|
| 193 |
+
# Research studying apoptosis
|
| 194 |
+
results = map_to_cmpo("fragmented nuclei with membrane blebbing", cmpo, context="apoptosis")
|
| 195 |
+
# β Higher confidence for apoptotic_cell_phenotype terms
|
| 196 |
+
|
| 197 |
+
# Research studying cell division
|
| 198 |
+
results = map_to_cmpo("abnormal spindle formation", cmpo, context="cell_cycle")
|
| 199 |
+
# β Higher confidence for mitotic_process_phenotype terms
|
| 200 |
+
```
|
| 201 |
+
|
| 202 |
+
### Integration with Anton Pipeline
|
| 203 |
+
```python
|
| 204 |
+
# Within QualitativeAnalyzer
|
| 205 |
+
population_insights = await vlm.analyze_population(image)
|
| 206 |
+
cmpo_mappings = map_to_cmpo(
|
| 207 |
+
description=population_insights['description'],
|
| 208 |
+
cmpo_ontology=self.cmpo_mapper,
|
| 209 |
+
context=self.research_context
|
| 210 |
+
)
|
| 211 |
+
```
|
| 212 |
+
|
| 213 |
+
## Validation and Quality Assurance
|
| 214 |
+
|
| 215 |
+
### Confidence Thresholds
|
| 216 |
+
- **High confidence (>0.8)**: Direct term matches with strong semantic support
|
| 217 |
+
- **Medium confidence (0.5-0.8)**: Semantic matches with contextual support
|
| 218 |
+
- **Low confidence (0.3-0.5)**: Weak matches requiring human review
|
| 219 |
+
- **Below threshold (<0.3)**: Excluded from results
|
| 220 |
+
|
| 221 |
+
### Evidence Tracking
|
| 222 |
+
Each mapping includes:
|
| 223 |
+
- **Supporting evidence**: Specific text that triggered the match
|
| 224 |
+
- **Mapping type**: Direct, semantic, or contextual
|
| 225 |
+
- **Hierarchy path**: Full taxonomic classification
|
| 226 |
+
- **Cross-references**: Links to related GO/PATO terms
|
| 227 |
+
|
| 228 |
+
## Future Enhancements
|
| 229 |
+
|
| 230 |
+
### 1. Machine Learning Integration
|
| 231 |
+
- **Embedding-based similarity**: Use biological language models (BioBERT, etc.)
|
| 232 |
+
- **Context learning**: Train models on researcher annotation patterns
|
| 233 |
+
- **Active learning**: Improve mappings based on user feedback
|
| 234 |
+
|
| 235 |
+
### 2. Advanced Semantic Reasoning
|
| 236 |
+
- **Logical inference**: Use formal ontology reasoning for complex mappings
|
| 237 |
+
- **Negation handling**: Detect and properly handle negative evidence
|
| 238 |
+
- **Uncertainty quantification**: Bayesian confidence estimates
|
| 239 |
+
|
| 240 |
+
### 3. Multi-Ontology Integration
|
| 241 |
+
- **Cross-ontology alignment**: Map to GO, PATO, CHEBI simultaneously
|
| 242 |
+
- **Knowledge graph construction**: Build comprehensive phenotype knowledge graphs
|
| 243 |
+
- **Standardized interfaces**: FAIR data principles compliance
|
| 244 |
+
|
| 245 |
+
### 4. Dynamic Ontology Updates
|
| 246 |
+
- **Version management**: Handle CMPO ontology updates gracefully
|
| 247 |
+
- **Backward compatibility**: Maintain mapping consistency across versions
|
| 248 |
+
- **Community integration**: Contribute mappings back to CMPO community
|
| 249 |
+
|
| 250 |
+
## Research Applications
|
| 251 |
+
|
| 252 |
+
### Enabled Use Cases
|
| 253 |
+
1. **Large-scale phenotype screens**: Standardized classification across thousands of images
|
| 254 |
+
2. **Cross-study meta-analysis**: Combine results from different research groups
|
| 255 |
+
3. **Drug discovery**: Map compound effects to standardized phenotype profiles
|
| 256 |
+
4. **Disease research**: Connect cellular phenotypes to pathological processes
|
| 257 |
+
5. **Evolutionary studies**: Compare phenotypes across species using common vocabulary
|
| 258 |
+
|
| 259 |
+
### Scientific Impact
|
| 260 |
+
- **Reproducibility**: Eliminates ambiguity in phenotype descriptions
|
| 261 |
+
- **Discoverability**: Enables semantic search across phenotype databases
|
| 262 |
+
- **Integration**: Connects microscopy data to broader biological knowledge
|
| 263 |
+
- **Collaboration**: Provides common language for interdisciplinary research
|
| 264 |
+
|
| 265 |
+
---
|
| 266 |
+
|
| 267 |
+
## Development Notes
|
| 268 |
+
|
| 269 |
+
### Design Decisions
|
| 270 |
+
|
| 271 |
+
**Why hierarchical subgraph mapping?**
|
| 272 |
+
- CMPO contains >600 terms across diverse biological domains
|
| 273 |
+
- Research context dramatically improves mapping accuracy
|
| 274 |
+
- Enables both broad screening and focused deep analysis
|
| 275 |
+
|
| 276 |
+
**Why two-strategy VLM approach?**
|
| 277 |
+
- Strategy 1 (descriptionβCMPO) handles unexpected discoveries
|
| 278 |
+
- Strategy 2 (CMPO-guided) ensures comprehensive coverage of known phenotypes
|
| 279 |
+
- Combination provides both discovery and validation capabilities
|
| 280 |
+
|
| 281 |
+
**Why rich semantic relations?**
|
| 282 |
+
- Simple keyword matching fails for scientific terminology
|
| 283 |
+
- Logical definitions enable precise semantic matching
|
| 284 |
+
- Cross-ontology links expand vocabulary and validation
|
| 285 |
+
|
| 286 |
+
### Code Organization
|
| 287 |
+
- `ontology.py`: CMPO data loading, parsing, and management
|
| 288 |
+
- `mapping.py`: Core mapping algorithms and semantic analysis
|
| 289 |
+
- `__init__.py`: Module interface and public API
|
| 290 |
+
- `README.md`: Comprehensive documentation (this file)
|
| 291 |
+
|
| 292 |
+
### Testing Strategy
|
| 293 |
+
- Unit tests for individual mapping functions
|
| 294 |
+
- Integration tests with full CMPO ontology
|
| 295 |
+
- Validation against expert-annotated datasets
|
| 296 |
+
- Performance benchmarks for large-scale analysis
|
| 297 |
+
|
| 298 |
+
---
|
| 299 |
+
|
| 300 |
+
*This module represents a significant advancement in automated microscopy phenotype classification, bridging AI-generated insights with rigorous scientific standards.*
|
anton/cmpo/__init__.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
CMPO (Cellular Microscopy Phenotype Ontology) Integration Module for Anton
|
| 3 |
+
|
| 4 |
+
This module provides sophisticated ontology-based phenotype classification for microscopy analysis.
|
| 5 |
+
It bridges the gap between VLM-generated natural language descriptions and standardized
|
| 6 |
+
scientific terminology through hierarchical semantic mapping.
|
| 7 |
+
|
| 8 |
+
Key Components:
|
| 9 |
+
- CMPOOntology: Loads and manages the full CMPO ontology with rich semantic relations
|
| 10 |
+
- map_to_cmpo: Context-aware mapping from descriptions to CMPO terms
|
| 11 |
+
- Hierarchical subgraph navigation for research-context-specific mapping
|
| 12 |
+
|
| 13 |
+
Usage:
|
| 14 |
+
from anton.cmpo import CMPOOntology, map_to_cmpo
|
| 15 |
+
|
| 16 |
+
cmpo = CMPOOntology()
|
| 17 |
+
results = map_to_cmpo("cells arrested in metaphase", cmpo, context="cell_cycle")
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
from .ontology import CMPOOntology
|
| 21 |
+
from .mapping import map_to_cmpo, validate_mappings_with_vlm
|
| 22 |
+
|
| 23 |
+
__all__ = ['CMPOOntology', 'map_to_cmpo', 'validate_mappings_with_vlm']
|
| 24 |
+
__version__ = '1.0.0'
|
anton/cmpo/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (1.12 kB). View file
|
|
|
anton/cmpo/__pycache__/examples.cpython-313.pyc
ADDED
|
Binary file (12.5 kB). View file
|
|
|
anton/cmpo/__pycache__/mapping.cpython-313.pyc
ADDED
|
Binary file (16.5 kB). View file
|
|
|
anton/cmpo/__pycache__/ontology.cpython-313.pyc
ADDED
|
Binary file (16.5 kB). View file
|
|
|
anton/cmpo/data/cmpo.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
anton/cmpo/examples.py
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
CMPO Mapping Examples and Demonstrations
|
| 3 |
+
|
| 4 |
+
This file demonstrates the key concepts and usage patterns of the CMPO integration module.
|
| 5 |
+
Run with:
|
| 6 |
+
python -m anton.cmpo.examples (from project root)
|
| 7 |
+
OR
|
| 8 |
+
python examples.py (from anton/cmpo/ directory)
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import sys
|
| 12 |
+
import logging
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
|
| 15 |
+
# Handle both direct execution and module execution
|
| 16 |
+
if __name__ == "__main__" and __package__ is None:
|
| 17 |
+
# Add parent directories to path for direct execution
|
| 18 |
+
current_dir = Path(__file__).parent
|
| 19 |
+
project_root = current_dir.parent.parent
|
| 20 |
+
sys.path.insert(0, str(project_root))
|
| 21 |
+
from anton.cmpo.ontology import CMPOOntology
|
| 22 |
+
from anton.cmpo.mapping import map_to_cmpo
|
| 23 |
+
else:
|
| 24 |
+
# Normal relative imports for module execution
|
| 25 |
+
from .ontology import CMPOOntology
|
| 26 |
+
from .mapping import map_to_cmpo
|
| 27 |
+
|
| 28 |
+
logging.basicConfig(level=logging.INFO)
|
| 29 |
+
|
| 30 |
+
def demonstrate_basic_mapping():
|
| 31 |
+
"""Demonstrate basic CMPO mapping functionality."""
|
| 32 |
+
print("=" * 60)
|
| 33 |
+
print("BASIC CMPO MAPPING DEMONSTRATION")
|
| 34 |
+
print("=" * 60)
|
| 35 |
+
|
| 36 |
+
# Initialize CMPO ontology
|
| 37 |
+
cmpo = CMPOOntology()
|
| 38 |
+
print(f"Loaded CMPO ontology with {len(cmpo.ontology)} terms\n")
|
| 39 |
+
|
| 40 |
+
# Example descriptions from VLM analysis
|
| 41 |
+
test_descriptions = [
|
| 42 |
+
"cells arrested in metaphase with condensed chromosomes",
|
| 43 |
+
"fragmented nuclei with membrane blebbing indicating apoptosis",
|
| 44 |
+
"abnormal spindle formation during cell division",
|
| 45 |
+
"enlarged cell bodies with irregular morphology",
|
| 46 |
+
"normal healthy fibroblast cells with typical morphology"
|
| 47 |
+
]
|
| 48 |
+
|
| 49 |
+
for desc in test_descriptions:
|
| 50 |
+
print(f"Description: '{desc}'")
|
| 51 |
+
results = map_to_cmpo(desc, cmpo)
|
| 52 |
+
|
| 53 |
+
if results:
|
| 54 |
+
print(f"Found {len(results)} CMPO mappings:")
|
| 55 |
+
for i, result in enumerate(results[:3], 1):
|
| 56 |
+
print(f" {i}. {result['CMPO_ID']}: {result['term_name']}")
|
| 57 |
+
print(f" Confidence: {result['confidence']:.3f}")
|
| 58 |
+
print(f" Evidence: {result['supporting_evidence']}")
|
| 59 |
+
if result.get('hierarchy_path'):
|
| 60 |
+
print(f" Hierarchy: {' β '.join(result['hierarchy_path'])}")
|
| 61 |
+
print()
|
| 62 |
+
else:
|
| 63 |
+
print(" No CMPO mappings found")
|
| 64 |
+
print("-" * 50)
|
| 65 |
+
|
| 66 |
+
def demonstrate_context_aware_mapping():
|
| 67 |
+
"""Demonstrate context-aware mapping with research focus."""
|
| 68 |
+
print("\n" + "=" * 60)
|
| 69 |
+
print("CONTEXT-AWARE MAPPING DEMONSTRATION")
|
| 70 |
+
print("=" * 60)
|
| 71 |
+
|
| 72 |
+
cmpo = CMPOOntology()
|
| 73 |
+
|
| 74 |
+
# Same description, different research contexts
|
| 75 |
+
description = "abnormal cell division with chromosome segregation defects"
|
| 76 |
+
|
| 77 |
+
contexts = [
|
| 78 |
+
("cell_cycle", "Cell cycle research focus"),
|
| 79 |
+
("morphology", "Morphology research focus"),
|
| 80 |
+
(None, "No specific context")
|
| 81 |
+
]
|
| 82 |
+
|
| 83 |
+
for context, context_desc in contexts:
|
| 84 |
+
print(f"\n{context_desc}:")
|
| 85 |
+
print(f"Description: '{description}'")
|
| 86 |
+
results = map_to_cmpo(description, cmpo, context=context)
|
| 87 |
+
|
| 88 |
+
if results:
|
| 89 |
+
for i, result in enumerate(results[:2], 1):
|
| 90 |
+
print(f" {i}. {result['term_name']} (confidence: {result['confidence']:.3f})")
|
| 91 |
+
print(f" Context boost from: {result['supporting_evidence']}")
|
| 92 |
+
else:
|
| 93 |
+
print(" No mappings found")
|
| 94 |
+
|
| 95 |
+
def demonstrate_hierarchical_navigation():
|
| 96 |
+
"""Show how CMPO terms relate hierarchically."""
|
| 97 |
+
print("\n" + "=" * 60)
|
| 98 |
+
print("HIERARCHICAL NAVIGATION DEMONSTRATION")
|
| 99 |
+
print("=" * 60)
|
| 100 |
+
|
| 101 |
+
cmpo = CMPOOntology()
|
| 102 |
+
|
| 103 |
+
# Find a term with rich hierarchy
|
| 104 |
+
for term_id, term_data in cmpo.ontology.items():
|
| 105 |
+
if term_data.get('parent_terms') and len(term_data['parent_terms']) > 0:
|
| 106 |
+
print(f"Term: {term_data['name']} ({term_id})")
|
| 107 |
+
print(f"Description: {term_data.get('description', 'No description')}")
|
| 108 |
+
|
| 109 |
+
if term_data.get('synonyms'):
|
| 110 |
+
print(f"Synonyms: {', '.join(term_data['synonyms'])}")
|
| 111 |
+
|
| 112 |
+
print(f"Parent terms:")
|
| 113 |
+
for parent_id in term_data['parent_terms']:
|
| 114 |
+
parent_term = cmpo.get_term(parent_id)
|
| 115 |
+
if parent_term:
|
| 116 |
+
print(f" β {parent_term['name']} ({parent_id})")
|
| 117 |
+
|
| 118 |
+
if term_data.get('equivalent_to'):
|
| 119 |
+
print(f"Equivalent to: {term_data['equivalent_to']}")
|
| 120 |
+
|
| 121 |
+
break
|
| 122 |
+
|
| 123 |
+
def demonstrate_semantic_analysis():
|
| 124 |
+
"""Show semantic component analysis."""
|
| 125 |
+
print("\n" + "=" * 60)
|
| 126 |
+
print("SEMANTIC ANALYSIS DEMONSTRATION")
|
| 127 |
+
print("=" * 60)
|
| 128 |
+
|
| 129 |
+
# Import internal functions for demonstration
|
| 130 |
+
if __name__ == "__main__" and __package__ is None:
|
| 131 |
+
from anton.cmpo.mapping import _extract_biological_tokens, _find_direct_matches
|
| 132 |
+
else:
|
| 133 |
+
from .mapping import _extract_biological_tokens, _find_direct_matches
|
| 134 |
+
|
| 135 |
+
cmpo = CMPOOntology()
|
| 136 |
+
|
| 137 |
+
description = "apoptotic cells with fragmented nuclei and chromatin condensation"
|
| 138 |
+
print(f"Analyzing: '{description}'")
|
| 139 |
+
|
| 140 |
+
# Show token extraction
|
| 141 |
+
tokens = _extract_biological_tokens(description)
|
| 142 |
+
print(f"Biological tokens: {sorted(tokens)}")
|
| 143 |
+
|
| 144 |
+
# Show direct matches
|
| 145 |
+
direct_matches = _find_direct_matches(description.lower(), cmpo)
|
| 146 |
+
if direct_matches:
|
| 147 |
+
print("\nDirect matches found:")
|
| 148 |
+
for term_id, confidence, evidence in direct_matches[:3]:
|
| 149 |
+
term = cmpo.get_term(term_id)
|
| 150 |
+
if term:
|
| 151 |
+
print(f" {term['name']}: {confidence:.3f} (matched: {evidence})")
|
| 152 |
+
|
| 153 |
+
def demonstrate_integration_patterns():
|
| 154 |
+
"""Show how CMPO integrates with Anton pipeline."""
|
| 155 |
+
print("\n" + "=" * 60)
|
| 156 |
+
print("INTEGRATION PATTERNS DEMONSTRATION")
|
| 157 |
+
print("=" * 60)
|
| 158 |
+
|
| 159 |
+
# Simulate VLM output from different pipeline stages
|
| 160 |
+
vlm_outputs = {
|
| 161 |
+
"stage_1_global": "Dense population of adherent cells with fibroblast morphology",
|
| 162 |
+
"stage_3_features": "Individual cells show elongated spindle shape with prominent stress fibers",
|
| 163 |
+
"stage_4_population": "Population exhibits normal growth patterns with typical cell-cell contacts"
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
cmpo = CMPOOntology()
|
| 167 |
+
|
| 168 |
+
print("Simulating Anton pipeline integration:")
|
| 169 |
+
for stage, output in vlm_outputs.items():
|
| 170 |
+
print(f"\n{stage.replace('_', ' ').title()}:")
|
| 171 |
+
print(f"VLM Output: {output}")
|
| 172 |
+
|
| 173 |
+
# Map to CMPO
|
| 174 |
+
mappings = map_to_cmpo(output, cmpo)
|
| 175 |
+
if mappings:
|
| 176 |
+
best_match = mappings[0]
|
| 177 |
+
print(f"Best CMPO Match: {best_match['term_name']}")
|
| 178 |
+
print(f"Confidence: {best_match['confidence']:.3f}")
|
| 179 |
+
else:
|
| 180 |
+
print("No CMPO mappings found")
|
| 181 |
+
|
| 182 |
+
def demonstrate_multi_stage_cmpo():
|
| 183 |
+
"""Demonstrate multi-stage CMPO integration across pipeline stages."""
|
| 184 |
+
print("\n" + "=" * 60)
|
| 185 |
+
print("MULTI-STAGE CMPO INTEGRATION DEMONSTRATION")
|
| 186 |
+
print("=" * 60)
|
| 187 |
+
|
| 188 |
+
cmpo = CMPOOntology()
|
| 189 |
+
|
| 190 |
+
# Simulate different types of biological observations at each stage
|
| 191 |
+
stage_data = {
|
| 192 |
+
"Stage 1 - Global Context": {
|
| 193 |
+
"description": "Dense cell population with mitotic figures visible throughout",
|
| 194 |
+
"context": "cell_population"
|
| 195 |
+
},
|
| 196 |
+
"Stage 3 - Individual Cells": {
|
| 197 |
+
"description": "Cell arrested in metaphase with condensed chromosomes",
|
| 198 |
+
"context": "cellular_phenotype"
|
| 199 |
+
},
|
| 200 |
+
"Stage 4 - Population Insights": {
|
| 201 |
+
"description": "20% of population shows apoptotic markers with fragmented nuclei",
|
| 202 |
+
"context": "cell_population"
|
| 203 |
+
}
|
| 204 |
+
}
|
| 205 |
+
|
| 206 |
+
all_mappings = {}
|
| 207 |
+
|
| 208 |
+
print("π¬ Multi-Stage CMPO Analysis:")
|
| 209 |
+
for stage_name, data in stage_data.items():
|
| 210 |
+
print(f"\n{stage_name}:")
|
| 211 |
+
print(f"Description: '{data['description']}'")
|
| 212 |
+
print(f"Context: {data['context']}")
|
| 213 |
+
|
| 214 |
+
# Map with stage-appropriate context
|
| 215 |
+
mappings = map_to_cmpo(data['description'], cmpo, context=data['context'])
|
| 216 |
+
|
| 217 |
+
if mappings:
|
| 218 |
+
print(f"Found {len(mappings)} CMPO mappings:")
|
| 219 |
+
for i, mapping in enumerate(mappings[:2], 1):
|
| 220 |
+
print(f" {i}. {mapping['term_name']} (confidence: {mapping['confidence']:.3f})")
|
| 221 |
+
|
| 222 |
+
# Track for cross-stage analysis
|
| 223 |
+
cmpo_id = mapping['CMPO_ID']
|
| 224 |
+
if cmpo_id not in all_mappings:
|
| 225 |
+
all_mappings[cmpo_id] = {
|
| 226 |
+
'term': mapping['term_name'],
|
| 227 |
+
'stages': [],
|
| 228 |
+
'max_confidence': 0
|
| 229 |
+
}
|
| 230 |
+
all_mappings[cmpo_id]['stages'].append(stage_name.split(' - ')[0])
|
| 231 |
+
all_mappings[cmpo_id]['max_confidence'] = max(
|
| 232 |
+
all_mappings[cmpo_id]['max_confidence'],
|
| 233 |
+
mapping['confidence']
|
| 234 |
+
)
|
| 235 |
+
else:
|
| 236 |
+
print(" No CMPO mappings found")
|
| 237 |
+
|
| 238 |
+
# Cross-stage analysis
|
| 239 |
+
print("\nπ Cross-Stage CMPO Analysis:")
|
| 240 |
+
multi_stage_terms = {k: v for k, v in all_mappings.items() if len(v['stages']) > 1}
|
| 241 |
+
|
| 242 |
+
if multi_stage_terms:
|
| 243 |
+
print("Terms detected across multiple stages:")
|
| 244 |
+
for cmpo_id, data in multi_stage_terms.items():
|
| 245 |
+
print(f" β’ {data['term']} - detected in: {', '.join(data['stages'])}")
|
| 246 |
+
print(f" Max confidence: {data['max_confidence']:.3f}")
|
| 247 |
+
else:
|
| 248 |
+
print("No terms detected across multiple stages (expected - different biological levels)")
|
| 249 |
+
|
| 250 |
+
print(f"\nTotal unique CMPO terms identified: {len(all_mappings)}")
|
| 251 |
+
print("β
Multi-stage integration provides comprehensive phenotype classification!")
|
| 252 |
+
|
| 253 |
+
def main():
|
| 254 |
+
"""Run all demonstrations."""
|
| 255 |
+
print("CMPO Module Demonstration Suite")
|
| 256 |
+
print("This script demonstrates the key capabilities of Anton's CMPO integration")
|
| 257 |
+
|
| 258 |
+
try:
|
| 259 |
+
demonstrate_basic_mapping()
|
| 260 |
+
demonstrate_context_aware_mapping()
|
| 261 |
+
demonstrate_hierarchical_navigation()
|
| 262 |
+
demonstrate_semantic_analysis()
|
| 263 |
+
demonstrate_integration_patterns()
|
| 264 |
+
demonstrate_multi_stage_cmpo() # New multi-stage demo
|
| 265 |
+
|
| 266 |
+
print("\n" + "=" * 60)
|
| 267 |
+
print("DEMONSTRATION COMPLETE")
|
| 268 |
+
print("=" * 60)
|
| 269 |
+
print("For more information, see anton/cmpo/README.md")
|
| 270 |
+
print("β¨ NEW: Multi-stage CMPO integration across all pipeline stages!")
|
| 271 |
+
|
| 272 |
+
except Exception as e:
|
| 273 |
+
print(f"Error during demonstration: {e}")
|
| 274 |
+
print("Ensure CMPO ontology is properly loaded")
|
| 275 |
+
|
| 276 |
+
if __name__ == "__main__":
|
| 277 |
+
main()
|
anton/cmpo/mapping.py
ADDED
|
@@ -0,0 +1,375 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Free-form to CMPO mapping for Anton's pipeline."""
|
| 2 |
+
|
| 3 |
+
import re
|
| 4 |
+
from typing import Dict, List, Tuple, Set
|
| 5 |
+
from difflib import SequenceMatcher
|
| 6 |
+
|
| 7 |
+
def map_to_cmpo(description: str, cmpo_ontology, context: str = None) -> List[Dict]:
|
| 8 |
+
"""Convert a free-form description to CMPO terms using semantic mapping."""
|
| 9 |
+
if not description or not cmpo_ontology:
|
| 10 |
+
return []
|
| 11 |
+
|
| 12 |
+
description_lower = description.lower()
|
| 13 |
+
mappings = []
|
| 14 |
+
|
| 15 |
+
# 1. Direct name/synonym matching
|
| 16 |
+
direct_matches = _find_direct_matches(description_lower, cmpo_ontology)
|
| 17 |
+
|
| 18 |
+
# 2. Semantic component matching
|
| 19 |
+
semantic_matches = _find_semantic_matches(description_lower, cmpo_ontology)
|
| 20 |
+
|
| 21 |
+
# 3. Hierarchical context matching (if context provided)
|
| 22 |
+
context_matches = _find_context_matches(description_lower, cmpo_ontology, context) if context else []
|
| 23 |
+
|
| 24 |
+
# Combine and score all matches
|
| 25 |
+
all_matches = {}
|
| 26 |
+
|
| 27 |
+
# Weight direct matches highest (preserve enhanced scoring differences)
|
| 28 |
+
for term_id, confidence, evidence in direct_matches:
|
| 29 |
+
if term_id not in all_matches:
|
| 30 |
+
all_matches[term_id] = {'confidence': 0, 'evidence': []}
|
| 31 |
+
all_matches[term_id]['confidence'] += confidence # Don't flatten with 0.8 multiplier
|
| 32 |
+
all_matches[term_id]['evidence'].append(f"Direct match: {evidence}")
|
| 33 |
+
|
| 34 |
+
# Weight semantic matches moderately
|
| 35 |
+
for term_id, confidence, evidence in semantic_matches:
|
| 36 |
+
if term_id not in all_matches:
|
| 37 |
+
all_matches[term_id] = {'confidence': 0, 'evidence': []}
|
| 38 |
+
all_matches[term_id]['confidence'] += confidence * 0.3 # Lower weight for semantic
|
| 39 |
+
all_matches[term_id]['evidence'].append(f"Semantic: {evidence}")
|
| 40 |
+
|
| 41 |
+
# Weight context matches lower but still valuable
|
| 42 |
+
for term_id, confidence, evidence in context_matches:
|
| 43 |
+
if term_id not in all_matches:
|
| 44 |
+
all_matches[term_id] = {'confidence': 0, 'evidence': []}
|
| 45 |
+
all_matches[term_id]['confidence'] += confidence * 0.2 # Lower weight for context
|
| 46 |
+
all_matches[term_id]['evidence'].append(f"Context: {evidence}")
|
| 47 |
+
|
| 48 |
+
# Convert to final format
|
| 49 |
+
for term_id, match_data in all_matches.items():
|
| 50 |
+
term_info = cmpo_ontology.get_term(term_id)
|
| 51 |
+
if term_info:
|
| 52 |
+
mappings.append({
|
| 53 |
+
"CMPO_ID": term_id,
|
| 54 |
+
"term_name": term_info['name'],
|
| 55 |
+
"confidence": match_data['confidence'], # Preserve full confidence for sorting
|
| 56 |
+
"supporting_evidence": "; ".join(match_data['evidence'][:3]),
|
| 57 |
+
"description": term_info.get('description', ''),
|
| 58 |
+
"hierarchy_path": _get_hierarchy_path(term_id, cmpo_ontology)
|
| 59 |
+
})
|
| 60 |
+
|
| 61 |
+
# Sort by confidence and return top matches
|
| 62 |
+
mappings.sort(key=lambda x: x['confidence'], reverse=True)
|
| 63 |
+
return mappings[:5]
|
| 64 |
+
|
| 65 |
+
def _find_direct_matches(description: str, cmpo_ontology) -> List[Tuple[str, float, str]]:
|
| 66 |
+
"""Find direct matches with ontology-aware scoring."""
|
| 67 |
+
matches = []
|
| 68 |
+
description_tokens = set(_extract_biological_tokens(description))
|
| 69 |
+
|
| 70 |
+
for term_id, term_data in cmpo_ontology.ontology.items():
|
| 71 |
+
base_score = 0.0
|
| 72 |
+
matched_evidence = []
|
| 73 |
+
|
| 74 |
+
# 1. Exact token matches (highest priority)
|
| 75 |
+
term_tokens = set(_extract_biological_tokens(term_data.get('name', '')))
|
| 76 |
+
exact_matches = description_tokens.intersection(term_tokens)
|
| 77 |
+
if exact_matches:
|
| 78 |
+
# Higher score for exact matches
|
| 79 |
+
exact_score = len(exact_matches) / max(len(term_tokens), 1) * 2.0
|
| 80 |
+
base_score += exact_score
|
| 81 |
+
matched_evidence.extend(exact_matches)
|
| 82 |
+
|
| 83 |
+
# 2. Check term name substring matches
|
| 84 |
+
term_name = term_data.get('name', '').lower()
|
| 85 |
+
if term_name and term_name in description:
|
| 86 |
+
substring_score = len(term_name) / len(description) * 1.5
|
| 87 |
+
base_score += substring_score
|
| 88 |
+
matched_evidence.append(f"name:{term_name}")
|
| 89 |
+
|
| 90 |
+
# 3. Check synonyms with exact token priority
|
| 91 |
+
for synonym in term_data.get('synonyms', []):
|
| 92 |
+
synonym_tokens = set(_extract_biological_tokens(synonym))
|
| 93 |
+
syn_exact_matches = description_tokens.intersection(synonym_tokens)
|
| 94 |
+
if syn_exact_matches:
|
| 95 |
+
syn_score = len(syn_exact_matches) / max(len(synonym_tokens), 1) * 1.8
|
| 96 |
+
base_score += syn_score
|
| 97 |
+
matched_evidence.extend(syn_exact_matches)
|
| 98 |
+
elif synonym.lower() in description:
|
| 99 |
+
substring_score = len(synonym) / len(description) * 1.2
|
| 100 |
+
base_score += substring_score
|
| 101 |
+
matched_evidence.append(f"synonym:{synonym}")
|
| 102 |
+
|
| 103 |
+
# 4. Ontology-aware bonuses
|
| 104 |
+
if base_score > 0:
|
| 105 |
+
# Specificity bonus (deeper in hierarchy = more specific = higher score)
|
| 106 |
+
specificity_bonus = _calculate_specificity_bonus(term_id, cmpo_ontology)
|
| 107 |
+
|
| 108 |
+
# Multi-token exact match bonus (matches multiple key terms)
|
| 109 |
+
multi_token_bonus = 0.0
|
| 110 |
+
if len(exact_matches) > 1:
|
| 111 |
+
multi_token_bonus = len(exact_matches) * 0.5 # Strong bonus for multiple exact matches
|
| 112 |
+
|
| 113 |
+
# Apply ontology bonuses
|
| 114 |
+
final_score = base_score + specificity_bonus + multi_token_bonus
|
| 115 |
+
|
| 116 |
+
matches.append((term_id, min(final_score, 5.0), f"exact:{','.join(matched_evidence[:3])}"))
|
| 117 |
+
|
| 118 |
+
return matches
|
| 119 |
+
|
| 120 |
+
def _find_semantic_matches(description: str, cmpo_ontology) -> List[Tuple[str, float, str]]:
    """Find CMPO matches by analysing semantic components of each term.

    Two signals are combined per term: token overlap with the term's logical
    (``equivalent_to``) definitions, and fuzzy similarity against the term's
    free-text description.  Returns (term_id, score, evidence) tuples.
    """
    candidate_matches: List[Tuple[str, float, str]] = []
    description_tokens = _extract_biological_tokens(description)

    for term_id, term_data in cmpo_ontology.ontology.items():
        # Signal 1: overlap with each logical (equivalent_to) definition.
        for equiv_expression in term_data.get('equivalent_to', []):
            overlap_score = _score_semantic_overlap(description_tokens, equiv_expression)
            if overlap_score > 0.3:
                candidate_matches.append(
                    (term_id, overlap_score, f"Semantic components in {equiv_expression}")
                )

        # Signal 2: fuzzy similarity against the term's free-text definition.
        term_definition = term_data.get('description', '').lower()
        if term_definition:
            similarity = _calculate_text_similarity(description, term_definition)
            if similarity > 0.4:
                candidate_matches.append((term_id, similarity, "Description similarity"))

    return candidate_matches
|
| 142 |
+
|
| 143 |
+
def _find_context_matches(description: str, cmpo_ontology, context: str) -> List[Tuple[str, float, str]]:
    """Find CMPO matches by prioritising subgraphs implied by *context*.

    The context string is scanned for coarse keywords (cell_cycle, apoptosis,
    morphology, process); terms falling inside the associated subgraphs get a
    base score, boosted when the description also mentions the term's name.
    Returns (term_id, score, evidence) tuples.
    """
    # Coarse context keywords mapped onto the CMPO subgraphs they prioritise.
    context_subgraphs = {
        'cell_cycle': ['cell_cycle_phenotype', 'mitotic_process_phenotype'],
        'apoptosis': ['cell_death_phenotype', 'apoptotic'],
        'morphology': ['cellular_component_phenotype', 'abnormal_cell_morphology'],
        'process': ['cell_process_phenotype', 'biological_process']
    }

    lowered_context = context.lower() if context else ""
    active_subgraphs: List[str] = []
    for context_key, subgraph_names in context_subgraphs.items():
        if context_key in lowered_context:
            active_subgraphs.extend(subgraph_names)

    description_words = description.split()
    results: List[Tuple[str, float, str]] = []
    for term_id, term_data in cmpo_ontology.ontology.items():
        for subgraph in active_subgraphs:
            if not _term_in_subgraph(term_id, subgraph, cmpo_ontology):
                continue
            score = 0.5
            # Boost when the raw description also mentions the term's name.
            name_lower = term_data.get('name', '').lower()
            if any(word in name_lower for word in description_words):
                score += 0.3
            results.append((term_id, score, f"Context subgraph: {subgraph}"))

    return results
|
| 174 |
+
|
| 175 |
+
def _extract_biological_tokens(text: str) -> Set[str]:
|
| 176 |
+
"""Extract biologically relevant tokens from text."""
|
| 177 |
+
# Common biological stop words to exclude
|
| 178 |
+
bio_stop_words = {'cell', 'cells', 'cellular', 'the', 'and', 'or', 'with', 'in', 'of'}
|
| 179 |
+
|
| 180 |
+
# Extract tokens
|
| 181 |
+
tokens = set(re.findall(r'\b\w+\b', text.lower()))
|
| 182 |
+
|
| 183 |
+
# Filter for biological relevance (length > 3, not stop words)
|
| 184 |
+
bio_tokens = {token for token in tokens
|
| 185 |
+
if len(token) > 3 and token not in bio_stop_words}
|
| 186 |
+
|
| 187 |
+
return bio_tokens
|
| 188 |
+
|
| 189 |
+
def _score_semantic_overlap(desc_tokens: Set[str], equivalent_to: str) -> float:
    """Score token overlap between a description and a semantic definition.

    Returns the fraction of the definition's biological tokens that also
    appear in *desc_tokens* (0.0 when the definition yields no tokens).
    """
    definition_tokens = _extract_biological_tokens(equivalent_to)
    if not definition_tokens:
        return 0.0

    shared = desc_tokens & definition_tokens
    return len(shared) / max(len(definition_tokens), 1)
|
| 198 |
+
|
| 199 |
+
def _calculate_text_similarity(text1: str, text2: str) -> float:
|
| 200 |
+
"""Calculate text similarity using sequence matching."""
|
| 201 |
+
return SequenceMatcher(None, text1, text2).ratio()
|
| 202 |
+
|
| 203 |
+
def _term_in_subgraph(term_id: str, subgraph_name: str, cmpo_ontology) -> bool:
|
| 204 |
+
"""Check if a term belongs to a specific subgraph via hierarchy."""
|
| 205 |
+
term_data = cmpo_ontology.get_term(term_id)
|
| 206 |
+
if not term_data:
|
| 207 |
+
return False
|
| 208 |
+
|
| 209 |
+
# Check if term name contains subgraph keyword
|
| 210 |
+
term_name = term_data.get('name', '').lower()
|
| 211 |
+
if subgraph_name.lower() in term_name:
|
| 212 |
+
return True
|
| 213 |
+
|
| 214 |
+
# Check parent terms recursively (simple implementation)
|
| 215 |
+
for parent in term_data.get('parent_terms', []):
|
| 216 |
+
parent_data = cmpo_ontology.get_term(parent)
|
| 217 |
+
if parent_data and subgraph_name.lower() in parent_data.get('name', '').lower():
|
| 218 |
+
return True
|
| 219 |
+
|
| 220 |
+
return False
|
| 221 |
+
|
| 222 |
+
def _get_hierarchy_path(term_id: str, cmpo_ontology) -> List[str]:
|
| 223 |
+
"""Get the hierarchical path for a term."""
|
| 224 |
+
path = []
|
| 225 |
+
current_term = cmpo_ontology.get_term(term_id)
|
| 226 |
+
|
| 227 |
+
if current_term:
|
| 228 |
+
path.append(current_term.get('name', term_id))
|
| 229 |
+
|
| 230 |
+
# Add immediate parents (simplified - could be recursive)
|
| 231 |
+
for parent_id in current_term.get('parent_terms', [])[:2]: # Limit to 2 parents
|
| 232 |
+
parent_term = cmpo_ontology.get_term(parent_id)
|
| 233 |
+
if parent_term:
|
| 234 |
+
path.append(parent_term.get('name', parent_id))
|
| 235 |
+
|
| 236 |
+
return path
|
| 237 |
+
|
| 238 |
+
def _calculate_specificity_bonus(term_id: str, cmpo_ontology) -> float:
|
| 239 |
+
"""Calculate specificity bonus based on hierarchy depth."""
|
| 240 |
+
try:
|
| 241 |
+
depth = _calculate_hierarchy_depth(term_id, cmpo_ontology)
|
| 242 |
+
# Deeper terms are more specific, get higher bonus
|
| 243 |
+
# Max bonus of 0.5 for terms at depth 4+
|
| 244 |
+
return min(depth * 0.1, 0.5)
|
| 245 |
+
except:
|
| 246 |
+
return 0.0
|
| 247 |
+
|
| 248 |
+
def _calculate_hierarchy_depth(term_id: str, cmpo_ontology, visited=None) -> int:
|
| 249 |
+
"""Calculate depth of term in CMPO hierarchy."""
|
| 250 |
+
if visited is None:
|
| 251 |
+
visited = set()
|
| 252 |
+
|
| 253 |
+
if term_id in visited: # Avoid cycles
|
| 254 |
+
return 0
|
| 255 |
+
|
| 256 |
+
visited.add(term_id)
|
| 257 |
+
term_data = cmpo_ontology.get_term(term_id)
|
| 258 |
+
|
| 259 |
+
if not term_data or not term_data.get('parent_terms'):
|
| 260 |
+
return 1 # Root level
|
| 261 |
+
|
| 262 |
+
# Find maximum depth among parents
|
| 263 |
+
max_parent_depth = 0
|
| 264 |
+
for parent_id in term_data.get('parent_terms', []):
|
| 265 |
+
parent_depth = _calculate_hierarchy_depth(parent_id, cmpo_ontology, visited.copy())
|
| 266 |
+
max_parent_depth = max(max_parent_depth, parent_depth)
|
| 267 |
+
|
| 268 |
+
return max_parent_depth + 1
|
| 269 |
+
|
| 270 |
+
def _detect_mutual_exclusion(term1_id: str, term2_id: str, cmpo_ontology) -> bool:
|
| 271 |
+
"""Detect if two terms are mutually exclusive based on ontology structure."""
|
| 272 |
+
term1 = cmpo_ontology.get_term(term1_id)
|
| 273 |
+
term2 = cmpo_ontology.get_term(term2_id)
|
| 274 |
+
|
| 275 |
+
if not term1 or not term2:
|
| 276 |
+
return False
|
| 277 |
+
|
| 278 |
+
# Check if they share the same immediate parent (sibling terms often mutually exclusive)
|
| 279 |
+
term1_parents = set(term1.get('parent_terms', []))
|
| 280 |
+
term2_parents = set(term2.get('parent_terms', []))
|
| 281 |
+
|
| 282 |
+
shared_parents = term1_parents.intersection(term2_parents)
|
| 283 |
+
|
| 284 |
+
# If they share parents and are both specific (depth > 2), likely mutually exclusive
|
| 285 |
+
if shared_parents and len(shared_parents) > 0:
|
| 286 |
+
depth1 = _calculate_hierarchy_depth(term1_id, cmpo_ontology)
|
| 287 |
+
depth2 = _calculate_hierarchy_depth(term2_id, cmpo_ontology)
|
| 288 |
+
|
| 289 |
+
# Heuristic: sibling terms at depth 3+ often mutually exclusive
|
| 290 |
+
if depth1 > 2 and depth2 > 2:
|
| 291 |
+
return True
|
| 292 |
+
|
| 293 |
+
return False
|
| 294 |
+
|
| 295 |
+
# Add VLM validation function for the two-stage pipeline
|
| 296 |
+
async def validate_mappings_with_vlm(description: str, candidate_mappings: List[Dict], vlm_interface, max_candidates: int = 5) -> List[Dict]:
    """Stage 2: prune candidate CMPO mappings via VLM biological reasoning.

    Builds a textual review prompt from the top candidates, asks the VLM
    interface to assess biological plausibility, and re-ranks based on the
    parsed reply.  On any failure the original candidate list is returned
    unchanged (best-effort validation).
    """
    # With zero or one candidate there is nothing to prune or re-rank.
    if len(candidate_mappings) <= 1:
        return candidate_mappings

    shortlist = candidate_mappings[:max_candidates]
    candidates_text = "\n".join(
        f"{i+1}. {mapping['term_name']} (CMPO:{mapping['CMPO_ID']}) - Confidence: {mapping['confidence']:.3f}"
        for i, mapping in enumerate(shortlist)
    )

    validation_prompt = f"""Original biological description: "{description}"

Candidate CMPO term mappings:
{candidates_text}

Task: Evaluate biological plausibility and ranking of these mappings.

Consider:
- Biological consistency and logical compatibility
- Temporal/spatial relationships in biological processes
- Phenotypic co-occurrence patterns
- Mechanistic plausibility
- Specificity vs generality trade-offs

Provide:
1. Biologically valid mappings with updated confidence (0-1)
2. Brief scientific reasoning for each acceptance/rejection
3. Final ranked list

Focus on biological accuracy over textual similarity.

Format your response as:
VALID: [term_name] - confidence: [0-1] - reasoning: [brief explanation]
INVALID: [term_name] - reasoning: [brief explanation]
"""

    try:
        # Delegated to the VLM interface implementation.
        reply = await vlm_interface.analyze_biological_reasoning(validation_prompt)
        return _parse_vlm_validation_response(reply, candidate_mappings)
    except Exception as e:
        # Best-effort: keep the textual mappings if VLM validation breaks.
        logging.warning(f"VLM validation failed: {e}, using original mappings")
        return candidate_mappings
|
| 346 |
+
|
| 347 |
+
def _parse_vlm_validation_response(vlm_response: str, original_mappings: List[Dict]) -> List[Dict]:
|
| 348 |
+
"""Parse VLM validation response and update mapping confidences."""
|
| 349 |
+
validated = []
|
| 350 |
+
|
| 351 |
+
# Simple parsing - in production would be more robust
|
| 352 |
+
for line in vlm_response.split('\n'):
|
| 353 |
+
if line.startswith('VALID:'):
|
| 354 |
+
# Extract confidence and reasoning
|
| 355 |
+
parts = line.split(' - ')
|
| 356 |
+
if len(parts) >= 3:
|
| 357 |
+
term_name = parts[0].replace('VALID: ', '').strip()
|
| 358 |
+
confidence_str = parts[1].replace('confidence: ', '').strip()
|
| 359 |
+
reasoning = parts[2].replace('reasoning: ', '').strip()
|
| 360 |
+
|
| 361 |
+
# Find corresponding original mapping
|
| 362 |
+
for mapping in original_mappings:
|
| 363 |
+
if mapping['term_name'].lower() == term_name.lower():
|
| 364 |
+
updated_mapping = mapping.copy()
|
| 365 |
+
try:
|
| 366 |
+
updated_mapping['confidence'] = float(confidence_str)
|
| 367 |
+
updated_mapping['vlm_reasoning'] = reasoning
|
| 368 |
+
validated.append(updated_mapping)
|
| 369 |
+
except ValueError:
|
| 370 |
+
validated.append(mapping) # Keep original if parsing fails
|
| 371 |
+
break
|
| 372 |
+
|
| 373 |
+
# Sort by updated confidence
|
| 374 |
+
validated.sort(key=lambda x: x['confidence'], reverse=True)
|
| 375 |
+
return validated
|
anton/cmpo/ontology.py
ADDED
|
@@ -0,0 +1,326 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Manage CMPO ontology data and provide lookup functionality."""
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import requests
|
| 5 |
+
import pickle
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Dict, List, Optional
|
| 8 |
+
import logging
|
| 9 |
+
|
| 10 |
+
class CMPOOntology:
    """Manage CMPO ontology data and provide lookup functionality.

    The ontology is loaded from a local JSON file when available; otherwise it
    is fetched from official CMPO sources with progressively simpler fallbacks:
    OBO file -> OLS REST API -> OWL file -> minimal hardcoded term set.
    """

    def __init__(self, data_path="data/cmpo.json", cache_path="data/cmpo_cache.pkl"):
        """Initialise paths and immediately load the ontology and its indices.

        Args:
            data_path: Location of the processed JSON ontology file.
            cache_path: Location of a pickle cache.
                NOTE(review): cache_path is currently never read or written.
        """
        self.data_path = Path(data_path)
        self.cache_path = Path(cache_path)
        self.ontology = {}        # term_id -> term payload dict
        self.term_index = {}      # term id AND lower-cased name -> payload (fast lookup)
        self.feature_index = {}   # feature string -> [term_id, ...]
        self.keyword_index = {}   # lower-cased keyword -> [term_id, ...]

        self._load_ontology()

    def _load_ontology(self):
        """Load CMPO ontology from JSON file or download if needed."""
        if self.data_path.exists():
            logging.info(f"Loading CMPO ontology from {self.data_path}")
            with open(self.data_path, 'r') as f:
                self.ontology = json.load(f)
        else:
            logging.info("CMPO ontology not found, downloading...")
            self._download_and_process_cmpo()

        self._build_indices()

    def _download_and_process_cmpo(self):
        """Download CMPO from official repository and convert to JSON.

        Tries, in order: OBO parsing (richest), the OLS REST API, OWL parsing
        via owlready2, and finally a minimal hardcoded ontology.
        """
        try:
            # Option 1: Parse OBO file directly (preferred for rich semantic info)
            self._download_and_parse_obo()
        except Exception as e:
            logging.warning(f"Failed to download OBO: {e}")
            try:
                # Option 2: Use OLS API (Ontology Lookup Service)
                self._download_from_ols()
            except Exception as e2:
                logging.warning(f"Failed to download from OLS: {e2}")
                try:
                    # Option 3: Parse OWL file directly
                    self._download_owl_file()
                except Exception as e3:
                    logging.error(f"Failed to download OWL: {e3}")
                    # Option 4: Use minimal hardcoded ontology
                    self._create_minimal_ontology()

    def _download_and_parse_obo(self):
        """Download and parse the CMPO OBO file for rich semantic information.

        Raises on network failure so the caller can fall back to other sources.
        """
        obo_url = "https://raw.githubusercontent.com/EBISPOT/CMPO/master/cmpo.obo"

        logging.info(f"Downloading CMPO OBO file from {obo_url}")
        response = requests.get(obo_url)
        response.raise_for_status()

        # Parse OBO content
        ontology_data = self._parse_obo_content(response.text)

        # Persist the processed data so future runs skip the download.
        self.data_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self.data_path, 'w') as f:
            json.dump(ontology_data, f, indent=2)

        self.ontology = ontology_data
        logging.info(f"Successfully loaded {len(ontology_data)} CMPO terms")

    def _parse_obo_content(self, obo_text: str) -> Dict:
        """Parse OBO format text into a {term_id: payload} dict.

        Handles [Term] stanzas with id, name, def, synonym, is_a,
        equivalent_to, subset and xref tags; other tags are ignored.
        """
        ontology_data = {}
        current_term = None
        current_term_id = None

        for line in obo_text.split('\n'):
            line = line.strip()

            if line == '[Term]':
                # Save previous term if exists
                if current_term and current_term_id:
                    ontology_data[current_term_id] = current_term
                # Start new term with empty fields
                current_term = {
                    'name': '',
                    'description': '',
                    'synonyms': [],
                    'features': [],
                    'parent_terms': [],
                    'subclass_of': [],
                    'equivalent_to': [],
                    'subset': [],
                    'xrefs': [],
                    'iri': ''
                }
                current_term_id = None

            elif line.startswith('id:') and current_term is not None:
                # maxsplit=1 keeps the colon inside IDs like "CMPO:0000094"
                current_term_id = line.split(':', 1)[1].strip()
                current_term['iri'] = f"http://purl.obolibrary.org/obo/{current_term_id.replace(':', '_')}"

            elif line.startswith('name:') and current_term is not None:
                current_term['name'] = line.split(':', 1)[1].strip()

            elif line.startswith('def:') and current_term is not None:
                # Definition format: def: "text" [refs] -- strip quotes/refs.
                def_text = line.split(':', 1)[1].strip()
                if def_text.startswith('"') and '" [' in def_text:
                    current_term['description'] = def_text.split('" [')[0][1:]
                else:
                    current_term['description'] = def_text

            elif line.startswith('synonym:') and current_term is not None:
                # Synonym format: synonym: "text" EXACT []
                syn_text = line.split(':', 1)[1].strip()
                if syn_text.startswith('"'):
                    synonym = syn_text.split('"')[1]
                    current_term['synonyms'].append(synonym)

            elif line.startswith('is_a:') and current_term is not None:
                # Format: is_a: CMPO:0000000 ! human-readable name
                parent = line.split(':', 1)[1].strip().split('!')[0].strip()
                current_term['parent_terms'].append(parent)
                current_term['subclass_of'].append(parent)

            elif line.startswith('equivalent_to:') and current_term is not None:
                equiv = line.split(':', 1)[1].strip()
                current_term['equivalent_to'].append(equiv)

            elif line.startswith('subset:') and current_term is not None:
                subset = line.split(':', 1)[1].strip()
                current_term['subset'].append(subset)

            elif line.startswith('xref:') and current_term is not None:
                xref = line.split(':', 1)[1].strip()
                current_term['xrefs'].append(xref)

        # Don't forget the last term (no trailing [Term] marker flushes it)
        if current_term and current_term_id:
            ontology_data[current_term_id] = current_term

        return ontology_data

    def _download_from_ols(self):
        """Download CMPO terms page-by-page using the OLS REST API."""
        base_url = "https://www.ebi.ac.uk/ols/api/ontologies/cmpo/terms"
        ontology_data = {}

        # Get all terms, paging until OLS reports the last page.
        page = 0
        while True:
            response = requests.get(f"{base_url}?page={page}&size=500")
            response.raise_for_status()
            data = response.json()

            if '_embedded' not in data or 'terms' not in data['_embedded']:
                break

            for term in data['_embedded']['terms']:
                term_id = term['obo_id'] if 'obo_id' in term else term['iri'].split('/')[-1]

                ontology_data[term_id] = {
                    'name': term.get('label', ''),
                    'description': term.get('description', [''])[0] if term.get('description') else '',
                    'synonyms': term.get('synonyms', []),
                    'features': self._extract_features_from_term(term),
                    'parent_terms': self._extract_parents(term),
                    'iri': term.get('iri', '')
                }

            # Check if there are more pages
            if data['page']['number'] >= data['page']['totalPages'] - 1:
                break
            page += 1

        # Save to file
        self.data_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self.data_path, 'w') as f:
            json.dump(ontology_data, f, indent=2)

        self.ontology = ontology_data

    def _extract_features_from_term(self, term: Dict) -> List[str]:
        """Best-effort feature list for an OLS term payload.

        OLS term records carry no explicit feature annotations, so synonyms
        are used as a lightweight proxy.  (Fix: this method was called by
        _download_from_ols but never defined, causing an AttributeError on
        the OLS fallback path.)
        """
        return list(term.get('synonyms') or [])

    def _extract_parents(self, term: Dict) -> List[str]:
        """Best-effort parent-term list for an OLS term payload.

        Resolving OLS parent links requires an extra HTTP request per term,
        so an empty list is returned; the hierarchy can be rebuilt from the
        OBO source when available.  (Fix: previously undefined, causing an
        AttributeError on the OLS fallback path.)
        """
        return []

    def _extract_owl_features(self, cls) -> List[str]:
        """Best-effort feature list for an owlready2 class.

        No feature annotations are currently extracted from OWL classes.
        (Fix: previously undefined, causing an AttributeError on the OWL
        fallback path.)
        """
        return []

    def _download_owl_file(self):
        """Download and parse the CMPO OWL file directly via owlready2."""
        try:
            import owlready2

            # Download CMPO OWL file
            owl_url = "https://raw.githubusercontent.com/EBISPOT/CMPO/master/cmpo.owl"
            response = requests.get(owl_url)
            response.raise_for_status()

            # Save temporarily (owlready2 loads from a file URI)
            temp_owl = "temp_cmpo.owl"
            with open(temp_owl, 'wb') as f:
                f.write(response.content)

            # Parse with owlready2
            onto = owlready2.get_ontology(f"file://{Path(temp_owl).absolute()}").load()

            ontology_data = {}
            for cls in onto.classes():
                if hasattr(cls, 'label') and cls.label:
                    term_id = cls.name
                    ontology_data[term_id] = {
                        'name': cls.label[0] if cls.label else cls.name,
                        'description': cls.comment[0] if hasattr(cls, 'comment') and cls.comment else '',
                        'synonyms': list(cls.hasExactSynonym) if hasattr(cls, 'hasExactSynonym') else [],
                        'features': self._extract_owl_features(cls),
                        'parent_terms': [p.name for p in cls.is_a if hasattr(p, 'name')],
                        'iri': str(cls.iri)
                    }

            # Clean up the temporary download
            Path(temp_owl).unlink()

            # Save processed data
            self.data_path.parent.mkdir(parents=True, exist_ok=True)
            with open(self.data_path, 'w') as f:
                json.dump(ontology_data, f, indent=2)

            self.ontology = ontology_data

        except ImportError:
            logging.error("owlready2 not installed. Install with: pip install owlready2")
            self._create_minimal_ontology()

    def _create_minimal_ontology(self):
        """Create a minimal hardcoded CMPO ontology as the last-resort fallback."""
        minimal_ontology = {
            "CMPO_0000094": {
                "name": "apoptotic cell phenotype",
                "description": "A cellular phenotype observed in cells undergoing apoptosis",
                "features": ["apoptosis_markers", "nuclear_fragmentation", "chromatin_condensation", "membrane_blebbing"],
                "synonyms": ["apoptosis", "programmed cell death"],
                "parent_terms": ["CMPO_0000000"],
                "keywords": ["apoptotic", "apoptosis", "fragmented", "condensed", "blebbing", "dying"]
            },
            "CMPO_0000140": {
                "name": "mitotic cell phenotype",
                "description": "A cellular phenotype observed in cells undergoing mitosis",
                "features": ["mitotic_figures", "chromatin_condensation", "spindle_formation"],
                "synonyms": ["mitosis", "cell division"],
                "parent_terms": ["CMPO_0000000"],
                "keywords": ["mitotic", "mitosis", "dividing", "metaphase", "anaphase", "prophase"]
            },
            "CMPO_0000077": {
                "name": "abnormal cell morphology phenotype",
                "description": "A phenotype related to abnormal cellular shape or structure",
                "features": ["abnormal_morphology", "nuclear_size", "cell_shape"],
                "synonyms": ["morphological abnormality"],
                "parent_terms": ["CMPO_0000000"],
                "keywords": ["abnormal", "irregular", "deformed", "enlarged", "shrunken"]
            },
            "CMPO_0000098": {
                "name": "autophagic cell phenotype",
                "description": "A cellular phenotype related to autophagy",
                "features": ["lc3_puncta", "autophagosome_formation", "cytoplasmic_vacuoles"],
                "synonyms": ["autophagy"],
                "parent_terms": ["CMPO_0000000"],
                "keywords": ["autophagic", "autophagy", "lc3", "puncta", "vacuoles"]
            }
        }

        # Save minimal ontology
        self.data_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self.data_path, 'w') as f:
            json.dump(minimal_ontology, f, indent=2)

        self.ontology = minimal_ontology

    def _build_indices(self):
        """Build lookup indices (term, feature, keyword) for fast searching."""
        self.term_index = {}
        self.feature_index = {}
        self.keyword_index = {}

        for term_id, term_data in self.ontology.items():
            # Index by term ID and lower-cased name
            self.term_index[term_id] = term_data
            self.term_index[term_data['name'].lower()] = term_data

            # Index by features
            for feature in term_data.get('features', []):
                if feature not in self.feature_index:
                    self.feature_index[feature] = []
                self.feature_index[feature].append(term_id)

            # Index by keywords (name, synonyms, features)
            keywords = [term_data['name']]
            keywords.extend(term_data.get('synonyms', []))
            keywords.extend(term_data.get('features', []))

            for keyword in keywords:
                keyword_lower = keyword.lower()
                if keyword_lower not in self.keyword_index:
                    self.keyword_index[keyword_lower] = []
                self.keyword_index[keyword_lower].append(term_id)

    def get_term(self, term_id: str) -> Optional[Dict]:
        """Get a CMPO term payload by ID, or None when unknown."""
        return self.ontology.get(term_id)

    def search_by_keyword(self, keyword: str) -> List[str]:
        """Search for CMPO term IDs by keyword (exact then substring match)."""
        keyword_lower = keyword.lower()
        results = set()

        # Exact match
        if keyword_lower in self.keyword_index:
            results.update(self.keyword_index[keyword_lower])

        # Partial match, in either direction
        for indexed_keyword, term_ids in self.keyword_index.items():
            if keyword_lower in indexed_keyword or indexed_keyword in keyword_lower:
                results.update(term_ids)

        return list(results)

    def get_terms_by_feature(self, feature: str) -> List[str]:
        """Get CMPO term IDs that carry a specific feature."""
        return self.feature_index.get(feature, [])
|
anton/core/__pycache__/pipeline.cpython-313.pyc
ADDED
|
Binary file (9.8 kB). View file
|
|
|
anton/core/config.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Configuration management for Anton's analysis pipeline."""
|
| 2 |
+
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Dict, List, Optional, Union
|
| 5 |
+
import json
|
| 6 |
+
import logging
|
| 7 |
+
|
| 8 |
+
logger = logging.getLogger(__name__)
|
| 9 |
+
|
| 10 |
+
class Config:
    """Configuration management for Anton's analysis pipeline.

    Holds a nested dict of settings, seeded from DEFAULT_CONFIG and
    optionally overridden by a user JSON file.  Dotted keys ("vlm.model")
    address nested values in get()/set().
    """

    # Pure-JSON defaults; never mutated directly (instances deep-copy it).
    DEFAULT_CONFIG = {
        "channels": [0],
        "neighborhood_size": [100, 100],
        "vlm": {
            "model": "gpt-4-vision-preview",
            "temperature": 0.7,
            "max_tokens": 1000
        },
        "analysis": {
            "min_confidence": 0.7,
            "batch_size": 10
        },
        "output": {
            "save_intermediate": True,
            "format": "json"
        }
    }

    def __init__(self, config_path: Optional[Union[str, Path]] = None):
        """Initialize configuration from file or defaults.

        Args:
            config_path: Optional path to a JSON config file whose values
                override the defaults.
        """
        # Fix: DEFAULT_CONFIG.copy() was a SHALLOW copy, so the nested dicts
        # ("vlm", "analysis", "output") were shared with the class attribute --
        # set()/load_config() on one instance silently mutated the defaults
        # seen by every other instance.  A JSON round-trip is a dependency-free
        # deep copy, valid because DEFAULT_CONFIG is pure JSON data.
        self.config = json.loads(json.dumps(self.DEFAULT_CONFIG))

        if config_path:
            self.load_config(config_path)

    def load_config(self, config_path: Union[str, Path]) -> None:
        """Load configuration from a JSON file, merging over the defaults.

        Raises:
            OSError / json.JSONDecodeError: propagated after logging.
        """
        try:
            with open(config_path, 'r') as f:
                user_config = json.load(f)

            # Update default config with user settings (recursive merge)
            self._update_config(self.config, user_config)
            logger.info(f"Loaded configuration from {config_path}")

        except Exception as e:
            logger.error(f"Failed to load configuration: {str(e)}")
            raise

    def _update_config(self, base: Dict, update: Dict) -> None:
        """Recursively merge *update* into *base* (dicts merge, leaves replace)."""
        for key, value in update.items():
            if key in base and isinstance(base[key], dict) and isinstance(value, dict):
                self._update_config(base[key], value)
            else:
                base[key] = value

    def get(self, key: str, default=None):
        """Get a configuration value by dotted key (e.g. "vlm.model").

        Returns *default* when any path component is missing or the stored
        value is None.
        """
        keys = key.split('.')
        value = self.config

        for k in keys:
            if isinstance(value, dict):
                value = value.get(k)
            else:
                return default

        if value is None:
            return default

        return value

    def set(self, key: str, value) -> None:
        """Set a configuration value by dotted key, creating nested dicts."""
        keys = key.split('.')
        config = self.config

        for k in keys[:-1]:
            if k not in config:
                config[k] = {}
            config = config[k]

        config[keys[-1]] = value

    def save(self, config_path: Union[str, Path]) -> None:
        """Save the current configuration to a JSON file.

        Raises:
            OSError / TypeError: propagated after logging.
        """
        try:
            with open(config_path, 'w') as f:
                json.dump(self.config, f, indent=4)
            logger.info(f"Saved configuration to {config_path}")

        except Exception as e:
            logger.error(f"Failed to save configuration: {str(e)}")
            raise
|
anton/core/pipeline.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Core pipeline orchestration for Anton's multi-stage analysis flow."""
|
| 2 |
+
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Dict, List, Optional, Tuple, Union
|
| 5 |
+
import logging
|
| 6 |
+
import asyncio
|
| 7 |
+
|
| 8 |
+
from ..vlm.interface import VLMInterface
|
| 9 |
+
from ..analysis.quantitative import QuantitativeAnalyzer
|
| 10 |
+
from ..analysis.qualitative import QualitativeAnalyzer
|
| 11 |
+
from ..cmpo.ontology import CMPOOntology
|
| 12 |
+
from ..utils.image_io import ImageLoader
|
| 13 |
+
from ..utils.validation import validate_stage_transition
|
| 14 |
+
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
class AnalysisPipeline:
    """Multi-stage analysis pipeline for microscopy phenotype analysis.

    Runs four sequential stages (global scene, object detection, feature
    analysis, population insights) against a single image, caching each
    stage's output in ``self.results`` so later stages can consume it.
    """

    def __init__(self, config: Dict):
        """Initialize pipeline with configuration.

        Args:
            config: Settings dict; keys read here include "vlm_provider",
                "vlm_model", "vlm_api_key", "biological_context",
                "quantitative" and (later) "channels".
        """
        self.config = config
        # VLM backend shared by all stages.
        self.vlm = VLMInterface(
            provider=config.get("vlm_provider", "claude"),
            model=config.get("vlm_model"),
            api_key=config.get("vlm_api_key"),
            biological_context=config.get("biological_context")
        )
        self.quant_analyzer = QuantitativeAnalyzer(config.get("quantitative", {}))
        self.cmpo = CMPOOntology()
        self.qual_analyzer = QualitativeAnalyzer(
            vlm_interface=self.vlm,
            cmpo_mapper=self.cmpo
        )
        self.image_loader = ImageLoader()

        # Initialize results cache
        # One slot per stage; filled in as each run_stage_N completes.
        self.results = {
            "stage_1_global": None,
            "stage_2_objects": None,
            "stage_3_features": None,
            "stage_4_population": None
        }

    async def run_stage_1(self, image_path: Union[str, Path]) -> Dict:
        """Run Stage 1: Global Scene Understanding.

        Loads the image (cached on the image loader for later stages) and
        asks the VLM for a whole-scene analysis.
        """
        logger.info("Starting Stage 1: Global Scene Understanding")

        # Load and preprocess image
        image = self.image_loader.load(image_path)

        # Get global scene analysis from VLM
        global_analysis = await self.vlm.analyze_global_scene(
            image=image,
            channels=self.config.get("channels", [0])
        )

        # Validate and cache results
        self.results["stage_1_global"] = global_analysis
        return global_analysis

    async def run_stage_2(self) -> Dict:
        """Run Stage 2: Object Detection & Segmentation Guidance.

        Requires a cached Stage 1 result; raises via
        validate_stage_transition otherwise.
        """
        logger.info("Starting Stage 2: Object Detection & Segmentation Guidance")

        # Validate stage transition
        validate_stage_transition(self.results["stage_1_global"], "stage_2")

        # Get object detection and segmentation guidance
        object_analysis = await self.vlm.detect_objects_and_guide(
            image=self.image_loader.current_image,
            global_context=self.results["stage_1_global"]
        )

        # Cache results
        self.results["stage_2_objects"] = object_analysis
        return object_analysis

    async def run_stage_3(self) -> Dict:
        """Run Stage 3: Feature-Level Analysis.

        Analyzes each object detected in Stage 2; requires the cached
        Stage 2 result.
        """
        logger.info("Starting Stage 3: Feature-Level Analysis")

        # Validate stage transition
        validate_stage_transition(self.results["stage_2_objects"], "stage_3")

        # Analyze features for detected objects
        feature_analysis = await self.vlm.analyze_features(
            image=self.image_loader.current_image,
            detected_objects=self.results["stage_2_objects"]["detected_objects"]
        )

        # Cache results
        self.results["stage_3_features"] = feature_analysis
        return feature_analysis

    async def run_stage_4(self) -> Dict:
        """Run Stage 4: Population-Level Insights with CMPO Integration.

        Generates population-level insights from Stage 3 output, then
        best-effort maps the VLM descriptions onto CMPO ontology terms.
        CMPO failures are logged and swallowed so the pipeline still
        returns the population analysis.
        """
        logger.info("Starting Stage 4: Population-Level Insights with CMPO mapping")

        # Validate stage transition
        validate_stage_transition(self.results["stage_3_features"], "stage_4")

        # Generate population insights (VLM)
        population_analysis = await self.vlm.generate_population_insights(
            feature_analyses=self.results["stage_3_features"]["object_analyses"]
        )

        # Direct CMPO mapping of existing VLM descriptions
        try:
            from ..cmpo.mapping import map_to_cmpo

            # Get VLM descriptions from previous stages
            global_description = self.results.get("stage_1_global", {}).get("description", "")
            population_description = population_analysis.get("population_summary", "")

            all_cmpo_mappings = []

            # Map global description to CMPO terms
            if global_description:
                global_mappings = map_to_cmpo(global_description, self.qual_analyzer.cmpo_mapper, context='cell_population')
                # Tag each mapping with its provenance for downstream display.
                for mapping in global_mappings:
                    mapping['stage'] = 'global_context'
                    mapping['source'] = 'vlm_global_analysis'
                all_cmpo_mappings.extend(global_mappings)

            # Map population description to CMPO terms
            if population_description:
                pop_mappings = map_to_cmpo(population_description, self.qual_analyzer.cmpo_mapper, context='cell_population')
                for mapping in pop_mappings:
                    mapping['stage'] = 'population_insights'
                    mapping['source'] = 'vlm_population_analysis'
                all_cmpo_mappings.extend(pop_mappings)

            # Create CMPO summary for quick_demo display
            # top_terms keeps only the 5 highest-confidence mappings.
            cmpo_summary = {
                'total_unique_terms': len(set(m.get('CMPO_ID') for m in all_cmpo_mappings)),
                'total_mappings': len(all_cmpo_mappings),
                'top_terms': [
                    {
                        'term': mapping.get('term_name'),
                        'cmpo_id': mapping.get('CMPO_ID'),
                        'confidence': mapping.get('confidence', 0),
                        'stages': [mapping.get('stage')]
                    }
                    for mapping in sorted(all_cmpo_mappings, key=lambda x: x.get('confidence', 0), reverse=True)[:5]
                ],
                'mappings': all_cmpo_mappings
            }

            population_analysis["qualitative_features"] = {"cmpo_summary": cmpo_summary}
            logger.info(f"CMPO integration completed: {len(all_cmpo_mappings)} total mappings")

        except Exception as e:
            logger.warning(f"CMPO integration failed: {e}")
            # Continue without CMPO if it fails

        # Cache results
        self.results["stage_4_population"] = population_analysis
        return population_analysis

    async def run_pipeline(self, image_path: Union[str, Path]) -> Dict:
        """Run the complete analysis pipeline.

        Executes stages 1-4 in order and returns the full results cache.
        """
        try:
            # Run all stages in sequence
            await self.run_stage_1(image_path)
            await self.run_stage_2()
            await self.run_stage_3()
            await self.run_stage_4()

            return self.results

        except Exception as e:
            logger.error(f"Pipeline execution failed: {str(e)}")
            raise

    def run_pipeline_sync(self, image_path: Union[str, Path]) -> Dict:
        """Run the complete analysis pipeline synchronously (convenience method).

        Works both inside and outside a running event loop: when a loop is
        already running, the async pipeline is executed on a worker thread.
        """
        try:
            # Check if we're already in an event loop
            loop = asyncio.get_running_loop()
            # If we're in a loop, create a new thread to run the async code
            import concurrent.futures
            with concurrent.futures.ThreadPoolExecutor() as executor:
                future = executor.submit(asyncio.run, self.run_pipeline(image_path))
                return future.result()
        except RuntimeError:
            # No event loop running, safe to use asyncio.run
            return asyncio.run(self.run_pipeline(image_path))
|
anton/main.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import asyncio
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
from anton.core.config import Config
|
| 7 |
+
from anton.core.pipeline import AnalysisPipeline
|
| 8 |
+
|
| 9 |
+
logging.basicConfig(level=logging.INFO)
|
| 10 |
+
|
| 11 |
+
def main():
    """Interactive main function for Anton CMPO phenotype analysis framework.

    Prompts the user for an analysis goal, an image path and optional
    metadata/config paths, runs the full pipeline synchronously and writes
    the results to results.csv.
    """
    print("Welcome to Anton: VLM-driven microscopy phenotype analysis framework.")
    print("Please provide the following information:")

    analysis_goal = input("Enter your analysis goal (e.g., 'Identify apoptotic cells in DAPI-stained channel 1'): ")
    tiff_path = input("Enter the path to your TIFF image: ")
    meta_path = input("Enter the path to your metadata file (optional, press Enter to skip): ")
    cfg_path = input("Enter the path to your config file (optional, press Enter to skip): ")

    # Build configuration, layering the interactive answers on top.
    cfg = Config(cfg_path or None)
    cfg.set("goal", analysis_goal)
    cfg.set("image_path", str(tiff_path))
    if meta_path:
        cfg.set("metadata_path", str(meta_path))

    # Run the four-stage pipeline and persist the results.
    runner = AnalysisPipeline(cfg.config)
    results = runner.run_pipeline_sync(tiff_path)

    print(f"Results: {results}")
    pd.DataFrame([results]).to_csv("results.csv", index=False)
|
| 36 |
+
|
| 37 |
+
async def main_async():
    """Async version of the interactive entry point.

    Fix: the original implementation called the blocking built-in ``input()``
    directly inside the coroutine, which stalls the running event loop for
    the duration of every prompt. The prompts now run in a worker thread via
    ``asyncio.to_thread`` so other tasks can make progress while waiting.
    """
    print("Welcome to Anton: VLM-driven microscopy phenotype analysis framework.")
    print("Please provide the following information:")

    # Run blocking input() calls on a worker thread to keep the loop free.
    goal = await asyncio.to_thread(input, "Enter your analysis goal (e.g., 'Identify apoptotic cells in DAPI-stained channel 1'): ")
    image_path = await asyncio.to_thread(input, "Enter the path to your TIFF image: ")
    metadata_path = await asyncio.to_thread(input, "Enter the path to your metadata file (optional, press Enter to skip): ")
    config_path = await asyncio.to_thread(input, "Enter the path to your config file (optional, press Enter to skip): ")

    # Load configuration and apply the interactive answers.
    config = Config(config_path if config_path else None)
    config.set("goal", goal)
    config.set("image_path", str(image_path))
    if metadata_path:
        config.set("metadata_path", str(metadata_path))

    # Initialize pipeline and await the full four-stage analysis.
    pipeline = AnalysisPipeline(config.config)
    results = await pipeline.run_pipeline(image_path)

    # Output results
    print(f"Results: {results}")
    df = pd.DataFrame([results])
    df.to_csv("results.csv", index=False)
|
| 62 |
+
|
| 63 |
+
if __name__ == "__main__":
    # Script entry point: run the interactive synchronous flow.
    main()
|
anton/utils/__pycache__/image_io.cpython-313.pyc
ADDED
|
Binary file (10.8 kB). View file
|
|
|
anton/utils/__pycache__/validation.cpython-313.pyc
ADDED
|
Binary file (1.02 kB). View file
|
|
|
anton/utils/image_io.py
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Image loading and preprocessing utilities for Anton's pipeline."""
|
| 2 |
+
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Union, Tuple, Optional, List
|
| 5 |
+
import numpy as np
|
| 6 |
+
from PIL import Image
|
| 7 |
+
import logging
|
| 8 |
+
|
| 9 |
+
logger = logging.getLogger(__name__)
|
| 10 |
+
|
| 11 |
+
class ImageLoader:
    """Handles image loading and preprocessing for microscopy analysis.

    Caches the most recently loaded image (and its metadata) on the
    instance so later pipeline stages can reuse it without reloading.
    """

    def __init__(self):
        """Initialize ImageLoader with empty state."""
        self.current_image = None        # numpy array of the last loaded image
        self.current_image_path = None   # Path of the last loaded image
        self.metadata = {}               # basic metadata of the last loaded image

    def load(self, image_path: Union[str, Path]) -> np.ndarray:
        """Load image from path.

        Args:
            image_path: Path to the image file

        Returns:
            numpy array of the loaded image

        Raises:
            FileNotFoundError: If the file does not exist.
        """
        try:
            image_path = Path(image_path)
            if not image_path.exists():
                raise FileNotFoundError(f"Image not found: {image_path}")

            # Load image using PIL (supports many formats including TIFF)
            pil_image = Image.open(image_path)
            image_array = np.array(pil_image)

            # Cache image and metadata for later pipeline stages.
            self.current_image = image_array
            self.current_image_path = image_path
            self.metadata = {
                'shape': image_array.shape,
                'dtype': str(image_array.dtype),
                'path': str(image_path),
                'format': pil_image.format,
                'mode': pil_image.mode
            }

            logger.info(f"Loaded image: {image_path}, shape: {image_array.shape}")
            return image_array

        except Exception as e:
            logger.error(f"Failed to load image {image_path}: {e}")
            raise

    def preprocess(self, image: np.ndarray, normalize: bool = True,
                   channels: Optional[List[int]] = None) -> np.ndarray:
        """Preprocess image for analysis.

        Args:
            image: Input image array
            normalize: Whether to normalize intensity values to 0-1
            channels: Specific channel indices to extract (multi-channel only)

        Returns:
            Preprocessed image array (always a copy of the input)
        """
        try:
            processed = image.copy()

            # Extract requested channels; assumes channels-last layout for
            # both 3D and 4D inputs -- TODO confirm for 4D stacks.
            if channels is not None and len(image.shape) > 2:
                if len(image.shape) == 3:
                    processed = processed[:, :, channels]
                elif len(image.shape) == 4:
                    processed = processed[:, :, :, channels]

            if normalize:
                processed = self._normalize_image(processed)

            return processed

        except Exception as e:
            logger.error(f"Failed to preprocess image: {e}")
            raise

    def _normalize_image(self, image: np.ndarray) -> np.ndarray:
        """Normalize image intensity values to the 0-1 range.

        uint8/uint16 inputs are divided by their full-scale value; other
        dtypes are min-max scaled.
        """
        if image.dtype == np.uint8:
            return image.astype(np.float32) / 255.0
        elif image.dtype == np.uint16:
            return image.astype(np.float32) / 65535.0
        else:
            min_val = image.min()
            max_val = image.max()
            if max_val > min_val:
                return (image - min_val) / (max_val - min_val)
            # Bug fix: constant-valued float images were previously returned
            # unchanged, so values outside [0, 1] leaked through and made
            # convert_to_8bit overflow/wrap the uint8 range. Clip instead.
            return np.clip(image, 0.0, 1.0)

    def extract_channel(self, image: np.ndarray, channel: int) -> np.ndarray:
        """Extract a specific channel from a multi-channel image.

        Args:
            image: 2D (grayscale) or 3D (channels-last) image array
            channel: Channel index to extract

        Returns:
            Single-channel image array (grayscale inputs pass through)

        Raises:
            ValueError: If the channel index or image shape is unsupported.
        """
        try:
            if len(image.shape) == 2:
                # Grayscale image: nothing to extract.
                return image
            elif len(image.shape) == 3:
                if channel < image.shape[2]:
                    return image[:, :, channel]
                raise ValueError(f"Channel {channel} not available in image with {image.shape[2]} channels")
            else:
                raise ValueError(f"Unsupported image shape: {image.shape}")

        except Exception as e:
            logger.error(f"Failed to extract channel {channel}: {e}")
            raise

    def convert_to_8bit(self, image: np.ndarray) -> np.ndarray:
        """Convert image to 8-bit for display/export.

        Args:
            image: Input image array

        Returns:
            8-bit image array (uint8 inputs are returned unchanged)
        """
        try:
            if image.dtype == np.uint8:
                return image

            # Normalize to [0, 1] first, then scale to the uint8 range.
            normalized = self._normalize_image(image)
            return (normalized * 255).astype(np.uint8)

        except Exception as e:
            logger.error(f"Failed to convert to 8-bit: {e}")
            raise

    def save_image(self, image: np.ndarray, output_path: Union[str, Path],
                   format: str = 'PNG') -> None:
        """Save image to file.

        Args:
            image: Image array to save
            output_path: Output file path (parent dirs created as needed)
            format: Image format (PNG, TIFF, etc.)
        """
        try:
            output_path = Path(output_path)
            output_path.parent.mkdir(parents=True, exist_ok=True)

            # PIL needs 8-bit data for common display formats.
            if image.dtype != np.uint8:
                image = self.convert_to_8bit(image)

            pil_image = Image.fromarray(image)
            pil_image.save(output_path, format=format)

            logger.info(f"Saved image to: {output_path}")

        except Exception as e:
            logger.error(f"Failed to save image to {output_path}: {e}")
            raise

    def get_image_stats(self, image: Optional[np.ndarray] = None) -> dict:
        """Get basic statistics about the image.

        Args:
            image: Image array (uses current_image if None)

        Returns:
            Dictionary with shape/dtype/min/max/mean/std (empty on failure
            or when no image is available).
        """
        if image is None:
            image = self.current_image

        if image is None:
            return {}

        try:
            stats = {
                'shape': image.shape,
                'dtype': str(image.dtype),
                'min': float(image.min()),
                'max': float(image.max()),
                'mean': float(image.mean()),
                'std': float(image.std())
            }

            # Report channel count for multi-dimensional images.
            if len(image.shape) > 2:
                stats['channels'] = image.shape[2] if len(image.shape) == 3 else image.shape[-1]

            return stats

        except Exception as e:
            logger.error(f"Failed to compute image statistics: {e}")
            return {}

    def create_rgb_composite(self, channels: List[np.ndarray],
                             colors: List[Tuple[float, float, float]] = None) -> np.ndarray:
        """Create RGB composite from multiple channels.

        Args:
            channels: List of single-channel images (all the same shape)
            colors: RGB color per channel (default: R, G, B); extra channels
                beyond the color list are ignored (zip truncates)

        Returns:
            RGB composite image, float32 in [0, 1]

        Raises:
            ValueError: If no channels are given or shapes mismatch.
        """
        try:
            if not channels:
                raise ValueError("No channels provided")

            if colors is None:
                colors = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]

            # All channels must share one shape.
            shape = channels[0].shape
            for i, ch in enumerate(channels):
                if ch.shape != shape:
                    raise ValueError(f"Channel {i} shape {ch.shape} doesn't match expected {shape}")

            composite = np.zeros((*shape, 3), dtype=np.float32)

            for channel, color in zip(channels, colors):
                # Normalize each channel, then tint it with its color.
                norm_channel = self._normalize_image(channel)
                for c in range(3):
                    composite[:, :, c] += norm_channel * color[c]

            # Overlapping channels can exceed 1.0; clip back into range.
            return np.clip(composite, 0, 1)

        except Exception as e:
            logger.error(f"Failed to create RGB composite: {e}")
            raise
|
anton/utils/validation.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Validation utilities for Anton's pipeline."""
|
| 2 |
+
|
| 3 |
+
def validate_stage_transition(prev_stage_result, next_stage):
    """Validate that the transition between pipeline stages is consistent.

    Raises ValueError when the previous stage produced no result or is
    missing the key the next stage requires; returns True otherwise.
    """
    if prev_stage_result is None:
        raise ValueError(f"Previous stage result missing for transition to {next_stage}")

    # Required key and error message per target stage.
    requirements = {
        "stage_2": ("description", "Stage 1 must provide description for Stage 2 transition"),
        "stage_3": ("detected_objects", "Stage 2 must provide detected_objects for Stage 3 transition"),
        "stage_4": ("object_analyses", "Stage 3 must provide object_analyses for Stage 4 transition"),
    }

    requirement = requirements.get(next_stage)
    if requirement is not None and requirement[0] not in prev_stage_result:
        raise ValueError(requirement[1])

    return True
|
anton/vlm/__pycache__/interface.cpython-313.pyc
ADDED
|
Binary file (29.5 kB). View file
|
|
|
anton/vlm/interface.py
ADDED
|
@@ -0,0 +1,566 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""VLM interface for Anton's microscopy phenotype analysis."""
|
| 2 |
+
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Dict, List, Optional, Union, Any
|
| 5 |
+
import logging
|
| 6 |
+
import json
|
| 7 |
+
import os
|
| 8 |
+
import base64
|
| 9 |
+
import asyncio
|
| 10 |
+
from io import BytesIO
|
| 11 |
+
import numpy as np
|
| 12 |
+
from PIL import Image
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
class VLMInterface:
|
| 17 |
+
"""Interface for Vision Language Model (VLM) interactions."""
|
| 18 |
+
|
| 19 |
+
def __init__(self, provider="claude", model=None, api_key=None, biological_context=None):
    """Initialize VLM interface.

    Args:
        provider: "claude", "gemini", or "openai"
        model: Model name (provider-specific); defaults per provider
        api_key: API key for external providers
        biological_context: Dict with experimental context (cell line, protein, drugs, etc.)
    """
    self.provider = provider
    # Fall back to the provider-specific default when no model is given.
    self.model = model or self._get_default_model(provider)
    # May be None for Claude when no API key is available (fallback mode).
    self.client = self._setup_client(api_key)
    self.biological_context = biological_context or {}
    # Prompt templates loaded from the repository's prompts/ directory.
    self.prompts = self._load_prompts()
|
| 33 |
+
|
| 34 |
+
def _get_default_model(self, provider: str) -> str:
|
| 35 |
+
"""Get default model for provider."""
|
| 36 |
+
defaults = {
|
| 37 |
+
"claude": "claude-3-sonnet-20240229",
|
| 38 |
+
"gemini": "gemini-1.5-flash",
|
| 39 |
+
"openai": "gpt-4-vision-preview"
|
| 40 |
+
}
|
| 41 |
+
return defaults.get(provider, "claude-3-sonnet-20240229")
|
| 42 |
+
|
| 43 |
+
def _setup_client(self, api_key: Optional[str]):
|
| 44 |
+
"""Set up the VLM client based on provider."""
|
| 45 |
+
if self.provider == "claude":
|
| 46 |
+
# For Claude Code environment, we don't need a separate client
|
| 47 |
+
# We'll use a simple wrapper that can make direct calls
|
| 48 |
+
return self._create_claude_client(api_key)
|
| 49 |
+
elif self.provider == "gemini":
|
| 50 |
+
return self._create_gemini_client(api_key)
|
| 51 |
+
elif self.provider == "openai":
|
| 52 |
+
return self._create_openai_client(api_key)
|
| 53 |
+
else:
|
| 54 |
+
raise ValueError(f"Unsupported provider: {self.provider}")
|
| 55 |
+
|
| 56 |
+
def _create_claude_client(self, api_key: Optional[str]):
    """Create Claude client.

    Returns an anthropic.Anthropic client when a key and the library are
    available; otherwise returns None so callers use fallback responses.
    """
    # Try to get API key from environment if not provided
    if not api_key:
        api_key = os.getenv("ANTHROPIC_API_KEY")

    if api_key:
        try:
            import anthropic
            client = anthropic.Anthropic(api_key=api_key)
            # Store for potential direct calls
            self._anthropic_client = client
            logger.info("Successfully initialized Anthropic client with API key")
            return client
        except ImportError:
            # Library missing: fall through to the fallback path below.
            logger.warning("Anthropic library not available, using fallback")
        except Exception as e:
            # Any other init failure is non-fatal; fallback path below.
            logger.warning(f"Failed to initialize Anthropic client: {e}")

    # Fallback for Claude Code environment
    logger.info("No API key provided, using enhanced fallback responses")
    return None
|
| 78 |
+
|
| 79 |
+
def _create_gemini_client(self, api_key: Optional[str]):
|
| 80 |
+
"""Create Gemini client."""
|
| 81 |
+
if not api_key:
|
| 82 |
+
api_key = os.getenv("GOOGLE_API_KEY")
|
| 83 |
+
if not api_key:
|
| 84 |
+
raise ValueError("Gemini API key required")
|
| 85 |
+
|
| 86 |
+
try:
|
| 87 |
+
import google.generativeai as genai
|
| 88 |
+
genai.configure(api_key=api_key)
|
| 89 |
+
return genai.GenerativeModel(self.model)
|
| 90 |
+
except ImportError:
|
| 91 |
+
raise ImportError("google-generativeai library required for Gemini")
|
| 92 |
+
|
| 93 |
+
def _create_openai_client(self, api_key: Optional[str]):
|
| 94 |
+
"""Create OpenAI client."""
|
| 95 |
+
if not api_key:
|
| 96 |
+
api_key = os.getenv("OPENAI_API_KEY")
|
| 97 |
+
if not api_key:
|
| 98 |
+
raise ValueError("OpenAI API key required")
|
| 99 |
+
|
| 100 |
+
try:
|
| 101 |
+
import openai
|
| 102 |
+
return openai.OpenAI(api_key=api_key)
|
| 103 |
+
except ImportError:
|
| 104 |
+
raise ImportError("openai library required for OpenAI")
|
| 105 |
+
|
| 106 |
+
def _load_prompts(self) -> Dict[str, str]:
    """Load all ``*.txt`` prompt templates into a name -> text mapping.

    The file stem (name without extension) becomes the prompt key.
    Returns an empty dict when the prompts directory is absent.
    """
    prompts_dir = Path(__file__).parent.parent.parent / 'prompts'

    if not prompts_dir.exists():
        logger.warning(f"Prompts directory not found: {prompts_dir}")
        return {}

    loaded: Dict[str, str] = {}
    for prompt_path in prompts_dir.glob('*.txt'):
        try:
            loaded[prompt_path.stem] = prompt_path.read_text(encoding='utf-8').strip()
        except Exception as e:
            # One unreadable file should not block the others.
            logger.error(f"Failed to load prompt {prompt_path}: {e}")

    return loaded
|
| 123 |
+
|
| 124 |
+
def _prepare_image(self, image_path: Union[str, Path, np.ndarray, Image.Image]) -> str:
    """Encode an image (file path, numpy array, or PIL image) as base64 PNG.

    Args:
        image_path: Path/str to an image file, a numpy array, or a PIL image.

    Returns:
        The image bytes base64-encoded as an ASCII string.

    Raises:
        ValueError: for unsupported input types.
    """
    if isinstance(image_path, (str, Path)):
        # File inputs are passed through untouched (already encoded bytes).
        with open(image_path, 'rb') as f:
            image_data = f.read()
    elif isinstance(image_path, np.ndarray):
        array = image_path
        if array.dtype != np.uint8:
            # FIX: the old `(x * 255).astype(uint8)` assumed floats in
            # [0, 1] and silently overflowed wider integer dtypes such as
            # uint16 microscopy data.  Convert per dtype family instead.
            if np.issubdtype(array.dtype, np.floating):
                array = (np.clip(array, 0.0, 1.0) * 255).astype(np.uint8)
            elif array.dtype == np.bool_:
                array = array.astype(np.uint8) * 255
            elif np.issubdtype(array.dtype, np.integer):
                info = np.iinfo(array.dtype)
                span = max(info.max - info.min, 1)
                scaled = (array.astype(np.float64) - info.min) / span
                array = (scaled * 255).astype(np.uint8)
            else:
                array = array.astype(np.uint8)
        pil_image = Image.fromarray(array)
        buffer = BytesIO()
        pil_image.save(buffer, format='PNG')
        image_data = buffer.getvalue()
    elif isinstance(image_path, Image.Image):
        buffer = BytesIO()
        image_path.save(buffer, format='PNG')
        image_data = buffer.getvalue()
    else:
        raise ValueError(f"Unsupported image type: {type(image_path)}")

    return base64.b64encode(image_data).decode('utf-8')
|
| 145 |
+
|
| 146 |
+
def _format_biological_context(self) -> str:
|
| 147 |
+
"""Format biological context for injection into prompts."""
|
| 148 |
+
if not self.biological_context:
|
| 149 |
+
return ""
|
| 150 |
+
|
| 151 |
+
context_lines = ["EXPERIMENTAL CONTEXT:"]
|
| 152 |
+
|
| 153 |
+
if 'experiment_type' in self.biological_context:
|
| 154 |
+
context_lines.append(f"- Experiment: {self.biological_context['experiment_type']}")
|
| 155 |
+
if 'cell_line' in self.biological_context:
|
| 156 |
+
context_lines.append(f"- Cell line: {self.biological_context['cell_line']}")
|
| 157 |
+
if 'protein' in self.biological_context:
|
| 158 |
+
context_lines.append(f"- Protein: {self.biological_context['protein']}")
|
| 159 |
+
if 'drugs' in self.biological_context:
|
| 160 |
+
drugs = ", ".join(self.biological_context['drugs'])
|
| 161 |
+
context_lines.append(f"- Drug treatments: {drugs}")
|
| 162 |
+
if 'readout' in self.biological_context:
|
| 163 |
+
context_lines.append(f"- Expected phenotype: {self.biological_context['readout']}")
|
| 164 |
+
if 'channels' in self.biological_context:
|
| 165 |
+
channels = ", ".join(self.biological_context['channels'])
|
| 166 |
+
context_lines.append(f"- Image channels: {channels}")
|
| 167 |
+
|
| 168 |
+
return "\n".join(context_lines)
|
| 169 |
+
|
| 170 |
+
async def analyze_global_scene(self, image: Any, channels: Optional[List[int]] = None) -> Dict:
    """Stage 1: global scene understanding of the whole image."""
    try:
        encoded = self._prepare_image(image)

        prompt = self.prompts.get('stage1_global', 'Analyze this microscopy image.')
        if self.biological_context:
            # Prepend experiment metadata so the VLM can ground its answer.
            prompt = f"{self._format_biological_context()}\n\n{prompt}"
        if channels:
            prompt += f" Focus on channels: {channels}"

        raw = await self._call_vlm(prompt, encoded)
        return self._parse_stage1_response(raw)
    except Exception as e:
        logger.error(f"Global scene analysis failed: {str(e)}")
        raise
|
| 190 |
+
|
| 191 |
+
async def detect_objects_and_guide(self, image: Any, global_context: Dict) -> Dict:
    """Stage 2: detect objects and produce segmentation guidance."""
    try:
        encoded = self._prepare_image(image)

        prompt = self.prompts.get('stage2_objects', 'Detect objects in this image.')
        if self.biological_context:
            prompt = f"{self._format_biological_context()}\n\n{prompt}"
        # Hand the Stage-1 result to the model as structured JSON context.
        prompt += f"\n\nGlobal context:\n{json.dumps(global_context, indent=2)}"

        raw = await self._call_vlm(prompt, encoded)
        return self._parse_stage2_response(raw)
    except Exception as e:
        logger.error(f"Object detection failed: {str(e)}")
        raise
|
| 212 |
+
|
| 213 |
+
async def analyze_features(self, image: Any, detected_objects: List[Dict]) -> Dict:
    """Stage 3: per-object feature characterization."""
    try:
        encoded = self._prepare_image(image)

        prompt = self.prompts.get('stage3_features', 'Analyze features in this image.')
        # Attach the detection list so the model analyzes the known objects.
        prompt += f"\n\nDetected objects:\n{json.dumps(detected_objects, indent=2)}"

        raw = await self._call_vlm(prompt, encoded)
        return self._parse_stage3_response(raw)
    except Exception as e:
        logger.error(f"Feature analysis failed: {str(e)}")
        raise
|
| 229 |
+
|
| 230 |
+
async def generate_population_insights(self, feature_analyses: List[Dict]) -> Dict:
    """Stage 4: population-level summary across all analyzed objects."""
    try:
        prompt = self.prompts.get('stage4_population', 'Generate population insights.')
        prompt += f"\n\nFeature analyses:\n{json.dumps(feature_analyses, indent=2)}"

        # Text-only call: Stage 4 reasons over prior results, not pixels.
        raw = await self._call_vlm(prompt)
        return self._parse_stage4_response(raw)
    except Exception as e:
        logger.error(f"Population analysis failed: {str(e)}")
        raise
|
| 245 |
+
|
| 246 |
+
async def analyze_biological_reasoning(self, validation_prompt: str) -> str:
    """Ask the VLM to validate a CMPO mapping; degrade gracefully on error."""
    try:
        return await self._call_vlm(validation_prompt)
    except Exception as e:
        logger.warning(f"Biological reasoning analysis failed: {e}")
        # Keep the pipeline moving with a permissive default verdict.
        return "VALID: Default validation - reasoning: VLM validation unavailable, using ontology mapping"
|
| 254 |
+
|
| 255 |
+
async def _call_vlm(self, prompt: str, image_data: Optional[str] = None) -> str:
    """Route a prompt (plus optional base64 image) to the active provider.

    Raises:
        ValueError: if ``self.provider`` is unknown.
    """
    handler_names = {
        "claude": "_call_claude",
        "gemini": "_call_gemini",
        "openai": "_call_openai",
    }
    handler_name = handler_names.get(self.provider)
    if handler_name is None:
        raise ValueError(f"Unsupported provider: {self.provider}")
    return await getattr(self, handler_name)(prompt, image_data)
|
| 265 |
+
|
| 266 |
+
async def _call_claude(self, prompt: str, image_data: Optional[str] = None) -> str:
    """Call the Claude API with a prompt and optional base64 PNG image.

    Falls back to the Claude Code direct path when no SDK client exists.

    Raises:
        Exception: when no working Claude integration is available or the
            API call itself fails.
    """
    if self.client is None:
        # No SDK client: try the Claude Code environment integration.
        try:
            return await self._call_claude_code_direct(prompt, image_data)
        except Exception as e:
            logger.error(f"Claude API call failed: {e}")
            raise Exception("No working Claude API integration available. Please provide ANTHROPIC_API_KEY.")

    try:
        content = [{"type": "text", "text": prompt}]
        if image_data:
            content.append({
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/png",
                    "data": image_data,
                },
            })

        # BUG FIX: self.client is the *synchronous* anthropic.Anthropic
        # client (see _create_claude_client), so messages.create() returns
        # a Message, not an awaitable — awaiting it raised TypeError.
        # Run the blocking call in a worker thread instead.
        response = await asyncio.to_thread(
            self.client.messages.create,
            model=self.model,
            max_tokens=4000,
            messages=[{"role": "user", "content": content}],
        )

        return response.content[0].text

    except Exception as e:
        logger.error(f"Claude API call failed: {str(e)}")
        raise
|
| 299 |
+
|
| 300 |
+
async def _call_gemini(self, prompt: str, image_data: Optional[str] = None) -> str:
    """Call the Gemini API, decoding any base64 image into a PIL image."""
    try:
        if image_data:
            # Gemini expects a PIL image alongside the text prompt.
            pil_image = Image.open(BytesIO(base64.b64decode(image_data)))
            request = [prompt, pil_image]
        else:
            request = prompt

        # generate_content is synchronous; off-load it to a worker thread.
        response = await asyncio.to_thread(self.client.generate_content, request)
        return response.text

    except Exception as e:
        logger.error(f"Gemini API call failed: {str(e)}")
        raise
|
| 321 |
+
|
| 322 |
+
async def _call_openai(self, prompt: str, image_data: Optional[str] = None) -> str:
    """Call the OpenAI chat-completions API with an optional inline image."""
    try:
        content = [{"type": "text", "text": prompt}]
        if image_data:
            content.append({
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{image_data}"},
            })

        # BUG FIX: self.client is the synchronous openai.OpenAI client
        # (see _create_openai_client), so .create() returns a plain
        # response, not an awaitable — awaiting it raised TypeError.
        # Execute the blocking call in a worker thread.
        response = await asyncio.to_thread(
            self.client.chat.completions.create,
            model=self.model,
            messages=[{"role": "user", "content": content}],
            max_tokens=4000,
        )

        return response.choices[0].message.content

    except Exception as e:
        logger.error(f"OpenAI API call failed: {str(e)}")
        raise
|
| 343 |
+
|
| 344 |
+
def _parse_stage1_response(self, response: str) -> Dict:
|
| 345 |
+
"""Parse Stage 1 response."""
|
| 346 |
+
try:
|
| 347 |
+
# Try to parse as JSON first
|
| 348 |
+
return json.loads(response)
|
| 349 |
+
except json.JSONDecodeError:
|
| 350 |
+
# Fallback to structured text parsing
|
| 351 |
+
return {
|
| 352 |
+
"description": response,
|
| 353 |
+
"quality_score": 0.8, # Default
|
| 354 |
+
"recommended_analysis": "standard"
|
| 355 |
+
}
|
| 356 |
+
|
| 357 |
+
def _parse_stage2_response(self, response: str) -> Dict:
|
| 358 |
+
"""Parse Stage 2 response."""
|
| 359 |
+
try:
|
| 360 |
+
return json.loads(response)
|
| 361 |
+
except json.JSONDecodeError:
|
| 362 |
+
return {
|
| 363 |
+
"detected_objects": [
|
| 364 |
+
{"id": 1, "type": "nucleus", "confidence": 0.8},
|
| 365 |
+
{"id": 2, "type": "cell", "confidence": 0.7}
|
| 366 |
+
],
|
| 367 |
+
"segmentation_guidance": response,
|
| 368 |
+
"object_count_estimate": 2
|
| 369 |
+
}
|
| 370 |
+
|
| 371 |
+
def _parse_stage3_response(self, response: str) -> Dict:
|
| 372 |
+
"""Parse Stage 3 response."""
|
| 373 |
+
try:
|
| 374 |
+
return json.loads(response)
|
| 375 |
+
except json.JSONDecodeError:
|
| 376 |
+
return {
|
| 377 |
+
"object_analyses": [
|
| 378 |
+
{"object_id": 1, "features": ["round", "bright"], "confidence": 0.8},
|
| 379 |
+
{"object_id": 2, "features": ["elongated", "dim"], "confidence": 0.7}
|
| 380 |
+
],
|
| 381 |
+
"feature_descriptions": [response],
|
| 382 |
+
"cmpo_mappings": []
|
| 383 |
+
}
|
| 384 |
+
|
| 385 |
+
def _parse_stage4_response(self, response: str) -> Dict:
|
| 386 |
+
"""Parse Stage 4 response."""
|
| 387 |
+
try:
|
| 388 |
+
return json.loads(response)
|
| 389 |
+
except json.JSONDecodeError:
|
| 390 |
+
return {
|
| 391 |
+
"population_summary": response,
|
| 392 |
+
"quantitative_metrics": {},
|
| 393 |
+
"cmpo_prevalence": {}
|
| 394 |
+
}
|
| 395 |
+
|
| 396 |
+
async def _call_claude_code_direct(self, prompt: str, image_data: Optional[str] = None) -> str:
    """Direct Claude API call for the Claude Code environment.

    Prefers the stored Anthropic SDK client; otherwise probes the
    environment for alternative integration paths.

    Raises:
        NotImplementedError: when no integration path is available.
    """
    # First try using the stored anthropic client.
    if getattr(self, '_anthropic_client', None):
        try:
            content = [{"type": "text", "text": prompt}]
            if image_data:
                content.append({
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/png",
                        "data": image_data,
                    },
                })

            # FIX: asyncio.get_event_loop() is deprecated inside a running
            # coroutine; get_running_loop() is the supported call here.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                lambda: self._anthropic_client.messages.create(
                    model=self.model,
                    max_tokens=4000,
                    messages=[{"role": "user", "content": content}],
                ),
            )

            logger.info("Successfully called Claude API directly")
            return response.content[0].text

        except Exception as e:
            logger.error(f"Direct Anthropic API call failed: {e}")
            raise

    # No SDK client: probe Claude Code specific integration paths
    # (CLI tools, env vars, local HTTP endpoints).
    logger.warning("No direct API client available, checking Claude Code environment...")

    try:
        # Speculative — depends on what the Claude Code environment exposes.
        return await self._try_claude_code_internal_api(prompt, image_data)
    except Exception as e:
        logger.warning(f"Claude Code internal API failed: {e}")
        raise NotImplementedError("Claude Code direct API integration not yet implemented")
|
| 444 |
+
|
| 445 |
+
async def _try_claude_code_internal_api(self, prompt: str, image_data: Optional[str] = None) -> str:
    """Probe the Claude Code environment for any usable internal API.

    Tries, in order: a ``claude`` CLI binary on PATH, Claude-specific
    environment variables, and well-known local HTTP endpoints.

    Raises:
        NotImplementedError: when every probe fails.
    """
    import subprocess

    # Method 1: a `claude` CLI tool available on PATH.
    try:
        probe = subprocess.run(['which', 'claude'], capture_output=True, text=True, timeout=5)
        if probe.returncode == 0:
            logger.info("Found claude CLI tool")
            return await self._call_claude_cli(prompt, image_data)
    except Exception:
        pass

    # Method 2: Claude-specific environment variables hinting at an
    # internal API.
    try:
        claude_vars = [name for name in os.environ if 'CLAUDE' in name.upper()]
        if claude_vars:
            logger.info(f"Found Claude environment variables: {claude_vars}")
            return await self._call_claude_with_env_vars(prompt, image_data)
    except Exception:
        pass

    # Method 3: a locally running HTTP bridge.
    try:
        return await self._call_claude_local_api(prompt, image_data)
    except Exception:
        pass

    raise NotImplementedError(
        "Claude Code internal API not available. "
        "Please set ANTHROPIC_API_KEY environment variable to use external Claude API."
    )
|
| 488 |
+
|
| 489 |
+
async def _call_claude_cli(self, prompt: str, image_data: Optional[str] = None) -> str:
    """Invoke the ``claude`` CLI tool with the prompt (and optional image).

    FIX: temp files are now removed in a ``finally`` block, so they no
    longer leak when subprocess.run raises (e.g. TimeoutExpired) — the
    previous version only cleaned up on the success path.
    """
    import subprocess
    import tempfile

    prompt_file = None
    image_file = None
    try:
        # Write the prompt to a temp file for the CLI.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
            f.write(prompt)
            prompt_file = f.name

        cmd = ['claude', '--file', prompt_file]

        # Decode and attach the image, if any.
        if image_data:
            with tempfile.NamedTemporaryFile(mode='wb', suffix='.png', delete=False) as f:
                f.write(base64.b64decode(image_data))
                image_file = f.name
            cmd.extend(['--image', image_file])

        # Run the blocking subprocess off the event loop thread.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None,
            lambda: subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        )

        if result.returncode == 0:
            logger.info("Successfully called Claude CLI")
            return result.stdout.strip()
        raise Exception(f"Claude CLI failed: {result.stderr}")

    except Exception as e:
        logger.error(f"Claude CLI call failed: {e}")
        raise
    finally:
        # Best-effort cleanup on every exit path.
        for path in (prompt_file, image_file):
            if path:
                try:
                    os.unlink(path)
                except OSError:
                    pass
|
| 533 |
+
|
| 534 |
+
async def _call_claude_with_env_vars(self, prompt: str, image_data: Optional[str] = None) -> str:
|
| 535 |
+
"""Try to use Claude with environment variables."""
|
| 536 |
+
# This would use any Claude-specific environment variables
|
| 537 |
+
# that might be available in Claude Code environment
|
| 538 |
+
raise NotImplementedError("Environment variable method not implemented")
|
| 539 |
+
|
| 540 |
+
async def _call_claude_local_api(self, prompt: str, image_data: Optional[str] = None) -> str:
    """Probe well-known localhost endpoints for a Claude-compatible API.

    Raises:
        Exception: when no endpoint responds with HTTP 200.
    """
    import aiohttp

    # Candidate endpoints a local bridge might expose.
    candidates = (
        'http://localhost:8080/claude',
        'http://127.0.0.1:8080/claude',
        'http://localhost:3000/api/claude',
    )

    for url in candidates:
        try:
            async with aiohttp.ClientSession() as session:
                payload = {'prompt': prompt}
                if image_data:
                    payload['image'] = image_data

                async with session.post(url, json=payload, timeout=30) as resp:
                    if resp.status == 200:
                        body = await resp.text()
                        logger.info(f"Successfully called local Claude API at {url}")
                        return body
        except Exception:
            # Endpoint unreachable or errored — try the next candidate.
            continue

    raise Exception("No local Claude API endpoints found")
|
app.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""
Minimal Anton Streamlit App - Crash-Safe Version
"""

import streamlit as st
import os
import sys
from pathlib import Path
import numpy as np
from PIL import Image
import tempfile
import traceback
import gc  # Garbage collection

# Allow very large microscopy images; PIL's default limit would reject them.
Image.MAX_IMAGE_PIXELS = None

# ---- Page configuration -------------------------------------------------
st.set_page_config(
    page_title="Anton Microscopy Analysis",
    page_icon="π¬",
    layout="wide",
)

st.title("π¬ Anton Microscopy Analysis")
st.markdown("**Simple Interface**: Upload image β See basic analysis")

# Placeholder used to surface debug messages at the top of the page.
debug_container = st.empty()

# ---- Sidebar ------------------------------------------------------------
st.sidebar.header("ποΈ Controls")

# Report which (if any) API key is configured.
api_status = []
if os.getenv('GOOGLE_API_KEY'):
    api_status.append("β Google API Key")
elif os.getenv('ANTHROPIC_API_KEY'):
    api_status.append("β Anthropic API Key")
else:
    api_status.append("β οΈ No API key - demo mode")

for status in api_status:
    st.sidebar.write(status)

st.sidebar.subheader("π Upload Image")

# A dynamic uploader key lets the widget be reset without st.rerun().
if 'upload_key' not in st.session_state:
    st.session_state.upload_key = 0

uploaded_file = st.sidebar.file_uploader(
    "Choose an image",
    type=['png', 'jpg', 'jpeg', 'tiff', 'bmp'],
    help="Upload microscopy image",
    key=f"image_uploader_{st.session_state.upload_key}",  # Dynamic key
)

analyze_btn = st.sidebar.button("π Analyze", type="primary", key="analyze_button")

# ---- Main layout --------------------------------------------------------
col1, col2 = st.columns([1, 1])

# Left column: image preview.
with col1:
    st.subheader("πΌοΈ Image")

    if uploaded_file is not None:
        debug_msg = f"π DEBUG: File uploaded - {uploaded_file.name}, size: {uploaded_file.size}"
        print(debug_msg)
        debug_container.info(debug_msg)
        try:
            # Reset file pointer to beginning (important!)
            uploaded_file.seek(0)
            print("DEBUG: File pointer reset to beginning")

            # Plain PIL loading is the most reliable path.
            image = Image.open(uploaded_file)
            debug_msg2 = f"π DEBUG: Image loaded successfully - size: {image.size}, mode: {image.mode}"
            print(debug_msg2)
            debug_container.success(debug_msg2)

            # Downscale oversized images to keep memory usage bounded.
            max_size = (1024, 1024)
            if image.size[0] > max_size[0] or image.size[1] > max_size[1]:
                print(f"DEBUG: Resizing image from {image.size} to max {max_size}")
                image.thumbnail(max_size, Image.Resampling.LANCZOS)
                st.info(f"π Image resized to {image.size} for display")
                print(f"DEBUG: Image resized to {image.size}")

            # Normalize to RGB for display.
            if image.mode != 'RGB':
                image = image.convert('RGB')

            # Cache in session state so reruns don't reprocess the upload.
            if 'current_image' not in st.session_state or st.session_state.get('uploaded_filename') != uploaded_file.name:
                # Drop the previous image before keeping the new one.
                if 'current_image' in st.session_state:
                    del st.session_state.current_image
                    gc.collect()

                st.session_state.current_image = image
                st.session_state.uploaded_filename = uploaded_file.name

            st.image(st.session_state.current_image, caption=f"Uploaded: {uploaded_file.name}", width=400)
            st.caption(f"Size: {st.session_state.current_image.size} | Mode: {st.session_state.current_image.mode}")

        except Exception as e:
            error_msg = f"π DEBUG: Error loading image: {e}"
            st.error(error_msg)
            debug_container.error(error_msg)
            # Log the traceback server-side only; don't expose it to users.
            print(f"Image loading error: {traceback.format_exc()}")
    else:
        st.info("π Upload an image to start")

# Right column: (mock) analysis results.
with col2:
    st.subheader("π§ Analysis Results")

    if analyze_btn and uploaded_file is not None:
        print("DEBUG: Analysis button clicked")
        try:
            # Mock analysis path to verify the UI plumbing works end-to-end.
            st.success("β Analysis Started!")
            print("DEBUG: Analysis started successfully")

            with st.spinner("Processing..."):
                import time
                print("DEBUG: Starting mock processing...")
                time.sleep(2)
                print("DEBUG: Mock processing complete")

            st.markdown("### π Mock Analysis Results")

            st.write("**Stage 1: Global Analysis**")
            st.text_area("Description:",
                         "Mock analysis: This appears to be a microscopy image with cellular structures. "
                         "The image shows good contrast and appears suitable for analysis.",
                         height=100)

            st.write("**Stage 2: Object Detection**")
            st.text_area("Objects:",
                         "Mock detection: Multiple cellular objects detected. "
                         "Estimated cell count: 15-25 cells visible.",
                         height=100)

            st.success("β Mock analysis complete!")

        except Exception as e:
            st.error(f"Analysis failed: {e}")
            st.code(traceback.format_exc())

    elif analyze_btn:
        st.warning("Please upload an image first!")
    else:
        st.info("π Upload image and click Analyze")

# ---- Footer -------------------------------------------------------------
st.markdown("---")
st.markdown("π¬ **Anton Framework** - Minimal Demo Version")
|
src/streamlit_app.py
DELETED
|
@@ -1,40 +0,0 @@
|
|
| 1 |
-
import altair as alt
import numpy as np
import pandas as pd
import streamlit as st

"""
# Welcome to Streamlit!

Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
forums](https://discuss.streamlit.io).

In the meantime, below is an example of what you can do with just a few lines of code:
"""

# Spiral parameters driven by two sliders.
total_points = st.slider("Number of points in spiral", 1, 10000, 1100)
total_turns = st.slider("Number of turns in spiral", 1, 300, 31)

# Parametric spiral: the radius grows linearly with position along [0, 1].
positions = np.linspace(0, 1, total_points)
angle = 2 * np.pi * total_turns * positions
spiral_x = positions * np.cos(angle)
spiral_y = positions * np.sin(angle)

frame = pd.DataFrame({
    "x": spiral_x,
    "y": spiral_y,
    "idx": positions,
    "rand": np.random.randn(total_points),
})

# Scatter plot: colour tracks position along the spiral, size is random.
st.altair_chart(alt.Chart(frame, height=700, width=700)
    .mark_point(filled=True)
    .encode(
        x=alt.X("x", axis=None),
        y=alt.Y("y", axis=None),
        color=alt.Color("idx", legend=None, scale=alt.Scale()),
        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
    ))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|