pskeshu committed on
Commit
b329e55
·
1 Parent(s): 4012c3c

fixes bug

Browse files
CLAUDE.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Anton Microscopy Project Memory
2
+
3
+ ## VLM Integration Options
4
+
5
+ ### Current Implementation
6
+ - Uses **Google Gemini 1.5 Flash** via `google-generativeai` library
7
+ - Authentication: `GOOGLE_API_KEY` environment variable
8
+ - Location: `/anton/vlm/interface.py:79-91`
9
+ - Supports multimodal microscopy image analysis
10
+
11
+ ### Hugging Face VLM Alternatives
12
+
13
+ #### Open Source Models (free, run locally):
14
+ - `microsoft/kosmos-2-patch14-224` - Good for object detection
15
+ - `Salesforce/blip2-opt-2.7b` - Image captioning and QA
16
+ - `llava-hf/llava-1.5-7b-hf` - Strong multimodal reasoning
17
+
18
+ #### Hugging Face Inference API (hosted):
19
+ - `meta-llama/Llama-3.2-11B-Vision-Instruct`
20
+ - `microsoft/Phi-3.5-vision-instruct`
21
+ - `Qwen/Qwen2-VL-7B-Instruct`
22
+
23
+ #### Benefits of switching to HF VLMs:
24
+ - No API costs for local models
25
+ - Better privacy (data stays local)
26
+ - More control over model behavior
27
+ - Can fine-tune for microscopy-specific tasks
28
+
29
+ ### Integration Points
30
+ - Main VLM interface: `/anton/vlm/interface.py`
31
+ - Pipeline integration: `/anton/core/pipeline.py:23-28`
32
+ - UI configuration: `/app.py:57-69`
33
+ - Dependencies: `requirements.txt:8` (google-generativeai)
anton/cmpo/ontology.py CHANGED
@@ -10,7 +10,13 @@ import logging
10
  class CMPOOntology:
11
  """Manage CMPO ontology data and provide lookup functionality"""
12
 
13
- def __init__(self, data_path="data/cmpo.json", cache_path="data/cmpo_cache.pkl"):
 
 
 
 
 
 
14
  self.data_path = Path(data_path)
15
  self.cache_path = Path(cache_path)
16
  self.ontology = {}
@@ -20,6 +26,23 @@ class CMPOOntology:
20
 
21
  self._load_ontology()
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def _load_ontology(self):
24
  """Load CMPO ontology from JSON file or download if needed"""
25
  if self.data_path.exists():
@@ -64,10 +87,7 @@ class CMPOOntology:
64
  ontology_data = self._parse_obo_content(response.text)
65
 
66
  # Save processed data
67
- self.data_path.parent.mkdir(parents=True, exist_ok=True)
68
- with open(self.data_path, 'w') as f:
69
- json.dump(ontology_data, f, indent=2)
70
-
71
  self.ontology = ontology_data
72
  logging.info(f"Successfully loaded {len(ontology_data)} CMPO terms")
73
 
@@ -178,10 +198,7 @@ class CMPOOntology:
178
  page += 1
179
 
180
  # Save to file
181
- self.data_path.parent.mkdir(parents=True, exist_ok=True)
182
- with open(self.data_path, 'w') as f:
183
- json.dump(ontology_data, f, indent=2)
184
-
185
  self.ontology = ontology_data
186
 
187
  def _download_owl_file(self):
@@ -219,10 +236,7 @@ class CMPOOntology:
219
  Path(temp_owl).unlink()
220
 
221
  # Save processed data
222
- self.data_path.parent.mkdir(parents=True, exist_ok=True)
223
- with open(self.data_path, 'w') as f:
224
- json.dump(ontology_data, f, indent=2)
225
-
226
  self.ontology = ontology_data
227
 
228
  except ImportError:
@@ -267,10 +281,7 @@ class CMPOOntology:
267
  }
268
 
269
  # Save minimal ontology
270
- self.data_path.parent.mkdir(parents=True, exist_ok=True)
271
- with open(self.data_path, 'w') as f:
272
- json.dump(minimal_ontology, f, indent=2)
273
-
274
  self.ontology = minimal_ontology
275
 
276
  def _build_indices(self):
 
10
  class CMPOOntology:
11
  """Manage CMPO ontology data and provide lookup functionality"""
12
 
13
+ def __init__(self, data_path=None, cache_path=None):
14
+ # Default to package-relative paths
15
+ if data_path is None:
16
+ data_path = Path(__file__).parent / "data" / "cmpo.json"
17
+ if cache_path is None:
18
+ cache_path = Path(__file__).parent / "data" / "cmpo_cache.pkl"
19
+
20
  self.data_path = Path(data_path)
21
  self.cache_path = Path(cache_path)
22
  self.ontology = {}
 
26
 
27
  self._load_ontology()
28
 
29
def _save_ontology_data(self, ontology_data):
    """Save ontology data as JSON, falling back to the temp directory.

    The data is first written to ``self.data_path``. If that location
    cannot be written for any OS-level reason (permission denied,
    read-only filesystem, a path component that is not a directory, ...),
    the data is written to ``<system temp dir>/anton_cmpo/cmpo.json``
    instead and ``self.data_path`` is updated to point at that file.

    Args:
        ontology_data: JSON-serialisable ontology content to persist.

    Raises:
        OSError: If the fallback location cannot be written either.
        TypeError: If ``ontology_data`` is not JSON-serialisable
            (propagated from ``json.dump``).
    """
    import tempfile

    def _write_json(target):
        # Create the containing directory on demand, then dump the data.
        target.parent.mkdir(parents=True, exist_ok=True)
        with open(target, 'w', encoding='utf-8') as f:
            json.dump(ontology_data, f, indent=2)

    try:
        _write_json(self.data_path)
    except OSError as exc:
        # PermissionError is only one of several ways an installed package
        # directory can be unwritable (e.g. EROFS on read-only mounts), so
        # every OSError triggers the fallback rather than aborting the load.
        logging.warning(
            "Could not write CMPO data to %s (%s); using temp directory",
            self.data_path,
            exc,
        )
        temp_file = Path(tempfile.gettempdir()) / "anton_cmpo" / "cmpo.json"
        _write_json(temp_file)
        logging.info(f"Saved CMPO data to temporary location: {temp_file}")
        self.data_path = temp_file
45
+
46
  def _load_ontology(self):
47
  """Load CMPO ontology from JSON file or download if needed"""
48
  if self.data_path.exists():
 
87
  ontology_data = self._parse_obo_content(response.text)
88
 
89
  # Save processed data
90
+ self._save_ontology_data(ontology_data)
 
 
 
91
  self.ontology = ontology_data
92
  logging.info(f"Successfully loaded {len(ontology_data)} CMPO terms")
93
 
 
198
  page += 1
199
 
200
  # Save to file
201
+ self._save_ontology_data(ontology_data)
 
 
 
202
  self.ontology = ontology_data
203
 
204
  def _download_owl_file(self):
 
236
  Path(temp_owl).unlink()
237
 
238
  # Save processed data
239
+ self._save_ontology_data(ontology_data)
 
 
 
240
  self.ontology = ontology_data
241
 
242
  except ImportError:
 
281
  }
282
 
283
  # Save minimal ontology
284
+ self._save_ontology_data(minimal_ontology)
 
 
 
285
  self.ontology = minimal_ontology
286
 
287
  def _build_indices(self):
prompts/cmpo_mapping.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Given this microscopy description: "{FREEFORM_DESCRIPTION}"
2
+ Map relevant phrases to CMPO ontology terms.
3
+
4
+ Available CMPO terms: {CMPO_CONTEXT}
5
+
6
+ Output format:
7
+ {
8
+ "mappings": [
9
+ {
10
+ "phrase": "nuclear fragmentation",
11
+ "cmpo_id": "CMPO_0000094",
12
+ "confidence": 0.95,
13
+ "reasoning": "Clear indication of apoptotic phenotype"
14
+ }
15
+ ]
16
+ }
17
+
prompts/stage1_global.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ Analyze this fluorescence microscopy image for overall scene understanding.
2
+ Provide:
3
+ 1. Image quality assessment
4
+ 2. Staining type identification
5
+ 3. General cellular/tissue characteristics
6
+ 4. Recommended analysis approach
7
+
8
+ Focus on: [DYNAMIC_CONTEXT]
9
+ Output format: Natural language description + structured assessment
prompts/stage2_objects.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Task: Identify major structures and suggest segmentation strategies for this fluorescence microscopy image.
2
+ Input: [image, global context]
3
+ Provide:
4
+ 1. List of detected objects/structures
5
+ 2. Segmentation guidance (e.g., nuclei, cytoplasm)
6
+ 3. Object count estimate
7
+ Output format: Structured JSON with detected_objects, segmentation_guidance, object_count_estimate
prompts/stage3_features.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Task: Analyze segmented regions for complex CMPO features, focusing on texture-based patterns.
2
+ Input:
3
+ - Region patches: [nuclei, cytoplasm patches]
4
+ - Config: {stain: "{STAIN}", channel: {CHANNEL}, phenotype_focus: "{PHENOTYPE}"}
5
+ - Target features: ["chromatin_condensation", "LC3_puncta", "nuclear_fragmentation"]
6
+ Analyze:
7
+ 1. Texture patterns (granular, smooth, fragmented)
8
+ 2. Intensity distributions (bright spots, uniform, heterogeneous)
9
+ 3. Morphological features (shape irregularities, size variations)
10
+ Provide:
11
+ 1. Feature descriptions with confidence scores
12
+ 2. CMPO term mappings
13
+ 3. Supporting visual evidence
14
+ Output format: {features: [{name: str, confidence: float, evidence: str, cmpo_id: str}]}
prompts/stage4_population.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Task: Analyze population-level patterns in this fluorescence microscopy image.
2
+ Input:
3
+ - Individual cell feature analyses (morphology, intensity, localization patterns)
4
+ - Experimental context and biological metadata
5
+ Provide:
6
+ 1. Population summary describing overall cellular characteristics and phenotype prevalence
7
+ 2. Quantitative estimates (e.g., percentage of cells showing specific phenotypes)
8
+ 3. Biological interpretation of patterns in experimental context
9
+ 4. CMPO phenotype relevance assessment
10
+ Output format: Natural language biological analysis with structured sections for population summary, quantitative insights, and biological interpretation
requirements.txt CHANGED
@@ -10,4 +10,5 @@ anthropic
10
  python-dotenv
11
  streamlit==1.30.0
12
  plotly
13
- networkx
 
 
10
  python-dotenv
11
  streamlit==1.30.0
12
  plotly
13
+ networkx
14
+ owlready2