alphagenome_mcp / tools /tissue_ontology_mapping.py
Paper2Agent's picture
Upload 10 files
0cdac39 verified
"""
Tissue ontology mapping tutorial for navigating biological data ontologies in AlphaGenome.
This MCP Server provides 2 tools:
1. explore_output_metadata: Export the output metadata table for a selected organism (human or mouse) to CSV for exploration
2. count_tracks_by_output_type: Count tracks by output type for human and mouse organisms
All tools extracted from `https://github.com/google-deepmind/alphagenome/tree/main/colabs/tissue_ontology_mapping.ipynb`.
"""
# Standard imports
from typing import Annotated, Literal, Any
import pandas as pd
import numpy as np
from pathlib import Path
import os
from fastmcp import FastMCP
from datetime import datetime
# Base persistent directory (HF Spaces guarantees /data is writable & persistent)
BASE_DIR = Path("/data")
DEFAULT_INPUT_DIR = BASE_DIR / "tmp_inputs"
DEFAULT_OUTPUT_DIR = BASE_DIR / "tmp_outputs"

# Both locations can be redirected through environment variables; the
# defaults above are used when the variables are not set.
INPUT_DIR = Path(os.environ.get("TISSUE_ONTOLOGY_MAPPING_INPUT_DIR", DEFAULT_INPUT_DIR))
OUTPUT_DIR = Path(os.environ.get("TISSUE_ONTOLOGY_MAPPING_OUTPUT_DIR", DEFAULT_OUTPUT_DIR))

# Ensure directories exist
INPUT_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Evaluated once, when the module is imported: the value is therefore the
# same for the whole lifetime of the process, not per tool call.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Default API key for the tools in this module. An unset variable yields an
# empty string instead of raising KeyError at import time; the tools reject an
# empty key with ValueError, so the server can still start and callers can
# supply their own key per call.
# NOTE(review): this value is used as a parameter default on the tools below —
# confirm the generated MCP tool schema does not expose defaults to clients.
ALPHAGENOME_API_KEY = os.environ.get("ALPHAGENOME_API_KEY", "")

# MCP server instance
tissue_ontology_mapping_mcp = FastMCP(name="tissue_ontology_mapping")
@tissue_ontology_mapping_mcp.tool
def explore_output_metadata(
    # Analysis parameters with tutorial defaults
    organism: Annotated[Literal["HOMO_SAPIENS", "MUS_MUSCULUS"], "Target organism for metadata exploration"] = "HOMO_SAPIENS",
    api_key: Annotated[str, "AlphaGenome API key for accessing the DNA model"] = ALPHAGENOME_API_KEY,
    out_prefix: Annotated[str | None, "Output file prefix"] = None,
) -> dict:
    """
    Export the AlphaGenome output metadata table for one organism to a CSV file.

    The table is fetched from the DNA model client, concatenated across output
    types, and written under OUTPUT_DIR so that ontology terms and tissue
    types can be explored offline.

    Args:
        organism: Name of the attribute on ``dna_client.Organism`` to query.
        api_key: AlphaGenome API key used to create the DNA model client.
        out_prefix: File name prefix; defaults to
            ``output_metadata_<organism in lower case>``.

    Returns:
        A dict with a ``message``, the tutorial ``reference`` URL, and an
        ``artifacts`` list holding the description and absolute path of the
        CSV that was written.

    Raises:
        ValueError: If ``api_key`` is empty.
    """
    # Imported lazily so the module can be loaded without alphagenome installed.
    from alphagenome.models import dna_client

    if not api_key:
        raise ValueError("API key must be provided")

    # Create DNA model client
    dna_model = dna_client.create(api_key)

    # Get organism enum
    org_enum = getattr(dna_client.Organism, organism)

    # Get output metadata (presumably a pandas DataFrame — it is only used
    # through ``to_csv`` here).
    output_metadata = dna_model.output_metadata(org_enum).concatenate()

    # Set output filename
    if out_prefix is None:
        out_prefix = f"output_metadata_{organism.lower()}"

    # The timestamp is taken per call (with microseconds) rather than once per
    # process, so repeated calls with the same prefix do not overwrite an
    # earlier artifact.
    call_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    output_file = OUTPUT_DIR / f"{out_prefix}_{call_timestamp}.csv"

    # Save metadata as CSV
    output_metadata.to_csv(output_file, index=False)

    # Return standardized format
    return {
        "message": f"Output metadata exploration completed for {organism}",
        "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/tissue_ontology_mapping.ipynb",
        "artifacts": [
            {
                "description": f"Output metadata for {organism}",
                "path": str(output_file.resolve())
            }
        ]
    }
@tissue_ontology_mapping_mcp.tool
def count_tracks_by_output_type(
    # Analysis parameters with tutorial defaults
    api_key: Annotated[str, "AlphaGenome API key for accessing the DNA model"] = ALPHAGENOME_API_KEY,
    out_prefix: Annotated[str | None, "Output file prefix"] = None,
) -> dict:
    """
    Count tracks per output type for human and mouse and save the comparison as CSV.

    For each organism the output metadata is concatenated, grouped by its
    ``output_type`` column and counted. The two count series are placed side by
    side in one table indexed by output type.

    Args:
        api_key: AlphaGenome API key used to create the DNA model client.
        out_prefix: File name prefix; defaults to ``track_counts``.

    Returns:
        A dict with a ``message``, the tutorial ``reference`` URL, and an
        ``artifacts`` list holding the description and absolute path of the
        CSV that was written.

    Raises:
        ValueError: If ``api_key`` is empty.
    """
    # Imported lazily so the module can be loaded without alphagenome installed.
    from alphagenome.models import dna_client

    if not api_key:
        raise ValueError("API key must be provided")

    # Create DNA model client
    dna_model = dna_client.create(api_key)

    # Column label -> organism, in the order the columns appear in the output.
    organisms = {
        '# Human tracks': dna_client.Organism.HOMO_SAPIENS,
        '# Mouse tracks': dna_client.Organism.MUS_MUSCULUS,
    }
    per_organism_counts = [
        dna_model.output_metadata(org_enum)
        .concatenate()
        .groupby('output_type')
        .size()
        .rename(column_label)
        for column_label, org_enum in organisms.items()
    ]

    # Combine the results. An output type missing for one organism becomes a
    # missing value after the column-wise concat; the nullable Int64 dtype
    # keeps the remaining counts as integers rather than floats.
    track_counts = pd.concat(per_organism_counts, axis=1).astype(pd.Int64Dtype())

    # Set output filename
    if out_prefix is None:
        out_prefix = "track_counts"

    # The timestamp is taken per call (with microseconds) rather than once per
    # process, so repeated calls with the same prefix do not overwrite an
    # earlier artifact.
    call_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    output_file = OUTPUT_DIR / f"{out_prefix}_{call_timestamp}.csv"

    # Save track counts as CSV (index kept: it holds the output type labels)
    track_counts.to_csv(output_file)

    # Return standardized format
    return {
        "message": "Track counting by output type completed successfully",
        "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/tissue_ontology_mapping.ipynb",
        "artifacts": [
            {
                "description": "Track counts by output type",
                "path": str(output_file.resolve())
            }
        ]
    }