Spaces:
Running
Running
File size: 5,109 Bytes
0cdac39 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
"""
Tissue ontology mapping tutorial for navigating biological data ontologies in AlphaGenome.
This MCP Server provides 2 tools:
1. explore_output_metadata: Export the output metadata table for a chosen organism (human or mouse) to a CSV file for exploration
2. count_tracks_by_output_type: Count tracks by output type for human and mouse organisms
All tools extracted from `https://github.com/google-deepmind/alphagenome/tree/main/colabs/tissue_ontology_mapping.ipynb`.
"""
# Standard imports
from typing import Annotated, Literal, Any
import pandas as pd
import numpy as np
from pathlib import Path
import os
from fastmcp import FastMCP
from datetime import datetime
# Root for everything this server writes. The author's intent is that /data is
# the writable, persistent volume on HF Spaces — confirm storage is enabled there.
BASE_DIR = Path("/data")
DEFAULT_INPUT_DIR = BASE_DIR.joinpath("tmp_inputs")
DEFAULT_OUTPUT_DIR = BASE_DIR.joinpath("tmp_outputs")

# Either location can be redirected through an environment variable; when the
# variable is unset the defaults under BASE_DIR are used.
INPUT_DIR = Path(os.getenv("TISSUE_ONTOLOGY_MAPPING_INPUT_DIR", DEFAULT_INPUT_DIR))
OUTPUT_DIR = Path(os.getenv("TISSUE_ONTOLOGY_MAPPING_OUTPUT_DIR", DEFAULT_OUTPUT_DIR))

# Create both directories (and any missing parents) at import time.
INPUT_DIR.mkdir(exist_ok=True, parents=True)
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

# Second-resolution stamp, evaluated once when the module is imported.
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"

# Required configuration: a missing variable raises KeyError at import time.
ALPHAGENOME_API_KEY = os.environ["ALPHAGENOME_API_KEY"]

# The MCP server object that the tool functions below register against.
tissue_ontology_mapping_mcp = FastMCP(name="tissue_ontology_mapping")
@tissue_ontology_mapping_mcp.tool
def explore_output_metadata(
    # Analysis parameters with tutorial defaults
    organism: Annotated[Literal["HOMO_SAPIENS", "MUS_MUSCULUS"], "Target organism for metadata exploration"] = "HOMO_SAPIENS",
    api_key: Annotated[str, "AlphaGenome API key for accessing the DNA model"] = ALPHAGENOME_API_KEY,
    out_prefix: Annotated[str | None, "Output file prefix"] = None,
) -> dict:
    """
    Explore output metadata for specific organisms to find ontology terms and tissue types.
    Input is organism selection and API key and output is metadata table for interactive exploration.

    Args:
        organism: Name of the ``dna_client.Organism`` member to query.
        api_key: Key passed to ``dna_client.create``; defaults to the
            server's ``ALPHAGENOME_API_KEY``.
        out_prefix: Leading part of the CSV file name. When ``None``,
            ``output_metadata_<organism in lower case>`` is used.

    Returns:
        A dict with ``message``, ``reference`` and ``artifacts`` keys, where
        ``artifacts`` holds one entry giving the resolved path of the CSV.

    Raises:
        ValueError: If ``api_key`` is empty.
    """
    # NOTE(review): the default for `api_key` is the server's own secret. Tool
    # schemas typically publish parameter defaults to clients — confirm the key
    # is not exposed through the generated schema.

    # Imported lazily so the module can be loaded without touching alphagenome.
    from alphagenome.models import dna_client

    if not api_key:
        raise ValueError("API key must be provided")

    # Create DNA model client
    dna_model = dna_client.create(api_key)

    # Translate the organism name into the matching attribute of Organism.
    org_enum = getattr(dna_client.Organism, organism)

    # Fetch the metadata and merge it into a single table
    # (presumably a pandas DataFrame, since it is written with `to_csv`).
    output_metadata = dna_model.output_metadata(org_enum).concatenate()

    if out_prefix is None:
        out_prefix = f"output_metadata_{organism.lower()}"

    # Stamp the file at call time. A stamp computed once at import would make
    # every call with the same prefix overwrite the previous result for as long
    # as the server process lives.
    call_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = OUTPUT_DIR / f"{out_prefix}_{call_stamp}.csv"

    # Save metadata as CSV
    output_metadata.to_csv(output_file, index=False)

    # Return standardized format
    return {
        "message": f"Output metadata exploration completed for {organism}",
        "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/tissue_ontology_mapping.ipynb",
        "artifacts": [
            {
                "description": f"Output metadata for {organism}",
                "path": str(output_file.resolve()),
            }
        ],
    }
@tissue_ontology_mapping_mcp.tool
def count_tracks_by_output_type(
    # Analysis parameters with tutorial defaults
    api_key: Annotated[str, "AlphaGenome API key for accessing the DNA model"] = ALPHAGENOME_API_KEY,
    out_prefix: Annotated[str | None, "Output file prefix"] = None,
) -> dict:
    """
    Count tracks by output type for both human and mouse organisms to understand data availability.
    Input is API key and output is track counts table comparing human vs mouse availability.

    Args:
        api_key: Key passed to ``dna_client.create``; defaults to the
            server's ``ALPHAGENOME_API_KEY``.
        out_prefix: Leading part of the CSV file name. When ``None``,
            ``track_counts`` is used.

    Returns:
        A dict with ``message``, ``reference`` and ``artifacts`` keys, where
        ``artifacts`` holds one entry giving the resolved path of the CSV.

    Raises:
        ValueError: If ``api_key`` is empty.
    """
    # NOTE(review): the default for `api_key` is the server's own secret. Tool
    # schemas typically publish parameter defaults to clients — confirm the key
    # is not exposed through the generated schema.

    # Imported lazily so the module can be loaded without touching alphagenome.
    from alphagenome.models import dna_client

    if not api_key:
        raise ValueError("API key must be provided")

    # Create DNA model client
    dna_model = dna_client.create(api_key)

    def _count_tracks(organism, column_name):
        """Return the number of metadata rows per 'output_type' for one organism."""
        return (
            dna_model.output_metadata(organism)
            .concatenate()
            .groupby('output_type')
            .size()
            .rename(column_name)
        )

    human_tracks = _count_tracks(dna_client.Organism.HOMO_SAPIENS, '# Human tracks')
    mouse_tracks = _count_tracks(dna_client.Organism.MUS_MUSCULUS, '# Mouse tracks')

    # Place the two count columns side by side. The nullable integer dtype
    # keeps counts integral even when an output type is missing for one
    # organism and the cell has to be empty.
    track_counts = pd.concat([human_tracks, mouse_tracks], axis=1).astype(pd.Int64Dtype())

    if out_prefix is None:
        out_prefix = "track_counts"

    # Stamp the file at call time. A stamp computed once at import would make
    # every call with the same prefix overwrite the previous result for as long
    # as the server process lives.
    call_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = OUTPUT_DIR / f"{out_prefix}_{call_stamp}.csv"

    # Save track counts as CSV (the index, i.e. the output type, is kept).
    track_counts.to_csv(output_file)

    # Return standardized format
    return {
        "message": "Track counting by output type completed successfully",
        "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/tissue_ontology_mapping.ipynb",
        "artifacts": [
            {
                "description": "Track counts by output type",
                "path": str(output_file.resolve()),
            }
        ],
    }