Spaces:
Running
Running
| """ | |
| Tissue ontology mapping tutorial for navigating biological data ontologies in AlphaGenome. | |
| This MCP Server provides 2 tools: | |
| 1. explore_output_metadata: Export the output metadata table for a chosen organism (human or mouse) to a CSV file | |
| 2. count_tracks_by_output_type: Count tracks by output type for human and mouse organisms | |
| All tools extracted from `https://github.com/google-deepmind/alphagenome/tree/main/colabs/tissue_ontology_mapping.ipynb`. | |
| """ | |
| # Standard imports | |
| from typing import Annotated, Literal, Any | |
| import pandas as pd | |
| import numpy as np | |
| from pathlib import Path | |
| import os | |
| from fastmcp import FastMCP | |
| from datetime import datetime | |
# Base persistent directory (HF Spaces guarantees /data is writable & persistent)
BASE_DIR = Path("/data")
DEFAULT_INPUT_DIR = BASE_DIR / "tmp_inputs"
DEFAULT_OUTPUT_DIR = BASE_DIR / "tmp_outputs"

# The working directories can be redirected through environment variables;
# otherwise they fall back to the defaults under BASE_DIR.
INPUT_DIR = Path(os.environ.get("TISSUE_ONTOLOGY_MAPPING_INPUT_DIR", DEFAULT_INPUT_DIR))
OUTPUT_DIR = Path(os.environ.get("TISSUE_ONTOLOGY_MAPPING_OUTPUT_DIR", DEFAULT_OUTPUT_DIR))

# Ensure directories exist
INPUT_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Timestamp for output names. NOTE: evaluated once, when the module is
# imported, so it identifies the process start rather than an individual call.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Default API key for the tools. A missing variable yields an empty string
# instead of a KeyError at import time, so the tools' own
# "API key must be provided" check reports the problem and callers can still
# pass a key explicitly per call.
ALPHAGENOME_API_KEY = os.environ.get("ALPHAGENOME_API_KEY", "")

# MCP server instance
tissue_ontology_mapping_mcp = FastMCP(name="tissue_ontology_mapping")
def explore_output_metadata(
    # Analysis parameters with tutorial defaults
    organism: Annotated[Literal["HOMO_SAPIENS", "MUS_MUSCULUS"], "Target organism for metadata exploration"] = "HOMO_SAPIENS",
    api_key: Annotated[str, "AlphaGenome API key for accessing the DNA model"] = ALPHAGENOME_API_KEY,
    out_prefix: Annotated[str | None, "Output file prefix"] = None,
) -> dict:
    """
    Export the AlphaGenome output metadata for one organism to a CSV file.

    The metadata is fetched from the DNA model client, concatenated into a
    single table and written (without the index) under OUTPUT_DIR.

    Args:
        organism: Name of the `dna_client.Organism` member to query.
        api_key: AlphaGenome API key; defaults to ALPHAGENOME_API_KEY.
        out_prefix: File name prefix; defaults to
            "output_metadata_<organism in lower case>".

    Returns:
        A dict with a "message", the tutorial "reference" URL and an
        "artifacts" list holding the description and absolute path of the CSV.

    Raises:
        ValueError: If `api_key` is empty.
    """
    # Deferred import: alphagenome is only needed once the tool is called.
    from alphagenome.models import dna_client

    if not api_key:
        raise ValueError("API key must be provided")

    # Create DNA model client
    dna_model = dna_client.create(api_key)

    # Resolve the organism name to the corresponding Organism attribute.
    org_enum = getattr(dna_client.Organism, organism)

    # Fetch the metadata and flatten it into one table.
    output_metadata = dna_model.output_metadata(org_enum).concatenate()

    if out_prefix is None:
        out_prefix = f"output_metadata_{organism.lower()}"

    # Stamp the file at call time. The module-level `timestamp` is fixed when
    # the module is imported, so reusing it would make every call with the
    # same prefix overwrite the previous result.
    run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = OUTPUT_DIR / f"{out_prefix}_{run_stamp}.csv"

    # Save metadata as CSV
    output_metadata.to_csv(output_file, index=False)

    # Return standardized format
    return {
        "message": f"Output metadata exploration completed for {organism}",
        "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/tissue_ontology_mapping.ipynb",
        "artifacts": [
            {
                "description": f"Output metadata for {organism}",
                "path": str(output_file.resolve()),
            }
        ],
    }
def count_tracks_by_output_type(
    # Analysis parameters with tutorial defaults
    api_key: Annotated[str, "AlphaGenome API key for accessing the DNA model"] = ALPHAGENOME_API_KEY,
    out_prefix: Annotated[str | None, "Output file prefix"] = None,
) -> dict:
    """
    Count tracks per output type for human and mouse and save the comparison as CSV.

    For each organism the output metadata is concatenated, grouped by its
    'output_type' column and counted. The two count series are placed side by
    side in one table and written under OUTPUT_DIR.

    Args:
        api_key: AlphaGenome API key; defaults to ALPHAGENOME_API_KEY.
        out_prefix: File name prefix; defaults to "track_counts".

    Returns:
        A dict with a "message", the tutorial "reference" URL and an
        "artifacts" list holding the description and absolute path of the CSV.

    Raises:
        ValueError: If `api_key` is empty.
    """
    # Deferred import: alphagenome is only needed once the tool is called.
    from alphagenome.models import dna_client

    if not api_key:
        raise ValueError("API key must be provided")

    # Create DNA model client
    dna_model = dna_client.create(api_key)

    # Organism queried -> column label used in the resulting table.
    columns = (
        (dna_client.Organism.HOMO_SAPIENS, "# Human tracks"),
        (dna_client.Organism.MUS_MUSCULUS, "# Mouse tracks"),
    )
    per_organism_counts = [
        dna_model.output_metadata(org)
        .concatenate()
        .groupby("output_type")
        .size()
        .rename(label)
        for org, label in columns
    ]

    # Align both series on output type. The nullable Int64 dtype keeps the
    # counts integral even where one organism has no entry for an output type.
    track_counts = pd.concat(per_organism_counts, axis=1).astype(pd.Int64Dtype())

    if out_prefix is None:
        out_prefix = "track_counts"

    # Stamp the file at call time. The module-level `timestamp` is fixed when
    # the module is imported, so reusing it would make every call with the
    # same prefix overwrite the previous result.
    run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = OUTPUT_DIR / f"{out_prefix}_{run_stamp}.csv"

    # Save track counts as CSV (the index carries the output type).
    track_counts.to_csv(output_file)

    # Return standardized format
    return {
        "message": "Track counting by output type completed successfully",
        "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/tissue_ontology_mapping.ipynb",
        "artifacts": [
            {
                "description": "Track counts by output type",
                "path": str(output_file.resolve()),
            }
        ],
    }