""" Essential commands for AlphaGenome API interaction covering data structures and operations. This MCP Server provides 8 tools: 1. create_genomic_interval: Create genomic intervals for DNA regions 2. create_genomic_variant: Create genomic variants for genetic changes 3. create_track_data: Create TrackData objects from arrays and metadata 4. create_variant_scores: Create AnnData objects for variant scoring results 5. genomic_interval_operations: Perform operations on genomic intervals 6. variant_interval_operations: Check variant overlaps with intervals 7. track_data_operations: Filter, resize, slice and transform TrackData 8. track_data_resolution_conversion: Convert between track data resolutions All tools extracted from `https://github.com/google-deepmind/alphagenome/tree/main/colabs/essential_commands.ipynb`. """ # Standard imports from typing import Annotated, Literal, Any import pandas as pd import numpy as np from pathlib import Path import os from fastmcp import FastMCP from datetime import datetime # AlphaGenome imports from alphagenome.data import genome, track_data from alphagenome.models import dna_client import anndata # Base persistent directory (HF Spaces guarantees /data is writable & persistent) BASE_DIR = Path("/data") DEFAULT_INPUT_DIR = BASE_DIR / "tmp_inputs" DEFAULT_OUTPUT_DIR = BASE_DIR / "tmp_outputs" INPUT_DIR = Path(os.environ.get("ESSENTIAL_COMMANDS_INPUT_DIR", DEFAULT_INPUT_DIR)) OUTPUT_DIR = Path(os.environ.get("ESSENTIAL_COMMANDS_OUTPUT_DIR", DEFAULT_OUTPUT_DIR)) # Ensure directories exist INPUT_DIR.mkdir(parents=True, exist_ok=True) OUTPUT_DIR.mkdir(parents=True, exist_ok=True) # Timestamp for unique outputs timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # MCP server instance essential_commands_mcp = FastMCP(name="essential_commands") @essential_commands_mcp.tool def create_genomic_interval( chromosome: Annotated[str, "Chromosome name (e.g., 'chr1', 'chr2')"] = "chr1", start: Annotated[int, "Start position (0-based)"] = 1000, end: Annotated[int, "End position (0-based, exclusive)"] = 1010, strand: Annotated[Literal["+", "-", "."], "DNA strand"] = ".", name: Annotated[str, "Interval name"] = "", out_prefix: Annotated[str | None, "Output file prefix"] = None, ) -> dict: """ Create genomic intervals for DNA regions with basic properties and operations. Input is genomic coordinates and output is interval object with properties and operations results. """ # Create genomic interval interval = genome.Interval(chromosome=chromosome, start=start, end=end, strand=strand, name=name) # Calculate basic properties results = { "interval": { "chromosome": interval.chromosome, "start": interval.start, "end": interval.end, "strand": interval.strand, "name": interval.name, "center": interval.center(), "width": interval.width, }, "operations": { "resize_100bp": str(interval.resize(100)), "resize_1MB": str(interval.resize(dna_client.SEQUENCE_LENGTH_1MB)), } } # Save results prefix = out_prefix or f"genomic_interval_{timestamp}" output_file = OUTPUT_DIR / f"{prefix}.csv" # Flatten results for CSV rows = [] for category, data in results.items(): for key, value in data.items(): rows.append({"category": category, "property": key, "value": str(value)}) df = pd.DataFrame(rows) df.to_csv(output_file, index=False) return { "message": f"Genomic interval created: {interval.chromosome}:{interval.start}-{interval.end}", "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/essential_commands.ipynb", "artifacts": [ { "description": "Genomic interval properties", "path": str(output_file.resolve()) } ] } @essential_commands_mcp.tool def create_genomic_variant( chromosome: Annotated[str, "Chromosome name (e.g., 'chr3')"] = "chr3", position: Annotated[int, "1-based position"] = 10000, reference_bases: Annotated[str, "Reference sequence (e.g., 'A')"] = "A", alternate_bases: Annotated[str, "Alternate sequence (e.g., 'C')"] = "C", out_prefix: Annotated[str | None, "Output file prefix"] = None, ) -> dict: """ Create genomic variants for genetic changes with reference interval operations. Input is variant coordinates and sequences and output is variant properties and intervals. """ # Create genomic variant variant = genome.Variant( chromosome=chromosome, position=position, reference_bases=reference_bases, alternate_bases=alternate_bases ) # Get reference interval and properties ref_interval = variant.reference_interval input_interval = ref_interval.resize(dna_client.SEQUENCE_LENGTH_1MB) # Create test interval for overlap testing test_interval = genome.Interval(chromosome=chromosome, start=position+5, end=position+10) results = { "variant": { "chromosome": variant.chromosome, "position": variant.position, "reference_bases": variant.reference_bases, "alternate_bases": variant.alternate_bases, "reference_interval": str(ref_interval), "input_interval_width": input_interval.width, }, "overlap_tests": { "reference_overlaps_test": variant.reference_overlaps(test_interval), "alternate_overlaps_test": variant.alternate_overlaps(test_interval), } } # Save results prefix = out_prefix or f"genomic_variant_{timestamp}" output_file = OUTPUT_DIR / f"{prefix}.csv" # Flatten results for CSV rows = [] for category, data in results.items(): for key, value in data.items(): rows.append({"category": category, "property": key, "value": str(value)}) df = pd.DataFrame(rows) df.to_csv(output_file, index=False) return { "message": f"Genomic variant created: {variant.chromosome}:{variant.position} {variant.reference_bases}>{variant.alternate_bases}", "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/essential_commands.ipynb", "artifacts": [ { "description": "Genomic variant properties", "path": str(output_file.resolve()) } ] } @essential_commands_mcp.tool def create_track_data( values_array: Annotated[str | None, "Path to CSV file with values array (shape: sequence_length x num_tracks)"] = None, track_names: Annotated[list[str], "List of track names"] = ["track1", "track1", "track2"], track_strands: Annotated[list[str], "List of track strands (+, -, .)"] = ["+", "-", "."], chromosome: Annotated[str, "Chromosome for interval"] = "chr1", start: Annotated[int, "Start position"] = 1000, end: Annotated[int, "End position"] = 1004, resolution: Annotated[int, "Base pair resolution"] = 1, out_prefix: Annotated[str | None, "Output file prefix"] = None, ) -> dict: """ Create TrackData objects from user arrays and metadata with validation. Input is values array and track metadata and output is TrackData object properties. """ if values_array is None: # Use tutorial example data if no input provided values = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]).astype(np.float32) else: # Load user data values = np.loadtxt(values_array, delimiter=',').astype(np.float32) # Create metadata metadata = pd.DataFrame({ 'name': track_names, 'strand': track_strands, }) # Create genomic interval interval = genome.Interval(chromosome=chromosome, start=start, end=end) # Create TrackData object tdata = track_data.TrackData( values=values, metadata=metadata, resolution=resolution, interval=interval ) # Save results prefix = out_prefix or f"track_data_{timestamp}" values_file = OUTPUT_DIR / f"{prefix}_values.csv" metadata_file = OUTPUT_DIR / f"{prefix}_metadata.csv" # Save values and metadata np.savetxt(values_file, tdata.values, delimiter=',') tdata.metadata.to_csv(metadata_file, index=False) results = { "shape": tdata.values.shape, "resolution": tdata.resolution, "interval": str(tdata.interval) if tdata.interval else "None", "num_tracks": len(tdata.metadata), "track_names": tdata.metadata.name.tolist(), "track_strands": tdata.metadata.strand.tolist(), } return { "message": f"TrackData created with shape {tdata.values.shape} and {len(tdata.metadata)} tracks", "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/essential_commands.ipynb", "artifacts": [ { "description": "Track data values", "path": str(values_file.resolve()) }, { "description": "Track metadata", "path": str(metadata_file.resolve()) } ] } @essential_commands_mcp.tool def create_variant_scores( scores_array: Annotated[str | None, "Path to CSV file with scores array (shape: num_genes x num_tracks)"] = None, gene_ids: Annotated[list[str], "List of gene IDs"] = ["ENSG0001", "ENSG0002", "ENSG0003"], track_names: Annotated[list[str], "List of track names"] = ["track1", "track2"], track_strands: Annotated[list[str], "List of track strands"] = ["+", "-"], out_prefix: Annotated[str | None, "Output file prefix"] = None, ) -> dict: """ Create AnnData objects for variant scoring results with gene and track metadata. Input is scores array and metadata and output is AnnData object structure. """ if scores_array is None: # Use tutorial example data if no input provided scores = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) else: # Load user data scores = np.loadtxt(scores_array, delimiter=',') # Create metadata gene_metadata = pd.DataFrame({'gene_id': gene_ids}) track_metadata = pd.DataFrame({ 'name': track_names, 'strand': track_strands }) # Create AnnData object variant_scores = anndata.AnnData( X=scores, obs=gene_metadata, var=track_metadata ) # Save results prefix = out_prefix or f"variant_scores_{timestamp}" scores_file = OUTPUT_DIR / f"{prefix}_scores.csv" gene_metadata_file = OUTPUT_DIR / f"{prefix}_gene_metadata.csv" track_metadata_file = OUTPUT_DIR / f"{prefix}_track_metadata.csv" # Save components np.savetxt(scores_file, variant_scores.X, delimiter=',') variant_scores.obs.to_csv(gene_metadata_file, index=False) variant_scores.var.to_csv(track_metadata_file, index=False) results = { "scores_shape": variant_scores.X.shape, "num_genes": variant_scores.n_obs, "num_tracks": variant_scores.n_vars, "gene_ids": variant_scores.obs.gene_id.tolist(), "track_names": variant_scores.var.name.tolist(), } return { "message": f"Variant scores AnnData created with {variant_scores.n_obs} genes and {variant_scores.n_vars} tracks", "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/essential_commands.ipynb", "artifacts": [ { "description": "Variant scores matrix", "path": str(scores_file.resolve()) }, { "description": "Gene metadata", "path": str(gene_metadata_file.resolve()) }, { "description": "Track metadata", "path": str(track_metadata_file.resolve()) } ] } @essential_commands_mcp.tool def genomic_interval_operations( interval1_chromosome: Annotated[str, "First interval chromosome"] = "chr1", interval1_start: Annotated[int, "First interval start"] = 1000, interval1_end: Annotated[int, "First interval end"] = 1010, interval2_chromosome: Annotated[str, "Second interval chromosome"] = "chr1", interval2_start: Annotated[int, "Second interval start"] = 1005, interval2_end: Annotated[int, "Second interval end"] = 1015, out_prefix: Annotated[str | None, "Output file prefix"] = None, ) -> dict: """ Perform operations on genomic intervals including overlaps, intersections and comparisons. Input is two genomic intervals and output is comparison operations results. """ # Create intervals interval1 = genome.Interval(chromosome=interval1_chromosome, start=interval1_start, end=interval1_end) interval2 = genome.Interval(chromosome=interval2_chromosome, start=interval2_start, end=interval2_end) # Perform operations operations = { "interval1": str(interval1), "interval2": str(interval2), "interval1_center": interval1.center(), "interval1_width": interval1.width, "interval2_center": interval2.center(), "interval2_width": interval2.width, "overlaps": interval1.overlaps(interval2), "contains": interval1.contains(interval2), "intersect": str(interval1.intersect(interval2)) if interval1.overlaps(interval2) else "No overlap", "interval1_resized_100": str(interval1.resize(100)), "interval2_resized_100": str(interval2.resize(100)), } # Save results prefix = out_prefix or f"genomic_interval_operations_{timestamp}" output_file = OUTPUT_DIR / f"{prefix}.csv" # Convert to DataFrame df = pd.DataFrame([ {"operation": key, "result": str(value)} for key, value in operations.items() ]) df.to_csv(output_file, index=False) return { "message": f"Genomic interval operations completed for {interval1} and {interval2}", "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/essential_commands.ipynb", "artifacts": [ { "description": "Interval operations results", "path": str(output_file.resolve()) } ] } @essential_commands_mcp.tool def variant_interval_operations( variant_chromosome: Annotated[str, "Variant chromosome"] = "chr3", variant_position: Annotated[int, "Variant position (1-based)"] = 10000, variant_ref: Annotated[str, "Reference bases"] = "T", variant_alt: Annotated[str, "Alternate bases"] = "CGTCAAT", interval_chromosome: Annotated[str, "Test interval chromosome"] = "chr3", interval_start: Annotated[int, "Test interval start"] = 10005, interval_end: Annotated[int, "Test interval end"] = 10010, out_prefix: Annotated[str | None, "Output file prefix"] = None, ) -> dict: """ Check variant overlaps with genomic intervals for reference and alternate alleles. Input is variant and interval coordinates and output is overlap test results. """ # Create variant and interval variant = genome.Variant( chromosome=variant_chromosome, position=variant_position, reference_bases=variant_ref, alternate_bases=variant_alt, ) interval = genome.Interval( chromosome=interval_chromosome, start=interval_start, end=interval_end ) # Perform overlap tests results = { "variant": f"{variant.chromosome}:{variant.position} {variant.reference_bases}>{variant.alternate_bases}", "interval": str(interval), "reference_interval": str(variant.reference_interval), "reference_overlaps": variant.reference_overlaps(interval), "alternate_overlaps": variant.alternate_overlaps(interval), "variant_ref_length": len(variant.reference_bases), "variant_alt_length": len(variant.alternate_bases), } # Save results prefix = out_prefix or f"variant_interval_operations_{timestamp}" output_file = OUTPUT_DIR / f"{prefix}.csv" # Convert to DataFrame df = pd.DataFrame([ {"property": key, "value": str(value)} for key, value in results.items() ]) df.to_csv(output_file, index=False) return { "message": f"Variant interval operations completed: ref_overlaps={results['reference_overlaps']}, alt_overlaps={results['alternate_overlaps']}", "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/essential_commands.ipynb", "artifacts": [ { "description": "Variant interval operations results", "path": str(output_file.resolve()) } ] } @essential_commands_mcp.tool def track_data_operations( values_array: Annotated[str | None, "Path to CSV file with values array"] = None, track_names: Annotated[list[str], "List of track names"] = ["track1", "track1", "track2"], track_strands: Annotated[list[str], "List of track strands"] = ["+", "-", "."], operation: Annotated[Literal["filter", "resize", "slice", "subset", "reverse_complement"], "Operation to perform"] = "filter", filter_strand: Annotated[Literal["+", "-", "."], "Strand to filter to"] = "+", resize_width: Annotated[int, "Width to resize to"] = 8, slice_start: Annotated[int, "Slice start position"] = 2, slice_end: Annotated[int, "Slice end position"] = 4, subset_names: Annotated[list[str], "Track names to subset to"] = ["track1"], chromosome: Annotated[str, "Chromosome"] = "chr1", start: Annotated[int, "Start position"] = 1000, end: Annotated[int, "End position"] = 1004, strand: Annotated[Literal["+", "-", "."], "Interval strand"] = "+", out_prefix: Annotated[str | None, "Output file prefix"] = None, ) -> dict: """ Filter, resize, slice and transform TrackData objects with strand-aware operations. Input is TrackData and operation parameters and output is transformed TrackData. """ if values_array is None: # Use tutorial example data values = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]).astype(np.float32) else: values = np.loadtxt(values_array, delimiter=',').astype(np.float32) # Create metadata and interval metadata = pd.DataFrame({ 'name': track_names, 'strand': track_strands, }) interval = genome.Interval(chromosome=chromosome, start=start, end=end, strand=strand) tdata = track_data.TrackData(values=values, metadata=metadata, resolution=1, interval=interval) # Perform operation if operation == "filter": if filter_strand == "+": result_tdata = tdata.filter_to_positive_strand() elif filter_strand == "-": result_tdata = tdata.filter_to_negative_strand() else: result_tdata = tdata.filter_to_unstranded() operation_info = f"filtered to {filter_strand} strand" elif operation == "resize": result_tdata = tdata.resize(width=resize_width) operation_info = f"resized to width {resize_width}" elif operation == "slice": result_tdata = tdata.slice_by_positions(start=slice_start, end=slice_end) operation_info = f"sliced from {slice_start} to {slice_end}" elif operation == "subset": result_tdata = tdata.select_tracks_by_name(names=subset_names) operation_info = f"subset to tracks {subset_names}" elif operation == "reverse_complement": result_tdata = tdata.reverse_complement() operation_info = "reverse complemented" # Save results prefix = out_prefix or f"track_data_{operation}_{timestamp}" values_file = OUTPUT_DIR / f"{prefix}_values.csv" metadata_file = OUTPUT_DIR / f"{prefix}_metadata.csv" # Save values and metadata np.savetxt(values_file, result_tdata.values, delimiter=',') result_tdata.metadata.to_csv(metadata_file, index=False) return { "message": f"TrackData {operation_info}: shape {result_tdata.values.shape}", "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/essential_commands.ipynb", "artifacts": [ { "description": f"Track data values after {operation}", "path": str(values_file.resolve()) }, { "description": f"Track metadata after {operation}", "path": str(metadata_file.resolve()) } ] } @essential_commands_mcp.tool def track_data_resolution_conversion( values_array: Annotated[str | None, "Path to CSV file with values array"] = None, track_names: Annotated[list[str], "List of track names"] = ["track1", "track1", "track2"], track_strands: Annotated[list[str], "List of track strands"] = ["+", "-", "."], original_resolution: Annotated[int, "Original resolution in bp"] = 1, target_resolution: Annotated[int, "Target resolution in bp"] = 2, chromosome: Annotated[str, "Chromosome"] = "chr1", start: Annotated[int, "Start position"] = 1000, end: Annotated[int, "End position"] = 1004, out_prefix: Annotated[str | None, "Output file prefix"] = None, ) -> dict: """ Convert between different resolutions by upsampling or downsampling TrackData. Input is TrackData and resolution parameters and output is resolution-converted data. """ if values_array is None: # Use tutorial example data values = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]).astype(np.float32) else: values = np.loadtxt(values_array, delimiter=',').astype(np.float32) # Create metadata and interval metadata = pd.DataFrame({ 'name': track_names, 'strand': track_strands, }) interval = genome.Interval(chromosome=chromosome, start=start, end=end) tdata = track_data.TrackData( values=values, metadata=metadata, resolution=original_resolution, interval=interval ) # Change resolution converted_tdata = tdata.change_resolution(resolution=target_resolution) # Save results prefix = out_prefix or f"track_data_resolution_{original_resolution}_to_{target_resolution}_{timestamp}" original_file = OUTPUT_DIR / f"{prefix}_original.csv" converted_file = OUTPUT_DIR / f"{prefix}_converted.csv" metadata_file = OUTPUT_DIR / f"{prefix}_metadata.csv" # Save original, converted values, and metadata np.savetxt(original_file, tdata.values, delimiter=',') np.savetxt(converted_file, converted_tdata.values, delimiter=',') converted_tdata.metadata.to_csv(metadata_file, index=False) return { "message": f"Resolution converted from {original_resolution}bp to {target_resolution}bp: {tdata.values.shape} -> {converted_tdata.values.shape}", "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/essential_commands.ipynb", "artifacts": [ { "description": f"Original values at {original_resolution}bp resolution", "path": str(original_file.resolve()) }, { "description": f"Converted values at {target_resolution}bp resolution", "path": str(converted_file.resolve()) }, { "description": "Track metadata", "path": str(metadata_file.resolve()) } ] }