Spaces:

Paper2Agent
/

alphagenome_mcp

Running

App Files Files Community

alphagenome_mcp / tools /quick_start.py

Paper2Agent

Upload 10 files

0cdac39 verified 3 months ago

raw

history blame contribute delete

26.8 kB

	"""
	AlphaGenome Quick Start tutorial tools for DNA sequence analysis and prediction.

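	This module defines 6 tool functions (in this file only 1, 2 and 5 carry the `@quick_start_mcp.tool` decorator that registers them with the MCP server):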
	1. predict_dna_sequence: Predict genomic tracks from DNA sequence
	2. predict_genome_interval: Predict genomic tracks for reference genome intervals
	3. predict_variant_effects: Predict and visualize genetic variant effects
	4. score_variant_effect: Score genetic variant effects using variant scorers
	5. ism_analysis: Perform in silico mutagenesis analysis with sequence logos
	6. mouse_predictions: Make predictions for mouse sequences and intervals

	All tools extracted from `https://github.com/google-deepmind/alphagenome/tree/main/colabs/quick_start.ipynb`.
	"""

	# Standard imports
	from typing import Annotated, Literal, Any
	import pandas as pd
	import numpy as np
	from pathlib import Path
	import os
	from fastmcp import FastMCP
	from datetime import datetime

	# AlphaGenome imports
	from alphagenome import colab_utils
	from alphagenome.data import gene_annotation
	from alphagenome.data import genome
	from alphagenome.data import transcript as transcript_utils
	from alphagenome.interpretation import ism
	from alphagenome.models import dna_client
	from alphagenome.models import variant_scorers
	from alphagenome.visualization import plot_components
	import matplotlib.pyplot as plt

	# Root directory for the default input/output locations.
	# NOTE(review): assumes /data is writable (and persistent) on the host, e.g. an
	# HF Space with persistent storage enabled - confirm for the deployment target.
	BASE_DIR = Path("/data")

	# Fallback locations used when the environment does not override them
	DEFAULT_INPUT_DIR = BASE_DIR / "tmp_inputs"
	DEFAULT_OUTPUT_DIR = BASE_DIR / "tmp_outputs"

	# QUICK_START_INPUT_DIR / QUICK_START_OUTPUT_DIR take precedence over the defaults
	INPUT_DIR = Path(os.getenv("QUICK_START_INPUT_DIR", DEFAULT_INPUT_DIR))
	OUTPUT_DIR = Path(os.getenv("QUICK_START_OUTPUT_DIR", DEFAULT_OUTPUT_DIR))

	# Create both directories (and any missing parents) as soon as the module is imported
	INPUT_DIR.mkdir(exist_ok=True, parents=True)
	OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

	# Second-resolution stamp, evaluated once at import time, so the value stays
	# the same for the whole lifetime of the process
	timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"

	# FastMCP server object that the tool functions register with
	quick_start_mcp = FastMCP(name="quick_start")

	@quick_start_mcp.tool
	def predict_dna_sequence(
	    api_key: Annotated[str, "AlphaGenome API key for authentication"],
	    sequence: Annotated[str, "DNA sequence to analyze (will be center-padded to valid length)"] = 'GATTACA',
	    sequence_length: Annotated[Literal["2KB", "16KB", "100KB", "500KB", "1MB"], "Model input sequence length"] = "2KB",
	    output_types: Annotated[list[str], "List of output types to predict (e.g. ['DNASE', 'CAGE', 'RNA_SEQ'])"] = ["DNASE"],
	    ontology_terms: Annotated[list[str], "List of ontology terms for tissues/cell types (e.g. ['UBERON:0002048'])"] = ["UBERON:0002048"],
	    out_prefix: Annotated[str | None, "Output file prefix"] = None,
	) -> dict:
	    """
	    Predict genomic tracks from a DNA sequence using AlphaGenome model.
	    Input is DNA sequence string and output is track predictions with metadata tables.

	    For every requested output type, two CSV files (values and metadata) are
	    written to OUTPUT_DIR. Returns a dict with "message", "reference" and
	    "artifacts" (a list of {"description", "path"} entries, one per file).
	    """
	    # Stamp the default prefix at call time (microsecond resolution); a stamp
	    # fixed at import would make repeated calls overwrite each other's files.
	    if out_prefix is None:
	        call_stamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
	        out_prefix = f"predict_dna_sequence_{call_stamp}"

	    # Create DNA model
	    dna_model = dna_client.create(api_key)

	    # Convert sequence length to client constant
	    length_map = {
	        "2KB": dna_client.SEQUENCE_LENGTH_2KB,
	        "16KB": dna_client.SEQUENCE_LENGTH_16KB,
	        "100KB": dna_client.SEQUENCE_LENGTH_100KB,
	        "500KB": dna_client.SEQUENCE_LENGTH_500KB,
	        "1MB": dna_client.SEQUENCE_LENGTH_1MB,
	    }
	    target_length = length_map[sequence_length]

	    # Center the input inside a run of 'N' characters. str.center leaves the
	    # string untouched when it is already at least target_length long.
	    padded_sequence = sequence.center(target_length, 'N')

	    # Look up each requested output name on the client's OutputType enum
	    output_enums = [getattr(dna_client.OutputType, name) for name in output_types]

	    # Make prediction
	    output = dna_model.predict_sequence(
	        sequence=padded_sequence,
	        requested_outputs=output_enums,
	        ontology_terms=ontology_terms,
	    )

	    artifacts = []

	    # Save track data and metadata for each output type
	    for output_type in output_types:
	        # The result object exposes each track under the lower-cased type name
	        track_name = output_type.lower()
	        track_data = getattr(output, track_name)

	        # Save values
	        values_file = OUTPUT_DIR / f"{out_prefix}_{track_name}_values.csv"
	        pd.DataFrame(track_data.values).to_csv(values_file, index=False)
	        artifacts.append({
	            "description": f"{output_type} prediction values",
	            "path": str(values_file.resolve()),
	        })

	        # Save metadata
	        metadata_file = OUTPUT_DIR / f"{out_prefix}_{track_name}_metadata.csv"
	        track_data.metadata.to_csv(metadata_file, index=False)
	        artifacts.append({
	            "description": f"{output_type} track metadata",
	            "path": str(metadata_file.resolve()),
	        })

	    return {
	        "message": f"DNA sequence predictions completed for {len(output_types)} output types",
	        "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/quick_start.ipynb",
	        "artifacts": artifacts,
	    }

	@quick_start_mcp.tool
	def predict_genome_interval(
	    api_key: Annotated[str, "AlphaGenome API key for authentication"],
	    chromosome: Annotated[str, "Chromosome name (e.g. 'chr19')"] = "chr19",
	    start_position: Annotated[int, "Start position on chromosome"] = 40991281,
	    end_position: Annotated[int, "End position on chromosome"] = 41018398,
	    strand: Annotated[Literal["+", "-", "."], "Strand orientation"] = "+",
	    sequence_length: Annotated[Literal["2KB", "16KB", "100KB", "500KB", "1MB"], "Model input sequence length"] = "1MB",
	    output_types: Annotated[list[str], "List of output types to predict (e.g. ['RNA_SEQ'])"] = ["RNA_SEQ"],
	    ontology_terms: Annotated[list[str], "List of ontology terms for tissues/cell types"] = ["UBERON:0001114"],
	    gene_symbol: Annotated[str | None, "Gene symbol to center interval on (overrides coordinates)"] = "CYP2B6",
	    out_prefix: Annotated[str | None, "Output file prefix"] = None,
	) -> dict:
	    """
	    Predict genomic tracks for a reference genome interval with transcript visualization.
	    Input is genomic coordinates or gene symbol and output is prediction plot and metadata.

	    A metadata CSV is written for every requested output type; the two plots
	    (full interval and a 2**15 bp zoom) are drawn for the first output type only.
	    Returns a dict with "message", "reference" and "artifacts".
	    Raises ValueError when output_types is empty.
	    """
	    # The plots below index output_types[0]; fail before any remote call
	    # instead of with an IndexError after the prediction has been made.
	    if not output_types:
	        raise ValueError("output_types must contain at least one output type")

	    # Stamp the default prefix at call time (microsecond resolution); a stamp
	    # fixed at import would make repeated calls overwrite each other's files.
	    if out_prefix is None:
	        call_stamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
	        out_prefix = f"predict_genome_interval_{call_stamp}"

	    # Create DNA model
	    dna_model = dna_client.create(api_key)

	    # Load GTF file for gene annotation (fetched on every call)
	    gtf = pd.read_feather(
	        'https://storage.googleapis.com/alphagenome/reference/gencode/'
	        'hg38/gencode.v46.annotation.gtf.gz.feather'
	    )

	    # Set up transcript extractors
	    gtf_transcripts = gene_annotation.filter_protein_coding(gtf)
	    gtf_transcripts = gene_annotation.filter_to_longest_transcript(gtf_transcripts)
	    transcript_extractor = transcript_utils.TranscriptExtractor(gtf_transcripts)

	    # A non-empty gene symbol wins over the explicit coordinates
	    if gene_symbol:
	        interval = gene_annotation.get_gene_interval(gtf, gene_symbol=gene_symbol)
	    else:
	        interval = genome.Interval(chromosome, start_position, end_position, strand)

	    # Resize to model-compatible length
	    length_map = {
	        "2KB": dna_client.SEQUENCE_LENGTH_2KB,
	        "16KB": dna_client.SEQUENCE_LENGTH_16KB,
	        "100KB": dna_client.SEQUENCE_LENGTH_100KB,
	        "500KB": dna_client.SEQUENCE_LENGTH_500KB,
	        "1MB": dna_client.SEQUENCE_LENGTH_1MB,
	    }
	    interval = interval.resize(length_map[sequence_length])

	    # Look up each requested output name on the client's OutputType enum
	    output_enums = [getattr(dna_client.OutputType, name) for name in output_types]

	    # Make prediction
	    output = dna_model.predict_interval(
	        interval=interval,
	        requested_outputs=output_enums,
	        ontology_terms=ontology_terms,
	    )

	    # Extract transcripts for visualization
	    longest_transcripts = transcript_extractor.extract(interval)

	    artifacts = []

	    # Save metadata for each output type
	    for output_type in output_types:
	        track_data = getattr(output, output_type.lower())
	        metadata_file = OUTPUT_DIR / f"{out_prefix}_{output_type.lower()}_metadata.csv"
	        track_data.metadata.to_csv(metadata_file, index=False)
	        artifacts.append({
	            "description": f"{output_type} track metadata",
	            "path": str(metadata_file.resolve()),
	        })

	    # Only the first requested output type is plotted
	    primary_type = output_types[0]
	    track_data = getattr(output, primary_type.lower())

	    # Full-interval plot. The figure returned by plot_components.plot is saved
	    # and closed explicitly; a separate plt.figure() call beforehand would
	    # leave an unused blank figure open after every call.
	    # NOTE(review): relies on plot_components.plot returning its Figure - confirm.
	    fig = plot_components.plot(
	        components=[
	            plot_components.TranscriptAnnotation(longest_transcripts),
	            plot_components.Tracks(track_data),
	        ],
	        interval=track_data.interval,
	    )
	    plot_file = OUTPUT_DIR / f"{out_prefix}_{primary_type.lower()}_plot.png"
	    try:
	        fig.savefig(plot_file, dpi=300, bbox_inches='tight')
	    finally:
	        plt.close(fig)
	    artifacts.append({
	        "description": f"{primary_type} prediction plot",
	        "path": str(plot_file.resolve()),
	    })

	    # Zoomed plot: same tracks, interval resized to 2**15 bp
	    zoomed_fig = plot_components.plot(
	        components=[
	            plot_components.TranscriptAnnotation(longest_transcripts, fig_height=0.1),
	            plot_components.Tracks(track_data),
	        ],
	        interval=track_data.interval.resize(2**15),
	    )
	    plot_zoomed_file = OUTPUT_DIR / f"{out_prefix}_{primary_type.lower()}_plot_zoomed.png"
	    try:
	        zoomed_fig.savefig(plot_zoomed_file, dpi=300, bbox_inches='tight')
	    finally:
	        plt.close(zoomed_fig)
	    artifacts.append({
	        "description": f"{primary_type} prediction plot (zoomed)",
	        "path": str(plot_zoomed_file.resolve()),
	    })

	    return {
	        "message": f"Genome interval predictions completed with {len(longest_transcripts)} transcripts",
	        "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/quick_start.ipynb",
	        "artifacts": artifacts,
	    }


	# NOTE(review): unlike some sibling tools in this file, this function carries no
	# @quick_start_mcp.tool decorator, so it is not registered here - confirm intent.
	def predict_variant_effects(
	    api_key: Annotated[str, "AlphaGenome API key for authentication"],
	    chromosome: Annotated[str, "Chromosome name (e.g. 'chr22')"] = "chr22",
	    position: Annotated[int, "Variant position"] = 36201698,
	    reference_bases: Annotated[str, "Reference allele"] = "A",
	    alternate_bases: Annotated[str, "Alternative allele"] = "C",
	    sequence_length: Annotated[Literal["2KB", "16KB", "100KB", "500KB", "1MB"], "Model input sequence length"] = "1MB",
	    output_types: Annotated[list[str], "List of output types to predict"] = ["RNA_SEQ"],
	    ontology_terms: Annotated[list[str], "List of ontology terms for tissues/cell types"] = ["UBERON:0001157"],
	    out_prefix: Annotated[str | None, "Output file prefix"] = None,
	) -> dict:
	    """
	    Predict and visualize genetic variant effects comparing REF vs ALT predictions.
	    Input is variant coordinates and output is overlaid REF/ALT visualization plot.

	    Only the first entry of output_types is plotted; the plot window is the
	    prediction interval resized to 2**15 bp. Returns a dict with "message",
	    "reference" and "artifacts". Raises ValueError when output_types is empty.
	    """
	    # The plot below indexes output_types[0]; fail before any remote call
	    # instead of with an IndexError after the prediction has been made.
	    if not output_types:
	        raise ValueError("output_types must contain at least one output type")

	    # Stamp the default prefix at call time (microsecond resolution); a stamp
	    # fixed at import would make repeated calls overwrite each other's files.
	    if out_prefix is None:
	        call_stamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
	        out_prefix = f"predict_variant_effects_{call_stamp}"

	    # Create DNA model
	    dna_model = dna_client.create(api_key)

	    # Create variant object
	    variant = genome.Variant(
	        chromosome=chromosome,
	        position=position,
	        reference_bases=reference_bases,
	        alternate_bases=alternate_bases,
	    )

	    # Prediction interval: the variant's reference interval resized to the model length
	    length_map = {
	        "2KB": dna_client.SEQUENCE_LENGTH_2KB,
	        "16KB": dna_client.SEQUENCE_LENGTH_16KB,
	        "100KB": dna_client.SEQUENCE_LENGTH_100KB,
	        "500KB": dna_client.SEQUENCE_LENGTH_500KB,
	        "1MB": dna_client.SEQUENCE_LENGTH_1MB,
	    }
	    interval = variant.reference_interval.resize(length_map[sequence_length])

	    # Look up each requested output name on the client's OutputType enum
	    output_enums = [getattr(dna_client.OutputType, name) for name in output_types]

	    # Make variant prediction
	    variant_output = dna_model.predict_variant(
	        interval=interval,
	        variant=variant,
	        requested_outputs=output_enums,
	        ontology_terms=ontology_terms,
	    )

	    # Load GTF for transcript annotation (fetched on every call)
	    gtf = pd.read_feather(
	        'https://storage.googleapis.com/alphagenome/reference/gencode/'
	        'hg38/gencode.v46.annotation.gtf.gz.feather'
	    )
	    gtf_transcripts = gene_annotation.filter_protein_coding(gtf)
	    gtf_transcripts = gene_annotation.filter_to_longest_transcript(gtf_transcripts)
	    transcript_extractor = transcript_utils.TranscriptExtractor(gtf_transcripts)
	    longest_transcripts = transcript_extractor.extract(interval)

	    artifacts = []

	    # Only the first requested output type is plotted
	    primary_type = output_types[0]
	    ref_track_data = getattr(variant_output.reference, primary_type.lower())
	    alt_track_data = getattr(variant_output.alternate, primary_type.lower())

	    # Overlaid REF vs ALT plot. The figure returned by plot_components.plot is
	    # saved and closed explicitly; a separate plt.figure() call beforehand
	    # would leave an unused blank figure open after every call.
	    # NOTE(review): relies on plot_components.plot returning its Figure - confirm.
	    fig = plot_components.plot(
	        [
	            plot_components.TranscriptAnnotation(longest_transcripts),
	            plot_components.OverlaidTracks(
	                tdata={
	                    'REF': ref_track_data,
	                    'ALT': alt_track_data,
	                },
	                colors={'REF': 'dimgrey', 'ALT': 'red'},
	            ),
	        ],
	        interval=ref_track_data.interval.resize(2**15),
	        # Annotate the location of the variant as a vertical line
	        annotations=[plot_components.VariantAnnotation([variant], alpha=0.8)],
	    )

	    plot_file = OUTPUT_DIR / f"{out_prefix}_{primary_type.lower()}_variant_plot.png"
	    try:
	        fig.savefig(plot_file, dpi=300, bbox_inches='tight')
	    finally:
	        plt.close(fig)
	    artifacts.append({
	        "description": f"{primary_type} REF vs ALT comparison plot",
	        "path": str(plot_file.resolve()),
	    })

	    return {
	        "message": f"Variant effect predictions completed for {chromosome}:{position}:{reference_bases}>{alternate_bases}",
	        "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/quick_start.ipynb",
	        "artifacts": artifacts,
	    }


	# NOTE(review): unlike some sibling tools in this file, this function carries no
	# @quick_start_mcp.tool decorator, so it is not registered here - confirm intent.
	def score_variant_effect(
	    api_key: Annotated[str, "AlphaGenome API key for authentication"],
	    chromosome: Annotated[str, "Chromosome name (e.g. 'chr22')"] = "chr22",
	    position: Annotated[int, "Variant position"] = 36201698,
	    reference_bases: Annotated[str, "Reference allele"] = "A",
	    alternate_bases: Annotated[str, "Alternative allele"] = "C",
	    sequence_length: Annotated[Literal["2KB", "16KB", "100KB", "500KB", "1MB"], "Model input sequence length"] = "1MB",
	    scorer_type: Annotated[Literal["RNA_SEQ", "DNASE", "CAGE", "ATAC"], "Variant scorer type to use"] = "RNA_SEQ",
	    out_prefix: Annotated[str | None, "Output file prefix"] = None,
	) -> dict:
	    """
	    Score genetic variant effects using recommended variant scorers and produce tidy scores.
	    Input is variant coordinates and output is variant scores table with genes and tracks.

	    Four CSV files are written to OUTPUT_DIR: the result's obs table, its var
	    table, the raw score matrix and the tidy scores table. Returns a dict with
	    "message", "reference" and "artifacts".
	    """
	    # Stamp the default prefix at call time (microsecond resolution); a stamp
	    # fixed at import would make repeated calls overwrite each other's files.
	    if out_prefix is None:
	        call_stamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
	        out_prefix = f"score_variant_effect_{call_stamp}"

	    # Create DNA model
	    dna_model = dna_client.create(api_key)

	    # Create variant object
	    variant = genome.Variant(
	        chromosome=chromosome,
	        position=position,
	        reference_bases=reference_bases,
	        alternate_bases=alternate_bases,
	    )

	    # Scoring interval: the variant's reference interval resized to the model length
	    length_map = {
	        "2KB": dna_client.SEQUENCE_LENGTH_2KB,
	        "16KB": dna_client.SEQUENCE_LENGTH_16KB,
	        "100KB": dna_client.SEQUENCE_LENGTH_100KB,
	        "500KB": dna_client.SEQUENCE_LENGTH_500KB,
	        "1MB": dna_client.SEQUENCE_LENGTH_1MB,
	    }
	    interval = variant.reference_interval.resize(length_map[sequence_length])

	    # Get recommended variant scorer
	    variant_scorer = variant_scorers.RECOMMENDED_VARIANT_SCORERS[scorer_type]

	    # Score variant
	    variant_scores = dna_model.score_variant(
	        interval=interval,
	        variant=variant,
	        variant_scorers=[variant_scorer],
	    )

	    artifacts = []

	    # One scorer was requested, so only the first result is used
	    scores_adata = variant_scores[0]

	    # Save gene metadata (obs)
	    genes_file = OUTPUT_DIR / f"{out_prefix}_{scorer_type}_genes.csv"
	    scores_adata.obs.to_csv(genes_file, index=True)
	    artifacts.append({
	        "description": f"{scorer_type} gene metadata",
	        "path": str(genes_file.resolve()),
	    })

	    # Save track metadata (var)
	    tracks_file = OUTPUT_DIR / f"{out_prefix}_{scorer_type}_tracks.csv"
	    scores_adata.var.to_csv(tracks_file, index=True)
	    artifacts.append({
	        "description": f"{scorer_type} track metadata",
	        "path": str(tracks_file.resolve()),
	    })

	    # Save raw scores matrix, labelled with the obs index (rows) and var index (columns)
	    raw_scores_file = OUTPUT_DIR / f"{out_prefix}_{scorer_type}_raw_scores.csv"
	    raw_scores_df = pd.DataFrame(
	        scores_adata.X,
	        index=scores_adata.obs.index,
	        columns=scores_adata.var.index,
	    )
	    raw_scores_df.to_csv(raw_scores_file, index=True)
	    artifacts.append({
	        "description": f"{scorer_type} raw scores matrix",
	        "path": str(raw_scores_file.resolve()),
	    })

	    # Create tidy scores dataframe
	    tidy_scores_df = variant_scorers.tidy_scores([scores_adata], match_gene_strand=True)

	    # Save tidy scores
	    tidy_file = OUTPUT_DIR / f"{out_prefix}_{scorer_type}_tidy_scores.csv"
	    tidy_scores_df.to_csv(tidy_file, index=False)
	    artifacts.append({
	        "description": f"{scorer_type} tidy scores table",
	        "path": str(tidy_file.resolve()),
	    })

	    return {
	        "message": f"Variant scoring completed: {scores_adata.X.shape[0]} genes × {scores_adata.X.shape[1]} tracks",
	        "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/quick_start.ipynb",
	        "artifacts": artifacts,
	    }

	@quick_start_mcp.tool
	def ism_analysis(
	    api_key: Annotated[str, "AlphaGenome API key for authentication"],
	    chromosome: Annotated[str, "Chromosome for ISM analysis"] = "chr20",
	    start_position: Annotated[int, "Start position for sequence context"] = 3753000,
	    end_position: Annotated[int, "End position for sequence context"] = 3753400,
	    sequence_length: Annotated[Literal["2KB", "16KB", "100KB", "500KB", "1MB"], "Model input sequence length"] = "2KB",
	    ism_width: Annotated[int, "Width of region to mutate systematically"] = 256,
	    output_type: Annotated[Literal["DNASE", "RNA_SEQ", "CAGE", "ATAC"], "Output type for scoring variants"] = "DNASE",
	    mask_width: Annotated[int, "Width of center mask for scoring"] = 501,
	    target_cell_line: Annotated[str, "Ontology term for specific cell line/tissue"] = "EFO:0002067",
	    out_prefix: Annotated[str | None, "Output file prefix"] = None,
	) -> dict:
	    """
	    Perform in silico mutagenesis analysis with sequence logo visualization of important regions.
	    Input is genomic coordinates and output is ISM matrix and sequence logo plot.

	    Writes the ISM matrix as CSV and a sequence-logo PNG to OUTPUT_DIR. Returns
	    a dict with "message", "reference" and "artifacts". Raises ValueError when
	    a scored variant has no score values to extract.
	    """
	    # Stamp the default prefix at call time (microsecond resolution); a stamp
	    # fixed at import would make repeated calls overwrite each other's files.
	    if out_prefix is None:
	        call_stamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
	        out_prefix = f"ism_analysis_{call_stamp}"

	    # Create DNA model
	    dna_model = dna_client.create(api_key)

	    # Sequence context: the requested interval resized to the model length
	    sequence_interval = genome.Interval(chromosome, start_position, end_position)
	    length_map = {
	        "2KB": dna_client.SEQUENCE_LENGTH_2KB,
	        "16KB": dna_client.SEQUENCE_LENGTH_16KB,
	        "100KB": dna_client.SEQUENCE_LENGTH_100KB,
	        "500KB": dna_client.SEQUENCE_LENGTH_500KB,
	        "1MB": dna_client.SEQUENCE_LENGTH_1MB,
	    }
	    sequence_interval = sequence_interval.resize(length_map[sequence_length])

	    # Region to mutate: the sequence context resized down to ism_width
	    ism_interval = sequence_interval.resize(ism_width)

	    # Create variant scorer
	    output_enum = getattr(dna_client.OutputType, output_type)
	    variant_scorer = variant_scorers.CenterMaskScorer(
	        requested_output=output_enum,
	        width=mask_width,
	        aggregation_type=variant_scorers.AggregationType.DIFF_MEAN,
	    )

	    # Score all ISM variants
	    variant_scores = dna_model.score_ism_variants(
	        interval=sequence_interval,
	        ism_interval=ism_interval,
	        variant_scorers=[variant_scorer],
	    )

	    def extract_target_scores(adata):
	        """Return one score: first value among tracks matching target_cell_line, else from the first track."""
	        values = adata.X[:, adata.var['ontology_curie'] == target_cell_line]
	        if values.size == 0:
	            # If target not found, use first available track
	            values = adata.X[:, 0:1]
	        # Explicit check rather than assert, which is stripped under -O
	        if values.size < 1:
	            raise ValueError("ISM scoring returned no score values for a variant")
	        return values.flatten()[0]

	    # Create ISM matrix from one score per variant (first scorer result of each entry)
	    ism_result = ism.ism_matrix(
	        [extract_target_scores(x[0]) for x in variant_scores],
	        variants=[v[0].uns['variant'] for v in variant_scores],
	    )

	    artifacts = []

	    # Save ISM matrix
	    ism_matrix_file = OUTPUT_DIR / f"{out_prefix}_ism_matrix.csv"
	    pd.DataFrame(ism_result).to_csv(ism_matrix_file, index=False)
	    artifacts.append({
	        "description": "ISM contribution matrix",
	        "path": str(ism_matrix_file.resolve()),
	    })

	    # Sequence logo plot. The figure returned by plot_components.plot is saved
	    # and closed explicitly; a separate plt.figure() call beforehand would
	    # leave an unused blank figure open after every call.
	    # NOTE(review): relies on plot_components.plot returning its Figure - confirm.
	    fig = plot_components.plot(
	        [
	            plot_components.SeqLogo(
	                scores=ism_result,
	                scores_interval=ism_interval,
	                ylabel=f'ISM {target_cell_line} {output_type}',
	            )
	        ],
	        interval=ism_interval,
	        fig_width=35,
	    )

	    logo_file = OUTPUT_DIR / f"{out_prefix}_sequence_logo.png"
	    try:
	        fig.savefig(logo_file, dpi=300, bbox_inches='tight')
	    finally:
	        plt.close(fig)
	    artifacts.append({
	        "description": "ISM sequence logo plot",
	        "path": str(logo_file.resolve()),
	    })

	    return {
	        "message": f"ISM analysis completed: {len(variant_scores)} variants scored ({ism_width} positions)",
	        "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/quick_start.ipynb",
	        "artifacts": artifacts,
	    }


	# NOTE(review): unlike some sibling tools in this file, this function carries no
	# @quick_start_mcp.tool decorator, so it is not registered here - confirm intent.
	def mouse_predictions(
	    api_key: Annotated[str, "AlphaGenome API key for authentication"],
	    sequence: Annotated[str | None, "DNA sequence for sequence prediction"] = 'GATTACA',
	    chromosome: Annotated[str | None, "Mouse chromosome for interval prediction"] = "chr1",
	    start_position: Annotated[int | None, "Start position for interval prediction"] = 3000000,
	    end_position: Annotated[int | None, "End position for interval prediction"] = 3000001,
	    prediction_type: Annotated[Literal["sequence", "interval"], "Type of prediction to perform"] = "sequence",
	    sequence_length: Annotated[Literal["2KB", "16KB", "100KB", "500KB", "1MB"], "Model input sequence length"] = "2KB",
	    output_types: Annotated[list[str], "List of output types to predict"] = ["DNASE"],
	    ontology_terms: Annotated[list[str], "List of ontology terms for mouse tissues"] = ["UBERON:0002048"],
	    out_prefix: Annotated[str | None, "Output file prefix"] = None,
	) -> dict:
	    """
	    Make predictions for mouse sequences and genomic intervals using MUS_MUSCULUS organism.
	    Input is mouse DNA sequence or coordinates and output is prediction metadata tables.

	    "sequence" mode writes a values CSV and a metadata CSV per output type;
	    "interval" mode writes only the metadata CSV. Returns a dict with
	    "message", "reference" and "artifacts". Raises ValueError when the inputs
	    required by the chosen mode are None or prediction_type is not recognised.
	    """
	    # Validate before creating the client so bad input fails without side effects.
	    # An unknown mode used to fall through and report success with no artifacts.
	    if prediction_type == "sequence":
	        if sequence is None:
	            raise ValueError("sequence must be provided for sequence prediction")
	    elif prediction_type == "interval":
	        if chromosome is None or start_position is None or end_position is None:
	            raise ValueError("chromosome, start_position, and end_position must be provided for interval prediction")
	    else:
	        raise ValueError(f"Unsupported prediction_type: {prediction_type!r}")

	    # Stamp the default prefix at call time (microsecond resolution); a stamp
	    # fixed at import would make repeated calls overwrite each other's files.
	    if out_prefix is None:
	        call_stamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
	        out_prefix = f"mouse_predictions_{call_stamp}"

	    # Create DNA model
	    dna_model = dna_client.create(api_key)

	    # Convert sequence length to client constant
	    length_map = {
	        "2KB": dna_client.SEQUENCE_LENGTH_2KB,
	        "16KB": dna_client.SEQUENCE_LENGTH_16KB,
	        "100KB": dna_client.SEQUENCE_LENGTH_100KB,
	        "500KB": dna_client.SEQUENCE_LENGTH_500KB,
	        "1MB": dna_client.SEQUENCE_LENGTH_1MB,
	    }
	    target_length = length_map[sequence_length]

	    # Look up each requested output name on the client's OutputType enum
	    output_enums = [getattr(dna_client.OutputType, name) for name in output_types]

	    artifacts = []

	    if prediction_type == "sequence":
	        # Center the input inside a run of 'N' characters. str.center leaves
	        # the string untouched when it is already at least target_length long.
	        padded_sequence = sequence.center(target_length, 'N')

	        # Make mouse sequence prediction
	        output = dna_model.predict_sequence(
	            sequence=padded_sequence,
	            organism=dna_client.Organism.MUS_MUSCULUS,
	            requested_outputs=output_enums,
	            ontology_terms=ontology_terms,
	        )

	        # Save results for each output type
	        for output_type in output_types:
	            track_name = output_type.lower()
	            track_data = getattr(output, track_name)

	            # Save values
	            values_file = OUTPUT_DIR / f"{out_prefix}_{track_name}_values.csv"
	            pd.DataFrame(track_data.values).to_csv(values_file, index=False)
	            artifacts.append({
	                "description": f"Mouse {output_type} prediction values",
	                "path": str(values_file.resolve()),
	            })

	            # Save metadata
	            metadata_file = OUTPUT_DIR / f"{out_prefix}_{track_name}_metadata.csv"
	            track_data.metadata.to_csv(metadata_file, index=False)
	            artifacts.append({
	                "description": f"Mouse {output_type} track metadata",
	                "path": str(metadata_file.resolve()),
	            })

	    else:
	        # Interval mode (prediction_type was validated above)
	        interval = genome.Interval(chromosome, start_position, end_position).resize(target_length)

	        # Make mouse interval prediction
	        output = dna_model.predict_interval(
	            interval=interval,
	            organism=dna_client.Organism.MUS_MUSCULUS,
	            requested_outputs=output_enums,
	            ontology_terms=ontology_terms,
	        )

	        # Save metadata only for each output type
	        for output_type in output_types:
	            track_data = getattr(output, output_type.lower())
	            metadata_file = OUTPUT_DIR / f"{out_prefix}_{output_type.lower()}_metadata.csv"
	            track_data.metadata.to_csv(metadata_file, index=False)
	            artifacts.append({
	                "description": f"Mouse {output_type} interval metadata",
	                "path": str(metadata_file.resolve()),
	            })

	    return {
	        "message": f"Mouse {prediction_type} predictions completed for {len(output_types)} output types",
	        "reference": "https://github.com/google-deepmind/alphagenome/tree/main/colabs/quick_start.ipynb",
	        "artifacts": artifacts,
	    }