Spaces:
Sleeping
Sleeping
File size: 4,039 Bytes
import streamlit as st
import scanpy as sc
import pandas as pd
import logging
import os
from pathlib import Path
from typing import Optional
from huggingface_hub import hf_hub_download, snapshot_download
logger = logging.getLogger(__name__)
REPO_ID = 'Angione-Lab/spMetaTME-Atlas'
@st.cache_data(show_spinner=False)
def get_metadata():
    """Download the atlas metadata CSV from Hugging Face and return it as a DataFrame.

    Returns an empty DataFrame (and logs the error) when the download or
    parse fails, so callers never have to handle exceptions themselves.
    """
    try:
        csv_path = hf_hub_download(
            repo_id=REPO_ID,
            filename="sp_metabolic_metadata.csv",
            repo_type="dataset",
        )
        return pd.read_csv(csv_path)
    except Exception as exc:
        logger.error(f"Error loading metadata: {exc}")
        return pd.DataFrame()
@st.cache_data(show_spinner=False)
def get_organ_stats(meta_df: pd.DataFrame):
    """Summarize per-organ statistics from the atlas metadata.

    Parameters
    ----------
    meta_df : pd.DataFrame
        Metadata table; must contain an 'organ' column to produce output.

    Returns
    -------
    pd.DataFrame
        One row per organ with 'sample_count' and 'avg_reactions' columns,
        sorted by sample count descending. Empty DataFrame when the input
        is empty or has no 'organ' column.
    """
    if meta_df.empty or 'organ' not in meta_df.columns:
        return pd.DataFrame()
    # Prefer 'n_vars', then 'n_genes', as the per-sample reaction-count column.
    count_col = next((c for c in ('n_vars', 'n_genes') if c in meta_df.columns), None)
    # BUG FIX: the original assumed 'dataset_title' existed whenever 'id' did
    # not, raising KeyError otherwise. Fall back to plain row counts so any
    # metadata schema yields a sample_count.
    id_col = next((c for c in ('id', 'dataset_title') if c in meta_df.columns), None)
    if id_col is not None:
        stats = meta_df.groupby('organ').agg(sample_count=(id_col, 'count')).reset_index()
    else:
        stats = meta_df.groupby('organ').size().reset_index(name='sample_count')
    # Add the mean reaction count per organ when a count column exists;
    # otherwise keep the column present with a neutral 0 so downstream
    # display code can rely on it.
    if count_col:
        avg_stats = meta_df.groupby('organ')[count_col].mean().reset_index()
        avg_stats.columns = ['organ', 'avg_reactions']
        stats = stats.merge(avg_stats, on='organ')
    else:
        stats['avg_reactions'] = 0
    return stats.sort_values('sample_count', ascending=False)
@st.cache_data(show_spinner=False)
def load_metabolic_flux_from_hf(filename: str):
    """Load a spatial metabolic flux AnnData (.h5ad) by name, with Streamlit caching.

    Checks a local ``example_data/`` copy first (faster dev cycle), then falls
    back to downloading ``SM/<filename>`` from the Hugging Face dataset repo.

    Parameters
    ----------
    filename : str
        Base name of the .h5ad file inside the repo's ``SM/`` folder.

    Returns
    -------
    AnnData or None
        The loaded object, or None when both sources fail (errors are logged).
    """
    # Priority to local example data for faster dev cycle
    example_path = os.path.join(os.getcwd(), "example_data", filename)
    if os.path.exists(example_path):
        try:
            adata = sc.read_h5ad(example_path)
            # BUG FIX: log messages previously contained the literal text
            # "(unknown)" instead of interpolating the requested filename.
            logger.info(f"Loaded {filename} from local example_data folder.")
            return adata
        except Exception as e:
            logger.warning(f"Could not load local {filename}: {e}. Retrying HF.")
    try:
        local_path = hf_hub_download(
            repo_id=REPO_ID,
            # BUG FIX: was the literal path "SM/(unknown)", which can never
            # match a real repo file; interpolate the requested filename.
            filename=f"SM/{filename}",
            repo_type="dataset"
        )
        adata = sc.read_h5ad(local_path)
        return adata
    except Exception as e:
        logger.error(f"Error loading {filename}: {str(e)}")
        return None
def download_metabolic_flux_from_hf(filename: str, local_dir: Optional[str] = None):
    """Download a spatial metabolic flux file from Hugging Face Hub to a local directory.

    Parameters
    ----------
    filename : str
        Base name of the file inside the repo's ``SM/`` folder.
    local_dir : str, optional
        Destination directory; defaults to ``~/Downloads/spMetaTME-Atlas``
        (created if missing).

    Returns
    -------
    str or None
        The local directory containing the download, or None on failure
        (the error is logged).
    """
    try:
        if local_dir is None:
            local_dir = os.path.expanduser("~/Downloads/spMetaTME-Atlas")
        os.makedirs(local_dir, exist_ok=True)
        snapshot_download(
            repo_id=REPO_ID,
            # BUG FIX: was the literal pattern "SM/(unknown)", which matches
            # nothing in the repo; interpolate the requested filename.
            allow_patterns=[f"SM/{filename}"],
            repo_type="dataset",
            local_dir=local_dir
        )
        return local_dir
    except Exception as e:
        logger.error(f"Error downloading {filename}: {str(e)}")
        return None
def process_upload(uploaded_file, data_type: str):
    """Process an uploaded .h5ad file and return an AnnData object.

    Writes the upload to a temporary file (scanpy reads from paths, not
    buffers), parses it, and always removes the temp file afterwards.

    Parameters
    ----------
    uploaded_file
        Streamlit UploadedFile-like object exposing ``getvalue()``.
    data_type : str
        Label used only in the error log message.

    Returns
    -------
    AnnData or None
        The parsed object, or None on any failure (the error is logged).
    """
    import tempfile

    temp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".h5ad") as tmp:
            tmp.write(uploaded_file.getvalue())
            temp_path = tmp.name
        return sc.read_h5ad(temp_path)
    except Exception as e:
        logger.error(f"Error loading {data_type} file: {str(e)}")
        return None
    finally:
        # BUG FIX: the original only unlinked on success, leaking the temp
        # file whenever sc.read_h5ad raised. Clean up on every path.
        if temp_path is not None and os.path.exists(temp_path):
            os.unlink(temp_path)
|