File size: 4,039 Bytes
31d5c57
 
 
 
 
 
 
 
 
 
 
 
 
f3aa8aa
31d5c57
 
 
11a087f
 
 
 
 
 
31d5c57
 
 
 
f3aa8aa
31d5c57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f3aa8aa
31d5c57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import streamlit as st
import scanpy as sc
import pandas as pd
import logging
import os
from pathlib import Path
from typing import Optional
from huggingface_hub import hf_hub_download, snapshot_download

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Hugging Face Hub repository id; the loaders in this module access it with
# repo_type="dataset".
REPO_ID = 'Angione-Lab/spMetaTME-Atlas'

@st.cache_data(show_spinner=False)
def _fetch_metadata() -> pd.DataFrame:
    """Download the metadata CSV from the Hub and parse it.

    Errors are deliberately NOT handled here: Streamlit only caches values
    that are returned, so letting the exception propagate keeps a failed
    attempt out of the cache.
    """
    metadata_path = hf_hub_download(
        repo_id=REPO_ID,
        filename="sp_metabolic_metadata.csv",
        repo_type="dataset",
    )
    return pd.read_csv(metadata_path)


def get_metadata() -> pd.DataFrame:
    """Fetch and cache metadata from Hugging Face.

    Returns:
        The contents of ``sp_metabolic_metadata.csv`` as a DataFrame, or an
        empty DataFrame if the download or parsing fails (the error is
        logged). Only successful loads are cached, so a transient failure is
        retried on the next call instead of being served from the cache.
    """
    try:
        return _fetch_metadata()
    except Exception as e:
        logger.error(f"Error loading metadata: {e}")
        return pd.DataFrame()


# Keep ``get_metadata.clear()`` working for callers that relied on the
# cache-control method Streamlit attaches to cached functions.
get_metadata.clear = _fetch_metadata.clear

@st.cache_data(show_spinner=False)
def get_organ_stats(meta_df: pd.DataFrame) -> pd.DataFrame:
    """Calculate summary statistics for organs from metadata.

    Args:
        meta_df: Metadata table. Must contain an ``organ`` column to produce
            any statistics.

    Returns:
        A DataFrame with one row per organ and the columns ``organ``,
        ``sample_count`` and ``avg_reactions``, sorted by ``sample_count``
        in descending order. An empty DataFrame is returned when ``meta_df``
        is empty or has no ``organ`` column.

        ``sample_count`` is the number of non-null ``id`` values per organ
        (``dataset_title`` if there is no ``id`` column); when neither column
        exists it falls back to the number of rows per organ.
        ``avg_reactions`` is the per-organ mean of ``n_vars`` (or ``n_genes``
        if ``n_vars`` is absent), or ``0`` when neither column exists.
    """
    if meta_df.empty or 'organ' not in meta_df.columns:
        return pd.DataFrame()

    per_organ = meta_df.groupby('organ')

    # Column whose non-null entries are counted as samples.
    sample_col = next(
        (col for col in ('id', 'dataset_title') if col in meta_df.columns),
        None,
    )
    if sample_col is not None:
        sample_count = per_organ[sample_col].count()
    else:
        # Neither identifier column is present: count rows rather than
        # failing with a KeyError.
        sample_count = per_organ.size()
    stats = sample_count.rename('sample_count').to_frame()

    # Column used as the per-sample reaction count.
    count_col = next(
        (col for col in ('n_vars', 'n_genes') if col in meta_df.columns),
        None,
    )
    if count_col is not None:
        # Both series come from the same groupby, so they align on organ.
        stats['avg_reactions'] = per_organ[count_col].mean()
    else:
        stats['avg_reactions'] = 0

    # Sort by sample count descending
    return stats.reset_index().sort_values('sample_count', ascending=False)

@st.cache_data(show_spinner=False)
def _read_metabolic_flux(filename: str):
    """Read ``filename`` from local example data or the Hugging Face Hub.

    Raises on Hub download/read failure instead of returning ``None``:
    Streamlit only caches returned values, so a failed attempt is not
    memoized for this filename.
    """
    # Priority to local example data for faster dev cycle
    example_path = os.path.join(os.getcwd(), "example_data", filename)
    if os.path.exists(example_path):
        try:
            adata = sc.read_h5ad(example_path)
            logger.info(f"Loaded {filename} from local example_data folder.")
            return adata
        except Exception as e:
            # A broken local copy is not fatal; fall through to the Hub.
            logger.warning(f"Could not load local {filename}: {e}. Retrying HF.")

    local_path = hf_hub_download(
        repo_id=REPO_ID,
        filename=f"SM/{filename}",
        repo_type="dataset",
    )
    return sc.read_h5ad(local_path)


def load_metabolic_flux_from_hf(filename: str):
    """
    Load spatial metabolic flux data from Hugging Face Hub with caching.

    A copy under ``<cwd>/example_data/<filename>`` is preferred when present;
    otherwise ``SM/<filename>`` is downloaded from the dataset repository.

    Args:
        filename: Name of the ``.h5ad`` file (without the ``SM/`` prefix).

    Returns:
        The object returned by ``sc.read_h5ad``, or ``None`` if loading
        fails (the error is logged). Only successful loads are cached, so a
        transient failure is retried on the next call rather than leaving a
        cached ``None`` for this filename.
    """
    try:
        return _read_metabolic_flux(filename)
    except Exception as e:
        logger.error(f"Error loading {filename}: {str(e)}")
        return None


# Keep ``load_metabolic_flux_from_hf.clear()`` working for callers that
# relied on the cache-control method Streamlit attaches to cached functions.
load_metabolic_flux_from_hf.clear = _read_metabolic_flux.clear

def download_metabolic_flux_from_hf(filename: str, local_dir: Optional[str] = None) -> Optional[str]:
    """
    Download spatial metabolic flux file from Hugging Face Hub to local directory.

    Args:
        filename: Name of the file inside the repository's ``SM/`` folder.
        local_dir: Destination directory. Defaults to
            ``~/Downloads/spMetaTME-Atlas``; it is created if missing.

    Returns:
        ``local_dir`` on success (the file itself is placed at
        ``<local_dir>/SM/<filename>``), or ``None`` if the download fails or
        the requested file is not present afterwards. Failures are logged.
    """
    try:
        if local_dir is None:
            local_dir = os.path.expanduser("~/Downloads/spMetaTME-Atlas")

        os.makedirs(local_dir, exist_ok=True)

        snapshot_download(
            repo_id=REPO_ID,
            allow_patterns=[f"SM/{filename}"],
            repo_type="dataset",
            local_dir=local_dir,
        )
    except Exception as e:
        logger.error(f"Error downloading {filename}: {str(e)}")
        return None

    # snapshot_download does not fail when allow_patterns matches nothing,
    # so confirm the requested file actually exists before reporting success.
    if not os.path.isfile(os.path.join(local_dir, "SM", filename)):
        logger.error(
            f"Error downloading {filename}: file not found in {REPO_ID} under SM/"
        )
        return None

    return local_dir

def process_upload(uploaded_file, data_type: str):
    """
    Process uploaded file and return AnnData object.

    The upload is written to a temporary ``.h5ad`` file because
    ``sc.read_h5ad`` is given a path; the temporary file is removed
    afterwards whether or not reading succeeds.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the file's
            bytes (presumably a Streamlit upload — confirm against caller).
        data_type: Label used only in the error log message.

    Returns:
        The object returned by ``sc.read_h5ad``, or ``None`` if the file
        could not be written or parsed (the error is logged).
    """
    import tempfile

    temp_path = None
    try:
        # Save uploaded file to temp location
        with tempfile.NamedTemporaryFile(delete=False, suffix=".h5ad") as tmp:
            # Record the path before writing so a failed write is cleaned up too.
            temp_path = tmp.name
            tmp.write(uploaded_file.getvalue())

        return sc.read_h5ad(temp_path)
    except Exception as e:
        logger.error(f"Error loading {data_type} file: {str(e)}")
        return None
    finally:
        # Clean up temp file on success AND on failure (delete=False above
        # means nothing else will remove it).
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError as cleanup_error:
                logger.warning(
                    f"Could not remove temporary file {temp_path}: {cleanup_error}"
                )