Spaces:
Sleeping
Sleeping
| #src/search/dataset_utils.py | |
| import os | |
| import shutil | |
| import pandas as pd | |
| import logging | |
| import streamlit as st | |
| import pangaeapy.pandataset as pdataset | |
| # Function to fetch dataset based on DOI | |
| #@st.cache_data(ttl=3600) | |
def fetch_dataset(doi):
    """Return the data and title for ``doi``, reusing the session cache.

    On a cache hit the stored ``(data, title)`` pair is copied into
    ``st.session_state.dataset_dfs`` and ``st.session_state.dataset_names``
    and returned. On a miss, the dataset ID is taken from the text after the
    last ``.`` in ``doi`` (with any ``)`` characters stripped from both
    ends), converted to ``int`` and passed to ``pdataset.PanDataSet``; the
    result is then stored under ``doi`` in all three session-state mappings.

    Args:
        doi: DOI string used both as the cache key and as the source of the
            numeric dataset ID.

    Returns:
        ``(data, title)`` on success, or ``(None, None)`` if anything in the
        fetch-and-store step raised (the error is logged).
    """
    session = st.session_state

    if doi in session.datasets_cache:
        logging.debug("Dataset for DOI %s already in cache.", doi)
        cached_data, cached_title = session.datasets_cache[doi]
        session.dataset_dfs[doi] = cached_data
        session.dataset_names[doi] = cached_title
        return cached_data, cached_title

    # Everything after the final '.' is treated as the dataset ID.
    dataset_id = doi.rpartition('.')[2].strip(')')

    try:
        logging.debug("Fetching dataset for DOI %s with ID %s", doi, dataset_id)
        fetched = pdataset.PanDataSet(int(dataset_id))
        logging.debug("Dataset fetched with title: %s", fetched.title)

        # Results live in session state only; nothing is written to disk here.
        session.datasets_cache[doi] = (fetched.data, fetched.title)
        session.dataset_dfs[doi] = fetched.data
        session.dataset_names[doi] = fetched.title
        return fetched.data, fetched.title
    except Exception as e:
        logging.error("Error fetching dataset for DOI %s: %s", doi, e)
        return None, None
| # Function to fetch dataset details using pangaeapy | |
def fetch_dataset_details(doi):
    """Return a description and a short parameter summary for a dataset.

    Builds a ``pdataset.PanDataSet`` for ``doi``, calls ``setMetadata()`` on
    it, and reads its ``abstract`` attribute and the ``'shortName'`` entry of
    ``getParamDict()``.

    Args:
        doi: Identifier passed as ``id=`` to ``pdataset.PanDataSet``.

    Returns:
        A tuple ``(abstract, parameters)``:

        * ``abstract`` is the dataset's abstract, or
          ``"No description available"`` when it is missing or empty.
        * ``parameters`` is the short names joined with ``", "``. At most
          the first 10 names are listed; when more exist, ``"..."`` is
          appended to show that the list was cut.

        If any step raises, the error is logged and
        ``("No description available", "No parameters available")`` is
        returned.
    """
    max_listed = 10  # number of short names shown before truncating
    try:
        dataset = pdataset.PanDataSet(id=doi)
        dataset.setMetadata()
        abstract = getattr(dataset, 'abstract', None) or "No description available"
        short_names = dataset.getParamDict().get('shortName', [])

        # Only the leading names are shown; the ellipsis marks a real cut
        # (previously every name was joined and "..." was merely appended).
        parameters = ', '.join(short_names[:max_listed])
        if len(short_names) > max_listed:
            parameters += "..."
        return abstract, parameters
    except Exception as e:
        # Lazy %-style arguments, consistent with the other loggers here.
        logging.error("Error fetching dataset details for DOI %s: %s", doi, e)
        return "No description available", "No parameters available"
| # Conversion function | |
def convert_df_to_csv(df):
    """Serialize ``df`` with ``to_csv()`` and return the text as UTF-8 bytes.

    Args:
        df: Object exposing ``to_csv()`` that returns a string (a pandas
            DataFrame in this module).

    Returns:
        The CSV text encoded as ``bytes`` using UTF-8.
    """
    logging.debug("Converting DataFrame to CSV")
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')