pangaeagpt / src /search /dataset_utils.py
dmpantiu's picture
Upload 22 files
754cec9 unverified
#src/search/dataset_utils.py
import os
import shutil
import pandas as pd
import logging
import streamlit as st
import pangaeapy.pandataset as pdataset
# Function to fetch dataset based on DOI
#@st.cache_data(ttl=3600)
def fetch_dataset(doi):
    """Return ``(data, title)`` for the PANGAEA dataset referenced by *doi*.

    Results are cached per DOI in ``st.session_state.datasets_cache``. On
    every successful call (cache hit or fresh fetch) the data and title are
    also written to ``st.session_state.dataset_dfs[doi]`` and
    ``st.session_state.dataset_names[doi]``.

    Args:
        doi: DOI string. The numeric dataset ID is taken from the text after
            the last ``.``, with any surrounding ``)`` characters stripped.

    Returns:
        A ``(data, title)`` tuple built from the ``PanDataSet`` object's
        ``data`` and ``title`` attributes, or ``(None, None)`` if the ID
        cannot be parsed or the fetch fails. Failures are logged, not raised.
    """
    session = st.session_state

    if doi in session.datasets_cache:
        logging.debug("Dataset for DOI %s already in cache.", doi)
        dataset, name = session.datasets_cache[doi]
        session.dataset_dfs[doi] = dataset
        session.dataset_names[doi] = name
        return dataset, name

    try:
        # Parse the ID inside the guard so that a malformed or non-string DOI
        # is reported the same way as any other fetch failure.
        dataset_id = doi.split('.')[-1].strip(')')
        logging.debug("Fetching dataset for DOI %s with ID %s", doi, dataset_id)
        ds = pdataset.PanDataSet(int(dataset_id))
        logging.debug("Dataset fetched with title: %s", ds.title)
        # The dataset is kept in session state only; nothing is saved to disk.
        session.datasets_cache[doi] = (ds.data, ds.title)
        session.dataset_dfs[doi] = ds.data
        session.dataset_names[doi] = ds.title
        return ds.data, ds.title
    except Exception as e:
        logging.error("Error fetching dataset for DOI %s: %s", doi, e)
        return None, None
# Function to fetch dataset details using pangaeapy
def fetch_dataset_details(doi, max_parameters=10):
    """Return a short description and parameter summary for a dataset.

    Args:
        doi: Identifier passed to ``PanDataSet`` as its ``id`` argument.
        max_parameters: Maximum number of parameter short names to include
            in the summary. When the dataset has more, only the first
            ``max_parameters`` are listed and ``"..."`` is appended.

    Returns:
        An ``(abstract, parameters)`` tuple of strings. ``abstract`` falls
        back to ``"No description available"`` when the dataset has no
        (or an empty) ``abstract`` attribute. ``parameters`` is the
        comma-separated list of short names. If anything raises, the error
        is logged and ``("No description available",
        "No parameters available")`` is returned.
    """
    try:
        dataset = pdataset.PanDataSet(id=doi)
        dataset.setMetadata()
        abstract = getattr(dataset, 'abstract', None) or "No description available"
        short_names = dataset.getParamDict().get('shortName', [])
        # Only the first `max_parameters` names are shown; the ellipsis marks
        # that the list was actually cut short.
        parameters = ', '.join(short_names[:max_parameters])
        if len(short_names) > max_parameters:
            parameters += "..."
        return abstract, parameters
    except Exception as e:
        logging.error("Error fetching dataset details for DOI %s: %s", doi, e)
        return "No description available", "No parameters available"
# Conversion function
def convert_df_to_csv(df):
    """Serialize *df* to CSV text and return it as UTF-8 encoded bytes.

    The DataFrame's ``to_csv`` defaults are used, so the index is written
    as the first column.
    """
    logging.debug("Converting DataFrame to CSV")
    csv_text = df.to_csv()
    return bytes(csv_text, 'utf-8')