#src/search/dataset_utils.py
import os
import shutil
import pandas as pd
import logging
import streamlit as st
import pangaeapy.pandataset as pdataset

# Function to fetch dataset based on DOI
#@st.cache_data(ttl=3600)
def fetch_dataset(doi):
    """Return ``(data, title)`` for a PANGAEA DOI, consulting the session cache first.

    The result is also written to ``st.session_state.dataset_dfs[doi]`` and
    ``st.session_state.dataset_names[doi]`` on both the cached and the freshly
    fetched path.

    NOTE(review): ``datasets_cache``, ``dataset_dfs`` and ``dataset_names`` are
    assumed to already exist on ``st.session_state`` -- they are not created
    here; confirm they are initialised by the caller.

    Args:
        doi: DOI string whose text after the last ``.`` is the numeric
            dataset ID (any ``)`` characters at its ends are stripped).

    Returns:
        ``(ds.data, ds.title)`` of the fetched ``PanDataSet``, the cached pair
        if one exists, or ``(None, None)`` when fetching raises.
    """
    session = st.session_state

    # Cache hit: republish the stored pair into the per-DOI lookups and return.
    if doi in session.datasets_cache:
        logging.debug("Dataset for DOI %s already in cache.", doi)
        cached_data, cached_title = session.datasets_cache[doi]
        session.dataset_dfs[doi] = cached_data
        session.dataset_names[doi] = cached_title
        return cached_data, cached_title

    # The dataset ID is whatever follows the final dot of the DOI.
    pangaea_id = doi.rsplit('.', 1)[-1].strip(')')
    try:
        logging.debug("Fetching dataset for DOI %s with ID %s", doi, pangaea_id)
        pan_ds = pdataset.PanDataSet(int(pangaea_id))
        logging.debug("Dataset fetched with title: %s", pan_ds.title)

        data, title = pan_ds.data, pan_ds.title
        session.datasets_cache[doi] = (data, title)
        session.dataset_dfs[doi] = data
        session.dataset_names[doi] = title
        return data, title
    except Exception as err:
        # Best effort: any failure (bad ID, fetch error) is logged and
        # reported to the caller as a pair of Nones.
        logging.error("Error fetching dataset for DOI %s: %s", doi, err)
        return None, None


# Function to fetch dataset details using pangaeapy
def fetch_dataset_details(doi):
    """Return the abstract and a short parameter summary for a PANGAEA dataset.

    Args:
        doi: Dataset identifier passed to ``pdataset.PanDataSet(id=...)``.

    Returns:
        A ``(abstract, parameters)`` tuple of strings.

        * ``abstract`` is the dataset's ``abstract`` attribute, or
          ``"No description available"`` when it is missing or empty.
        * ``parameters`` is the comma-separated list of parameter short
          names. When there are more than 10, only the first 10 are listed
          and ``"..."`` is appended to signal the omission.

        If anything raises, the error is logged and
        ``("No description available", "No parameters available")`` is
        returned instead.
    """
    max_listed = 10  # cap on how many short names are shown before "..."
    try:
        dataset = pdataset.PanDataSet(id=doi)
        dataset.setMetadata()
        abstract = getattr(dataset, 'abstract', None) or "No description available"

        short_names = dataset.getParamDict().get('shortName', [])
        # Truncate before joining: previously every name was joined and the
        # ellipsis was merely tacked on, so nothing was actually shortened.
        parameters = ', '.join(short_names[:max_listed])
        if len(short_names) > max_listed:
            parameters += "..."

        return abstract, parameters

    except Exception as e:
        # Lazy %-style arguments, consistent with fetch_dataset's logging.
        logging.error("Error fetching dataset details for DOI %s: %s", doi, e)
        return "No description available", "No parameters available"

# Convert a DataFrame to UTF-8 encoded CSV bytes
def convert_df_to_csv(df):
    """Serialise *df* to CSV text and return it as UTF-8 encoded bytes.

    ``df.to_csv()`` is called with its defaults, so the index column is
    included in the output.
    """
    logging.debug("Converting DataFrame to CSV")
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')