Spaces:
Sleeping
Sleeping
File size: 3,811 Bytes
aa68823 492f039 aa68823 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
import pandas as pd
import os
import pickle
import logging
# Configure logging for this module: INFO level, "<time> - <level> - <message>" lines.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Directory that holds every pickle file written by this module.
CACHE_DIR = "/tmp/.cache"

# Create the cache directory if it doesn't exist. The existence check only
# decides whether the creation is logged; exist_ok=True keeps makedirs from
# raising FileExistsError when another process creates the directory between
# the check and the call.
if not os.path.exists(CACHE_DIR):
    os.makedirs(CACHE_DIR, exist_ok=True)
    logging.info(f"Created cache directory: {CACHE_DIR}")
def get_dataset_path(name):
    """Return the cache location of the pickle file for dataset ``name``.

    Args:
        name (str): Dataset name; it becomes the base name of the file.

    Returns:
        str: ``CACHE_DIR`` joined with ``"<name>.pkl"``.
    """
    filename = f"{name}.pkl"
    return os.path.join(CACHE_DIR, filename)
def get_model_results_path(name):
    """Return the cache location of the pickle file holding model results.

    Args:
        name (str): Name of the model results; it prefixes the file name.

    Returns:
        str: ``CACHE_DIR`` joined with ``"<name>_model_results.pkl"``.
    """
    filename = f"{name}_model_results.pkl"
    return os.path.join(CACHE_DIR, filename)
def store_dataset(name, dataframe):
    """Save a DataFrame to a pickle file in the cache directory.

    The pickle is first written to a temporary sibling file and then renamed
    onto the final path, so a write that fails part-way never leaves a
    truncated file behind and never destroys a previously stored dataset.
    Any failure is logged (with traceback) and swallowed; nothing is raised
    to the caller.

    Args:
        name (str): The name to associate with the dataset (used for filename).
        dataframe (pd.DataFrame): The DataFrame to be stored.
    """
    path = get_dataset_path(name)
    # Per-process temp name in the same directory as the target, so the final
    # os.replace() is a rename within one filesystem.
    tmp_path = f"{path}.{os.getpid()}.tmp"
    try:
        dataframe.to_pickle(tmp_path)
        os.replace(tmp_path, path)
        logging.info(f"Dataset '{name}' stored successfully at {path}")
    except Exception as e:
        logging.error(f"Error storing dataset '{name}' to {path}: {e}", exc_info=True)
        # Best-effort cleanup of a partial temp file; it may not exist at all
        # (e.g. the failure happened before anything was written).
        try:
            os.remove(tmp_path)
        except OSError:
            pass
def get_dataset(name):
    """Load a previously cached DataFrame by name.

    Args:
        name (str): The name of the dataset to retrieve.

    Returns:
        pd.DataFrame or None: The unpickled DataFrame, or None when no cache
        file exists for ``name`` or when reading it fails (the failure is
        logged with its traceback).
    """
    pickle_path = get_dataset_path(name)

    # Guard clause: nothing has been cached under this name.
    if not os.path.exists(pickle_path):
        logging.info(f"Dataset '{name}' not found at {pickle_path}")
        return None

    try:
        frame = pd.read_pickle(pickle_path)
        logging.info(f"Dataset '{name}' loaded successfully from {pickle_path}")
        return frame
    except Exception as err:
        # logging.exception logs at ERROR level and attaches the traceback.
        logging.exception(f"Error loading dataset '{name}' from {pickle_path}: {err}")
        return None
def store_model_results(name, model, y_test, y_pred, y_pred_proba, X_test):
    """Save a trained model together with its test data and predictions.

    The values are bundled into a dict with the keys ``"model"``,
    ``"y_test"``, ``"y_pred"``, ``"y_pred_proba"`` and ``"X_test"`` and
    pickled into the cache directory. The pickle is written to a temporary
    sibling file and then renamed onto the final path, so a failed dump
    (for example an unpicklable model) never truncates or replaces results
    that were stored earlier. Any failure is logged (with traceback) and
    swallowed; nothing is raised to the caller.

    Args:
        name (str): The name to associate with the model results.
        model: The trained model object.
        y_test (pd.Series): Actual target values from the test set.
        y_pred (np.array): Predicted target values for the test set.
        y_pred_proba (np.array, optional): Predicted probabilities for
            classification tasks.
        X_test (pd.DataFrame): Feature values from the test set.
    """
    path = get_model_results_path(name)
    results = {
        "model": model,
        "y_test": y_test,
        "y_pred": y_pred,
        "y_pred_proba": y_pred_proba,
        "X_test": X_test,
    }
    # Per-process temp name in the same directory as the target, so the final
    # os.replace() is a rename within one filesystem.
    tmp_path = f"{path}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, 'wb') as f:
            pickle.dump(results, f)
        os.replace(tmp_path, path)
        logging.info(f"Model results for '{name}' stored successfully at {path}")
    except Exception as e:
        logging.error(f"Error storing model results for '{name}' to {path}: {e}", exc_info=True)
        # Best-effort cleanup of a partial temp file; it may not exist at all
        # (e.g. open() itself failed).
        try:
            os.remove(tmp_path)
        except OSError:
            pass
def get_model_results(name):
    """Load previously cached model results by name.

    NOTE(review): this unpickles whatever file sits at the cache path, and
    unpickling can execute arbitrary code. Confirm the cache directory is
    only writable by trusted code.

    Args:
        name (str): The name of the model results to retrieve.

    Returns:
        dict or None: The unpickled results object (a dict when it was written
        by ``store_model_results``), or None when no cache file exists for
        ``name`` or when reading it fails (the failure is logged with its
        traceback).
    """
    results_path = get_model_results_path(name)

    # Guard clause: nothing has been cached under this name.
    if not os.path.exists(results_path):
        logging.info(f"Model results for '{name}' not found at {results_path}")
        return None

    try:
        with open(results_path, 'rb') as handle:
            loaded = pickle.load(handle)
        logging.info(f"Model results for '{name}' loaded successfully from {results_path}")
        return loaded
    except Exception as err:
        # logging.exception logs at ERROR level and attaches the traceback.
        logging.exception(f"Error loading model results for '{name}' from {results_path}: {err}")
        return None
|