Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import os | |
| import pickle | |
| import logging | |
# Configure logging for this module: timestamped, level-tagged messages at
# INFO and above on the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Directory in which dataset and model-result pickle files are cached.
CACHE_DIR = "/tmp/.cache"

# Create the cache directory if it doesn't exist. The creation is attempted
# directly and FileExistsError is handled, rather than checking
# os.path.exists() first: a separate check leaves a window in which another
# process can create the directory, making os.makedirs() raise at import time.
try:
    os.makedirs(CACHE_DIR)
except FileExistsError:
    # The path is already present; there is nothing to create or report.
    pass
else:
    logging.info(f"Created cache directory: {CACHE_DIR}")
def get_dataset_path(name):
    """Return the cache path of the pickle file for dataset ``name``.

    The result is ``CACHE_DIR`` joined with ``<name>.pkl``.
    """
    filename = f"{name}.pkl"
    return os.path.join(CACHE_DIR, filename)
def get_model_results_path(name):
    """Return the cache path of the model-results pickle file for ``name``.

    The result is ``CACHE_DIR`` joined with ``<name>_model_results.pkl``.
    """
    filename = f"{name}_model_results.pkl"
    return os.path.join(CACHE_DIR, filename)
def store_dataset(name, dataframe):
    """Persist a DataFrame as a pickle file inside the cache directory.

    Any failure while writing is logged together with its traceback and is
    not re-raised; the function returns None in every case.

    Args:
        name (str): Dataset name; the target file is get_dataset_path(name).
        dataframe (pd.DataFrame): The DataFrame to write.
    """
    path = get_dataset_path(name)
    try:
        dataframe.to_pickle(path)
    except Exception as e:
        # logging.exception logs at ERROR level with the active traceback.
        logging.exception(f"Error storing dataset '{name}' to {path}: {e}")
        return
    logging.info(f"Dataset '{name}' stored successfully at {path}")
def get_dataset(name):
    """Read a cached DataFrame back from its pickle file.

    Args:
        name (str): Dataset name; the source file is get_dataset_path(name).

    Returns:
        pd.DataFrame or None: The unpickled object when the file exists and
        loads cleanly; None when the file is missing or loading fails (the
        failure is logged with its traceback).
    """
    path = get_dataset_path(name)

    # Nothing cached under this name: report it and bail out early.
    if not os.path.exists(path):
        logging.info(f"Dataset '{name}' not found at {path}")
        return None

    try:
        frame = pd.read_pickle(path)
    except Exception as e:
        # logging.exception logs at ERROR level with the active traceback.
        logging.exception(f"Error loading dataset '{name}' from {path}: {e}")
        return None

    logging.info(f"Dataset '{name}' loaded successfully from {path}")
    return frame
def store_model_results(name, model, y_test, y_pred, y_pred_proba, X_test):
    """Saves trained model, test data, predictions, and probabilities to a pickle file.

    The results are first pickled into a temporary file next to the target
    and then moved into place with os.replace(). Opening the target directly
    in 'wb' mode would truncate it before pickling starts, so a failed dump
    (for example an unpicklable model) would destroy previously stored
    results and leave a corrupt file behind.

    Failures are logged with their traceback and are not re-raised.

    Args:
        name (str): The name to associate with the model results.
        model: The trained model object.
        y_test (pd.Series): Actual target values from the test set.
        y_pred (np.array): Predicted target values for the test set.
        y_pred_proba (np.array, optional): Predicted probabilities for
            classification tasks (presumably None when not applicable --
            confirm against callers).
        X_test (pd.DataFrame): Feature values from the test set.
    """
    path = get_model_results_path(name)
    results = {
        "model": model,
        "y_test": y_test,
        "y_pred": y_pred,
        "y_pred_proba": y_pred_proba,
        "X_test": X_test
    }
    # The temp file lives in the same directory so os.replace() stays on one
    # filesystem; the PID suffix keeps concurrent processes from sharing it.
    tmp_path = f"{path}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, 'wb') as f:
            pickle.dump(results, f)
        # Swap the complete file into place only after a successful dump.
        os.replace(tmp_path, path)
        logging.info(f"Model results for '{name}' stored successfully at {path}")
    except Exception as e:
        logging.error(f"Error storing model results for '{name}' to {path}: {e}", exc_info=True)
        # Best-effort cleanup of the partial temp file; it may not exist if
        # open() itself failed, so a missing file is not an error here.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
def get_model_results(name):
    """Read cached model results back from their pickle file.

    Args:
        name (str): Results name; the source file is
            get_model_results_path(name).

    Returns:
        dict or None: The unpickled results when the file exists and loads
        cleanly; None when the file is missing or loading fails (the failure
        is logged with its traceback).
    """
    path = get_model_results_path(name)

    # Nothing cached under this name: report it and bail out early.
    if not os.path.exists(path):
        logging.info(f"Model results for '{name}' not found at {path}")
        return None

    try:
        # NOTE(review): unpickling executes code embedded in the file; this
        # assumes the cache directory is only written by trusted processes.
        with open(path, 'rb') as fh:
            results = pickle.load(fh)
    except Exception as e:
        # logging.exception logs at ERROR level with the active traceback.
        logging.exception(f"Error loading model results for '{name}' from {path}: {e}")
        return None

    logging.info(f"Model results for '{name}' loaded successfully from {path}")
    return results