Spaces:
Running
Running
File size: 1,571 Bytes
class DataFrameStore:
    """
    A simple in-memory manager for storing and accessing multiple datasets.

    Each dataset is stored under a unique name as a dictionary with:
    - original: a snapshot of the DataFrame taken when it was added
    - working: a separate copy used for transformations
    - schema: metadata describing the dataset structure
    """

    def __init__(self) -> None:
        """Initialize an empty dataset store."""
        # Maps dataset name -> {"original": ..., "working": ..., "schema": ...}
        self.datasets = {}

    def add_dataset(self, name: str, df, schema) -> None:
        """
        Add a dataset to the store.

        Parameters
        ----------
        name : str
            Unique name used to identify the dataset.
        df : pandas.DataFrame
            The DataFrame to store. It is copied via ``df.copy()``; the
            caller's object itself is not retained.
        schema : dict
            Metadata describing column types or structure. Stored as-is
            (not copied).

        Raises
        ------
        ValueError
            If a dataset with the same name is already stored.
        """
        if name in self.datasets:
            raise ValueError(f"Dataset '{name}' already loaded")

        self.datasets[name] = {
            # Snapshot the input instead of aliasing it: otherwise a later
            # in-place change to the caller's object would silently alter
            # the copy that is supposed to stay untouched.
            "original": df.copy(),
            "working": df.copy(),
            "schema": schema,
        }

    def list_datasets(self) -> list:
        """
        Return a list of all dataset names currently stored.

        Names appear in the order in which the datasets were added.
        """
        return list(self.datasets)

    def get_dataset(self, name: str):
        """
        Retrieve the dataset dictionary for a given dataset name.

        Returns
        -------
        dict or None
            Dictionary containing 'original', 'working', and 'schema',
            or None if no dataset with that name is stored.
        """
        return self.datasets.get(name)

    def get_schema(self, name: str):
        """
        Get the schema metadata for a specific dataset.

        Raises
        ------
        KeyError
            If no dataset with that name is stored.
        """
        try:
            entry = self.datasets[name]
        except KeyError:
            # Same exception type as a plain dict lookup, but with a message
            # that tells the caller what was actually missing.
            raise KeyError(f"Dataset '{name}' not loaded") from None
        return entry["schema"]