Spaces:
Sleeping
Sleeping
| from datetime import date | |
| from typing import Optional | |
| from huggingface_hub import hf_hub_download, list_repo_files | |
| import pandas as pd | |
| from pandas import DataFrame | |
class DatasetData:
    """Lazily load the dated ``data.parquet`` snapshots of a Hugging Face dataset.

    Snapshot files are found by listing the dataset repository and keeping
    paths that start with ``history``, end with ``data.parquet`` and carry an
    ISO date (``YYYY-MM-DD``) as their second path segment.  Nothing is
    listed or downloaded until :meth:`get_parquet_dict` is first called; the
    result is then kept on the instance.
    """

    # Dataset repository and revision the snapshots are read from.
    __repo_id: str = "Ya-Alex/anki-addons"
    __revision: str = "main"

    def __init__(self):
        # None means "not loaded yet"; populated by get_parquet_dict().
        self.parquet_files: Optional[dict[date, DataFrame]] = None

    def get_parquet_dict(self) -> dict[date, DataFrame]:
        """Return the snapshots keyed by their date, loading them on first use.

        Returns:
            The mapping stored in ``self.parquet_files`` (the same object on
            every call, not a copy).
        """
        if self.parquet_files is None:
            paths_by_day: dict[date, str] = self.__list_history_parquet_files()
            self.parquet_files = {
                day: self.__read_parquet(path)
                for day, path in paths_by_day.items()
            }
        return self.parquet_files

    def __list_history_parquet_files(self) -> dict[date, str]:
        """Map each snapshot date to the repository path of its parquet file.

        Paths that are not recognisable snapshots are skipped rather than
        aborting the whole listing.  If several paths share a date, the one
        listed last wins.
        """
        repo_files: list[str] = list_repo_files(
            self.__repo_id, repo_type="dataset", revision=self.__revision
        )
        paths_by_day: dict[date, str] = {}
        for path in repo_files:
            day = self.__history_date(path)
            if day is not None:
                paths_by_day[day] = path
        return paths_by_day

    @staticmethod
    def __history_date(path: str) -> Optional[date]:
        """Return the snapshot date encoded in *path*, or ``None``.

        ``None`` is returned when *path* does not start with ``history``, does
        not end with ``data.parquet``, has no second path segment, or that
        segment is not a valid ISO date.
        """
        # NOTE(review): the suffix test also accepts names such as
        # "metadata.parquet"; kept as-is to preserve the existing filter.
        if not (path.startswith("history") and path.endswith("data.parquet")):
            return None
        segments = path.split("/")
        if len(segments) < 2:
            # e.g. a root-level "history_data.parquet": no date directory.
            return None
        try:
            return date.fromisoformat(segments[1])
        except ValueError:
            # Second segment is not a date (e.g. "history/latest/...").
            return None

    def __read_parquet(self, parquet_file: str) -> DataFrame:
        """Download *parquet_file* from the dataset repo and read it as a DataFrame."""
        local_path = hf_hub_download(
            repo_id=self.__repo_id,
            filename=parquet_file,
            repo_type="dataset",
            revision=self.__revision,
        )
        return pd.read_parquet(local_path)