Spaces:
Sleeping
Sleeping
Switch to "main" branch
Browse files
- src/dataset_data.py +3 -2
src/dataset_data.py
CHANGED
|
@@ -8,6 +8,7 @@ from pandas import DataFrame
|
|
| 8 |
|
| 9 |
class DatasetData:
|
| 10 |
__repo_id: str = "Ya-Alex/anki-addons"
|
|
|
|
| 11 |
|
| 12 |
def __init__(self):
|
| 13 |
self.parquet_files: Optional[dict[date, DataFrame]] = None
|
|
@@ -19,7 +20,7 @@ class DatasetData:
|
|
| 19 |
return self.parquet_files
|
| 20 |
|
| 21 |
def __list_history_parquet_files(self) -> dict[date, str]:
|
| 22 |
-
all_files: list[str] = list_repo_files(self.__repo_id, repo_type="dataset", revision=
|
| 23 |
history_files: list[str] = [file for file in all_files if file.startswith("history")]
|
| 24 |
parquet_files: list[str] = [file for file in history_files if file.endswith("data.parquet")]
|
| 25 |
parquet_file_dict: dict[date, str] = {date.fromisoformat(file.split("/")[1]): file for file in parquet_files}
|
|
@@ -28,4 +29,4 @@ class DatasetData:
|
|
| 28 |
def __read_parquet(self, parquet_file: str) -> DataFrame:
|
| 29 |
return pd.read_parquet(
|
| 30 |
hf_hub_download(repo_id=self.__repo_id, filename=parquet_file, repo_type="dataset",
|
| 31 |
-
revision=
|
|
|
|
| 8 |
|
| 9 |
class DatasetData:
|
| 10 |
__repo_id: str = "Ya-Alex/anki-addons"
|
| 11 |
+
__revision: str = "main"
|
| 12 |
|
| 13 |
def __init__(self):
|
| 14 |
self.parquet_files: Optional[dict[date, DataFrame]] = None
|
|
|
|
| 20 |
return self.parquet_files
|
| 21 |
|
| 22 |
def __list_history_parquet_files(self) -> dict[date, str]:
|
| 23 |
+
all_files: list[str] = list_repo_files(self.__repo_id, repo_type="dataset", revision=self.__revision)
|
| 24 |
history_files: list[str] = [file for file in all_files if file.startswith("history")]
|
| 25 |
parquet_files: list[str] = [file for file in history_files if file.endswith("data.parquet")]
|
| 26 |
parquet_file_dict: dict[date, str] = {date.fromisoformat(file.split("/")[1]): file for file in parquet_files}
|
|
|
|
| 29 |
def __read_parquet(self, parquet_file: str) -> DataFrame:
|
| 30 |
return pd.read_parquet(
|
| 31 |
hf_hub_download(repo_id=self.__repo_id, filename=parquet_file, repo_type="dataset",
|
| 32 |
+
revision=self.__revision))
|