| import json | |
| import os | |
| from typing import Tuple | |
class FileProgressTracker:
    """Load and store the latest completely processed file position.

    This class saves the information required to resume processing for an
    experiment. The JSON file at ``save_path`` has the structure::

        {
            "<experiment>": {
                "file_ind": int,
                "ending_ind": int,
                "segment_id": int,
                "is_completed": bool
            }
        }

    Entries of other experiments stored in the same file are preserved when
    this tracker writes its own entry.
    """

    def __init__(self, save_path: str, experiment: str):
        """Create a tracker for ``experiment`` backed by the file ``save_path``.

        Nothing is read or written until one of the other methods is called.
        """
        self.path = save_path
        self.experiment = experiment
        self._file_ind_key = "file_ind"
        self._ending_ind_key = "ending_ind"
        self._segment_id_key = "segment_id"
        self._completed_key = "is_completed"

    def _default_entry(self) -> dict:
        """Return the progress entry used for an experiment with no saved state."""
        return {
            self._file_ind_key: 0,
            self._ending_ind_key: 0,
            self._segment_id_key: -1,
            self._completed_key: False,
        }

    def _load_file(self) -> dict:
        """Load processing info from file.

        A missing or empty file is treated as having no saved data. Keys that
        are absent from this experiment's entry are filled with defaults.

        Returns:
            A dictionary having the structure described in the class docstring,
            always containing a complete entry for ``self.experiment``.

        Raises:
            json.JSONDecodeError: If the file holds non-empty, malformed JSON.
        """
        data = {}
        try:
            with open(self.path, encoding="utf-8") as f:
                content = f.read()
        except FileNotFoundError:
            content = ""
        # An empty file (e.g. left by an interrupted write) carries no state.
        if content.strip():
            data = json.loads(content)

        # Merge stored values over the defaults so a partially populated
        # entry never causes a KeyError in the getters.
        entry = self._default_entry()
        entry.update(data.get(self.experiment, {}))
        data[self.experiment] = entry
        return data

    def _update_file(self, update_dict: dict) -> None:
        """Update the given keys of this experiment's entry in the file.

        The new content is serialized first and written to a sibling
        ``<path>.tmp`` file that then replaces the target, so a failure while
        serializing or writing does not truncate the existing progress file.
        """
        data = self._load_file()
        data[self.experiment].update(update_dict)
        payload = json.dumps(data)  # serialize before touching any file
        tmp_path = f"{self.path}.tmp"
        with open(tmp_path, "w", encoding="utf-8") as f:
            f.write(payload)
        os.replace(tmp_path, self.path)

    def get_last_file_ind(self) -> Tuple[int, int, int]:
        """Get the last processed position for this experiment.

        Returns:
            A tuple ``(file_ind, ending_ind, segment_id)``; ``(0, 0, -1)`` when
            nothing has been saved for this experiment.
        """
        entry = self._load_file()[self.experiment]
        return (
            entry[self._file_ind_key],
            entry[self._ending_ind_key],
            entry[self._segment_id_key],
        )

    def update_last_file_ind(
        self, file_ind: int, ending_ind: int, segment_id: int
    ) -> None:
        """Store the last processed position, leaving other info in the file unchanged."""
        self._update_file(
            {
                self._file_ind_key: file_ind,
                self._ending_ind_key: ending_ind,
                self._segment_id_key: segment_id,
            }
        )

    def mark_completion_status(self, completed: bool = True) -> None:
        """Store whether processing of this experiment is completed."""
        self._update_file({self._completed_key: completed})

    def is_completed(self) -> bool:
        """Return the stored completion flag (``False`` when never set)."""
        entry = self._load_file()[self.experiment]
        return entry.get(self._completed_key, False)

    def reset_process(self) -> None:
        """Reset file processing status to its defaults in a single write."""
        # One write instead of two keeps the completion flag and the indices
        # consistent even if the process is interrupted during the reset.
        self._update_file(self._default_entry())