File size: 2,959 Bytes
a35137b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 | import json
import os
from typing import Tuple
class FileProgressTracker:
    """Persist and restore the position of the last completely processed file.

    The progress of every experiment is kept in a single JSON file so that an
    interrupted run can continue where it stopped. The file has the following
    structure:

    {
        [experiment]: {
            [self._file_ind_key]: int,
            [self._ending_ind_key]: int,
            [self._segment_id_key]: int,
            [self._completed_key]: bool
        }
    }
    """

    def __init__(self, save_path: str, experiment: str):
        """Remember where progress is stored and which experiment is tracked.

        Args:
            save_path: Path of the JSON progress file. It is written on the
                first update; nothing is touched on construction.
            experiment: Name of the experiment, used as the top-level key.
        """
        self.path = save_path
        self.experiment = experiment
        self._file_ind_key = "file_ind"
        self._ending_ind_key = "ending_ind"
        self._segment_id_key = "segment_id"
        self._completed_key = "is_completed"

    def _load_file(self) -> dict:
        """Load processing info for all experiments from the progress file.

        A missing or empty file is treated as "no progress yet". If this
        experiment has no entry, a default one is added to the returned
        dictionary (the file itself is not modified).

        Returns:
            A dictionary with the structure described in the class docstring.

        Raises:
            json.JSONDecodeError: If the file has content that is not valid JSON.
        """
        data = {}
        if os.path.exists(self.path):
            with open(self.path, encoding="utf-8") as f:
                content = f.read()
            # An empty file (e.g. pre-created by the caller) holds no progress;
            # json would otherwise reject it.
            if content.strip():
                data = json.loads(content)
        if self.experiment not in data:
            data[self.experiment] = {
                self._file_ind_key: 0,
                self._ending_ind_key: 0,
                self._segment_id_key: -1,
                self._completed_key: False,
            }
        return data

    def _update_file(self, update_dict: dict) -> None:
        """Merge ``update_dict`` into this experiment's entry and save the file.

        Entries of other experiments and keys not named in ``update_dict`` are
        kept as they are.

        Raises:
            TypeError: If a value in ``update_dict`` is not JSON serializable;
                the existing progress file is left untouched in that case.
        """
        data = self._load_file()
        data[self.experiment].update(update_dict)
        # Serialize before opening anything for writing, so a serialization
        # error cannot truncate the existing progress file.
        payload = json.dumps(data)
        # Write to a sibling file and swap it in, so an interruption during
        # the write never leaves a half-written progress file behind.
        tmp_path = f"{self.path}.tmp"
        with open(tmp_path, "w", encoding="utf-8") as f:
            f.write(payload)
        os.replace(tmp_path, self.path)

    def get_last_file_ind(self) -> Tuple[int, int, int]:
        """Get the last file that was processed for this experiment.

        Returns:
            A tuple containing the file index, the ending index in that file,
            and the segment number of the last processed file. When nothing
            has been stored yet this is ``(0, 0, -1)``.
        """
        entry = self._load_file()[self.experiment]
        return (
            entry[self._file_ind_key],
            entry[self._ending_ind_key],
            entry[self._segment_id_key],
        )

    def update_last_file_ind(
        self, file_ind: int, ending_ind: int, segment_id: int
    ) -> None:
        """Store the last processed file info for this experiment.

        The completion flag and the entries of other experiments are not
        changed.
        """
        self._update_file(
            {
                self._file_ind_key: file_ind,
                self._ending_ind_key: ending_ind,
                self._segment_id_key: segment_id,
            }
        )

    def mark_completion_status(self, completed: bool = True) -> None:
        """Store whether this experiment has been completely processed."""
        self._update_file({self._completed_key: completed})

    def is_completed(self) -> bool:
        """Return the stored completion flag, or False when none is stored."""
        data = self._load_file()
        return data[self.experiment].get(self._completed_key, False)

    def reset_process(self) -> None:
        """Reset file processing status to its initial values."""
        # One update instead of two, so the reset is a single write and the
        # file never holds a partially reset entry.
        self._update_file(
            {
                self._completed_key: False,
                self._file_ind_key: 0,
                self._ending_ind_key: 0,
                self._segment_id_key: -1,
            }
        )
|