DeltaVenom's picture
add project
cff1a2a
import json
import os
import hashlib
from typing import Dict
class IngestionState:
    """Track content hashes of ingested files in a JSON state file.

    The state is a mapping of file path (exactly as passed by the caller)
    to the hex MD5 digest of that file's content at the time
    ``update_file`` was last called for it. Changes are kept in memory
    until ``save_state`` is called.
    """

    # Number of bytes read per iteration when hashing a file.
    _CHUNK_SIZE = 65536

    def __init__(self, state_file: str = "ingestion/state.json"):
        """Load any existing state from *state_file*.

        Args:
            state_file: Path of the JSON file used to persist the state.
        """
        self.state_file = state_file
        self.state = self._load_state()

    def _load_state(self) -> Dict[str, str]:
        """Return the persisted state, or an empty dict if no file exists.

        Raises:
            json.JSONDecodeError: If the state file exists but does not
                contain valid JSON.
        """
        # Open directly instead of checking existence first, so a file
        # removed between the check and the open cannot cause a crash.
        try:
            with open(self.state_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            return {}

    def save_state(self):
        """Write the in-memory state to ``self.state_file``.

        The parent directory is created when missing. The data is first
        written to a temporary sibling file and then moved into place, so
        an interrupted write does not leave a truncated state file behind.
        """
        parent_dir = os.path.dirname(self.state_file)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        tmp_path = f"{self.state_file}.tmp"
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(self.state, f, indent=4)
        os.replace(tmp_path, self.state_file)

    def get_file_hash(self, file_path: str) -> str:
        """Return the hex MD5 digest of the content of *file_path*.

        The file is read in chunks so it is never held in memory as a
        whole. MD5 serves only as a change-detection fingerprint here; it
        is kept so digests stored in existing state files still match.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        hasher = hashlib.md5()
        with open(file_path, 'rb') as f:
            # iter() with a sentinel stops at the first empty read (EOF).
            for chunk in iter(lambda: f.read(self._CHUNK_SIZE), b""):
                hasher.update(chunk)
        return hasher.hexdigest()

    def is_file_changed(self, file_path: str) -> bool:
        """Return True if *file_path* differs from its recorded hash.

        A path with no recorded hash is reported as changed.
        """
        current_hash = self.get_file_hash(file_path)
        last_hash = self.state.get(file_path)
        return current_hash != last_hash

    def update_file(self, file_path: str):
        """Record the current hash of *file_path* in the in-memory state.

        Call ``save_state`` afterwards to persist the change.
        """
        self.state[file_path] = self.get_file_hash(file_path)