# feedbacks-scoring / backend / persistence.py
# Author: Matis Codjia
# Scoring app
# Commit: 1d8c2e0
"""
Persistence utilities for datasets and scores
"""
import json
from pathlib import Path
from datetime import datetime
class SessionManager:
    """Manage persistence of scoring sessions.

    All state lives under ``data_dir`` as three files:
      - ``dataset.jsonl``          : the dataset, one JSON object per line
      - ``dataset_metadata.json``  : save timestamp, item count, source info
      - ``session.json``           : scores, comments and current position
    """

    def __init__(self, data_dir="/app/data"):
        self.data_dir = Path(data_dir)
        # parents=True: also create missing intermediate directories —
        # without it, mkdir raises FileNotFoundError when the parent of
        # data_dir does not exist yet (e.g. a fresh container volume).
        self.data_dir.mkdir(parents=True, exist_ok=True)
        self.dataset_file = self.data_dir / "dataset.jsonl"
        self.session_file = self.data_dir / "session.json"

    def save_dataset(self, dataset, source_info=None):
        """Save the dataset as JSONL plus a metadata sidecar file.

        Args:
            dataset: List of dicts; each item is written as one JSON line.
            source_info: Optional dict describing the source
                (HF dataset name, uploaded file, etc.).

        Returns:
            True on success.
        """
        # Write the dataset itself, one JSON object per line.
        with open(self.dataset_file, 'w', encoding='utf-8') as f:
            for item in dataset:
                f.write(json.dumps(item, ensure_ascii=False) + '\n')
        # Write the metadata sidecar.
        metadata = {
            'saved_at': datetime.now().isoformat(),
            'total_items': len(dataset),
            'source_info': source_info or {},
        }
        metadata_file = self.data_dir / "dataset_metadata.json"
        with open(metadata_file, 'w', encoding='utf-8') as f:
            json.dump(metadata, f, indent=2, ensure_ascii=False)
        return True

    def load_dataset(self):
        """Load the saved dataset.

        Returns:
            Tuple (dataset, metadata), or (None, None) if no dataset
            has been saved.
        """
        if not self.dataset_file.exists():
            return None, None
        # Read the dataset back, skipping blank lines defensively.
        dataset = []
        with open(self.dataset_file, 'r', encoding='utf-8') as f:
            for line in f:
                if line.strip():
                    dataset.append(json.loads(line))
        # Metadata is optional: missing sidecar yields metadata=None.
        metadata_file = self.data_dir / "dataset_metadata.json"
        metadata = None
        if metadata_file.exists():
            with open(metadata_file, 'r', encoding='utf-8') as f:
                metadata = json.load(f)
        return dataset, metadata

    def save_session(self, feedback_scores, feedback_comments, scoring_index):
        """Save the session state (scores, comments, current position).

        Args:
            feedback_scores: Dict {idx: score}.
            feedback_comments: Dict {idx: comment}.
            scoring_index: Current index in the scoring loop.

        Returns:
            True on success.
        """
        session_data = {
            # JSON object keys must be strings; load_session() converts
            # them back to int.
            'feedback_scores': {str(k): v for k, v in feedback_scores.items()},
            'feedback_comments': {str(k): v for k, v in feedback_comments.items()},
            'scoring_index': scoring_index,
            'last_saved': datetime.now().isoformat(),
            'total_scored': len(feedback_scores),
        }
        with open(self.session_file, 'w', encoding='utf-8') as f:
            json.dump(session_data, f, indent=2, ensure_ascii=False)
        return True

    def load_session(self):
        """Load the saved session state.

        Returns:
            Dict with feedback_scores, feedback_comments, scoring_index,
            or None if no session has been saved.
        """
        if not self.session_file.exists():
            return None
        with open(self.session_file, 'r', encoding='utf-8') as f:
            session_data = json.load(f)
        # Restore int keys (JSON forces string keys). Use .get() so a
        # partially written or older session file degrades to empty dicts
        # instead of raising KeyError.
        session_data['feedback_scores'] = {
            int(k): v for k, v in session_data.get('feedback_scores', {}).items()
        }
        session_data['feedback_comments'] = {
            int(k): v for k, v in session_data.get('feedback_comments', {}).items()
        }
        return session_data

    def has_saved_session(self):
        """Return True if both a saved dataset and a saved session exist."""
        return self.dataset_file.exists() and self.session_file.exists()

    def clear_session(self):
        """Delete every saved file (dataset, session, metadata).

        Returns:
            True on success (missing files are silently skipped).
        """
        for path in (
            self.dataset_file,
            self.session_file,
            self.data_dir / "dataset_metadata.json",
        ):
            if path.exists():
                path.unlink()
        return True

    def get_session_info(self):
        """Summarize the saved session.

        Returns:
            Dict with dataset_size, total_scored, last_saved,
            scoring_index and source_info, or None when there is no
            usable saved session.
        """
        if not self.has_saved_session():
            return None
        dataset, dataset_metadata = self.load_dataset()
        session_data = self.load_session()
        if dataset and session_data:
            return {
                'dataset_size': len(dataset),
                # .get() keeps this robust against session files written
                # by older versions that lack these fields.
                'total_scored': session_data.get('total_scored', 0),
                'last_saved': session_data.get('last_saved'),
                'scoring_index': session_data.get('scoring_index', 0),
                'source_info': dataset_metadata.get('source_info', {}) if dataset_metadata else {},
            }
        return None