Spaces:

matis35
/

feedbacks-scoring

Sleeping

feedbacks-scoring / backend /annotator_config.py

Matis Codjia

Scoring app

1d8c2e0 about 1 month ago

8.22 kB

	"""
	Configuration et gestion des annotateurs
	Permet de définir qui annote quelle partie du dataset
	"""

	import json
	import os
	import streamlit as st
	from pathlib import Path


	# Fallback annotator assignments, keyed by annotator ID. Each entry holds a
	# display name, the start_idx/end_idx bounds of the dataset portion assigned
	# to that annotator, and a human-readable description.
	DEFAULT_ANNOTATOR_CONFIG = {
	    "annotator_1": {
	        "name": "Expert A",
	        "start_idx": 0,
	        "end_idx": 100,
	        "description": "Première portion du dataset",
	    },
	    "annotator_2": {
	        "name": "Expert B",
	        "start_idx": 100,
	        "end_idx": 200,
	        "description": "Deuxième portion du dataset",
	    },
	    "annotator_3": {
	        "name": "Expert C",
	        "start_idx": 200,
	        "end_idx": 300,
	        "description": "Troisième portion du dataset",
	    },
	}


	def load_annotator_config():
	    """
	    Load the annotator configuration from the first usable source.

	    Sources are tried in this priority order:
	      1. The file /app/data/annotators.json
	      2. st.secrets["ANNOTATOR_CONFIG"] (JSON string)
	      3. The ANNOTATOR_CONFIG environment variable (JSON string)
	      4. DEFAULT_ANNOTATOR_CONFIG

	    A source that exists but cannot be read, is not valid JSON, or does not
	    decode to a JSON object is reported with st.warning and skipped, so the
	    caller always receives a dict. A missing secrets file or key is skipped
	    silently.

	    Returns:
	        dict: Annotator configuration keyed by annotator ID. When the
	        default is used, a copy is returned so callers cannot mutate the
	        module-level DEFAULT_ANNOTATOR_CONFIG.
	    """
	    # 1. Local file
	    config_file = Path("/app/data/annotators.json")
	    if config_file.exists():
	        try:
	            with open(config_file, 'r', encoding='utf-8') as f:
	                config = json.load(f)
	        except Exception as e:
	            # Deliberately broad: any read/parse failure falls through to
	            # the next source instead of crashing the app.
	            st.warning(f"⚠️ Erreur lors du chargement de annotators.json: {e}")
	        else:
	            if isinstance(config, dict):
	                return config
	            st.warning("⚠️ annotators.json doit contenir un objet JSON")

	    # 2. st.secrets (HF Spaces)
	    try:
	        secret_value = st.secrets.get("ANNOTATOR_CONFIG")
	    except (FileNotFoundError, KeyError):
	        # No secrets file / key available: not an error, just move on.
	        secret_value = None
	    if secret_value:
	        try:
	            config = json.loads(secret_value)
	        except (TypeError, json.JSONDecodeError):
	            # TypeError covers a secret that is not a string (json.loads
	            # only accepts str/bytes/bytearray).
	            config = None
	        if isinstance(config, dict):
	            return config
	        st.warning("⚠️ ANNOTATOR_CONFIG (st.secrets) mal formaté")

	    # 3. Environment variable
	    env_value = os.getenv("ANNOTATOR_CONFIG")
	    if env_value:
	        try:
	            config = json.loads(env_value)
	        except json.JSONDecodeError:
	            config = None
	        if isinstance(config, dict):
	            return config
	        st.warning("⚠️ ANNOTATOR_CONFIG mal formaté")

	    # 4. Default configuration (copied one level deep so that neither the
	    # top-level mapping nor the per-annotator entries are shared).
	    return {ann_id: dict(entry) for ann_id, entry in DEFAULT_ANNOTATOR_CONFIG.items()}


	def save_annotator_config(config):
	    """
	    Save the annotator configuration to /app/data/annotators.json.

	    The configuration is serialized to a string before the file is opened,
	    so a non-serializable config fails without truncating an existing file.
	    Any failure is reported with st.error rather than raised.

	    Args:
	        config: Configuration dict to persist (must be JSON-serializable)

	    Returns:
	        bool: True on success, False if serialization or writing failed
	    """
	    try:
	        # Serialize first: opening the file in 'w' mode truncates it, so a
	        # serialization error after that point would leave a corrupt file.
	        payload = json.dumps(config, indent=2, ensure_ascii=False)

	        config_file = Path("/app/data/annotators.json")
	        # parents=True so the save also works when /app itself is missing.
	        config_file.parent.mkdir(parents=True, exist_ok=True)

	        with open(config_file, 'w', encoding='utf-8') as f:
	            f.write(payload)

	        return True
	    except Exception as e:
	        # Deliberately broad: the contract is "report in the UI and return
	        # False", whatever went wrong.
	        st.error(f"❌ Erreur lors de la sauvegarde: {e}")
	        return False


	def get_annotator_config(annotator_id):
	    """
	    Look up the configuration entry of a single annotator.

	    Args:
	        annotator_id: ID of the annotator (a key of the loaded configuration)

	    Returns:
	        dict or None: The annotator's entry, or None when the ID is not
	        present in the configuration returned by load_annotator_config()
	    """
	    return load_annotator_config().get(annotator_id)


	def filter_dataset_for_annotator(dataset, annotator_config):
	    """
	    Return the slice of the dataset assigned to one annotator.

	    Args:
	        dataset: List of dataset items
	        annotator_config: Annotator entry; "start_idx" defaults to 0 and
	            "end_idx" defaults to len(dataset) when absent

	    Returns:
	        list: dataset[start:end] after clamping both bounds to
	        [0, len(dataset)] and forcing end >= start
	    """
	    total = len(dataset)

	    lower = annotator_config.get("start_idx", 0)
	    upper = annotator_config.get("end_idx", total)

	    # Clamp the bounds so the slice is always valid and never reversed.
	    lower = max(0, min(lower, total))
	    upper = max(lower, min(upper, total))

	    portion = slice(lower, upper)
	    return dataset[portion]


	def validate_annotator_config(config):
	    """
	    Validate an annotator configuration.

	    Checks that the configuration is a dict of dicts, that every entry has
	    the fields "name", "start_idx" and "end_idx", that both indices are
	    integers (booleans are rejected), and that start_idx < end_idx.

	    The ordering check is only performed when both indices are valid
	    integers, so malformed values produce error messages instead of a
	    TypeError from comparing unrelated types.

	    Args:
	        config: Configuration to validate (typically parsed from JSON)

	    Returns:
	        (bool, list): (is_valid, list_of_errors); error messages are strings
	    """
	    errors = []

	    if not isinstance(config, dict):
	        errors.append("La configuration doit être un dictionnaire")
	        return False, errors

	    for ann_id, ann_config in config.items():
	        if not isinstance(ann_config, dict):
	            errors.append(f"{ann_id}: La configuration doit être un dict")
	            continue

	        # Required fields
	        for field in ("name", "start_idx", "end_idx"):
	            if field not in ann_config:
	                errors.append(f"{ann_id}: Champ '{field}' manquant")

	        # Index types. bool is a subclass of int in Python, so it has to be
	        # excluded explicitly.
	        bounds_comparable = True
	        for field in ("start_idx", "end_idx"):
	            if field not in ann_config:
	                bounds_comparable = False
	                continue
	            value = ann_config[field]
	            if isinstance(value, bool) or not isinstance(value, int):
	                errors.append(f"{ann_id}: {field} doit être un entier")
	                bounds_comparable = False

	        # Ordering, only when both bounds are present and are integers
	        if bounds_comparable and ann_config["start_idx"] >= ann_config["end_idx"]:
	            errors.append(f"{ann_id}: start_idx doit être < end_idx")

	    return not errors, errors


	def create_annotator_config_from_chunks(num_annotators, total_items):
	    """
	    Build a configuration that splits a dataset into contiguous chunks.

	    Each annotator receives total_items // num_annotators items; the last
	    annotator additionally receives the remainder. If num_annotators
	    exceeds total_items, the chunk size is 0 and every annotator except
	    the last receives an empty range (start_idx == end_idx).

	    Args:
	        num_annotators: Number of annotators
	        total_items: Total number of items in the dataset

	    Returns:
	        dict: Generated configuration keyed "annotator_1", "annotator_2", ...;
	        empty when num_annotators is zero or negative
	    """
	    # Nothing to split between zero (or fewer) annotators; this also avoids
	    # a ZeroDivisionError in the chunk-size computation below.
	    if num_annotators <= 0:
	        return {}

	    items_per_annotator = total_items // num_annotators
	    config = {}

	    for i in range(num_annotators):
	        ann_id = f"annotator_{i+1}"
	        start_idx = i * items_per_annotator

	        # The last annotator takes everything that is left
	        if i == num_annotators - 1:
	            end_idx = total_items
	        else:
	            end_idx = (i + 1) * items_per_annotator

	        config[ann_id] = {
	            "name": f"Annotateur {i+1}",
	            "start_idx": start_idx,
	            "end_idx": end_idx,
	            "description": f"Items {start_idx} à {end_idx-1}",
	        }

	    return config


	def show_annotator_config_editor():
	    """
	    Render the annotator configuration editor (admin).

	    Shows the current configuration and offers two ways to replace it:
	    generating an even split from a number of annotators and items, or
	    editing the JSON by hand. Both paths save through
	    save_annotator_config() and trigger st.rerun() on success.

	    The generated configuration is kept in st.session_state between
	    reruns: a Streamlit button only returns True on the run triggered by
	    its own click, so a save button nested under the generate button
	    would never be reached.
	    """
	    st.markdown("## ⚙️ Configuration des Annotateurs")

	    config = load_annotator_config()

	    st.info("Cette section permet de configurer les portions du dataset pour chaque annotateur")

	    # Show the current configuration
	    st.json(config)

	    # Session-state key under which a generated but not yet saved
	    # configuration survives script reruns.
	    generated_key = "generated_annotator_config"

	    # Option to generate a new configuration
	    with st.expander("Créer une nouvelle configuration"):
	        col1, col2 = st.columns(2)

	        with col1:
	            num_annotators = st.number_input(
	                "Nombre d'annotateurs",
	                min_value=1,
	                max_value=20,
	                value=3
	            )

	        with col2:
	            total_items = st.number_input(
	                "Nombre total d'items",
	                min_value=1,
	                value=300
	            )

	        if st.button("Générer la configuration", type="primary"):
	            st.session_state[generated_key] = create_annotator_config_from_chunks(
	                num_annotators, total_items
	            )

	        new_config = st.session_state.get(generated_key)
	        if new_config is not None:
	            st.json(new_config)

	            # Sibling of the generate button (not nested under it), so the
	            # click is still seen on the rerun it triggers.
	            if st.button("Sauvegarder cette configuration"):
	                if save_annotator_config(new_config):
	                    del st.session_state[generated_key]
	                    st.success("✅ Configuration sauvegardée")
	                    st.rerun()

	    # Manual editor
	    with st.expander("Éditer manuellement (JSON)"):
	        st.markdown("Format attendu:")
	        # ensure_ascii=False keeps accented characters readable, matching
	        # how save_annotator_config() writes the file.
	        st.code(
	            json.dumps(DEFAULT_ANNOTATOR_CONFIG, indent=2, ensure_ascii=False),
	            language="json"
	        )

	        config_text = st.text_area(
	            "Configuration JSON",
	            value=json.dumps(config, indent=2, ensure_ascii=False),
	            height=300
	        )

	        if st.button("Valider et sauvegarder"):
	            try:
	                new_config = json.loads(config_text)
	            except json.JSONDecodeError as e:
	                st.error(f"❌ JSON invalide: {e}")
	            else:
	                is_valid, errors = validate_annotator_config(new_config)

	                if is_valid:
	                    if save_annotator_config(new_config):
	                        st.success("✅ Configuration validée et sauvegardée")
	                        st.rerun()
	                else:
	                    st.error("❌ Configuration invalide:")
	                    for error in errors:
	                        st.error(f" - {error}")