""" Data loading utilities """ import json from datasets import load_dataset def load_dataset_from_jsonl(file): """Charge un dataset depuis un fichier JSONL""" data = [] content = file.getvalue().decode('utf-8') for line in content.split('\n'): if line.strip(): data.append(json.loads(line)) return data def load_dataset_from_hf(dataset_name, split='train'): """Charge un dataset depuis HuggingFace""" dataset = load_dataset(dataset_name, split=split) return list(dataset) def filter_items_with_positive(dataset): """Filtre les items qui ont un champ 'positive' non vide""" items_with_positive = [] for idx, item in enumerate(dataset): if 'positive' in item and item['positive']: items_with_positive.append((idx, item)) return items_with_positive