Spaces:
Sleeping
Sleeping
| """ | |
| Data loading utilities | |
| """ | |
| import json | |
| from datasets import load_dataset | |
def load_dataset_from_jsonl(file):
    """Load a dataset from an in-memory JSONL file.

    Args:
        file: Object exposing ``getvalue()`` that returns the raw file
            content as UTF-8 encoded bytes (for example ``io.BytesIO``).

    Returns:
        A list with one decoded JSON value per non-blank line, in file order.

    Raises:
        UnicodeDecodeError: If the content is not valid UTF-8.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # 'utf-8-sig' decodes plain UTF-8 and additionally drops a leading BOM,
    # which json.loads would otherwise reject on the first record.
    content = file.getvalue().decode('utf-8-sig')
    # Split on '\n' only: str.splitlines() also breaks on characters such as
    # U+2028 that may legally appear unescaped inside a JSON string.
    return [json.loads(line) for line in content.split('\n') if line.strip()]
def load_dataset_from_hf(dataset_name, split='train'):
    """Load a dataset split from HuggingFace and return it as a list.

    Args:
        dataset_name: Dataset identifier forwarded to ``load_dataset``.
        split: Name of the split to load; defaults to ``'train'``.

    Returns:
        A list built by iterating over the loaded split.
    """
    # Materialize the loaded split so callers get a plain list.
    return list(load_dataset(dataset_name, split=split))
def filter_items_with_positive(dataset):
    """Keep the items that have a non-empty 'positive' field.

    Args:
        dataset: Iterable of items supporting ``in`` and key lookup.

    Returns:
        A list of ``(index, item)`` tuples, where ``index`` is the item's
        position in ``dataset``, for every item whose ``'positive'`` key is
        present and holds a truthy value.
    """
    return [
        (position, entry)
        for position, entry in enumerate(dataset)
        # Membership is tested first so a missing key never raises.
        if 'positive' in entry and entry['positive']
    ]