feedbacks-scoring / backend /data_loader.py
Matis Codjia
Scoring app
1d8c2e0
raw
history blame contribute delete
842 Bytes
"""
Data loading utilities
"""
import json
from datasets import load_dataset
def load_dataset_from_jsonl(file):
    """Load a dataset from a JSONL (one JSON document per line) file object.

    Args:
        file: Object whose ``getvalue()`` returns the whole file content as
            UTF-8 encoded bytes (``io.BytesIO`` satisfies this).

    Returns:
        A list holding one decoded JSON value per non-blank line, in file
        order. Blank and whitespace-only lines are skipped.

    Raises:
        UnicodeDecodeError: If the content is not valid UTF-8.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # "utf-8-sig" decodes plain UTF-8 identically but drops a leading BOM,
    # which json.loads would otherwise reject on the first line.
    content = file.getvalue().decode('utf-8-sig')
    # Split on "\n" only: str.splitlines() would also break on characters
    # such as U+2028 that may legally appear unescaped inside JSON strings.
    return [json.loads(line) for line in content.split('\n') if line.strip()]
def load_dataset_from_hf(dataset_name, split='train'):
    """Load one split of a HuggingFace dataset and return its rows as a list.

    Args:
        dataset_name: Dataset identifier forwarded to ``load_dataset``.
        split: Name of the split to load, forwarded as the ``split``
            keyword argument. Defaults to ``'train'``.

    Returns:
        A list built by iterating over the object ``load_dataset`` returns.
    """
    # list() materializes the loaded split so callers get a plain list.
    return list(load_dataset(dataset_name, split=split))
def filter_items_with_positive(dataset):
    """Keep the items that have a non-empty 'positive' field.

    Args:
        dataset: Iterable of items supporting ``in`` and ``[]`` lookups
            by key (e.g. dicts).

    Returns:
        A list of ``(index, item)`` tuples, where ``index`` is the item's
        position in ``dataset``, for every item whose ``'positive'`` key is
        present and holds a truthy value.
    """
    return [
        (position, record)
        for position, record in enumerate(dataset)
        # The key must exist AND be truthy: '', None, [] and 0 are rejected.
        if 'positive' in record and record['positive']
    ]