| import os | |
| import json | |
def _count_samples(path):
    """Return the number of samples stored in the dataset file at *path*.

    ``.jsonl`` files are counted as one sample per non-blank line. Any other
    file is parsed as a single JSON document and must be a list or an object;
    its length is the sample count.

    Raises:
        OSError: the file cannot be opened or read.
        ValueError: the file is not valid UTF-8 / JSON, or the JSON document
            is a bare scalar and therefore has no meaningful sample count.
    """
    # JSON and JSON Lines are UTF-8 by specification; do not depend on the
    # platform's locale encoding.
    with open(path, "r", encoding="utf-8") as handle:
        if path.endswith(".jsonl"):
            # Blank lines (typically a trailing one) are not samples.
            return sum(1 for line in handle if line.strip())
        data = json.load(handle)

    if not isinstance(data, (list, dict)):
        # len() of a top-level string would report characters, not samples.
        raise ValueError(f"unsupported top-level JSON type: {type(data).__name__}")
    return len(data)


def detect_datasets(base_path="~/humigence_data"):
    """Find ``.json`` / ``.jsonl`` dataset files below *base_path*.

    The directory tree is walked recursively, with directories and files
    visited in sorted order so the result is stable between runs.

    Args:
        base_path: Directory to scan. A leading ``~`` is expanded to the
            current user's home directory.

    Returns:
        A list of ``(display_name, full_path)`` tuples, where ``display_name``
        is ``"<file name> (<count> samples)"``. Files that cannot be read or
        parsed are left out. A missing *base_path* yields an empty list.
    """
    base_path = os.path.expanduser(base_path)
    choices = []

    for root, dirs, files in os.walk(base_path):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith((".jsonl", ".json")):
                continue

            full_path = os.path.join(root, file)
            try:
                count = _count_samples(full_path)
            except Exception:
                # Deliberately best-effort: one unreadable, malformed or
                # oversized file must not abort discovery of the others.
                continue

            choices.append((f"{file} ({count} samples)", full_path))

    return choices