| import os | |
| import ujson | |
| from collections import defaultdict | |
| from colbert.utils.utils import print_message, file_tqdm | |
| def load_collection_(path, retain_titles): | |
| with open(path) as f: | |
| collection = [] | |
| for line in file_tqdm(f): | |
| _, passage, title = line.strip().split('\t') | |
| if retain_titles: | |
| passage = title + ' | ' + passage | |
| collection.append(passage) | |
| return collection | |
| def load_qas_(path): | |
| print_message("#> Loading the reference QAs from", path) | |
| triples = [] | |
| with open(path) as f: | |
| for line in f: | |
| qa = ujson.loads(line) | |
| triples.append((qa['qid'], qa['question'], qa['answers'])) | |
| return triples | |