Spaces:
Runtime error
Runtime error
| import h5py | |
| from tqdm import tqdm | |
| import numpy as np | |
| import codecs | |
| from knowledge.utils import file_hash | |
class TextDB:
    """In-memory text database loaded from an HDF5 file.

    The file is read as a sequence of top-level groups named ``"0"``,
    ``"1"``, ... (one per shard). Each group provides a 2-D ``feature``
    dataset and a ``text`` dataset; entries of the two are presumably
    aligned row for row (not verified here).
    """

    def __init__(self, text_db):
        """Load every shard of *text_db* and record the file's hash.

        Args:
            text_db: Path of the HDF5 file; handed to ``h5py.File`` and to
                the project's ``file_hash`` helper.
        """
        # feature: float16 array with the feature rows of all shards stacked.
        # text: list of str collected from all shards in the same order.
        self.feature, self.text = self.load(text_db)
        # Value returned by the project's file_hash helper for this file.
        self.file_hash = file_hash(text_db)

    def load(self, text_db):
        """Read all shards of *text_db* into memory.

        Args:
            text_db: Path of the HDF5 file to read.

        Returns:
            A ``(feature, text)`` tuple. ``feature`` is a float16 array of
            shape ``(total_rows, d)`` where ``d`` is taken from shard 0;
            ``text`` is a list of str gathered shard by shard.

        Note:
            A file without a ``"0/feature"`` dataset (e.g. an empty file)
            fails on that lookup, exactly as before.
        """
        # One open is enough: sizing and copying both happen while the
        # handle is alive.
        with h5py.File(text_db, 'r') as f:
            n_shards = len(f)
            # Size every shard first so the output buffer is allocated once.
            total_rows = sum(len(f[f"{i}/feature"]) for i in range(n_shards))
            _, d = f["0/feature"].shape
            feature = np.zeros((total_rows, d), dtype=np.float16)
            text = []
            offset = 0
            for i in tqdm(range(n_shards), desc="Load text DB", dynamic_ncols=True, mininterval=1.0):
                fi = f[f"{i}/feature"][:]
                feature[offset:offset + len(fi)] = fi
                offset += len(fi)
                text.extend(f[f"{i}/text"][:])
        # Entries normally arrive as bytes and are decoded with the default
        # codec; entries that are already str are kept as they are instead
        # of raising TypeError.
        text = [codecs.decode(t) if isinstance(t, bytes) else t for t in text]
        return feature, text

    def __getitem__(self, idx):
        """Return the feature row(s) and text(s) selected by *idx*.

        Args:
            idx: An int, a slice, an iterable of integer positions, or a
                1-D boolean mask given as a list or ``numpy.ndarray``.

        Returns:
            A ``(features, texts)`` tuple. ``features`` is
            ``self.feature[idx]``. ``texts`` is a list when *idx* is
            iterable, otherwise whatever ``self.text[idx]`` yields (a single
            str for an int, a list for a slice).
        """
        features = self.feature[idx]
        # NumPy applies mask semantics to a 1-D boolean index; convert it to
        # positions so the text lookup selects the same rows instead of
        # using each True/False as the integer position 1/0.
        if isinstance(idx, (list, np.ndarray)):
            as_array = np.asarray(idx)
            if as_array.ndim == 1 and as_array.dtype == np.bool_:
                idx = np.flatnonzero(as_array)
        try:
            texts = [self.text[i] for i in idx]
        except TypeError:
            # idx is not iterable (int, slice, ...): index the list directly.
            texts = self.text[idx]
        return features, texts