Spaces:
Runtime error
Runtime error
| from .sourcer import search_web | |
| import pandas as pd | |
| import os | |
| import glob | |
# Directory that holds the local datasets (relative to the working directory).
root_dir = 'data/datasets'

# Load the simplified Pira CSV and keep its `text` column as a plain list.
pira_df = pd.read_csv(os.path.join(root_dir, 'pira_simplified.csv'))
pira_corpus = pira_df.text.to_list()

# Collect every .txt file in the `onu` folder. glob returns paths in arbitrary
# order, so sort them to keep the corpus order reproducible between runs.
txt_path = os.path.join(root_dir, 'onu')
filenames = sorted(glob.glob(os.path.join(txt_path, '*.txt')))

# Read each file whole; one list entry per file.
onu_corpus = []
for filename in filenames:
    # Pin the encoding so decoding does not depend on the host locale.
    # NOTE(review): assumes the .txt files are UTF-8 encoded -- confirm.
    with open(filename, 'r', encoding='utf-8') as f:
        onu_corpus.append(f.read())
def gen_corpus(query: str, pira: bool = True, ONU: bool = True, web: bool = True) -> list:
    """Assemble the document corpus for ``query`` from the enabled sources.

    Args:
        query: Search string; only used when ``web`` is true, in which case it
            is passed to ``search_web``.
        pira: Include the entries of the module-level ``pira_corpus``.
        ONU: Include the entries of the module-level ``onu_corpus``.
        web: Include whatever ``search_web(query)`` returns.

    Returns:
        A new list with the enabled sources concatenated in the order
        Pira, ONU, web. The module-level corpora are not modified.

    Raises:
        ValueError: If ``pira``, ``ONU`` and ``web`` are all false, since no
            source would be left to build a corpus from.
    """
    # Fail fast instead of silently handing back an empty corpus.
    if not (pira or ONU or web):
        raise ValueError(
            "At least one corpus source (pira, ONU or web) must be enabled."
        )

    corpus = []
    if pira:
        corpus.extend(pira_corpus)
    if ONU:
        corpus.extend(onu_corpus)
    if web:
        # The web search is only run when explicitly requested.
        corpus.extend(search_web(query))
    return corpus