Spaces:
Build error
Build error
PeteBleackley
committed on
Commit
·
d63afc6
1
Parent(s):
9ca9d81
Script for preparing training datasets
Browse files- scripts.py +35 -0
scripts.py
CHANGED
|
@@ -69,6 +69,39 @@ def train_base_model(task,filename):
|
|
| 69 |
print(model.evaluate(test_data))
|
| 70 |
model.save(filename)
|
| 71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
|
| 74 |
|
|
@@ -84,4 +117,6 @@ if __name__ == '__main__':
|
|
| 84 |
train_base_model(args.training_task,args.filename)
|
| 85 |
elif args.task == 'prepare_wiki_qa':
|
| 86 |
prepare_wiki_qa(args.filename,args.outputfile)
|
|
|
|
|
|
|
| 87 |
|
|
|
|
| 69 |
print(model.evaluate(test_data))
|
| 70 |
model.save(filename)
|
| 71 |
|
| 72 |
+
def prepare_training_datasets():
    """Derive the training CSV files from the raw corpora in ``corpora/``.

    Reads ``corpora/WikiQA.csv``, ``corpora/Avicenna_Train.csv`` and
    ``corpora/snli_1.0_train.csv`` and writes four files next to them:

    * ``question_answering.csv`` -- ``question``/``answer`` columns taken
      from the WikiQA rows whose ``Label`` equals 1.
    * ``reasoning_train.csv`` -- ``proposition0``/``proposition1``/
      ``conclusion`` columns taken from the Avicenna rows whose
      ``Syllogistic relation`` equals ``'yes'``.
    * ``consistency.csv`` -- ``statement0``/``statement1`` columns from SNLI
      plus a numeric ``consistency`` column (entailment 1.0, neutral 0.0,
      contradiction -1.0).
    * ``all_text.csv`` -- one ``all_text`` column stacking every WikiQA
      answer, both Avicenna premises, the selected conclusions and both
      SNLI sentences.

    All paths are relative to the current working directory. The function
    takes no arguments and returns ``None``.
    """
    wikiqa = pandas.read_csv('corpora/WikiQA.csv')
    avicenna = pandas.read_csv('corpora/Avicenna_Train.csv')
    snli = pandas.read_csv('corpora/snli_1.0_train.csv')

    # Keep only the question/answer pairs labelled as correct.
    question_answering = wikiqa.loc[
        wikiqa['Label'] == 1,
        ['Cleaned_question', 'Resolved_answer'],
    ].rename(columns={'Cleaned_question': 'question',
                      'Resolved_answer': 'answer'})

    # Keep only the premise pairs that actually yield a conclusion.
    # The column is 'Conclusion'; the earlier 'Conclustion' spelling raised
    # KeyError and did not match the rename mapping below.
    reasoning = avicenna.loc[
        avicenna['Syllogistic relation'] == 'yes',
        ['Premise 1', 'Premise 2', 'Conclusion'],
    ].rename(columns={'Premise 1': 'proposition0',
                      'Premise 2': 'proposition1',
                      'Conclusion': 'conclusion'})

    mapping = {'entailment': 1.0,
               'neutral': 0.0,
               'contradiction': -1.0}
    # Series.map yields NaN for labels missing from `mapping` instead of
    # raising KeyError; such rows (e.g. the '-' label SNLI uses when the
    # annotators reached no consensus) carry no usable score and are dropped.
    scores = snli['gold_label'].map(mapping)
    consistency = snli[['sentence1', 'sentence2']].rename(
        columns={'sentence1': 'statement0',
                 'sentence2': 'statement1'})
    consistency['consistency'] = scores
    consistency = consistency[scores.notna()]

    # pandas.concat is the pandas API for stacking Series (there is no
    # pandas.concatenate). The second premise column is 'Premise 2';
    # previously 'Premise 1' was listed twice.
    all_text = pandas.concat([wikiqa['Resolved_answer'],
                              avicenna['Premise 1'],
                              avicenna['Premise 2'],
                              reasoning['conclusion'],
                              snli['sentence1'],
                              snli['sentence2']]).to_frame(name='all_text')

    all_text.to_csv('corpora/all_text.csv')
    question_answering.to_csv('corpora/question_answering.csv')
    reasoning.to_csv('corpora/reasoning_train.csv')
    consistency.to_csv('corpora/consistency.csv')
|
| 103 |
+
|
| 104 |
+
|
| 105 |
|
| 106 |
|
| 107 |
|
|
|
|
| 117 |
train_base_model(args.training_task,args.filename)
|
| 118 |
elif args.task == 'prepare_wiki_qa':
|
| 119 |
prepare_wiki_qa(args.filename,args.outputfile)
|
| 120 |
+
elif args.task == 'prepare_training_datasets':
|
| 121 |
+
prepare_training_datasets()
|
| 122 |
|