PeteBleackley committed on
Commit
d63afc6
·
1 Parent(s): 9ca9d81

Script for preparing training datasets

Browse files
Files changed (1) hide show
  1. scripts.py +35 -0
scripts.py CHANGED
@@ -69,6 +69,39 @@ def train_base_model(task,filename):
69
  print(model.evaluate(test_data))
70
  model.save(filename)
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
 
74
 
@@ -84,4 +117,6 @@ if __name__ == '__main__':
84
  train_base_model(args.training_task,args.filename)
85
  elif args.task == 'prepare_wiki_qa':
86
  prepare_wiki_qa(args.filename,args.outputfile)
 
 
87
 
 
69
  print(model.evaluate(test_data))
70
  model.save(filename)
71
 
72
def prepare_training_datasets(corpus_dir='corpora'):
    """Build the task-specific training CSVs from the raw corpora.

    Reads ``WikiQA.csv``, ``Avicenna_Train.csv`` and ``snli_1.0_train.csv``
    from *corpus_dir* and writes four files back into the same directory:

    * ``question_answering.csv`` -- ``question``/``answer`` pairs taken from
      the WikiQA rows whose ``Label`` is 1.
    * ``reasoning_train.csv`` -- ``proposition0``/``proposition1``/
      ``conclusion`` triples taken from the Avicenna rows whose
      ``Syllogistic relation`` is ``'yes'``.
    * ``consistency.csv`` -- ``statement0``/``statement1`` pairs from SNLI
      with a numeric ``consistency`` score (entailment 1.0, neutral 0.0,
      contradiction -1.0).
    * ``all_text.csv`` -- a single ``all_text`` column stacking every answer,
      premise, retained conclusion and SNLI sentence.

    Parameters
    ----------
    corpus_dir : str, optional
        Directory holding the input CSVs and receiving the outputs.
        Defaults to ``'corpora'``.

    Raises
    ------
    FileNotFoundError
        If any of the three input CSVs is missing (raised by
        ``pandas.read_csv``).
    KeyError
        If an input CSV lacks one of the columns named above.
    """
    # Function-scope import: the file's top-level import block is not
    # visible from this section, so do not rely on ``os`` being imported.
    import os

    def corpus_path(name):
        return os.path.join(corpus_dir, name)

    wikiqa = pandas.read_csv(corpus_path('WikiQA.csv'))
    avicenna = pandas.read_csv(corpus_path('Avicenna_Train.csv'))
    snli = pandas.read_csv(corpus_path('snli_1.0_train.csv'))

    # Keep only the question/answer pairs labelled as correct.
    question_answering = wikiqa.loc[
        wikiqa['Label'] == 1,
        ['Cleaned_question', 'Resolved_answer'],
    ].rename(columns={'Cleaned_question': 'question',
                      'Resolved_answer': 'answer'})

    # Keep only the premise pairs that actually yield a conclusion.
    reasoning = avicenna.loc[
        avicenna['Syllogistic relation'] == 'yes',
        ['Premise 1', 'Premise 2', 'Conclusion'],
    ].rename(columns={'Premise 1': 'proposition0',
                      'Premise 2': 'proposition1',
                      'Conclusion': 'conclusion'})

    consistency = snli[['sentence1', 'sentence2']].rename(
        columns={'sentence1': 'statement0',
                 'sentence2': 'statement1'})
    mapping = {'entailment': 1.0,
               'neutral': 0.0,
               'contradiction': -1.0}
    # ``map`` yields NaN for labels outside the mapping instead of raising
    # KeyError; SNLI uses such a label ('-') for pairs without annotator
    # consensus, and those rows carry no usable score, so drop them.
    consistency['consistency'] = snli['gold_label'].map(mapping)
    consistency = consistency.dropna(subset=['consistency'])

    all_text = pandas.concat([wikiqa['Resolved_answer'],
                              avicenna['Premise 1'],
                              avicenna['Premise 2'],
                              reasoning['conclusion'],
                              snli['sentence1'],
                              snli['sentence2']]).to_frame(name='all_text')

    all_text.to_csv(corpus_path('all_text.csv'))
    question_answering.to_csv(corpus_path('question_answering.csv'))
    reasoning.to_csv(corpus_path('reasoning_train.csv'))
    consistency.to_csv(corpus_path('consistency.csv'))
103
+
104
+
105
 
106
 
107
 
 
117
  train_base_model(args.training_task,args.filename)
118
  elif args.task == 'prepare_wiki_qa':
119
  prepare_wiki_qa(args.filename,args.outputfile)
120
+ elif args.task == 'prepare_training_datasets':
121
+ prepare_training_datasets()
122