PeteBleackley committed on
Commit
ab0523b
·
1 Parent(s): d63afc6
Files changed (1) hide show
  1. scripts.py +14 -12
scripts.py CHANGED
@@ -71,7 +71,7 @@ def train_base_model(task,filename):
71
 
72
  def prepare_training_datasets():
73
  wikiqa = pandas.read_csv('corpora/WikiQA.csv')
74
- avicenna = pandas.read_csv('corpora/Avicenna_Train.csv')
75
  snli = pandas.read_csv('corpora/snli_1.0_train.csv')
76
  question_answering = wikiqa.loc[wikiqa['Label']==1,
77
  ['Cleaned_question',
@@ -80,22 +80,24 @@ def prepare_training_datasets():
80
  reasoning = avicenna.loc[avicenna['Syllogistic relation']=='yes',
81
  ['Premise 1',
82
  'Premise 2',
83
- 'Conclustion']].rename(columns={'Premise 1':'proposition0',
84
  'Premise 2':'proposition1',
85
  'Conclusion':'conclusion'})
86
- consistency = snli[['sentence1',
87
- 'sentence2']].rename(columns={'sentence1':'statement0',
88
- 'sentence2':'statement1'})
 
89
  mapping = {'entailment':1.0,
90
  'neutral':0.0,
91
  'contradiction':-1.0}
92
- consistency['consistency'] = snli['gold_label'].apply(lambda x:mapping[x])
93
- all_text = pandas.concatenate([wikiqa['Resolved_answer'],
94
- avicenna['Premise 1'],
95
- avicenna['Premise 1'],
96
- reasoning['conclusion'],
97
- snli['sentence1'],
98
- snli['sentence2']]).to_frame(name='all_text')
 
99
  all_text.to_csv('corpora/all_text.csv')
100
  question_answering.to_csv('corpora/question_answering.csv')
101
  reasoning.to_csv('corpora/reasoning_train.csv')
 
71
 
72
  def prepare_training_datasets():
73
  wikiqa = pandas.read_csv('corpora/WikiQA.csv')
74
+ avicenna = pandas.read_csv('corpora/Avicenna_Train.csv',encoding='iso-8859-1')
75
  snli = pandas.read_csv('corpora/snli_1.0_train.csv')
76
  question_answering = wikiqa.loc[wikiqa['Label']==1,
77
  ['Cleaned_question',
 
80
  reasoning = avicenna.loc[avicenna['Syllogistic relation']=='yes',
81
  ['Premise 1',
82
  'Premise 2',
83
+ 'Conclusion']].rename(columns={'Premise 1':'proposition0',
84
  'Premise 2':'proposition1',
85
  'Conclusion':'conclusion'})
86
+ consistency = snli.loc[snli['gold_label']!='-',
87
+ ['sentence1',
88
+ 'sentence2']].rename(columns={'sentence1':'statement0',
89
+ 'sentence2':'statement1'})
90
  mapping = {'entailment':1.0,
91
  'neutral':0.0,
92
  'contradiction':-1.0}
93
+ consistency['consistency'] = snli.loc[snli['gold_label']!='-',
94
+ 'gold_label'].apply(lambda x:mapping[x])
95
+ all_text = pandas.concat([wikiqa['Resolved_answer'],
96
+ avicenna['Premise 1'],
97
+ avicenna['Premise 1'],
98
+ reasoning['conclusion'],
99
+ snli['sentence1'],
100
+ snli['sentence2']]).to_frame(name='all_text')
101
  all_text.to_csv('corpora/all_text.csv')
102
  question_answering.to_csv('corpora/question_answering.csv')
103
  reasoning.to_csv('corpora/reasoning_train.csv')