VinayNR committed on
Commit
f5c1d3a
·
1 Parent(s): 8a8dcbd

Upload hypothesis.py

Browse files
Files changed (1) hide show
  1. hypothesis.py +98 -0
hypothesis.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import scipy.stats as stats
2
+
3
def isfloat(text):
    """Return True if *text* can be converted by ``float()``, else False.

    Args:
        text: The candidate token, normally a string.

    Returns:
        bool: True when ``float(text)`` succeeds. False when it raises
        ``ValueError`` (not a number) or ``TypeError`` (an unsupported
        type such as ``None``).
    """
    try:
        float(text)
    except (TypeError, ValueError):
        return False
    return True
10
+
11
def extract_p_val(text):
    """Return the last two whitespace-separated tokens of *text*, last first.

    For a span such as ``"p < 0.05"`` this yields ``("0.05", "<")``.

    Args:
        text: The text to split.

    Returns:
        tuple: ``(last_token, second_to_last_token)``, both as strings.

    Raises:
        IndexError: If *text* contains fewer than two tokens.
    """
    # split() with no argument collapses runs of whitespace and ignores
    # leading/trailing whitespace, so stray spaces never become empty tokens.
    parts = text.split()
    return parts[-1], parts[-2]
14
+
15
+
16
class HypothesisTest:
    """A reported t- or z-test parsed from a whitespace-separated text span.

    Attributes set by the constructor:
        test_type: ``'t'`` or ``'z'``.
        test_stat: The test-statistic token as found in the text (a string),
            or ``0.0`` when none was found.
        df1: The degrees-of-freedom token as found in the text (a string),
            or ``0`` when none was found.
        tails: Multiplier applied to the one-sided tail probability
            (defaults to 1).
    """

    def __init__(self, text):
        """Parse a test report such as ``"t-test ( 37 ) = 1.414"``.

        Args:
            text: The reported test; tokens must be separated by whitespace.

        Raises:
            ValueError: If *text* is empty or its first token starts with
                neither ``t`` nor ``z`` (case-insensitive).
        """
        # initialization
        self.test_type = ''
        self.test_stat = 0.0
        self.df1 = 0
        self.tails = 1
        # Start unset so the property getters and __str__ work even before
        # a reported p-value has been attached.
        self._reported_p_val = None
        self._reported_p_val_dir = None

        parts = text.split()
        if not parts:
            raise ValueError('Failed to parse the test')

        label = parts[0].lower()
        if label.startswith('t'):
            self.test_type = 't'
        elif label.startswith('z'):
            self.test_type = 'z'
        else:
            raise ValueError('Failed to parse the test')

        # Only a t-test carries degrees of freedom. Its first all-digit token
        # is taken as the df; every other numeric token is the statistic, so
        # an integer-valued statistic (e.g. "z = 2") is not mistaken for df.
        df_pending = self.test_type == 't'
        for part in parts:
            if not isfloat(part):
                continue
            if df_pending and part.isdigit():
                self.df1 = part
                df_pending = False
            else:
                self.test_stat = part

    @property
    def reported_p_val(self):
        """The p-value attached to this test, or None if not set."""
        return self._reported_p_val

    @reported_p_val.setter
    def reported_p_val(self, value):
        # Stored as given; no validation is applied.
        self._reported_p_val = value

    @property
    def reported_p_val_dir(self):
        """The comparison reported with the p-value, or None if not set."""
        return self._reported_p_val_dir

    @reported_p_val_dir.setter
    def reported_p_val_dir(self, dirn):
        # Unsupported values are reported on stdout and otherwise ignored,
        # leaving the previously stored direction untouched.
        if dirn in ['<', '>', '=']:
            self._reported_p_val_dir = dirn
        else:
            print("The direction can be one of <, >, =")

    def calculate_p_val(self):
        """Recompute the p-value from the parsed statistic.

        Returns:
            ``tails`` times the upper-tail probability of ``|test_stat|``
            under Student's t with ``df1`` degrees of freedom (``'t'``) or
            the standard normal (``'z'``); None for any other test type.
        """
        # sf() is the survival function (1 - cdf) computed directly, which
        # avoids the cancellation that turns tiny p-values into exactly 0.0.
        if self.test_type == 't':
            return self.tails * stats.t.sf(abs(float(self.test_stat)), df=int(self.df1))
        if self.test_type == 'z':
            return self.tails * stats.norm.sf(abs(float(self.test_stat)))
        return None

    @staticmethod
    def get_reported_stat_tests(sentence):
        """Collect the tests labelled in *sentence* with their p-values.

        Every entity returned by ``sentence.get_labels('ner')`` whose
        ``value`` is ``'T'`` is parsed as a test. The entity immediately
        after it in that list is assumed to hold the p-value.

        Args:
            sentence: Object exposing ``get_labels('ner')``; each returned
                entity must provide ``value`` and ``data_point.text``.

        Returns:
            list: The successfully parsed ``HypothesisTest`` objects.
            Entities that cannot be parsed are skipped.
        """
        tests = []

        # group tests with p values
        labeled_entities = sentence.get_labels('ner')
        last_idx = len(labeled_entities) - 1
        for idx, entity in enumerate(labeled_entities):
            # A test in the final position has no following entity to
            # supply its p-value, so it cannot be paired.
            if entity.value != 'T' or idx == last_idx:
                continue
            try:
                test = HypothesisTest(entity.data_point.text)

                # assume the closest p-value is the next entity in the list
                p_val_span = labeled_entities[idx + 1].data_point.text
                p_val, dirn = extract_p_val(p_val_span)

                test.reported_p_val = p_val
                test.reported_p_val_dir = dirn
            except Exception:
                # Best effort: a span that does not parse is not a test.
                continue
            tests.append(test)

        return tests

    def __str__(self):
        """Return a one-line, pipe-separated summary of the test."""
        return (
            f"Test Type : {self.test_type} | Test Stat : {self.test_stat} | "
            f"DF : {self.df1} | "
            f"Rep P-val : {self._reported_p_val_dir}{self.reported_p_val}"
        )