Update Parser.py
Browse files
Parser.py
CHANGED
|
@@ -59,9 +59,8 @@ def get_splits(folderpath):
|
|
| 59 |
full_body_text = "\n".join(body_texts).replace('-', '')
|
| 60 |
full_body_text = correct(full_body_text, "justice")
|
| 61 |
|
| 62 |
-
# split_p = re.compile('((\n|^)\s*Per Curiam\.\s*\n)|((\n|^)\s*(Mr\.\s*(chief)?\s*)?Justice[A-z\s\n,]*delivered the opinion)|((\n|^)\s*(mr\.\s*)?justice[A-Za-z\n\s,β-]*(concurring|dissenting)[A-Za-z\n\s,β]*\.)', re.IGNORECASE)
|
| 63 |
split_p = re.compile('((\n|^)\s*Per Curiam\.\s*\n)|(Justice[A-z\s\n,]*delivered the opinion)|((\n|^)\s*(mr\.\s*)?justice[A-Za-z\n\s,β-]*(concurring|dissenting)[A-Za-z\n\s,β]*\.)', re.IGNORECASE)
|
| 64 |
-
|
| 65 |
splits_m = list(re.finditer(split_p, full_body_text))
|
| 66 |
splits = []
|
| 67 |
|
|
@@ -147,6 +146,20 @@ def correct(corpus, keyword):
|
|
| 147 |
words[ind] = result[0][0]
|
| 148 |
return " ".join(words)
|
| 149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
class Opinion:
|
| 151 |
def __init__(self, opinion_type, author, joining, body_text, fn_text, header_text):
|
| 152 |
self.opinion_type = opinion_type
|
|
@@ -193,7 +206,11 @@ class Case:
|
|
| 193 |
def update_majority_joining(self):
|
| 194 |
print("Getting updated list")
|
| 195 |
cy = court_from_year(self.date)
|
| 196 |
-
known = [j for d in self.dissents for j in d.joining] + [d.author for d in self.dissents] + [j for c in
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
all_justices = [aj for aj in cy['Associate']]
|
| 198 |
if cy['Chief'] is not None:
|
| 199 |
all_justices.append('Chief')
|
|
@@ -254,8 +271,18 @@ class Case:
|
|
| 254 |
if self.case_citation is not None and self.case_citation.lower().split('s.')[-1].strip() == "":
|
| 255 |
self.case_citation = self.case_citation.strip() + ' ' + str(self.page_numbers[0])
|
| 256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
def process(self):
|
| 258 |
self.get_date()
|
|
|
|
| 259 |
self.update_recused()
|
| 260 |
self.update_majority_joining()
|
| 261 |
self.get_cert_info()
|
|
|
|
| 59 |
full_body_text = "\n".join(body_texts).replace('-', '')
|
| 60 |
full_body_text = correct(full_body_text, "justice")
|
| 61 |
|
|
|
|
| 62 |
split_p = re.compile('((\n|^)\s*Per Curiam\.\s*\n)|(Justice[A-z\s\n,]*delivered the opinion)|((\n|^)\s*(mr\.\s*)?justice[A-Za-z\n\s,β-]*(concurring|dissenting)[A-Za-z\n\s,β]*\.)', re.IGNORECASE)
|
| 63 |
+
# ((\n|^)\s*(Mr\.\s*(chief)?\s*)?Justice[A-z\s\n,]*delivered the opinion)
|
| 64 |
splits_m = list(re.finditer(split_p, full_body_text))
|
| 65 |
splits = []
|
| 66 |
|
|
|
|
| 146 |
words[ind] = result[0][0]
|
| 147 |
return " ".join(words)
|
| 148 |
|
| 149 |
+
def closest_justice(name, datetime):
    """Map a parsed justice name onto the roster of the sitting court.

    The roster is looked up with ``court_from_year(datetime)`` and consists of
    the names under ``'Associate'`` plus the ``'Chief'`` entry when it is not
    ``None``.

    Args:
        name: Justice name as extracted from the opinion text.
        datetime: Value forwarded unchanged to ``court_from_year`` (callers in
            this file pass the case's ``date`` attribute).

    Returns:
        ``name`` unchanged when its capitalized form is already on the roster
        (or when the roster is empty); otherwise the roster name with the
        smallest ``distance`` to ``name``, reported as the literal ``"Chief"``
        when that closest name is the chief justice.
    """
    cy = court_from_year(datetime)
    chief = cy['Chief']

    # Work on a copy: the previous `justices += [...]` extended cy['Associate']
    # in place, leaking the chief's name into the associates list for every
    # later reader of the same court record.
    justices = list(cy['Associate'])
    if chief is not None:
        justices.append(chief)

    # Nothing to correct (exact match) or nothing to correct against (empty
    # roster, which previously crashed inside argmin).
    if not justices or name.capitalize() in justices:
        return name

    # min() keeps the first of several equally close candidates, the same
    # tie-breaking as the argmin lookup it replaces.
    # NOTE(review): `distance` is presumably an edit distance - confirm import.
    closest_name = min(justices, key=lambda candidate: distance(candidate, name))

    # A fuzzy match on the chief is reported as "Chief" rather than by name.
    if closest_name.capitalize() == chief:
        closest_name = "Chief"
    return closest_name
|
| 162 |
+
|
| 163 |
class Opinion:
|
| 164 |
def __init__(self, opinion_type, author, joining, body_text, fn_text, header_text):
|
| 165 |
self.opinion_type = opinion_type
|
|
|
|
| 206 |
def update_majority_joining(self):
|
| 207 |
print("Getting updated list")
|
| 208 |
cy = court_from_year(self.date)
|
| 209 |
+
known = [j for d in self.dissents for j in d.joining] + [d.author for d in self.dissents] + [j for c in
|
| 210 |
+
self.concurrences
|
| 211 |
+
for j in
|
| 212 |
+
c.joining] + [
|
| 213 |
+
c.author for c in self.concurrences] + [self.majority.author] + [r for r in self.recused]
|
| 214 |
all_justices = [aj for aj in cy['Associate']]
|
| 215 |
if cy['Chief'] is not None:
|
| 216 |
all_justices.append('Chief')
|
|
|
|
| 271 |
if self.case_citation is not None and self.case_citation.lower().split('s.')[-1].strip() == "":
|
| 272 |
self.case_citation = self.case_citation.strip() + ' ' + str(self.page_numbers[0])
|
| 273 |
|
| 274 |
+
def update_justice_names(self):
    """Replace each opinion author with its closest match on the court.

    The majority author is left untouched when it is "per curiam"
    (case-insensitive); every concurrence and dissent author is always
    passed through ``closest_justice`` together with ``self.date``.
    """
    majority = self.majority
    if majority.author.lower() != "per curiam":
        majority.author = closest_justice(majority.author, self.date)

    # Concurrences are corrected before dissents, in list order.
    for opinions in (self.concurrences, self.dissents):
        for opinion in opinions:
            opinion.author = closest_justice(opinion.author, self.date)
|
| 282 |
+
|
| 283 |
def process(self):
|
| 284 |
self.get_date()
|
| 285 |
+
self.update_justice_names()
|
| 286 |
self.update_recused()
|
| 287 |
self.update_majority_joining()
|
| 288 |
self.get_cert_info()
|