cools committed on
Commit
ddda62b
·
1 Parent(s): 052daae

Update Parser.py

Browse files
Files changed (1) hide show
  1. Parser.py +30 -3
Parser.py CHANGED
@@ -59,9 +59,8 @@ def get_splits(folderpath):
59
  full_body_text = "\n".join(body_texts).replace('-', '')
60
  full_body_text = correct(full_body_text, "justice")
61
 
62
- # split_p = re.compile('((\n|^)\s*Per Curiam\.\s*\n)|((\n|^)\s*(Mr\.\s*(chief)?\s*)?Justice[A-z\s\n,]*delivered the opinion)|((\n|^)\s*(mr\.\s*)?justice[A-Za-z\n\s,–-]*(concurring|dissenting)[A-Za-z\n\s,–]*\.)', re.IGNORECASE)
63
  split_p = re.compile('((\n|^)\s*Per Curiam\.\s*\n)|(Justice[A-z\s\n,]*delivered the opinion)|((\n|^)\s*(mr\.\s*)?justice[A-Za-z\n\s,–-]*(concurring|dissenting)[A-Za-z\n\s,–]*\.)', re.IGNORECASE)
64
-
65
  splits_m = list(re.finditer(split_p, full_body_text))
66
  splits = []
67
 
@@ -147,6 +146,20 @@ def correct(corpus, keyword):
147
  words[ind] = result[0][0]
148
  return " ".join(words)
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  class Opinion:
151
  def __init__(self, opinion_type, author, joining, body_text, fn_text, header_text):
152
  self.opinion_type = opinion_type
@@ -193,7 +206,11 @@ class Case:
193
  def update_majority_joining(self):
194
  print("Getting updated list")
195
  cy = court_from_year(self.date)
196
- known = [j for d in self.dissents for j in d.joining] + [d.author for d in self.dissents] + [j for c in self.concurrences for j in c.joining] + [c.author for c in self.concurrences] + [self.majority.author] + [r for r in self.recused]
 
 
 
 
197
  all_justices = [aj for aj in cy['Associate']]
198
  if cy['Chief'] is not None:
199
  all_justices.append('Chief')
@@ -254,8 +271,18 @@ class Case:
254
  if self.case_citation is not None and self.case_citation.lower().split('s.')[-1].strip() == "":
255
  self.case_citation = self.case_citation.strip() + ' ' + str(self.page_numbers[0])
256
 
 
 
 
 
 
 
 
 
 
257
  def process(self):
258
  self.get_date()
 
259
  self.update_recused()
260
  self.update_majority_joining()
261
  self.get_cert_info()
 
59
  full_body_text = "\n".join(body_texts).replace('-', '')
60
  full_body_text = correct(full_body_text, "justice")
61
 
 
62
  split_p = re.compile('((\n|^)\s*Per Curiam\.\s*\n)|(Justice[A-z\s\n,]*delivered the opinion)|((\n|^)\s*(mr\.\s*)?justice[A-Za-z\n\s,–-]*(concurring|dissenting)[A-Za-z\n\s,–]*\.)', re.IGNORECASE)
63
+ # ((\n|^)\s*(Mr\.\s*(chief)?\s*)?Justice[A-z\s\n,]*delivered the opinion)
64
  splits_m = list(re.finditer(split_p, full_body_text))
65
  splits = []
66
 
 
146
  words[ind] = result[0][0]
147
  return " ".join(words)
148
 
149
def closest_justice(name, datetime):
    """Snap a parsed justice name onto the roster of the sitting court.

    Args:
        name: Justice name as extracted from the opinion text.
        datetime: Value passed straight through to ``court_from_year`` to
            select the court roster.

    Returns:
        ``name`` unchanged when ``name.capitalize()`` is already on the
        roster. Otherwise the roster entry with the smallest
        ``distance(entry, name)`` (first one on ties), or the literal
        string ``"Chief"`` when that entry is the chief justice.

    Raises:
        ValueError: If the roster is empty and ``name`` is not on it.
    """
    cy = court_from_year(datetime)
    # Copy the roster before extending it: ``justices += [...]`` on the
    # original list object would append the chief's name to
    # ``cy['Associate']`` itself, corrupting the roster for later readers.
    justices = list(cy['Associate'])
    if cy['Chief'] is not None:
        justices.append(cy['Chief'])

    if name.capitalize() in justices:
        # NOTE(review): an exact match on the chief's name is returned as-is
        # here, whereas a fuzzy match below is rewritten to "Chief" --
        # confirm whether that asymmetry is intended.
        return name

    # ``distance`` is presumably a string edit distance -- verify against
    # the file's imports. ``min`` keeps the first entry on ties.
    closest_name = min(justices, key=lambda justice: distance(justice, name))
    if closest_name.capitalize() == cy['Chief']:
        return "Chief"
    return closest_name
162
+
163
  class Opinion:
164
  def __init__(self, opinion_type, author, joining, body_text, fn_text, header_text):
165
  self.opinion_type = opinion_type
 
206
  def update_majority_joining(self):
207
  print("Getting updated list")
208
  cy = court_from_year(self.date)
209
+ known = [j for d in self.dissents for j in d.joining] + [d.author for d in self.dissents] + [j for c in
210
+ self.concurrences
211
+ for j in
212
+ c.joining] + [
213
+ c.author for c in self.concurrences] + [self.majority.author] + [r for r in self.recused]
214
  all_justices = [aj for aj in cy['Associate']]
215
  if cy['Chief'] is not None:
216
  all_justices.append('Chief')
 
271
  if self.case_citation is not None and self.case_citation.lower().split('s.')[-1].strip() == "":
272
  self.case_citation = self.case_citation.strip() + ' ' + str(self.page_numbers[0])
273
 
274
def update_justice_names(self):
    """Normalize every opinion author through ``closest_justice``.

    The majority author is left untouched when it reads "per curiam"
    (case-insensitive). Concurrence authors are processed before dissent
    authors, each in list order, using ``self.date`` to pick the roster.
    Opinions are updated in place; nothing is returned.
    """
    if self.majority.author.lower() != "per curiam":
        self.majority.author = closest_justice(self.majority.author, self.date)
    # Iterate the opinion objects directly; their position in the list is
    # not needed to update them in place.
    for concurrence in self.concurrences:
        concurrence.author = closest_justice(concurrence.author, self.date)
    for dissent in self.dissents:
        dissent.author = closest_justice(dissent.author, self.date)
282
+
283
  def process(self):
284
  self.get_date()
285
+ self.update_justice_names()
286
  self.update_recused()
287
  self.update_majority_joining()
288
  self.get_cert_info()