Spaces:

cools
/

Gideon

Runtime error

App Files Community

cools committed on Jul 16, 2023

Commit

2c92324

1 Parent(s): f22d6c0

Update Tagger.py

Browse files

Updated with fixes

Files changed (1) hide show

Tagger.py +62 -14

Tagger.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import pandas as pd
 import numpy as np
 import re
@@ -37,31 +38,73 @@ def get_majority_author_sentence(paras_text):
             if ("justice" in s and "opinion" in s and "court" in s and ("delivered" in s or "announced" in s)):
                 if j != 0 and j != len(sents)-1:
                     print("Located, but not within first or last paragraph")
-                return [s, i]
-        for (j,s) in enumerate(sents):  # Per curiam
             s = s.lower()
             if ("per" in s and "curiam" in s):
                 if j != 0 and j != len(sents)-1:
                     print("Located, but not within first or last paragraph")
-                return [s, i]
     raise Exception("Could Not Locate Authoring Justice Sentence/Paragraph")
-def get_other_author_sentence(paras_text, ind_maj):
     data = {}
-    data['Concurrences'], data['Dissents'] = [], []
     for (i,pt) in enumerate(paras_text):
         if i < ind_maj:
             continue
         sents = sent_tokenize(pt)
         for (j,s) in enumerate(sents):
             s = s.lower()
-            if "justice" in s and "," in s:
-                if "concurring" in s:
-                    data['Concurrences'].append((s,i))
-                if "dissenting" in s:
-                    data['Dissents'].append((s,i))
     return data
 def draw_line_above_sent(folderpath, sent, para_ind, color=(0,0,0)):
     data_df = pd.read_csv(folderpath + '/data.csv')
     paras_df = pd.read_csv(folderpath + '/paragraphs.csv')
@@ -80,14 +123,19 @@ def draw_line_above_sent(folderpath, sent, para_ind, color=(0,0,0)):
     image = cv2.line(image, (line_bbox[0] - 10, line_bbox[1]), (line_bbox[2] + 10, line_bbox[1]), color=color, thickness=2)
     cv2.imwrite(folderpath + '/' + str(pg_ind) + '-processed.png', image)
 def process_file(folderpath, draw=False):
     paras_text = get_paragraphed_text(folderpath)
     maj = get_majority_author_sentence(paras_text)
-    data = get_other_author_sentence(paras_text, maj[1])
     if draw:
         draw_line_above_sent(folderpath, maj[0], maj[1])
-        for c in data['Concurrences']:
             draw_line_above_sent(folderpath, c[0], c[1], color=(0,100,0))
-        for d in data['Dissents']:
-            draw_line_above_sent(folderpath, d[0], d[1], color=(100,0,0))

+# This file tags the major text
 import pandas as pd
 import numpy as np
 import re
             if ("justice" in s and "opinion" in s and "court" in s and ("delivered" in s or "announced" in s)):
                 if j != 0 and j != len(sents)-1:
                     print("Located, but not within first or last paragraph")
+                return [s, i, 0]
+        for (j,s) in enumerate(sents): # Per curiam
             s = s.lower()
             if ("per" in s and "curiam" in s):
                 if j != 0 and j != len(sents)-1:
                     print("Located, but not within first or last paragraph")
+                return [s, i, 0]
     raise Exception("Could Not Locate Authoring Justice Sentence/Paragraph")
+def get_other_justice_sentences(paras_text, ind_maj):
     data = {}
+    counter = 0
+    data['Concurrences'], data['Dissents'], data['Recused'], last = [], [], [], None
     for (i,pt) in enumerate(paras_text):
         if i < ind_maj:
             continue
         sents = sent_tokenize(pt)
         for (j,s) in enumerate(sents):
             s = s.lower()
+            if "justice" in s:
+                if ("concurring" in s and "," in s):
+                    counter += 1
+                    last = "C"
+                    data['Concurrences'].append((s,i,counter))
+                elif ("dissenting" in s and "," in s):
+                    counter += 1
+                    data['Dissents'].append((s,i,counter))
+                    last = "D"
+                elif "join" in s:
+                    counter += 1
+                    if last == "C":
+                        data['Concurrences'].append((s,i,counter))
+                    if last == "D":
+                        data['Dissents'].append((s,i,counter))
+            if "took no part" in s:
+                counter += 1
+                data['Recused'].append((s,i, counter))
     return data
+def split(paras_text, maj, other_data):
+    opinions = []
+    opinions.append(('Majority', maj[0], maj[1], maj[2]))
+    for c in other_data['Concurrences']:
+        opinions.append(('Concurrence', c[0], c[1], c[2]))
+    for d in other_data['Dissents']:
+        opinions.append(('Dissent', d[0], d[1], d[2]))
+    for r in other_data['Recused']:
+        opinions.append(('Recused', r[0], r[1], r[2]))
+    opinions_data = []
+    opinions = np.array(opinions)
+    order = opinions[:, 3].astype(int)
+    opinions = opinions[order.argsort()]
+    for (i, opinion) in enumerate(opinions):
+        if i == len(opinions) - 1:
+            end_ind = len(paras_text)
+        else:
+            end_ind = int(opinions[i + 1][
+                              2])  # Next one is where current left off? Or ideally, would also work with all the BS "Supreme Court of US" stuff?
+        start_ind = int(opinion[2])
+        o = {'Type': opinion[0], 'Author Sent': opinion[1], 'Start Para Ind': start_ind, 'End Para Ind': end_ind}
+        o['Text'] = "<PARA>".join(paras_text[start_ind:end_ind])
+        opinions_data.append(o)
+    opinions_df = pd.DataFrame(data=opinions_data)
+    return opinions_df
 def draw_line_above_sent(folderpath, sent, para_ind, color=(0,0,0)):
     data_df = pd.read_csv(folderpath + '/data.csv')
     paras_df = pd.read_csv(folderpath + '/paragraphs.csv')
     image = cv2.line(image, (line_bbox[0] - 10, line_bbox[1]), (line_bbox[2] + 10, line_bbox[1]), color=color, thickness=2)
     cv2.imwrite(folderpath + '/' + str(pg_ind) + '-processed.png', image)
 def process_file(folderpath, draw=False):
     paras_text = get_paragraphed_text(folderpath)
     maj = get_majority_author_sentence(paras_text)
+    other_data = get_other_justice_sentences(paras_text, maj[1])
+    opinions_df = split(paras_text, maj, other_data)
+    opinions_df.to_csv(folderpath + '/opinions.csv', index=False)
     if draw:
         draw_line_above_sent(folderpath, maj[0], maj[1])
+        for c in other_data['Concurrences']:
             draw_line_above_sent(folderpath, c[0], c[1], color=(0,100,0))
+        for d in other_data['Dissents']:
+            draw_line_above_sent(folderpath, d[0], d[1], color=(0,0,100))