Spaces:

cools
/

Gideon

Runtime error

App Files Community

cools committed on Jul 28, 2023

Commit

9314300

1 Parent(s): c65f678

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -46

app.py CHANGED Viewed

@@ -13,59 +13,81 @@ import requests
 import shutil
 import pandas as pd
 st.set_page_config(layout="wide")
-# loc_link = st.text_input('LOC Link', placeholder='https://www.supremecourt.gov/opinions/22pdf/21-476_c185.pdf')
-volume = st.slider('Volume', 500, 550, 525)
-r = requests.get('https://www.loc.gov/collections/united-states-reports/?fa=partof%3Au.s.+reports%3A+volume+'+str(volume) +'&st=list&c=250')
-soup = BeautifulSoup(r.text)
-html_links = soup.findAll('link',attrs={'rel': 'alternate'})
-pdf_links = []
-for hl in html_links:
-    if len(hl['href'].split('/')[-1].split('.gif')[0])==  11:
-        pdf_links.append(hl['href'].split('.gif')[0]+'.pdf')
-case_num = st.slider('Case Number', 0, len(pdf_links), 1, step=1)
-print(pdf_links)
-run = st.button("Run")
-if run:
-    with st.spinner("Downloading"):
-        loc_link = pdf_links[case_num]
-        if os.path.exists('PDF Cases/Temp'):
-            shutil.rmtree('PDF Cases/Temp')
-        Scraper.download_loc(loc_link)
-        ImageProcessor.process_file('PDF Cases/Temp')
-        TextProcessor.process_file('PDF Cases/Temp')
-        Tagger.process_file('PDF Cases/Temp', draw=True)
-        st.header('Opinions')
-        opinions_df = pd.read_csv('PDF Cases/Temp/opinions.csv')
-        types = opinions_df['Type'].tolist()
-        author_sents = opinions_df['Author Sent'].tolist()
-        texts = opinions_df['Text'].tolist()
-        for (t, a_s) in zip(types, author_sents):
-            st.text(t + ":\t" + a_s)
-        tabs = st.tabs(types)
-        for (i, tab) in enumerate(tabs):
-            with tab:
-                paras = texts[i].split('<PARA>')
-                t = "</div><br/><div>".join(paras)
-                t = "<div>" + t
-                st.markdown(t, unsafe_allow_html=True)
-        st.divider()
-        cols = st.columns(4)
-        image_filenames = [f for f in os.listdir('PDF Cases/Temp') if 'processed.png' in f]
-        for (i,f) in enumerate(image_filenames):
-            image = cv2.imread('PDF Cases/Temp/' + str(i) + '-processed.png')
-            with cols[i%4]:
-                st.image(image)

 import shutil
 import pandas as pd
+aws_access_key = os.getenv("aws_access_key")
+aws_secret_key = os.getenv("aws_secret_key")
+# Display the cases
+s3 = boto3.client('s3')
+s3.download_file('gidon-corpis', 'OBJECT_NAME', 'FILE_NAME')
 st.set_page_config(layout="wide")
+def get_subdirectories(prefix):
+    subdirectories = set()
+    paginator = s3.get_paginator('list_objects_v2')
+    for result in paginator.paginate(Bucket="gideon-corpus", Prefix=prefix, Delimiter='/'):
+        if result.get('CommonPrefixes'):
+            subdirectories.update(subdir.get('Prefix') for subdir in result.get('CommonPrefixes'))
+    subdirectories = list(subdirectories)
+    subs = [s.split('/')[1] for s in subdirectories]
+    return subs
+subs = get_subdirectories("Cases/")
+# volume = st.slider('Volume', 500, 550, 525)
+# r = requests.get('https://www.loc.gov/collections/united-states-reports/?fa=partof%3Au.s.+reports%3A+volume+'+str(volume) +'&st=list&c=250')
+# soup = BeautifulSoup(r.text)
+# html_links = soup.findAll('link',attrs={'rel': 'alternate'})
+# pdf_links = []
+# for hl in html_links:
+#     if len(hl['href'].split('/')[-1].split('.gif')[0])==  11:
+#         pdf_links.append(hl['href'].split('.gif')[0]+'.pdf')
+# case_num = st.slider('Case Number', 0, len(pdf_links), 1, step=1)
+# print(pdf_links)
+# run = st.button("Run")
+# if run:
+#     with st.spinner("Downloading"):
+#         loc_link = pdf_links[case_num]
+#         if os.path.exists('PDF Cases/Temp'):
+#             shutil.rmtree('PDF Cases/Temp')
+#         Scraper.download_loc(loc_link)
+#         ImageProcessor.process_file('PDF Cases/Temp')
+#         TextProcessor.process_file('PDF Cases/Temp')
+#         Tagger.process_file('PDF Cases/Temp', draw=True)
+#         st.header('Opinions')
+#         opinions_df = pd.read_csv('PDF Cases/Temp/opinions.csv')
+#         types = opinions_df['Type'].tolist()
+#         author_sents = opinions_df['Author Sent'].tolist()
+#         texts = opinions_df['Text'].tolist()
+#         for (t, a_s) in zip(types, author_sents):
+#             st.text(t + ":\t" + a_s)
+#         tabs = st.tabs(types)
+#         for (i, tab) in enumerate(tabs):
+#             with tab:
+#                 paras = texts[i].split('<PARA>')
+#                 t = "</div><br/><div>".join(paras)
+#                 t = "<div>" + t
+#                 st.markdown(t, unsafe_allow_html=True)
+#         st.divider()
+#         cols = st.columns(4)
+#         image_filenames = [f for f in os.listdir('PDF Cases/Temp') if 'processed.png' in f]
+#         for (i,f) in enumerate(image_filenames):
+#             image = cv2.imread('PDF Cases/Temp/' + str(i) + '-processed.png')
+#             with cols[i%4]:
+#                 st.image(image)