cools committed on
Commit
9314300
·
1 Parent(s): c65f678

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -46
app.py CHANGED
@@ -13,59 +13,81 @@ import requests
13
  import shutil
14
  import pandas as pd
15
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  st.set_page_config(layout="wide")
18
 
19
- # loc_link = st.text_input('LOC Link', placeholder='https://www.supremecourt.gov/opinions/22pdf/21-476_c185.pdf')
20
-
21
- volume = st.slider('Volume', 500, 550, 525)
22
- r = requests.get('https://www.loc.gov/collections/united-states-reports/?fa=partof%3Au.s.+reports%3A+volume+'+str(volume) +'&st=list&c=250')
23
- soup = BeautifulSoup(r.text)
24
- html_links = soup.findAll('link',attrs={'rel': 'alternate'})
25
- pdf_links = []
26
- for hl in html_links:
27
- if len(hl['href'].split('/')[-1].split('.gif')[0])== 11:
28
- pdf_links.append(hl['href'].split('.gif')[0]+'.pdf')
29
- case_num = st.slider('Case Number', 0, len(pdf_links), 1, step=1)
30
- print(pdf_links)
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
 
33
- run = st.button("Run")
34
- if run:
35
- with st.spinner("Downloading"):
36
- loc_link = pdf_links[case_num]
37
- if os.path.exists('PDF Cases/Temp'):
38
- shutil.rmtree('PDF Cases/Temp')
39
- Scraper.download_loc(loc_link)
40
- ImageProcessor.process_file('PDF Cases/Temp')
41
- TextProcessor.process_file('PDF Cases/Temp')
42
- Tagger.process_file('PDF Cases/Temp', draw=True)
43
-
44
-
45
- st.header('Opinions')
46
- opinions_df = pd.read_csv('PDF Cases/Temp/opinions.csv')
47
- types = opinions_df['Type'].tolist()
48
- author_sents = opinions_df['Author Sent'].tolist()
49
- texts = opinions_df['Text'].tolist()
50
- for (t, a_s) in zip(types, author_sents):
51
- st.text(t + ":\t" + a_s)
52
-
53
- tabs = st.tabs(types)
54
- for (i, tab) in enumerate(tabs):
55
- with tab:
56
- paras = texts[i].split('<PARA>')
57
- t = "</div><br/><div>".join(paras)
58
- t = "<div>" + t
59
- st.markdown(t, unsafe_allow_html=True)
60
 
61
- st.divider()
62
 
63
- cols = st.columns(4)
64
- image_filenames = [f for f in os.listdir('PDF Cases/Temp') if 'processed.png' in f]
65
- for (i,f) in enumerate(image_filenames):
66
- image = cv2.imread('PDF Cases/Temp/' + str(i) + '-processed.png')
67
- with cols[i%4]:
68
- st.image(image)
69
 
70
 
71
 
 
13
  import shutil
14
  import pandas as pd
15
 
16
# Credentials are read from lower-case environment variables. boto3 does not
# look these names up on its own (it reads AWS_ACCESS_KEY_ID /
# AWS_SECRET_ACCESS_KEY), so they are handed to the client explicitly below.
# When both are unset (None) boto3 falls back to its default credential chain.
aws_access_key = os.getenv("aws_access_key")
aws_secret_key = os.getenv("aws_secret_key")

# Display the cases

# Shared S3 client used by the helpers further down in this file.
s3 = boto3.client(
    's3',
    aws_access_key_id=aws_access_key,
    aws_secret_access_key=aws_secret_key,
)

# NOTE(review): the previous revision called
#   s3.download_file('gidon-corpis', 'OBJECT_NAME', 'FILE_NAME')
# here. That is the placeholder from the boto3 documentation (misspelled
# bucket, dummy key and filename) and would fail on every app start, so it
# has been removed. Add a real download where a concrete object key is known.
24
+
25
+
26
 
27
  st.set_page_config(layout="wide")
28
 
29
+
30
def get_subdirectories(prefix, bucket="gideon-corpus", client=None):
    """Return the names of the immediate "subdirectories" under an S3 prefix.

    S3 has no real directories; with ``Delimiter='/'`` the listing groups keys
    by the next ``/`` after ``prefix`` and reports each group as a common
    prefix. This function collects those common prefixes across all result
    pages and returns only their last path component.

    Args:
        prefix: Key prefix to list under, e.g. ``"Cases/"``.
        bucket: Bucket to list. Defaults to the bucket this app used before
            the parameter existed.
        client: Object exposing ``get_paginator`` like a boto3 S3 client.
            Defaults to the module-level ``s3`` client.

    Returns:
        A sorted list of unique subdirectory names (no slashes). Empty when
        the listing reports no common prefixes.
    """
    if client is None:
        # Fall back to the shared client created at module import time.
        client = s3

    paginator = client.get_paginator('list_objects_v2')
    names = set()
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/'):
        # 'CommonPrefixes' is absent on pages that contain no grouped keys.
        for common in page.get('CommonPrefixes') or ():
            key_prefix = common.get('Prefix')
            if not key_prefix:
                continue
            # Take the last path component instead of a fixed index so the
            # result is right for empty and nested prefixes too.
            names.add(key_prefix.rstrip('/').rsplit('/', 1)[-1])

    # Sorted so callers (e.g. UI widgets) see a stable order between runs.
    return sorted(names)
39
+
40
+ subs = get_subdirectories("Cases/")
41
+
42
+
43
+ # volume = st.slider('Volume', 500, 550, 525)
44
+ # r = requests.get('https://www.loc.gov/collections/united-states-reports/?fa=partof%3Au.s.+reports%3A+volume+'+str(volume) +'&st=list&c=250')
45
+ # soup = BeautifulSoup(r.text)
46
+ # html_links = soup.findAll('link',attrs={'rel': 'alternate'})
47
+ # pdf_links = []
48
+ # for hl in html_links:
49
+ # if len(hl['href'].split('/')[-1].split('.gif')[0])== 11:
50
+ # pdf_links.append(hl['href'].split('.gif')[0]+'.pdf')
51
+ # case_num = st.slider('Case Number', 0, len(pdf_links), 1, step=1)
52
+ # print(pdf_links)
53
 
54
 
55
+ # run = st.button("Run")
56
+ # if run:
57
+ # with st.spinner("Downloading"):
58
+ # loc_link = pdf_links[case_num]
59
+ # if os.path.exists('PDF Cases/Temp'):
60
+ # shutil.rmtree('PDF Cases/Temp')
61
+ # Scraper.download_loc(loc_link)
62
+ # ImageProcessor.process_file('PDF Cases/Temp')
63
+ # TextProcessor.process_file('PDF Cases/Temp')
64
+ # Tagger.process_file('PDF Cases/Temp', draw=True)
65
+
66
+
67
+ # st.header('Opinions')
68
+ # opinions_df = pd.read_csv('PDF Cases/Temp/opinions.csv')
69
+ # types = opinions_df['Type'].tolist()
70
+ # author_sents = opinions_df['Author Sent'].tolist()
71
+ # texts = opinions_df['Text'].tolist()
72
+ # for (t, a_s) in zip(types, author_sents):
73
+ # st.text(t + ":\t" + a_s)
74
+
75
+ # tabs = st.tabs(types)
76
+ # for (i, tab) in enumerate(tabs):
77
+ # with tab:
78
+ # paras = texts[i].split('<PARA>')
79
+ # t = "</div><br/><div>".join(paras)
80
+ # t = "<div>" + t
81
+ # st.markdown(t, unsafe_allow_html=True)
82
 
83
+ # st.divider()
84
 
85
+ # cols = st.columns(4)
86
+ # image_filenames = [f for f in os.listdir('PDF Cases/Temp') if 'processed.png' in f]
87
+ # for (i,f) in enumerate(image_filenames):
88
+ # image = cv2.imread('PDF Cases/Temp/' + str(i) + '-processed.png')
89
+ # with cols[i%4]:
90
+ # st.image(image)
91
 
92
 
93