cools committed on
Commit
7a5575d
·
1 Parent(s): 4cb57bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -3
app.py CHANGED
@@ -4,17 +4,28 @@ import Scraper
4
  import Manager
5
  import os
6
  import pickle
 
7
 
8
 
9
  st.set_page_config(layout="wide")
10
 
11
  # loc_link = st.text_input('LOC Link', placeholder='https://tile.loc.gov/storage-services/service/ll/usrep/usrep200/usrep200361/usrep200361.pdf')
12
- volume = st.slider('Volume', 200, 550, 1)
13
- page = st.slider('Page', 0, 1200, 1)
 
 
 
 
 
 
 
 
 
14
  run = st.button("Run")
15
  if run:
16
  with st.spinner("Downloading"):
17
- loc_link = 'https://tile.loc.gov/storage-services/service/ll/usrep/usrep' + str(volume) + '/usrep' + str(volume) + str(page) +'/usrep' + str(volume) + str(page) +'.pdf'
 
18
  Scraper.download_loc(loc_link)
19
  Manager.run('Temp')
20
  # file = open("PDF Cases/Temp/processed.pkl",'rb')
 
4
  import Manager
5
  import os
6
  import pickle
7
+ from bs4 import BeautifulSoup
8
 
9
 
10
  st.set_page_config(layout="wide")
11
 
12
  # loc_link = st.text_input('LOC Link', placeholder='https://tile.loc.gov/storage-services/service/ll/usrep/usrep200/usrep200361/usrep200361.pdf')
13
+ volume = st.slider('Volume', 200, 550, 400)
14
+ r = requests.get('https://www.loc.gov/collections/united-states-reports/?fa=partof%3Au.s.+reports%3A+volume+'+str(volume) +'&st=list&c=250')
15
+ soup = BeautifulSoup(r.text)
16
+ html_links = soup.findAll('link',attrs={'rel': 'alternate'})
17
+ pdf_links = []
18
+ for hl in html_links:
19
+ if len(hl['href'].split('/')[-1].split('.gif')[0])== 11:
20
+ pdf_links.append(hl['href'].split('.gif')[0]+'.pdf')
21
+ case_num = st.slider('Case Number', 0, len(pdf_links), 1, step=1)
22
+ print(pdf_links)
23
+
24
  run = st.button("Run")
25
  if run:
26
  with st.spinner("Downloading"):
27
+ loc_link = pdf_links[case_num]
28
+ # loc_link = 'https://tile.loc.gov/storage-services/service/ll/usrep/usrep' + str(volume) + '/usrep' + str(volume) + str(page) +'/usrep' + str(volume) + str(page) +'.pdf'
29
  Scraper.download_loc(loc_link)
30
  Manager.run('Temp')
31
  # file = open("PDF Cases/Temp/processed.pkl",'rb')