Spaces:

cools
/

Gideon

Runtime error

cools committed on May 27, 2023

Commit

7a5575d

1 Parent(s): 4cb57bb

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,17 +4,28 @@ import Scraper
 import Manager
 import os
 import pickle
 st.set_page_config(layout="wide")
 # loc_link = st.text_input('LOC Link', placeholder='https://tile.loc.gov/storage-services/service/ll/usrep/usrep200/usrep200361/usrep200361.pdf')
-volume = st.slider('Volume', 200, 550, 1)
-page = st.slider('Page', 0, 1200, 1)
 run = st.button("Run")
 if run:
     with st.spinner("Downloading"):
-        loc_link = 'https://tile.loc.gov/storage-services/service/ll/usrep/usrep' + str(volume) + '/usrep' + str(volume) + str(page) +'/usrep' + str(volume) + str(page) +'.pdf'
         Scraper.download_loc(loc_link)
         Manager.run('Temp')
         # file = open("PDF Cases/Temp/processed.pkl",'rb')

 import Manager
 import os
 import pickle
+from bs4 import BeautifulSoup
 st.set_page_config(layout="wide")
 # loc_link = st.text_input('LOC Link', placeholder='https://tile.loc.gov/storage-services/service/ll/usrep/usrep200/usrep200361/usrep200361.pdf')
+volume = st.slider('Volume', 200, 550, 400)
+r = requests.get('https://www.loc.gov/collections/united-states-reports/?fa=partof%3Au.s.+reports%3A+volume+'+str(volume) +'&st=list&c=250')
+soup = BeautifulSoup(r.text)
+html_links = soup.findAll('link',attrs={'rel': 'alternate'})
+pdf_links = []
+for hl in html_links:
+    if len(hl['href'].split('/')[-1].split('.gif')[0])==  11:
+        pdf_links.append(hl['href'].split('.gif')[0]+'.pdf')
+case_num = st.slider('Case Number', 0, len(pdf_links), 1, step=1)
+print(pdf_links)
 run = st.button("Run")
 if run:
     with st.spinner("Downloading"):
+        loc_link = pdf_links[case_num]
+        # loc_link = 'https://tile.loc.gov/storage-services/service/ll/usrep/usrep' + str(volume) + '/usrep' + str(volume) + str(page) +'/usrep' + str(volume) + str(page) +'.pdf'
         Scraper.download_loc(loc_link)
         Manager.run('Temp')
         # file = open("PDF Cases/Temp/processed.pkl",'rb')