Update app.py
Browse files
app.py
CHANGED
|
@@ -4,17 +4,28 @@ import Scraper
|
|
| 4 |
import Manager
|
| 5 |
import os
|
| 6 |
import pickle
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
st.set_page_config(layout="wide")
|
| 10 |
|
| 11 |
# loc_link = st.text_input('LOC Link', placeholder='https://tile.loc.gov/storage-services/service/ll/usrep/usrep200/usrep200361/usrep200361.pdf')
|
| 12 |
-
volume = st.slider('Volume', 200, 550,
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
run = st.button("Run")
|
| 15 |
if run:
|
| 16 |
with st.spinner("Downloading"):
|
| 17 |
-
loc_link =
|
|
|
|
| 18 |
Scraper.download_loc(loc_link)
|
| 19 |
Manager.run('Temp')
|
| 20 |
# file = open("PDF Cases/Temp/processed.pkl",'rb')
|
|
|
|
| 4 |
import Manager
|
| 5 |
import os
|
| 6 |
import pickle
|
| 7 |
+
from bs4 import BeautifulSoup
|
| 8 |
|
| 9 |
|
| 10 |
st.set_page_config(layout="wide")
|
| 11 |
|
| 12 |
# loc_link = st.text_input('LOC Link', placeholder='https://tile.loc.gov/storage-services/service/ll/usrep/usrep200/usrep200361/usrep200361.pdf')
|
| 13 |
+
# Let the user pick a U.S. Reports volume, then scrape the Library of Congress
# listing page for that volume to collect one PDF link per case.
volume = st.slider('Volume', 200, 550, 400)
r = requests.get(
    'https://www.loc.gov/collections/united-states-reports/'
    '?fa=partof%3Au.s.+reports%3A+volume+' + str(volume) + '&st=list&c=250',
    timeout=30,  # do not let a stalled connection hang the whole app
)
# Fail loudly on an HTTP error instead of parsing an error page into an
# empty link list.
r.raise_for_status()

# Name the parser explicitly so parsing does not depend on which optional
# parsers happen to be installed (and to avoid bs4's "no parser specified"
# warning).
soup = BeautifulSoup(r.text, 'html.parser')
html_links = soup.find_all('link', attrs={'rel': 'alternate'})

# Keep only links whose file name (the part before '.gif') is exactly 11
# characters long, and point them at the '.pdf' instead.
# NOTE(review): presumably the 11 characters are 'usrep' + 3-digit volume +
# 3-digit page, as in the example link in the comment above -- confirm.
pdf_links = []
for hl in html_links:
    href = hl.get('href', '')  # tolerate <link> tags that carry no href
    stem = href.split('/')[-1].split('.gif')[0]
    if len(stem) == 11:
        pdf_links.append(href.split('.gif')[0] + '.pdf')
print(pdf_links)  # debug output on the server console

if not pdf_links:
    # Without any case there is nothing to select or download; stop this
    # script run with a readable message rather than an out-of-range slider.
    st.error('No case PDFs were found for volume ' + str(volume) + '.')
    st.stop()

if len(pdf_links) == 1:
    # A slider needs a real range; with a single case the choice is fixed.
    case_num = 0
else:
    # case_num is used as an index into pdf_links, so the largest valid value
    # is len(pdf_links) - 1 (the previous upper bound of len(pdf_links) raised
    # IndexError when the slider was moved to its maximum).
    case_num = st.slider('Case Number', 0, len(pdf_links) - 1, 1, step=1)
|
| 23 |
+
|
| 24 |
run = st.button("Run")
|
| 25 |
if run:
|
| 26 |
with st.spinner("Downloading"):
|
| 27 |
+
loc_link = pdf_links[case_num]
|
| 28 |
+
# loc_link = 'https://tile.loc.gov/storage-services/service/ll/usrep/usrep' + str(volume) + '/usrep' + str(volume) + str(page) +'/usrep' + str(volume) + str(page) +'.pdf'
|
| 29 |
Scraper.download_loc(loc_link)
|
| 30 |
Manager.run('Temp')
|
| 31 |
# file = open("PDF Cases/Temp/processed.pkl",'rb')
|