Gideon / app.py
cools's picture
Update app.py
73e00e8
raw
history blame
1.74 kB
import streamlit as st
import cv2
import Scraper
import Manager
import ImageProcessor
import TextProcessor
import Tagger
import Parser
import os
import pickle
from bs4 import BeautifulSoup
import requests
import shutil
import pandas as pd
st.set_page_config(layout="wide")

# Pick a U.S. Reports volume and list the case PDFs available for it on loc.gov.
volume = st.slider('Volume', 200, 550, 400)
r = requests.get('https://www.loc.gov/collections/united-states-reports/?fa=partof%3Au.s.+reports%3A+volume+'+str(volume) +'&st=list&c=250')
# Name the parser explicitly: BeautifulSoup(r.text) alone emits a
# GuessedAtParserWarning and may pick different parsers on different machines.
soup = BeautifulSoup(r.text, 'html.parser')
# findAll is the deprecated alias of find_all.
html_links = soup.find_all('link', attrs={'rel': 'alternate'})

# <link rel="alternate"> hrefs whose final path component has an 11-character
# stem before '.gif' (e.g. 'usrep200361') are per-case page images; swapping
# the extension for '.pdf' yields the downloadable case PDF.
pdf_links = []
for hl in html_links:
    if len(hl['href'].split('/')[-1].split('.gif')[0]) == 11:
        pdf_links.append(hl['href'].split('.gif')[0] + '.pdf')

# Bug fix: the slider max was len(pdf_links), one past the last valid index,
# so selecting the top value made pdf_links[case_num] raise IndexError.
# Also guard the degenerate case where the volume page yielded no links.
max_case = max(len(pdf_links) - 1, 0)
case_num = st.slider('Case Number', 0, max_case, min(1, max_case), step=1)
run = st.button("Run")
if run:
    # Download the selected case PDF into a fresh scratch directory, then run
    # the OCR/annotation pipeline over it.
    with st.spinner("Downloading"):
        loc_link = pdf_links[case_num]
        if os.path.exists('PDF Cases/Temp'):
            shutil.rmtree('PDF Cases/Temp')
        Scraper.download_loc(loc_link)
        ImageProcessor.process_file('PDF Cases/Temp')
        TextProcessor.process_file('PDF Cases/Temp')
        Tagger.process_file('PDF Cases/Temp', draw=True)

    # Show the annotated page images in a 4-column grid.
    cols = st.columns(4)
    image_filenames = [f for f in os.listdir('PDF Cases/Temp') if 'processed.png' in f]
    # Bug fix: the old loop read str(i) + '-processed.png', assuming os.listdir
    # returned exactly 0..N-1 in order — listdir order is arbitrary and any gap
    # made cv2.imread return None. Sort by the numeric page prefix and open the
    # files that actually exist.
    image_filenames.sort(key=lambda name: int(name.split('-')[0]))
    for i, fname in enumerate(image_filenames):
        image = cv2.imread('PDF Cases/Temp/' + fname)
        with cols[i % 4]:
            st.image(image)

    # Bug fix: pd.read_csv('PDF Cases/Temp') was handed the directory itself,
    # which raises. Read the CSV the pipeline wrote into the temp folder.
    # NOTE(review): assumes the pipeline emits exactly one CSV — confirm
    # against TextProcessor/Tagger output.
    csv_files = [f for f in os.listdir('PDF Cases/Temp') if f.endswith('.csv')]
    if csv_files:
        opinions_df = pd.read_csv('PDF Cases/Temp/' + csv_files[0])
        st.write(opinions_df)
    else:
        st.warning('No opinions CSV found in PDF Cases/Temp')