"""Streamlit app: browse a volume of the U.S. Reports on loc.gov, download one
case PDF, run the local image/text/tagging pipeline on it, and display the
processed page images plus the extracted opinions table."""

import os
import pickle
import shutil

import cv2
import pandas as pd
import requests
import streamlit as st
from bs4 import BeautifulSoup

import ImageProcessor
import Manager
import Parser
import Scraper
import Tagger
import TextProcessor

# Scratch directory the whole pipeline reads and writes.
TEMP_DIR = 'PDF Cases/Temp'

st.set_page_config(layout="wide")

# Volume of the U.S. Reports to browse.
volume = st.slider('Volume', 200, 550, 400)

# Fetch the collection listing for the chosen volume (up to 250 items).
r = requests.get(
    'https://www.loc.gov/collections/united-states-reports/?fa=partof%3Au.s.+reports%3A+volume+'
    + str(volume) + '&st=list&c=250')
# Explicit parser avoids bs4's "no parser specified" warning and keeps results
# identical across environments; find_all replaces the deprecated findAll.
soup = BeautifulSoup(r.text, 'html.parser')
html_links = soup.find_all('link', attrs={'rel': 'alternate'})

# Case-level thumbnail links have an 11-character basename (e.g. usrep200361);
# map each matching .gif thumbnail URL to the corresponding full-case PDF URL.
pdf_links = []
for hl in html_links:
    basename = hl['href'].split('/')[-1].split('.gif')[0]
    if len(basename) == 11:
        pdf_links.append(hl['href'].split('.gif')[0] + '.pdf')

# Guard the empty volume: without this the slider below would get bounds
# (0, -1) and the indexing would crash before the user could act.
if not pdf_links:
    st.warning('No cases found for this volume.')
    st.stop()

# Upper bound is len-1 because case_num indexes pdf_links directly: the
# original passed len(pdf_links), which raised IndexError on the last stop.
case_num = st.slider('Case Number', 0, len(pdf_links) - 1,
                     min(1, len(pdf_links) - 1), step=1)
print(pdf_links)  # debug: candidate PDF URLs for the chosen volume

run = st.button("Run")
if run:
    with st.spinner("Downloading"):
        loc_link = pdf_links[case_num]
        # Start each run from a clean scratch directory.
        if os.path.exists(TEMP_DIR):
            shutil.rmtree(TEMP_DIR)
        Scraper.download_loc(loc_link)
        ImageProcessor.process_file(TEMP_DIR)
        TextProcessor.process_file(TEMP_DIR)
        Tagger.process_file(TEMP_DIR, draw=True)

    # Show every processed page image in a 4-column grid. Read each file by
    # its actual listed name, sorted for a stable order — the original rebuilt
    # names from the enumerate index, which breaks whenever the processed
    # pages are not a contiguous, index-aligned 0..n-1 set.
    cols = st.columns(4)
    image_filenames = sorted(
        f for f in os.listdir(TEMP_DIR) if 'processed.png' in f)
    for i, fname in enumerate(image_filenames):
        image = cv2.imread(TEMP_DIR + '/' + fname)
        with cols[i % 4]:
            st.image(image)

    # NOTE(review): this path is a directory, not a CSV file — pd.read_csv
    # will raise here. Confirm the CSV filename the pipeline writes (e.g.
    # 'PDF Cases/Temp/opinions.csv') and point at that file instead.
    opinions_df = pd.read_csv('PDF Cases/Temp')
    st.write(opinions_df)