| | import streamlit as st |
| | import cv2 |
| | import Scraper |
| | import Manager |
| | import ImageProcessor |
| | import TextProcessor |
| | import Tagger |
| | import Parser |
| | import os |
| | import pickle |
| | from bs4 import BeautifulSoup |
| | import requests |
| | import shutil |
| | import pandas as pd |
| |
|
| |
|
# Use the full browser width so the 4-column image grid below has room.
st.set_page_config(layout="wide")
| |
|
| | |
# Pick a U.S. Reports volume and scrape the Library of Congress listing
# for that volume, collecting one downloadable PDF link per case.
volume = st.slider('Volume', 200, 550, 400)
r = requests.get(
    'https://www.loc.gov/collections/united-states-reports/?fa=partof%3Au.s.+reports%3A+volume+'
    + str(volume) + '&st=list&c=250')
# Explicit parser avoids bs4's GuessedAtParserWarning and keeps results
# consistent across environments.
soup = BeautifulSoup(r.text, 'html.parser')
html_links = soup.find_all('link', attrs={'rel': 'alternate'})

# Case scans are exposed as .gif previews whose filename stem is an
# 11-character id; swapping the extension yields the PDF download URL.
pdf_links = []
for hl in html_links:
    stem = hl['href'].split('/')[-1].split('.gif')[0]
    if len(stem) == 11:
        pdf_links.append(hl['href'].split('.gif')[0] + '.pdf')

# case_num indexes pdf_links directly, so the slider max must be len-1
# (the old max of len allowed an out-of-range selection).
max_case = max(len(pdf_links) - 1, 0)
case_num = st.slider('Case Number', 0, max_case, min(1, max_case), step=1)
| |
|
# "Run" downloads the selected case and pushes it through the processing
# pipeline (image cleanup -> OCR/text -> tagging) inside 'PDF Cases/Temp'.
run = st.button("Run")
if run:
    if not pdf_links:
        st.error("No cases found for this volume - nothing to download.")
    else:
        with st.spinner("Downloading"):
            # Clamp defensively: the slider range is built from pdf_links
            # at render time and may not match the current list length.
            loc_link = pdf_links[min(case_num, len(pdf_links) - 1)]
            # Start from a clean working directory so pages from a previous
            # run don't leak into this case's output.
            if os.path.exists('PDF Cases/Temp'):
                shutil.rmtree('PDF Cases/Temp')
            Scraper.download_loc(loc_link)
            ImageProcessor.process_file('PDF Cases/Temp')
            TextProcessor.process_file('PDF Cases/Temp')
            Tagger.process_file('PDF Cases/Temp', draw=True)
| |
|
| |
|
# Render every processed page image in a 4-column grid.
cols = st.columns(4)
temp_dir = 'PDF Cases/Temp'
if os.path.isdir(temp_dir):  # nothing to show before the first Run
    # Filenames look like "<page>-processed.png"; sort numerically by the
    # page prefix since os.listdir order is arbitrary.
    image_filenames = sorted(
        (f for f in os.listdir(temp_dir) if 'processed.png' in f),
        key=lambda name: int(name.split('-')[0]) if name.split('-')[0].isdigit() else 0)
    for i, fname in enumerate(image_filenames):
        # Read the listed file itself; the old code rebuilt the name from the
        # loop index, which broke whenever page numbers were non-contiguous.
        image = cv2.imread(os.path.join(temp_dir, fname))
        if image is None:
            continue  # unreadable/partial file - skip instead of crashing st.image
        with cols[i % 4]:
            # cv2 decodes to BGR; tell Streamlit so colors aren't swapped.
            st.image(image, channels="BGR")
| |
|
| |
|
# The processors write their tabular results as CSV inside the temp
# directory; pd.read_csv on the directory path itself always raises,
# so locate an actual CSV file first.
temp_dir = 'PDF Cases/Temp'
csv_names = []
if os.path.isdir(temp_dir):
    csv_names = sorted(f for f in os.listdir(temp_dir) if f.endswith('.csv'))
if csv_names:
    # TODO(review): confirm which CSV TextProcessor/Tagger emit; the first
    # match (alphabetical) is assumed here.
    opinions_df = pd.read_csv(os.path.join(temp_dir, csv_names[0]))
    st.write(opinions_df)
else:
    st.info("No parsed opinion data available yet - press Run first.")
| | |