from bs4 import BeautifulSoup
import re
import requests
import os
def download_slip(link):
    """Download a slip-opinion PDF from supremecourt.gov.

    Parameters
    ----------
    link : str
        Site-relative href (e.g. "/opinions/...pdf"); the domain is prepended here.

    Saves the PDF to 'PDF Cases/Temp/opinion.pdf'.
    Raises requests.HTTPError on a non-2xx response.
    """
    r = requests.get("https://www.supremecourt.gov" + link, stream=True)
    # Fail loudly instead of silently writing an HTML error page to disk.
    r.raise_for_status()
    base = link.split('/')[-1].split('.pdf')[0]
    # NOTE(review): the computed base is immediately overwritten — looks like a
    # debug leftover; every download lands in the same "Temp" folder. Confirm intent.
    base = "Temp"
    out_dir = os.path.join('PDF Cases', base)
    # makedirs(exist_ok=True): creates parents, no TOCTOU race on an isdir check,
    # and matches download_loc's behavior.
    os.makedirs(out_dir, exist_ok=True)
    name = os.path.join(out_dir, "opinion.pdf")
    with open(name, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            f.write(chunk)
def download_loc(link):
    """Download a U.S. Reports opinion PDF from a full Library of Congress URL.

    Parameters
    ----------
    link : str
        Absolute URL ending in a 'usrep' filename, e.g. ".../usrep347483.pdf".

    Saves the PDF to 'PDF Cases/Temp/opinion.pdf'.
    Raises requests.HTTPError on a non-2xx response.
    """
    base = link.split('/')[-1].split('.pdf')[0]
    # usrep filenames encode volume (first 3 digits) then page,
    # e.g. usrep347483 -> volume 347, page 483.
    volume = int(base.split('usrep')[-1][0:3])
    page = int(base.split('usrep')[-1][3:])
    foldername = str(volume) + '_' + str(page)
    # NOTE(review): the computed volume_page folder name is immediately
    # overwritten — debug leftover? All downloads share "Temp". Confirm intent.
    foldername = "Temp"
    r = requests.get(link, stream=True)
    # Fail loudly instead of silently writing an HTML error page to disk.
    r.raise_for_status()
    out_dir = os.path.join('PDF Cases', foldername)
    os.makedirs(out_dir, exist_ok=True)
    name = os.path.join(out_dir, "opinion.pdf")
    with open(name, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            f.write(chunk)
def slip_pipeline(year):
    """Scrape the slip-opinion index for a term year and download every PDF.

    Parameters
    ----------
    year : int or str
        Term year segment of the slipopinion URL (e.g. 21 or 2021 — whatever
        the site expects; passed through str()).

    Side effects: one download_slip() call (and file write) per matching link.
    Raises requests.HTTPError on a non-2xx index response, IndexError if the
    'accordion' div is absent.
    """
    page = requests.get(
        "https://www.supremecourt.gov/opinions/slipopinion/" + str(year)
    )
    page.raise_for_status()
    # Explicit parser: deterministic across environments and silences the
    # "no parser specified" warning.
    soup = BeautifulSoup(page.text, "html.parser")
    anchors = soup.find_all('div', attrs={'id': 'accordion'})[0].find_all('a')
    links = []
    for a in anchors:
        href = a.get('href')
        # Guard against anchors with no href (original crashed on None.lower()).
        # Keep only opinion PDFs: match .pdf case-insensitively, skip "new"
        # badge links and "diff" versions (case-sensitive, as before).
        if href and ".pdf" in href.lower() and "new" not in href and "diff" not in href:
            links.append(href)
    for href in links:
        download_slip(href)