| from bs4 import BeautifulSoup | |
| import re | |
| import requests | |
| import os | |
def download_slip(link):
    """Download one slip-opinion PDF into 'PDF Cases/<base>/opinion.pdf'.

    Args:
        link: Site-relative URL of the PDF on supremecourt.gov
            (e.g. "/opinions/20pdf/19-123_abcd.pdf").
    """
    r = requests.get("https://www.supremecourt.gov" + link, stream=True)
    # Fail loudly on HTTP errors instead of silently saving an error page as a PDF.
    r.raise_for_status()
    # Folder name is the PDF file name without its extension.
    # BUG FIX: a leftover debug line (base = "Temp") clobbered this value,
    # dumping every case into one folder and overwriting prior downloads.
    base = link.split('/')[-1].split('.pdf')[0]
    folder = 'PDF Cases/' + base
    # makedirs(exist_ok=True) is race-safe and also creates 'PDF Cases' if missing.
    os.makedirs(folder, exist_ok=True)
    name = folder + '/' + "opinion.pdf"
    with open(name, 'wb') as f:
        # Stream in 1 KiB chunks to avoid holding the whole PDF in memory.
        for chunk in r.iter_content(chunk_size=1024):
            f.write(chunk)
def download_loc(link):
    """Download a U.S. Reports scan into 'PDF Cases/<volume>_<page>/opinion.pdf'.

    The file name is assumed to embed volume and page as "usrepVVVPPP...":
    the first three digits after 'usrep' are the volume, the rest the
    starting page — TODO confirm against the actual Library of Congress URLs.

    Args:
        link: Absolute URL to a "usrepNNN..." PDF.
    """
    base = link.split('/')[-1].split('.pdf')[0]
    digits = base.split('usrep')[-1]
    volume = int(digits[0:3])
    page = int(digits[3:])
    # BUG FIX: a leftover debug line (foldername = "Temp") clobbered the
    # computed name, collapsing every case into one folder and overwriting
    # earlier downloads. Debug prints removed as well.
    foldername = str(volume) + '_' + str(page)
    folder = 'PDF Cases/' + foldername
    r = requests.get(link, stream=True)
    # Fail loudly on HTTP errors instead of saving an error page as a PDF.
    r.raise_for_status()
    # makedirs(exist_ok=True) is race-safe and also creates 'PDF Cases' if missing.
    os.makedirs(folder, exist_ok=True)
    name = folder + '/' + "opinion.pdf"
    with open(name, 'wb') as f:
        # Stream in 1 KiB chunks to avoid holding the whole PDF in memory.
        for chunk in r.iter_content(chunk_size=1024):
            f.write(chunk)
def slip_pipeline(year):
    """Scrape the slip-opinion index for *year* and download each opinion PDF.

    Args:
        year: Term year as it appears in the URL path
            (e.g. 20 for https://www.supremecourt.gov/opinions/slipopinion/20).
    """
    page = requests.get("https://www.supremecourt.gov/opinions/slipopinion/" + str(year))
    # Fail loudly on HTTP errors instead of scraping an error page.
    page.raise_for_status()
    # Explicit parser avoids bs4's "no parser was explicitly specified" warning
    # and keeps parsing consistent across environments.
    soup = BeautifulSoup(page.text, 'html.parser')
    accordion = soup.find('div', attrs={'id': 'accordion'})
    links = []
    for anchor in accordion.findAll('a'):
        href = anchor.get('href')
        # Guard against anchors without an href (original crashed with
        # AttributeError on .lower()); skip revision-diff and "new" links.
        if href and ".pdf" in href.lower() and "new" not in href and "diff" not in href:
            links.append(href)
    for l in links:
        download_slip(l)