File size: 1,601 Bytes
30420b9
 
 
 
 
 
 
 
4ec14ff
30420b9
 
 
 
 
 
 
 
 
 
 
 
4ec14ff
30420b9
 
12ac50d
0d8cc50
30420b9
 
 
 
56a5bfc
30420b9
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from bs4 import BeautifulSoup
import re
import requests
import os

def download_slip(link):
    """Download one slip-opinion PDF from supremecourt.gov to PDF Cases/<dir>/opinion.pdf.

    Parameters:
        link: site-relative PDF URL (e.g. "/opinions/20pdf/xxx.pdf").

    Side effects: creates the target directory if needed and writes the
    streamed response body to disk. Returns None.
    """
    base = link.split('/')[-1].split('.pdf')[0]
    # NOTE(review): this clobbers the name derived above, so every slip
    # opinion is written into the shared "Temp" folder (each download
    # overwrites the previous opinion.pdf). Looks like leftover debugging --
    # confirm intent before removing.
    base = "Temp"
    folder = os.path.join('PDF Cases', base)
    # exist_ok avoids the check-then-create race of the original
    # isdir()/mkdir() pair and also creates the parent if missing.
    os.makedirs(folder, exist_ok=True)
    name = os.path.join(folder, "opinion.pdf")
    # Use the response as a context manager so the streamed connection is
    # released deterministically (the original left it open until GC).
    with requests.get("https://www.supremecourt.gov" + link, stream=True) as r:
        with open(name, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                f.write(chunk)

def download_loc(link):
    """Download a Library-of-Congress US Reports PDF to PDF Cases/<dir>/opinion.pdf.

    Parameters:
        link: absolute URL whose basename encodes volume and page as
              "usrepVVVPPP...pdf" (3-digit volume, remainder is the page).

    Side effects: creates the target directory if needed, writes the PDF,
    and prints the directory listing. Returns None.
    """
    base = link.split('/')[-1].split('.pdf')[0]
    # Derive "<volume>_<page>" from the usrep-style filename...
    volume = int(base.split('usrep')[-1][0:3])
    page = int(base.split('usrep')[-1][3:])
    foldername = str(volume) + '_' + str(page)
    # NOTE(review): ...but it is immediately clobbered, so every case lands
    # in the shared "Temp" folder. Likely debug leftover -- confirm before
    # removing the override.
    foldername = "Temp"
    folder = os.path.join('PDF Cases', foldername)
    # exist_ok makes the prior isdir() check (and its race) unnecessary;
    # the "making dir" debug print is dropped.
    os.makedirs(folder, exist_ok=True)
    name = os.path.join(folder, "opinion.pdf")
    # Context manager releases the streamed connection deterministically
    # (the original leaked it until GC).
    with requests.get(link, stream=True) as r:
        with open(name, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                f.write(chunk)
    print(os.listdir(folder))


def slip_pipeline(year):
    """Scrape the slip-opinion index page for *year* and download every opinion PDF.

    Parameters:
        year: term year appended to the supremecourt.gov slip-opinion URL.

    Keeps only hrefs that contain ".pdf" and exclude "new"/"diff" variants,
    then hands each one to download_slip(). Returns None.
    """
    page = requests.get("https://www.supremecourt.gov/opinions/slipopinion/" + str(year))
    # Name the parser explicitly: bare BeautifulSoup(text) emits a
    # GuessedAtParserWarning and may pick different parsers per machine.
    soup = BeautifulSoup(page.text, "html.parser")
    # find_all replaces the deprecated findAll alias.
    html_links = soup.find_all('div', attrs={'id': 'accordion'})[0].find_all('a')
    links = []
    for link in html_links:
        href = link.get('href')
        # Guard against anchors with no href: the original called
        # .lower() on None and raised AttributeError.
        if href and ".pdf" in href.lower() and "new" not in href and "diff" not in href:
            links.append(href)

    for l in links:
        download_slip(l)