Delete Scraper.py
Browse files- Scraper.py +0 -44
Scraper.py
DELETED
|
@@ -1,44 +0,0 @@
|
|
| 1 |
-
from bs4 import BeautifulSoup
|
| 2 |
-
import re
|
| 3 |
-
import requests
|
| 4 |
-
import os
|
| 5 |
-
|
| 6 |
-
def download_slip(link):
    """Download one slip-opinion PDF from www.supremecourt.gov.

    The PDF is streamed to ``PDF Cases/Temp/opinion.pdf``; every call writes
    to that same path, so a later download replaces an earlier one.

    Args:
        link: Site-relative path of the PDF. It is appended verbatim to
            ``https://www.supremecourt.gov``.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
            Without this check the error page body would be saved as
            ``opinion.pdf``.
        requests.RequestException: On connection failures or timeouts.
    """
    # NOTE(review): the previous code derived a folder name from the link's
    # basename and then unconditionally overwrote it with "Temp". The fixed
    # "Temp" destination is kept because it is the effective behavior; the
    # dead derivation was dropped. Confirm whether per-case folders are wanted.
    folder = 'PDF Cases/' + "Temp"
    target = folder + '/' + "opinion.pdf"

    # The context manager releases the streamed connection even if writing
    # fails; the timeout stops a stalled server from hanging the call.
    with requests.get("https://www.supremecourt.gov" + link,
                      stream=True, timeout=30) as response:
        response.raise_for_status()
        # makedirs also creates the parent 'PDF Cases' directory and, with
        # exist_ok=True, does not race with a separate existence check.
        os.makedirs(folder, exist_ok=True)
        with open(target, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024):
                f.write(chunk)
|
| 16 |
-
|
| 17 |
-
def download_loc(link):
    """Download a PDF from an absolute URL into the shared temp folder.

    The PDF is streamed to ``PDF Cases/Temp/opinion.pdf`` and the folder
    listing is printed afterwards.

    Args:
        link: Absolute URL of the PDF; it is passed to ``requests.get``
            unchanged.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
            Without this check the error page body would be saved as
            ``opinion.pdf``.
        requests.RequestException: On connection failures or timeouts.
    """
    # NOTE(review): the previous code parsed a volume and page number out of a
    # ``usrep<VVV><PPP>`` basename to build a folder name, then overwrote that
    # name with "Temp". The parse was dead code that could still raise
    # ValueError on links not matching the pattern, so it was removed; the
    # effective "Temp" destination is unchanged. Confirm whether
    # ``<volume>_<page>`` folders are wanted.
    folder = 'PDF Cases/' + "Temp"
    target = folder + '/' + "opinion.pdf"

    # The context manager releases the streamed connection even if writing
    # fails; the timeout stops a stalled server from hanging the call.
    with requests.get(link, stream=True, timeout=30) as response:
        response.raise_for_status()
        if not os.path.isdir(folder):
            print("making dir")
        # exist_ok=True makes this a no-op when the folder is already there.
        os.makedirs(folder, exist_ok=True)
        with open(target, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024):
                f.write(chunk)
    print(os.listdir(folder))
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
def slip_pipeline(year):
    """Download every slip-opinion PDF linked from a term's index page.

    Fetches ``https://www.supremecourt.gov/opinions/slipopinion/<year>``,
    collects the anchors inside the first ``<div id="accordion">`` and calls
    ``download_slip`` on each qualifying href, in page order.

    An href qualifies when it contains ``.pdf`` (case-insensitive) and does
    not contain the substrings ``new`` or ``diff`` (case-sensitive, as
    before).

    Args:
        year: Value appended to the index URL via ``str(year)``.

    Raises:
        IndexError: If the page has no ``<div id="accordion">``.
        requests.RequestException: On connection failures or timeouts.
    """
    page = requests.get(
        "https://www.supremecourt.gov/opinions/slipopinion/" + str(year),
        timeout=30,
    )
    # Name the parser explicitly: without it BeautifulSoup warns and picks
    # whichever parser happens to be installed, so results vary by machine.
    soup = BeautifulSoup(page.text, "html.parser")
    accordion = soup.find_all('div', attrs={'id': 'accordion'})[0]

    hrefs = (anchor.get('href') for anchor in accordion.find_all('a'))
    links = [
        href
        for href in hrefs
        # Anchors without an href yield None; skip them instead of crashing
        # on None.lower().
        if href
        and ".pdf" in href.lower()
        and "new" not in href
        and "diff" not in href
    ]

    for href in links:
        download_slip(href)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|