cools committed on
Commit
fb4beeb
·
1 Parent(s): db9a867

Delete Scraper.py

Browse files
Files changed (1) hide show
  1. Scraper.py +0 -44
Scraper.py DELETED
@@ -1,44 +0,0 @@
1
- from bs4 import BeautifulSoup
2
- import re
3
- import requests
4
- import os
5
-
6
def download_slip(link):
    """Download one slip-opinion PDF from supremecourt.gov.

    Parameters
    ----------
    link : str
        Site-relative href of the PDF (e.g. "/opinions/20pdf/xx-yyy.pdf").

    Side effects: creates 'PDF Cases/<pdf-stem>/' if needed and streams the
    PDF into it as 'opinion.pdf'. Raises requests.HTTPError on a bad status.
    """
    # Folder named after the PDF file stem.
    # NOTE(review): the original immediately overwrote this with the
    # hard-coded "Temp" (debug leftover, every download clobbered the same
    # folder); restored the computed name here.
    base = link.split('/')[-1].split('.pdf')[0]
    folder = os.path.join('PDF Cases', base)
    # makedirs(exist_ok=True) avoids the isdir()/mkdir() race and also
    # creates the 'PDF Cases' parent if it does not exist yet.
    os.makedirs(folder, exist_ok=True)
    name = os.path.join(folder, 'opinion.pdf')
    # Stream in chunks so large PDFs are never held fully in memory; the
    # context manager guarantees the connection is released.
    with requests.get("https://www.supremecourt.gov" + link, stream=True) as r:
        r.raise_for_status()
        with open(name, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                f.write(chunk)
16
-
17
def download_loc(link):
    """Download one Library-of-Congress U.S. Reports PDF.

    Parameters
    ----------
    link : str
        Absolute URL to a PDF whose filename stem looks like
        "usrep<3-digit volume><page>" (e.g. ".../usrep410113.pdf").

    Side effects: creates 'PDF Cases/<volume>_<page>/' if needed and streams
    the PDF into it as 'opinion.pdf'. Raises requests.HTTPError on a bad
    status, ValueError if the filename does not match the usrep pattern.
    """
    base = link.split('/')[-1].split('.pdf')[0]
    # Filename stem encodes volume (first 3 digits) then page number.
    rep = base.split('usrep')[-1]
    volume = int(rep[0:3])
    page = int(rep[3:])
    # NOTE(review): the original clobbered this with the hard-coded "Temp"
    # (debug leftover) and printed "making dir"/the dir listing; restored
    # the computed <volume>_<page> folder name and dropped the debug prints.
    foldername = str(volume) + '_' + str(page)
    folder = os.path.join('PDF Cases', foldername)
    # exist_ok avoids the isdir()/makedirs() race the original had.
    os.makedirs(folder, exist_ok=True)
    name = os.path.join(folder, 'opinion.pdf')
    # Context manager releases the connection; stream to disk in chunks.
    with requests.get(link, stream=True) as r:
        r.raise_for_status()
        with open(name, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                f.write(chunk)
32
-
33
-
34
def slip_pipeline(year):
    """Scrape the slip-opinions index for *year* and download every opinion PDF.

    Parameters
    ----------
    year : int or str
        Term year segment of the slip-opinion URL
        (https://www.supremecourt.gov/opinions/slipopinion/<year>).

    Side effects: one GET for the index page, then one download per matching
    PDF link via download_slip(). Raises IndexError if the page has no
    div#accordion.
    """
    page = requests.get("https://www.supremecourt.gov/opinions/slipopinion/" + str(year))
    # Explicit parser silences bs4's "no parser specified" warning and makes
    # parsing consistent across environments; find_all replaces the
    # deprecated findAll alias.
    soup = BeautifulSoup(page.text, "html.parser")
    anchors = soup.find_all('div', attrs={'id': 'accordion'})[0].find_all('a')
    links = []
    for anchor in anchors:
        href = anchor.get('href')
        # Guard against anchors with no href (the original crashed with
        # AttributeError on .lower()); skip the "new"/"diff" revision links.
        if href and ".pdf" in href.lower() and "new" not in href and "diff" not in href:
            links.append(href)
    for pdf_link in links:
        download_slip(pdf_link)