cools committed on
Commit
b0c9dfb
·
1 Parent(s): 0eaaaba

delete awshandler.py

Browse files
Files changed (1) hide show
  1. AWSHandler.py +0 -48
AWSHandler.py DELETED
@@ -1,48 +0,0 @@
1
- import boto3
2
- import os
3
- import pandas as pd
4
- import json
5
-
6
-
7
def upload_files(origin_path, destination_path, aws_access_key, aws_secret_key):
    """Upload every file under ``origin_path`` to the ``gideon-corpus`` bucket.

    Each file is stored under the key
    ``destination_path + <origin folder name> + '/' + <path relative to origin>``,
    always using ``/`` as the key separator.

    Args:
        origin_path: Local directory to walk recursively. A trailing path
            separator is tolerated. An empty string uploads nothing.
        destination_path: Key prefix inside the bucket. It is concatenated
            verbatim, so it should end with ``/`` if a folder is intended.
        aws_access_key: AWS access key id used to build the session.
        aws_secret_key: AWS secret access key used to build the session.
    """
    session = boto3.Session(
        aws_access_key_id=aws_access_key,
        aws_secret_access_key=aws_secret_key,
    )
    bucket = session.resource('s3').Bucket('gideon-corpus')

    # os.walk('') yields nothing; keep that no-op instead of letting
    # normpath('') turn the empty string into the current directory.
    if not origin_path:
        return

    # Normalise once so that "dir/" and "dir" produce the same keys. Slicing
    # the raw string broke when origin_path ended with a separator.
    root = os.path.normpath(origin_path)
    folder_name = os.path.basename(root)

    for subdir, _dirs, files in os.walk(root):
        for file_name in files:
            full_path = os.path.join(subdir, file_name)
            # S3 keys use '/', regardless of the local OS separator.
            relative_key = os.path.relpath(full_path, root).replace(os.sep, '/')
            key = destination_path + folder_name + '/' + relative_key
            with open(full_path, 'rb') as data:
                bucket.put_object(Key=key, Body=data)
17
-
18
-
19
def retrieve_logs(aws_access_key, aws_secret_key):
    """Download ``logs/logs.csv`` from the ``gideon-corpus`` bucket.

    Args:
        aws_access_key: AWS access key id. A falsy value (``None`` or ``''``)
            lets boto3 fall back to its default credential lookup.
        aws_secret_key: AWS secret access key; same fallback rule.

    Returns:
        A pandas DataFrame parsed from the CSV object body.
    """
    # The credentials were previously accepted but never used; pass them on
    # so the caller's keys actually authenticate the request.
    s3 = boto3.client(
        's3',
        aws_access_key_id=aws_access_key or None,
        aws_secret_access_key=aws_secret_key or None,
    )
    logs_response = s3.get_object(Bucket='gideon-corpus', Key='logs/logs.csv')
    return pd.read_csv(logs_response['Body'])
24
-
25
def retrieve_casedocs(case_num, aws_access_key, aws_secret_key):
    """Fetch the opinions table and metadata for one case.

    Objects are read from the ``gideon-corpus`` bucket at
    ``Cases/<case_num>/opinions.csv`` and ``Cases/<case_num>/metadata.json``.

    Args:
        case_num: Case identifier as a string (it is concatenated into the key).
        aws_access_key: AWS access key id. A falsy value (``None`` or ``''``)
            lets boto3 fall back to its default credential lookup.
        aws_secret_key: AWS secret access key; same fallback rule.

    Returns:
        A tuple ``(opinions_df, metadata)``: a pandas DataFrame parsed from
        the CSV, and the object decoded from the UTF-8 JSON file.
    """
    # The credentials were previously accepted but never used; pass them on
    # so the caller's keys actually authenticate the requests.
    s3 = boto3.client(
        's3',
        aws_access_key_id=aws_access_key or None,
        aws_secret_access_key=aws_secret_key or None,
    )
    case_prefix = 'Cases/' + case_num + '/'

    opinions_response = s3.get_object(
        Bucket='gideon-corpus', Key=case_prefix + 'opinions.csv'
    )
    opinions_df = pd.read_csv(opinions_response['Body'])

    metadata_response = s3.get_object(
        Bucket='gideon-corpus', Key=case_prefix + 'metadata.json'
    )
    metadata = json.loads(metadata_response['Body'].read().decode('utf-8'))

    return opinions_df, metadata
32
-
33
-
34
def retrieve_all_casedocs(prefix, aws_access_key, aws_secret_key):
    """Fetch the documents of every case folder listed under ``prefix``.

    Lists the ``gideon-corpus`` bucket with ``/`` as delimiter, takes the
    second path component of each common prefix as the case number, and
    loads each case with ``retrieve_casedocs``.

    Args:
        prefix: Key prefix to list, e.g. ``'Cases/'``.
            NOTE(review): ``retrieve_casedocs`` is expected to read from
            ``Cases/<case_num>/``, so other prefixes are unlikely to work;
            confirm against callers.
        aws_access_key: AWS access key id. A falsy value (``None`` or ``''``)
            lets boto3 fall back to its default credential lookup.
        aws_secret_key: AWS secret access key; same fallback rule.

    Returns:
        A list of ``(opinions_df, metadata)`` tuples, ordered by the sorted
        common prefixes so the order is stable between runs.
    """
    # The credentials were previously accepted but never used for listing;
    # pass them on so the caller's keys actually authenticate the request.
    s3 = boto3.client(
        's3',
        aws_access_key_id=aws_access_key or None,
        aws_secret_access_key=aws_secret_key or None,
    )
    paginator = s3.get_paginator('list_objects_v2')

    subdirectories = set()
    pages = paginator.paginate(Bucket="gideon-corpus", Prefix=prefix, Delimiter='/')
    for result in pages:
        # Pages without sub-folders carry no 'CommonPrefixes' entry.
        common_prefixes = result.get('CommonPrefixes') or []
        subdirectories.update(entry.get('Prefix') for entry in common_prefixes)

    # Sort because set iteration order is not stable across runs.
    case_nums = [subdir.split('/')[1] for subdir in sorted(subdirectories)]

    return [
        retrieve_casedocs(case_num, aws_access_key, aws_secret_key)
        for case_num in case_nums
    ]