delete awshandler.py
Browse files- AWSHandler.py +0 -48
AWSHandler.py
DELETED
|
@@ -1,48 +0,0 @@
|
|
| 1 |
-
import boto3
|
| 2 |
-
import os
|
| 3 |
-
import pandas as pd
|
| 4 |
-
import json
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
def upload_files(origin_path, destination_path, aws_access_key, aws_secret_key, bucket_name='gideon-corpus'):
    """Recursively upload every file under ``origin_path`` to an S3 bucket.

    Each object is stored under the key
    ``destination_path + <last '/'-component of origin_path> + '/' + <path relative to origin_path>``.

    Args:
        origin_path: Local directory tree to walk. NOTE(review): the key-building
            logic splits on '/', so it assumes POSIX-style paths — confirm on Windows.
        destination_path: Key prefix inside the bucket.
        aws_access_key: AWS access key id used for the boto3 session.
        aws_secret_key: AWS secret access key used for the boto3 session.
        bucket_name: Target bucket; defaults to the original hard-coded
            'gideon-corpus' so existing callers are unaffected.
    """
    session = boto3.Session(aws_access_key_id=aws_access_key,
                            aws_secret_access_key=aws_secret_key)
    bucket = session.resource('s3').Bucket(bucket_name)

    # Key prefix: destination plus the name of the directory being uploaded.
    key_prefix = destination_path + origin_path.split('/')[-1] + '/'

    for subdir, _dirs, files in os.walk(origin_path):
        for file_name in files:
            full_path = os.path.join(subdir, file_name)
            # Path relative to origin_path; the +1 also drops the separator.
            relative_path = full_path[len(origin_path) + 1:]
            with open(full_path, 'rb') as data:
                bucket.put_object(Key=key_prefix + relative_path, Body=data)
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
def retrieve_logs(aws_access_key, aws_secret_key):
    """Download ``logs/logs.csv`` from the corpus bucket as a DataFrame.

    Fix: the credential parameters were previously accepted but never passed
    to ``boto3.client``, so authentication silently fell back to ambient AWS
    configuration. They are now forwarded explicitly, matching upload_files.

    Args:
        aws_access_key: AWS access key id.
        aws_secret_key: AWS secret access key.

    Returns:
        pandas.DataFrame parsed from the CSV object body.
    """
    s3 = boto3.client('s3',
                      aws_access_key_id=aws_access_key,
                      aws_secret_access_key=aws_secret_key)
    logs_response = s3.get_object(Bucket='gideon-corpus', Key='logs/logs.csv')
    return pd.read_csv(logs_response['Body'])
|
| 24 |
-
|
| 25 |
-
def retrieve_casedocs(case_num, aws_access_key, aws_secret_key):  # Note: this is how stuff is stored on AWS
    """Fetch one case's opinions CSV and metadata JSON from S3.

    Objects live under ``Cases/<case_num>/`` in the corpus bucket.

    Fix: the credential parameters were previously accepted but never passed
    to ``boto3.client``; they are now forwarded explicitly.

    Args:
        case_num: Case directory name under the ``Cases/`` prefix.
        aws_access_key: AWS access key id.
        aws_secret_key: AWS secret access key.

    Returns:
        Tuple ``(opinions_df, metadata)`` — a pandas.DataFrame from
        ``opinions.csv`` and a dict parsed from ``metadata.json``.
    """
    s3 = boto3.client('s3',
                      aws_access_key_id=aws_access_key,
                      aws_secret_access_key=aws_secret_key)
    opinions_response = s3.get_object(Bucket='gideon-corpus', Key='Cases/' + case_num + '/opinions.csv')
    opinions_df = pd.read_csv(opinions_response['Body'])
    metadata_response = s3.get_object(Bucket='gideon-corpus', Key='Cases/' + case_num + '/metadata.json')
    metadata = json.loads(metadata_response['Body'].read().decode('utf-8'))
    return opinions_df, metadata
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
def retrieve_all_casedocs(prefix, aws_access_key, aws_secret_key):
    """Fetch documents for every case directory found under ``prefix``.

    Lists the immediate subdirectories of ``prefix`` in the corpus bucket
    (via a delimiter-paginated listing) and downloads each case's documents
    with retrieve_casedocs.

    Fix: the credential parameters were previously accepted but never passed
    to ``boto3.client``; they are now forwarded explicitly.

    Args:
        prefix: Bucket key prefix to list (expected shape '<top>/', since the
            case number is taken from the second '/'-component — e.g.
            'Cases/123/' -> '123').
        aws_access_key: AWS access key id.
        aws_secret_key: AWS secret access key.

    Returns:
        List of ``(opinions_df, metadata)`` tuples, one per case found.
        Order is unspecified (derived from a set).
    """
    s3 = boto3.client('s3',
                      aws_access_key_id=aws_access_key,
                      aws_secret_access_key=aws_secret_key)

    subdirectories = set()
    paginator = s3.get_paginator('list_objects_v2')
    for result in paginator.paginate(Bucket='gideon-corpus', Prefix=prefix, Delimiter='/'):
        # CommonPrefixes is absent when a page has no "directory" entries.
        for common in result.get('CommonPrefixes') or []:
            subdirectories.add(common.get('Prefix'))

    # e.g. 'Cases/123/' -> '123' (assumes the case number is the second component).
    case_nums = [s.split('/')[1] for s in subdirectories]

    casedocs = []
    for case_num in case_nums:
        # retrieve_casedocs returns (opinions_df, metadata).
        casedocs.append(retrieve_casedocs(case_num, aws_access_key, aws_secret_key))
    return casedocs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|