Upload AWSHandler.py
Browse files- AWSHandler.py +48 -0
AWSHandler.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import boto3
|
| 2 |
+
import os
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import json
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def upload_files(origin_path, destination_path, aws_access_key, aws_secret_key):
    """Recursively upload every file under *origin_path* to the 'gideon-corpus' bucket.

    Each object key is ``destination_path + <origin folder name> + '/' + <relative path>``,
    mirroring the local directory layout.

    Args:
        origin_path: Local directory to walk; a trailing slash is tolerated.
        destination_path: Key prefix inside the bucket (caller supplies any trailing '/').
        aws_access_key: AWS access key id.
        aws_secret_key: AWS secret access key.
    """
    session = boto3.Session(aws_access_key_id=aws_access_key,
                            aws_secret_access_key=aws_secret_key)
    bucket = session.resource('s3').Bucket('gideon-corpus')

    # normpath strips a trailing separator so basename never comes back empty
    # (the original `origin_path.split('/')[-1]` yielded '' for 'dir/').
    root_name = os.path.basename(os.path.normpath(origin_path))

    for subdir, _dirs, files in os.walk(origin_path):
        for filename in files:
            full_path = os.path.join(subdir, filename)
            # relpath + separator normalization keeps keys '/'-delimited on every
            # OS; the original slice embedded os.sep (backslashes on Windows).
            rel_key = os.path.relpath(full_path, origin_path).replace(os.sep, '/')
            with open(full_path, 'rb') as data:
                bucket.put_object(Key=destination_path + root_name + '/' + rel_key,
                                  Body=data)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def retrieve_logs(aws_access_key, aws_secret_key):
    """Download 'logs/logs.csv' from the 'gideon-corpus' bucket as a DataFrame.

    Args:
        aws_access_key: AWS access key id.
        aws_secret_key: AWS secret access key.

    Returns:
        pandas.DataFrame parsed from the CSV object body.
    """
    client = boto3.client(
        's3',
        aws_access_key_id=aws_access_key,
        aws_secret_access_key=aws_secret_key,
    )
    response = client.get_object(Bucket='gideon-corpus', Key='logs/logs.csv')
    return pd.read_csv(response['Body'])
|
| 24 |
+
|
| 25 |
+
def retrieve_casedocs(case_num, aws_access_key, aws_secret_key):  # Note: this is how stuff is stored on AWS
    """Fetch one case's documents from the 'gideon-corpus' bucket.

    Reads 'Cases/<case_num>/opinions.csv' and 'Cases/<case_num>/metadata.json'.

    Args:
        case_num: Case folder name under the 'Cases/' prefix.
        aws_access_key: AWS access key id.
        aws_secret_key: AWS secret access key.

    Returns:
        Tuple of (opinions DataFrame, metadata dict).
    """
    client = boto3.client(
        's3',
        aws_access_key_id=aws_access_key,
        aws_secret_access_key=aws_secret_key,
    )

    opinions_obj = client.get_object(
        Bucket='gideon-corpus', Key='Cases/' + case_num + '/opinions.csv')
    opinions = pd.read_csv(opinions_obj['Body'])

    meta_obj = client.get_object(
        Bucket='gideon-corpus', Key='Cases/' + case_num + '/metadata.json')
    meta = json.loads(meta_obj['Body'].read().decode('utf-8'))

    return opinions, meta
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def retrieve_all_casedocs(prefix, aws_access_key, aws_secret_key):
    """Download the documents for every case folder found under *prefix*.

    Lists the immediate sub-folders of *prefix* in the 'gideon-corpus' bucket,
    extracts each case number, and fetches its opinions/metadata via
    retrieve_casedocs().

    Args:
        prefix: S3 key prefix to list (e.g. 'Cases/').
        aws_access_key: AWS access key id.
        aws_secret_key: AWS secret access key.

    Returns:
        List of (opinions DataFrame, metadata dict) tuples, one per case,
        in sorted prefix order.
    """
    # BUG FIX: boto3.client() requires the service name as its first argument;
    # the original call omitted 's3' and raised TypeError before any request.
    s3 = boto3.client('s3', aws_access_key_id=aws_access_key,
                      aws_secret_access_key=aws_secret_key)

    # Delimiter='/' makes S3 report each immediate sub-folder as a CommonPrefix.
    subdirectories = set()
    paginator = s3.get_paginator('list_objects_v2')
    for result in paginator.paginate(Bucket="gideon-corpus", Prefix=prefix, Delimiter='/'):
        if result.get('CommonPrefixes'):
            subdirectories.update(subdir.get('Prefix') for subdir in result.get('CommonPrefixes'))

    # Prefixes look like 'Cases/<case_num>/'; element [1] is the case number.
    # sorted() makes the result order deterministic (a set's order is not).
    subs = [s.split('/')[1] for s in sorted(subdirectories)]

    casedocs = []
    for case_num in subs:
        opinions_df, metadata = retrieve_casedocs(case_num, aws_access_key, aws_secret_key)
        casedocs.append((opinions_df, metadata))
    return casedocs
|