Roland Ding committed on
Commit ·
45f7b67
1
Parent(s): 4cca473
1.1.4.13 update cloud functions and .gitignore.
Browse files
- .gitignore +4 -0
- cloud_db.py +12 -0
- cloud_storage.py +49 -5
- utility.py +8 -9
.gitignore
CHANGED
|
@@ -1,4 +1,8 @@
|
|
| 1 |
.archive
|
| 2 |
.venv
|
|
|
|
| 3 |
|
| 4 |
*.ipynb
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
.archive
|
| 2 |
.venv
|
| 3 |
+
.samples
|
| 4 |
|
| 5 |
*.ipynb
|
| 6 |
+
*.pdf
|
| 7 |
+
|
| 8 |
+
__pycache__/
|
cloud_db.py
CHANGED
|
@@ -52,6 +52,18 @@ def delete_item(table_name:str,key:dict):
|
|
| 52 |
return res
|
| 53 |
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
'''
|
| 56 |
dynamodb structure management
|
| 57 |
'''
|
|
|
|
| 52 |
return res
|
| 53 |
|
| 54 |
|
| 55 |
+
'''
|
| 56 |
+
'''
|
| 57 |
+
def get_item(table_name:str,key:dict):
|
| 58 |
+
try:
|
| 59 |
+
res = db_client.get_item(
|
| 60 |
+
TableName = table_name,
|
| 61 |
+
Key = py_dict_to_db_map(key)
|
| 62 |
+
)
|
| 63 |
+
except Exception as e:
|
| 64 |
+
return {"Error":e}
|
| 65 |
+
return res
|
| 66 |
+
|
| 67 |
'''
|
| 68 |
dynamodb structure management
|
| 69 |
'''
|
cloud_storage.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import boto3
|
| 2 |
import os
|
|
|
|
| 3 |
|
| 4 |
from utility import aws_access_key_id, aws_secret_access_key
|
| 5 |
|
|
@@ -13,7 +14,7 @@ s3 = boto3.client(
|
|
| 13 |
def upload_file(path, bucket, object_name=None):
|
| 14 |
"""Upload a file to an S3 bucket
|
| 15 |
|
| 16 |
-
:param
|
| 17 |
:param bucket: Bucket to upload to
|
| 18 |
:param object_name: S3 object name.
|
| 19 |
If not specified then file_name is used
|
|
@@ -30,20 +31,41 @@ def upload_file(path, bucket, object_name=None):
|
|
| 30 |
except Exception as e:
|
| 31 |
print(e)
|
| 32 |
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
return True
|
| 34 |
|
| 35 |
# get a file from s3
|
| 36 |
-
def download_file(
|
| 37 |
"""Download a file from an S3 bucket
|
| 38 |
|
| 39 |
-
:param file_name: File to download
|
| 40 |
:param bucket: Bucket to download from
|
|
|
|
| 41 |
:param object_name: S3 object name.
|
| 42 |
If not specified then file_name is used
|
| 43 |
:return: True if file was downloaded, else False
|
| 44 |
"""
|
| 45 |
-
if
|
| 46 |
-
|
| 47 |
|
| 48 |
try:
|
| 49 |
s3.download_file(bucket, object_name, file_name)
|
|
@@ -52,6 +74,28 @@ def download_file(file_name, bucket, object_name=None):
|
|
| 52 |
return False
|
| 53 |
return True
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
# delete a file from s3
|
| 56 |
def delete_file(bucket, object_name):
|
| 57 |
"""Delete a file from an S3 bucket
|
|
|
|
| 1 |
import boto3
|
| 2 |
import os
|
| 3 |
+
import tempfile
|
| 4 |
|
| 5 |
from utility import aws_access_key_id, aws_secret_access_key
|
| 6 |
|
|
|
|
| 14 |
def upload_file(path, bucket, object_name=None):
|
| 15 |
"""Upload a file to an S3 bucket
|
| 16 |
|
| 17 |
+
:param path: path to file for upload
|
| 18 |
:param bucket: Bucket to upload to
|
| 19 |
:param object_name: S3 object name.
|
| 20 |
If not specified then file_name is used
|
|
|
|
| 31 |
except Exception as e:
|
| 32 |
print(e)
|
| 33 |
return False
|
| 34 |
+
return
|
| 35 |
+
|
| 36 |
+
def upload_fileobj(file_obj, bucket, object_name=None):
|
| 37 |
+
'''
|
| 38 |
+
Upload a file object to an S3 bucket
|
| 39 |
+
|
| 40 |
+
:param file_obj: File object to upload
|
| 41 |
+
:param bucket: Bucket to upload to
|
| 42 |
+
:param object_name: S3 object name.
|
| 43 |
+
If not specified then file_name is used
|
| 44 |
+
|
| 45 |
+
:return: True if file was uploaded, else False
|
| 46 |
+
'''
|
| 47 |
+
if object_name is None:
|
| 48 |
+
object_name = file_obj.name
|
| 49 |
+
|
| 50 |
+
try:
|
| 51 |
+
s3.upload_fileobj(file_obj, bucket, object_name)
|
| 52 |
+
except Exception as e:
|
| 53 |
+
print(e)
|
| 54 |
+
return False
|
| 55 |
return True
|
| 56 |
|
| 57 |
# get a file from s3
|
| 58 |
+
def download_file(bucket, object_name, file_name=None):
|
| 59 |
"""Download a file from an S3 bucket
|
| 60 |
|
|
|
|
| 61 |
:param bucket: Bucket to download from
|
| 62 |
+
:param file_name: File to download
|
| 63 |
:param object_name: S3 object name.
|
| 64 |
If not specified then file_name is used
|
| 65 |
:return: True if file was downloaded, else False
|
| 66 |
"""
|
| 67 |
+
if file_name is None:
|
| 68 |
+
file_name = object_name
|
| 69 |
|
| 70 |
try:
|
| 71 |
s3.download_file(bucket, object_name, file_name)
|
|
|
|
| 74 |
return False
|
| 75 |
return True
|
| 76 |
|
| 77 |
+
# download a file object from s3
|
| 78 |
+
def download_fileobj(bucket, object_name, temp_obj=None):
|
| 79 |
+
'''
|
| 80 |
+
Download a file object from an S3 bucket
|
| 81 |
+
|
| 82 |
+
:param file_name: File to download
|
| 83 |
+
:param bucket: Bucket to download from
|
| 84 |
+
:param object_name: S3 object name.
|
| 85 |
+
If not specified then file_name is used
|
| 86 |
+
|
| 87 |
+
:return: True if file was downloaded, else False
|
| 88 |
+
'''
|
| 89 |
+
if temp_obj is None:
|
| 90 |
+
temp_obj = tempfile.TemporaryFile()
|
| 91 |
+
|
| 92 |
+
try:
|
| 93 |
+
s3.download_fileobj(bucket, object_name, temp_obj)
|
| 94 |
+
except Exception as e:
|
| 95 |
+
print(e)
|
| 96 |
+
return False
|
| 97 |
+
return temp_obj
|
| 98 |
+
|
| 99 |
# delete a file from s3
|
| 100 |
def delete_file(bucket, object_name):
|
| 101 |
"""Delete a file from an S3 bucket
|
utility.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import json
|
| 2 |
-
import os
|
| 3 |
import PyPDF2
|
|
|
|
| 4 |
|
| 5 |
from application import *
|
| 6 |
|
|
@@ -11,10 +11,10 @@ following functions are for file manipulation
|
|
| 11 |
# read pdf file and return text
|
| 12 |
def read_pdf(file_path):
|
| 13 |
# open the pdf file
|
| 14 |
-
|
| 15 |
filename = file_path
|
| 16 |
pdfFileObj = open(file_path, 'rb')
|
| 17 |
-
|
| 18 |
filename = file_path.name
|
| 19 |
pdfFileObj = open(file_path.name, 'rb')
|
| 20 |
|
|
@@ -32,16 +32,15 @@ def read_pdf(file_path):
|
|
| 32 |
page_obj = pdfReader.pages[page_num]
|
| 33 |
text += page_obj.extract_text ()
|
| 34 |
|
|
|
|
|
|
|
|
|
|
| 35 |
# close the pdf file object
|
| 36 |
pdfFileObj.close()
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
with open(f"{filename.split('.')[0]}.txt", "w") as f:
|
| 41 |
-
f.write(text)
|
| 42 |
|
| 43 |
-
|
| 44 |
-
return text, pdfReader.metadata
|
| 45 |
|
| 46 |
'''
|
| 47 |
following functions are for format standard response
|
|
|
|
| 1 |
import json
|
|
|
|
| 2 |
import PyPDF2
|
| 3 |
+
import tempfile
|
| 4 |
|
| 5 |
from application import *
|
| 6 |
|
|
|
|
| 11 |
# read pdf file and return text
|
| 12 |
def read_pdf(file_path):
|
| 13 |
# open the pdf file
|
| 14 |
+
if type(file_path) is str:
|
| 15 |
filename = file_path
|
| 16 |
pdfFileObj = open(file_path, 'rb')
|
| 17 |
+
elif type(file_path) is tempfile._TemporaryFileWrapper:
|
| 18 |
filename = file_path.name
|
| 19 |
pdfFileObj = open(file_path.name, 'rb')
|
| 20 |
|
|
|
|
| 32 |
page_obj = pdfReader.pages[page_num]
|
| 33 |
text += page_obj.extract_text ()
|
| 34 |
|
| 35 |
+
text = remove_symbols(text)
|
| 36 |
+
|
| 37 |
+
meta = pdfReader.metadata
|
| 38 |
# close the pdf file object
|
| 39 |
pdfFileObj.close()
|
| 40 |
|
| 41 |
+
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
+
return text, meta
|
|
|
|
| 44 |
|
| 45 |
'''
|
| 46 |
following functions are for format standard response
|