billing / utility.py
Roland Ding
0.0.1.2 included common app files and utilities.
9c6b97d
import json
import os
# import PyPDF2
import pandas as pd
from application import *
'''
following functions are for file manipulation
'''
# read pdf file and return text
def read_pdf(file_path):
    """Extract the text of a PDF, save it as a ``.txt`` file, and return it.

    The text of every page is concatenated, cleaned with ``remove_symbols``
    and written to a text file that sits next to the PDF and shares its name
    (only the final extension is replaced by ``.txt``).

    NOTE(review): ``PyPDF2`` must be available in module scope. The direct
    import at the top of this file is commented out, so it presumably comes
    from ``from application import *`` -- confirm.

    Args:
        file_path: Path of the PDF file, or an object that exposes the path
            through its ``name`` attribute (used when ``open`` rejects the
            object itself with ``TypeError``).

    Returns:
        A ``(text, metadata)`` tuple: the cleaned text of all pages and the
        ``metadata`` attribute of the PDF reader.
    """
    # Accept either a plain path or a wrapper object carrying it in `.name`.
    try:
        pdf_file = open(file_path, 'rb')
        filename = file_path
    except TypeError:
        filename = file_path.name
        pdf_file = open(filename, 'rb')

    # The context manager closes the PDF even when parsing or extraction fails.
    with pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        text = ''.join(page.extract_text() for page in reader.pages)
        # Fetch the metadata while the stream is still open, in case the
        # reader resolves it lazily from the file.
        metadata = reader.metadata

    text = remove_symbols(text)

    # splitext strips only the last extension, so dots in directory names or
    # a leading "./" no longer truncate the output path.
    with open(f"{os.path.splitext(filename)[0]}.txt", "w") as text_file:
        text_file.write(text)

    return text, metadata
'''
following functions are for formatting standard responses
'''
# format standard response for status code and data
def format_response(code, data):
    """Build the standard response dictionary for a status code and payload.

    Args:
        code: Value stored under ``statusCode``.
        data: JSON-serialisable payload; it is dumped to a JSON string and
            stored under ``body``.

    Returns:
        A dictionary with ``statusCode``, ``headers`` (allow-all CORS origin
        and JSON content type), ``body`` and ``isBase64Encoded`` (always
        ``False``).
    """
    response_headers = {
        "Access-Control-Allow-Origin": "*",
        "Content-Type": "application/json",
    }
    response = dict(statusCode=code, headers=response_headers)
    response["body"] = json.dumps(data)
    response["isBase64Encoded"] = False
    return response
'''
following functions are for string manipulation
'''
# format text output by removing excessive characters
def format_text(text, remove_char_ls=("\\n--\\n", "\\n\\n", "\n")):
    """Strip unwanted fragments from ``text``.

    Args:
        text: The string to clean.
        remove_char_ls: Fragments to delete, applied in order. The default
            removes the literal (escaped) sequences ``\\n--\\n`` and
            ``\\n\\n`` followed by real newline characters. The default is a
            tuple so that no mutable object is shared between calls.

    Returns:
        ``text`` with every occurrence of each fragment removed.
    """
    # Order matters: longer escaped sequences are removed before shorter ones.
    for fragment in remove_char_ls:
        text = text.replace(fragment, "")
    return text
# function to remove non-ASCII characters and re-join words hyphenated across line breaks
def remove_symbols(text):
    """Drop non-ASCII characters and re-join words split by a hyphenated line break.

    Args:
        text: The string to clean.

    Returns:
        ``text`` restricted to ASCII characters, with every ``-`` that is
        directly followed by a newline removed together with that newline.
    """
    # Characters that cannot be encoded as ASCII are silently discarded.
    ascii_bytes = text.encode("ascii", "ignore")
    ascii_text = ascii_bytes.decode()
    # Splitting on the hyphen+newline marker and gluing the pieces back
    # together removes every occurrence of it.
    return "".join(ascii_text.split('-\n'))
def str_to_tuple(s):
    """Turn a string such as ``"(a,b)"`` into a tuple of its comma-separated parts.

    Every parenthesis is removed (wherever it occurs) and the remainder is
    split on commas. The parts are returned as strings and are not stripped
    of surrounding whitespace.
    """
    without_parens = s
    for paren in "()":
        without_parens = without_parens.replace(paren, "")
    parts = without_parens.split(",")
    return tuple(parts)
'''
following functions are for dynamodb data manipulation
'''
# convert dynamodb map to python dictionary
def db_map_to_py_dict(db_map):
    """Convert a DynamoDB attribute map into a plain Python dictionary.

    Args:
        db_map: Mapping of attribute name to a ``{type_tag: value}``
            dictionary, e.g. ``{"age": {"N": "3"}}``.

    Returns:
        A dictionary with the same keys. ``M`` and ``L`` values are decoded
        recursively, ``N`` becomes an ``int`` or ``float``, ``NULL`` becomes
        ``None`` and any other type tag (``S``, ``BOOL``, ...) yields its raw
        value.
    """
    py_dict = {}
    for key, attribute in db_map.items():
        # If an attribute carries several type tags, the last one wins.
        for type_tag, value in attribute.items():
            py_dict[key] = _db_attribute_to_py(type_tag, value)
    return py_dict


def db_map_to_pd_dataframe(db_map):
    """Decode a DynamoDB attribute map and load the result into a DataFrame.

    The decoded dictionary is passed straight to ``pd.DataFrame``, so its
    values must form something pandas accepts as columns (a dictionary of
    scalars only is rejected by pandas unless an index is given).
    """
    return pd.DataFrame(db_map_to_py_dict(db_map))


# convert python dictionary to dynamodb map
def py_dict_to_db_map(py_dict):
    """Convert a Python dictionary into a DynamoDB attribute map.

    Args:
        py_dict: Dictionary to encode. Keys are converted with ``str``.

    Returns:
        Mapping of key to ``{type_tag: value}``. Strings map to ``S``,
        booleans to ``BOOL``, ints/floats to ``N`` (as a string, which is how
        DynamoDB expects numbers), dictionaries to ``M``, lists to ``L`` and
        ``None`` to ``NULL``. Values of any other type are left out of the
        result.
    """
    db_map = {}
    for key, value in py_dict.items():
        attribute = _py_value_to_db_attribute(value)
        if attribute is not None:
            db_map[str(key)] = attribute
    return db_map


# convert dynamodb list to python list
def db_list_to_py_list(db_list):
    """Convert a DynamoDB attribute list into a plain Python list.

    Args:
        db_list: List of ``{type_tag: value}`` dictionaries.

    Returns:
        A list holding one decoded value per type tag, using the same rules
        as ``db_map_to_py_dict`` (numbers are decoded here as well, so lists
        and maps agree).
    """
    return [
        _db_attribute_to_py(type_tag, value)
        for attribute in db_list
        for type_tag, value in attribute.items()
    ]


# convert python list to dynamodb list
def py_list_to_db_list(py_list):
    """Convert a Python list into a DynamoDB attribute list.

    Args:
        py_list: List to encode.

    Returns:
        A list of ``{type_tag: value}`` dictionaries, encoded with the same
        rules as ``py_dict_to_db_map``.

    Raises:
        TypeError: If an element has a type that cannot be encoded. Elements
            cannot be skipped the way dictionary entries are, because that
            would shift the position of the remaining elements.
    """
    db_list = []
    for value in py_list:
        attribute = _py_value_to_db_attribute(value)
        if attribute is None:
            raise TypeError(
                f"unsupported type for DynamoDB list element: {type(value).__name__}"
            )
        db_list.append(attribute)
    return db_list


def _db_attribute_to_py(type_tag, value):
    """Decode a single DynamoDB ``type_tag``/``value`` pair into a Python value."""
    if type_tag == "M":
        return db_map_to_py_dict(value)
    if type_tag == "L":
        return db_list_to_py_list(value)
    if type_tag == "N":
        return _db_number_to_py(value)
    if type_tag == "NULL":
        return None
    # S, BOOL and any other tag: hand the raw value through unchanged.
    return value


def _db_number_to_py(value):
    """Decode a DynamoDB number into an ``int`` when it is whole, else a ``float``."""
    number = float(value)
    if number % 1 != 0:
        # Fractional values (and inf/nan, whose remainder is nan) stay floats.
        return number
    try:
        # Converting the original value keeps large integers exact.
        return int(value)
    except ValueError:
        # Whole numbers written like "1.0" or "1E+2" are not valid int literals.
        return int(number)


def _py_value_to_db_attribute(value):
    """Encode one Python value as a DynamoDB attribute, or return ``None`` if unsupported."""
    if isinstance(value, str):
        return {"S": value}
    # bool is a subclass of int, so it has to be checked before the numbers.
    if isinstance(value, bool):
        return {"BOOL": value}
    if isinstance(value, (int, float)):
        return {"N": str(value)}
    if isinstance(value, dict):
        return {"M": py_dict_to_db_map(value)}
    if isinstance(value, list):
        return {"L": py_list_to_db_list(value)}
    if value is None:
        return {"NULL": True}
    return None
'''
following functions are used for business logic. (to be moved to business logic layer)
'''
# function to calculate the estimated cost of the translation
def est_cost(n_tokens, rate):
    """Estimate the cost of ``n_tokens`` tokens priced at ``rate`` per 1000 tokens.

    Returns:
        The cost rounded to four decimal places.
    """
    cost = rate * n_tokens / 1000
    return round(cost, 4)