devices / utility.py
Roland Ding
1.1.1.1 updated ui and corresponding features for the devices arrangement ui.
0d5eb6c
import json
from application import *
import tiktoken
from pdfminer.high_level import extract_text
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
# module-level tiktoken encoder for the "cl100k_base" encoding; used by count_tokens
token_encoder = tiktoken.get_encoding("cl100k_base")
def count_tokens(text):
    '''
    count how many tokens the module-level encoder produces for a text

    Parameters
    ----------
    text: str
        text to tokenize

    Returns
    -------
    n_tokens: int
        length of the sequence returned by token_encoder.encode(text)
    '''
    encoded = token_encoder.encode(text)
    n_tokens = len(encoded)
    return n_tokens
'''
following functions are for file manipulation
'''
def keyword_search(kw, text):
    '''return True when kw occurs in text, ignoring case'''
    needle = kw.lower()
    haystack = text.lower()
    return needle in haystack


def list_or(l):
    '''return True when the sum of the items in l is greater than zero'''
    total = sum(l)
    return total > 0


def list_and(l):
    '''return True when the sum of the items in l equals the length of l'''
    total = sum(l)
    return total == len(l)
# read pdf file and return text
def read_pdf(file_path):
    '''
    read a pdf file and return its cleaned text together with the document info

    Parameters
    ----------
    file_path: str, os.PathLike, or object with a ``name`` attribute
        location of the pdf. Strings and path-like objects are opened
        directly; any other object (e.g. a temporary file wrapper) is
        reopened in binary mode through its ``name`` attribute.

    Returns
    -------
    text: str
        text extracted by pdfminer's extract_text and passed through
        remove_symbols
    meta:
        the ``info`` attribute of the parsed PDFDocument
    '''
    import os  # local import: only needed to recognise path-like arguments

    if isinstance(file_path, (str, os.PathLike)):
        pdf_path = file_path
    else:
        # not a path itself: fall back to the path stored on the object
        pdf_path = file_path.name

    # the context manager guarantees the handle is closed even if pdfminer raises
    with open(pdf_path, 'rb') as pdf_file:
        text = remove_symbols(extract_text(pdf_file))

        # the same handle is reused to read the document information
        parser = PDFParser(pdf_file)
        doc = PDFDocument(parser)
        meta = doc.info

    return text, meta
'''
following functions are for formatting standard responses
'''
# format standard response for status code and data
def format_response(code,data):
    '''
    build the standard response dictionary for a status code and payload

    Parameters
    ----------
    code:
        value stored under "statusCode"
    data:
        json-serializable payload; stored under "body" as a json string

    Returns
    -------
    response: dict
        dictionary with "statusCode", "headers", "body" and "isBase64Encoded"
    '''
    headers = {
        "Access-Control-Allow-Origin": "*",
        "Content-Type": "application/json",
    }
    response = {"statusCode": code, "headers": headers}
    response["body"] = json.dumps(data)
    response["isBase64Encoded"] = False
    return response
'''
following functions are for string manipulation
'''
# format text output by removing excessive characters
def format_text(text, remove_char_ls=("\\n--\\n", "\\n\\n", "\n")):
    '''
    remove every occurrence of the given substrings from the text

    Parameters
    ----------
    text: str
        text to clean
    remove_char_ls: iterable of str
        substrings to delete, applied in order. The default removes the
        literal sequences ``\\n--\\n`` and ``\\n\\n`` (backslash + "n") and
        real newline characters. The default is a tuple so it cannot be
        mutated between calls.

    Returns
    -------
    text: str
        the text with all listed substrings removed
    '''
    for unwanted in remove_char_ls:
        text = text.replace(unwanted, "")
    return text
# function to remove characters that are not ASCII
def remove_symbols(text):
    '''
    strip non-ascii characters and re-join words hyphenated across lines

    Parameters
    ----------
    text: str
        text to clean

    Returns
    -------
    text: str
        text without non-ascii characters and without "-" + newline pairs
    '''
    # characters that cannot be encoded as ascii are dropped
    ascii_bytes = text.encode("ascii", "ignore")
    ascii_text = ascii_bytes.decode()
    # a hyphen directly followed by a newline is removed, joining the two parts
    return ascii_text.replace('-\n', '')
def str_to_tuple(s):
    '''
    split a string such as "(a,b)" on commas after removing all parentheses

    Parameters
    ----------
    s: str
        text to split

    Returns
    -------
    parts: tuple of str
        the comma-separated pieces; whitespace is not trimmed
    '''
    without_parens = s.translate(str.maketrans("", "", "()"))
    parts = without_parens.split(",")
    return tuple(parts)
'''
following functions are for dynamodb data manipulation
'''
# convert dynamodb map to python dictionary
def db_map_to_py_dict(db_map):
    '''
    convert a dynamodb-typed map into a plain python dictionary

    Parameters
    ----------
    db_map: dict
        mapping of attribute name -> {type_tag: value},
        e.g. {"age": {"N": "3"}, "tags": {"L": [...]}}

    Returns
    -------
    py_dict: dict
        "M" values are converted recursively, "L" values through
        db_list_to_py_list, "N" values become int (when integral) or float,
        "NULL" becomes None, and every other tag keeps its value unchanged
    '''
    py_dict = {}
    for name, typed_value in db_map.items():
        for tag, raw in typed_value.items():
            if tag == "M":
                py_dict[name] = db_map_to_py_dict(raw)
            elif tag == "L":
                py_dict[name] = db_list_to_py_list(raw)
            elif tag == "N":
                as_float = float(raw)
                if as_float % 1 == 0:
                    try:
                        # exact for integer strings of any size
                        py_dict[name] = int(raw)
                    except ValueError:
                        # integral values written like "1.0" or "1e3"
                        py_dict[name] = int(as_float)
                else:
                    py_dict[name] = as_float
            elif tag == "NULL":
                py_dict[name] = None
            else:
                # "S", "BS", "BOOL" and any other tag: keep the value as stored
                py_dict[name] = raw
    return py_dict
# convert python dictionary to dynamodb map
def py_dict_to_db_map(py_dict):
    '''
    convert a python dictionary into a dynamodb-typed map

    Parameters
    ----------
    py_dict: dict
        dictionary to convert; keys are converted with str()

    Returns
    -------
    db_map: dict
        mapping of str key -> {type_tag: value}. str -> "S", int/float -> "N",
        dict -> "M", list/tuple -> "L", bytes -> "BS", bool -> "BOOL",
        None -> {"NULL": True}. Values of any other type are skipped.
    '''
    db_map = {}
    for key, value in py_dict.items():
        key = str(key)
        # exact type checks on purpose: bool is a subclass of int and must
        # not be encoded as a number
        kind = type(value)
        if kind is str:
            db_map[key] = {"S": value}
        elif kind is int or kind is float:
            # NOTE(review): the number is stored as-is; the DynamoDB
            # low-level API presumably expects "N" as a string - confirm
            db_map[key] = {"N": value}
        elif kind is dict:
            db_map[key] = {"M": py_dict_to_db_map(value)}
        elif kind is list or kind is tuple:
            # tuples are encoded like lists, matching py_list_to_db_list
            db_map[key] = {"L": py_list_to_db_list(value)}
        elif kind is bytes:
            # NOTE(review): "BS" kept for compatibility with existing data;
            # a single binary value is presumably "B" in DynamoDB - confirm
            db_map[key] = {"BS": value}
        elif kind is bool:
            db_map[key] = {"BOOL": value}
        elif value is None:
            db_map[key] = {"NULL": True}
    return db_map
# convert dynamodb list to python list
def db_list_to_py_list(db_list):
    '''
    convert a dynamodb-typed list into a plain python list

    Parameters
    ----------
    db_list: list
        list of {type_tag: value} dictionaries, e.g. [{"S": "a"}, {"N": "1"}]

    Returns
    -------
    py_list: list
        "M" elements are converted through db_map_to_py_dict, "L" elements
        recursively, "N" elements become int (when integral) or float, and
        "NULL" elements become None - the same rules db_map_to_py_dict
        applies to map values. "S", "B", "BOOL", "SS", "NS" and "BS" keep
        their value unchanged.
    '''
    passthrough_tags = ("S", "B", "BOOL", "SS", "NS", "BS")
    py_list = []
    for element in db_list:
        for tag, raw in element.items():
            if tag == "M":
                py_list.append(db_map_to_py_dict(raw))
            elif tag == "L":
                py_list.append(db_list_to_py_list(raw))
            elif tag == "N":
                as_float = float(raw)
                if as_float % 1 == 0:
                    try:
                        # exact for integer strings of any size
                        number = int(raw)
                    except ValueError:
                        # integral values written like "1.0" or "1e3"
                        number = int(as_float)
                else:
                    number = as_float
                py_list.append(number)
            elif tag == "NULL":
                # None round-trips: py_list_to_db_list writes {"NULL": True}
                py_list.append(None)
            elif tag in passthrough_tags:
                py_list.append(raw)
            else:
                # unknown tags are treated as nested maps
                py_list.append(db_map_to_py_dict(raw))
    return py_list
# convert python list to dynamodb list
def py_list_to_db_list(py_list):
    '''
    convert a python list (or tuple) into a dynamodb-typed list

    Parameters
    ----------
    py_list: list or tuple
        values to convert

    Returns
    -------
    db_list: list
        one {type_tag: value} dictionary per element. str -> "S",
        int/float -> "N", dict -> "M", list/tuple -> "L", bytes -> "BS",
        bool -> "BOOL", None -> {"NULL": True}.

    Raises
    ------
    TypeError
        when an element has a type with no mapping; silently skipping or
        reusing the previous element's encoding would corrupt the list
    '''
    db_list = []
    for value in py_list:
        # exact type checks on purpose: bool is a subclass of int and must
        # not be encoded as a number
        kind = type(value)
        if kind is str:
            item = {"S": value}
        elif kind is int or kind is float:
            # NOTE(review): the number is stored as-is; the DynamoDB
            # low-level API presumably expects "N" as a string - confirm
            item = {"N": value}
        elif kind is dict:
            item = {"M": py_dict_to_db_map(value)}
        elif kind is list or kind is tuple:
            item = {"L": py_list_to_db_list(value)}
        elif kind is bytes:
            # NOTE(review): "BS" kept for compatibility with existing data;
            # a single binary value is presumably "B" in DynamoDB - confirm
            item = {"BS": value}
        elif kind is bool:
            item = {"BOOL": value}
        elif value is None:
            item = {"NULL": True}
        else:
            raise TypeError(
                f"cannot convert value of type {kind.__name__} to a dynamodb attribute"
            )
        db_list.append(item)
    return db_list
'''
following functions are used for business logic. (to be moved to business logic layer)
'''
# function to calculate the estimated cost of the translation
def est_cost(n_tokens,rate:float=0.004):
    '''
    estimate the cost for a number of tokens

    Parameters
    ----------
    n_tokens: int
        number of tokens
    rate: float
        price per 1000 tokens

    Returns
    -------
    cost: float
        rate * n_tokens / 1000, rounded to 4 decimal places
    '''
    cost = rate * n_tokens / 1000
    return round(cost, 4)
def create_md_tables(devices):
    '''
    build a markdown table with one row per device

    Parameters
    ----------
    devices: list
        list of dictionaries, each with the keys 'device_name',
        'device_type' and 'intended_use'

    Returns
    -------
    md_text: str
        markdown table: header, separator line, then one line per device
    '''
    header = "| Device name | Device Type | Intended Use | \n| --- | --- | --- | \n"
    rows = [
        f"| {device['device_name']} | {device['device_type']} | {device['intended_use']} | \n"
        for device in devices
    ]
    return header + "".join(rows)