Spaces:

amra-ai
/

devices

Sleeping

devices / utility.py

Roland Ding

1.1.1.1 updated ui and corresponding features for the devices arrangement ui.

0d5eb6c over 2 years ago

6.48 kB

	import json

	from application import *

	import tiktoken
	from pdfminer.high_level import extract_text
	from pdfminer.pdfparser import PDFParser
	from pdfminer.pdfdocument import PDFDocument

	token_encoder = tiktoken.get_encoding("cl100k_base")

	def count_tokens(text):
	    '''
	    Count how many tokens the module-level ``token_encoder`` produces
	    for a piece of text.

	    Parameters
	    ----------
	    text: str
	        text to be tokenized

	    Returns
	    -------
	    n_tokens: int
	        length of the encoded token sequence
	    '''
	    encoded = token_encoder.encode(text)
	    n_tokens = len(encoded)
	    return n_tokens

	'''
	following functions are for file manipulation
	'''

	def keyword_search(kw, text):
	    '''
	    Case-insensitive substring test.

	    Returns True when ``kw`` (lower-cased) occurs anywhere in ``text``
	    (lower-cased).
	    '''
	    needle = kw.lower()
	    haystack = text.lower()
	    return needle in haystack


	def list_or(l):
	    '''
	    Return True when the sum of the items in ``l`` is greater than zero.

	    For a list of booleans this means "at least one item is True".
	    An empty list gives False.
	    '''
	    total = sum(l)
	    return total > 0


	def list_and(l):
	    '''
	    Return True when the sum of the items in ``l`` equals ``len(l)``.

	    For a list of booleans this means "every item is True".
	    An empty list gives True.
	    '''
	    total = sum(l)
	    return total == len(l)

	# read pdf file and return text
	def read_pdf(file_path):
	    '''
	    Read a pdf file and return its cleaned text and its metadata.

	    Parameters
	    ----------
	    file_path: str, os.PathLike, or object with a ``name`` attribute
	        path of the pdf file, or a wrapper object (such as a temporary
	        file wrapper) whose ``name`` attribute holds the path

	    Returns
	    -------
	    text: str
	        text extracted by pdfminer and passed through remove_symbols
	    meta:
	        the ``info`` attribute of the pdfminer PDFDocument built from
	        the same file
	    '''
	    import os

	    # a plain path is opened directly; anything else is expected to
	    # expose the path through its ``name`` attribute
	    if isinstance(file_path, (str, os.PathLike)):
	        path = file_path
	    else:
	        path = file_path.name

	    # the context manager closes the handle even when pdfminer raises
	    # on a malformed document
	    with open(path, 'rb') as pdf_file:
	        text = extract_text(pdf_file)
	        meta = PDFDocument(PDFParser(pdf_file)).info

	    return remove_symbols(text), meta

	'''
	following functions format the standard response
	'''

	# format standard response for status code and data
	def format_response(code,data):
	    '''
	    Build the standard response dictionary.

	    Parameters
	    ----------
	    code: status code stored under "statusCode"
	    data: JSON-serializable payload; it is serialized with json.dumps
	        and stored under "body"

	    Returns
	    -------
	    response: dict
	        keys "statusCode", "headers", "body" and "isBase64Encoded"
	    '''
	    response = {"statusCode": code}
	    response["headers"] = {
	        "Access-Control-Allow-Origin": "*",
	        "Content-Type": "application/json",
	    }
	    response["body"] = json.dumps(data)
	    response["isBase64Encoded"] = False
	    return response

	'''
	following functions are for string manipulation
	'''

	# format text output by removing excessive characters
	def format_text(text,remove_char_ls = ("\\n--\\n","\\n\\n","\n")):
	    '''
	    Strip unwanted character sequences from a text.

	    Parameters
	    ----------
	    text: str
	        text to be cleaned
	    remove_char_ls: iterable of str
	        sequences to delete, applied one after another in the given
	        order. The default removes two patterns built from the
	        two-character sequence backslash + "n" and then every real
	        newline character. The default is an immutable tuple so it
	        cannot be altered between calls.

	    Returns
	    -------
	    text: str
	        the text with every listed sequence removed
	    '''
	    # order matters: removing an earlier sequence can expose or
	    # destroy a later one, so the sequences are applied strictly in turn
	    for sequence in remove_char_ls:
	        text = text.replace(sequence, "")

	    return text

	# function to remove symbols that are not ASCII characters
	def remove_symbols(text):
	    '''
	    Drop every non-ASCII character from the text and re-join words
	    that were hyphenated across a line break.

	    Parameters
	    ----------
	    text: str

	    Returns
	    -------
	    text: str
	        ASCII-only text with each hyphen + newline pair removed
	    '''
	    # characters that cannot be encoded as ASCII are silently discarded
	    ascii_bytes = text.encode("ascii", errors="ignore")
	    ascii_text = ascii_bytes.decode()
	    # a hyphen directly followed by a newline marks a broken word
	    return ascii_text.replace('-\n', '')

	def str_to_tuple(s):
	    '''
	    Turn a string such as "(a,b)" into a tuple of strings.

	    Every parenthesis is removed, then the remainder is split on
	    commas. The pieces are not stripped or converted.
	    '''
	    without_parens = s.translate(str.maketrans("", "", "()"))
	    pieces = without_parens.split(",")
	    return tuple(pieces)

	'''
	following functions are for dynamodb data manipulation
	'''
	# convert dynamodb map to python dictionary
	def db_map_to_py_dict(db_map):
	    '''
	    Convert a dynamodb map into a plain python dictionary.

	    Parameters
	    ----------
	    db_map: dict
	        mapping of attribute name to a one-entry dict of the form
	        {type_tag: payload}

	    Returns
	    -------
	    py_dict: dict
	        "M" payloads are converted recursively, "L" payloads through
	        db_list_to_py_list, "N" payloads become int when integral and
	        float otherwise, "NULL" becomes None, and every other tag
	        ("S", "BS", "BOOL", unknown tags) keeps its payload unchanged
	    '''
	    py_dict = {}
	    for name, typed_value in db_map.items():
	        for type_tag, payload in typed_value.items():
	            if type_tag == "M":
	                py_dict[name] = db_map_to_py_dict(payload)
	            elif type_tag == "L":
	                py_dict[name] = db_list_to_py_list(payload)
	            elif type_tag == "N":
	                py_dict[name] = _db_number_to_py(payload)
	            elif type_tag == "NULL":
	                py_dict[name] = None
	            else:
	                py_dict[name] = payload

	    return py_dict


	def _db_number_to_py(payload):
	    '''convert an "N" payload to int when its value is integral, else float'''
	    number = float(payload)
	    if not number.is_integer():
	        return number
	    try:
	        # exact for ints and plain integer strings of any size
	        return int(payload)
	    except ValueError:
	        # integral value spelled like "1.0" or "1e3": int() rejects the
	        # string form, so convert the parsed float instead
	        return int(number)

	# convert python dictionary to dynamodb map
	def py_dict_to_db_map(py_dict):
	    '''
	    Convert a python dictionary into a dynamodb map.

	    Parameters
	    ----------
	    py_dict: dict
	        keys are converted with str(); values may be str, int, float,
	        dict, list, tuple, bytes, bool or None

	    Returns
	    -------
	    db_map: dict
	        mapping of str key to {type_tag: payload}. Values of any other
	        type are left out of the result.
	    '''
	    db_map = {}
	    for key, value in py_dict.items():
	        key = str(key)
	        # bool is tested before int because bool is a subclass of int
	        if isinstance(value, bool):
	            db_map[key] = {"BOOL": value}
	        elif isinstance(value, str):
	            db_map[key] = {"S": value}
	        elif isinstance(value, (int, float)):
	            # NOTE(review): the number is stored as-is, not as a string;
	            # confirm this is what the consumer of the map expects
	            db_map[key] = {"N": value}
	        elif isinstance(value, dict):
	            db_map[key] = {"M": py_dict_to_db_map(value)}
	        elif isinstance(value, (list, tuple)):
	            # tuples are accepted here as they are in py_list_to_db_list
	            db_map[key] = {"L": py_list_to_db_list(value)}
	        elif isinstance(value, bytes):
	            # NOTE(review): a single bytes value is tagged "BS" here;
	            # confirm the tag against the consumer
	            db_map[key] = {"BS": value}
	        elif value is None:
	            db_map[key] = {"NULL": True}
	    return db_map

	# convert dynamodb list to python list
	def db_list_to_py_list(db_list):
	    '''
	    Convert a dynamodb list into a plain python list.

	    Parameters
	    ----------
	    db_list: list
	        list of dicts of the form {type_tag: payload}

	    Returns
	    -------
	    py_list: list
	        one entry per (type_tag, payload) pair: "L" payloads are
	        converted recursively, the scalar and set tags keep their
	        payload unchanged, and "M" as well as any unlisted tag goes
	        through db_map_to_py_dict

	    NOTE(review): "N" and "NULL" payloads are passed through untouched
	    here, while db_map_to_py_dict converts them to numbers and None.
	    Confirm whether that difference is intended.
	    '''
	    passthrough_tags = ("N", "S", "B", "BOOL", "NULL", "SS", "NS", "BS")

	    py_list = []
	    for typed_value in db_list:
	        for type_tag, payload in typed_value.items():
	            if type_tag == "L":
	                converted = db_list_to_py_list(payload)
	            elif type_tag in passthrough_tags:
	                converted = payload
	            else:
	                # covers "M" and every tag not listed above
	                converted = db_map_to_py_dict(payload)
	            py_list.append(converted)

	    return py_list

	# convert python list to dynamodb list
	def py_list_to_db_list(py_list):
	    '''
	    Convert a python list (or tuple) into a dynamodb list.

	    Parameters
	    ----------
	    py_list: list or tuple
	        items may be str, int, float, dict, list, tuple, bytes, bool
	        or None

	    Returns
	    -------
	    db_list: list
	        one {type_tag: payload} dict per input item, in the same order

	    Raises
	    ------
	    TypeError
	        when an item has a type that cannot be converted. Such an item
	        is rejected explicitly so that no item from an earlier
	        iteration can be appended in its place.
	    '''
	    db_list = []
	    for value in py_list:
	        # bool is tested before int because bool is a subclass of int
	        if isinstance(value, bool):
	            item = {"BOOL": value}
	        elif isinstance(value, str):
	            item = {"S": value}
	        elif isinstance(value, (int, float)):
	            # NOTE(review): the number is stored as-is, not as a string;
	            # confirm this is what the consumer of the list expects
	            item = {"N": value}
	        elif isinstance(value, dict):
	            item = {"M": py_dict_to_db_map(value)}
	        elif isinstance(value, (list, tuple)):
	            item = {"L": py_list_to_db_list(value)}
	        elif isinstance(value, bytes):
	            # NOTE(review): a single bytes value is tagged "BS" here;
	            # confirm the tag against the consumer
	            item = {"BS": value}
	        elif value is None:
	            item = {"NULL": True}
	        else:
	            raise TypeError(
	                "cannot convert value of type "
	                f"{type(value).__name__} to a dynamodb list item"
	            )

	        db_list.append(item)

	    return db_list

	'''
	following functions are used for business logic. (to be moved to business logic layer)
	'''

	# function to calculate the estimated cost of the translation
	def est_cost(n_tokens,rate:float=0.004):
	    '''
	    Estimate the cost of a number of tokens.

	    Parameters
	    ----------
	    n_tokens: number of tokens
	    rate: float
	        cost per 1000 tokens

	    Returns
	    -------
	    cost: rate * n_tokens / 1000, rounded to 4 decimal places
	    '''
	    cost = rate*n_tokens/1000
	    return round(cost, 4)

	def create_md_tables(devices):
	    '''
	    Create a markdown table listing the given devices.

	    Parameters
	    ----------
	    devices: list
	        list of devices; each device is indexed with the keys
	        'device_name', 'device_type' and 'intended_use'

	    Returns
	    -------
	    md_text: str
	        header row, separator row, then one table row per device.
	        With no devices only the header and separator are returned.

	    Raises
	    ------
	    KeyError
	        when a device dict lacks one of the three keys
	    '''
	    # plain pipes are required: a backslash before the pipe is not a
	    # valid python escape and would put a literal backslash in the
	    # output, so the text would no longer be a markdown table
	    header = "| Device name | Device Type | Intended Use | \n| --- | --- | --- | \n"

	    rows = [
	        f"| {device['device_name']} | {device['device_type']} | {device['intended_use']} | \n"
	        for device in devices
	    ]

	    return header + "".join(rows)