Spaces:

amra-ai
/

billing

Runtime error

File size: 4,367 Bytes

9c6b97d

import json
import os
# import PyPDF2
import pandas as pd

from application import *

'''
following functions are for file manipulation
'''

# read pdf file, save its text as a .txt file and return the text
def read_pdf(file_path):
    """Extract the text of a PDF, write it to a sibling ``.txt`` file, return it.

    Args:
        file_path: Path of the PDF to read, or an object exposing that path
            as a ``.name`` attribute (used when ``open`` rejects the object
            itself with ``TypeError``).

    Returns:
        A ``(text, metadata)`` tuple: the concatenated text of all pages
        after ``remove_symbols`` has been applied, and the reader's
        ``metadata`` value.

    Side effects:
        Writes the cleaned text to the PDF's path with its extension
        replaced by ``.txt``.
    """
    # open() raises TypeError for objects that are not path-like; in that
    # case fall back to the path stored in the object's ``name`` attribute.
    try:
        pdf_file = open(file_path, 'rb')
        filename = file_path
    except TypeError:
        filename = file_path.name
        pdf_file = open(filename, 'rb')

    # The context manager closes the file even if parsing raises.
    with pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        text = ''.join(page.extract_text() for page in reader.pages)
        # Read the metadata while the stream is still open.
        # NOTE(review): the reader may load objects lazily from the stream,
        # so touching it after close() is not safe to rely on - confirm.
        metadata = reader.metadata

    text = remove_symbols(text)

    # splitext only strips the final extension, so dots elsewhere in the
    # path (e.g. "./docs/v1.2/report.pdf") are preserved.
    txt_path = f"{os.path.splitext(filename)[0]}.txt"
    with open(txt_path, "w") as f:
        f.write(text)

    # return the string of text together with the document metadata
    return text, metadata

'''
following functions are for formatting standard responses
'''

# build the standard response envelope for a status code and a payload
def format_response(code,data):
    """Wrap ``code`` and ``data`` in the standard response dictionary.

    Args:
        code: Value stored under ``"statusCode"``.
        data: Payload; it is serialised with ``json.dumps`` into ``"body"``.

    Returns:
        A dict with ``statusCode``, ``headers`` (allow-all CORS origin and a
        JSON content type), ``body`` and ``isBase64Encoded`` set to False.
    """
    response_headers = {
        "Access-Control-Allow-Origin": "*",
        "Content-Type": "application/json",
    }
    serialized_body = json.dumps(data)
    return dict(
        statusCode=code,
        headers=response_headers,
        body=serialized_body,
        isBase64Encoded=False,
    )

'''
following functions are for string manipulation
'''

# format text output by removing excessive characters
def format_text(text,remove_char_ls = ("\\n--\\n","\\n\\n","\n")):
    """Return ``text`` with every substring in ``remove_char_ls`` removed.

    Args:
        text: The string to clean.
        remove_char_ls: Substrings to delete, applied in order.  The default
            removes the escaped sequences ``\\n--\\n`` and ``\\n\\n``
            (literal backslash + ``n``) and real newline characters.  The
            default is a tuple rather than a list so it cannot be mutated
            and shared between calls; any iterable of strings is accepted.

    Returns:
        The cleaned string.
    """
    for unwanted in remove_char_ls:
        text = text.replace(unwanted, "")

    return text

# function to strip non-ASCII symbols and undo end-of-line hyphenation
def remove_symbols(text):
    """Drop non-ASCII characters and re-join words split by ``-`` + newline.

    Args:
        text: The string to clean.

    Returns:
        ``text`` with every character that cannot be encoded as ASCII
        removed, and every ``"-\\n"`` sequence deleted afterwards.
    """
    # characters that cannot be encoded as ASCII are silently discarded
    ascii_only = text.encode("ascii", "ignore").decode()
    # a hyphen directly followed by a newline marks a word broken across lines
    return ascii_only.replace('-\n', '')

def str_to_tuple(s):
    """Split a string such as ``"(a,b)"`` into a tuple of its comma-separated parts.

    Every ``(`` and ``)`` is removed first; the remainder is split on ``,``.
    The parts are returned as strings and are not stripped of whitespace.
    """
    without_parens = s.translate(str.maketrans("", "", "()"))
    parts = without_parens.split(",")
    return tuple(parts)

'''
following functions are for dynamodb data manipulation
'''
# convert dynamodb map to python dictionary
def db_map_to_py_dict(db_map):
    """Convert a DynamoDB-typed map into a plain Python dictionary.

    Each value of ``db_map`` is a mapping from a type tag to the tagged
    value, e.g. ``{"S": "text"}``.  Tags are handled as follows:

    * ``"M"`` - converted recursively with this function.
    * ``"L"`` - converted with ``db_list_to_py_list``.
    * ``"N"`` - converted to ``int`` when the number is integral, otherwise
      to ``float``.
    * any other tag (including ``"S"``) - the tagged value is used as is.

    Args:
        db_map: Mapping of attribute name to ``{tag: value}``.

    Returns:
        A dict with the same keys and the untagged Python values.
    """
    py_dict = {}
    for key, typed_value in db_map.items():
        for tag, raw in typed_value.items():
            if tag == "M":
                py_dict[key] = db_map_to_py_dict(raw)
            elif tag == "N":
                number = float(raw)
                if number % 1 == 0:
                    # Prefer parsing the raw value so large integers stay
                    # exact; integral values written like "1.0" or "1E+2"
                    # are rejected by int() and go through the float instead.
                    try:
                        number = int(raw)
                    except ValueError:
                        number = int(number)
                py_dict[key] = number
            elif tag == "L":
                py_dict[key] = db_list_to_py_list(raw)
            else:
                py_dict[key] = raw

    return py_dict

def db_map_to_pd_dataframe(db_map):
    """Convert a DynamoDB-typed map into a pandas ``DataFrame``.

    The map is first untagged with ``db_map_to_py_dict``; the resulting
    dictionary is passed straight to the ``pd.DataFrame`` constructor.
    """
    return pd.DataFrame(db_map_to_py_dict(db_map))

# convert python dictionary to dynamodb map
def py_dict_to_db_map(py_dict):
    """Convert a Python dictionary into a DynamoDB-typed map.

    Keys are converted with ``str``.  Values are tagged by their exact type:
    ``str`` -> ``"S"``, ``int``/``float`` -> ``"N"`` (the number itself is
    stored, not a string), ``dict`` -> ``"M"`` (recursively) and ``list`` ->
    ``"L"`` (via ``py_list_to_db_list``).  Values of any other type,
    including ``bool``, ``None`` and subclasses of the types above, are
    left out of the result.

    Args:
        py_dict: The dictionary to convert.

    Returns:
        A dict mapping each string key to a single-entry ``{tag: value}`` dict.
    """
    db_map = {}
    for raw_key, value in py_dict.items():
        name = str(raw_key)
        value_type = type(value)

        if value_type is str:
            tagged = {"S": value}
        elif value_type is int or value_type is float:
            tagged = {"N": value}
        elif value_type is dict:
            tagged = {"M": py_dict_to_db_map(value)}
        elif value_type is list:
            tagged = {"L": py_list_to_db_list(value)}
        else:
            # unsupported type: the entry is omitted
            continue

        db_map[name] = tagged

    return db_map

# convert dynamodb list to python list
def db_list_to_py_list(db_list):
    """Convert a DynamoDB-typed list into a plain Python list.

    Each element of ``db_list`` is a mapping from a type tag to the tagged
    value, e.g. ``{"S": "text"}``.  Tags are handled as follows:

    * ``"M"`` - converted with ``db_map_to_py_dict``.
    * ``"L"`` - converted recursively with this function.
    * ``"N"`` - converted to ``int`` when the number is integral, otherwise
      to ``float``, so list elements get Python numbers just like map
      values do.
    * any other tag (including ``"S"``) - the tagged value is used as is.

    Args:
        db_list: Iterable of ``{tag: value}`` mappings.

    Returns:
        A list of the untagged Python values, in order.
    """
    py_list = []
    for typed_value in db_list:
        for tag, raw in typed_value.items():
            if tag == "M":
                py_list.append(db_map_to_py_dict(raw))
            elif tag == "L":
                py_list.append(db_list_to_py_list(raw))
            elif tag == "N":
                number = float(raw)
                if number % 1 == 0:
                    # Prefer parsing the raw value so large integers stay
                    # exact; integral values written like "1.0" or "1E+2"
                    # are rejected by int() and go through the float instead.
                    try:
                        number = int(raw)
                    except ValueError:
                        number = int(number)
                py_list.append(number)
            else:
                py_list.append(raw)

    return py_list

# convert python list to dynamodb list
def py_list_to_db_list(py_list):
    """Convert a Python list into a DynamoDB-typed list.

    Elements are tagged by their exact type: ``str`` -> ``"S"``,
    ``int``/``float`` -> ``"N"`` (the number itself is stored, not a
    string), ``dict`` -> ``"M"`` (via ``py_dict_to_db_map``) and ``list`` ->
    ``"L"`` (recursively).  Elements of any other type are skipped, which
    mirrors how ``py_dict_to_db_map`` omits unsupported values.

    Args:
        py_list: The list to convert.

    Returns:
        A list of single-entry ``{tag: value}`` dicts, in order.
    """
    db_list = []
    for value in py_list:
        value_type = type(value)
        if value_type is str:
            item = {"S": value}
        # Both comparisons must be spelled out: ``type(value) is int or float``
        # is always truthy and would swallow the dict and list branches below.
        elif value_type is int or value_type is float:
            item = {"N": value}
        elif value_type is dict:
            item = {"M": py_dict_to_db_map(value)}
        elif value_type is list:
            item = {"L": py_list_to_db_list(value)}
        else:
            # unsupported type: skip it rather than emit a wrongly tagged
            # item or re-append the previous one
            continue

        db_list.append(item)

    return db_list

'''
following functions are used for business logic. (to be moved to business logic layer)
'''

# function to calculate the estimated cost for a number of tokens
def est_cost(n_tokens,rate):
    """Return ``rate * n_tokens / 1000`` rounded to four decimal places.

    Args:
        n_tokens: Number of tokens.
        rate: Multiplier applied per 1000 tokens - presumably the price per
            1000 tokens; confirm against the caller.

    Returns:
        The estimated cost, rounded to 4 decimals.
    """
    cost = rate * n_tokens / 1000
    return round(cost, 4)