Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -18,6 +18,8 @@ import categories
|
|
| 18 |
from categories import Category
|
| 19 |
from main import process_image, process_pdf
|
| 20 |
|
|
|
|
|
|
|
| 21 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 22 |
PDF_IFRAME = """
|
| 23 |
<div style="border-radius: 10px; width: 100%; overflow: hidden;">
|
|
@@ -109,6 +111,52 @@ def extract_text(input_file):
|
|
| 109 |
return text
|
| 110 |
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
def categorize_text(text):
|
| 113 |
"""Takes the extracted text and updates the category"""
|
| 114 |
category = categories.categorize_text(text)
|
|
@@ -241,6 +289,7 @@ def process_and_output_files(input_files):
|
|
| 241 |
for file in input_files:
|
| 242 |
# Extract and categorize text for each file
|
| 243 |
text = extract_text(file)
|
|
|
|
| 244 |
category = categorize_text(text)
|
| 245 |
|
| 246 |
chatbot_response = query(category, text) # Convert the generator to a list
|
|
@@ -256,6 +305,9 @@ def process_and_output_files(input_files):
|
|
| 256 |
# chats[1]["value"][0][1] ,
|
| 257 |
# )
|
| 258 |
|
|
|
|
|
|
|
|
|
|
| 259 |
response_dict = json.loads(chats[1]["value"][0][1])
|
| 260 |
if category.name == "TRAVEL_CAB" :
|
| 261 |
# Extract the relevant data
|
|
@@ -265,7 +317,7 @@ def process_and_output_files(input_files):
|
|
| 265 |
"Nature of Expenditure": response_dict.get("summary"),
|
| 266 |
"Billing Date": response_dict.get("issue_date"),
|
| 267 |
"Bill/Invoice No.": "NA",
|
| 268 |
-
"Amount(Rs.)": response_dict.get("total"),
|
| 269 |
|
| 270 |
}
|
| 271 |
else:
|
|
@@ -274,12 +326,12 @@ def process_and_output_files(input_files):
|
|
| 274 |
"Nature of Expenditure": response_dict.get("summary"),
|
| 275 |
"Billing Date": response_dict.get("issue_date"),
|
| 276 |
"Bill/Invoice No.": response_dict.get("uids"),
|
| 277 |
-
"Amount(Rs.)": response_dict.get("total")
|
| 278 |
|
| 279 |
}
|
| 280 |
|
| 281 |
|
| 282 |
-
total_amount+=response_dict.get("total")
|
| 283 |
|
| 284 |
# Append the relevant data for this file to the data list
|
| 285 |
data.append(extracted_data)
|
|
|
|
| 18 |
from categories import Category
|
| 19 |
from main import process_image, process_pdf
|
| 20 |
|
| 21 |
+
from forex_python.converter import CurrencyRates
|
| 22 |
+
|
| 23 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 24 |
PDF_IFRAME = """
|
| 25 |
<div style="border-radius: 10px; width: 100%; overflow: hidden;">
|
|
|
|
| 111 |
return text
|
| 112 |
|
| 113 |
|
| 114 |
+
def find_currency_symbol(text):
    """Return the currency code whose symbol or name appears in *text*.

    Markers are matched as case-sensitive substrings. The longest marker is
    tested first so that composite symbols are not shadowed by a shorter
    symbol they contain: ``'HK$'`` must win over the bare ``'$'`` and
    ``'CN¥'`` over ``'¥'``. Markers of equal length keep the order in which
    they are listed in the table below.

    Args:
        text: The text to scan (for example the text extracted from a bill).

    Returns:
        The three-letter currency code of the first marker that matches, or
        ``None`` when *text* is empty/``None`` or contains no known marker.
    """
    currency_symbols = {
        'USD': ['$', 'US$', 'US Dollar', 'United States Dollar'],
        'EUR': ['€', 'Euro'],
        'GBP': ['£', 'British Pound', 'Pound Sterling'],
        'JPY': ['¥', 'Japanese Yen'],
        'AUD': ['A$', 'AU$', 'Australian Dollar'],
        'CAD': ['C$', 'CA$', 'Canadian Dollar'],
        'CHF': ['Swiss Franc'],
        'CNY': ['CN¥', 'Chinese Yuan', 'Renminbi'],
        'HKD': ['HK$', 'Hong Kong Dollar'],
        'NZD': ['NZ$', 'New Zealand Dollar'],
        'SEK': ['Swedish Krona'],
        'KRW': ['₩', 'South Korean Won'],
        'SGD': ['S$', 'Singapore Dollar'],
        'NOK': ['Norwegian Krone'],
        'MXN': ['Mexican Peso'],
        'INR': ['₹', 'Indian Rupee'],
        'RUB': ['₽', 'Russian Ruble'],
        'ZAR': ['South African Rand'],
        'BRL': ['R$', 'Brazilian Real'],
    }

    # Nothing to scan; also avoids a TypeError from `in` when text is None.
    if not text:
        return None

    # Flatten to (marker, code) pairs and try the most specific (longest)
    # marker first. sorted() is stable, including with reverse=True, so
    # equal-length markers are still tried in table order.
    candidates = sorted(
        (
            (symbol, code)
            for code, symbols in currency_symbols.items()
            for symbol in symbols
        ),
        key=lambda pair: len(pair[0]),
        reverse=True,
    )

    for symbol, code in candidates:
        if symbol in text:
            return code

    return None
|
| 148 |
+
|
| 149 |
+
def get_exchange_rate_to_inr(currency):
    """Return the multiplier that converts an amount in *currency* to INR.

    Args:
        currency: A three-letter currency code such as ``'USD'``, or ``None``
            when no currency was detected.

    Returns:
        ``1`` when *currency* is ``'INR'`` or ``None`` (the amount is used
        as-is), otherwise the rate returned by
        ``CurrencyRates().get_rate(currency, 'INR')``. Returns ``None`` when
        the lookup fails, so callers must check for ``None`` before doing
        arithmetic or formatting with the result.
    """
    import logging

    # No conversion needed; return before creating a rates client that
    # would not be used on this path.
    if currency is None or currency == 'INR':
        return 1

    try:
        return CurrencyRates().get_rate(currency, 'INR')
    except Exception:
        # Best-effort lookup: keep the original "None on failure" contract,
        # but record the reason and let KeyboardInterrupt/SystemExit
        # propagate instead of being swallowed by a bare except.
        logging.getLogger(__name__).exception(
            "Could not fetch exchange rate from %s to INR", currency
        )
        return None
|
| 159 |
+
|
| 160 |
def categorize_text(text):
|
| 161 |
"""Takes the extracted text and updates the category"""
|
| 162 |
category = categories.categorize_text(text)
|
|
|
|
| 289 |
for file in input_files:
|
| 290 |
# Extract and categorize text for each file
|
| 291 |
text = extract_text(file)
|
| 292 |
+
currency = find_currency_symbol(text)
|
| 293 |
category = categorize_text(text)
|
| 294 |
|
| 295 |
chatbot_response = query(category, text) # Convert the generator to a list
|
|
|
|
| 305 |
# chats[1]["value"][0][1] ,
|
| 306 |
# )
|
| 307 |
|
| 308 |
+
exchange_rate = get_exchange_rate_to_inr(currency)
|
| 309 |
+
exchange_rate = float("{:.2f}".format(exchange_rate))
|
| 310 |
+
|
| 311 |
response_dict = json.loads(chats[1]["value"][0][1])
|
| 312 |
if category.name == "TRAVEL_CAB" :
|
| 313 |
# Extract the relevant data
|
|
|
|
| 317 |
"Nature of Expenditure": response_dict.get("summary"),
|
| 318 |
"Billing Date": response_dict.get("issue_date"),
|
| 319 |
"Bill/Invoice No.": "NA",
|
| 320 |
+
"Amount(Rs.)": response_dict.get("total") * exchange_rate,
|
| 321 |
|
| 322 |
}
|
| 323 |
else:
|
|
|
|
| 326 |
"Nature of Expenditure": response_dict.get("summary"),
|
| 327 |
"Billing Date": response_dict.get("issue_date"),
|
| 328 |
"Bill/Invoice No.": response_dict.get("uids"),
|
| 329 |
+
"Amount(Rs.)": response_dict.get("total") * exchange_rate
|
| 330 |
|
| 331 |
}
|
| 332 |
|
| 333 |
|
| 334 |
+
total_amount+=response_dict.get("total") * exchange_rate
|
| 335 |
|
| 336 |
# Append the relevant data for this file to the data list
|
| 337 |
data.append(extracted_data)
|