Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,42 +1,59 @@
|
|
| 1 |
-
from sepa import parser
|
| 2 |
-
import re
|
| 3 |
-
import pandas as pd
|
| 4 |
import gradio as gr
|
|
|
|
| 5 |
import numpy as np
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
def full_function(xml_file):
|
| 8 |
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
def strip_namespace(xml):
|
| 11 |
return re.sub(' xmlns="[^"]+"', '', xml, count=1)
|
| 12 |
|
| 13 |
# Read file
|
| 14 |
-
with open(
|
| 15 |
input_data = f.read()
|
| 16 |
|
| 17 |
# Parse the bank statement XML to dictionary
|
|
|
|
| 18 |
camt_dict = parser.parse_string(parser.bank_to_customer_statement, bytes(strip_namespace(input_data), 'utf8'))
|
| 19 |
|
| 20 |
statements = pd.DataFrame.from_dict(camt_dict['statements'])
|
| 21 |
all_entries = []
|
|
|
|
|
|
|
|
|
|
| 22 |
for i,_ in statements.iterrows():
|
| 23 |
if 'entries' in camt_dict['statements'][i]:
|
| 24 |
|
| 25 |
#create empty df
|
| 26 |
df = pd.DataFrame()
|
| 27 |
-
dd = pd.DataFrame.from_records(camt_dict['statements'][i]['entries'])
|
| 28 |
|
| 29 |
df['reference'] = dd['reference']
|
| 30 |
df['credit_debit_indicator'] = dd['credit_debit_indicator']
|
| 31 |
df['status'] = dd['status']
|
| 32 |
df['account_servicer_reference'] = dd['account_servicer_reference']
|
| 33 |
-
|
| 34 |
iban = camt_dict['statements'][i]['account']['id']['iban']
|
| 35 |
name = camt_dict['statements'][i]['account']['name']
|
| 36 |
df['iban'] = iban
|
| 37 |
df['name'] = name
|
| 38 |
df['currency'] = dd['amount'].str['currency']
|
| 39 |
df['amount'] = dd['amount'].str['_value']
|
|
|
|
| 40 |
|
| 41 |
df['value_date'] = dd['value_date'].str['date']
|
| 42 |
df['value_date'] = pd.to_datetime(df['value_date']).dt.strftime('%Y-%m-%d')
|
|
@@ -54,38 +71,68 @@ def full_function(xml_file):
|
|
| 54 |
#transaction details
|
| 55 |
df['debtor_name'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['debtor'].str['name']
|
| 56 |
df['debtor_iban'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['related_parties'].str['debtor_account'].str['id'].str['iban']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
df['account_servicer_reference'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['refs'].str['account_servicer_reference']
|
| 59 |
df['end_to_end_id'] = dd['entry_details'].str[0].str['transaction_details'].str[0].str['refs'].str['end_to_end_id']
|
| 60 |
|
|
|
|
| 61 |
|
| 62 |
-
|
| 63 |
-
|
| 64 |
df_entries = pd.concat(all_entries)
|
| 65 |
-
df_entries.head()
|
| 66 |
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
|
| 70 |
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
desc = "Upload XML file, convert to .csv file, and analyze transactions"
|
| 74 |
|
| 75 |
with gr.Blocks() as demo:
|
| 76 |
|
| 77 |
xml_file = gr.File(label = "Upload XML file here")
|
| 78 |
|
| 79 |
-
#
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
submit_btn = gr.Button("Run analysis on XML file")
|
| 87 |
|
| 88 |
|
| 89 |
-
gr.
|
|
|
|
| 90 |
|
| 91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
import numpy as np
|
| 4 |
+
from sepa import parser
|
| 5 |
+
import re
|
| 6 |
|
|
|
|
| 7 |
|
| 8 |
+
##################################################################
|
| 9 |
+
##################################################################
|
| 10 |
+
##################################################################
|
| 11 |
+
|
| 12 |
+
def full_function(xml_file):
    """Parse a bank-to-customer statement XML file into transaction tables.

    Args:
        xml_file: The uploaded file. Either an object exposing the file path
            in its ``name`` attribute (the Gradio upload object) or a plain
            path string, which is convenient for local testing.

    Returns:
        A ``(df_entries, df_entries_example)`` tuple. ``df_entries`` holds the
        entries of every statement, de-duplicated on ``reference`` (last
        occurrence kept). ``df_entries_example`` is the first 20 rows of a
        fixed subset of its columns.

    Raises:
        ValueError: If no statement in the file contains any entries.
    """
    # An upload object carries the temp-file path in ``name``; a bare path
    # string has no such attribute and is used as-is.
    full_name = xml_file if isinstance(xml_file, str) else xml_file.name

    print("File name in gradio is ")
    print(full_name)

    def strip_namespace(xml):
        # Remove only the first default ``xmlns`` declaration before parsing.
        return re.sub(r' xmlns="[^"]+"', '', xml, count=1)

    # Read file. The text is re-encoded as UTF-8 below, so decode it the same
    # way instead of relying on the platform default.
    with open(full_name, 'r', encoding='utf-8') as f:
        input_data = f.read()

    # Parse the bank statement XML to dictionary
    print("Parse full xml string")
    camt_dict = parser.parse_string(
        parser.bank_to_customer_statement,
        bytes(strip_namespace(input_data), 'utf8'),
    )

    all_entries = []

    print("Start loop all the transactions and add to df")
    for statement in camt_dict['statements']:
        if 'entries' not in statement:
            continue

        dd = pd.DataFrame.from_records(statement['entries'])
        df = pd.DataFrame()

        # Entry-level fields. Assignment order fixes the column order of the
        # exported CSV, so it is kept as in the original.
        df['reference'] = dd['reference']
        df['credit_debit_indicator'] = dd['credit_debit_indicator']
        df['status'] = dd['status']
        # Overwritten further down by the transaction-level reference; kept
        # here so the column position does not change.
        df['account_servicer_reference'] = dd['account_servicer_reference']

        # Account-level fields, broadcast to every row of this statement.
        df['iban'] = statement['account']['id']['iban']
        df['name'] = statement['account']['name']
        df['currency'] = dd['amount'].str['currency']
        df['amount'] = dd['amount'].str['_value']

        df['value_date'] = pd.to_datetime(
            dd['value_date'].str['date']
        ).dt.strftime('%Y-%m-%d')

        # NOTE(review): lines 60-70 of app.py fall between the two diff hunks
        # and are not visible here; whatever they add is not reproduced.

        # transaction details: first transaction_details record of the first
        # entry_details record of each entry.
        details = dd['entry_details'].str[0].str['transaction_details'].str[0]
        parties = details.str['related_parties']
        refs = details.str['refs']

        df['debtor_name'] = parties.str['debtor'].str['name']
        df['debtor_iban'] = parties.str['debtor_account'].str['id'].str['iban']
        df['creditor_name'] = parties.str['creditor'].str['name']
        df['creditor_iban'] = parties.str['creditor_account'].str['id'].str['iban']

        df['bic'] = (
            details.str['related_agents']
            .str['debtor_agent']
            .str['financial_institution']
            .str['bic']
        )
        df['remittance_information'] = (
            details.str['remittance_information'].str['unstructured'].str[0]
        )

        df['account_servicer_reference'] = refs.str['account_servicer_reference']
        df['end_to_end_id'] = refs.str['end_to_end_id']

        all_entries.append(df)

    # pd.concat rejects an empty list with a generic message; fail with one
    # that tells the user what is wrong with the upload.
    if not all_entries:
        raise ValueError("No statement entries found in the uploaded XML file")

    print("merge the list into df")
    df_entries = pd.concat(all_entries)

    # drop duplicates
    print("remove duplicate rows")
    df_entries = df_entries.drop_duplicates(subset=['reference'], keep='last')

    print("all done")

    example_columns = [
        'reference',
        'credit_debit_indicator',
        'iban',
        'name',
        'currency',
        'amount',
        'value_date',
        'debtor_name',
        'debtor_iban',
        'creditor_name',
        'creditor_iban',
        'remittance_information',
    ]
    df_entries_example = df_entries[example_columns].head(20)

    return df_entries, df_entries_example
|
| 98 |
|
| 99 |
|
| 100 |
|
| 101 |
+
##################################################################
|
| 102 |
+
##################################################################
|
| 103 |
+
##################################################################
|
| 104 |
|
| 105 |
|
| 106 |
+
def export_csv(xml_file):
    """Convert the uploaded XML file to ``output.csv`` and build the UI outputs.

    Args:
        xml_file: The uploaded file, passed through to ``full_function``.

    Returns:
        A ``(out, df_entries_example)`` tuple: an update that points the
        output file component at the written CSV and makes it visible, and
        the preview table returned by ``full_function``.
    """
    df_entries, df_entries_example = full_function(xml_file)

    # One path for both the write and the component update, so the two can
    # never drift apart.
    csv_path = "output.csv"
    df_entries.to_csv(csv_path)

    # ``gr.update`` is the component-agnostic form of ``gr.File.update`` and
    # is also available on Gradio releases that dropped the per-component
    # ``update`` classmethods.
    out = gr.update(value=csv_path, visible=True)

    return out, df_entries_example
|
| 115 |
+
|
| 116 |
+
##################################################################
|
| 117 |
+
##################################################################
|
| 118 |
+
##################################################################
|
| 119 |
+
|
| 120 |
# Text passed as the interface title.
desc = "Upload XML file, convert to .csv file, and analyze transactions"

with gr.Blocks() as demo:
    # Upload widget; its value is the single input of export_csv.
    xml_file = gr.File(label="Upload XML file here")

    # Preview table, filled with the second value returned by export_csv.
    df_entries_example = gr.DataFrame(
        label="Example output table, top 20 rows (not all columns)"
    )

    with gr.Row():
        # Created hidden; export_csv returns an update that makes it visible.
        out = gr.File(label="Output file", interactive=False, visible=False)

    # NOTE(review): the original indentation was lost, so nesting this call
    # inside the Blocks context is an assumption - confirm against app.py.
    gr.Interface(
        fn=export_csv,
        inputs=xml_file,
        outputs=[out, df_entries_example],
        title=desc,
    ).launch(share=True, debug=True)
|