Documents-Manager

Sleeping

App Files Files Community

rairo committed on Jan 30, 2025

Commit

35994d7

verified ·

1 Parent(s): da1091c

Update app.py

Browse files

Files changed (1) hide show

app.py +155 -0

app.py CHANGED Viewed

	@@ -0,0 +1,155 @@

+import re
+import pandas as pd
+import streamlit as st
+from datetime import datetime
+import pypdf
+def read_pdf(file_path):
+    """Extract the text of every page of a PDF file.
+
+    Args:
+        file_path: Path of the PDF file to open (read in binary mode).
+
+    Returns:
+        A list of strings in page order, one per page. Pages for which
+        ``extract_text()`` returns an empty/falsy result are left out.
+    """
+    with open(file_path, 'rb') as pdf_stream:
+        reader = pypdf.PdfReader(pdf_stream)
+        # Extraction must happen while the underlying stream is still open.
+        page_texts = [page.extract_text() for page in reader.pages]
+    return [page_text for page_text in page_texts if page_text]
+def preprocess_text(text_pages):
+    """Merge per-page text into a single string.
+
+    Args:
+        text_pages: Iterable of strings, one per page.
+
+    Returns:
+        The pages concatenated in order, separated by a newline character.
+    """
+    page_separator = '\n'
+    return page_separator.join(text_pages)
+def parse_amount(amount_str):
+    """Convert a printed money amount into a float.
+
+    Handles both ``1,234.56`` and ``1.234,56`` layouts, whitespace used as
+    a thousands separator (including non-breaking spaces), and a minus
+    sign in any position (leading or trailing).
+
+    Args:
+        amount_str: The amount as text, or ``None``/empty.
+
+    Returns:
+        The parsed value; negative when the text contains ``-``. Empty,
+        ``None`` or whitespace-only input yields ``0.0``.
+
+    Raises:
+        ValueError: If the remaining text is not a valid number.
+    """
+    if not amount_str:
+        return 0.0
+    # str.split() without arguments splits on every Unicode whitespace
+    # character, so this also removes non-breaking spaces.
+    cleaned = ''.join(amount_str.split())
+    if not cleaned:
+        return 0.0
+    negative = '-' in cleaned
+    cleaned = cleaned.replace('-', '')
+    last_comma = cleaned.rfind(',')
+    last_dot = cleaned.rfind('.')
+    if last_comma != -1 and last_dot != -1:
+        # Both separators present: the right-most one is the decimal
+        # mark, the other one only groups thousands.
+        if last_comma > last_dot:
+            cleaned = cleaned.replace('.', '').replace(',', '.')
+        else:
+            cleaned = cleaned.replace(',', '')
+    elif cleaned.count(',') > 1 or cleaned.count('.') > 1:
+        # A decimal mark can occur only once, so a repeated separator
+        # must be a thousands separator.
+        cleaned = cleaned.replace(',', '').replace('.', '')
+    else:
+        # A single comma is treated as the decimal mark.
+        cleaned = cleaned.replace(',', '.')
+    value = float(cleaned)
+    return -value if negative else value
+# Compiled once instead of on every statement line.
+_DATE_PREFIX_RE = re.compile(r'^(\d{1,2}/\d{2}/\d{4})')
+_AMOUNT_TOKEN_RE = re.compile(r'^[\d\.,-]+$')
+_CHARGE_CODES = frozenset(('A', 'C', 'M', 'S', 'T', 'V'))
+_OUTPUT_COLUMNS = ['Date', 'Description', 'Amount', 'Type']
+def _safe_amount(token):
+    """Return ``parse_amount(token)``, or ``None`` if it is not a number."""
+    try:
+        return parse_amount(token)
+    except ValueError:
+        return None
+def _normalise_date(date_str):
+    """Zero-pad a ``D/MM/YYYY`` date; return it unchanged if it is invalid."""
+    try:
+        return datetime.strptime(date_str, '%d/%m/%Y').strftime('%d/%m/%Y')
+    except ValueError:
+        # The date regex also matches impossible dates such as 31/02/2024;
+        # keep the raw text rather than aborting the whole statement.
+        return date_str
+def _parse_transaction_line(date_str, remainder):
+    """Split the text after the date into description, code and amounts."""
+    parts = remainder.split()
+    charge_code = None
+    description_parts = []
+    amounts_start = len(parts)
+    for index, part in enumerate(parts):
+        if part in _CHARGE_CODES:
+            # A single-letter charge code ends the description; the
+            # amount columns start right after it.
+            charge_code = part
+            amounts_start = index + 1
+            break
+        if _AMOUNT_TOKEN_RE.match(part):
+            amounts_start = index
+            break
+        description_parts.append(part)
+    amount_tokens = parts[amounts_start:]
+    balance = None
+    debit = None
+    credit = None
+    if amount_tokens:
+        # The last token is taken as the running balance; it is only
+        # removed from the amount list when it actually parses.
+        balance = _safe_amount(amount_tokens[-1])
+        if balance is not None:
+            amount_tokens = amount_tokens[:-1]
+        for token in amount_tokens:
+            if ',' not in token and '.' not in token:
+                continue
+            value = _safe_amount(token)
+            if value is None:
+                # Malformed token: skip it instead of failing the parse.
+                continue
+            if debit is None:
+                debit = value
+            else:
+                credit = value
+    return {
+        'Date': date_str,
+        'Description': ' '.join(description_parts).strip(),
+        'Charge Code': charge_code,
+        'Debit': debit if debit != 0 else None,
+        'Credit': credit if credit != 0 else None,
+        'Balance': balance,
+    }
+def _transaction_rows(transaction):
+    """Turn one parsed transaction into zero or more output rows."""
+    base = {
+        'Date': _normalise_date(transaction['Date']),
+        'Description': transaction['Description'],
+    }
+    debit = transaction['Debit']
+    credit = transaction['Credit']
+    rows = []
+    if transaction['Charge Code']:
+        if debit is not None:
+            rows.append(dict(base, Amount=-abs(debit), Type='bank charge'))
+        return rows
+    if debit is not None:
+        # Debits are always reported as negative, whatever sign was printed.
+        rows.append(dict(base, Amount=-abs(debit), Type='debit amount'))
+    if credit is not None and credit > 0:
+        rows.append(dict(base, Amount=credit, Type='credit amount'))
+    return rows
+def extract_transactions(text):
+    """Parse bank-statement text into a DataFrame of signed transactions.
+
+    A line starting with a ``D/MM/YYYY`` or ``DD/MM/YYYY`` date opens a new
+    transaction. Every following non-blank line without such a date is
+    appended to that transaction's description. Lines that appear before
+    the first dated line are ignored.
+
+    NOTE: the first amount-like token on a line is always taken as the
+    debit and the second as the credit, so a line carrying a single
+    amount is reported as a debit.
+
+    Args:
+        text: Full statement text with lines separated by ``\\n``.
+
+    Returns:
+        A ``pandas.DataFrame`` with the columns ``Date``, ``Description``,
+        ``Amount`` and ``Type``. ``Type`` is ``'bank charge'`` or
+        ``'debit amount'`` (negative ``Amount``) or ``'credit amount'``
+        (positive ``Amount``). The columns are present even when no
+        transaction was found.
+    """
+    transactions = []
+    current = None
+    for line in text.split('\n'):
+        date_match = _DATE_PREFIX_RE.match(line)
+        if date_match:
+            if current is not None:
+                transactions.append(current)
+            date_str = date_match.group(1)
+            current = _parse_transaction_line(date_str, line[len(date_str):])
+        elif current is not None:
+            continuation = line.strip()
+            if continuation:
+                # Blank lines must not add stray spaces to the description.
+                merged = current['Description'] + ' ' + continuation
+                current['Description'] = merged.strip()
+    if current is not None:
+        transactions.append(current)
+    rows = []
+    for transaction in transactions:
+        rows.extend(_transaction_rows(transaction))
+    return pd.DataFrame(rows, columns=_OUTPUT_COLUMNS)
+def main():
+    """Run the Streamlit page: upload a PDF statement and show its rows.
+
+    The upload is written to a uniquely named temporary file, because
+    ``read_pdf`` expects a path. The file is removed once the text has
+    been extracted. A fixed file name would be shared by concurrent
+    sessions and would be left behind on disk.
+    """
+    # Imported locally so this function does not depend on edits to the
+    # module's import block.
+    import os
+    import tempfile
+
+    st.title("Bank Statement Parser")
+    uploaded_file = st.file_uploader("Upload a PDF bank statement", type="pdf")
+    if uploaded_file is None:
+        return
+    # delete=False plus closing the handle first lets read_pdf reopen the
+    # file by name on every platform; removal is done explicitly below.
+    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_file:
+        temp_file.write(uploaded_file.getbuffer())
+        temp_path = temp_file.name
+    try:
+        text_content = read_pdf(temp_path)
+    finally:
+        os.remove(temp_path)
+    processed_text = preprocess_text(text_content)
+    transactions_df = extract_transactions(processed_text)
+    if not transactions_df.empty:
+        st.write("### Extracted Transactions")
+        st.dataframe(transactions_df)
+    else:
+        st.write("No transactions found.")
+if __name__ == "__main__":
+    main()