| | import gradio as gr |
| | import pandas as pd |
| | import os |
| | import logging |
| | import shutil |
| | from datetime import datetime |
| | from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification |
| | import torch |
| | import tempfile |
| | import io |
| | import pdfplumber |
| | from fuzzywuzzy import process |
| | import numpy as np |
| | from scipy import stats |
| | import plotly.express as px |
| | import openpyxl |
| |
|
| | |
| | from nebius_backend import ( |
| | NebiusFinanceProcessor, |
| | process_transactions_nebius, |
| | generate_financial_report_nebius, |
| | batch_process_transactions_nebius |
| | ) |
| |
|
| | |
| | logging.basicConfig(level=logging.DEBUG) |
| | logger = logging.getLogger(__name__) |
| |
|
| | |
| | IS_HF_SPACE = os.getenv("SPACE_ID") is not None |
| | HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN") |
| | NEBIUS_API_KEY = os.getenv("NEBIUS_API_KEY") |
| |
|
| | |
| | finbert_model = None |
| | financial_classifier = None |
| |
|
| | |
| | try: |
| | from pdf_processor import PDFTransactionProcessor |
| | except ImportError: |
| | class PDFTransactionProcessor: |
| | """Fallback PDF processor using pdfplumber to extract tables.""" |
| | def process_pdf(self, pdf_path): |
| | try: |
| | logger.debug(f"Processing PDF with pdfplumber: {pdf_path}") |
| | data = [] |
| | with pdfplumber.open(pdf_path) as pdf: |
| | for page in pdf.pages: |
| | tables = page.extract_tables() |
| | for table in tables: |
| | if table and len(table) > 1: |
| | data.extend(table[1:]) |
| | if not data: |
| | raise ValueError("No tables found in PDF") |
| | |
| | df = pd.DataFrame(data[1:], columns=data[0]) |
| | |
| | df.columns = [str(col).lower().strip() if col else f"col_{i}" for i, col in enumerate(df.columns)] |
| | return df |
| | except Exception as e: |
| | logger.error(f"PDF processing failed: {str(e)}") |
| | raise ValueError(f"Failed to process PDF: {str(e)}") |
| |
|
| | pdf_processor = PDFTransactionProcessor() |
| |
|
| | def initialize_finbert(): |
| | """Initialize FinBERT model for financial sentiment analysis.""" |
| | global finbert_model |
| | if finbert_model is None: |
| | try: |
| | logger.info("Initializing FinBERT model...") |
| | finbert_model = pipeline( |
| | 'sentiment-analysis', |
| | model='ProsusAI/finbert', |
| | use_auth_token=HF_TOKEN, |
| | device=0 if torch.cuda.is_available() else -1 |
| | ) |
| | logger.info("FinBERT model initialized successfully") |
| | except Exception as e: |
| | logger.error(f"Failed to initialize FinBERT: {str(e)}") |
| | try: |
| | finbert_model = pipeline( |
| | 'sentiment-analysis', |
| | model='cardiffnlp/twitter-roberta-base-sentiment-latest', |
| | device=0 if torch.cuda.is_available() else -1 |
| | ) |
| | logger.info("Fallback sentiment model initialized") |
| | except Exception as fallback_error: |
| | logger.error(f"Failed to initialize fallback model: {str(fallback_error)}") |
| | finbert_model = None |
| |
|
| | def initialize_financial_classifier(): |
| | """Initialize a financial classification model.""" |
| | global financial_classifier |
| | if financial_classifier is None: |
| | try: |
| | logger.info("Initializing financial classifier...") |
| | financial_classifier = pipeline( |
| | 'text-classification', |
| | model='nlptown/bert-base-multilingual-uncased-sentiment', |
| | use_auth_token=HF_TOKEN, |
| | device=0 if torch.cuda.is_available() else -1 |
| | ) |
| | logger.info("Financial classifier initialized successfully") |
| | except Exception as e: |
| | logger.error(f"Failed to initialize financial classifier: {str(e)}") |
| | financial_classifier = None |
| |
|
| | |
| | nebius_available = bool(NEBIUS_API_KEY) |
| |
|
| | if nebius_available: |
| | try: |
| | test_processor_nebius = NebiusFinanceProcessor(NEBIUS_API_KEY) |
| | logger.info("Nebius AI Studio API available and initialized") |
| | except Exception as e: |
| | logger.warning(f"Nebius initialization failed: {str(e)}") |
| | nebius_available = False |
| |
|
| | def enhanced_categorize_transaction_local(description, amount): |
| | """Enhanced categorization using multiple Hugging Face models.""" |
| | try: |
| | if finbert_model is None: |
| | initialize_finbert() |
| | if financial_classifier is None: |
| | initialize_financial_classifier() |
| | |
| | category = rule_based_categorization(description, amount) |
| | |
| | if finbert_model is not None: |
| | try: |
| | sentiment = finbert_model(description)[0] |
| | confidence = sentiment['score'] |
| | if confidence > 0.8: |
| | category = refine_category_with_sentiment(category, description, amount, sentiment) |
| | except Exception as e: |
| | logger.debug(f"Sentiment analysis failed: {str(e)}") |
| | |
| | if financial_classifier is not None: |
| | try: |
| | classification = financial_classifier(description)[0] |
| | category = refine_category_with_classification(category, classification) |
| | except Exception as e: |
| | logger.debug(f"Financial classification failed: {str(e)}") |
| | |
| | return category |
| | except Exception as e: |
| | logger.error(f"Error in enhanced categorization: {str(e)}") |
| | return rule_based_categorization(description, amount) |
| |
|
| | def rule_based_categorization(description, amount): |
| | """Enhanced rule-based categorization with fuzzy matching.""" |
| | description = description.lower() |
| | |
| | |
| | categories = { |
| | 'Salary Income': ['salary', 'wage', 'payroll', 'pay', 'income', 'bonus'], |
| | 'Refunds & Returns': ['refund', 'return', 'cashback', 'rebate'], |
| | 'Investment Income': ['interest', 'dividend', 'investment', 'stock', 'bond'], |
| | 'Freelance Income': ['freelance', 'consulting', 'contract', 'side hustle'], |
| | 'Other Income': ['miscellaneous', 'other'], |
| | 'Groceries & Food': ['grocery', 'supermarket', 'food store', 'market', 'walmart', 'target', 'costco'], |
| | 'Transportation - Fuel': ['gas', 'fuel', 'petrol', 'station', 'shell', 'bp', 'exxon'], |
| | 'Transportation - Public/Ride': ['uber', 'lyft', 'taxi', 'bus', 'train', 'metro'], |
| | 'Transportation - Vehicle': ['car payment', 'auto loan', 'vehicle', 'maintenance', 'repair'], |
| | 'Utilities': ['electric', 'water', 'gas bill', 'internet', 'phone', 'cable'], |
| | 'Housing': ['rent', 'mortgage', 'housing', 'apartment', 'condo'], |
| | 'Dining Out': ['restaurant', 'dining', 'cafe', 'fast food', 'coffee', 'bar'], |
| | 'Healthcare': ['medical', 'doctor', 'hospital', 'pharmacy', 'dental', 'health'], |
| | 'Insurance': ['insurance', 'premium', 'coverage'], |
| | 'Shopping - Clothing': ['clothing', 'apparel', 'shoes', 'retail', 'store'], |
| | 'Shopping - Electronics': ['electronics', 'computer', 'phone', 'tech', 'gadget'], |
| | 'Entertainment': ['entertainment', 'movie', 'netflix', 'spotify', 'hulu', 'game'], |
| | 'Travel': ['travel', 'hotel', 'airbnb', 'flight', 'vacation'], |
| | 'Education': ['education', 'school', 'tuition', 'course', 'book'], |
| | 'Fitness': ['gym', 'fitness', 'sports', 'yoga', 'membership'], |
| | 'Charity': ['charity', 'donation', 'nonprofit', 'giving'], |
| | 'Taxes': ['tax', 'irs', 'federal', 'state', 'property tax'], |
| | 'Miscellaneous Expenses': ['miscellaneous', 'other'] |
| | } |
| | |
| | if amount > 0: |
| | for category, keywords in categories.items(): |
| | if category in ['Salary Income', 'Refunds & Returns', 'Investment Income', 'Freelance Income', 'Other Income']: |
| | for keyword in keywords: |
| | if process.extractOne(keyword, [description])[1] > 85: |
| | return category |
| | return 'Other Income' |
| | else: |
| | for category, keywords in categories.items(): |
| | if category not in ['Salary Income', 'Refunds & Returns', 'Investment Income', 'Freelance Income', 'Other Income']: |
| | for keyword in keywords: |
| | if process.extractOne(keyword, [description])[1] > 85: |
| | return category |
| | return 'Miscellaneous Expenses' |
| |
|
| | def refine_category_with_sentiment(current_category, description, amount, sentiment): |
| | """Refine category based on sentiment analysis results.""" |
| | sentiment_label = sentiment['label'].lower() |
| | confidence = sentiment['score'] |
| | |
| | if confidence > 0.8: |
| | if sentiment_label in ['negative', 'bearish']: |
| | if current_category in ['Dining Out', 'Entertainment', 'Shopping']: |
| | return f"Discretionary - {current_category}" |
| | elif current_category == 'Miscellaneous Expenses': |
| | return "Questionable Expense" |
| | elif sentiment_label in ['positive', 'bullish']: |
| | if current_category == 'Miscellaneous Expenses' and amount > 0: |
| | return "Unexpected Income" |
| | elif current_category in ['Groceries & Food', 'Healthcare']: |
| | return f"Essential - {current_category}" |
| | |
| | return current_category |
| |
|
| | def refine_category_with_classification(current_category, classification): |
| | """Refine category based on financial classification results.""" |
| | label = classification['label'].lower() |
| | confidence = classification['score'] |
| | |
| | if confidence > 0.8: |
| | if 'investment' in label: |
| | return 'Investment Related' |
| | elif 'loan' in label or 'credit' in label: |
| | return 'Debt Payment' |
| | elif 'subscription' in label: |
| | return 'Subscriptions' |
| | elif 'business' in label: |
| | return 'Business Expense' if current_category not in ['Salary Income', 'Freelance Income'] else current_category |
| | |
| | return current_category |
| |
|
| | def process_transactions(transactions, backend="local"): |
| | """Process transactions locally by categorizing them.""" |
| | processed = [] |
| | for tx in transactions: |
| | try: |
| | category = enhanced_categorize_transaction_local(tx['description'], tx['amount']) |
| | tx_copy = tx.copy() |
| | tx_copy['category'] = category |
| | processed.append(tx_copy) |
| | except Exception as e: |
| | logger.warning(f"Failed to categorize transaction {tx}: {str(e)}") |
| | tx_copy = tx.copy() |
| | tx_copy['category'] = 'Uncategorized' |
| | processed.append(tx_copy) |
| | return processed |
| |
|
| | def process_file(file_obj, backend="auto", currency="USD"): |
| | """Process an uploaded file and return categorized transactions, financial report, and expense chart.""" |
| | try: |
| | if file_obj is None: |
| | raise ValueError("No file provided for processing") |
| |
|
| | filename = file_obj.name |
| | logger.debug(f"Processing file: {filename} with backend: {backend}, currency: {currency}") |
| |
|
| | |
| | if filename.endswith('.pdf'): |
| | |
| | with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmpfile: |
| | shutil.copyfileobj(file_obj, tmpfile) |
| | tmp_path = tmpfile.name |
| | try: |
| | logger.debug(f"Reading PDF: {tmp_path}") |
| | df = pdf_processor.process_pdf(tmp_path) |
| | if df.empty: |
| | raise ValueError("No data extracted from PDF") |
| | logger.debug(f"PDF DataFrame columns: {list(df.columns)}") |
| | except Exception as e: |
| | raise ValueError(f"Failed to process PDF file: {str(e)}") |
| | finally: |
| | try: |
| | os.unlink(tmp_path) |
| | logger.debug(f"Deleted temp PDF file: {tmp_path}") |
| | except Exception as e: |
| | logger.warning(f"Failed to delete temp PDF file: {str(e)}") |
| | elif filename.endswith('.csv'): |
| | try: |
| | logger.debug(f"Reading CSV: {filename}") |
| | df = pd.read_csv(file_obj) |
| | if df.empty: |
| | raise ValueError("CSV file is empty") |
| | logger.debug(f"CSV DataFrame columns: {list(df.columns)}") |
| | except Exception as e: |
| | raise ValueError(f"Failed to process CSV file: {str(e)}") |
| | elif filename.endswith(('.xlsx', '.xls')): |
| | try: |
| | logger.debug(f"Reading Excel: {filename}") |
| | |
| | file_content = file_obj.read() |
| | df = pd.read_excel(io.BytesIO(file_content), engine='openpyxl') |
| | if df.empty: |
| | raise ValueError("Excel file is empty") |
| | logger.debug(f"Excel DataFrame columns: {list(df.columns)}") |
| | except Exception as e: |
| | raise ValueError(f"Failed to process Excel file: {str(e)}") |
| | else: |
| | raise ValueError("Unsupported file format. Please upload PDF, CSV, or Excel.") |
| |
|
| | |
| | if df is None or len(df) == 0: |
| | raise ValueError("No data extracted from file") |
| |
|
| | |
| | df.columns = [str(col).lower().strip() for col in df.columns] |
| | column_mappings = { |
| | 'date': ['date', 'transaction_date', 'posted_date', 'dt', 'transaction_dt'], |
| | 'description': ['description', 'desc', 'memo', 'notes', 'transaction_description'], |
| | 'amount': ['amount', 'amt', 'value', 'transaction_amount', 'total'] |
| | } |
| | required_columns = ['date', 'description', 'amount'] |
| | mapped_columns = {} |
| | for req_col in required_columns: |
| | for col in df.columns: |
| | if any(process.extractOne(col, column_mappings[req_col])[1] > 90 for col in [col.lower()]): |
| | mapped_columns[req_col] = col |
| | break |
| | if req_col not in mapped_columns: |
| | raise ValueError(f"Missing required column similar to '{req_col}'. Found columns: {list(df.columns)}") |
| |
|
| | |
| | df = df.rename(columns={v: k for k, v in mapped_columns.items()}) |
| |
|
| | |
| | df['date'] = pd.to_datetime(df['date'], errors='coerce', infer_datetime_format=True) |
| | if df['date'].isna().all(): |
| | raise ValueError("No valid dates found in 'date' column") |
| | if df['date'].isna().any(): |
| | logger.warning(f"Dropping {df['date'].isna().sum()} rows with invalid dates") |
| | df = df.dropna(subset=['date']) |
| |
|
| | |
| | df['amount'] = pd.to_numeric(df['amount'], errors='coerce') |
| | if df['amount'].isna().all(): |
| | raise ValueError("No valid amounts found in 'amount' column") |
| | if df['amount'].isna().any(): |
| | logger.warning(f"Dropping {df['amount'].isna().sum()} rows with invalid amounts") |
| | df = df.dropna(subset=['amount']) |
| |
|
| | |
| | df['description'] = df['description'].astype(str) |
| |
|
| | if df.empty: |
| | raise ValueError("No valid transactions remain after data cleaning") |
| |
|
| | transactions = df.to_dict('records') |
| | logger.debug(f"Extracted {len(transactions)} transactions: {transactions[:2]}") |
| |
|
| | if backend == "auto": |
| | if not nebius_available: |
| | logger.warning("Nebius API key not set; falling back to local processing") |
| | backend = "local" |
| | else: |
| | backend = "nebius" |
| | logger.debug(f"Selected backend: {backend}") |
| |
|
| | if backend == "nebius" and nebius_available: |
| | with tempfile.NamedTemporaryFile(suffix='.csv', delete=False) as tmpfile: |
| | df.to_csv(tmpfile.name, index=False) |
| | logger.debug(f"Created temporary CSV file: {tmpfile.name}") |
| | if not os.path.exists(tmpfile.name): |
| | raise ValueError(f"Temporary CSV file {tmpfile.name} was not created") |
| | if os.path.getsize(tmpfile.name) == 0: |
| | raise ValueError(f"Temporary CSV file {tmpfile.name} is empty") |
| | processed_df = process_transactions_nebius(tmpfile.name) |
| | if processed_df.empty or 'error' in processed_df.columns: |
| | raise ValueError(f"Nebius processing failed: {processed_df.get('error', ['Unknown error'])[0]}") |
| | report = generate_financial_report_nebius(processed_df.to_dict('records')) |
| | logger.debug(f"Nebius report: {report}") |
| | try: |
| | os.unlink(tmpfile.name) |
| | logger.debug(f"Deleted temporary CSV file: {tmpfile.name}") |
| | except Exception as e: |
| | logger.warning(f"Failed to delete temp CSV file: {str(e)}") |
| | else: |
| | processed = process_transactions(transactions, "local") |
| | report = generate_report(processed, "local") |
| | processed_df = pd.DataFrame(processed) |
| | logger.debug(f"Local report: {report}") |
| |
|
| | |
| | currency_rates = { |
| | "USD": 1.0, |
| | "GBP": 0.79, |
| | "JPY": 157.5, |
| | "KES": 129.0 |
| | } |
| | rate = currency_rates.get(currency, 1.0) |
| | processed_df['Amount'] = processed_df['amount'] * rate |
| |
|
| | |
| | expense_chart = None |
| | if not processed_df.empty: |
| | expense_df = processed_df[processed_df['amount'] < 0].groupby('category')['Amount'].sum() * -1 |
| | if not expense_df.empty: |
| | top_expenses = expense_df.sort_values(ascending=False).head(5) |
| | chart_df = pd.DataFrame({ |
| | 'Category': top_expenses.index, |
| | 'Amount': top_expenses.values |
| | }) |
| | expense_chart = px.pie( |
| | chart_df, |
| | values='Amount', |
| | names='Category', |
| | title=f'Top 5 Expense Categories ({currency})', |
| | color_discrete_sequence=['#FF6384', '#36A2EB', '#FFCE56', '#4BC0C0', '#9966FF'] |
| | ) |
| | expense_chart.update_layout( |
| | margin=dict(t=50, b=10, l=10, r=10), |
| | legend=dict(orientation="h", yanchor="top", y=1.1, xanchor="center", x=0.5), |
| | font=dict(color="#333333") |
| | ) |
| |
|
| | |
| | if isinstance(report, dict) and 'error' not in report: |
| | summary = report.get('summary', report) |
| | summary['total_income'] = round(summary.get('total_income', 0) * rate, 2) |
| | summary['total_expenses'] = round(summary.get('total_expenses', 0) * rate, 2) |
| | summary['net_balance'] = round(summary.get('net_balance', 0) * rate, 2) |
| | formatted_summary = format_summary(summary, currency, processed_df) |
| | error_message = "" |
| | else: |
| | summary = {"error": str(report.get('error', 'Unknown error in report generation'))} |
| | formatted_summary = format_summary(summary, currency, pd.DataFrame()) |
| | error_message = summary.get('error', "Unknown error") |
| |
|
| | logger.debug("File processing completed successfully") |
| | |
| | with tempfile.NamedTemporaryFile(suffix='.csv', delete=False) as tmp_csv: |
| | tmp_csv.write(processed_df.to_csv(index=False).encode()) |
| | csv_path = tmp_csv.name |
| | with tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) as tmp_xlsx: |
| | output = io.BytesIO() |
| | processed_df.to_excel(output, index=False, engine='openpyxl') |
| | tmp_xlsx.write(output.getvalue()) |
| | xlsx_path = tmp_xlsx.name |
| |
|
| | return processed_df, formatted_summary, error_message, csv_path, xlsx_path, expense_chart |
| |
|
| | except Exception as e: |
| | logger.error(f"Error processing file: {str(e)}") |
| | error_message = str(e) |
| | return pd.DataFrame({"error": [str(e)]}), format_summary({"error": str(e)}, currency, pd.DataFrame()), error_message, None, None, None |
| |
|
| | def generate_report(transactions, backend="auto"): |
| | """Generate a financial report from processed transactions.""" |
| | try: |
| | if backend == "auto": |
| | if nebius_available: |
| | backend = "nebius" |
| | else: |
| | backend = "local" |
| | |
| | if backend == "nebius" and nebius_available: |
| | return generate_financial_report_nebius(transactions) |
| | else: |
| | return generate_local_report(transactions) |
| | |
| | except Exception as e: |
| | logger.error(f"Error in generate_report: {str(e)}") |
| | return generate_local_report(transactions) |
| |
|
| | def generate_local_report(transactions): |
| | """Generate a detailed financial report with enhanced AI insights.""" |
| | if not transactions: |
| | return { |
| | 'summary': { |
| | 'total_income': 0, |
| | 'total_expenses': 0, |
| | 'net_balance': 0, |
| | 'num_transactions': 0 |
| | }, |
| | 'trends': [], |
| | 'insights': ["No transactions provided for analysis."] |
| | } |
| | |
| | df = pd.DataFrame(transactions) |
| | |
| | |
| | df['date'] = pd.to_datetime(df['date'], errors='coerce') |
| | df = df.dropna(subset=['date']) |
| | |
| | |
| | income = df[df['amount'] > 0]['amount'].sum() |
| | expenses = df[df['amount'] < 0]['amount'].sum() * -1 |
| | net = income - expenses |
| | |
| | |
| | expense_categories = df[df['amount'] < 0].groupby('category')['amount'].sum().sort_values() |
| | income_categories = df[df['amount'] > 0].groupby('category')['amount'].sum().sort_values(ascending=False) |
| | |
| | |
| | df['month'] = df['date'].dt.to_period('M') |
| | df['year'] = df['date'].dt.to_period('Y') |
| | monthly_expenses = df[df['amount'] < 0].groupby('month')['amount'].sum() * -1 |
| | monthly_income = df[df['amount'] > 0].groupby('month')['amount'].sum() |
| | ytd_expenses = df[df['amount'] < 0].groupby('year')['amount'].sum() * -1 |
| | ytd_income = df[df['amount'] > 0].groupby('year')['amount'].sum() |
| | current_month = pd.to_datetime('2025-06-09').to_period('M') |
| | current_year = pd.to_datetime('2025-06-09').to_period('Y') |
| | mtd_expenses = df[(df['amount'] < 0) & (df['month'] == current_month)]['amount'].sum() * -1 |
| | mtd_income = df[(df['amount'] > 0) & (df['month'] == current_month)]['amount'].sum() |
| | ytd_expenses_total = ytd_expenses.get(current_year, 0) * -1 |
| | ytd_income_total = ytd_income.get(current_year, 0) |
| | |
| | |
| | insights = [] |
| | |
| | |
| | frequent_categories = df[df['amount'] < 0].groupby('category').size().sort_values(ascending=False).head(5) |
| | if not frequent_categories.empty: |
| | top_categories = [f"{cat} ({count} transactions, {df[(df['category'] == cat) & (df['amount'] < 0)]['amount'].sum() * -1:.2f})" |
| | for cat, count in frequent_categories.items()] |
| | insights.append(f"**Top Spending Categories**: {', '.join(top_categories)} account for the majority of your transactions.") |
| | |
| | |
| | amounts = df['amount'].abs() |
| | q1, q3 = amounts.quantile([0.25, 0.75]) |
| | iqr = q3 - q1 |
| | anomaly_threshold = q3 + 1.5 * iqr |
| | anomalies = df[amounts > anomaly_threshold] |
| | if not anomalies.empty: |
| | for _, anomaly in anomalies.iterrows(): |
| | insights.append(f"**Anomaly Detected**: Unusual {('income' if anomaly['amount'] > 0 else 'expense')} of {anomaly['amount']:.2f} {currency_symbols.get(currency, '$')} on {anomaly['date'].date()} for '{anomaly['description']}' (category: {anomaly['category']}).") |
| | |
| | |
| | if len(monthly_income) > 1: |
| | income_mean = monthly_income.mean() |
| | income_std = monthly_income.std() |
| | income_cv = income_std / income_mean if income_mean > 0 else 0 |
| | if income_cv > 0.3: |
| | insights.append(f"**Income Volatility**: Your income varies significantly (coefficient of variation: {income_cv:.2%}), indicating irregular earnings. Consider stabilizing income sources.") |
| | else: |
| | insights.append(f"Income Stability: Your monthly income appears consistent with low variability.") |
| | |
| | |
| | if len(monthly_expenses) > 3: |
| | expense_trend = stats.linregress(range(len(monthly_expenses)), monthly_expenses.values) |
| | if expense_trend.slope > 0: |
| | insights.append(f"**Spending Trend**: Your monthly expenses are increasing by approximately {expense_trend.slope:.2f} {currency_symbols.get(currency, '$')} per month. Review discretionary spending.") |
| | elif expense_trend.slope < 0: |
| | insights.append(f"**Spending Trend**: Your monthly expenses are decreasing by approximately {-expense_trend.slope:.2f} {currency_symbols.get(currency, '$')} per month. Good job controlling costs!") |
| | |
| | |
| | if income > 0: |
| | expense_to_income_ratio = expenses / income |
| | financial_health_score = max(0, min(10, 10 - (expense_to_income_ratio * 10))) |
| | health_assessment = "Healthy" if financial_health_score >= 7 else "Needs Improvement" if financial_health_score >= 4 else "At Risk" |
| | insights.append(f"**Financial Health Score**: {financial_health_score:.1f}/10 ({health_assessment})") |
| | if expense_to_income_ratio > 0.7: |
| | insights.append(f"**Alert**: Spending is {expense_to_income_ratio:.1%} of income. Aim to reduce discretionary expenses.") |
| | |
| | |
| | if mtd_expenses > 0: |
| | avg_daily_mtd = mtd_expenses / df[df['month'] == current_month]['date'].nunique() |
| | days_in_month = 30 |
| | projected_mtd = avg_daily_mtd * days_in_month |
| | insights.append(f"**MTD Spending (as of June 09, 2025)**: {mtd_expenses:.2f} {currency_symbols.get(currency, '$')}. Projected monthly spend: {projected_mtd:.2f} {currency_symbols.get(currency, '$')}.") |
| | if ytd_income_total > 0 and ytd_expenses_total > 0: |
| | ytd_savings_rate = 1 - (ytd_expenses_total / ytd_income_total) |
| | insights.append(f"**YTD Savings Rate**: {ytd_savings_rate:.1%} of income saved in 2025.") |
| | |
| | |
| | if len(monthly_expenses) > 1: |
| | avg_monthly_expense = monthly_expenses.mean() |
| | insights.append(f"**Forecast**: Based on historical data, expect next month's expenses to be around {avg_monthly_expense:.2f} {currency_symbols.get(currency, '$')} ± {monthly_expenses.std():.2f}.") |
| | |
| | |
| | if expenses > income * 0.7: |
| | savings_target = income * 0.2 |
| | insights.append(f"**Recommendation**: Reduce expenses by {savings_target:.2f} {currency_symbols.get(currency, '$')} (20% of income) to improve savings. Focus on discretionary categories like {frequent_categories.index[0] if not frequent_categories.empty else 'miscellaneous'}.") |
| | elif net > 0 and net < income * 0.1: |
| | insights.append("**Recommendation**: Consider investing excess funds in low-risk options to grow your wealth.") |
| | if 'Discretionary -' in df['category'].values: |
| | discretionary_expenses = df[df['category'].str.contains('Discretionary')]['amount'].sum() * -1 |
| | insights.append(f"**Recommendation**: Discretionary spending totals {discretionary_expenses:.2f} {currency_symbols.get(currency, '$')}. Consider cutting back on non-essential purchases.") |
| | |
| | |
| | if not insights: |
| | insights.append("No significant patterns detected. Review your spending for potential optimizations.") |
| |
|
| | report = { |
| | 'summary': { |
| | 'total_income': round(income, 2), |
| | 'total_expenses': round(expenses, 2), |
| | 'net_balance': round(net, 2), |
| | 'num_transactions': len(df) |
| | }, |
| | 'trends': { |
| | 'top_expense_categories': [ |
| | {'category': cat, 'amount': round(abs(amt), 2), 'count': len(df[df['category'] == cat])} |
| | for cat, amt in expense_categories.head(5).items() |
| | ], |
| | 'top_income_categories': [ |
| | {'category': cat, 'amount': round(amt, 2), 'count': len(df[df['category'] == cat])} |
| | for cat, amt in income_categories.head(3).items() |
| | ] |
| | }, |
| | 'insights': insights |
| | } |
| | |
| | return report |
| |
|
| | def format_summary(summary, currency, df): |
| | """Format financial summary as readable text with enhanced structure.""" |
| | currency_symbols = {"USD": "$", "GBP": "£", "JPY": "¥", "KES": "KSh"} |
| | symbol = currency_symbols.get(currency, "KSh") |
| |
|
| | if 'error' in summary: |
| | return f"**Error**: {summary['error']}" |
| | |
| | lines = ["## Financial Summary"] |
| | lines.append(f"**Overview** (in {currency})") |
| | lines.append(f"- **Total Income**: {symbol}{summary.get('total_income', 0):,.2f}") |
| | lines.append(f"- **Total Expenses**: {symbol}{summary.get('total_expenses', 0):,.2f}") |
| | lines.append(f"- **Net Balance**: {symbol}{summary.get('net_balance', 0):,.2f}") |
| | lines.append(f"- **Number of Transactions**: {summary.get('num_transactions', 0)}") |
| | |
| | if 'trends' in summary: |
| | trends = summary['trends'] |
| | if trends.get('top_expense_categories'): |
| | lines.append("\n**Top Expense Categories**") |
| | for item in trends['top_expense_categories']: |
| | lines.append(f" - {item['category']}: {symbol}{item['amount']:.2f} ({item['count']} transactions)") |
| | if trends.get('top_income_categories'): |
| | lines.append("\n**Top Income Categories**") |
| | for item in trends['top_income_categories']: |
| | lines.append(f" - {item['category']}: {symbol}{item['amount']:.2f} ({item['count']} transactions)") |
| | |
| | if 'insights' in summary: |
| | lines.append("\n## AI-Powered Insights") |
| | for insight in summary['insights']: |
| | lines.append(f"- {insight}") |
| | |
| | |
| | if not df.empty: |
| | expense_df = df[df['amount'] < 0].groupby('category')['amount'].sum().sort_values() * -1 |
| | if not expense_df.empty: |
| | lines.append("\n**Expense Distribution (ASCII)**") |
| | max_amount = expense_df.max() |
| | for cat, amt in expense_df.head(5).items(): |
| | bar_length = int((amt / max_amount) * 20) if max_amount > 0 else 0 |
| | lines.append(f"{cat}: {symbol}{amt:.2f} {'█' * bar_length}") |
| |
|
| | return "\n".join(lines) |
| |
|
| | |
| | currency = "KSh" |
| | currency_symbols = {"USD": "$", "GBP": "£", "JPY": "¥", "KES": "KSh"} |
| |
|
| | def create_ui(): |
| | with gr.Blocks(title="Financial Transaction Processor", theme="soft") as demo: |
| | gr.Markdown("# Financial Transaction Processor") |
| | gr.Markdown("Upload your bank transactions (PDF, CSV, or Excel) to get categorized spending and financial insights.") |
| |
|
| | with gr.Row(): |
| | with gr.Column(): |
| | file_input = gr.File(label="Upload Transactions (CSV, PDF, or Excel)") |
| | currency_selector = gr.Dropdown( |
| | choices=["USD", "GBP", "JPY", "KES"], |
| | value="KES", |
| | label="Select Currency" |
| | ) |
| | backend_radio = gr.Radio( |
| | choices=["auto", "nebius", "local"], |
| | value="auto", |
| | label="Processing Backend" |
| | ) |
| | process_btn = gr.Button("Process Transactions", variant="primary") |
| |
|
| | with gr.Column(): |
| | processed_table = gr.Dataframe( |
| | label="Processed Transactions", |
| | headers=["Date", "Description", "Amount", "Category"], |
| | datatype=["str", "str", "number", "str"] |
| | ) |
| | download_csv_btn = gr.DownloadButton( |
| | value=None, |
| | label="Download Table as CSV" |
| | ) |
| | download_xlsx_btn = gr.DownloadButton( |
| | value=None, |
| | label="Download Table as XLSX" |
| | ) |
| |
|
| | with gr.Row(): |
| | with gr.Column(elem_classes=["summary-column"], scale=1): |
| | report_summary = gr.Markdown(label="Financial Summary") |
| | expense_chart = gr.Plot(label="Expense Distribution Chart") |
| |
|
| | with gr.Row(): |
| | error_display = gr.Textbox(label="Error Details", visible=False) |
| |
|
| | |
| | demo.css = """ |
| | /* Container Styles */ |
| | .financial-container { |
| | font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; |
| | max-width: 1200px; |
| | margin: 0 auto; |
| | padding: 20px; |
| | color: #333; |
| | } |
| | |
| | /* Summary Column Styles */ |
| | .summary-column { |
| | border: 2px solid #4a90e2; |
| | padding: 15px; |
| | margin: 10px; |
| | border-radius: 8px; |
| | background-color: #f8fafc; |
| | box-shadow: 0 2px 5px rgba(0,0,0,0.1); |
| | transition: all 0.3s ease; |
| | } |
| | |
| | .summary-column:hover { |
| | box-shadow: 0 5px 15px rgba(0,0,0,0.1); |
| | transform: translateY(-2px); |
| | } |
| | |
| | /* Header Styles */ |
| | .summary-header { |
| | color: #2c5282; |
| | font-size: 1.2rem; |
| | font-weight: 600; |
| | margin-bottom: 15px; |
| | padding-bottom: 8px; |
| | border-bottom: 1px solid #cbd5e0; |
| | } |
| | |
| | /* Data Display Styles */ |
| | .summary-data { |
| | display: flex; |
| | justify-content: space-between; |
| | margin: 8px 0; |
| | } |
| | |
| | .data-label { |
| | font-weight: 500; |
| | color: #4a5568; |
| | } |
| | |
| | .data-value { |
| | font-weight: 600; |
| | } |
| | |
| | .positive-value { |
| | color: #38a169; |
| | } |
| | |
| | .negative-value { |
| | color: #e53e3e; |
| | } |
| | |
| | /* Chart Container */ |
| | .chart-container { |
| | height: 250px; |
| | margin: 20px 0; |
| | padding: 15px; |
| | background: white; |
| | border-radius: 8px; |
| | box-shadow: 0 2px 10px rgba(0,0,0,0.05); |
| | } |
| | |
| | /* Responsive Grid */ |
| | .summary-grid { |
| | display: grid; |
| | grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); |
| | gap: 15px; |
| | } |
| | |
| | /* Button Styles */ |
| | .action-button { |
| | background-color: #4299e1; |
| | color: white; |
| | border: none; |
| | padding: 8px 16px; |
| | border-radius: 4px; |
| | cursor: pointer; |
| | font-size: 0.9rem; |
| | margin-top: 10px; |
| | transition: background-color 0.2s; |
| | } |
| | |
| | .action-button:hover { |
| | background-color: #3182ce; |
| | } |
| | |
| | /* Table Styles */ |
| | .transaction-table { |
| | width: 100; |
| | border-collapse: collapse; |
| | margin-top: 20px; |
| | } |
| | |
| | .transaction-table th { |
| | background-color: #4a90e2; |
| | color: white; |
| | padding: 12px; |
| | text-align: left; |
| | } |
| | |
| | .transaction-table td { |
| | padding: 10px; |
| | border-bottom: 1px solid #e2e8f0; |
| | } |
| | |
| | .transaction-table tr:hover { |
| | background-color: #ebf8ff; |
| | } |
| | |
| | /* Responsive Media Queries */ |
| | @media (max-width: 768px) { |
| | .summary-grid { |
| | grid-template-columns: 1fr; |
| | } |
| | |
| | .financial-container { |
| | padding: 10px; |
| | } |
| | } |
| | """ |
| | with gr.Accordion("Example Transaction File Format", open=False): |
| | gr.Markdown(""" |
| | Your file should include at least these columns (case-insensitive): |
| | - **Date**: Transaction date (YYYY-MM-DD format preferred) |
| | - **Description**: Transaction description/text |
| | - **Amount**: Positive for income, negative for expenses (in USD by default) |
| | |
| | Example CSV content: |
| | ``` |
| | Date,Description,Amount |
| | 2025-06-01,Salary Deposit,3000.00 |
| | 2025-06-02,Grocery Store,-125.50 |
| | 2025-06-03,Restaurant,-50.00 |
| | 2025-06-04,Netflix,-15.00 |
| | 2025-06-05,Fuel,-40.00 |
| | ``` |
| | """) |
| |
|
| | process_btn.click( |
| | fn=process_file, |
| | inputs=[file_input, backend_radio, currency_selector], |
| | outputs=[processed_table, report_summary, error_display, download_csv_btn, download_xlsx_btn, expense_chart] |
| | ).then( |
| | fn=lambda error_msg: gr.update(visible=bool(error_msg)), |
| | inputs=[error_display], |
| | outputs=[error_display] |
| | ) |
| |
|
| | currency_selector.change( |
| | fn=lambda x: globals().update(currency=x), |
| | inputs=[currency_selector], |
| | outputs=[] |
| | ) |
| |
|
| | return demo |
| |
|
| | if __name__ == "__main__": |
| | initialize_finbert() |
| | initialize_financial_classifier() |
| | demo = create_ui() |
| | demo.launch(server_name="0.0.0.0", server_port=7860) |