import gradio as gr import pandas as pd import os import logging import shutil from datetime import datetime from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification import torch import tempfile import io import pdfplumber # Added for PDF fallback processing from fuzzywuzzy import process # Added for fuzzy matching import numpy as np from scipy import stats # For statistical analysis import plotly.express as px # For Plotly charts import openpyxl # Ensure XLSX dependency # Import all backends from nebius_backend import ( NebiusFinanceProcessor, process_transactions_nebius, generate_financial_report_nebius, batch_process_transactions_nebius ) # Set up logging logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) # Check if running on Hugging Face Spaces IS_HF_SPACE = os.getenv("SPACE_ID") is not None HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN") NEBIUS_API_KEY = os.getenv("NEBIUS_API_KEY") # Initialize models for local processing finbert_model = None financial_classifier = None # Fallback PDFTransactionProcessor if custom module is unavailable try: from pdf_processor import PDFTransactionProcessor except ImportError: class PDFTransactionProcessor: """Fallback PDF processor using pdfplumber to extract tables.""" def process_pdf(self, pdf_path): try: logger.debug(f"Processing PDF with pdfplumber: {pdf_path}") data = [] with pdfplumber.open(pdf_path) as pdf: for page in pdf.pages: tables = page.extract_tables() for table in tables: if table and len(table) > 1: # Skip empty or header-only tables data.extend(table[1:]) # Skip header row if not data: raise ValueError("No tables found in PDF") # Assume first row has headers; adjust if needed df = pd.DataFrame(data[1:], columns=data[0]) # Clean column names df.columns = [str(col).lower().strip() if col else f"col_{i}" for i, col in enumerate(df.columns)] return df except Exception as e: logger.error(f"PDF processing failed: {str(e)}") raise ValueError(f"Failed to process PDF: {str(e)}") pdf_processor = PDFTransactionProcessor() def initialize_finbert(): """Initialize FinBERT model for financial sentiment analysis.""" global finbert_model if finbert_model is None: try: logger.info("Initializing FinBERT model...") finbert_model = pipeline( 'sentiment-analysis', model='ProsusAI/finbert', use_auth_token=HF_TOKEN, device=0 if torch.cuda.is_available() else -1 ) logger.info("FinBERT model initialized successfully") except Exception as e: logger.error(f"Failed to initialize FinBERT: {str(e)}") try: finbert_model = pipeline( 'sentiment-analysis', model='cardiffnlp/twitter-roberta-base-sentiment-latest', device=0 if torch.cuda.is_available() else -1 ) logger.info("Fallback sentiment model initialized") except Exception as fallback_error: logger.error(f"Failed to initialize fallback model: {str(fallback_error)}") finbert_model = None def initialize_financial_classifier(): """Initialize a financial classification model.""" global financial_classifier if financial_classifier is None: try: logger.info("Initializing financial classifier...") financial_classifier = pipeline( 'text-classification', model='nlptown/bert-base-multilingual-uncased-sentiment', use_auth_token=HF_TOKEN, device=0 if torch.cuda.is_available() else -1 ) logger.info("Financial classifier initialized successfully") except Exception as e: logger.error(f"Failed to initialize financial classifier: {str(e)}") financial_classifier = None # Check API availability nebius_available = bool(NEBIUS_API_KEY) if nebius_available: try: test_processor_nebius = NebiusFinanceProcessor(NEBIUS_API_KEY) logger.info("Nebius AI Studio API available and initialized") except Exception as e: logger.warning(f"Nebius initialization failed: {str(e)}") nebius_available = False def enhanced_categorize_transaction_local(description, amount): """Enhanced categorization using multiple Hugging Face models.""" try: if finbert_model is None: initialize_finbert() if financial_classifier is None: initialize_financial_classifier() category = rule_based_categorization(description, amount) if finbert_model is not None: try: sentiment = finbert_model(description)[0] confidence = sentiment['score'] if confidence > 0.8: category = refine_category_with_sentiment(category, description, amount, sentiment) except Exception as e: logger.debug(f"Sentiment analysis failed: {str(e)}") if financial_classifier is not None: try: classification = financial_classifier(description)[0] category = refine_category_with_classification(category, classification) except Exception as e: logger.debug(f"Financial classification failed: {str(e)}") return category except Exception as e: logger.error(f"Error in enhanced categorization: {str(e)}") return rule_based_categorization(description, amount) def rule_based_categorization(description, amount): """Enhanced rule-based categorization with fuzzy matching.""" description = description.lower() # Fuzzy matching for better category detection categories = { 'Salary Income': ['salary', 'wage', 'payroll', 'pay', 'income', 'bonus'], 'Refunds & Returns': ['refund', 'return', 'cashback', 'rebate'], 'Investment Income': ['interest', 'dividend', 'investment', 'stock', 'bond'], 'Freelance Income': ['freelance', 'consulting', 'contract', 'side hustle'], 'Other Income': ['miscellaneous', 'other'], 'Groceries & Food': ['grocery', 'supermarket', 'food store', 'market', 'walmart', 'target', 'costco'], 'Transportation - Fuel': ['gas', 'fuel', 'petrol', 'station', 'shell', 'bp', 'exxon'], 'Transportation - Public/Ride': ['uber', 'lyft', 'taxi', 'bus', 'train', 'metro'], 'Transportation - Vehicle': ['car payment', 'auto loan', 'vehicle', 'maintenance', 'repair'], 'Utilities': ['electric', 'water', 'gas bill', 'internet', 'phone', 'cable'], 'Housing': ['rent', 'mortgage', 'housing', 'apartment', 'condo'], 'Dining Out': ['restaurant', 'dining', 'cafe', 'fast food', 'coffee', 'bar'], 'Healthcare': ['medical', 'doctor', 'hospital', 'pharmacy', 'dental', 'health'], 'Insurance': ['insurance', 'premium', 'coverage'], 'Shopping - Clothing': ['clothing', 'apparel', 'shoes', 'retail', 'store'], 'Shopping - Electronics': ['electronics', 'computer', 'phone', 'tech', 'gadget'], 'Entertainment': ['entertainment', 'movie', 'netflix', 'spotify', 'hulu', 'game'], 'Travel': ['travel', 'hotel', 'airbnb', 'flight', 'vacation'], 'Education': ['education', 'school', 'tuition', 'course', 'book'], 'Fitness': ['gym', 'fitness', 'sports', 'yoga', 'membership'], 'Charity': ['charity', 'donation', 'nonprofit', 'giving'], 'Taxes': ['tax', 'irs', 'federal', 'state', 'property tax'], 'Miscellaneous Expenses': ['miscellaneous', 'other'] } if amount > 0: for category, keywords in categories.items(): if category in ['Salary Income', 'Refunds & Returns', 'Investment Income', 'Freelance Income', 'Other Income']: for keyword in keywords: if process.extractOne(keyword, [description])[1] > 85: # Fuzzy match threshold return category return 'Other Income' else: for category, keywords in categories.items(): if category not in ['Salary Income', 'Refunds & Returns', 'Investment Income', 'Freelance Income', 'Other Income']: for keyword in keywords: if process.extractOne(keyword, [description])[1] > 85: # Fuzzy match threshold return category return 'Miscellaneous Expenses' def refine_category_with_sentiment(current_category, description, amount, sentiment): """Refine category based on sentiment analysis results.""" sentiment_label = sentiment['label'].lower() confidence = sentiment['score'] if confidence > 0.8: if sentiment_label in ['negative', 'bearish']: if current_category in ['Dining Out', 'Entertainment', 'Shopping']: return f"Discretionary - {current_category}" elif current_category == 'Miscellaneous Expenses': return "Questionable Expense" elif sentiment_label in ['positive', 'bullish']: if current_category == 'Miscellaneous Expenses' and amount > 0: return "Unexpected Income" elif current_category in ['Groceries & Food', 'Healthcare']: return f"Essential - {current_category}" return current_category def refine_category_with_classification(current_category, classification): """Refine category based on financial classification results.""" label = classification['label'].lower() confidence = classification['score'] if confidence > 0.8: if 'investment' in label: return 'Investment Related' elif 'loan' in label or 'credit' in label: return 'Debt Payment' elif 'subscription' in label: return 'Subscriptions' elif 'business' in label: return 'Business Expense' if current_category not in ['Salary Income', 'Freelance Income'] else current_category return current_category def process_transactions(transactions, backend="local"): """Process transactions locally by categorizing them.""" processed = [] for tx in transactions: try: category = enhanced_categorize_transaction_local(tx['description'], tx['amount']) tx_copy = tx.copy() tx_copy['category'] = category processed.append(tx_copy) except Exception as e: logger.warning(f"Failed to categorize transaction {tx}: {str(e)}") tx_copy = tx.copy() tx_copy['category'] = 'Uncategorized' processed.append(tx_copy) return processed def process_file(file_obj, backend="auto", currency="USD"): """Process an uploaded file and return categorized transactions, financial report, and expense chart.""" try: if file_obj is None: raise ValueError("No file provided for processing") filename = file_obj.name logger.debug(f"Processing file: {filename} with backend: {backend}, currency: {currency}") # Process based on file type if filename.endswith('.pdf'): # Use a temporary file with proper cleanup with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmpfile: shutil.copyfileobj(file_obj, tmpfile) tmp_path = tmpfile.name try: logger.debug(f"Reading PDF: {tmp_path}") df = pdf_processor.process_pdf(tmp_path) if df.empty: raise ValueError("No data extracted from PDF") logger.debug(f"PDF DataFrame columns: {list(df.columns)}") except Exception as e: raise ValueError(f"Failed to process PDF file: {str(e)}") finally: try: os.unlink(tmp_path) logger.debug(f"Deleted temp PDF file: {tmp_path}") except Exception as e: logger.warning(f"Failed to delete temp PDF file: {str(e)}") elif filename.endswith('.csv'): try: logger.debug(f"Reading CSV: {filename}") df = pd.read_csv(file_obj) if df.empty: raise ValueError("CSV file is empty") logger.debug(f"CSV DataFrame columns: {list(df.columns)}") except Exception as e: raise ValueError(f"Failed to process CSV file: {str(e)}") elif filename.endswith(('.xlsx', '.xls')): try: logger.debug(f"Reading Excel: {filename}") # Use openpyxl explicitly and read into memory to handle file-like objects file_content = file_obj.read() df = pd.read_excel(io.BytesIO(file_content), engine='openpyxl') if df.empty: raise ValueError("Excel file is empty") logger.debug(f"Excel DataFrame columns: {list(df.columns)}") except Exception as e: raise ValueError(f"Failed to process Excel file: {str(e)}") else: raise ValueError("Unsupported file format. Please upload PDF, CSV, or Excel.") # Validate DataFrame if df is None or len(df) == 0: raise ValueError("No data extracted from file") # Standardize and validate columns with fuzzy matching df.columns = [str(col).lower().strip() for col in df.columns] column_mappings = { 'date': ['date', 'transaction_date', 'posted_date', 'dt', 'transaction_dt'], 'description': ['description', 'desc', 'memo', 'notes', 'transaction_description'], 'amount': ['amount', 'amt', 'value', 'transaction_amount', 'total'] } required_columns = ['date', 'description', 'amount'] mapped_columns = {} for req_col in required_columns: for col in df.columns: if any(process.extractOne(col, column_mappings[req_col])[1] > 90 for col in [col.lower()]): mapped_columns[req_col] = col break if req_col not in mapped_columns: raise ValueError(f"Missing required column similar to '{req_col}'. Found columns: {list(df.columns)}") # Rename columns to standard names df = df.rename(columns={v: k for k, v in mapped_columns.items()}) # Handle date parsing with multiple formats df['date'] = pd.to_datetime(df['date'], errors='coerce', infer_datetime_format=True) if df['date'].isna().all(): raise ValueError("No valid dates found in 'date' column") if df['date'].isna().any(): logger.warning(f"Dropping {df['date'].isna().sum()} rows with invalid dates") df = df.dropna(subset=['date']) # Ensure amount is numeric df['amount'] = pd.to_numeric(df['amount'], errors='coerce') if df['amount'].isna().all(): raise ValueError("No valid amounts found in 'amount' column") if df['amount'].isna().any(): logger.warning(f"Dropping {df['amount'].isna().sum()} rows with invalid amounts") df = df.dropna(subset=['amount']) # Ensure description is string df['description'] = df['description'].astype(str) if df.empty: raise ValueError("No valid transactions remain after data cleaning") transactions = df.to_dict('records') logger.debug(f"Extracted {len(transactions)} transactions: {transactions[:2]}") if backend == "auto": if not nebius_available: logger.warning("Nebius API key not set; falling back to local processing") backend = "local" else: backend = "nebius" logger.debug(f"Selected backend: {backend}") if backend == "nebius" and nebius_available: with tempfile.NamedTemporaryFile(suffix='.csv', delete=False) as tmpfile: df.to_csv(tmpfile.name, index=False) logger.debug(f"Created temporary CSV file: {tmpfile.name}") if not os.path.exists(tmpfile.name): raise ValueError(f"Temporary CSV file {tmpfile.name} was not created") if os.path.getsize(tmpfile.name) == 0: raise ValueError(f"Temporary CSV file {tmpfile.name} is empty") processed_df = process_transactions_nebius(tmpfile.name) if processed_df.empty or 'error' in processed_df.columns: raise ValueError(f"Nebius processing failed: {processed_df.get('error', ['Unknown error'])[0]}") report = generate_financial_report_nebius(processed_df.to_dict('records')) logger.debug(f"Nebius report: {report}") try: os.unlink(tmpfile.name) logger.debug(f"Deleted temporary CSV file: {tmpfile.name}") except Exception as e: logger.warning(f"Failed to delete temp CSV file: {str(e)}") else: processed = process_transactions(transactions, "local") report = generate_report(processed, "local") processed_df = pd.DataFrame(processed) logger.debug(f"Local report: {report}") # Currency conversion rates (approximate as of June 09, 2025) currency_rates = { "USD": 1.0, "GBP": 0.79, "JPY": 157.5, "KES": 129.0 } rate = currency_rates.get(currency, 1.0) processed_df['Amount'] = processed_df['amount'] * rate # Generate expense chart data using Plotly expense_chart = None if not processed_df.empty: expense_df = processed_df[processed_df['amount'] < 0].groupby('category')['Amount'].sum() * -1 if not expense_df.empty: top_expenses = expense_df.sort_values(ascending=False).head(5) chart_df = pd.DataFrame({ 'Category': top_expenses.index, 'Amount': top_expenses.values }) expense_chart = px.pie( chart_df, values='Amount', names='Category', title=f'Top 5 Expense Categories ({currency})', color_discrete_sequence=['#FF6384', '#36A2EB', '#FFCE56', '#4BC0C0', '#9966FF'] ) expense_chart.update_layout( margin=dict(t=50, b=10, l=10, r=10), legend=dict(orientation="h", yanchor="top", y=1.1, xanchor="center", x=0.5), font=dict(color="#333333") ) # Format the summary with currency symbol if isinstance(report, dict) and 'error' not in report: summary = report.get('summary', report) summary['total_income'] = round(summary.get('total_income', 0) * rate, 2) summary['total_expenses'] = round(summary.get('total_expenses', 0) * rate, 2) summary['net_balance'] = round(summary.get('net_balance', 0) * rate, 2) formatted_summary = format_summary(summary, currency, processed_df) error_message = "" else: summary = {"error": str(report.get('error', 'Unknown error in report generation'))} formatted_summary = format_summary(summary, currency, pd.DataFrame()) error_message = summary.get('error', "Unknown error") logger.debug("File processing completed successfully") # Prepare temporary files for CSV and XLSX downloads with tempfile.NamedTemporaryFile(suffix='.csv', delete=False) as tmp_csv: tmp_csv.write(processed_df.to_csv(index=False).encode()) csv_path = tmp_csv.name with tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) as tmp_xlsx: output = io.BytesIO() processed_df.to_excel(output, index=False, engine='openpyxl') tmp_xlsx.write(output.getvalue()) xlsx_path = tmp_xlsx.name return processed_df, formatted_summary, error_message, csv_path, xlsx_path, expense_chart except Exception as e: logger.error(f"Error processing file: {str(e)}") error_message = str(e) return pd.DataFrame({"error": [str(e)]}), format_summary({"error": str(e)}, currency, pd.DataFrame()), error_message, None, None, None def generate_report(transactions, backend="auto"): """Generate a financial report from processed transactions.""" try: if backend == "auto": if nebius_available: backend = "nebius" else: backend = "local" if backend == "nebius" and nebius_available: return generate_financial_report_nebius(transactions) else: return generate_local_report(transactions) except Exception as e: logger.error(f"Error in generate_report: {str(e)}") return generate_local_report(transactions) def generate_local_report(transactions): """Generate a detailed financial report with enhanced AI insights.""" if not transactions: return { 'summary': { 'total_income': 0, 'total_expenses': 0, 'net_balance': 0, 'num_transactions': 0 }, 'trends': [], 'insights': ["No transactions provided for analysis."] } df = pd.DataFrame(transactions) # Convert date strings to datetime for analysis df['date'] = pd.to_datetime(df['date'], errors='coerce') df = df.dropna(subset=['date']) # Calculate basic metrics income = df[df['amount'] > 0]['amount'].sum() expenses = df[df['amount'] < 0]['amount'].sum() * -1 net = income - expenses # Group by categories expense_categories = df[df['amount'] < 0].groupby('category')['amount'].sum().sort_values() income_categories = df[df['amount'] > 0].groupby('category')['amount'].sum().sort_values(ascending=False) # Time-based analysis df['month'] = df['date'].dt.to_period('M') df['year'] = df['date'].dt.to_period('Y') monthly_expenses = df[df['amount'] < 0].groupby('month')['amount'].sum() * -1 monthly_income = df[df['amount'] > 0].groupby('month')['amount'].sum() ytd_expenses = df[df['amount'] < 0].groupby('year')['amount'].sum() * -1 ytd_income = df[df['amount'] > 0].groupby('year')['amount'].sum() current_month = pd.to_datetime('2025-06-09').to_period('M') current_year = pd.to_datetime('2025-06-09').to_period('Y') mtd_expenses = df[(df['amount'] < 0) & (df['month'] == current_month)]['amount'].sum() * -1 mtd_income = df[(df['amount'] > 0) & (df['month'] == current_month)]['amount'].sum() ytd_expenses_total = ytd_expenses.get(current_year, 0) * -1 ytd_income_total = ytd_income.get(current_year, 0) # Advanced AI Insights insights = [] # 1. Spending Behavior Analysis frequent_categories = df[df['amount'] < 0].groupby('category').size().sort_values(ascending=False).head(5) if not frequent_categories.empty: top_categories = [f"{cat} ({count} transactions, {df[(df['category'] == cat) & (df['amount'] < 0)]['amount'].sum() * -1:.2f})" for cat, count in frequent_categories.items()] insights.append(f"**Top Spending Categories**: {', '.join(top_categories)} account for the majority of your transactions.") # 2. Anomaly Detection (using IQR method) amounts = df['amount'].abs() q1, q3 = amounts.quantile([0.25, 0.75]) iqr = q3 - q1 anomaly_threshold = q3 + 1.5 * iqr anomalies = df[amounts > anomaly_threshold] if not anomalies.empty: for _, anomaly in anomalies.iterrows(): insights.append(f"**Anomaly Detected**: Unusual {('income' if anomaly['amount'] > 0 else 'expense')} of {anomaly['amount']:.2f} {currency_symbols.get(currency, '$')} on {anomaly['date'].date()} for '{anomaly['description']}' (category: {anomaly['category']}).") # 3. Income Stability and Volatility if len(monthly_income) > 1: income_mean = monthly_income.mean() income_std = monthly_income.std() income_cv = income_std / income_mean if income_mean > 0 else 0 if income_cv > 0.3: insights.append(f"**Income Volatility**: Your income varies significantly (coefficient of variation: {income_cv:.2%}), indicating irregular earnings. Consider stabilizing income sources.") else: insights.append(f"Income Stability: Your monthly income appears consistent with low variability.") # 4. Spending Trends (Seasonality Analysis) if len(monthly_expenses) > 3: expense_trend = stats.linregress(range(len(monthly_expenses)), monthly_expenses.values) if expense_trend.slope > 0: insights.append(f"**Spending Trend**: Your monthly expenses are increasing by approximately {expense_trend.slope:.2f} {currency_symbols.get(currency, '$')} per month. Review discretionary spending.") elif expense_trend.slope < 0: insights.append(f"**Spending Trend**: Your monthly expenses are decreasing by approximately {-expense_trend.slope:.2f} {currency_symbols.get(currency, '$')} per month. Good job controlling costs!") # 5. Financial Health if income > 0: expense_to_income_ratio = expenses / income financial_health_score = max(0, min(10, 10 - (expense_to_income_ratio * 10))) health_assessment = "Healthy" if financial_health_score >= 7 else "Needs Improvement" if financial_health_score >= 4 else "At Risk" insights.append(f"**Financial Health Score**: {financial_health_score:.1f}/10 ({health_assessment})") if expense_to_income_ratio > 0.7: insights.append(f"**Alert**: Spending is {expense_to_income_ratio:.1%} of income. Aim to reduce discretionary expenses.") # 6. MTD and YTD Insights if mtd_expenses > 0: avg_daily_mtd = mtd_expenses / df[df['month'] == current_month]['date'].nunique() days_in_month = 30 projected_mtd = avg_daily_mtd * days_in_month insights.append(f"**MTD Spending (as of June 09, 2025)**: {mtd_expenses:.2f} {currency_symbols.get(currency, '$')}. Projected monthly spend: {projected_mtd:.2f} {currency_symbols.get(currency, '$')}.") if ytd_income_total > 0 and ytd_expenses_total > 0: ytd_savings_rate = 1 - (ytd_expenses_total / ytd_income_total) insights.append(f"**YTD Savings Rate**: {ytd_savings_rate:.1%} of income saved in 2025.") # 7. Predictive Insights if len(monthly_expenses) > 1: avg_monthly_expense = monthly_expenses.mean() insights.append(f"**Forecast**: Based on historical data, expect next month's expenses to be around {avg_monthly_expense:.2f} {currency_symbols.get(currency, '$')} ± {monthly_expenses.std():.2f}.") # 8. Personalized Recommendations if expenses > income * 0.7: savings_target = income * 0.2 insights.append(f"**Recommendation**: Reduce expenses by {savings_target:.2f} {currency_symbols.get(currency, '$')} (20% of income) to improve savings. Focus on discretionary categories like {frequent_categories.index[0] if not frequent_categories.empty else 'miscellaneous'}.") elif net > 0 and net < income * 0.1: insights.append("**Recommendation**: Consider investing excess funds in low-risk options to grow your wealth.") if 'Discretionary -' in df['category'].values: discretionary_expenses = df[df['category'].str.contains('Discretionary')]['amount'].sum() * -1 insights.append(f"**Recommendation**: Discretionary spending totals {discretionary_expenses:.2f} {currency_symbols.get(currency, '$')}. Consider cutting back on non-essential purchases.") # Fallback if no insights if not insights: insights.append("No significant patterns detected. Review your spending for potential optimizations.") report = { 'summary': { 'total_income': round(income, 2), 'total_expenses': round(expenses, 2), 'net_balance': round(net, 2), 'num_transactions': len(df) }, 'trends': { 'top_expense_categories': [ {'category': cat, 'amount': round(abs(amt), 2), 'count': len(df[df['category'] == cat])} for cat, amt in expense_categories.head(5).items() ], 'top_income_categories': [ {'category': cat, 'amount': round(amt, 2), 'count': len(df[df['category'] == cat])} for cat, amt in income_categories.head(3).items() ] }, 'insights': insights } return report def format_summary(summary, currency, df): """Format financial summary as readable text with enhanced structure.""" currency_symbols = {"USD": "$", "GBP": "£", "JPY": "¥", "KES": "KSh"} symbol = currency_symbols.get(currency, "KSh") if 'error' in summary: return f"**Error**: {summary['error']}" lines = ["## Financial Summary"] lines.append(f"**Overview** (in {currency})") lines.append(f"- **Total Income**: {symbol}{summary.get('total_income', 0):,.2f}") lines.append(f"- **Total Expenses**: {symbol}{summary.get('total_expenses', 0):,.2f}") lines.append(f"- **Net Balance**: {symbol}{summary.get('net_balance', 0):,.2f}") lines.append(f"- **Number of Transactions**: {summary.get('num_transactions', 0)}") if 'trends' in summary: trends = summary['trends'] if trends.get('top_expense_categories'): lines.append("\n**Top Expense Categories**") for item in trends['top_expense_categories']: lines.append(f" - {item['category']}: {symbol}{item['amount']:.2f} ({item['count']} transactions)") if trends.get('top_income_categories'): lines.append("\n**Top Income Categories**") for item in trends['top_income_categories']: lines.append(f" - {item['category']}: {symbol}{item['amount']:.2f} ({item['count']} transactions)") if 'insights' in summary: lines.append("\n## AI-Powered Insights") for insight in summary['insights']: lines.append(f"- {insight}") # Add simple ASCII chart for expense categories if not df.empty: expense_df = df[df['amount'] < 0].groupby('category')['amount'].sum().sort_values() * -1 if not expense_df.empty: lines.append("\n**Expense Distribution (ASCII)**") max_amount = expense_df.max() for cat, amt in expense_df.head(5).items(): bar_length = int((amt / max_amount) * 20) if max_amount > 0 else 0 lines.append(f"{cat}: {symbol}{amt:.2f} {'█' * bar_length}") return "\n".join(lines) # Global variable to store the last selected currency currency = "KSh" currency_symbols = {"USD": "$", "GBP": "£", "JPY": "¥", "KES": "KSh"} def create_ui(): with gr.Blocks(title="Financial Transaction Processor", theme="soft") as demo: gr.Markdown("# Financial Transaction Processor") gr.Markdown("Upload your bank transactions (PDF, CSV, or Excel) to get categorized spending and financial insights.") with gr.Row(): with gr.Column(): file_input = gr.File(label="Upload Transactions (CSV, PDF, or Excel)") currency_selector = gr.Dropdown( choices=["USD", "GBP", "JPY", "KES"], value="KES", label="Select Currency" ) backend_radio = gr.Radio( choices=["auto", "nebius", "local"], value="auto", label="Processing Backend" ) process_btn = gr.Button("Process Transactions", variant="primary") with gr.Column(): processed_table = gr.Dataframe( label="Processed Transactions", headers=["Date", "Description", "Amount", "Category"], datatype=["str", "str", "number", "str"] ) download_csv_btn = gr.DownloadButton( value=None, label="Download Table as CSV" ) download_xlsx_btn = gr.DownloadButton( value=None, label="Download Table as XLSX" ) with gr.Row(): with gr.Column(elem_classes=["summary-column"], scale=1): report_summary = gr.Markdown(label="Financial Summary") expense_chart = gr.Plot(label="Expense Distribution Chart") with gr.Row(): error_display = gr.Textbox(label="Error Details", visible=False) # CSS definition demo.css = """ /* Container Styles */ .financial-container { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; max-width: 1200px; margin: 0 auto; padding: 20px; color: #333; } /* Summary Column Styles */ .summary-column { border: 2px solid #4a90e2; padding: 15px; margin: 10px; border-radius: 8px; background-color: #f8fafc; box-shadow: 0 2px 5px rgba(0,0,0,0.1); transition: all 0.3s ease; } .summary-column:hover { box-shadow: 0 5px 15px rgba(0,0,0,0.1); transform: translateY(-2px); } /* Header Styles */ .summary-header { color: #2c5282; font-size: 1.2rem; font-weight: 600; margin-bottom: 15px; padding-bottom: 8px; border-bottom: 1px solid #cbd5e0; } /* Data Display Styles */ .summary-data { display: flex; justify-content: space-between; margin: 8px 0; } .data-label { font-weight: 500; color: #4a5568; } .data-value { font-weight: 600; } .positive-value { color: #38a169; } .negative-value { color: #e53e3e; } /* Chart Container */ .chart-container { height: 250px; margin: 20px 0; padding: 15px; background: white; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.05); } /* Responsive Grid */ .summary-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 15px; } /* Button Styles */ .action-button { background-color: #4299e1; color: white; border: none; padding: 8px 16px; border-radius: 4px; cursor: pointer; font-size: 0.9rem; margin-top: 10px; transition: background-color 0.2s; } .action-button:hover { background-color: #3182ce; } /* Table Styles */ .transaction-table { width: 100; border-collapse: collapse; margin-top: 20px; } .transaction-table th { background-color: #4a90e2; color: white; padding: 12px; text-align: left; } .transaction-table td { padding: 10px; border-bottom: 1px solid #e2e8f0; } .transaction-table tr:hover { background-color: #ebf8ff; } /* Responsive Media Queries */ @media (max-width: 768px) { .summary-grid { grid-template-columns: 1fr; } .financial-container { padding: 10px; } } """ with gr.Accordion("Example Transaction File Format", open=False): gr.Markdown(""" Your file should include at least these columns (case-insensitive): - **Date**: Transaction date (YYYY-MM-DD format preferred) - **Description**: Transaction description/text - **Amount**: Positive for income, negative for expenses (in USD by default) Example CSV content: ``` Date,Description,Amount 2025-06-01,Salary Deposit,3000.00 2025-06-02,Grocery Store,-125.50 2025-06-03,Restaurant,-50.00 2025-06-04,Netflix,-15.00 2025-06-05,Fuel,-40.00 ``` """) process_btn.click( fn=process_file, inputs=[file_input, backend_radio, currency_selector], outputs=[processed_table, report_summary, error_display, download_csv_btn, download_xlsx_btn, expense_chart] ).then( fn=lambda error_msg: gr.update(visible=bool(error_msg)), inputs=[error_display], outputs=[error_display] ) currency_selector.change( fn=lambda x: globals().update(currency=x), inputs=[currency_selector], outputs=[] ) return demo if __name__ == "__main__": initialize_finbert() initialize_financial_classifier() demo = create_ui() demo.launch(server_name="0.0.0.0", server_port=7860)