Spaces:

smainye
/

Finapp

Sleeping

App Files Files Community

Finapp / app.py

smainye

Update app.py

f7b11e7 verified 9 months ago

raw

history blame contribute delete

38.8 kB

	import gradio as gr
	import pandas as pd
	import os
	import logging
	import shutil
	from datetime import datetime
	from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
	import torch
	import tempfile
	import io
	import pdfplumber # Added for PDF fallback processing
	from fuzzywuzzy import process # Added for fuzzy matching
	import numpy as np
	from scipy import stats # For statistical analysis
	import plotly.express as px # For Plotly charts
	import openpyxl # Ensure XLSX dependency

	# Import all backends
	from nebius_backend import (
	NebiusFinanceProcessor,
	process_transactions_nebius,
	generate_financial_report_nebius,
	batch_process_transactions_nebius
	)

	# Set up logging
	logging.basicConfig(level=logging.DEBUG)
	logger = logging.getLogger(__name__)

	# Check if running on Hugging Face Spaces
	IS_HF_SPACE = os.getenv("SPACE_ID") is not None
	HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
	NEBIUS_API_KEY = os.getenv("NEBIUS_API_KEY")

	# Initialize models for local processing
	finbert_model = None
	financial_classifier = None

	# Fallback PDFTransactionProcessor if custom module is unavailable
	try:
	from pdf_processor import PDFTransactionProcessor
	except ImportError:
	class PDFTransactionProcessor:
	"""Fallback PDF processor using pdfplumber to extract tables."""
	def process_pdf(self, pdf_path):
	try:
	logger.debug(f"Processing PDF with pdfplumber: {pdf_path}")
	data = []
	with pdfplumber.open(pdf_path) as pdf:
	for page in pdf.pages:
	tables = page.extract_tables()
	for table in tables:
	if table and len(table) > 1: # Skip empty or header-only tables
	data.extend(table[1:]) # Skip header row
	if not data:
	raise ValueError("No tables found in PDF")
	# Assume first row has headers; adjust if needed
	df = pd.DataFrame(data[1:], columns=data[0])
	# Clean column names
	df.columns = [str(col).lower().strip() if col else f"col_{i}" for i, col in enumerate(df.columns)]
	return df
	except Exception as e:
	logger.error(f"PDF processing failed: {str(e)}")
	raise ValueError(f"Failed to process PDF: {str(e)}")

	pdf_processor = PDFTransactionProcessor()

	def initialize_finbert():
	"""Initialize FinBERT model for financial sentiment analysis."""
	global finbert_model
	if finbert_model is None:
	try:
	logger.info("Initializing FinBERT model...")
	finbert_model = pipeline(
	'sentiment-analysis',
	model='ProsusAI/finbert',
	use_auth_token=HF_TOKEN,
	device=0 if torch.cuda.is_available() else -1
	)
	logger.info("FinBERT model initialized successfully")
	except Exception as e:
	logger.error(f"Failed to initialize FinBERT: {str(e)}")
	try:
	finbert_model = pipeline(
	'sentiment-analysis',
	model='cardiffnlp/twitter-roberta-base-sentiment-latest',
	device=0 if torch.cuda.is_available() else -1
	)
	logger.info("Fallback sentiment model initialized")
	except Exception as fallback_error:
	logger.error(f"Failed to initialize fallback model: {str(fallback_error)}")
	finbert_model = None

	def initialize_financial_classifier():
	"""Initialize a financial classification model."""
	global financial_classifier
	if financial_classifier is None:
	try:
	logger.info("Initializing financial classifier...")
	financial_classifier = pipeline(
	'text-classification',
	model='nlptown/bert-base-multilingual-uncased-sentiment',
	use_auth_token=HF_TOKEN,
	device=0 if torch.cuda.is_available() else -1
	)
	logger.info("Financial classifier initialized successfully")
	except Exception as e:
	logger.error(f"Failed to initialize financial classifier: {str(e)}")
	financial_classifier = None

	# Check API availability
	nebius_available = bool(NEBIUS_API_KEY)

	if nebius_available:
	try:
	test_processor_nebius = NebiusFinanceProcessor(NEBIUS_API_KEY)
	logger.info("Nebius AI Studio API available and initialized")
	except Exception as e:
	logger.warning(f"Nebius initialization failed: {str(e)}")
	nebius_available = False

	def enhanced_categorize_transaction_local(description, amount):
	"""Enhanced categorization using multiple Hugging Face models."""
	try:
	if finbert_model is None:
	initialize_finbert()
	if financial_classifier is None:
	initialize_financial_classifier()

	category = rule_based_categorization(description, amount)

	if finbert_model is not None:
	try:
	sentiment = finbert_model(description)[0]
	confidence = sentiment['score']
	if confidence > 0.8:
	category = refine_category_with_sentiment(category, description, amount, sentiment)
	except Exception as e:
	logger.debug(f"Sentiment analysis failed: {str(e)}")

	if financial_classifier is not None:
	try:
	classification = financial_classifier(description)[0]
	category = refine_category_with_classification(category, classification)
	except Exception as e:
	logger.debug(f"Financial classification failed: {str(e)}")

	return category
	except Exception as e:
	logger.error(f"Error in enhanced categorization: {str(e)}")
	return rule_based_categorization(description, amount)

	def rule_based_categorization(description, amount):
	"""Enhanced rule-based categorization with fuzzy matching."""
	description = description.lower()

	# Fuzzy matching for better category detection
	categories = {
	'Salary Income': ['salary', 'wage', 'payroll', 'pay', 'income', 'bonus'],
	'Refunds & Returns': ['refund', 'return', 'cashback', 'rebate'],
	'Investment Income': ['interest', 'dividend', 'investment', 'stock', 'bond'],
	'Freelance Income': ['freelance', 'consulting', 'contract', 'side hustle'],
	'Other Income': ['miscellaneous', 'other'],
	'Groceries & Food': ['grocery', 'supermarket', 'food store', 'market', 'walmart', 'target', 'costco'],
	'Transportation - Fuel': ['gas', 'fuel', 'petrol', 'station', 'shell', 'bp', 'exxon'],
	'Transportation - Public/Ride': ['uber', 'lyft', 'taxi', 'bus', 'train', 'metro'],
	'Transportation - Vehicle': ['car payment', 'auto loan', 'vehicle', 'maintenance', 'repair'],
	'Utilities': ['electric', 'water', 'gas bill', 'internet', 'phone', 'cable'],
	'Housing': ['rent', 'mortgage', 'housing', 'apartment', 'condo'],
	'Dining Out': ['restaurant', 'dining', 'cafe', 'fast food', 'coffee', 'bar'],
	'Healthcare': ['medical', 'doctor', 'hospital', 'pharmacy', 'dental', 'health'],
	'Insurance': ['insurance', 'premium', 'coverage'],
	'Shopping - Clothing': ['clothing', 'apparel', 'shoes', 'retail', 'store'],
	'Shopping - Electronics': ['electronics', 'computer', 'phone', 'tech', 'gadget'],
	'Entertainment': ['entertainment', 'movie', 'netflix', 'spotify', 'hulu', 'game'],
	'Travel': ['travel', 'hotel', 'airbnb', 'flight', 'vacation'],
	'Education': ['education', 'school', 'tuition', 'course', 'book'],
	'Fitness': ['gym', 'fitness', 'sports', 'yoga', 'membership'],
	'Charity': ['charity', 'donation', 'nonprofit', 'giving'],
	'Taxes': ['tax', 'irs', 'federal', 'state', 'property tax'],
	'Miscellaneous Expenses': ['miscellaneous', 'other']
	}

	if amount > 0:
	for category, keywords in categories.items():
	if category in ['Salary Income', 'Refunds & Returns', 'Investment Income', 'Freelance Income', 'Other Income']:
	for keyword in keywords:
	if process.extractOne(keyword, [description])[1] > 85: # Fuzzy match threshold
	return category
	return 'Other Income'
	else:
	for category, keywords in categories.items():
	if category not in ['Salary Income', 'Refunds & Returns', 'Investment Income', 'Freelance Income', 'Other Income']:
	for keyword in keywords:
	if process.extractOne(keyword, [description])[1] > 85: # Fuzzy match threshold
	return category
	return 'Miscellaneous Expenses'

	def refine_category_with_sentiment(current_category, description, amount, sentiment):
	"""Refine category based on sentiment analysis results."""
	sentiment_label = sentiment['label'].lower()
	confidence = sentiment['score']

	if confidence > 0.8:
	if sentiment_label in ['negative', 'bearish']:
	if current_category in ['Dining Out', 'Entertainment', 'Shopping']:
	return f"Discretionary - {current_category}"
	elif current_category == 'Miscellaneous Expenses':
	return "Questionable Expense"
	elif sentiment_label in ['positive', 'bullish']:
	if current_category == 'Miscellaneous Expenses' and amount > 0:
	return "Unexpected Income"
	elif current_category in ['Groceries & Food', 'Healthcare']:
	return f"Essential - {current_category}"

	return current_category

	def refine_category_with_classification(current_category, classification):
	"""Refine category based on financial classification results."""
	label = classification['label'].lower()
	confidence = classification['score']

	if confidence > 0.8:
	if 'investment' in label:
	return 'Investment Related'
	elif 'loan' in label or 'credit' in label:
	return 'Debt Payment'
	elif 'subscription' in label:
	return 'Subscriptions'
	elif 'business' in label:
	return 'Business Expense' if current_category not in ['Salary Income', 'Freelance Income'] else current_category

	return current_category

	def process_transactions(transactions, backend="local"):
	"""Process transactions locally by categorizing them."""
	processed = []
	for tx in transactions:
	try:
	category = enhanced_categorize_transaction_local(tx['description'], tx['amount'])
	tx_copy = tx.copy()
	tx_copy['category'] = category
	processed.append(tx_copy)
	except Exception as e:
	logger.warning(f"Failed to categorize transaction {tx}: {str(e)}")
	tx_copy = tx.copy()
	tx_copy['category'] = 'Uncategorized'
	processed.append(tx_copy)
	return processed

	def process_file(file_obj, backend="auto", currency="USD"):
	"""Process an uploaded file and return categorized transactions, financial report, and expense chart."""
	try:
	if file_obj is None:
	raise ValueError("No file provided for processing")

	filename = file_obj.name
	logger.debug(f"Processing file: {filename} with backend: {backend}, currency: {currency}")

	# Process based on file type
	if filename.endswith('.pdf'):
	# Use a temporary file with proper cleanup
	with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmpfile:
	shutil.copyfileobj(file_obj, tmpfile)
	tmp_path = tmpfile.name
	try:
	logger.debug(f"Reading PDF: {tmp_path}")
	df = pdf_processor.process_pdf(tmp_path)
	if df.empty:
	raise ValueError("No data extracted from PDF")
	logger.debug(f"PDF DataFrame columns: {list(df.columns)}")
	except Exception as e:
	raise ValueError(f"Failed to process PDF file: {str(e)}")
	finally:
	try:
	os.unlink(tmp_path)
	logger.debug(f"Deleted temp PDF file: {tmp_path}")
	except Exception as e:
	logger.warning(f"Failed to delete temp PDF file: {str(e)}")
	elif filename.endswith('.csv'):
	try:
	logger.debug(f"Reading CSV: {filename}")
	df = pd.read_csv(file_obj)
	if df.empty:
	raise ValueError("CSV file is empty")
	logger.debug(f"CSV DataFrame columns: {list(df.columns)}")
	except Exception as e:
	raise ValueError(f"Failed to process CSV file: {str(e)}")
	elif filename.endswith(('.xlsx', '.xls')):
	try:
	logger.debug(f"Reading Excel: {filename}")
	# Use openpyxl explicitly and read into memory to handle file-like objects
	file_content = file_obj.read()
	df = pd.read_excel(io.BytesIO(file_content), engine='openpyxl')
	if df.empty:
	raise ValueError("Excel file is empty")
	logger.debug(f"Excel DataFrame columns: {list(df.columns)}")
	except Exception as e:
	raise ValueError(f"Failed to process Excel file: {str(e)}")
	else:
	raise ValueError("Unsupported file format. Please upload PDF, CSV, or Excel.")

	# Validate DataFrame
	if df is None or len(df) == 0:
	raise ValueError("No data extracted from file")

	# Standardize and validate columns with fuzzy matching
	df.columns = [str(col).lower().strip() for col in df.columns]
	column_mappings = {
	'date': ['date', 'transaction_date', 'posted_date', 'dt', 'transaction_dt'],
	'description': ['description', 'desc', 'memo', 'notes', 'transaction_description'],
	'amount': ['amount', 'amt', 'value', 'transaction_amount', 'total']
	}
	required_columns = ['date', 'description', 'amount']
	mapped_columns = {}
	for req_col in required_columns:
	for col in df.columns:
	if any(process.extractOne(col, column_mappings[req_col])[1] > 90 for col in [col.lower()]):
	mapped_columns[req_col] = col
	break
	if req_col not in mapped_columns:
	raise ValueError(f"Missing required column similar to '{req_col}'. Found columns: {list(df.columns)}")

	# Rename columns to standard names
	df = df.rename(columns={v: k for k, v in mapped_columns.items()})

	# Handle date parsing with multiple formats
	df['date'] = pd.to_datetime(df['date'], errors='coerce', infer_datetime_format=True)
	if df['date'].isna().all():
	raise ValueError("No valid dates found in 'date' column")
	if df['date'].isna().any():
	logger.warning(f"Dropping {df['date'].isna().sum()} rows with invalid dates")
	df = df.dropna(subset=['date'])

	# Ensure amount is numeric
	df['amount'] = pd.to_numeric(df['amount'], errors='coerce')
	if df['amount'].isna().all():
	raise ValueError("No valid amounts found in 'amount' column")
	if df['amount'].isna().any():
	logger.warning(f"Dropping {df['amount'].isna().sum()} rows with invalid amounts")
	df = df.dropna(subset=['amount'])

	# Ensure description is string
	df['description'] = df['description'].astype(str)

	if df.empty:
	raise ValueError("No valid transactions remain after data cleaning")

	transactions = df.to_dict('records')
	logger.debug(f"Extracted {len(transactions)} transactions: {transactions[:2]}")

	if backend == "auto":
	if not nebius_available:
	logger.warning("Nebius API key not set; falling back to local processing")
	backend = "local"
	else:
	backend = "nebius"
	logger.debug(f"Selected backend: {backend}")

	if backend == "nebius" and nebius_available:
	with tempfile.NamedTemporaryFile(suffix='.csv', delete=False) as tmpfile:
	df.to_csv(tmpfile.name, index=False)
	logger.debug(f"Created temporary CSV file: {tmpfile.name}")
	if not os.path.exists(tmpfile.name):
	raise ValueError(f"Temporary CSV file {tmpfile.name} was not created")
	if os.path.getsize(tmpfile.name) == 0:
	raise ValueError(f"Temporary CSV file {tmpfile.name} is empty")
	processed_df = process_transactions_nebius(tmpfile.name)
	if processed_df.empty or 'error' in processed_df.columns:
	raise ValueError(f"Nebius processing failed: {processed_df.get('error', ['Unknown error'])[0]}")
	report = generate_financial_report_nebius(processed_df.to_dict('records'))
	logger.debug(f"Nebius report: {report}")
	try:
	os.unlink(tmpfile.name)
	logger.debug(f"Deleted temporary CSV file: {tmpfile.name}")
	except Exception as e:
	logger.warning(f"Failed to delete temp CSV file: {str(e)}")
	else:
	processed = process_transactions(transactions, "local")
	report = generate_report(processed, "local")
	processed_df = pd.DataFrame(processed)
	logger.debug(f"Local report: {report}")

	# Currency conversion rates (approximate as of June 09, 2025)
	currency_rates = {
	"USD": 1.0,
	"GBP": 0.79,
	"JPY": 157.5,
	"KES": 129.0
	}
	rate = currency_rates.get(currency, 1.0)
	processed_df['Amount'] = processed_df['amount'] * rate

	# Generate expense chart data using Plotly
	expense_chart = None
	if not processed_df.empty:
	expense_df = processed_df[processed_df['amount'] < 0].groupby('category')['Amount'].sum() * -1
	if not expense_df.empty:
	top_expenses = expense_df.sort_values(ascending=False).head(5)
	chart_df = pd.DataFrame({
	'Category': top_expenses.index,
	'Amount': top_expenses.values
	})
	expense_chart = px.pie(
	chart_df,
	values='Amount',
	names='Category',
	title=f'Top 5 Expense Categories ({currency})',
	color_discrete_sequence=['#FF6384', '#36A2EB', '#FFCE56', '#4BC0C0', '#9966FF']
	)
	expense_chart.update_layout(
	margin=dict(t=50, b=10, l=10, r=10),
	legend=dict(orientation="h", yanchor="top", y=1.1, xanchor="center", x=0.5),
	font=dict(color="#333333")
	)

	# Format the summary with currency symbol
	if isinstance(report, dict) and 'error' not in report:
	summary = report.get('summary', report)
	summary['total_income'] = round(summary.get('total_income', 0) * rate, 2)
	summary['total_expenses'] = round(summary.get('total_expenses', 0) * rate, 2)
	summary['net_balance'] = round(summary.get('net_balance', 0) * rate, 2)
	formatted_summary = format_summary(summary, currency, processed_df)
	error_message = ""
	else:
	summary = {"error": str(report.get('error', 'Unknown error in report generation'))}
	formatted_summary = format_summary(summary, currency, pd.DataFrame())
	error_message = summary.get('error', "Unknown error")

	logger.debug("File processing completed successfully")
	# Prepare temporary files for CSV and XLSX downloads
	with tempfile.NamedTemporaryFile(suffix='.csv', delete=False) as tmp_csv:
	tmp_csv.write(processed_df.to_csv(index=False).encode())
	csv_path = tmp_csv.name
	with tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) as tmp_xlsx:
	output = io.BytesIO()
	processed_df.to_excel(output, index=False, engine='openpyxl')
	tmp_xlsx.write(output.getvalue())
	xlsx_path = tmp_xlsx.name

	return processed_df, formatted_summary, error_message, csv_path, xlsx_path, expense_chart

	except Exception as e:
	logger.error(f"Error processing file: {str(e)}")
	error_message = str(e)
	return pd.DataFrame({"error": [str(e)]}), format_summary({"error": str(e)}, currency, pd.DataFrame()), error_message, None, None, None

	def generate_report(transactions, backend="auto"):
	"""Generate a financial report from processed transactions."""
	try:
	if backend == "auto":
	if nebius_available:
	backend = "nebius"
	else:
	backend = "local"

	if backend == "nebius" and nebius_available:
	return generate_financial_report_nebius(transactions)
	else:
	return generate_local_report(transactions)

	except Exception as e:
	logger.error(f"Error in generate_report: {str(e)}")
	return generate_local_report(transactions)

	def generate_local_report(transactions):
	"""Generate a detailed financial report with enhanced AI insights."""
	if not transactions:
	return {
	'summary': {
	'total_income': 0,
	'total_expenses': 0,
	'net_balance': 0,
	'num_transactions': 0
	},
	'trends': [],
	'insights': ["No transactions provided for analysis."]
	}

	df = pd.DataFrame(transactions)

	# Convert date strings to datetime for analysis
	df['date'] = pd.to_datetime(df['date'], errors='coerce')
	df = df.dropna(subset=['date'])

	# Calculate basic metrics
	income = df[df['amount'] > 0]['amount'].sum()
	expenses = df[df['amount'] < 0]['amount'].sum() * -1
	net = income - expenses

	# Group by categories
	expense_categories = df[df['amount'] < 0].groupby('category')['amount'].sum().sort_values()
	income_categories = df[df['amount'] > 0].groupby('category')['amount'].sum().sort_values(ascending=False)

	# Time-based analysis
	df['month'] = df['date'].dt.to_period('M')
	df['year'] = df['date'].dt.to_period('Y')
	monthly_expenses = df[df['amount'] < 0].groupby('month')['amount'].sum() * -1
	monthly_income = df[df['amount'] > 0].groupby('month')['amount'].sum()
	ytd_expenses = df[df['amount'] < 0].groupby('year')['amount'].sum() * -1
	ytd_income = df[df['amount'] > 0].groupby('year')['amount'].sum()
	current_month = pd.to_datetime('2025-06-09').to_period('M')
	current_year = pd.to_datetime('2025-06-09').to_period('Y')
	mtd_expenses = df[(df['amount'] < 0) & (df['month'] == current_month)]['amount'].sum() * -1
	mtd_income = df[(df['amount'] > 0) & (df['month'] == current_month)]['amount'].sum()
	ytd_expenses_total = ytd_expenses.get(current_year, 0) * -1
	ytd_income_total = ytd_income.get(current_year, 0)

	# Advanced AI Insights
	insights = []

	# 1. Spending Behavior Analysis
	frequent_categories = df[df['amount'] < 0].groupby('category').size().sort_values(ascending=False).head(5)
	if not frequent_categories.empty:
	top_categories = [f"{cat} ({count} transactions, {df[(df['category'] == cat) & (df['amount'] < 0)]['amount'].sum() * -1:.2f})"
	for cat, count in frequent_categories.items()]
	insights.append(f"Top Spending Categories: {', '.join(top_categories)} account for the majority of your transactions.")

	# 2. Anomaly Detection (using IQR method)
	amounts = df['amount'].abs()
	q1, q3 = amounts.quantile([0.25, 0.75])
	iqr = q3 - q1
	anomaly_threshold = q3 + 1.5 * iqr
	anomalies = df[amounts > anomaly_threshold]
	if not anomalies.empty:
	for _, anomaly in anomalies.iterrows():
	insights.append(f"Anomaly Detected: Unusual {('income' if anomaly['amount'] > 0 else 'expense')} of {anomaly['amount']:.2f} {currency_symbols.get(currency, '$')} on {anomaly['date'].date()} for '{anomaly['description']}' (category: {anomaly['category']}).")

	# 3. Income Stability and Volatility
	if len(monthly_income) > 1:
	income_mean = monthly_income.mean()
	income_std = monthly_income.std()
	income_cv = income_std / income_mean if income_mean > 0 else 0
	if income_cv > 0.3:
	insights.append(f"Income Volatility: Your income varies significantly (coefficient of variation: {income_cv:.2%}), indicating irregular earnings. Consider stabilizing income sources.")
	else:
	insights.append(f"Income Stability: Your monthly income appears consistent with low variability.")

	# 4. Spending Trends (Seasonality Analysis)
	if len(monthly_expenses) > 3:
	expense_trend = stats.linregress(range(len(monthly_expenses)), monthly_expenses.values)
	if expense_trend.slope > 0:
	insights.append(f"Spending Trend: Your monthly expenses are increasing by approximately {expense_trend.slope:.2f} {currency_symbols.get(currency, '$')} per month. Review discretionary spending.")
	elif expense_trend.slope < 0:
	insights.append(f"Spending Trend: Your monthly expenses are decreasing by approximately {-expense_trend.slope:.2f} {currency_symbols.get(currency, '$')} per month. Good job controlling costs!")

	# 5. Financial Health
	if income > 0:
	expense_to_income_ratio = expenses / income
	financial_health_score = max(0, min(10, 10 - (expense_to_income_ratio * 10)))
	health_assessment = "Healthy" if financial_health_score >= 7 else "Needs Improvement" if financial_health_score >= 4 else "At Risk"
	insights.append(f"Financial Health Score: {financial_health_score:.1f}/10 ({health_assessment})")
	if expense_to_income_ratio > 0.7:
	insights.append(f"Alert: Spending is {expense_to_income_ratio:.1%} of income. Aim to reduce discretionary expenses.")

	# 6. MTD and YTD Insights
	if mtd_expenses > 0:
	avg_daily_mtd = mtd_expenses / df[df['month'] == current_month]['date'].nunique()
	days_in_month = 30
	projected_mtd = avg_daily_mtd * days_in_month
	insights.append(f"MTD Spending (as of June 09, 2025): {mtd_expenses:.2f} {currency_symbols.get(currency, '$')}. Projected monthly spend: {projected_mtd:.2f} {currency_symbols.get(currency, '$')}.")
	if ytd_income_total > 0 and ytd_expenses_total > 0:
	ytd_savings_rate = 1 - (ytd_expenses_total / ytd_income_total)
	insights.append(f"YTD Savings Rate: {ytd_savings_rate:.1%} of income saved in 2025.")

	# 7. Predictive Insights
	if len(monthly_expenses) > 1:
	avg_monthly_expense = monthly_expenses.mean()
	insights.append(f"Forecast: Based on historical data, expect next month's expenses to be around {avg_monthly_expense:.2f} {currency_symbols.get(currency, '$')} ± {monthly_expenses.std():.2f}.")

	# 8. Personalized Recommendations
	if expenses > income * 0.7:
	savings_target = income * 0.2
	insights.append(f"Recommendation: Reduce expenses by {savings_target:.2f} {currency_symbols.get(currency, '$')} (20% of income) to improve savings. Focus on discretionary categories like {frequent_categories.index[0] if not frequent_categories.empty else 'miscellaneous'}.")
	elif net > 0 and net < income * 0.1:
	insights.append("Recommendation: Consider investing excess funds in low-risk options to grow your wealth.")
	if 'Discretionary -' in df['category'].values:
	discretionary_expenses = df[df['category'].str.contains('Discretionary')]['amount'].sum() * -1
	insights.append(f"Recommendation: Discretionary spending totals {discretionary_expenses:.2f} {currency_symbols.get(currency, '$')}. Consider cutting back on non-essential purchases.")

	# Fallback if no insights
	if not insights:
	insights.append("No significant patterns detected. Review your spending for potential optimizations.")

	report = {
	'summary': {
	'total_income': round(income, 2),
	'total_expenses': round(expenses, 2),
	'net_balance': round(net, 2),
	'num_transactions': len(df)
	},
	'trends': {
	'top_expense_categories': [
	{'category': cat, 'amount': round(abs(amt), 2), 'count': len(df[df['category'] == cat])}
	for cat, amt in expense_categories.head(5).items()
	],
	'top_income_categories': [
	{'category': cat, 'amount': round(amt, 2), 'count': len(df[df['category'] == cat])}
	for cat, amt in income_categories.head(3).items()
	]
	},
	'insights': insights
	}

	return report

	def format_summary(summary, currency, df):
	"""Format financial summary as readable text with enhanced structure."""
	currency_symbols = {"USD": "$", "GBP": "£", "JPY": "¥", "KES": "KSh"}
	symbol = currency_symbols.get(currency, "KSh")

	if 'error' in summary:
	return f"Error: {summary['error']}"

	lines = ["## Financial Summary"]
	lines.append(f"Overview (in {currency})")
	lines.append(f"- Total Income: {symbol}{summary.get('total_income', 0):,.2f}")
	lines.append(f"- Total Expenses: {symbol}{summary.get('total_expenses', 0):,.2f}")
	lines.append(f"- Net Balance: {symbol}{summary.get('net_balance', 0):,.2f}")
	lines.append(f"- Number of Transactions: {summary.get('num_transactions', 0)}")

	if 'trends' in summary:
	trends = summary['trends']
	if trends.get('top_expense_categories'):
	lines.append("\nTop Expense Categories")
	for item in trends['top_expense_categories']:
	lines.append(f" - {item['category']}: {symbol}{item['amount']:.2f} ({item['count']} transactions)")
	if trends.get('top_income_categories'):
	lines.append("\nTop Income Categories")
	for item in trends['top_income_categories']:
	lines.append(f" - {item['category']}: {symbol}{item['amount']:.2f} ({item['count']} transactions)")

	if 'insights' in summary:
	lines.append("\n## AI-Powered Insights")
	for insight in summary['insights']:
	lines.append(f"- {insight}")

	# Add simple ASCII chart for expense categories
	if not df.empty:
	expense_df = df[df['amount'] < 0].groupby('category')['amount'].sum().sort_values() * -1
	if not expense_df.empty:
	lines.append("\nExpense Distribution (ASCII)")
	max_amount = expense_df.max()
	for cat, amt in expense_df.head(5).items():
	bar_length = int((amt / max_amount) * 20) if max_amount > 0 else 0
	lines.append(f"{cat}: {symbol}{amt:.2f} {'█' * bar_length}")

	return "\n".join(lines)

	# Global variable to store the last selected currency
	currency = "KSh"
	currency_symbols = {"USD": "$", "GBP": "£", "JPY": "¥", "KES": "KSh"}

	def create_ui():
	with gr.Blocks(title="Financial Transaction Processor", theme="soft") as demo:
	gr.Markdown("# Financial Transaction Processor")
	gr.Markdown("Upload your bank transactions (PDF, CSV, or Excel) to get categorized spending and financial insights.")

	with gr.Row():
	with gr.Column():
	file_input = gr.File(label="Upload Transactions (CSV, PDF, or Excel)")
	currency_selector = gr.Dropdown(
	choices=["USD", "GBP", "JPY", "KES"],
	value="KES",
	label="Select Currency"
	)
	backend_radio = gr.Radio(
	choices=["auto", "nebius", "local"],
	value="auto",
	label="Processing Backend"
	)
	process_btn = gr.Button("Process Transactions", variant="primary")

	with gr.Column():
	processed_table = gr.Dataframe(
	label="Processed Transactions",
	headers=["Date", "Description", "Amount", "Category"],
	datatype=["str", "str", "number", "str"]
	)
	download_csv_btn = gr.DownloadButton(
	value=None,
	label="Download Table as CSV"
	)
	download_xlsx_btn = gr.DownloadButton(
	value=None,
	label="Download Table as XLSX"
	)

	with gr.Row():
	with gr.Column(elem_classes=["summary-column"], scale=1):
	report_summary = gr.Markdown(label="Financial Summary")
	expense_chart = gr.Plot(label="Expense Distribution Chart")

	with gr.Row():
	error_display = gr.Textbox(label="Error Details", visible=False)

	# CSS definition
	demo.css = """
	/* Container Styles */
	.financial-container {
	font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
	max-width: 1200px;
	margin: 0 auto;
	padding: 20px;
	color: #333;
	}

	/* Summary Column Styles */
	.summary-column {
	border: 2px solid #4a90e2;
	padding: 15px;
	margin: 10px;
	border-radius: 8px;
	background-color: #f8fafc;
	box-shadow: 0 2px 5px rgba(0,0,0,0.1);
	transition: all 0.3s ease;
	}

	.summary-column:hover {
	box-shadow: 0 5px 15px rgba(0,0,0,0.1);
	transform: translateY(-2px);
	}

	/* Header Styles */
	.summary-header {
	color: #2c5282;
	font-size: 1.2rem;
	font-weight: 600;
	margin-bottom: 15px;
	padding-bottom: 8px;
	border-bottom: 1px solid #cbd5e0;
	}

	/* Data Display Styles */
	.summary-data {
	display: flex;
	justify-content: space-between;
	margin: 8px 0;
	}

	.data-label {
	font-weight: 500;
	color: #4a5568;
	}

	.data-value {
	font-weight: 600;
	}

	.positive-value {
	color: #38a169;
	}

	.negative-value {
	color: #e53e3e;
	}

	/* Chart Container */
	.chart-container {
	height: 250px;
	margin: 20px 0;
	padding: 15px;
	background: white;
	border-radius: 8px;
	box-shadow: 0 2px 10px rgba(0,0,0,0.05);
	}

	/* Responsive Grid */
	.summary-grid {
	display: grid;
	grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
	gap: 15px;
	}

	/* Button Styles */
	.action-button {
	background-color: #4299e1;
	color: white;
	border: none;
	padding: 8px 16px;
	border-radius: 4px;
	cursor: pointer;
	font-size: 0.9rem;
	margin-top: 10px;
	transition: background-color 0.2s;
	}

	.action-button:hover {
	background-color: #3182ce;
	}

	/* Table Styles */
	.transaction-table {
	width: 100;
	border-collapse: collapse;
	margin-top: 20px;
	}

	.transaction-table th {
	background-color: #4a90e2;
	color: white;
	padding: 12px;
	text-align: left;
	}

	.transaction-table td {
	padding: 10px;
	border-bottom: 1px solid #e2e8f0;
	}

	.transaction-table tr:hover {
	background-color: #ebf8ff;
	}

	/* Responsive Media Queries */
	@media (max-width: 768px) {
	.summary-grid {
	grid-template-columns: 1fr;
	}

	.financial-container {
	padding: 10px;
	}
	}
	"""
	with gr.Accordion("Example Transaction File Format", open=False):
	gr.Markdown("""
	Your file should include at least these columns (case-insensitive):
	- Date: Transaction date (YYYY-MM-DD format preferred)
	- Description: Transaction description/text
	- Amount: Positive for income, negative for expenses (in USD by default)

	Example CSV content:
	```
	Date,Description,Amount
	2025-06-01,Salary Deposit,3000.00
	2025-06-02,Grocery Store,-125.50
	2025-06-03,Restaurant,-50.00
	2025-06-04,Netflix,-15.00
	2025-06-05,Fuel,-40.00
	```
	""")

	process_btn.click(
	fn=process_file,
	inputs=[file_input, backend_radio, currency_selector],
	outputs=[processed_table, report_summary, error_display, download_csv_btn, download_xlsx_btn, expense_chart]
	).then(
	fn=lambda error_msg: gr.update(visible=bool(error_msg)),
	inputs=[error_display],
	outputs=[error_display]
	)

	currency_selector.change(
	fn=lambda x: globals().update(currency=x),
	inputs=[currency_selector],
	outputs=[]
	)

	return demo

	if __name__ == "__main__":
	initialize_finbert()
	initialize_financial_classifier()
	demo = create_ui()
	demo.launch(server_name="0.0.0.0", server_port=7860)