Spaces:

walaa2022
/

fin_analysis

Build error

App Files Files Community

fin_analysis / app.py

walaa2022

Update app.py

ea6f9b4 verified about 1 year ago

raw

history blame contribute delete

19.6 kB

	import gradio as gr
	import pandas as pd
	import json
	from transformers import (
	AutoTokenizer,
	AutoModelForCausalLM,
	AutoModelForSequenceClassification
	)
	import torch
	import numpy as np
	import re

	class FinancialDataset:
	    """Index-addressable collection of tokenized texts paired with labels.

	    Each item is tokenized on access (nothing is cached) and returned as a
	    dict with ``input_ids``, ``attention_mask`` and ``labels`` tensors.

	    Args:
	        texts: Indexable collection of texts; each entry is passed through
	            ``str()`` before tokenization.
	        labels: Indexable collection of labels, addressed with the same
	            index as ``texts``.
	        tokenizer: Callable invoked as ``tokenizer(text, truncation=True,
	            padding='max_length', max_length=..., return_tensors='pt')``
	            whose result is indexable by ``'input_ids'`` and
	            ``'attention_mask'``.
	        max_length: Length every sample is padded/truncated to.
	    """

	    def __init__(self, texts, labels, tokenizer, max_length=512):
	        self.texts = texts
	        self.labels = labels
	        self.tokenizer = tokenizer
	        self.max_length = max_length

	    def __len__(self):
	        """Return the number of texts in the dataset."""
	        return len(self.texts)

	    def __getitem__(self, idx):
	        """Tokenize the text at ``idx`` and return its tensors and label."""
	        encoded = self.tokenizer(
	            str(self.texts[idx]),
	            truncation=True,
	            padding='max_length',
	            max_length=self.max_length,
	            return_tensors='pt',
	        )
	        # Drop only the leading batch axis. A bare ``squeeze()`` would also
	        # remove the sequence axis whenever max_length == 1.
	        return {
	            'input_ids': encoded['input_ids'].squeeze(0),
	            'attention_mask': encoded['attention_mask'].squeeze(0),
	            'labels': torch.tensor(self.labels[idx], dtype=torch.long),
	        }

	class FinancialAnalyzer:
	    """Parse Markdown financial statements, compute ratios and narrate them.

	    Constructing an instance loads the tokenizer and causal language model
	    named by ``MODEL_NAME``. ``analyze_financials`` is the entry point: it
	    reads two Markdown files, extracts their tables, computes the ratios in
	    ``calculate_metrics`` and returns a JSON report as a string.
	    """

	    # Hugging Face model id used for both the tokenizer and the causal LM.
	    MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

	    # metric name -> (lower_is_better, ((threshold, label), ...), fallback).
	    # Bands are checked in order; the first one the value clears wins.
	    _METRIC_BANDS = {
	        'gross_profit_margin': (False, ((40, 'Strong'), (30, 'Adequate')), 'Needs improvement'),
	        'current_ratio': (False, ((2, 'Strong'), (1, 'Adequate')), 'Concerning'),
	        'debt_ratio': (True, ((40, 'Conservative'), (60, 'Moderate')), 'High risk'),
	        'ar_turnover': (False, ((8, 'Excellent'), (4, 'Good')), 'Needs improvement'),
	        'roe': (False, ((15, 'Strong'), (10, 'Adequate')), 'Below target'),
	        'net_profit_margin': (False, ((10, 'Strong'), (5, 'Adequate')), 'Needs improvement'),
	        'retained_earnings_ratio': (False, ((30, 'Strong'), (15, 'Adequate')), 'Low retention'),
	        'sgr': (False, ((10, 'Strong'), (5, 'Moderate')), 'Limited growth potential'),
	        'revenue_growth': (False, ((10, 'Strong'), (5, 'Moderate')), 'Below industry average'),
	    }

	    def __init__(self):
	        print("Initializing Analyzer...")
	        self.last_metrics = {}
	        self.initialize_models()
	        print("Initialization complete!")

	    def initialize_models(self):
	        """Load the tokenizer and model and put the model in eval mode.

	        Raises:
	            Exception: Whatever the loaders raise; it is logged and re-raised.
	        """
	        try:
	            self.llama_tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
	            self.llama_model = AutoModelForCausalLM.from_pretrained(self.MODEL_NAME)
	            self.llama_model.eval()
	            print("Models loaded successfully!")
	        except Exception as e:
	            print(f"Error initializing models: {str(e)}")
	            raise

	    def clean_number(self, value):
	        """Convert a table cell to ``float``.

	        Strips ``$`` and thousands separators and treats a parenthesised
	        value such as ``(1,234)`` as negative. Empty, ``None`` or
	        unparseable input yields ``0.0``.
	        """
	        try:
	            if isinstance(value, str):
	                text = value.replace('$', '').replace(',', '').strip()
	                if '(' in text and ')' in text:
	                    # Accounting notation: "( 500 )" means -500.
	                    text = '-' + text.replace('(', '').replace(')', '').strip()
	                value = text
	            return float(value or 0)
	        except (TypeError, ValueError, OverflowError):
	            return 0.0

	    def is_valid_markdown(self, file_path):
	        """Return True if the file has at least one heading or table line.

	        A missing path (for example ``None`` when nothing was uploaded), an
	        unreadable file or undecodable content all yield ``False``.
	        """
	        if not file_path:
	            return False
	        try:
	            # utf-8-sig also accepts files that start with a BOM.
	            with open(file_path, 'r', encoding='utf-8-sig') as f:
	                return any(line.startswith('#') or '|' in line for line in f)
	        except (OSError, TypeError, UnicodeDecodeError):
	            return False

	    @staticmethod
	    def _split_table_row(line):
	        """Split one Markdown table line into stripped cell strings."""
	        body = line.strip()
	        # Outer pipes are optional; remove at most one from each side so
	        # that genuinely empty edge cells are kept.
	        if body.startswith('|'):
	            body = body[1:]
	        if body.endswith('|'):
	            body = body[:-1]
	        if not body.strip():
	            return []
	        return [cell.strip() for cell in body.split('|')]

	    @staticmethod
	    def _is_separator_row(cells):
	        """Return True for a header delimiter row such as ``| --- | :-: |``."""
	        return bool(cells) and all(
	            cell and '-' in cell and set(cell) <= {'-', ':'} for cell in cells
	        )

	    def parse_financial_data(self, content):
	        """Parse Markdown text into ``{section: {item: {column: float}}}``.

	        A line starting with ``#`` opens a new section. Within a section
	        the first table row supplies the column headers and later rows are
	        data. A section is stored only if it has headers and at least one
	        data row. Returns ``{}`` if ``content`` cannot be processed.
	        """
	        try:
	            data = {}
	            section = ""
	            headers = None
	            rows = []

	            for raw_line in content.splitlines():
	                line = raw_line.strip()
	                if line.startswith('#'):
	                    if rows and headers:
	                        data[section] = self.process_table(headers, rows)
	                    section = line.strip('# ')
	                    rows = []
	                    headers = None
	                elif '|' in line:
	                    cells = self._split_table_row(line)
	                    if not cells or self._is_separator_row(cells):
	                        continue
	                    if not headers:
	                        headers = cells
	                    else:
	                        rows.append(cells)

	            # Flush the table that was still open at end of input.
	            if rows and headers:
	                data[section] = self.process_table(headers, rows)

	            return data
	        except (AttributeError, TypeError) as e:
	            print(f"Error parsing financial data: {str(e)}")
	            return {}

	    def process_table(self, headers, rows):
	        """Turn table rows into ``{item_name: {header: float}}``.

	        The first cell of each row is the item name (surrounding ``*`` bold
	        markers removed). Rows whose width differs from the header row are
	        skipped. Returns ``{}`` if the inputs are malformed.
	        """
	        try:
	            # Bold markers around a header would otherwise stop lookups by
	            # plain column name from matching.
	            columns = [header.strip('*').strip() for header in headers]
	            table = {}
	            for row in rows:
	                if len(row) != len(columns):
	                    continue
	                item_name = row[0].strip('*').strip()
	                table[item_name] = {
	                    column: self.clean_number(cell)
	                    for column, cell in zip(columns[1:], row[1:])
	                }
	            return table
	        except (AttributeError, TypeError, IndexError) as e:
	            print(f"Error processing table: {str(e)}")
	            return {}

	    def get_nested_value(self, data, section, key, year):
	        """Return ``data[section][key][str(year)]``, or ``0`` if absent."""
	        try:
	            return data.get(section, {}).get(key, {}).get(str(year), 0)
	        except AttributeError:
	            # One of the levels was not a mapping.
	            return 0

	    def calculate_metrics(self, income_data, balance_data, year="2025",
	                          prior_year="2024", base_year="2021"):
	        """Compute the financial ratios used by the report.

	        Args:
	            income_data: Parsed income statement (see ``parse_financial_data``).
	            balance_data: Parsed balance sheet.
	            year: Column holding the reporting-year figures.
	            prior_year: Column used for year-over-year revenue growth.
	            base_year: Column used as the starting point for revenue CAGR.

	        Returns:
	            Dict of metric name to number. Percent-style metrics (margins,
	            ``roe``, ``debt_ratio``, ``sgr``, growth figures) are expressed
	            in percent. Any ratio with a zero denominator is ``0``.
	            Returns ``{}`` if the computation fails.
	        """
	        try:
	            def income(section, key, period=year):
	                return self.get_nested_value(income_data, section, key, period)

	            def balance(section, key):
	                return self.get_nested_value(balance_data, section, key, year)

	            def ratio(numerator, denominator, scale=1):
	                return (numerator / denominator) * scale if denominator != 0 else 0

	            metrics = {}

	            # 1. Gross Profit Margin
	            revenue = income("Revenue", "Total Net Revenue")
	            cogs = income("Cost and Gross Profit", "Cost of Goods Sold")
	            metrics['gross_profit_margin'] = ratio(revenue - cogs, revenue, 100)

	            # 2. Current Ratio
	            current_assets = balance("Key Totals", "Total_Current_Assets")
	            current_liabilities = balance("Key Totals", "Total_Current_Liabilities")
	            metrics['current_ratio'] = ratio(current_assets, current_liabilities)

	            # 3. Debt Ratio
	            total_liabilities = balance("Key Totals", "Total_Liabilities")
	            total_assets = balance("Key Totals", "Total_Assets")
	            metrics['debt_ratio'] = ratio(total_liabilities, total_assets, 100)

	            # 4. Sustainable Growth Rate (SGR)
	            net_income = income("Profit Summary", "Net Earnings")
	            equity = balance("Key Totals", "Total_Shareholders_Equity")
	            dividends = income("Profit Summary", "Dividends Paid")

	            roe = ratio(net_income, equity, 100)
	            retention_ratio = ratio(net_income - dividends, net_income)
	            # roe is already in percent, so the product is too. It must stay
	            # in percent because it is printed with '%' and compared with
	            # the percent thresholds in _METRIC_BANDS.
	            metrics['sgr'] = roe * retention_ratio

	            # 5. Accounts Receivable Turnover
	            accounts_receivable = balance("Balance Sheet Data 2021-2025", "Accounts_Receivable")
	            metrics['ar_turnover'] = ratio(revenue, accounts_receivable)

	            # 6. Return on Equity (ROE)
	            metrics['roe'] = roe

	            # 7. Net Profit Margin
	            metrics['net_profit_margin'] = ratio(net_income, revenue, 100)

	            # 8. Retained Earnings Ratio
	            retained_earnings = balance("Balance Sheet Data 2021-2025", "Retained_Earnings")
	            metrics['retained_earnings_ratio'] = ratio(retained_earnings, total_assets, 100)

	            # 9. Revenue Growth (YoY)
	            revenue_prior = income("Revenue", "Total Net Revenue", prior_year)
	            metrics['revenue_growth'] = (
	                ((revenue / revenue_prior) - 1) * 100 if revenue_prior != 0 else 0
	            )

	            # 10. Revenue CAGR (base_year -> year)
	            revenue_base = income("Revenue", "Total Net Revenue", base_year)
	            try:
	                periods = int(year) - int(base_year)
	            except (TypeError, ValueError):
	                periods = 0
	            growth_factor = ratio(revenue, revenue_base)
	            # A negative factor raised to a fractional power is a complex
	            # number in Python, so report 0 in that case.
	            if periods > 0 and revenue_base != 0 and growth_factor >= 0:
	                metrics['revenue_cagr'] = (growth_factor ** (1 / periods) - 1) * 100
	            else:
	                metrics['revenue_cagr'] = 0

	            return metrics
	        except (ArithmeticError, TypeError, ValueError) as e:
	            print(f"Error calculating metrics: {e}")
	            return {}

	    def generate_analysis(self, metrics):
	        """Ask the language model for a narrative analysis of ``metrics``.

	        Falls back to ``generate_fallback_analysis`` when generation raises
	        or when the model produces fewer than 100 words.
	        """
	        try:
	            # NOTE(review): the prompt uses [INST] tags; confirm this matches
	            # the chat template expected by MODEL_NAME.
	            prompt = f"""[INST] As a financial analyst, provide a comprehensive analysis based on these metrics:

	1. Profitability:
	- Gross Profit Margin: {metrics.get('gross_profit_margin', 0):.2f}%
	- Net Profit Margin: {metrics.get('net_profit_margin', 0):.2f}%
	- Return on Equity: {metrics.get('roe', 0):.2f}%

	2. Liquidity & Efficiency:
	- Current Ratio: {metrics.get('current_ratio', 0):.2f}
	- Accounts Receivable Turnover: {metrics.get('ar_turnover', 0):.2f}

	3. Financial Structure:
	- Debt Ratio: {metrics.get('debt_ratio', 0):.2f}%
	- Retained Earnings Ratio: {metrics.get('retained_earnings_ratio', 0):.2f}%

	4. Growth:
	- Sustainable Growth Rate: {metrics.get('sgr', 0):.2f}%
	- Revenue Growth (YoY): {metrics.get('revenue_growth', 0):.2f}%

	Provide:
	1. Overall financial health assessment
	2. Key strengths and concerns
	3. Operational efficiency analysis
	4. Specific recommendations for improvement
	[/INST]"""

	            inputs = self.llama_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
	            prompt_length = inputs["input_ids"].shape[-1]
	            outputs = self.llama_model.generate(
	                inputs["input_ids"],
	                attention_mask=inputs["attention_mask"],
	                max_new_tokens=1024,
	                min_new_tokens=200,
	                # temperature/top_p only take effect when sampling is on.
	                do_sample=True,
	                temperature=0.7,
	                top_p=0.95,
	                repetition_penalty=1.2,
	                # length_penalty is omitted: it only applies to beam search,
	                # which is not used here.
	            )

	            # The returned sequence starts with the prompt tokens. Decode
	            # only what was generated so the prompt is neither echoed to
	            # the user nor counted by the length check below.
	            analysis = self.llama_tokenizer.decode(
	                outputs[0][prompt_length:], skip_special_tokens=True
	            ).strip()

	            if len(analysis.split()) < 100:
	                return self.generate_fallback_analysis(metrics)

	            return analysis

	        except Exception as e:
	            # Any failure in tokenization or inference falls back to the
	            # rule-based summary.
	            print(f"Error generating analysis: {str(e)}")
	            return self.generate_fallback_analysis(metrics)

	    def generate_fallback_analysis(self, metrics):
	        """Build a rule-based summary of ``metrics`` without the model."""
	        try:
	            analysis = f"""Financial Analysis Summary:

	1. Profitability Assessment:
	- Gross Profit Margin: {metrics.get('gross_profit_margin', 0):.2f}%
	({self.interpret_metric('gross_profit_margin', metrics.get('gross_profit_margin', 0))})
	- Net Profit Margin: {metrics.get('net_profit_margin', 0):.2f}%
	({self.interpret_metric('net_profit_margin', metrics.get('net_profit_margin', 0))})
	- Return on Equity: {metrics.get('roe', 0):.2f}%
	({self.interpret_metric('roe', metrics.get('roe', 0))})

	2. Liquidity & Efficiency Analysis:
	- Current Ratio: {metrics.get('current_ratio', 0):.2f}
	({self.interpret_metric('current_ratio', metrics.get('current_ratio', 0))})
	- AR Turnover: {metrics.get('ar_turnover', 0):.2f}
	({self.interpret_metric('ar_turnover', metrics.get('ar_turnover', 0))})

	3. Financial Structure:
	- Debt Ratio: {metrics.get('debt_ratio', 0):.2f}%
	({self.interpret_metric('debt_ratio', metrics.get('debt_ratio', 0))})
	- Retained Earnings Ratio: {metrics.get('retained_earnings_ratio', 0):.2f}%
	({self.interpret_metric('retained_earnings_ratio', metrics.get('retained_earnings_ratio', 0))})

	4. Growth & Sustainability:
	- Sustainable Growth Rate: {metrics.get('sgr', 0):.2f}%
	({self.interpret_metric('sgr', metrics.get('sgr', 0))})
	- Revenue Growth: {metrics.get('revenue_growth', 0):.2f}%
	({self.interpret_metric('revenue_growth', metrics.get('revenue_growth', 0))})

	{self.generate_recommendations(metrics)}"""
	            return analysis

	        except Exception as e:
	            return f"Error generating fallback analysis: {str(e)}"

	    def interpret_metric(self, metric_name, value):
	        """Return the qualitative label for ``value`` of ``metric_name``.

	        Unknown metrics give ``'No interpretation'``; a value that cannot
	        be compared with the thresholds gives ``'Unable to interpret'``.
	        """
	        bands = self._METRIC_BANDS.get(metric_name)
	        if bands is None:
	            return 'No interpretation'
	        lower_is_better, thresholds, fallback = bands
	        try:
	            for limit, label in thresholds:
	                cleared = value < limit if lower_is_better else value > limit
	                if cleared:
	                    return label
	        except TypeError:
	            return 'Unable to interpret'
	        return fallback

	    def generate_recommendations(self, metrics):
	        """Return a bullet list of recommendations triggered by weak metrics.

	        A metric missing from ``metrics`` is treated as ``0``.
	        """
	        recommendations = []

	        if metrics.get('gross_profit_margin', 0) < 30:
	            recommendations.append("- Review pricing strategy and cost structure to improve gross margins")
	        if metrics.get('current_ratio', 0) < 1.5:
	            recommendations.append("- Strengthen working capital management to improve liquidity")
	        if metrics.get('debt_ratio', 0) > 60:
	            recommendations.append("- Consider debt reduction strategies to improve financial flexibility")
	        if metrics.get('ar_turnover', 0) < 4:
	            recommendations.append("- Improve accounts receivable collection practices")
	        if metrics.get('roe', 0) < 10:
	            recommendations.append("- Focus on improving operational efficiency to enhance returns")
	        if metrics.get('revenue_growth', 0) < 5:
	            recommendations.append("- Develop strategies to accelerate revenue growth")
	            recommendations.append("- Consider strategic acquisitions or new market entry")

	        if not recommendations:
	            # Avoid a bare heading when no threshold was tripped.
	            recommendations.append("- No specific concerns flagged by the thresholds above")

	        return "Key Recommendations:\n" + "\n".join(recommendations)

	    def analyze_financials(self, balance_sheet_file, income_stmt_file):
	        """Analyze two Markdown statements and return a JSON report string.

	        Args:
	            balance_sheet_file: Path to the balance sheet Markdown file.
	            income_stmt_file: Path to the income statement Markdown file.

	        Returns:
	            A JSON document (as ``str``) with the formatted metrics and the
	            narrative analysis, or a plain error message if the inputs are
	            invalid or any step fails. The computed metrics are also kept
	            in ``self.last_metrics``.
	        """
	        try:
	            # Validate input files
	            if not (self.is_valid_markdown(balance_sheet_file) and self.is_valid_markdown(income_stmt_file)):
	                return "Error: One or both files are invalid or not in Markdown format."

	            # Read files
	            with open(balance_sheet_file, 'r', encoding='utf-8-sig') as f:
	                balance_sheet = f.read()
	            with open(income_stmt_file, 'r', encoding='utf-8-sig') as f:
	                income_stmt = f.read()

	            # Process financial data
	            income_data = self.parse_financial_data(income_stmt)
	            balance_data = self.parse_financial_data(balance_sheet)

	            # Calculate metrics
	            metrics = self.calculate_metrics(income_data, balance_data)
	            self.last_metrics = metrics

	            # Generate analysis
	            analysis = self.generate_analysis(metrics)

	            # calculate_metrics returns {} on failure, so read with a
	            # default instead of indexing.
	            def pct(name):
	                return f"{metrics.get(name, 0):.2f}%"

	            def plain(name):
	                return f"{metrics.get(name, 0):.2f}"

	            # Prepare final results
	            results = {
	                "Financial Analysis": {
	                    "Key Metrics": {
	                        "Profitability": {
	                            "Gross Profit Margin": pct('gross_profit_margin'),
	                            "Net Profit Margin": pct('net_profit_margin'),
	                            "Return on Equity": pct('roe'),
	                        },
	                        "Liquidity": {
	                            "Current Ratio": plain('current_ratio'),
	                            "Accounts Receivable Turnover": plain('ar_turnover'),
	                        },
	                        "Solvency": {
	                            "Debt Ratio": pct('debt_ratio'),
	                            "Retained Earnings Ratio": pct('retained_earnings_ratio'),
	                        },
	                        "Growth": {
	                            "Sustainable Growth Rate": pct('sgr'),
	                            "Revenue Growth (YoY)": pct('revenue_growth'),
	                        },
	                    },
	                    "Analysis": analysis,
	                    "Analysis Period": "2021-2025",
	                    "Note": "Analysis based on CFI standards",
	                }
	            }

	            return json.dumps(results, indent=2)

	        except Exception as e:
	            # UI boundary: report the failure as text instead of raising.
	            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"

	    def fine_tune_models(self, train_texts, train_labels, epochs=3):
	        """Fine-tune the loaded model on ``train_texts`` / ``train_labels``.

	        The tuned model and tokenizer are saved to
	        ``./financial_model_tuned``. Errors are logged, not raised.
	        """
	        try:
	            # Imported here because the module-level transformers import
	            # does not bring in these two names.
	            from transformers import Trainer, TrainingArguments

	            # FinancialDataset pads to max_length, which needs a pad token.
	            # Presumably only relevant for tokenizers shipped without one.
	            if self.llama_tokenizer.pad_token is None:
	                self.llama_tokenizer.pad_token = self.llama_tokenizer.eos_token

	            # Prepare dataset
	            # NOTE(review): the dataset yields one label per text while the
	            # model is a causal LM; confirm the label shape is what the
	            # model's loss expects.
	            train_dataset = FinancialDataset(train_texts, train_labels, self.llama_tokenizer)

	            # Training arguments
	            training_args = TrainingArguments(
	                output_dir="./financial_model_tuned",
	                num_train_epochs=epochs,
	                per_device_train_batch_size=4,
	                logging_dir="./logs",
	                logging_steps=10,
	                save_steps=50,
	                eval_steps=50,
	                learning_rate=2e-5,
	                weight_decay=0.01,
	                warmup_steps=500,
	            )

	            # Initialize trainer
	            trainer = Trainer(
	                model=self.llama_model,
	                args=training_args,
	                train_dataset=train_dataset,
	            )

	            # Fine-tune the model
	            trainer.train()

	            # Restore the inference mode set in initialize_models().
	            self.llama_model.eval()

	            # Save the fine-tuned model
	            self.llama_model.save_pretrained("./financial_model_tuned")
	            self.llama_tokenizer.save_pretrained("./financial_model_tuned")

	            print("Fine-tuning completed successfully!")
	        except Exception as e:
	            print(f"Error in fine-tuning: {str(e)}")


	def create_interface():
	    """Create the analyzer and wrap it in a two-file Gradio interface.

	    Returns:
	        The ``gr.Interface`` whose handler is the analyzer's
	        ``analyze_financials`` method; it is not launched here.
	    """
	    analyzer = FinancialAnalyzer()

	    # Both uploads are handed to the handler as filesystem paths.
	    statement_inputs = [
	        gr.File(label="Balance Sheet (Markdown)", type="filepath"),
	        gr.File(label="Income Statement (Markdown)", type="filepath"),
	    ]
	    results_box = gr.Textbox(label="Analysis Results", lines=25)
	    blurb = """Upload financial statements in Markdown format for AI-powered analysis.
	Analysis is based on Corporate Finance Institute (CFI) standards."""

	    return gr.Interface(
	        fn=analyzer.analyze_financials,
	        inputs=statement_inputs,
	        outputs=results_box,
	        title="AI Financial Statement Analyzer",
	        description=blurb,
	        cache_examples=False,
	    )

	if __name__ == "__main__":
	    # Script entry point: build the interface, then start serving it.
	    iface = create_interface()
	    iface.launch()