Spaces:

walaa2022
/

financial_analysis

Sleeping

App Files Files Community

financial_analysis / app.py

walaa2022

Update app.py

de70888 verified over 1 year ago

raw

history blame contribute delete

22.5 kB

	import gradio as gr
	import pandas as pd
	import json
	from transformers import (
	AutoTokenizer,
	AutoModelForCausalLM,
	AutoModelForSequenceClassification,
	TrainingArguments,
	Trainer
	)
	import torch
	import numpy as np
	from torch.utils.data import Dataset, DataLoader
	import re

	class FinancialDataset(Dataset):
	    """Map-style dataset that tokenizes one text per access and pairs it with a label.

	    Args:
	        texts: Indexable collection of texts; each item is passed through ``str()``.
	        labels: Indexable collection of labels, converted to ``torch.long`` tensors.
	        tokenizer: Callable invoked as ``tokenizer(text, truncation=True,
	            padding='max_length', max_length=..., return_tensors='pt')`` whose
	            result exposes ``'input_ids'`` and ``'attention_mask'`` tensors.
	        max_length: Length every sample is padded/truncated to.
	    """

	    def __init__(self, texts, labels, tokenizer, max_length=512):
	        self.texts = texts
	        self.labels = labels
	        self.tokenizer = tokenizer
	        self.max_length = max_length

	    def __len__(self):
	        """Return the number of samples (one per text)."""
	        return len(self.texts)

	    def __getitem__(self, idx):
	        """Return the tokenized sample at ``idx`` as a dict of tensors.

	        Returns:
	            dict with ``'input_ids'``, ``'attention_mask'`` (batch axis removed)
	            and ``'labels'`` (``torch.long`` tensor built from ``labels[idx]``).
	        """
	        encoded = self.tokenizer(
	            str(self.texts[idx]),
	            truncation=True,
	            padding='max_length',
	            max_length=self.max_length,
	            return_tensors='pt',
	        )
	        # Drop only the leading batch axis. A bare ``squeeze()`` would also
	        # collapse the sequence axis whenever ``max_length == 1``.
	        return {
	            'input_ids': encoded['input_ids'].squeeze(0),
	            'attention_mask': encoded['attention_mask'].squeeze(0),
	            'labels': torch.tensor(self.labels[idx], dtype=torch.long),
	        }

	class FinancialAnalyzer:
	    """Turn Markdown financial statements into metrics, sentiment and a narrative.

	    On construction the analyzer loads TinyLlama (text generation) and FinBERT
	    (sequence classification). ``analyze_financials`` is the entry point: it
	    parses two Markdown files, derives fiscal-2025 metrics and ratios, scores
	    them with FinBERT, asks TinyLlama for a write-up and returns a JSON string.

	    Attributes:
	        last_metrics: Metrics from the most recent ``analyze_financials`` call;
	            used by the rule-based fallback when text generation fails.
	    """

	    # One cell of a Markdown table delimiter row, e.g. "---", ":--" or ":-:".
	    _DELIMITER_CELL = re.compile(r":?-+:?")

	    def __init__(self):
	        print("Initializing Analyzer...")
	        self.last_metrics = {}
	        self.initialize_models()
	        print("Initialization complete!")

	    def initialize_models(self):
	        """Load the TinyLlama and FinBERT tokenizers/models and put both in eval mode.

	        Raises:
	            Exception: Any loading error is printed and then re-raised.
	        """
	        try:
	            # Initialize TinyLlama
	            self.llama_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
	            self.llama_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
	            self.llama_model.eval()

	            # Initialize FinBERT
	            self.finbert_tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
	            self.finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
	            self.finbert_model.eval()

	            print("Models loaded successfully!")
	        except Exception as e:
	            print(f"Error initializing models: {str(e)}")
	            raise

	    def clean_number(self, value):
	        """Convert a table cell to ``float``; unparseable input yields ``0.0``.

	        Strings have ``$`` and ``,`` removed, and a value wrapped in parentheses
	        is treated as negative (accounting notation). Falsy values become ``0.0``.
	        """
	        if isinstance(value, str):
	            value = value.replace('$', '').replace(',', '').strip()
	            if '(' in value and ')' in value:
	                value = '-' + value.replace('(', '').replace(')', '').strip()
	        try:
	            return float(value or 0)
	        except (TypeError, ValueError):
	            return 0.0

	    def is_valid_markdown(self, file_path):
	        """Return True if the file has at least one heading line or table row.

	        Unreadable paths (including ``None``) and undecodable files return False.
	        """
	        try:
	            with open(file_path, 'r', encoding='utf-8') as f:
	                return any(line.startswith('#') or '|' in line for line in f)
	        except (OSError, TypeError, ValueError):
	            return False

	    @staticmethod
	    def _split_table_row(line):
	        """Split one Markdown table line into stripped cell strings."""
	        stripped = line.strip()
	        # Outer pipes are optional in Markdown tables; remove at most one per side
	        # so that genuinely empty edge cells are preserved.
	        if stripped.startswith('|'):
	            stripped = stripped[1:]
	        if stripped.endswith('|'):
	            stripped = stripped[:-1]
	        return [cell.strip() for cell in stripped.split('|')]

	    @classmethod
	    def _is_delimiter_row(cls, cells):
	        """Return True when every cell is a header delimiter such as ``---``."""
	        return bool(cells) and all(cls._DELIMITER_CELL.fullmatch(cell) for cell in cells)

	    def parse_financial_data(self, content):
	        """Parse Markdown text into ``{section: {item: {column: float}}}``.

	        A line starting with ``#`` opens a new section. Within a section the
	        first table row is the header and later rows are data; delimiter rows
	        are skipped. Returns ``{}`` if ``content`` cannot be processed.
	        """
	        try:
	            data = {}
	            current_section = ""
	            current_table = []
	            headers = None

	            for line in content.splitlines():
	                if line.startswith('#'):
	                    # A heading closes whatever table was being collected.
	                    if current_table and headers:
	                        data[current_section] = self.process_table(headers, current_table)
	                    current_section = line.strip('# ')
	                    current_table = []
	                    headers = None
	                elif '|' in line:
	                    cells = self._split_table_row(line)
	                    if not any(cells) or self._is_delimiter_row(cells):
	                        continue
	                    if not headers:
	                        headers = cells
	                    else:
	                        current_table.append(cells)

	            if current_table and headers:
	                data[current_section] = self.process_table(headers, current_table)

	            return data
	        except (AttributeError, TypeError) as e:
	            print(f"Error parsing financial data: {str(e)}")
	            return {}

	    def process_table(self, headers, rows):
	        """Convert table rows into ``{item_name: {header: float}}``.

	        The first cell of each row (with surrounding ``*`` removed) is the item
	        name. Rows whose cell count differs from the header are ignored.
	        """
	        try:
	            processed_data = {}
	            for row in rows:
	                if len(row) != len(headers):
	                    continue
	                item_name = row[0].strip('*').strip()
	                processed_data[item_name] = {
	                    header: self.clean_number(value)
	                    for header, value in zip(headers[1:], row[1:])
	                }
	            return processed_data
	        except (AttributeError, TypeError) as e:
	            print(f"Error processing table: {str(e)}")
	            return {}

	    def get_nested_value(self, data, section, key, year):
	        """Return ``data[section][key][str(year)]``, or ``0`` when any level is missing."""
	        try:
	            return data.get(section, {}).get(key, {}).get(str(year), 0)
	        except (AttributeError, TypeError):
	            return 0

	    @staticmethod
	    def _safe_ratio(numerator, denominator):
	        """Return ``numerator / denominator``, or ``0`` when the denominator is zero."""
	        return numerator / denominator if denominator != 0 else 0

	    def extract_metrics(self, income_data, balance_data):
	        """Collect headline figures and derive ratios for fiscal year 2025.

	        Args:
	            income_data: Parsed income statement (see ``parse_financial_data``).
	            balance_data: Parsed balance sheet.

	        Returns:
	            dict with ``Revenue``, ``Profitability``, ``Balance_Sheet``,
	            ``Cash_Flow`` and ``Ratios``. ``Ratios`` is always present and is
	            empty when 2025 revenue is zero. Returns ``{}`` on a computation error.
	        """
	        try:
	            income = lambda section, key, year: self.get_nested_value(income_data, section, key, year)
	            balance = lambda section, key: self.get_nested_value(balance_data, section, key, "2025")

	            metrics = {
	                "Revenue": {
	                    "2025": income("Revenue", "Total Net Revenue", "2025"),
	                    "2024": income("Revenue", "Total Net Revenue", "2024"),
	                    "2021": income("Revenue", "Total Net Revenue", "2021"),
	                },
	                "Profitability": {
	                    "Gross_Profit_2025": income("Cost and Gross Profit", "Gross Profit", "2025"),
	                    "EBIT_2025": income("Profit Summary", "EBIT", "2025"),
	                    "Net_Earnings_2025": income("Profit Summary", "Net Earnings", "2025"),
	                    "Operating_Expenses_2025": income("Operating Expenses", "Total Operating Expenses", "2025"),
	                },
	                "Balance_Sheet": {
	                    "Total_Assets_2025": balance("Key Totals", "Total_Assets"),
	                    "Current_Assets_2025": balance("Key Totals", "Total_Current_Assets"),
	                    "Total_Liabilities_2025": balance("Key Totals", "Total_Liabilities"),
	                    "Current_Liabilities_2025": balance("Key Totals", "Total_Current_Liabilities"),
	                    "Equity_2025": balance("Key Totals", "Total_Shareholders_Equity"),
	                    "Inventory_2025": balance("Balance Sheet Data 2021-2025", "Inventory"),
	                    "Accounts_Receivable_2025": balance("Balance Sheet Data 2021-2025", "Accounts_Receivable"),
	                    "Long_Term_Debt_2025": balance("Balance Sheet Data 2021-2025", "Long_Term_Debt"),
	                },
	                "Cash_Flow": {
	                    "Depreciation_2025": income("Operating Expenses", "Depreciation & Amortization", "2025"),
	                    "Interest_Expense_2025": income("Profit Summary", "Interest Expense", "2025"),
	                },
	                # Always present so downstream ``metrics['Ratios']`` lookups
	                # cannot raise KeyError when revenue is missing.
	                "Ratios": {},
	            }

	            revenue = metrics["Revenue"]
	            profit = metrics["Profitability"]
	            sheet = metrics["Balance_Sheet"]
	            revenue_2025 = revenue["2025"]

	            if revenue_2025 != 0:
	                ratio = self._safe_ratio
	                # 2021 -> 2025 spans four compounding periods. A non-positive
	                # revenue ratio has no real fourth root (Python would return a
	                # complex number), so report 0 in that case.
	                span_ratio = ratio(revenue_2025, revenue["2021"])
	                cagr = (span_ratio ** (1 / 4) - 1) * 100 if span_ratio > 0 else 0

	                metrics["Ratios"] = {
	                    "Gross_Margin": (profit["Gross_Profit_2025"] / revenue_2025) * 100,
	                    "Operating_Margin": (profit["EBIT_2025"] / revenue_2025) * 100,
	                    "Net_Margin": (profit["Net_Earnings_2025"] / revenue_2025) * 100,
	                    "Current_Ratio": ratio(sheet["Current_Assets_2025"], sheet["Current_Liabilities_2025"]),
	                    "Quick_Ratio": ratio(sheet["Current_Assets_2025"] - sheet["Inventory_2025"], sheet["Current_Liabilities_2025"]),
	                    "Asset_Turnover": ratio(revenue_2025, sheet["Total_Assets_2025"]),
	                    "Receivables_Turnover": ratio(revenue_2025, sheet["Accounts_Receivable_2025"]),
	                    "Debt_to_Equity": ratio(sheet["Total_Liabilities_2025"], sheet["Equity_2025"]),
	                    "Interest_Coverage": ratio(profit["EBIT_2025"], metrics["Cash_Flow"]["Interest_Expense_2025"]),
	                    "Revenue_Growth": ((revenue_2025 / revenue["2024"]) - 1) * 100 if revenue["2024"] != 0 else 0,
	                    "5Year_Revenue_CAGR": cagr,
	                }

	            return metrics
	        except (ArithmeticError, TypeError, ValueError) as e:
	            print(f"Error extracting metrics: {str(e)}")
	            return {}

	    def get_sentiment_analysis(self, metrics):
	        """Score a short textual summary of the ratios with FinBERT.

	        Returns:
	            dict mapping each lower-cased label from the model's ``id2label``
	            config to its softmax probability, or ``{}`` on failure.
	        """
	        try:
	            ratios = metrics.get('Ratios', {})
	            financial_text = f"""
	Revenue growth: {ratios.get('Revenue_Growth', 0):.2f}%
	Profit margin: {ratios.get('Net_Margin', 0):.2f}%
	Debt to equity: {ratios.get('Debt_to_Equity', 0):.2f}
	Interest coverage: {ratios.get('Interest_Coverage', 0):.2f}
	Current ratio: {ratios.get('Current_Ratio', 0):.2f}
	"""

	            inputs = self.finbert_tokenizer(financial_text, return_tensors="pt", padding=True, truncation=True)
	            # Inference only: skip autograd bookkeeping.
	            with torch.no_grad():
	                outputs = self.finbert_model(**inputs)
	            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)[0].tolist()

	            # Take label names from the checkpoint itself; a hard-coded order
	            # silently mislabels scores if it differs from the model's mapping.
	            id2label = self.finbert_model.config.id2label
	            return {
	                str(id2label[index]).lower(): float(score)
	                for index, score in enumerate(probabilities)
	            }
	        except Exception as e:
	            print(f"Error in sentiment analysis: {str(e)}")
	            return {}

	    def generate_prompt(self, metrics, sentiment_dict):
	        """Build the instruction prompt from metrics and sentiment scores.

	        Returns ``""`` if required metric keys are missing or not numeric.
	        """
	        try:
	            ratios = metrics.get('Ratios', {})
	            return f"""[INST] As a financial analyst, provide a comprehensive analysis of this company's performance.

	Financial Metrics (2025):
	------------------------
	1. Revenue & Growth:
	- Revenue: ${metrics['Revenue']['2025']:,.1f}M
	- Growth Rate: {ratios.get('Revenue_Growth', 0):,.1f}%
	- 5-Year CAGR: {ratios.get('5Year_Revenue_CAGR', 0):,.1f}%

	2. Profitability:
	- Gross Profit: ${metrics['Profitability']['Gross_Profit_2025']:,.1f}M
	- EBIT: ${metrics['Profitability']['EBIT_2025']:,.1f}M
	- Net Earnings: ${metrics['Profitability']['Net_Earnings_2025']:,.1f}M
	- Margins:
	* Gross: {ratios.get('Gross_Margin', 0):,.1f}%
	* Operating: {ratios.get('Operating_Margin', 0):,.1f}%
	* Net: {ratios.get('Net_Margin', 0):,.1f}%

	3. Financial Position:
	- Assets: ${metrics['Balance_Sheet']['Total_Assets_2025']:,.1f}M
	- Liabilities: ${metrics['Balance_Sheet']['Total_Liabilities_2025']:,.1f}M
	- Equity: ${metrics['Balance_Sheet']['Equity_2025']:,.1f}M

	4. Key Ratios:
	- Liquidity: Current Ratio {ratios.get('Current_Ratio', 0):,.2f}x
	- Efficiency: Asset Turnover {ratios.get('Asset_Turnover', 0):,.2f}x
	- Solvency: Debt/Equity {ratios.get('Debt_to_Equity', 0):,.2f}x
	- Coverage: Interest Coverage {ratios.get('Interest_Coverage', 0):,.2f}x

	Market Sentiment Indicators:
	---------------------------
	- Positive: {sentiment_dict.get('positive', 0):,.2f}
	- Neutral: {sentiment_dict.get('neutral', 0):,.2f}
	- Negative: {sentiment_dict.get('negative', 0):,.2f}

	Provide:
	1. Overall financial health assessment
	2. Key strengths and concerns
	3. Operational efficiency analysis
	4. Recommendations for improvement
	[/INST]"""
	        except (AttributeError, KeyError, TypeError, ValueError) as e:
	            print(f"Error generating prompt: {str(e)}")
	            return ""

	    def generate_analysis(self, prompt):
	        """Generate the narrative with TinyLlama, falling back to rule-based text.

	        The fallback (built from ``self.last_metrics``) is used when generation
	        raises or yields fewer than 100 words.
	        """
	        try:
	            primer = "Let me analyze these financial metrics in detail."
	            formatted_prompt = f"<human>: {prompt}\n<assistant>: {primer}"

	            max_new_tokens = 1024
	            # Leave room for the generated tokens inside the context window.
	            context_window = getattr(self.llama_model.config, "max_position_embeddings", 2048)
	            inputs = self.llama_tokenizer(
	                formatted_prompt,
	                return_tensors="pt",
	                truncation=True,
	                max_length=max(1, context_window - max_new_tokens),
	                padding=True
	            )

	            # Sampling-based generation. Beam-search-only options
	            # (length_penalty, early_stopping) are intentionally not passed.
	            with torch.no_grad():
	                outputs = self.llama_model.generate(
	                    inputs["input_ids"],
	                    attention_mask=inputs["attention_mask"],
	                    max_new_tokens=max_new_tokens,
	                    min_new_tokens=200,  # Ensure minimum length
	                    temperature=0.8,
	                    top_p=0.92,
	                    do_sample=True,
	                    repetition_penalty=1.2,
	                    num_return_sequences=1,
	                    pad_token_id=self.llama_tokenizer.eos_token_id,
	                    eos_token_id=self.llama_tokenizer.eos_token_id
	                )

	            # Decode only the newly generated tokens so neither the prompt nor
	            # special tokens such as the end-of-sequence marker leak through.
	            prompt_length = inputs["input_ids"].shape[1]
	            generated = self.llama_tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
	            analysis = f"{primer} {generated.strip()}".strip()

	            # If the model opened another turn, keep only the last assistant part.
	            if "<assistant>:" in analysis:
	                analysis = analysis.split("<assistant>:")[-1].strip()
	            analysis = analysis.replace("<human>:", "").replace("<assistant>:", "").strip()

	            # Too-short output is treated as a failed generation.
	            if len(analysis.split()) < 100:
	                analysis = self.generate_fallback_analysis(self.last_metrics)

	            return analysis

	        except Exception as e:
	            print(f"Detailed error in generate_analysis: {str(e)}")
	            return self.generate_fallback_analysis(self.last_metrics)

	    def generate_fallback_analysis(self, metrics):
	        """Produce a rule-based summary from ``metrics['Ratios']``.

	        Returns a short notice when no ratios are available, so that an
	        all-zero report is not presented as a real assessment.
	        """
	        try:
	            ratios = (metrics or {}).get('Ratios') or {}
	            if not ratios:
	                return "Fallback analysis unavailable: no financial ratios could be computed."

	            revenue_growth = ratios.get('Revenue_Growth', 0)
	            net_margin = ratios.get('Net_Margin', 0)
	            current_ratio = ratios.get('Current_Ratio', 0)
	            debt_to_equity = ratios.get('Debt_to_Equity', 0)

	            growth_label = 'strong' if revenue_growth > 5 else 'moderate' if revenue_growth > 0 else 'weak'
	            margin_label = 'strong' if net_margin > 10 else 'moderate' if net_margin > 5 else 'concerning'
	            liquidity_label = 'very strong' if current_ratio > 2 else 'adequate' if current_ratio > 1 else 'concerning'
	            leverage_label = 'conservative' if debt_to_equity < 0.5 else 'moderate' if debt_to_equity < 1 else 'aggressive'

	            debt_advice = 'Consider debt reduction' if debt_to_equity > 0.5 else 'Maintain current debt levels'
	            margin_advice = 'Focus on improving profit margins' if net_margin < 5 else 'Maintain profit efficiency'
	            growth_advice = 'Implement growth strategies' if revenue_growth < 2 else 'Sustain growth momentum'

	            return f"""
	Financial Analysis Summary:

	1. Revenue and Growth:
	The company shows a revenue growth of {revenue_growth:.1f}%, indicating {growth_label} growth performance.

	2. Profitability:
	With a net margin of {net_margin:.1f}%, the company demonstrates {margin_label} profitability levels.

	3. Liquidity Position:
	The current ratio of {current_ratio:.2f}x suggests {liquidity_label} liquidity position.

	4. Financial Leverage:
	With a debt-to-equity ratio of {debt_to_equity:.2f}, the company maintains {leverage_label} leverage.

	Key Recommendations:
	1. {debt_advice}
	2. {margin_advice}
	3. {growth_advice}

	This analysis is based on key financial metrics and standard industry benchmarks.
	"""
	        except (AttributeError, TypeError, ValueError) as e:
	            return f"Error generating fallback analysis: {str(e)}"

	    def fine_tune_models(self, train_texts, train_labels, epochs=3):
	        """Fine-tune the TinyLlama model on custom data and save it to disk.

	        Errors are printed rather than raised. The model is returned to eval
	        mode afterwards so later inference calls behave as after start-up.
	        """
	        try:
	            # Prepare dataset
	            # NOTE(review): FinancialDataset yields one scalar label per sample,
	            # while a causal LM normally expects token-level labels - confirm
	            # whether FinBERT was the intended fine-tuning target.
	            train_dataset = FinancialDataset(train_texts, train_labels, self.llama_tokenizer)

	            # No evaluation settings: there is no eval dataset to evaluate on.
	            training_args = TrainingArguments(
	                output_dir="./financial_model_tuned",
	                num_train_epochs=epochs,
	                per_device_train_batch_size=4,
	                logging_dir="./logs",
	                logging_steps=10,
	                save_steps=50,
	                learning_rate=2e-5,
	                weight_decay=0.01,
	                warmup_steps=500,
	            )

	            trainer = Trainer(
	                model=self.llama_model,
	                args=training_args,
	                train_dataset=train_dataset,
	            )

	            try:
	                trainer.train()
	            finally:
	                self.llama_model.eval()

	            # Save the fine-tuned model
	            self.llama_model.save_pretrained("./financial_model_tuned")
	            self.llama_tokenizer.save_pretrained("./financial_model_tuned")

	            print("Fine-tuning completed successfully!")
	        except Exception as e:
	            print(f"Error in fine-tuning: {str(e)}")

	    def analyze_financials(self, balance_sheet_file, income_stmt_file):
	        """Run the full pipeline on two Markdown files and return a JSON string.

	        Args:
	            balance_sheet_file: Path to the balance sheet Markdown file.
	            income_stmt_file: Path to the income statement Markdown file.

	        Returns:
	            A JSON document (as ``str``) with metrics, sentiment and narrative,
	            or a human-readable error message starting with ``"Error"``.
	        """
	        try:
	            # Validate input files
	            if not (self.is_valid_markdown(balance_sheet_file) and self.is_valid_markdown(income_stmt_file)):
	                return "Error: One or both files are invalid or not in Markdown format."

	            # Read files
	            with open(balance_sheet_file, 'r', encoding='utf-8') as f:
	                balance_sheet = f.read()
	            with open(income_stmt_file, 'r', encoding='utf-8') as f:
	                income_stmt = f.read()

	            # Process financial data
	            income_data = self.parse_financial_data(income_stmt)
	            balance_data = self.parse_financial_data(balance_sheet)
	            metrics = self.extract_metrics(income_data, balance_data)
	            # Remembered so generate_analysis can build its fallback text.
	            self.last_metrics = metrics

	            sentiment_dict = self.get_sentiment_analysis(metrics)
	            prompt = self.generate_prompt(metrics, sentiment_dict)
	            analysis = self.generate_analysis(prompt)

	            results = {
	                "Financial Analysis": {
	                    "Key Metrics": convert_to_serializable(metrics),
	                    "Market Sentiment": convert_to_serializable(sentiment_dict),
	                    "AI Insights": analysis,
	                    "Analysis Period": "2021-2025",
	                    "Note": "All monetary values in millions ($M)"
	                }
	            }

	            return json.dumps(results, indent=2)

	        except Exception as e:
	            # UI boundary: report the failure as text instead of raising.
	            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"


	def convert_to_serializable(obj):
	    """Recursively replace NumPy scalars/arrays with native Python values.

	    Dicts and lists are rebuilt with converted contents; anything else is
	    returned unchanged.
	    """
	    if isinstance(obj, np.generic):
	        # Covers every NumPy scalar type (float32, float64, int64, bool_, ...).
	        return obj.item()
	    if isinstance(obj, np.ndarray):
	        return obj.tolist()
	    if isinstance(obj, dict):
	        return {key: convert_to_serializable(value) for key, value in obj.items()}
	    if isinstance(obj, list):
	        return [convert_to_serializable(item) for item in obj]
	    return obj

	def create_interface():
	    """Build the Gradio interface wired to a fresh ``FinancialAnalyzer``.

	    Returns:
	        A ``gr.Interface`` with two file inputs (balance sheet, income
	        statement) and a text box that shows ``analyze_financials`` output.
	    """
	    # Constructing the analyzer loads both models before the UI is assembled.
	    analyzer = FinancialAnalyzer()

	    upload_fields = [
	        gr.File(label="Balance Sheet (Markdown)", type="filepath"),
	        gr.File(label="Income Statement (Markdown)", type="filepath"),
	    ]
	    results_box = gr.Textbox(label="Analysis Results", lines=25)
	    blurb = """Upload financial statements in Markdown format for AI-powered analysis.
	The analysis combines LLM-based insights with sentiment analysis."""

	    return gr.Interface(
	        fn=analyzer.analyze_financials,
	        inputs=upload_fields,
	        outputs=results_box,
	        title="AI Financial Statement Analyzer",
	        description=blurb,
	    )

	# Script entry point: build the Gradio interface and start serving it.
	if __name__ == "__main__":
	iface = create_interface()
	iface.launch()