Spaces:

walaa2022
/

financial_analysis

Sleeping

App Files Files Community

financial_analysis / app.py

Inni-23

Update app.py

d4b7ede verified over 1 year ago

raw

history blame

9.58 kB

	import gradio as gr
	import pandas as pd
	import json
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import torch
	import re

	class FinancialAnalyzer:
	    """Parse Markdown financial statements and summarise them with TinyLlama.

	    The pipeline is: validate the two uploaded files, parse their Markdown
	    tables into nested dictionaries, pull out a fixed set of 2025/2021
	    metrics, build a text prompt from those metrics and ask the language
	    model for a narrative analysis. The final result is a JSON string.
	    """

	    # Hugging Face model identifier used for both tokenizer and model.
	    MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

	    # One cell of a Markdown table delimiter row: dashes with optional
	    # alignment colons, e.g. "---", ":---", "---:", ":---:".
	    _SEPARATOR_CELL = re.compile(r":?-+:?")

	    def __init__(self):
	        """Create the analyzer and eagerly load the language model."""
	        print("Initializing Analyzer...")
	        self.initialize_model()
	        print("Initialization complete!")

	    def initialize_model(self):
	        """Load the TinyLlama tokenizer and model and switch to eval mode.

	        Raises:
	            Exception: whatever the loader raised; it is logged and re-raised
	                so that start-up fails loudly instead of half-initialised.
	        """
	        try:
	            self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
	            self.model = AutoModelForCausalLM.from_pretrained(self.MODEL_NAME)
	            self.model.eval()
	        except Exception as e:
	            print(f"Error initializing model: {str(e)}")
	            raise

	    def clean_number(self, value):
	        """Convert a table cell to ``float``.

	        Strings have ``$``, thousands separators and surrounding whitespace
	        removed; a value wrapped in parentheses is treated as negative.
	        Anything that cannot be converted (including ``None`` and the empty
	        string) yields ``0.0``.
	        """
	        try:
	            if isinstance(value, str):
	                # Remove currency symbols, commas, spaces
	                value = value.replace('$', '').replace(',', '').strip()
	                # Accounting notation: "(123)" means -123
	                if '(' in value and ')' in value:
	                    value = '-' + value.replace('(', '').replace(')', '')
	            return float(value or 0)
	        except (TypeError, ValueError, OverflowError):
	            return 0.0

	    def is_valid_markdown(self, file_path):
	        """Return True if the file can be read and looks like Markdown.

	        "Looks like Markdown" means at least one line starts with ``#`` or
	        contains a ``|`` (table) character. A missing path (``None``), an
	        unreadable file or undecodable content returns ``False``.
	        """
	        try:
	            with open(file_path, 'r', encoding='utf-8') as f:
	                content = f.read()
	        except (OSError, TypeError, ValueError):
	            # TypeError: file_path is None (nothing uploaded);
	            # ValueError: covers UnicodeDecodeError and bad path values.
	            return False
	        # Simple check for Markdown structure
	        return any(
	            line.startswith('#') or '|' in line
	            for line in content.splitlines()
	        )

	    def _split_row(self, line):
	        """Split one Markdown table line into stripped cell strings."""
	        text = line.strip()
	        # Drop at most one outer pipe on each side so empty edge cells survive.
	        if text.startswith('|'):
	            text = text[1:]
	        if text.endswith('|'):
	            text = text[:-1]
	        return [cell.strip() for cell in text.split('|')]

	    def _is_separator_row(self, cells):
	        """Return True if every cell is a delimiter such as ``---`` or ``:--:``."""
	        return bool(cells) and all(
	            self._SEPARATOR_CELL.fullmatch(cell) for cell in cells
	        )

	    def parse_financial_data(self, content):
	        """Parse Markdown text into ``{section: {item: {column: float}}}``.

	        A line starting with ``#`` opens a new section. Within a section the
	        first table row is taken as the header and the following rows as
	        data; delimiter rows are skipped. Returns ``{}`` if parsing fails.
	        """
	        try:
	            data = {}
	            current_section = ""
	            current_table = []
	            headers = None

	            # splitlines() also handles CRLF files, so no stray "\r" ends up
	            # in section names or cell values.
	            for raw_line in content.splitlines():
	                line = raw_line.strip()
	                if line.startswith('#'):
	                    # A heading closes the table collected so far.
	                    if current_table and headers:
	                        data[current_section] = self.process_table(headers, current_table)
	                    current_section = line.strip('# ')
	                    current_table = []
	                    headers = None
	                elif '|' in line:
	                    row = self._split_row(line)
	                    if self._is_separator_row(row):
	                        continue  # Skip separator lines
	                    if not headers:
	                        headers = row
	                    else:
	                        current_table.append(row)

	            # Process last table
	            if current_table and headers:
	                data[current_section] = self.process_table(headers, current_table)

	            return data
	        except Exception as e:
	            print(f"Error parsing financial data: {str(e)}")
	            return {}

	    def process_table(self, headers, rows):
	        """Turn table rows into ``{item_name: {header: float}}``.

	        The first cell of each row (with ``*`` emphasis markers removed) is
	        the item name; the remaining cells are converted with
	        ``clean_number`` and keyed by the matching header. Rows whose length
	        differs from the header row are ignored. Returns ``{}`` on failure.
	        """
	        try:
	            processed_data = {}
	            for row in rows:
	                if len(row) != len(headers):
	                    continue
	                item_name = row[0].strip('*').strip()
	                processed_data[item_name] = {
	                    header: self.clean_number(value)
	                    for header, value in zip(headers[1:], row[1:])
	                }
	            return processed_data
	        except Exception as e:
	            print(f"Error processing table: {str(e)}")
	            return {}

	    def extract_metrics(self, income_data, balance_data):
	        """Collect the key metrics used in the prompt and the JSON result.

	        Values missing from the parsed data default to ``0``. Gross and net
	        margin (in percent) are added only when 2025 revenue is non-zero.
	        Returns ``{}`` if anything goes wrong.
	        """
	        try:
	            metrics = {
	                "Revenue": {
	                    "2025": self.get_nested_value(income_data, "Revenue", "Total Net Revenue", "2025"),
	                    "2021": self.get_nested_value(income_data, "Revenue", "Total Net Revenue", "2021")
	                },
	                "Profitability": {
	                    "Gross_Profit_2025": self.get_nested_value(income_data, "Cost and Gross Profit", "Gross Profit", "2025"),
	                    "Net_Earnings_2025": self.get_nested_value(income_data, "Profit Summary", "Net Earnings", "2025"),
	                    "Operating_Expenses_2025": self.get_nested_value(income_data, "Operating Expenses", "Total Operating Expenses", "2025")
	                },
	                "Balance_Sheet": {
	                    "Total_Assets_2025": self.get_nested_value(balance_data, "Key Totals", "Total_Assets", "2025"),
	                    "Total_Liabilities_2025": self.get_nested_value(balance_data, "Key Totals", "Total_Liabilities", "2025"),
	                    "Equity_2025": self.get_nested_value(balance_data, "Key Totals", "Total_Shareholders_Equity", "2025")
	                }
	            }

	            # Calculate additional metrics (guard against division by zero)
	            revenue_2025 = metrics["Revenue"]["2025"]
	            if revenue_2025 != 0:
	                profitability = metrics["Profitability"]
	                profitability["Gross_Margin"] = (profitability["Gross_Profit_2025"] / revenue_2025) * 100
	                profitability["Net_Margin"] = (profitability["Net_Earnings_2025"] / revenue_2025) * 100

	            return metrics
	        except Exception as e:
	            print(f"Error extracting metrics: {str(e)}")
	            return {}

	    def get_nested_value(self, data, section, key, year):
	        """Return ``data[section][key][year]``, or ``0`` if any level is missing.

	        Also returns ``0`` when ``data`` or an intermediate value is not a
	        dictionary.
	        """
	        try:
	            return data.get(section, {}).get(key, {}).get(year, 0)
	        except (AttributeError, TypeError):
	            return 0

	    def generate_analysis_prompt(self, metrics):
	        """Build the model prompt from the metrics dictionary.

	        Returns the prompt text wrapped in ``<human>`` tags, or ``""`` when
	        the metrics are incomplete or not numeric.
	        """
	        try:
	            revenue = metrics['Revenue']
	            profit = metrics['Profitability']
	            balance = metrics['Balance_Sheet']
	            # Assembled line by line so the prompt text does not depend on
	            # how this source file happens to be indented.
	            prompt_lines = [
	                "<human>",
	                "Analyze these financial metrics for 2025 with a focus on business performance, trends, and risks:",
	                "",
	                "Revenue and Profitability:",
	                f"- Total Revenue: ${revenue['2025']:,.1f}M",
	                f"- Gross Profit: ${profit['Gross_Profit_2025']:,.1f}M",
	                f"- Net Earnings: ${profit['Net_Earnings_2025']:,.1f}M",
	                f"- Gross Margin: {profit.get('Gross_Margin', 0):,.1f}%",
	                f"- Net Margin: {profit.get('Net_Margin', 0):,.1f}%",
	                "",
	                "Balance Sheet Strength:",
	                f"- Total Assets: ${balance['Total_Assets_2025']:,.1f}M",
	                f"- Total Liabilities: ${balance['Total_Liabilities_2025']:,.1f}M",
	                f"- Shareholders' Equity: ${balance['Equity_2025']:,.1f}M",
	                "",
	                "Explain key financial ratios and their implications. Discuss strategies for growth and risk mitigation.",
	                "</human>",
	            ]
	            return "\n".join(prompt_lines)
	        except (KeyError, TypeError, ValueError, AttributeError) as e:
	            print(f"Error generating prompt: {str(e)}")
	            return ""

	    def generate_analysis(self, prompt):
	        """Run the language model on ``prompt`` and return only its answer.

	        Returns the generated text, or a string starting with
	        ``"Error generating analysis:"`` if tokenisation or generation fails.
	        """
	        try:
	            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1500)

	            outputs = self.model.generate(
	                inputs["input_ids"],
	                attention_mask=inputs["attention_mask"],
	                max_new_tokens=500,  # Generate up to 500 new tokens
	                temperature=0.7,
	                top_p=0.9,
	                do_sample=True,
	                pad_token_id=self.tokenizer.eos_token_id,
	                no_repeat_ngram_size=3
	            )

	            # generate() returns prompt tokens followed by the completion;
	            # keep only the completion so the prompt is not echoed back.
	            prompt_length = inputs["input_ids"].shape[-1]
	            analysis = self.tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
	            # If the model starts inventing another "<human>" turn, cut there.
	            return analysis.split("<human>", 1)[0].strip()
	        except Exception as e:
	            return f"Error generating analysis: {str(e)}"

	    def analyze_financials(self, balance_sheet_file, income_stmt_file):
	        """Main analysis function: file paths in, JSON report (or error text) out.

	        Args:
	            balance_sheet_file: path to the balance sheet Markdown file.
	            income_stmt_file: path to the income statement Markdown file.

	        Returns:
	            A JSON string with the metrics and the model's analysis, or a
	            string starting with ``"Error"`` describing what went wrong.
	        """
	        try:
	            # Validate files
	            if not (self.is_valid_markdown(balance_sheet_file) and self.is_valid_markdown(income_stmt_file)):
	                return "Error: One or both files are invalid or not in Markdown format."

	            # Read files
	            with open(balance_sheet_file, 'r', encoding='utf-8') as f:
	                balance_sheet = f.read()
	            with open(income_stmt_file, 'r', encoding='utf-8') as f:
	                income_stmt = f.read()

	            # Parse financial data
	            income_data = self.parse_financial_data(income_stmt)
	            balance_data = self.parse_financial_data(balance_sheet)

	            # Extract key metrics
	            metrics = self.extract_metrics(income_data, balance_data)
	            if not metrics:
	                return "Error: Could not extract financial metrics from the provided files."

	            # Build the prompt; never run the model on an empty prompt.
	            prompt = self.generate_analysis_prompt(metrics)
	            if not prompt:
	                return "Error: Could not build the analysis prompt from the extracted metrics."
	            analysis = self.generate_analysis(prompt)

	            # Prepare results
	            results = {
	                "Financial Analysis": {
	                    "Key Metrics": metrics,
	                    "AI Insights": analysis,
	                    "Analysis Period": "2021-2025",
	                    "Note": "All monetary values in millions ($M)"
	                }
	            }

	            return json.dumps(results, indent=2)

	        except Exception as e:
	            # UI boundary: report the failure as text instead of crashing.
	            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"

	def create_interface():
	    """Build the Gradio UI wired to a freshly constructed FinancialAnalyzer.

	    Returns the ``gr.Interface`` object; the caller is responsible for
	    launching it.
	    """
	    analyzer = FinancialAnalyzer()

	    # Two file uploads, passed to the analyzer as paths on disk.
	    upload_widgets = [
	        gr.File(label="Balance Sheet (Markdown)", type="filepath"),
	        gr.File(label="Income Statement (Markdown)", type="filepath"),
	    ]
	    result_box = gr.Textbox(label="Analysis Results", lines=25)

	    return gr.Interface(
	        fn=analyzer.analyze_financials,
	        inputs=upload_widgets,
	        outputs=result_box,
	        title="Financial Statement Analyzer",
	        description="Upload financial statements in Markdown format for AI-powered analysis",
	    )

	# Script entry point: build the interface and start the Gradio server.
	if __name__ == "__main__":
	    create_interface().launch()