Spaces:

walaa2022
/

financial-analysis-system

Sleeping

App Files Files Community

financial-analysis-system / app.py

walaa2022

Update app.py

17c0709 verified over 1 year ago

raw

history blame contribute delete

6.63 kB

	import os
	import gradio as gr
	import pandas as pd
	import torch
	import logging
	from transformers import pipeline

	# Configure process-wide logging once at import time: timestamp, level, message.
	_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
	logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Prefer the GPU when CUDA is usable; otherwise run everything on the CPU.
	if torch.cuda.is_available():
	    DEVICE = "cuda"
	else:
	    DEVICE = "cpu"
	logger.info(f"Using device: {DEVICE}")

	class FinancialAnalyzer:
	    """Analyze financial statements with two Hugging Face pipelines.

	    ``analysis_model`` is a text-generation pipeline
	    (``TinyLlama/TinyLlama-1.1B-Chat-v1.0``) that summarizes each statement
	    and writes the final report.  ``sentiment_model`` is a
	    text-classification pipeline (``ProsusAI/finbert``) that labels the
	    combined summaries.  Both are loaded by ``__init__`` via ``load_models``.
	    """

	    # Phrases that mark a line of model output as a report section heading.
	    _SECTION_HEADERS = ("Financial Health", "Business Insights", "Recommendations")

	    def __init__(self):
	        """Create the analyzer and load both models immediately."""
	        self.analysis_model = None
	        self.sentiment_model = None
	        self.load_models()

	    def load_models(self):
	        """Load the generation and sentiment pipelines onto ``DEVICE``.

	        Raises:
	            Exception: whatever ``pipeline`` raised; it is logged and re-raised.
	        """
	        # Half precision is only requested when running on the GPU.
	        dtype = torch.float16 if DEVICE == "cuda" else torch.float32
	        try:
	            logger.info("Loading TinyLlama model...")
	            self.analysis_model = pipeline(
	                "text-generation",
	                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
	                torch_dtype=dtype,
	                # Place the model explicitly so the float16 weights chosen
	                # above actually live on the GPU they were chosen for.
	                device=DEVICE,
	            )

	            logger.info("Loading FinBERT model...")
	            self.sentiment_model = pipeline(
	                "text-classification",
	                model="ProsusAI/finbert",
	                torch_dtype=dtype,
	                device=DEVICE,
	            )

	            logger.info("Models loaded successfully")
	        except Exception as e:
	            logger.error(f"Error loading models: {str(e)}")
	            raise

	    def _generate(self, prompt, max_new_tokens, temperature):
	        """Run ``analysis_model`` on ``prompt`` and return only the new text.

	        ``do_sample=True`` is required for ``temperature`` to have any effect,
	        and ``return_full_text=False`` stops the pipeline from echoing the
	        prompt back at the start of ``generated_text``.
	        """
	        outputs = self.analysis_model(
	            prompt,
	            max_new_tokens=max_new_tokens,
	            do_sample=True,
	            temperature=temperature,
	            num_return_sequences=1,
	            return_full_text=False,
	            truncation=True,
	        )
	        return outputs[0]['generated_text']

	    def extract_and_analyze(self, statement_text, statement_type):
	        """Extract information from financial statement text.

	        Args:
	            statement_text: The statement rendered as plain text.
	            statement_type: Human-readable name, e.g. "Income Statement".

	        Returns:
	            The model's summary of the statement (without the prompt).

	        Raises:
	            Exception: any generation error; it is logged and re-raised.
	        """
	        try:
	            # NOTE(review): the [INST] wrapper may not be the chat template
	            # this TinyLlama checkpoint expects - confirm against its model
	            # card before changing the prompt format.
	            prompt = (
	                f"[INST] As a financial analyst, analyze this {statement_type}:\n"
	                "\n"
	                f"{statement_text}\n"
	                "\n"
	                "Extract and summarize:\n"
	                "1. Key financial numbers for 2025\n"
	                "2. Notable trends\n"
	                "3. Important metrics\n"
	                "\n"
	                "Focus on the most recent year (2025) and key financial indicators.\n"
	                "[/INST]"
	            )
	            return self._generate(prompt, max_new_tokens=300, temperature=0.3)
	        except Exception as e:
	            logger.error(f"Error extracting data from {statement_type}: {str(e)}")
	            raise

	    def analyze_financials(self, income_text, balance_text):
	        """Produce a Markdown report from an income statement and balance sheet.

	        Each statement is summarized separately, the combined summaries are
	        scored for sentiment, and a final generation pass writes the
	        health / insights / recommendations sections.

	        Returns:
	            The formatted Markdown report, or an ``"Error in analysis: ..."``
	            string if any step raised.
	        """
	        try:
	            # First, extract key information from each statement.
	            logger.info("Analyzing Income Statement...")
	            income_analysis = self.extract_and_analyze(income_text, "Income Statement")

	            logger.info("Analyzing Balance Sheet...")
	            balance_analysis = self.extract_and_analyze(balance_text, "Balance Sheet")

	            combined_analysis = (
	                "Income Statement Analysis:\n"
	                f"{income_analysis}\n"
	                "\n"
	                "Balance Sheet Analysis:\n"
	                f"{balance_analysis}"
	            )

	            # Only the first 512 characters (not tokens) are scored.
	            sentiment = self.sentiment_model(
	                combined_analysis[:512],
	                truncation=True
	            )[0]

	            final_prompt = (
	                "[INST] Based on this financial analysis:\n"
	                "\n"
	                f"{combined_analysis}\n"
	                "\n"
	                f"Market Sentiment: {sentiment['label']} ({sentiment['score']:.2%})\n"
	                "\n"
	                "Provide a concise analysis with:\n"
	                "1. Overall Financial Health (2-3 key points)\n"
	                "2. Main Business Insights (2-3 insights)\n"
	                "3. Key Recommendations (2-3 recommendations)\n"
	                "[/INST]"
	            )
	            final_text = self._generate(final_prompt, max_new_tokens=500, temperature=0.7)

	            return self.format_response(final_text, sentiment, combined_analysis)

	        except Exception as e:
	            logger.error(f"Analysis error: {str(e)}")
	            return f"Error in analysis: {str(e)}"

	    def format_response(self, analysis_text, sentiment, raw_analysis):
	        """Render the final report as Markdown.

	        Args:
	            analysis_text: Model output; split into lines, blank lines dropped.
	            sentiment: Mapping with ``'label'`` (str) and ``'score'`` (float).
	            raw_analysis: Text shown verbatim inside a fenced code block.

	        Returns:
	            The Markdown report.  Lines containing one of the section phrases
	            become ``###`` headings; every other line becomes a ``-`` bullet.
	            Returns ``"Error formatting analysis"`` if formatting raises.
	        """
	        try:
	            sections = [
	                "# Financial Analysis Report\n\n",
	                f"## Market Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
	                "## Extracted Financial Data\n```\n",
	                raw_analysis,
	                "\n```\n\n",
	                "## Analysis\n\n",
	            ]

	            for raw_line in analysis_text.split('\n'):
	                line = raw_line.strip()
	                if not line:
	                    continue

	                if any(header in line for header in self._SECTION_HEADERS):
	                    sections.append(f"\n### {line}\n")
	                else:
	                    if not line.startswith('-'):
	                        line = f"- {line}"
	                    sections.append(f"{line}\n")

	            return "".join(sections)
	        except Exception as e:
	            logger.error(f"Error formatting response: {str(e)}")
	            return "Error formatting analysis"

	# Shared analyzer, created on first use so the two models are loaded once
	# per process instead of once per request.
	_ANALYZER = None


	def _get_analyzer():
	    """Return the process-wide FinancialAnalyzer, creating it on first use."""
	    global _ANALYZER
	    if _ANALYZER is None:
	        # If construction raises, _ANALYZER stays None and the next call retries.
	        _ANALYZER = FinancialAnalyzer()
	    return _ANALYZER


	def analyze_statements(income_statement, balance_sheet):
	    """Gradio callback: analyze two uploaded CSV statements.

	    Args:
	        income_statement: Income-statement upload; passed to ``pd.read_csv``.
	        balance_sheet: Balance-sheet upload; passed to ``pd.read_csv``.

	    Returns:
	        The report produced by ``FinancialAnalyzer.analyze_financials``, a
	        prompt to upload both files when either is missing, or an
	        ``"Analysis Error: ..."`` message if reading or analysis raised.
	    """
	    if not income_statement or not balance_sheet:
	        return "Please upload both financial statements."

	    try:
	        logger.info("Reading financial statements...")
	        income_df = pd.read_csv(income_statement)
	        balance_df = pd.read_csv(balance_sheet)

	        # Render the tables as aligned plain text for the prompt.
	        income_text = income_df.to_string(index=False)
	        balance_text = balance_df.to_string(index=False)

	        logger.info("Initializing analysis...")
	        return _get_analyzer().analyze_financials(income_text, balance_text)

	    except Exception as e:
	        logger.error(f"Error: {str(e)}")
	        return (
	            f"Analysis Error: {str(e)}\n"
	            "\n"
	            "Please check:\n"
	            "1. Files are readable CSV files\n"
	            "2. Files contain financial data\n"
	            "3. Files are not corrupted"
	        )
	    finally:
	        # Release cached GPU memory after every attempt, including failed ones.
	        if DEVICE == "cuda":
	            torch.cuda.empty_cache()

	# Build the Gradio UI: two CSV uploads in, one Markdown report out.
	iface = gr.Interface(
	    title="AI Financial Statement Analyzer",
	    description="""Upload your financial statements for AI analysis.
	The model will extract and analyze key financial information automatically.""",
	    fn=analyze_statements,
	    inputs=[
	        gr.File(label=f"{statement} (CSV)", file_types=[".csv"])
	        for statement in ("Income Statement", "Balance Sheet")
	    ],
	    outputs=gr.Markdown(),
	    theme="default",
	    flagging_mode="never",
	)

	# Start the server only when run as a script; bind all interfaces on port 7860.
	if __name__ == "__main__":
	    iface.launch(share=False, server_port=7860, server_name="0.0.0.0")