Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import json | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import torch | |
| import re | |
class FinancialAnalyzer:
    """Parse Markdown financial statements and summarise them with TinyLlama.

    The workflow is: validate the two uploaded files, parse their Markdown
    tables into nested dictionaries, pull out a fixed set of 2021/2025
    metrics, build a text prompt from those metrics and ask the language
    model for a narrative analysis.
    """

    # Hugging Face model id used for both the tokenizer and the model.
    MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

    # One cell of a Markdown table delimiter row: "---", ":--", "--:", ":-:".
    _SEPARATOR_CELL = re.compile(r":?-+:?")

    def __init__(self):
        """Load the tokenizer and model; raises if loading fails."""
        print("Initializing Analyzer...")
        self.initialize_model()
        print("Initialization complete!")

    def initialize_model(self):
        """Initialize TinyLlama model.

        Sets ``self.tokenizer`` and ``self.model`` (switched to eval mode).

        Raises:
            Exception: whatever the loader raised, after logging it.
        """
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
            self.model = AutoModelForCausalLM.from_pretrained(self.MODEL_NAME)
            self.model.eval()
        except Exception as e:
            print(f"Error initializing model: {str(e)}")
            raise

    def clean_number(self, value):
        """Convert a table cell to ``float``; return 0.0 if it is not numeric.

        For strings, ``$`` and ``,`` are removed and surrounding whitespace is
        stripped. A string containing both ``(`` and ``)`` is treated as a
        negative amount (accounting notation). Empty strings and ``None``
        yield 0.0.

        Args:
            value: a string cell, a number, or ``None``.

        Returns:
            The parsed ``float``, or 0.0 when parsing fails.
        """
        try:
            if isinstance(value, str):
                # Remove currency symbols, commas, spaces
                value = value.replace('$', '').replace(',', '').strip()
                # Handle parentheses for negative numbers; strip again so
                # "( 1,000 )" does not leave inner spaces that float() rejects.
                if '(' in value and ')' in value:
                    value = '-' + value.replace('(', '').replace(')', '').strip()
            return float(value or 0)
        except (TypeError, ValueError, OverflowError):
            return 0.0

    def is_valid_markdown(self, file_path):
        """Check whether a file is readable and looks like Markdown.

        The check is deliberately simple: at least one line must start with
        ``#`` or contain a ``|``.

        Args:
            file_path: path of the file to inspect (may be ``None``).

        Returns:
            ``True`` if the file could be read and passes the check,
            otherwise ``False``.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, TypeError, ValueError):
            # OSError: missing/unreadable file. TypeError: path is None.
            # ValueError: undecodable bytes (UnicodeDecodeError) or bad path.
            return False
        # Simple check for Markdown structure
        return any(line.startswith('#') or '|' in line for line in content.split('\n'))

    @staticmethod
    def _split_table_row(line):
        """Split one Markdown table line into stripped cell strings.

        The outer pipes are optional, so ``a | b`` and ``| a | b |`` both
        yield ``['a', 'b']``.
        """
        stripped = line.strip()
        if stripped.startswith('|'):
            stripped = stripped[1:]
        if stripped.endswith('|'):
            stripped = stripped[:-1]
        if not stripped:
            return []
        return [cell.strip() for cell in stripped.split('|')]

    @classmethod
    def _is_separator_row(cls, row):
        """Return True if every cell is a delimiter such as ``---`` or ``:-:``."""
        return bool(row) and all(cls._SEPARATOR_CELL.fullmatch(cell) for cell in row)

    def parse_financial_data(self, content):
        """Parse markdown content into structured data.

        Every heading line (starting with ``#``) opens a new section. The
        first table row after a heading is taken as the header row and the
        following rows as data. Delimiter rows are skipped.

        Args:
            content: the Markdown document as a string.

        Returns:
            ``{section: {item: {header: float}}}``, or ``{}`` if ``content``
            cannot be processed.
        """
        try:
            data = {}
            current_section = ""
            current_table = []
            headers = None
            for raw_line in content.splitlines():
                line = raw_line.strip()
                if line.startswith('#'):
                    # A new heading closes the table collected so far.
                    if current_table and headers:
                        data[current_section] = self.process_table(headers, current_table)
                    current_section = line.strip('# ')
                    current_table = []
                    headers = None
                elif '|' in line:
                    row = self._split_table_row(line)
                    if self._is_separator_row(row):
                        continue  # Skip separator lines
                    if not headers:
                        headers = row
                    else:
                        current_table.append(row)
            # Process last table
            if current_table and headers:
                data[current_section] = self.process_table(headers, current_table)
            return data
        except (AttributeError, TypeError) as e:
            print(f"Error parsing financial data: {str(e)}")
            return {}

    def process_table(self, headers, rows):
        """Process table data into structured format.

        Args:
            headers: header cells; the first one labels the item column.
            rows: data rows, each a list of cell strings.

        Returns:
            ``{item_name: {header: float}}``. Rows whose length differs from
            the header row are skipped. ``{}`` is returned if the inputs are
            not sequences of strings.
        """
        try:
            processed_data = {}
            for row in rows:
                if not row or len(row) != len(headers):
                    continue  # malformed or empty row
                # Drop Markdown bold markers around the item label.
                item_name = row[0].strip('*').strip()
                processed_data[item_name] = {
                    header: self.clean_number(value)
                    for header, value in zip(headers[1:], row[1:])
                }
            return processed_data
        except (AttributeError, TypeError) as e:
            print(f"Error processing table: {str(e)}")
            return {}

    def extract_metrics(self, income_data, balance_data):
        """Extract and calculate key financial metrics.

        Looks up fixed section/item/year keys in the parsed statements
        (missing entries count as 0) and, when 2025 revenue is non-zero,
        adds ``Gross_Margin`` and ``Net_Margin`` percentages.

        Args:
            income_data: parsed income statement.
            balance_data: parsed balance sheet.

        Returns:
            A dict with ``Revenue``, ``Profitability`` and ``Balance_Sheet``
            groups, or ``{}`` if a looked-up value is not numeric.
        """
        try:
            metrics = {
                "Revenue": {
                    "2025": self.get_nested_value(income_data, "Revenue", "Total Net Revenue", "2025"),
                    "2021": self.get_nested_value(income_data, "Revenue", "Total Net Revenue", "2021")
                },
                "Profitability": {
                    "Gross_Profit_2025": self.get_nested_value(income_data, "Cost and Gross Profit", "Gross Profit", "2025"),
                    "Net_Earnings_2025": self.get_nested_value(income_data, "Profit Summary", "Net Earnings", "2025"),
                    "Operating_Expenses_2025": self.get_nested_value(income_data, "Operating Expenses", "Total Operating Expenses", "2025")
                },
                "Balance_Sheet": {
                    "Total_Assets_2025": self.get_nested_value(balance_data, "Key Totals", "Total_Assets", "2025"),
                    "Total_Liabilities_2025": self.get_nested_value(balance_data, "Key Totals", "Total_Liabilities", "2025"),
                    "Equity_2025": self.get_nested_value(balance_data, "Key Totals", "Total_Shareholders_Equity", "2025")
                }
            }
            # Calculate additional metrics
            revenue_2025 = metrics["Revenue"]["2025"]
            if revenue_2025 != 0:
                profitability = metrics["Profitability"]
                profitability["Gross_Margin"] = (profitability["Gross_Profit_2025"] / revenue_2025) * 100
                profitability["Net_Margin"] = (profitability["Net_Earnings_2025"] / revenue_2025) * 100
            return metrics
        except (TypeError, ValueError) as e:
            print(f"Error extracting metrics: {str(e)}")
            return {}

    def get_nested_value(self, data, section, key, year):
        """Safely get ``data[section][key][year]``, defaulting to 0.

        Returns 0 when any level is missing or is not a dictionary.
        """
        try:
            return data.get(section, {}).get(key, {}).get(year, 0)
        except (AttributeError, TypeError):
            return 0

    def generate_analysis_prompt(self, metrics):
        """Create analysis prompt from metrics.

        Args:
            metrics: the dict produced by :meth:`extract_metrics`.

        Returns:
            The prompt text, or ``""`` if a required metric is missing or is
            not a number.
        """
        try:
            profitability = metrics['Profitability']
            balance_sheet = metrics['Balance_Sheet']
            lines = [
                "<human>",
                "Analyze these financial metrics for 2025 with a focus on business performance, trends, and risks:",
                "Revenue and Profitability:",
                f"- Total Revenue: ${metrics['Revenue']['2025']:,.1f}M",
                f"- Gross Profit: ${profitability['Gross_Profit_2025']:,.1f}M",
                f"- Net Earnings: ${profitability['Net_Earnings_2025']:,.1f}M",
                f"- Gross Margin: {profitability.get('Gross_Margin', 0):,.1f}%",
                f"- Net Margin: {profitability.get('Net_Margin', 0):,.1f}%",
                "Balance Sheet Strength:",
                f"- Total Assets: ${balance_sheet['Total_Assets_2025']:,.1f}M",
                f"- Total Liabilities: ${balance_sheet['Total_Liabilities_2025']:,.1f}M",
                f"- Shareholders' Equity: ${balance_sheet['Equity_2025']:,.1f}M",
                "Explain key financial ratios and their implications. Discuss strategies for growth and risk mitigation.",
                "</human>",
            ]
            return "\n".join(lines)
        except (KeyError, TypeError, ValueError) as e:
            print(f"Error generating prompt: {str(e)}")
            return ""

    def generate_analysis(self, prompt):
        """Generate analysis using TinyLlama.

        Args:
            prompt: the text produced by :meth:`generate_analysis_prompt`.

        Returns:
            Only the newly generated text (the prompt is not echoed back),
            or a string starting with ``"Error generating analysis:"`` if the
            prompt is empty or generation fails.
        """
        if not prompt:
            # Prompt construction failed upstream; do not call the model.
            return "Error generating analysis: empty prompt"
        try:
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1500)
            input_ids = inputs["input_ids"]
            prompt_length = input_ids.shape[-1]
            # Inference only: no gradients are needed.
            with torch.no_grad():
                outputs = self.model.generate(
                    input_ids,
                    attention_mask=inputs.get("attention_mask"),
                    max_new_tokens=500,  # Generate up to 500 new tokens
                    temperature=0.7,
                    top_p=0.9,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id,
                    no_repeat_ngram_size=3
                )
            # For a causal LM the returned sequence starts with the prompt
            # tokens, so slice them off and decode only the continuation.
            new_tokens = outputs[0][prompt_length:]
            analysis = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
            return analysis.strip()
        except Exception as e:
            return f"Error generating analysis: {str(e)}"

    def analyze_financials(self, balance_sheet_file, income_stmt_file):
        """Main analysis function.

        Args:
            balance_sheet_file: path to the balance sheet Markdown file.
            income_stmt_file: path to the income statement Markdown file.

        Returns:
            A JSON string with the extracted metrics and the model's text,
            or a plain error message if validation or processing fails.
        """
        try:
            # Validate files
            if not (self.is_valid_markdown(balance_sheet_file) and self.is_valid_markdown(income_stmt_file)):
                return "Error: One or both files are invalid or not in Markdown format."
            # Read files
            with open(balance_sheet_file, 'r', encoding='utf-8') as f:
                balance_sheet = f.read()
            with open(income_stmt_file, 'r', encoding='utf-8') as f:
                income_stmt = f.read()
            # Parse financial data
            income_data = self.parse_financial_data(income_stmt)
            balance_data = self.parse_financial_data(balance_sheet)
            # Extract key metrics
            metrics = self.extract_metrics(income_data, balance_data)
            # Generate and get analysis
            prompt = self.generate_analysis_prompt(metrics)
            analysis = self.generate_analysis(prompt)
            # Prepare results
            results = {
                "Financial Analysis": {
                    "Key Metrics": metrics,
                    "AI Insights": analysis,
                    "Analysis Period": "2021-2025",
                    "Note": "All monetary values in millions ($M)"
                }
            }
            return json.dumps(results, indent=2)
        except Exception as e:
            # UI boundary: report the failure as text instead of crashing.
            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"
def create_interface():
    """Build the Gradio interface for the financial statement analyzer.

    Creates a ``FinancialAnalyzer`` (which loads the model) and wires its
    ``analyze_financials`` method to two file inputs and one text output.

    Returns:
        The configured ``gr.Interface``; the caller is expected to launch it.
    """
    # Instantiate first so the model loads before any widget is built.
    analyzer = FinancialAnalyzer()

    balance_sheet_input = gr.File(label="Balance Sheet (Markdown)", type="filepath")
    income_statement_input = gr.File(label="Income Statement (Markdown)", type="filepath")
    results_box = gr.Textbox(label="Analysis Results", lines=25)

    return gr.Interface(
        fn=analyzer.analyze_financials,
        inputs=[balance_sheet_input, income_statement_input],
        outputs=results_box,
        title="Financial Statement Analyzer",
        description="Upload financial statements in Markdown format for AI-powered analysis",
    )
if __name__ == "__main__":
    # Script entry point: build the UI and start the Gradio server.
    create_interface().launch()