Spaces:
Build error
Build error
import json
import re

import gradio as gr
import numpy as np
import pandas as pd
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)
class FinancialDataset:
    """Map-style dataset pairing raw texts with integer labels.

    Each item is tokenized on access with the tokenizer supplied at
    construction time, padded/truncated to ``max_length``.
    """

    def __init__(self, texts, labels, tokenizer, max_length=512):
        """Store the samples and the tokenization settings.

        Args:
            texts: Indexable collection of text samples.
            labels: Indexable collection of labels, aligned with ``texts``.
            tokenizer: Callable accepting ``(text, truncation=, padding=,
                max_length=, return_tensors=)`` and returning a mapping with
                ``'input_ids'`` and ``'attention_mask'`` tensors.
            max_length: Length every sample is padded/truncated to.
        """
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of text samples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize sample ``idx`` and return its tensors.

        Returns:
            dict with ``'input_ids'``, ``'attention_mask'`` and ``'labels'``
            (the label as a ``torch.long`` tensor).
        """
        text = str(self.texts[idx])
        inputs = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt',
        )
        return {
            # squeeze(0) removes only the leading axis added by
            # return_tensors='pt'; a bare squeeze() would also drop the
            # sequence axis whenever max_length == 1.
            'input_ids': inputs['input_ids'].squeeze(0),
            'attention_mask': inputs['attention_mask'].squeeze(0),
            'labels': torch.tensor(self.labels[idx], dtype=torch.long),
        }
class FinancialAnalyzer:
    """Turn Markdown financial statements into ratios and a written analysis.

    The pipeline is: validate the two uploaded files, parse their Markdown
    tables into nested dicts, compute a fixed set of ratios for fiscal year
    2025, then ask a TinyLlama chat model to narrate them (falling back to a
    rule-based summary when generation fails or is too short).
    """

    MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

    # One cell of a Markdown table delimiter row: dashes with optional
    # alignment colons, e.g. "---", ":---", "---:", ":-:".
    _SEPARATOR_CELL = re.compile(r":?-+:?")

    def __init__(self):
        """Load the language model and initialise the metrics cache."""
        print("Initializing Analyzer...")
        self.last_metrics = {}
        self.initialize_models()
        print("Initialization complete!")

    def initialize_models(self):
        """Load the TinyLlama tokenizer and model and switch to eval mode.

        Raises:
            Exception: whatever the underlying loader raised, re-raised after
                being logged so the caller can see the failure.
        """
        try:
            self.llama_tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
            self.llama_model = AutoModelForCausalLM.from_pretrained(self.MODEL_NAME)
            self.llama_model.eval()
            print("Models loaded successfully!")
        except Exception as e:
            print(f"Error initializing models: {str(e)}")
            raise

    def clean_number(self, value):
        """Convert a table cell to ``float``, returning 0.0 when impossible.

        Strings have ``$`` and thousands separators removed, and accounting
        style negatives such as ``(1,234)`` become ``-1234.0``. Empty or
        ``None`` values yield 0.0.
        """
        if isinstance(value, str):
            value = value.replace('$', '').replace(',', '').strip()
            if '(' in value and ')' in value:
                # Strip again so "( 500 )" parses as -500 instead of failing.
                value = '-' + value.replace('(', '').replace(')', '').strip()
        try:
            return float(value or 0)
        except (TypeError, ValueError, OverflowError):
            return 0.0

    def is_valid_markdown(self, file_path):
        """Return True if the file has at least one heading or table line.

        Unreadable paths (missing file, ``None``, undecodable bytes) are
        reported as invalid rather than raising.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return any(line.startswith('#') or '|' in line for line in f)
        except (OSError, TypeError, ValueError):
            # ValueError also covers UnicodeDecodeError.
            return False

    @classmethod
    def _is_separator_row(cls, cells):
        """Return True when every cell is a header delimiter like ``---``."""
        return bool(cells) and all(
            cls._SEPARATOR_CELL.fullmatch(cell) for cell in cells
        )

    def parse_financial_data(self, content):
        """Parse Markdown text into ``{section: {item: {column: float}}}``.

        A line starting with ``#`` opens a new section. Within a section the
        first table row is taken as the header and later rows as data; header
        delimiter rows are skipped. Returns ``{}`` if parsing fails.
        """
        try:
            data = {}
            current_section = ""
            current_table = []
            headers = None
            for line in content.split('\n'):
                if line.startswith('#'):
                    # A heading closes the table collected so far.
                    if current_table and headers:
                        data[current_section] = self.process_table(headers, current_table)
                    current_section = line.strip('# ')
                    current_table = []
                    headers = None
                elif '|' in line:
                    # Rows are expected to have leading and trailing pipes.
                    row = [cell.strip() for cell in line.split('|')[1:-1]]
                    if self._is_separator_row(row):
                        continue
                    if not headers:
                        headers = row
                    else:
                        current_table.append(row)
            if current_table and headers:
                data[current_section] = self.process_table(headers, current_table)
            return data
        except Exception as e:
            print(f"Error parsing financial data: {str(e)}")
            return {}

    def process_table(self, headers, rows):
        """Convert table rows into ``{item_name: {header: float}}``.

        The first cell of each row (with surrounding ``*`` emphasis removed)
        is the item name; remaining cells are cleaned with ``clean_number``
        and keyed by the matching header. Rows whose width differs from the
        header are ignored. Returns ``{}`` on malformed input.
        """
        try:
            processed_data = {}
            for row in rows:
                if len(row) != len(headers):
                    continue
                item_name = row[0].strip('*').strip()
                processed_data[item_name] = {
                    header: self.clean_number(cell)
                    for header, cell in zip(headers[1:], row[1:])
                }
            return processed_data
        except (AttributeError, IndexError, TypeError) as e:
            print(f"Error processing table: {str(e)}")
            return {}

    def get_nested_value(self, data, section, key, year):
        """Return ``data[section][key][str(year)]``, or 0 if any level is missing."""
        try:
            return data.get(section, {}).get(key, {}).get(str(year), 0)
        except (AttributeError, TypeError):
            return 0

    @staticmethod
    def _ratio(numerator, denominator, scale=1):
        """Return ``numerator / denominator * scale``, or 0 for a zero denominator."""
        if denominator == 0:
            return 0
        return (numerator / denominator) * scale

    def calculate_metrics(self, income_data, balance_data):
        """Compute the financial ratios for fiscal year 2025.

        Args:
            income_data: Parsed income statement (see ``parse_financial_data``).
            balance_data: Parsed balance sheet.

        Returns:
            dict of metric name to value; percentages are expressed as
            percent (e.g. ``40.0`` for 40%). Missing inputs count as 0 and a
            zero denominator yields 0. Returns ``{}`` if a value is not
            numeric.
        """
        try:
            get = self.get_nested_value
            ratio = self._ratio
            metrics = {}

            # 1. Gross Profit Margin
            revenue = get(income_data, "Revenue", "Total Net Revenue", "2025")
            cogs = get(income_data, "Cost and Gross Profit", "Cost of Goods Sold", "2025")
            metrics['gross_profit_margin'] = ratio(revenue - cogs, revenue, 100)

            # 2. Current Ratio
            current_assets = get(balance_data, "Key Totals", "Total_Current_Assets", "2025")
            current_liabilities = get(balance_data, "Key Totals", "Total_Current_Liabilities", "2025")
            metrics['current_ratio'] = ratio(current_assets, current_liabilities)

            # 3. Debt Ratio
            total_liabilities = get(balance_data, "Key Totals", "Total_Liabilities", "2025")
            total_assets = get(balance_data, "Key Totals", "Total_Assets", "2025")
            metrics['debt_ratio'] = ratio(total_liabilities, total_assets, 100)

            # 4. Sustainable Growth Rate (SGR) = ROE x retention ratio
            net_income = get(income_data, "Profit Summary", "Net Earnings", "2025")
            equity = get(balance_data, "Key Totals", "Total_Shareholders_Equity", "2025")
            dividends = get(income_data, "Profit Summary", "Dividends Paid", "2025")
            roe = ratio(net_income, equity, 100)
            retention_ratio = ratio(net_income - dividends, net_income)
            # roe is already a percentage, so the product is a percentage
            # too; dividing by 100 again would under-report SGR 100-fold
            # against the "%" display and the 5/10 thresholds used later.
            metrics['sgr'] = roe * retention_ratio

            # 5. Accounts Receivable Turnover
            accounts_receivable = get(
                balance_data, "Balance Sheet Data 2021-2025", "Accounts_Receivable", "2025"
            )
            metrics['ar_turnover'] = ratio(revenue, accounts_receivable)

            # 6. Return on Equity (ROE)
            metrics['roe'] = roe

            # 7. Net Profit Margin
            metrics['net_profit_margin'] = ratio(net_income, revenue, 100)

            # 8. Retained Earnings Ratio
            retained_earnings = get(
                balance_data, "Balance Sheet Data 2021-2025", "Retained_Earnings", "2025"
            )
            metrics['retained_earnings_ratio'] = ratio(retained_earnings, total_assets, 100)

            # 9. Revenue Growth (YoY)
            revenue_2024 = get(income_data, "Revenue", "Total Net Revenue", "2024")
            metrics['revenue_growth'] = (
                ((revenue / revenue_2024) - 1) * 100 if revenue_2024 != 0 else 0
            )

            # 10. Revenue CAGR (2021-2025): four compounding periods
            revenue_2021 = get(income_data, "Revenue", "Total Net Revenue", "2021")
            growth_factor = ratio(revenue, revenue_2021)
            if revenue_2021 != 0 and growth_factor >= 0:
                metrics['revenue_cagr'] = (growth_factor ** (1 / 4) - 1) * 100
            else:
                # A negative factor would make the fractional power complex.
                metrics['revenue_cagr'] = 0
            return metrics
        except (ArithmeticError, TypeError, ValueError) as e:
            print(f"Error calculating metrics: {e}")
            return {}

    def generate_analysis(self, metrics):
        """Ask the language model for a narrative analysis of ``metrics``.

        Returns the model's completion (without the prompt). If generation
        raises or yields fewer than 100 words, the rule-based
        ``generate_fallback_analysis`` text is returned instead.
        """
        try:
            # NOTE(review): the prompt uses [INST] tags; TinyLlama-Chat ships
            # its own chat template -- confirm which format works best.
            prompt = "\n".join([
                "[INST] As a financial analyst, provide a comprehensive analysis based on these metrics:",
                "1. Profitability:",
                f"- Gross Profit Margin: {metrics.get('gross_profit_margin', 0):.2f}%",
                f"- Net Profit Margin: {metrics.get('net_profit_margin', 0):.2f}%",
                f"- Return on Equity: {metrics.get('roe', 0):.2f}%",
                "2. Liquidity & Efficiency:",
                f"- Current Ratio: {metrics.get('current_ratio', 0):.2f}",
                f"- Accounts Receivable Turnover: {metrics.get('ar_turnover', 0):.2f}",
                "3. Financial Structure:",
                f"- Debt Ratio: {metrics.get('debt_ratio', 0):.2f}%",
                f"- Retained Earnings Ratio: {metrics.get('retained_earnings_ratio', 0):.2f}%",
                "4. Growth:",
                f"- Sustainable Growth Rate: {metrics.get('sgr', 0):.2f}%",
                f"- Revenue Growth (YoY): {metrics.get('revenue_growth', 0):.2f}%",
                "Provide:",
                "1. Overall financial health assessment",
                "2. Key strengths and concerns",
                "3. Operational efficiency analysis",
                "4. Specific recommendations for improvement",
                "[/INST]",
            ])
            inputs = self.llama_tokenizer(
                prompt, return_tensors="pt", truncation=True, max_length=2048
            )
            prompt_length = inputs["input_ids"].shape[-1]
            with torch.no_grad():  # inference only; skip autograd bookkeeping
                outputs = self.llama_model.generate(
                    inputs["input_ids"],
                    attention_mask=inputs.get("attention_mask"),
                    max_new_tokens=1024,
                    min_new_tokens=200,
                    # temperature/top_p only take effect when sampling.
                    do_sample=True,
                    temperature=0.7,
                    top_p=0.95,
                    repetition_penalty=1.2,
                    # length_penalty dropped: it only applies to beam search.
                )
            # A causal LM returns prompt + completion; keep the completion
            # only, so the prompt is neither echoed nor counted below.
            analysis = self.llama_tokenizer.decode(
                outputs[0][prompt_length:], skip_special_tokens=True
            ).strip()
            if len(analysis.split()) < 100:
                return self.generate_fallback_analysis(metrics)
            return analysis
        except Exception as e:
            print(f"Error generating analysis: {str(e)}")
            return self.generate_fallback_analysis(metrics)

    def generate_fallback_analysis(self, metrics):
        """Build a rule-based summary used when the model output is unusable.

        Each metric is printed with its ``interpret_metric`` label, followed
        by the text from ``generate_recommendations``. Returns an error
        string if a metric value cannot be formatted as a number.
        """
        # (section heading, [(label, metric key, unit suffix), ...])
        layout = [
            ("1. Profitability Assessment:", [
                ("Gross Profit Margin", 'gross_profit_margin', "%"),
                ("Net Profit Margin", 'net_profit_margin', "%"),
                ("Return on Equity", 'roe', "%"),
            ]),
            ("2. Liquidity & Efficiency Analysis:", [
                ("Current Ratio", 'current_ratio', ""),
                ("AR Turnover", 'ar_turnover', ""),
            ]),
            ("3. Financial Structure:", [
                ("Debt Ratio", 'debt_ratio', "%"),
                ("Retained Earnings Ratio", 'retained_earnings_ratio', "%"),
            ]),
            ("4. Growth & Sustainability:", [
                ("Sustainable Growth Rate", 'sgr', "%"),
                ("Revenue Growth", 'revenue_growth', "%"),
            ]),
        ]
        try:
            lines = ["Financial Analysis Summary:"]
            for heading, entries in layout:
                lines.append(heading)
                for label, key, suffix in entries:
                    value = metrics.get(key, 0)
                    lines.append(f"- {label}: {value:.2f}{suffix}")
                    lines.append(f"({self.interpret_metric(key, value)})")
            lines.append(self.generate_recommendations(metrics))
            return "\n".join(lines)
        except (TypeError, ValueError) as e:
            return f"Error generating fallback analysis: {str(e)}"

    def interpret_metric(self, metric_name, value):
        """Map a metric value to a qualitative label.

        Unknown metric names yield ``'No interpretation'``; values that
        cannot be compared with numbers yield ``'Unable to interpret'``.
        """
        interpretations = {
            'gross_profit_margin': lambda x: 'Strong' if x > 40 else 'Adequate' if x > 30 else 'Needs improvement',
            'current_ratio': lambda x: 'Strong' if x > 2 else 'Adequate' if x > 1 else 'Concerning',
            'debt_ratio': lambda x: 'Conservative' if x < 40 else 'Moderate' if x < 60 else 'High risk',
            'ar_turnover': lambda x: 'Excellent' if x > 8 else 'Good' if x > 4 else 'Needs improvement',
            'roe': lambda x: 'Strong' if x > 15 else 'Adequate' if x > 10 else 'Below target',
            'net_profit_margin': lambda x: 'Strong' if x > 10 else 'Adequate' if x > 5 else 'Needs improvement',
            'retained_earnings_ratio': lambda x: 'Strong' if x > 30 else 'Adequate' if x > 15 else 'Low retention',
            'sgr': lambda x: 'Strong' if x > 10 else 'Moderate' if x > 5 else 'Limited growth potential',
            'revenue_growth': lambda x: 'Strong' if x > 10 else 'Moderate' if x > 5 else 'Below industry average',
        }
        classify = interpretations.get(metric_name)
        if classify is None:
            return 'No interpretation'
        try:
            return classify(value)
        except TypeError:
            return 'Unable to interpret'

    def generate_recommendations(self, metrics):
        """Return a "Key Recommendations" list for metrics below threshold.

        Missing metrics are treated as 0. When no threshold is breached a
        single neutral line is emitted so the heading is never left empty.
        """
        recommendations = []
        if metrics.get('gross_profit_margin', 0) < 30:
            recommendations.append("- Review pricing strategy and cost structure to improve gross margins")
        if metrics.get('current_ratio', 0) < 1.5:
            recommendations.append("- Strengthen working capital management to improve liquidity")
        if metrics.get('debt_ratio', 0) > 60:
            recommendations.append("- Consider debt reduction strategies to improve financial flexibility")
        if metrics.get('ar_turnover', 0) < 4:
            recommendations.append("- Improve accounts receivable collection practices")
        if metrics.get('roe', 0) < 10:
            recommendations.append("- Focus on improving operational efficiency to enhance returns")
        if metrics.get('revenue_growth', 0) < 5:
            recommendations.append("- Develop strategies to accelerate revenue growth")
            recommendations.append("- Consider strategic acquisitions or new market entry")
        if not recommendations:
            recommendations.append("- No immediate corrective actions identified; continue monitoring key ratios")
        return "Key Recommendations:\n" + "\n".join(recommendations)

    def analyze_financials(self, balance_sheet_file, income_stmt_file):
        """Run the full pipeline on two Markdown files and return a report.

        Args:
            balance_sheet_file: Path to the balance sheet Markdown file.
            income_stmt_file: Path to the income statement Markdown file.

        Returns:
            A JSON string with the key metrics and the narrative analysis,
            or a human-readable error string if validation or processing
            fails. The computed metrics are also kept in ``last_metrics``.
        """
        try:
            # Validate input files
            if not (self.is_valid_markdown(balance_sheet_file) and self.is_valid_markdown(income_stmt_file)):
                return "Error: One or both files are invalid or not in Markdown format."

            # Read files
            with open(balance_sheet_file, 'r', encoding='utf-8') as f:
                balance_sheet = f.read()
            with open(income_stmt_file, 'r', encoding='utf-8') as f:
                income_stmt = f.read()

            # Process financial data
            income_data = self.parse_financial_data(income_stmt)
            balance_data = self.parse_financial_data(balance_sheet)

            # Calculate metrics
            metrics = self.calculate_metrics(income_data, balance_data)
            self.last_metrics = metrics
            if not metrics:
                # calculate_metrics already logged the cause; stop before
                # spending time on text generation for an empty result.
                return "Error in analysis: financial metrics could not be calculated from the uploaded files."

            # Generate analysis
            analysis = self.generate_analysis(metrics)

            # Prepare final results
            results = {
                "Financial Analysis": {
                    "Key Metrics": {
                        "Profitability": {
                            "Gross Profit Margin": f"{metrics['gross_profit_margin']:.2f}%",
                            "Net Profit Margin": f"{metrics['net_profit_margin']:.2f}%",
                            "Return on Equity": f"{metrics['roe']:.2f}%",
                        },
                        "Liquidity": {
                            "Current Ratio": f"{metrics['current_ratio']:.2f}",
                            "Accounts Receivable Turnover": f"{metrics['ar_turnover']:.2f}",
                        },
                        "Solvency": {
                            "Debt Ratio": f"{metrics['debt_ratio']:.2f}%",
                            "Retained Earnings Ratio": f"{metrics['retained_earnings_ratio']:.2f}%",
                        },
                        "Growth": {
                            "Sustainable Growth Rate": f"{metrics['sgr']:.2f}%",
                            "Revenue Growth (YoY)": f"{metrics['revenue_growth']:.2f}%",
                        },
                    },
                    "Analysis": analysis,
                    "Analysis Period": "2021-2025",
                    "Note": "Analysis based on CFI standards",
                }
            }
            return json.dumps(results, indent=2)
        except Exception as e:
            # UI boundary: report the failure as text instead of crashing.
            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"

    def fine_tune_models(self, train_texts, train_labels, epochs=3):
        """Fine-tune the loaded model on custom data and save the result.

        Args:
            train_texts: Training text samples.
            train_labels: Labels aligned with ``train_texts``.
            epochs: Number of training epochs.

        Errors are logged to stdout rather than raised.
        """
        # NOTE(review): FinancialDataset emits one integer label per sample,
        # while the model is a causal LM -- confirm the label format is what
        # Trainer expects for this model before relying on this method.
        try:
            # Prepare dataset
            train_dataset = FinancialDataset(train_texts, train_labels, self.llama_tokenizer)

            # Training arguments
            training_args = TrainingArguments(
                output_dir="./financial_model_tuned",
                num_train_epochs=epochs,
                per_device_train_batch_size=4,
                logging_dir="./logs",
                logging_steps=10,
                save_steps=50,
                eval_steps=50,
                learning_rate=2e-5,
                weight_decay=0.01,
                warmup_steps=500,
            )

            # Initialize trainer
            trainer = Trainer(
                model=self.llama_model,
                args=training_args,
                train_dataset=train_dataset,
            )

            # Fine-tune the model
            trainer.train()

            # Save the fine-tuned model
            self.llama_model.save_pretrained("./financial_model_tuned")
            self.llama_tokenizer.save_pretrained("./financial_model_tuned")
            print("Fine-tuning completed successfully!")
        except Exception as e:
            print(f"Error in fine-tuning: {str(e)}")
def create_interface():
    """Build the Gradio UI wired to a freshly created FinancialAnalyzer.

    Returns:
        The ``gr.Interface`` taking two uploaded file paths (balance sheet,
        income statement) and showing the analysis text in a textbox.
    """
    analyzer = FinancialAnalyzer()

    upload_fields = [
        gr.File(label="Balance Sheet (Markdown)", type="filepath"),
        gr.File(label="Income Statement (Markdown)", type="filepath"),
    ]
    results_box = gr.Textbox(label="Analysis Results", lines=25)
    blurb = (
        "Upload financial statements in Markdown format for AI-powered analysis.\n"
        "Analysis is based on Corporate Finance Institute (CFI) standards."
    )

    return gr.Interface(
        fn=analyzer.analyze_financials,
        inputs=upload_fields,
        outputs=results_box,
        title="AI Financial Statement Analyzer",
        description=blurb,
        cache_examples=False,
    )
if __name__ == "__main__":
    # Script entry point: build the interface, then start serving it.
    iface = create_interface()
    iface.launch()