Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| import gradio as gr | |
| from datetime import datetime | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from PIL import Image | |
| import pytesseract | |
| import io | |
| import json | |
| import cv2 | |
| import os | |
| import numpy as np | |
class DocumentProcessor:
    """Run Tesseract OCR on uploaded document images.

    Creating an instance makes sure the ``uploaded_documents`` folder exists.
    """

    def __init__(self):
        self.upload_folder = "uploaded_documents"
        os.makedirs(self.upload_folder, exist_ok=True)

    @staticmethod
    def _to_grayscale(img_array):
        """Return a single-channel view/copy of ``img_array`` for OCR.

        Colour input is treated as RGB(A): both Gradio's ``type="numpy"``
        image component and ``np.array(PIL image)`` use that channel order,
        so the BGR conversion code would weight red and blue incorrectly.
        """
        if img_array.ndim != 3:
            # Already 2-D (grayscale); nothing to convert.
            return img_array
        channels = img_array.shape[2]
        if channels == 1:
            # (H, W, 1) is grayscale with a redundant axis; cvtColor rejects it.
            return img_array[:, :, 0]
        if channels == 4:
            return cv2.cvtColor(img_array, cv2.COLOR_RGBA2GRAY)
        return cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)

    def process_image(self, image):
        """Extract text from an image and parse it into a summary dict.

        Args:
            image: A NumPy array, any object ``np.array`` can convert
                (e.g. a PIL image), or ``None`` when nothing was uploaded.

        Returns:
            A ``(message, parsed_data)`` tuple. On success ``message``
            contains the extracted text and ``parsed_data`` is the dict
            built by :meth:`parse_text`. When ``image`` is ``None`` or any
            step raises, ``parsed_data`` is ``None`` and ``message``
            explains why; exceptions are reported, never propagated.
        """
        try:
            if image is None:
                return "No image uploaded", None

            # Normalise the input to a NumPy array.
            img_array = image if isinstance(image, np.ndarray) else np.array(image)

            gray = self._to_grayscale(img_array)

            # Boost contrast, then binarise with Otsu's automatic threshold
            # (the explicit threshold value 0 is ignored when OTSU is set).
            gray = cv2.convertScaleAbs(gray, alpha=1.5, beta=0)
            _, threshold = cv2.threshold(
                gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
            )

            # Perform OCR on the binarised image.
            text = pytesseract.image_to_string(threshold)

            parsed_data = self.parse_text(text)
            return (
                f"Document processed successfully!\n\nExtracted Text:\n{text}",
                parsed_data,
            )
        except Exception as e:
            # UI boundary: surface the failure as a message instead of crashing.
            return f"Error processing document: {str(e)}", None

    def parse_text(self, text):
        """Summarise OCR output.

        Args:
            text: The raw extracted text.

        Returns:
            A dict with ``raw_text``, ``line_count`` (every ``'\\n'``-separated
            line, blank ones included), ``processed_date`` (local time as
            ``YYYY-MM-DD HH:MM:SS``) and ``extracted_lines`` (the
            non-blank lines, unstripped).
        """
        lines = text.split('\n')
        parsed_data = {
            'raw_text': text,
            'line_count': len(lines),
            'processed_date': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            'extracted_lines': [line for line in lines if line.strip()],
        }
        return parsed_data
class BusinessManagementSystem:
    """Back end for the dashboard: document OCR, bank and marketing reports.

    Attributes set by ``load_data``: ``bank_data``, ``marketing_data``,
    ``account_data`` and ``invoices`` (all pandas DataFrames).
    """

    def __init__(self):
        self.doc_processor = DocumentProcessor()
        self.load_data()

    def load_data(self):
        """Load the four CSV data sets from the working directory.

        If any file is missing, bank and marketing data fall back to
        generated mock data, and ``account_data`` / ``invoices`` become
        empty DataFrames so every attribute exists either way.
        """
        try:
            self.bank_data = pd.read_csv('bank_statements.csv')
            self.marketing_data = pd.read_csv('marketing_data.csv')
            self.account_data = pd.read_csv('account_data.csv')
            self.invoices = pd.read_csv('invoices.csv')
        except FileNotFoundError:
            print("CSV files not found. Using mock data...")
            self.bank_data = self.mock_bank_data()
            self.marketing_data = self.mock_marketing_data()
            # There are no mock generators for these two data sets; empty
            # frames keep later attribute access from raising AttributeError.
            self.account_data = pd.DataFrame()
            self.invoices = pd.DataFrame()

    def mock_bank_data(self):
        """Return 10 daily mock transactions starting 2024-01-01.

        Columns: ``date``, ``transaction``, ``amount`` (random integers in
        ``[1000, 10000)``).
        """
        return pd.DataFrame({
            'date': pd.date_range(start='2024-01-01', periods=10),
            'transaction': [f'Transaction {i}' for i in range(10)],
            'amount': np.random.randint(1000, 10000, 10),
        })

    def mock_marketing_data(self):
        """Return 5 mock campaigns.

        Columns: ``campaign``, ``clicks`` (random integers in ``[100, 1000)``)
        and ``conversions`` (random integers in ``[10, 100)``).
        """
        return pd.DataFrame({
            'campaign': [f'Campaign {i}' for i in range(5)],
            'clicks': np.random.randint(100, 1000, 5),
            'conversions': np.random.randint(10, 100, 5),
        })

    def process_document(self, image):
        """Delegate to ``DocumentProcessor.process_image`` and return its result."""
        return self.doc_processor.process_image(image)

    def generate_bank_report(self):
        """Plot ``amount`` over ``date`` and summarise the bank data.

        Returns:
            ``(figure, summary)`` on success, or ``(None, error_message)``
            if anything raises (e.g. a required column is missing).
        """
        try:
            fig = go.Figure()
            fig.add_trace(go.Scatter(
                x=self.bank_data['date'],
                y=self.bank_data['amount'],
                mode='lines+markers',
                name='Transactions'
            ))
            fig.update_layout(
                title='Bank Transaction History',
                xaxis_title='Date',
                yaxis_title='Amount ($)'
            )
            total_transactions = len(self.bank_data)
            total_amount = self.bank_data['amount'].sum()
            avg_transaction = self.bank_data['amount'].mean()
            summary = f"""
            Banking Summary:
            Total Transactions: {total_transactions}
            Total Amount: ${total_amount:,.2f}
            Average Transaction: ${avg_transaction:,.2f}
            """
            return fig, summary
        except Exception as e:
            # UI boundary: report the failure in the summary box.
            return None, f"Error generating bank report: {str(e)}"

    def analyze_marketing(self):
        """Chart clicks vs. conversions per campaign and summarise totals.

        Adds (or refreshes) a ``conversion_rate`` percentage column on
        ``self.marketing_data``. Campaigns with zero clicks get a NaN rate,
        which the average skips, instead of an infinite one.

        Returns:
            ``(figure, summary)`` on success, or ``(None, error_message)``
            if anything raises.
        """
        try:
            clicks = self.marketing_data['clicks']
            # Mask zero-click campaigns so the division yields NaN, not inf;
            # Series.mean() ignores NaN by default.
            self.marketing_data['conversion_rate'] = (
                self.marketing_data['conversions'] / clicks.where(clicks != 0) * 100
            )
            fig = px.bar(
                self.marketing_data,
                x='campaign',
                y=['clicks', 'conversions'],
                title='Campaign Performance',
                barmode='group'
            )
            summary = f"""
            Marketing Summary:
            Total Campaigns: {len(self.marketing_data)}
            Total Clicks: {self.marketing_data['clicks'].sum():,}
            Total Conversions: {self.marketing_data['conversions'].sum():,}
            Average Conversion Rate: {self.marketing_data['conversion_rate'].mean():.2f}%
            """
            return fig, summary
        except Exception as e:
            # UI boundary: report the failure in the summary box.
            return None, f"Error analyzing marketing data: {str(e)}"
def create_gradio_interface():
    """Build the three-tab Gradio dashboard.

    Creates one ``BusinessManagementSystem`` and wires its methods to the
    buttons on the Document Processing, Banking and Marketing tabs.

    Returns:
        The ``gr.Blocks`` interface, not yet launched.
    """
    bms = BusinessManagementSystem()
    with gr.Blocks(theme=gr.themes.Soft()) as interface:
        gr.Markdown("""
        # AI-Driven Business Management System
        Upload documents, analyze banking data, and track marketing campaigns.
        """)
        with gr.Tabs():
            # Document Processing Tab
            with gr.Tab("Document Processing"):
                # The uploader below is an image component feeding image OCR,
                # so the help text must not advertise PDF support.
                gr.Markdown("""
                ### Upload and Process Documents
                Support for PNG and JPG images (PDFs must be converted to images first). The system will extract text and data from the documents.
                """)
                with gr.Row():
                    with gr.Column():
                        doc_input = gr.Image(
                            label="Upload Document",
                            type="numpy"
                        )
                        process_btn = gr.Button("Process Document", variant="primary")
                    with gr.Column():
                        doc_output = gr.Textbox(
                            label="Processing Results",
                            lines=10
                        )
                        json_output = gr.JSON(
                            label="Extracted Data"
                        )
                # process_document returns (message, parsed_data) in this order.
                process_btn.click(
                    fn=bms.process_document,
                    inputs=[doc_input],
                    outputs=[doc_output, json_output]
                )
            # Banking Tab
            with gr.Tab("Banking"):
                gr.Markdown("### Banking Analysis")
                bank_btn = gr.Button("Generate Bank Report", variant="primary")
                bank_plot = gr.Plot(label="Transaction History")
                bank_summary = gr.Textbox(
                    label="Banking Summary",
                    lines=5
                )
                # generate_bank_report returns (figure, summary).
                bank_btn.click(
                    fn=bms.generate_bank_report,
                    inputs=[],
                    outputs=[bank_plot, bank_summary]
                )
            # Marketing Tab
            with gr.Tab("Marketing"):
                gr.Markdown("### Marketing Campaign Analysis")
                marketing_btn = gr.Button("Analyze Marketing Campaigns", variant="primary")
                marketing_plot = gr.Plot(label="Campaign Performance")
                marketing_summary = gr.Textbox(
                    label="Marketing Summary",
                    lines=5
                )
                # analyze_marketing returns (figure, summary).
                marketing_btn.click(
                    fn=bms.analyze_marketing,
                    inputs=[],
                    outputs=[marketing_plot, marketing_summary]
                )
    return interface
# Google Colab setup: run the two commands below in a notebook cell first.
#!pip install -q pytesseract opencv-python
#!apt-get install -y tesseract-ocr > /dev/null 2>&1

# Entry point: build the dashboard and serve it with a public share link.
if __name__ == "__main__":
    interface = create_gradio_interface()
    interface.launch(share=True)