CoderNoah committed on
Commit
8b7e8f0
·
0 Parent(s):

Initial commit

Browse files
.dockerignore ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python cache files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ env/
8
+ build/
9
+ develop-eggs/
10
+ dist/
11
+ downloads/
12
+ eggs/
13
+ .eggs/
14
+ lib/
15
+ lib64/
16
+ parts/
17
+ sdist/
18
+ var/
19
+ wheels/
20
+ *.egg-info/
21
+ .installed.cfg
22
+ *.egg
23
+
24
+ # Virtual environments
25
+ .venv/
26
+ venv/
27
+ ENV/
28
+ env/
29
+
30
+ # IDE files
31
+ .vscode/
32
+ .idea/
33
+ *.swp
34
+ *.swo
35
+ *~
36
+
37
+ # OS files
38
+ .DS_Store
39
+ Thumbs.db
40
+
41
+ # Git files
42
+ .git/
43
+ .gitignore
44
+
45
+ # Project specific
46
+ data/chroma_db/
47
+ data/*.log
48
+ uploads/*
49
+ !uploads/.gitkeep
50
+
51
+ # Environment files
52
+ .env
53
+
54
+ # Documentation
55
+ *.md
56
+ !README.md
57
+
58
+ # Lock files
59
+ uv.lock
60
+ poetry.lock
61
+ Pipfile.lock
62
+
63
+ # Testing
64
+ .pytest_cache/
65
+ .coverage
66
+ htmlcov/
67
+
68
+ # Jupyter notebooks
69
+ *.ipynb
70
+ .ipynb_checkpoints/
71
+
72
+ # Local development
73
+ local/
74
+ tmp/
75
+ temp/
.env.example ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =============================================================================
2
+ # LEGA.AI CONFIGURATION TEMPLATE
3
+ # =============================================================================
4
+ # Copy this file to .env and update the values below
5
+ # This is a template file - safe to commit to version control
6
+
7
+ # =============================================================================
8
+ # GOOGLE AI API CONFIGURATION (REQUIRED)
9
+ # =============================================================================
10
+ # Get your API key from: https://aistudio.google.com/
11
+ GOOGLE_API_KEY=your-google-api-key-here
12
+
13
+ # =============================================================================
14
+ # APPLICATION SETTINGS
15
+ # =============================================================================
16
+ DEBUG=True
17
+ LOG_LEVEL=INFO
18
+
19
+ # Streamlit server configuration
20
+ STREAMLIT_SERVER_PORT=8501
21
+ STREAMLIT_SERVER_ADDRESS=localhost
22
+
23
+ # File upload settings
24
+ MAX_FILE_SIZE_MB=10
25
+ SUPPORTED_FILE_TYPES=pdf,docx,txt
26
+
27
+ # =============================================================================
28
+ # AI MODEL SETTINGS
29
+ # =============================================================================
30
+ TEMPERATURE=0.2
31
+ MAX_TOKENS=2048
32
+ EMBEDDING_MODEL=models/text-embedding-004
33
+
34
+ # =============================================================================
35
+ # VECTOR STORE CONFIGURATION
36
+ # =============================================================================
37
+ CHROMA_PERSIST_DIRECTORY=./data/chroma_db
38
+
39
+ # =============================================================================
40
+ # STORAGE CONFIGURATION
41
+ # =============================================================================
42
+ UPLOAD_DIR=./uploads
43
+ DATA_DIR=./data
44
+ LOG_FILE=./data/app.log
45
+
46
+ # =============================================================================
47
+ # SECURITY SETTINGS
48
+ # =============================================================================
49
+ SECRET_KEY=your-secret-key-here
50
+ SESSION_TIMEOUT_MINUTES=60
51
+
52
+ # =============================================================================
53
+ # DATABASE CONFIGURATION
54
+ # =============================================================================
55
+ DATABASE_URL=sqlite:///./data/lega.db
56
+
57
+ # =============================================================================
58
+ # PERFORMANCE SETTINGS
59
+ # =============================================================================
60
+ MAX_CONCURRENT_UPLOADS=5
61
+ DOCUMENT_PROCESSING_TIMEOUT=300
62
+ ENABLE_CACHE=True
63
+ CACHE_TTL_SECONDS=3600
64
+
65
+ # =============================================================================
66
+ # FEATURE FLAGS
67
+ # =============================================================================
68
+ ENABLE_DOCUMENT_LIBRARY=True
69
+ ENABLE_QA_ASSISTANT=True
70
+ ENABLE_MARKET_COMPARISON=True
71
+ ENABLE_TIMELINE_TRACKER=True
72
+ ENABLE_EXPORT_FEATURES=True
73
+
74
+ # =============================================================================
75
+ # REGIONAL SETTINGS
76
+ # =============================================================================
77
+ DEFAULT_REGION=India
78
+ DEFAULT_CURRENCY=INR
79
+ TIMEZONE=Asia/Kolkata
80
+
81
+ # =============================================================================
82
+ # ADVANCED AI SETTINGS
83
+ # =============================================================================
84
+ RISK_SENSITIVITY=3
85
+ SIMPLIFICATION_LEVEL=intermediate
86
+ MAX_RISK_FACTORS=10
87
+
88
+ # =============================================================================
89
+ # API RATE LIMITING
90
+ # =============================================================================
91
+ API_REQUESTS_PER_MINUTE=60
92
+ API_REQUESTS_PER_DAY=1000
.env.hf ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =============================================================================
2
+ # HUGGING FACE SPACES CONFIGURATION
3
+ # =============================================================================
4
+ # This file contains default configuration for Hugging Face Spaces deployment
5
+ # Set these environment variables in your Hugging Face Space settings
6
+
7
+ # =============================================================================
8
+ # REQUIRED: GOOGLE AI API CONFIGURATION
9
+ # =============================================================================
10
+ # Get your API key from: https://aistudio.google.com/
11
+ # Set this as a secret in Hugging Face Spaces under Settings -> Variables and secrets (variables are publicly visible)
12
+ GOOGLE_API_KEY=
13
+
14
+ # =============================================================================
15
+ # HUGGING FACE SPACES SETTINGS (Automatically configured)
16
+ # =============================================================================
17
+ # These are set automatically by the startup script
18
+ PORT=7860
19
+ STREAMLIT_SERVER_PORT=7860
20
+ STREAMLIT_SERVER_ADDRESS=0.0.0.0
21
+
22
+ # =============================================================================
23
+ # APPLICATION SETTINGS FOR PRODUCTION
24
+ # =============================================================================
25
+ DEBUG=False
26
+ LOG_LEVEL=INFO
27
+
28
+ # File upload settings (conservative for cloud deployment)
29
+ MAX_FILE_SIZE_MB=5
30
+ SUPPORTED_FILE_TYPES=pdf,docx,txt
31
+
32
+ # =============================================================================
33
+ # AI MODEL SETTINGS (Optimized for Hugging Face)
34
+ # =============================================================================
35
+ TEMPERATURE=0.2
36
+ MAX_TOKENS=2048
37
+ EMBEDDING_MODEL=models/text-embedding-004
38
+
39
+ # =============================================================================
40
+ # STORAGE CONFIGURATION (Cloud optimized)
41
+ # =============================================================================
42
+ CHROMA_PERSIST_DIRECTORY=./data/chroma_db
43
+ UPLOAD_DIR=./uploads
44
+ DATA_DIR=./data
45
+ LOG_FILE=./data/app.log
46
+
47
+ # =============================================================================
48
+ # SECURITY SETTINGS FOR CLOUD DEPLOYMENT
49
+ # =============================================================================
50
+ SECRET_KEY=huggingface-lega-ai-deployment
51
+ SESSION_TIMEOUT_MINUTES=30
.gitignore ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+
12
+ # Environment variables (contains secrets)
13
+ .env
14
+
15
+ # Local data and uploads
16
+ data/
17
+ uploads/
18
+ *.log
19
+
20
+ # Temporary files
21
+ temp/
22
+ tmp/
23
+ .tmp
24
+
25
+ # IDE files
26
+ .vscode/
27
+ .idea/
28
+ *.swp
29
+ *.swo
30
+
31
+ # OS files
32
+ Thumbs.db
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.13
.streamlit/config.toml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [server]
2
+ port = 7860
3
+ address = "0.0.0.0"
4
+ headless = true
5
+ fileWatcherType = "none"
6
+ enableCORS = false
7
+ enableXsrfProtection = false
8
+
9
+ [browser]
10
+ gatherUsageStats = false
11
+
12
+ [global]
13
+ dataFrameSerialization = "legacy"
14
+
15
+ [client]
16
+ caching = false
17
+ displayEnabled = false
Dockerfile ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use Python 3.11 as Hugging Face Spaces supports it well
2
+ FROM python:3.11-slim
3
+
4
+ # Set working directory
5
+ WORKDIR /app
6
+
7
+ # Set environment variables for Python
8
+ ENV PYTHONUNBUFFERED=1 \
9
+ PYTHONDONTWRITEBYTECODE=1 \
10
+ PIP_NO_CACHE_DIR=1 \
11
+ PIP_DISABLE_PIP_VERSION_CHECK=1
12
+
13
+ # Install system dependencies
14
+ RUN apt-get update && apt-get install -y \
15
+ build-essential \
16
+ curl \
17
+ git \
18
+ && rm -rf /var/lib/apt/lists/*
19
+
20
+ # Copy requirements first for better layer caching
21
+ COPY requirements.txt .
22
+
23
+ # Install Python dependencies
24
+ RUN pip install --no-cache-dir -r requirements.txt
25
+
26
+ # Copy the application code
27
+ COPY . .
28
+
29
+ # Create necessary directories
30
+ RUN mkdir -p data/chroma_db uploads data .streamlit && \
31
+ touch data/app.log && \
32
+ chmod -R 777 data uploads .streamlit
33
+
34
+ # Expose the port that Streamlit runs on
35
+ EXPOSE 7860
36
+
37
+ # Set the default port for Hugging Face Spaces
38
+ ENV STREAMLIT_SERVER_PORT=7860
39
+ ENV STREAMLIT_SERVER_ADDRESS=0.0.0.0
40
+ ENV STREAMLIT_CONFIG_DIR=/app/.streamlit
41
+ ENV XDG_CONFIG_HOME=/app
42
+
43
+ # Health check
44
+ HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health
45
+
46
+ # Command to run the application
47
+ CMD ["streamlit", "run", "main.py", "--server.port=7860", "--server.address=0.0.0.0", "--server.headless=true", "--server.fileWatcherType=none"]
HUGGINGFACE_DEPLOYMENT.md ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Spaces Deployment Guide for Lega.AI
2
+
3
+ ## 🚀 Quick Deployment to Hugging Face Spaces
4
+
5
+ ### Step 1: Create a New Space
6
+
7
+ 1. Go to [Hugging Face Spaces](https://huggingface.co/spaces)
8
+ 2. Click "Create new Space"
9
+ 3. Choose:
10
+ - **Space name**: `lega-ai` (or your preferred name)
11
+ - **License**: `MIT`
12
+ - **SDK**: `Docker`
13
+ - **Hardware**: `CPU basic` (sufficient for this app)
14
+
15
+ ### Step 2: Upload the Code
16
+
17
+ 1. Clone or download this repository
18
+ 2. Upload all files to your Hugging Face Space repository
19
+ 3. Ensure the `README.md` has the correct frontmatter:
20
+ ```yaml
21
+ ---
22
+ title: Lega.AI
23
+ emoji: ⚖️
24
+ colorFrom: pink
25
+ colorTo: indigo
26
+ sdk: docker
27
+ pinned: false
28
+ ---
29
+ ```
30
+
31
+ ### Step 3: Configure Environment Variables
32
+
33
+ 1. In your Space, go to **Settings** → **Variables and secrets**
34
+ 2. Add the required API key as a **secret** (secrets stay private, whereas variables are publicly visible):
35
+ - **Name**: `GOOGLE_API_KEY`
36
+ - **Value**: Your Google AI API key from [Google AI Studio](https://aistudio.google.com/)
37
+
38
+ ### Step 4: Deploy
39
+
40
+ 1. Commit and push your changes to the Space repository
41
+ 2. Hugging Face will automatically build and deploy your Docker container
42
+ 3. Wait for the build to complete (usually 5-10 minutes)
43
+ 4. Your app will be available at `https://huggingface.co/spaces/[username]/[space-name]`
44
+
45
+ ## 🔧 Customization Options
46
+
47
+ ### Environment Variables You Can Set:
48
+
49
+ - `GOOGLE_API_KEY` (required)
50
+ - `MAX_FILE_SIZE_MB` (default: 5)
51
+ - `TEMPERATURE` (default: 0.2)
52
+ - `LOG_LEVEL` (default: INFO)
53
+
54
+ ### Hardware Requirements:
55
+
56
+ - **CPU Basic**: Sufficient for most use cases
57
+ - **CPU Upgrade**: Recommended for heavy usage
58
+ - **GPU**: Not required for this application
59
+
60
+ ## 📋 Troubleshooting
61
+
62
+ ### Common Issues:
63
+
64
+ 1. **Build fails**: Check that all files are uploaded correctly
65
+ 2. **API errors**: Ensure `GOOGLE_API_KEY` is set correctly
66
+ 3. **Timeout**: Consider upgrading to CPU Upgrade hardware
67
+
68
+ ### Logs:
69
+
70
+ - Check the Space logs in the Hugging Face interface
71
+ - Look for startup messages and error information
72
+
73
+ ## 🔒 Security Considerations
74
+
75
+ - Never commit your API key to the repository
76
+ - Use Hugging Face Spaces secrets (not public variables) for sensitive data
77
+ - The application runs in a sandboxed environment on Hugging Face
78
+
79
+ ## 📊 Usage Limits
80
+
81
+ - Hugging Face Spaces has usage limits for free tiers
82
+ - Consider upgrading for production use
83
+ - Monitor usage in your Hugging Face account dashboard
README.md ADDED
@@ -0,0 +1,484 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Lega.AI
3
+ emoji: ⚖️
4
+ colorFrom: pink
5
+ colorTo: indigo
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ # Lega.AI
11
+
12
+ AI-powered legal document analysis and simplification platform that makes complex legal documents accessible to everyone.
13
+
14
+ ![Python](https://img.shields.io/badge/Python-3.13+-blue.svg)
15
+ ![Streamlit](https://img.shields.io/badge/Streamlit-1.49+-red.svg)
16
+ ![LangChain](https://img.shields.io/badge/LangChain-0.3+-green.svg)
17
+ ![License](https://img.shields.io/badge/License-MIT-yellow.svg)
18
+
19
+ ## 📋 Table of Contents
20
+
21
+ - [🚀 Features](#-features)
22
+ - [🛠️ Tech Stack](#️-tech-stack)
23
+ - [📋 Prerequisites](#-prerequisites)
24
+ - [🚀 Quick Start](#-quick-start)
25
+ - [🐳 Docker Deployment](#-docker-deployment)
26
+ - [📁 Project Structure](#-project-structure)
27
+ - [🎯 Usage Guide](#-usage-guide)
28
+ - [📄 Sample Documents](#-sample-documents)
29
+ - [🚨 Document Types Supported](#-document-types-supported)
30
+ - [⚡ Key Features Deep Dive](#-key-features-deep-dive)
31
+ - [🔧 Configuration Options](#-configuration-options)
32
+ - [🔒 Privacy & Security](#-privacy--security)
33
+ - [🤝 Contributing](#-contributing)
34
+ - [🆘 Support](#-support)
35
+ - [🎯 Roadmap](#-roadmap)
36
+
37
+ ## 🚀 Features
38
+
39
+ - **🔍 Advanced Document Analysis**: Upload PDF/DOCX/TXT files and get comprehensive AI-powered analysis using Google's Gemini
40
+ - **📝 Plain Language Translation**: Convert complex legal jargon into clear, understandable language with context-aware explanations
41
+ - **⚠️ Intelligent Risk Assessment**: Multi-dimensional risk scoring with color-coded severity levels and detailed explanations
42
+ - **💬 Interactive Q&A Assistant**: Ask specific questions about your documents and get instant, context-aware AI responses
43
+ - **🎯 Smart Clause Highlighting**: Visual highlighting of risky clauses with interactive tooltips and improvement suggestions
44
+ - **📊 Vector-Powered Similarity Search**: Find similar clauses across documents using Chroma vector database
45
+ - **📚 Persistent Document Library**: Organize, search, and manage all analyzed documents with metadata
46
+ - **⚠️ Risk Visualization**: Interactive charts and gauges showing risk distribution and severity
47
+ - **🗓️ Key Information Extraction**: Automatically identify important dates, deadlines, and financial terms
48
+ - **💾 Local Data Persistence**: Secure local storage of analysis results and vector embeddings
49
+ - **🎨 Modern UI/UX**: Responsive Streamlit interface with custom CSS and intuitive navigation
50
+
51
+ ## 🛠️ Tech Stack
52
+
53
+ - **Frontend**: Streamlit with multi-page navigation and custom CSS styling
54
+ - **AI/ML**: LangChain + Google Generative AI (Gemini Pro)
55
+ - **Embeddings**: Google Generative AI Embeddings (models/text-embedding-004)
56
+ - **Vector Store**: Chroma for document similarity search and persistence
57
+ - **Document Processing**: PyPDF for PDF extraction, python-docx for Word documents
58
+ - **Package Management**: UV (modern Python package manager)
59
+ - **Configuration**: Python-dotenv for environment management
60
+ - **Visualization**: Plotly for interactive charts and analytics
61
+ - **UI Components**: Streamlit-option-menu for enhanced navigation
62
+
63
+ ## 📋 Prerequisites
64
+
65
+ - Python 3.13+ for local development (required for latest features and performance; note that the provided Docker image is built on Python 3.11)
66
+ - Google AI API key (get from [Google AI Studio](https://aistudio.google.com/))
67
+ - UV package manager (recommended for fast, reliable dependency management)
68
+
69
+ ## 🚀 Quick Start
70
+
71
+ ### 1. **Clone and navigate to the project**:
72
+
73
+ ```bash
74
+ git clone <repository-url>
75
+ cd Lega.AI
76
+ ```
77
+
78
+ ### 2. **Install UV (if not already installed)**:
79
+
80
+ ```bash
81
+ # On macOS/Linux
82
+ curl -LsSf https://astral.sh/uv/install.sh | sh
83
+
84
+ # On Windows (PowerShell)
85
+ powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
86
+
87
+ # Or using pip
88
+ pip install uv
89
+ ```
90
+
91
+ ### 3. **Set up environment and install dependencies**:
92
+
93
+ ```bash
94
+ # Create and activate virtual environment with dependencies
95
+ uv sync
96
+
97
+ # Or if you prefer traditional approach:
98
+ # uv venv
99
+ # source .venv/bin/activate # On Windows: .venv\Scripts\activate
100
+ # uv pip install -r pyproject.toml
101
+ ```
102
+
103
+ ### 4. **Configure environment**:
104
+
105
+ ```bash
106
+ # Copy the template file
107
+ cp .env.example .env
108
+
109
+ # Edit .env file and update the following required settings:
110
+ ```
111
+
112
+ **Required Configuration:**
113
+
114
+ ```env
115
+ # Get your API key from: https://aistudio.google.com/
116
+ GOOGLE_API_KEY=your-google-api-key-here
117
+ ```
118
+
119
+ **Optional Configuration (with sensible defaults):**
120
+
121
+ ```env
122
+ # Application Settings
123
+ DEBUG=True
124
+ LOG_LEVEL=INFO
125
+ STREAMLIT_SERVER_PORT=8501
126
+ STREAMLIT_SERVER_ADDRESS=localhost
127
+
128
+ # File Upload Settings
129
+ MAX_FILE_SIZE_MB=10
130
+ SUPPORTED_FILE_TYPES=pdf,docx,txt
131
+
132
+ # AI Model Settings
133
+ TEMPERATURE=0.2
134
+ MAX_TOKENS=2048
135
+ EMBEDDING_MODEL=models/text-embedding-004
136
+
137
+ # Storage Configuration
138
+ CHROMA_PERSIST_DIRECTORY=./data/chroma_db
139
+ UPLOAD_DIR=./uploads
140
+ DATA_DIR=./data
141
+ LOG_FILE=./data/app.log
142
+
143
+ # Security Settings
144
+ SECRET_KEY=your-secret-key-here
145
+ SESSION_TIMEOUT_MINUTES=60
146
+ ```
147
+
148
+ ### 5. **Run the application**:
149
+
150
+ ```bash
151
+ # If using UV (recommended)
152
+ uv run streamlit run main.py
153
+
154
+ # Or with activated virtual environment
155
+ streamlit run main.py
156
+ ```
157
+
158
+ ### 6. **Open your browser** to `http://localhost:8501`
159
+
160
+ ### 🎯 Try the Demo
161
+
162
+ Once running, you can immediately test the application with the included sample documents:
163
+
164
+ - Navigate to **📄 Upload** page
165
+ - Try the sample documents: Employment contracts, NDAs, Lease agreements, Service agreements
166
+ - Experience the full analysis workflow without needing your own documents
167
+
168
+ ## 🐳 Docker Deployment
169
+
170
+ ### Local Docker Deployment
171
+
172
+ ```bash
173
+ # Build the Docker image
174
+ docker build -t lega-ai .
175
+
176
+ # Run the container
177
+ docker run -p 7860:7860 -e GOOGLE_API_KEY=your_api_key_here lega-ai
178
+ ```
179
+
180
+ ### Hugging Face Spaces Deployment
181
+
182
+ Deploy Lega.AI to Hugging Face Spaces with one click!
183
+
184
+ [![Deploy to Hugging Face Spaces](https://huggingface.co/datasets/huggingface/badges/raw/main/deploy-to-spaces-md.svg)](https://huggingface.co/spaces)
185
+
186
+ **Quick Setup:**
187
+
188
+ 1. Create a new [Hugging Face Space](https://huggingface.co/spaces) with SDK: Docker
189
+ 2. Upload this repository to your Space
190
+ 3. Set `GOOGLE_API_KEY` as a secret in Space Settings → Variables and secrets
191
+ 4. Your app will be live at `https://huggingface.co/spaces/[username]/[space-name]`
192
+
193
+ 📋 **Detailed Instructions**: See [HUGGINGFACE_DEPLOYMENT.md](./HUGGINGFACE_DEPLOYMENT.md) for complete setup guide.
194
+
195
+ ## 📁 Project Structure
196
+
197
+ ```
198
+ Lega.AI/
199
+ ├── main.py # Main Streamlit application entry point
200
+ ├── pyproject.toml # UV/pip package configuration and dependencies
201
+ ├── requirements.txt # Docker-compatible requirements file
202
+ ├── uv.lock # UV lockfile for reproducible builds
203
+ ├── setup.py # Legacy Python package setup
204
+ ├── Dockerfile # Docker container configuration
205
+ ├── .dockerignore # Docker build optimization
206
+ ├── start.sh # Hugging Face Spaces startup script
207
+ ├── .env.example # Environment variables template
208
+ ├── .env.hf # Hugging Face Spaces configuration
209
+ ├── README.md # Project documentation
210
+ ├── HUGGINGFACE_DEPLOYMENT.md # HF Spaces deployment guide
211
+ ├── src/ # Main application source code
212
+ │ ├── __init__.py
213
+ │ ├── models/
214
+ │ │ ├── __init__.py
215
+ │ │ └── document.py # Document data models and schemas
216
+ │ ├── services/
217
+ │ │ ├── __init__.py
218
+ │ │ ├── document_processor.py # PDF/DOCX text extraction
219
+ │ │ ├── ai_analyzer.py # AI analysis and risk assessment
220
+ │ │ └── vector_store.py # Chroma vector database management
221
+ │ ├── pages/
222
+ │ │ ├── __init__.py
223
+ │ │ ├── upload.py # Document upload interface
224
+ │ │ ├── analysis.py # Document analysis dashboard
225
+ │ │ ├── qa_assistant.py # Interactive Q&A chat interface
226
+ │ │ ├── library.py # Document library management
227
+ │ │ └── settings.py # Application settings and configuration
228
+ │ └── utils/
229
+ │ ├── __init__.py
230
+ │ ├── config.py # Environment configuration management
231
+ │ ├── logger.py # Logging utilities and setup
232
+ │ └── helpers.py # Common helper functions
233
+ ├── sample/ # Sample legal documents for testing
234
+ │ ├── Employment_Offer_Letter.pdf
235
+ │ ├── Master_Services_Agreement.pdf
236
+ │ ├── Mutual_NDA.pdf
237
+ │ └── Residential_Lease_Agreement.pdf
238
+ ├── data/ # Local data storage and persistence
239
+ │ ├── app.log # Application logs
240
+ │ └── chroma_db/ # Vector database storage
241
+ └── uploads/ # Temporary file uploads directory
242
+ ```
243
+
244
+ ## 🎯 Usage Guide
245
+
246
+ ### 1. Document Upload & Processing
247
+
248
+ - Navigate to **📄 Upload** page
249
+ - Upload PDF, DOCX, or TXT files (max 10MB per file)
250
+ - Try the included sample documents for immediate testing
251
+ - Automatic document type detection and text extraction
252
+
253
+ ### 2. Comprehensive Analysis Dashboard
254
+
255
+ Visit **📊 Analysis** to explore:
256
+
257
+ - **Risk Score Gauge**: Interactive 0-100 risk assessment with color coding
258
+ - **Side-by-Side Comparison**: Original text vs. simplified plain language
259
+ - **Risk Factor Breakdown**: Detailed explanations of identified risks with severity levels
260
+ - **Interactive Clause Highlighting**: Hover over highlighted text for tooltips with suggestions
261
+ - **Financial & Date Extraction**: Automatic identification of monetary amounts and key dates
262
+ - **Risk Visualization Charts**: Visual distribution of risk categories and severity
263
+
264
+ ### 3. Interactive Q&A Assistant
265
+
266
+ - Use **💬 Q&A** for document-specific questions and analysis
267
+ - Get context-aware answers powered by vector similarity search
268
+ - Access suggested questions based on document type and content
269
+ - Chat history preservation for reference and record-keeping
270
+
271
+ ### 4. Document Library Management
272
+
273
+ - **📚 Library** provides persistent storage of all analyzed documents
274
+ - Advanced filtering by document type, risk level, upload date
275
+ - Full-text search across document content and analysis results
276
+ - Quick re-analysis and direct access to Q&A for stored documents
277
+ - Document metadata and analysis summary views
278
+
279
+ ### 5. Settings & Configuration
280
+
281
+ - **⚙️ Settings** for API key management and validation
282
+ - Application configuration and performance monitoring
283
+ - Usage statistics and system health information
284
+
285
+ ## 🔧 Configuration Options
286
+
287
+ The application uses environment variables for configuration. All settings can be customized in the `.env` file based on the `.env.example` template.
288
+
289
+ ### 🔑 Required Settings
290
+
291
+ | Variable | Description | Example |
292
+ | ---------------- | -------------------------------- | ----------------------------- |
293
+ | `GOOGLE_API_KEY` | Google AI API key for Gemini Pro | `xyz` (from AI Studio) |
294
+
295
+ ### ⚙️ Application Settings
296
+
297
+ | Variable | Default | Description |
298
+ | -------------------------- | -------------- | ---------------------------------- |
299
+ | `DEBUG` | `True` | Enable debug mode and verbose logs |
300
+ | `LOG_LEVEL` | `INFO` | Logging level (DEBUG/INFO/WARNING) |
301
+ | `STREAMLIT_SERVER_PORT` | `8501` | Port for Streamlit server |
302
+ | `STREAMLIT_SERVER_ADDRESS` | `localhost` | Server address binding |
303
+ | `MAX_FILE_SIZE_MB` | `10` | Maximum upload file size |
304
+ | `SUPPORTED_FILE_TYPES` | `pdf,docx,txt` | Allowed file extensions |
305
+
306
+ ### 🤖 AI Model Settings
307
+
308
+ | Variable | Default | Description |
309
+ | ----------------- | ---------------------- | -------------------------------- |
310
+ | `TEMPERATURE` | `0.2` | AI response creativity (0.0-1.0) |
311
+ | `MAX_TOKENS` | `2048` | Maximum response length |
312
+ | `EMBEDDING_MODEL` | `models/text-embedding-004` | Google AI embedding model |
313
+
314
+ ### 💾 Storage Configuration
315
+
316
+ | Variable | Default | Description |
317
+ | -------------------------- | ------------------ | ---------------------------- |
318
+ | `CHROMA_PERSIST_DIRECTORY` | `./data/chroma_db` | Vector database storage path |
319
+ | `UPLOAD_DIR` | `./uploads` | Temporary file uploads |
320
+ | `DATA_DIR` | `./data` | Application data directory |
321
+ | `LOG_FILE` | `./data/app.log` | Application log file path |
322
+
323
+ ### 🔒 Security Settings
324
+
325
+ | Variable | Default | Description |
326
+ | ------------------------- | ------- | ------------------------ |
327
+ | `SECRET_KEY` | None | Application secret key |
328
+ | `SESSION_TIMEOUT_MINUTES` | `60` | Session timeout duration |
329
+
330
+ ### Example .env configuration:
331
+
332
+ ```bash
333
+ # Required
334
+ GOOGLE_API_KEY=your-google-ai-api-key
335
+
336
+ # Optional (with defaults shown)
337
+ DEBUG=True
338
+ LOG_LEVEL=INFO
339
+ MAX_FILE_SIZE_MB=10
340
+ SUPPORTED_FILE_TYPES=pdf,docx,txt
341
+ CHROMA_PERSIST_DIRECTORY=./data/chroma_db
342
+ TEMPERATURE=0.2
343
+ ```
344
+
345
+ ## 📄 Sample Documents
346
+
347
+ The project includes professionally-crafted sample legal documents for testing and demonstration:
348
+
349
+ | Document Type | Filename | Purpose |
350
+ | ---------------------------- | --------------------------------- | ---------------------------------------- |
351
+ | **Employment Contract** | `Employment_Offer_Letter.pdf` | Test employment-related clause analysis |
352
+ | **Service Agreement** | `Master_Services_Agreement.pdf` | Demonstrate commercial contract analysis |
353
+ | **Non-Disclosure Agreement** | `Mutual_NDA.pdf` | Show confidentiality clause assessment |
354
+ | **Lease Agreement** | `Residential_Lease_Agreement.pdf` | Test rental/property contract analysis |
355
+
356
+ These documents are located in the `sample/` directory and can be uploaded directly through the application to:
357
+
358
+ - Experience the complete analysis workflow
359
+ - Test different document types and complexity levels
360
+ - Understand risk assessment capabilities
361
+ - Explore Q&A functionality with real legal content
362
+
363
+ ## 🚨 Document Types Supported
364
+
365
+ Currently optimized for:
366
+
367
+ - **🏠 Rental/Lease Agreements**
368
+ - **💰 Loan Contracts**
369
+ - **💼 Employment Contracts**
370
+ - **🤝 Service Agreements**
371
+ - **🔒 Non-Disclosure Agreements (NDAs)**
372
+ - **📄 General Legal Documents**
373
+
374
+ ## ⚡ Key Features Deep Dive
375
+
376
+ ### 🔍 Advanced Risk Assessment Engine
377
+
378
+ - **Multi-dimensional Analysis**: Evaluates financial, legal commitment, and rights-related risks
379
+ - **Intelligent Severity Classification**: Categorizes risks as Low, Medium, High, or Critical
380
+ - **Contextual Risk Scoring**: Dynamic 0-100 scale based on document type and complexity
381
+ - **Actionable Recommendations**: Specific suggestions for improving problematic clauses
382
+
383
+ ### 📝 AI-Powered Plain Language Translation
384
+
385
+ - **Context-Aware Simplification**: Maintains legal accuracy while improving readability
386
+ - **Jargon Definition System**: Interactive tooltips for complex legal terms
387
+ - **Document Type Optimization**: Tailored simplification based on contract category
388
+ - **Preservation of Legal Intent**: Ensures meaning is not lost in translation
389
+
390
+ ### 🎯 Interactive Clause Analysis
391
+
392
+ - **Smart Highlighting System**: Visual identification of risky and important clauses
393
+ - **Hover Tooltips**: Immediate access to explanations and suggestions
394
+ - **Clause Categorization**: Organized by risk type and legal significance
395
+ - **Improvement Suggestions**: Specific recommendations for clause modifications
396
+
397
+ ### 🔍 Vector-Powered Document Intelligence
398
+
399
+ - **Semantic Search**: Find similar clauses across your document library
400
+ - **Context-Aware Q&A**: Answers grounded in actual document content
401
+ - **Document Similarity**: Compare clauses against known patterns and standards
402
+ - **Persistent Knowledge Base**: Chroma vector database for fast, accurate retrieval
403
+
404
+ ### 📊 Advanced Visualization & Analytics
405
+
406
+ - **Interactive Risk Gauges**: Real-time visual risk assessment
407
+ - **Risk Distribution Charts**: Breakdown of risk categories and severity
408
+ - **Financial Terms Extraction**: Automatic identification of monetary obligations
409
+ - **Timeline Analysis**: Key dates and deadline extraction with visualization
410
+
411
+ ### 💾 Enterprise-Grade Data Management
412
+
413
+ - **Local Data Persistence**: Secure storage of documents and analysis results
414
+ - **Document Library**: Organized management with search and filtering
415
+ - **Analysis History**: Complete audit trail of document processing
416
+ - **Metadata Extraction**: Automatic tagging and categorization
417
+
418
+ ## 🔒 Privacy & Security
419
+
420
+ ### 🛡️ Data Protection
421
+
422
+ - **Local Processing**: Documents analyzed locally with secure API calls to Google AI
423
+ - **No Data Sharing**: Zero third-party data sharing or storage outside your environment
424
+ - **Secure Storage**: Vector embeddings and analysis results stored locally in Chroma database
425
+ - **Environment Security**: API keys managed through secure environment variables
426
+
427
+ ### 🔐 Security Best Practices
428
+
429
+ - **API Key Protection**: Secure credential management with environment-based configuration
430
+ - **Local Vector Storage**: Document embeddings stored exclusively on your local system
431
+ - **Session Management**: Configurable session timeouts and secure state management
432
+ - **Input Validation**: Comprehensive file type and size validation for uploads
433
+
434
+ ### 📋 Data Handling
435
+
436
+ - **Temporary Upload Storage**: Uploaded files processed and optionally removed from temp storage
437
+ - **Persistent Analysis**: Analysis results retained locally for document library functionality
438
+ - **User Control**: Complete control over data retention and deletion
439
+ - **Audit Trail**: Transparent logging of all document processing activities
440
+
441
+ ## 🤝 Contributing
442
+
443
+ 1. Fork the repository
444
+ 2. Create a feature branch
445
+ 3. Make your changes
446
+ 4. Test thoroughly
447
+ 5. Submit a pull request
448
+
449
+ ## 📄 License
450
+
451
+ MIT License - see LICENSE file for details.
452
+
453
+ ## 🆘 Support
454
+
455
+ ### 📚 Documentation & Resources
456
+
457
+ - **In-Code Documentation**: Comprehensive docstrings and code comments throughout the project
458
+ - **Configuration Guide**: Detailed environment setup and configuration options above
459
+ - **Sample Documents**: Use included sample contracts to understand features and capabilities
460
+
461
+ ### 🐛 Issues & Bug Reports
462
+
463
+ - **GitHub Issues**: Report bugs, request features, or ask questions via [GitHub Issues](https://github.com/your-repo/Lega.AI/issues)
464
+ - **Bug Reports**: Include system info, error logs, and steps to reproduce
465
+ - **Feature Requests**: Describe use cases and expected functionality
466
+
467
+ ### 🛠️ Development & API References
468
+
469
+ - **Google AI Documentation**: [Google AI Developer Guide](https://ai.google.dev/) for Gemini API details
470
+ - **LangChain Documentation**: [LangChain Docs](https://python.langchain.com/) for framework reference
471
+ - **Streamlit Documentation**: [Streamlit Docs](https://docs.streamlit.io/) for UI framework guidance
472
+ - **Chroma Documentation**: [Chroma Docs](https://docs.trychroma.com/) for vector database operations
473
+
474
+ ### 💡 Getting Help
475
+
476
+ 1. **Check Documentation**: Review this README and in-code comments first
477
+ 2. **Try Sample Documents**: Use provided samples to test functionality
478
+ 3. **Check Logs**: Review `data/app.log` for detailed error information
479
+ 4. **Environment Issues**: Verify `.env` configuration and API key validity
480
+ 5. **Community Support**: Open GitHub discussions for general questions
481
+
482
+ ---
483
+
484
+ **Made with ❤️ using Streamlit, LangChain, and Google AI**
main.py ADDED
@@ -0,0 +1,789 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit_option_menu import option_menu
3
+ import os
4
+ import sys
5
+
6
+ # Add src directory to Python path
7
+ sys.path.append(os.path.join(os.path.dirname(__file__), "src"))
8
+
9
+ from src.utils.config import config
10
+ from src.utils.logger import setup_logging
11
+
12
+ # Page config
13
# Page-level settings: browser-tab title and icon, full-width layout, and a
# sidebar that starts expanded.
st.set_page_config(
    page_title="Lega.AI",
    page_icon="⚖️",
    layout="wide",
    initial_sidebar_state="expanded",
)
16
+
17
+ # Custom CSS for responsive dark/light theme
18
+ st.markdown(
19
+ """
20
+ <style>
21
+ /* Main header with gradient text */
22
+ .main-header {
23
+ font-size: 3rem;
24
+ font-weight: bold;
25
+ text-align: center;
26
+ margin-bottom: 2rem;
27
+ background: linear-gradient(90deg, #1f4e79, #2e86ab);
28
+ -webkit-background-clip: text;
29
+ -webkit-text-fill-color: transparent;
30
+ background-clip: text;
31
+ }
32
+
33
+ /* Responsive feature cards that adapt to theme */
34
+ .feature-card {
35
+ background: var(--background-color);
36
+ padding: 1.5rem;
37
+ border-radius: 12px;
38
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
39
+ margin: 1rem 0;
40
+ border-left: 4px solid #2e86ab;
41
+ border: 1px solid var(--border-color);
42
+ color: var(--text-color);
43
+ transition: all 0.3s ease;
44
+ }
45
+
46
+ .feature-card:hover {
47
+ transform: translateY(-2px);
48
+ box-shadow: 0 6px 12px rgba(0, 0, 0, 0.15);
49
+ }
50
+
51
+ /* Risk color indicators */
52
+ .risk-critical { color: #ff4444; font-weight: bold; }
53
+ .risk-high { color: #ff6666; font-weight: bold; }
54
+ .risk-medium { color: #ffaa00; font-weight: bold; }
55
+ .risk-low { color: #ffcc00; font-weight: bold; }
56
+ .risk-safe { color: #44aa44; font-weight: bold; }
57
+
58
+ /* Responsive metric cards */
59
+ .metric-card {
60
+ background: var(--secondary-background-color);
61
+ padding: 1.5rem;
62
+ border-radius: 10px;
63
+ text-align: center;
64
+ margin: 0.5rem 0;
65
+ border: 1px solid var(--border-color);
66
+ color: var(--text-color);
67
+ transition: all 0.3s ease;
68
+ }
69
+
70
+ .metric-card:hover {
71
+ background: var(--hover-background-color);
72
+ }
73
+
74
+ /* Enhanced button styling */
75
+ .stButton > button {
76
+ width: 100%;
77
+ background: linear-gradient(135deg, #2e86ab, #1f4e79);
78
+ color: white !important;
79
+ border: none;
80
+ border-radius: 10px;
81
+ padding: 0.75rem 1rem;
82
+ font-weight: 600;
83
+ transition: all 0.3s ease;
84
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
85
+ }
86
+
87
+ .stButton > button:hover {
88
+ background: linear-gradient(135deg, #1f4e79, #2e86ab);
89
+ transform: translateY(-1px);
90
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
91
+ }
92
+
93
+ /* Enhanced sidebar styling for dark/light theme */
94
+ .css-1d391kg {
95
+ background: var(--background-color) !important;
96
+ }
97
+
98
+ /* Streamlit sidebar container */
99
+ section[data-testid="stSidebar"] {
100
+ background: var(--background-color) !important;
101
+ border-right: 1px solid var(--border-color) !important;
102
+ }
103
+
104
+ /* Sidebar content */
105
+ section[data-testid="stSidebar"] > div {
106
+ background: var(--background-color) !important;
107
+ color: var(--text-color) !important;
108
+ }
109
+
110
+ /* Sidebar header */
111
+ section[data-testid="stSidebar"] .block-container {
112
+ background: var(--background-color) !important;
113
+ color: var(--text-color) !important;
114
+ }
115
+
116
+ /* Option menu in sidebar */
117
+ section[data-testid="stSidebar"] .nav-link {
118
+ background: var(--secondary-background-color) !important;
119
+ color: var(--text-color) !important;
120
+ border: 1px solid var(--border-color) !important;
121
+ }
122
+
123
+ /* Active option in sidebar */
124
+ section[data-testid="stSidebar"] .nav-link.active {
125
+ background: linear-gradient(135deg, #2e86ab, #1f4e79) !important;
126
+ color: white !important;
127
+ }
128
+
129
+ /* Streamlit Option Menu specific styling - Force override */
130
+ .nav-link {
131
+ background: var(--secondary-background-color) !important;
132
+ color: var(--text-color) !important;
133
+ border: 1px solid var(--border-color) !important;
134
+ border-radius: 8px !important;
135
+ margin: 2px 0 !important;
136
+ }
137
+
138
+ .nav-link:hover {
139
+ background: var(--hover-background-color) !important;
140
+ color: var(--text-color) !important;
141
+ }
142
+
143
+ .nav-link.active {
144
+ background: linear-gradient(135deg, #2e86ab, #1f4e79) !important;
145
+ color: white !important;
146
+ border: 1px solid #2e86ab !important;
147
+ }
148
+
149
+ /* Option menu container */
150
+ .nav {
151
+ background: transparent !important;
152
+ }
153
+
154
+ /* Fix option menu wrapper */
155
+ div[data-testid="stVerticalBlock"] > div > div {
156
+ background: transparent !important;
157
+ }
158
+
159
+ /* More specific selectors for option menu */
160
+ section[data-testid="stSidebar"] .nav-link {
161
+ background-color: var(--secondary-background-color) !important;
162
+ color: var(--text-color) !important;
163
+ border: 1px solid var(--border-color) !important;
164
+ }
165
+
166
+ section[data-testid="stSidebar"] .nav-link:hover {
167
+ background-color: var(--hover-background-color) !important;
168
+ }
169
+
170
+ section[data-testid="stSidebar"] .nav-link.active {
171
+ background-color: #2e86ab !important;
172
+ color: white !important;
173
+ }
174
+
175
+ /* Force override any white backgrounds in sidebar */
176
+ section[data-testid="stSidebar"] * {
177
+ background-color: inherit !important;
178
+ }
179
+
180
+ section[data-testid="stSidebar"] .nav-link {
181
+ background-color: var(--secondary-background-color) !important;
182
+ }
183
+
184
+ /* File uploader styling */
185
+ .uploadedFile {
186
+ background: var(--secondary-background-color) !important;
187
+ border: 2px dashed var(--border-color) !important;
188
+ border-radius: 10px !important;
189
+ color: var(--text-color) !important;
190
+ }
191
+
192
+ /* Tab styling */
193
+ .stTabs [data-baseweb="tab-list"] {
194
+ gap: 8px;
195
+ }
196
+
197
+ .stTabs [data-baseweb="tab"] {
198
+ background: var(--secondary-background-color);
199
+ border-radius: 8px;
200
+ color: var(--text-color);
201
+ border: 1px solid var(--border-color);
202
+ }
203
+
204
+ .stTabs [aria-selected="true"] {
205
+ background: linear-gradient(135deg, #2e86ab, #1f4e79) !important;
206
+ color: white !important;
207
+ }
208
+
209
+ /* Tooltip styling for risk factors */
210
+ .tooltip {
211
+ position: relative;
212
+ display: inline;
213
+ cursor: help;
214
+ border-radius: 4px;
215
+ padding: 2px 4px;
216
+ margin: 0 1px;
217
+ }
218
+
219
+ .tooltip .tooltiptext {
220
+ visibility: hidden;
221
+ width: 300px;
222
+ background-color: var(--tooltip-background);
223
+ color: var(--tooltip-text);
224
+ text-align: left;
225
+ border-radius: 8px;
226
+ padding: 12px;
227
+ position: absolute;
228
+ z-index: 1000;
229
+ bottom: 125%;
230
+ left: 50%;
231
+ margin-left: -150px;
232
+ opacity: 0;
233
+ transition: opacity 0.3s, visibility 0.3s;
234
+ font-size: 13px;
235
+ line-height: 1.4;
236
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
237
+ border: 1px solid var(--border-color);
238
+ }
239
+
240
+ .tooltip:hover .tooltiptext {
241
+ visibility: visible;
242
+ opacity: 1;
243
+ }
244
+
245
+ /* Risk highlighting */
246
+ .risk-critical {
247
+ background-color: rgba(255, 68, 68, 0.2);
248
+ border-left: 4px solid #ff4444;
249
+ padding: 4px 8px;
250
+ border-radius: 4px;
251
+ }
252
+ .risk-high {
253
+ background-color: rgba(255, 136, 0, 0.2);
254
+ border-left: 4px solid #ff8800;
255
+ padding: 4px 8px;
256
+ border-radius: 4px;
257
+ }
258
+ .risk-medium {
259
+ background-color: rgba(255, 204, 0, 0.2);
260
+ border-left: 4px solid #ffcc00;
261
+ padding: 4px 8px;
262
+ border-radius: 4px;
263
+ }
264
+ .risk-low {
265
+ background-color: rgba(68, 170, 68, 0.2);
266
+ border-left: 4px solid #44aa44;
267
+ padding: 4px 8px;
268
+ border-radius: 4px;
269
+ }
270
+
271
+ /* Jargon term highlighting */
272
+ .jargon-term {
273
+ background-color: rgba(46, 134, 171, 0.2);
274
+ text-decoration: underline dotted #2e86ab;
275
+ padding: 2px 4px;
276
+ border-radius: 3px;
277
+ }
278
+
279
+ /* Dark theme variables */
280
+ [data-theme="dark"] {
281
+ --background-color: #0e1117;
282
+ --secondary-background-color: #262730;
283
+ --text-color: #fafafa;
284
+ --border-color: #464a5a;
285
+ --hover-background-color: #3d4354;
286
+ --tooltip-background: #262730;
287
+ --tooltip-text: #fafafa;
288
+ }
289
+
290
+ /* Light theme variables */
291
+ [data-theme="light"], :root {
292
+ --background-color: #ffffff;
293
+ --secondary-background-color: #f8f9fa;
294
+ --text-color: #262626;
295
+ --border-color: #e0e0e0;
296
+ --hover-background-color: #f0f0f0;
297
+ --tooltip-background: #333333;
298
+ --tooltip-text: #ffffff;
299
+ }
300
+
301
+ /* Auto-detect system theme */
302
+ @media (prefers-color-scheme: dark) {
303
+ :root {
304
+ --background-color: #0e1117;
305
+ --secondary-background-color: #262730;
306
+ --text-color: #fafafa;
307
+ --border-color: #464a5a;
308
+ --hover-background-color: #3d4354;
309
+ --tooltip-background: #262730;
310
+ --tooltip-text: #fafafa;
311
+ }
312
+ }
313
+
314
+ /* Hide Streamlit default elements */
315
+ #MainMenu {visibility: hidden;}
316
+ footer {visibility: hidden;}
317
+ .stDeployButton {visibility: hidden;}
318
+
319
+ /* Fix all Streamlit components for dark theme */
320
+ .stApp {
321
+ background: var(--background-color) !important;
322
+ color: var(--text-color) !important;
323
+ }
324
+
325
+ /* Main content area */
326
+ .main .block-container {
327
+ background: var(--background-color) !important;
328
+ color: var(--text-color) !important;
329
+ padding-top: 2rem;
330
+ padding-bottom: 2rem;
331
+ }
332
+
333
+ /* Text input fields */
334
+ .stTextInput > div > div > input {
335
+ background: var(--secondary-background-color) !important;
336
+ color: var(--text-color) !important;
337
+ border: 1px solid var(--border-color) !important;
338
+ }
339
+
340
+ /* Text areas */
341
+ .stTextArea > div > div > textarea {
342
+ background: var(--secondary-background-color) !important;
343
+ color: var(--text-color) !important;
344
+ border: 1px solid var(--border-color) !important;
345
+ }
346
+
347
+ /* Select boxes */
348
+ .stSelectbox > div > div > select {
349
+ background: var(--secondary-background-color) !important;
350
+ color: var(--text-color) !important;
351
+ border: 1px solid var(--border-color) !important;
352
+ }
353
+
354
+ /* Info, warning, error boxes */
355
+ .stAlert {
356
+ background: var(--secondary-background-color) !important;
357
+ color: var(--text-color) !important;
358
+ border: 1px solid var(--border-color) !important;
359
+ }
360
+
361
+ /* Expander */
362
+ .streamlit-expanderHeader {
363
+ background: var(--secondary-background-color) !important;
364
+ color: var(--text-color) !important;
365
+ border: 1px solid var(--border-color) !important;
366
+ }
367
+
368
+ /* Columns */
369
+ .element-container {
370
+ background: transparent !important;
371
+ }
372
+
373
+ /* Status indicators */
374
+ .status-success {
375
+ background: rgba(68, 170, 68, 0.1);
376
+ border: 1px solid #44aa44;
377
+ border-radius: 6px;
378
+ padding: 8px 12px;
379
+ color: #44aa44;
380
+ }
381
+
382
+ .status-warning {
383
+ background: rgba(255, 136, 0, 0.1);
384
+ border: 1px solid #ff8800;
385
+ border-radius: 6px;
386
+ padding: 8px 12px;
387
+ color: #ff8800;
388
+ }
389
+
390
+ .status-error {
391
+ background: rgba(255, 68, 68, 0.1);
392
+ border: 1px solid #ff4444;
393
+ border-radius: 6px;
394
+ padding: 8px 12px;
395
+ color: #ff4444;
396
+ }
397
+ </style>
398
+ """,
399
+ unsafe_allow_html=True,
400
+ )
401
+
402
+
403
def main():
    """Run one pass of the Streamlit app: sidebar navigation plus the chosen page.

    Sets up logging, seeds the ``current_document`` and ``documents_library``
    session-state keys, draws the sidebar menu, applies any pending navigation
    request stored in ``st.session_state.page`` and finally calls the renderer
    for the selected page.
    """
    # Initialize logging
    setup_logging()

    # Initialize session state
    if "current_document" not in st.session_state:
        st.session_state.current_document = None
    if "documents_library" not in st.session_state:
        st.session_state.documents_library = []

    # Single source of truth for page labels: the same mapping feeds the menu
    # options, validates redirect requests and dispatches to the renderer, so
    # the three can no longer drift apart.
    pages = {
        "🏠 Home": show_home_page,
        "📄 Upload": show_upload_page,
        "📊 Analysis": show_analysis_page,
        "💬 Q&A": show_qa_page,
        "📚 Library": show_library_page,
        "⚙️ Settings": show_settings_page,
    }

    # Sidebar navigation
    with st.sidebar:
        st.markdown("### ⚖️ Lega.AI")
        st.markdown("*Making legal documents accessible*")

        selected = option_menu(
            menu_title=None,
            options=list(pages),
            icons=["house", "upload", "graph-up", "chat-dots", "folder", "gear"],
            menu_icon="list",
            default_index=0,
            styles={
                "container": {
                    "padding": "0!important",
                    "background-color": "transparent",
                },
                "icon": {"color": "#2e86ab", "font-size": "18px"},
                "nav-link": {
                    "font-size": "16px",
                    "text-align": "left",
                    "margin": "2px 0px",
                    "padding": "8px 12px",
                    "border-radius": "8px",
                    "background-color": "transparent",
                    "color": "inherit",
                    "border": "1px solid transparent",
                    "--hover-color": "transparent",
                },
                "nav-link-selected": {
                    "background-color": "#2e86ab",
                    "color": "white",
                    "border": "1px solid #2e86ab",
                },
            },
        )

    # Handle page redirections from session state. Other code requests a page
    # by assigning its label to st.session_state.page and calling st.rerun().
    if "page" in st.session_state:
        requested = st.session_state.page
        # Always consume the request, even when it is empty or unknown, so a
        # bad value cannot linger in session state across reruns.
        del st.session_state.page
        if requested in pages:
            selected = requested

    # Main content area
    render_page = pages.get(selected)
    if render_page is not None:
        render_page()
485
+
486
+
487
# File extensions offered as sample documents on the home page.
_SAMPLE_EXTENSIONS = (".pdf", ".docx", ".txt")

# (heading, caption) pairs for the three feature cards.
_FEATURE_CARDS = (
    (
        "🚀 Instant Analysis",
        "Upload any legal document and get comprehensive analysis in under 60 seconds using Google's Gemini AI.",
    ),
    (
        "🎯 Risk Assessment",
        "Color-coded risk scoring helps you identify problematic clauses at a glance with detailed explanations.",
    ),
    (
        "💬 Plain Language",
        "Convert complex legal jargon into clear, understandable language that anyone can comprehend.",
    ),
)

# (value, label) pairs for the statistics row.
# NOTE(review): these are hard-coded display values, not computed from usage data.
_PLATFORM_STATS = (
    ("1,247", "Documents Analyzed"),
    ("95%", "Accuracy Rate"),
    ("₹2,000", "Avg. Saved per User"),
    ("45 sec", "Avg. Processing Time"),
)


def _card_html(css_class, heading_tag, heading, caption):
    """Build the HTML for one home-page card: a heading plus a one-line caption."""
    return (
        f'<div class="{css_class}">\n'
        f"    <{heading_tag}>{heading}</{heading_tag}>\n"
        f"    <p>{caption}</p>\n"
        "</div>"
    )


def _list_sample_files(sample_dir, limit=4):
    """Return up to *limit* supported file names from *sample_dir*, sorted by name.

    Sorting matters because the home-page buttons use index-based keys: with
    the arbitrary order of ``os.listdir`` a click could resolve to a different
    file on the rerun that handles it.
    """
    try:
        names = os.listdir(sample_dir)
    except OSError:
        # Missing, unreadable or non-directory path: treat as "no samples".
        return []
    return sorted(name for name in names if name.endswith(_SAMPLE_EXTENSIONS))[:limit]


def _sample_display_name(filename):
    """Turn a sample file name into a button label (``Mutual_NDA.pdf`` -> ``Mutual Nda``)."""
    # splitext drops only the real extension; chained str.replace calls would
    # also remove ".pdf"/".txt" occurring in the middle of a name.
    stem, _extension = os.path.splitext(filename)
    return stem.replace("_", " ").title()


def show_home_page():
    """Display the home page with overview and features.

    Renders the title and tagline, three feature cards, a row of statistics,
    usage instructions, up to four sample-document shortcuts and a
    call-to-action button. Clicking a sample stores its file name in
    ``st.session_state.load_sample``; both sample and call-to-action buttons
    request the Upload page through ``st.session_state.page`` and rerun.
    """
    st.markdown('<h1 class="main-header">⚖️ Lega.AI</h1>', unsafe_allow_html=True)
    st.markdown(
        '<p style="text-align: center; font-size: 1.2rem; color: #666;">AI-powered legal document analysis and simplification</p>',
        unsafe_allow_html=True,
    )

    # Key benefits
    for column, (heading, caption) in zip(st.columns(3), _FEATURE_CARDS):
        with column:
            st.markdown(
                _card_html("feature-card", "h3", heading, caption),
                unsafe_allow_html=True,
            )

    # Quick stats
    st.markdown("---")
    st.subheader("📊 Platform Statistics")

    for column, (value, label) in zip(st.columns(4), _PLATFORM_STATS):
        with column:
            st.markdown(
                _card_html("metric-card", "h2", value, label),
                unsafe_allow_html=True,
            )

    # Getting started
    st.markdown("---")
    st.subheader("🎯 Getting Started")

    guide_col, samples_col = st.columns([2, 1])

    with guide_col:
        st.markdown(
            """
            **How to use Lega.AI:**

            1. **Upload** your legal document (PDF, DOCX, or TXT)
            2. **Wait** for AI analysis (typically 30-60 seconds)
            3. **Review** risk assessment and simplified explanations
            4. **Ask questions** about specific clauses or terms
            5. **Export** summary for your records
            """
        )

    with samples_col:
        st.markdown("### 📄 Try Real Sample Documents")
        st.markdown("Get started with actual legal documents:")

        # Get available sample documents (path is relative to the working
        # directory the app was started from).
        sample_files = _list_sample_files("./sample")

        if sample_files:
            for i, filename in enumerate(sample_files):
                display_name = _sample_display_name(filename)

                if st.button(f"📄 {display_name}", key=f"home_sample_{i}"):
                    st.session_state.load_sample = filename
                    st.session_state.page = "📄 Upload"
                    st.rerun()
        else:
            # Nothing is being loaded in this branch, so say so plainly.
            st.info("No sample documents available.")

    # CTA button
    st.markdown("---")
    if st.button("📄 Analyze Your First Document", type="primary"):
        st.session_state.page = "📄 Upload"
        st.rerun()
627
+
628
+
629
def load_sample_document(doc_type: str):
    """Load a built-in sample document for demonstration.

    Stores the sample as ``st.session_state.current_document`` (with a freshly
    generated id and ``is_sample=True``), requests the Analysis page and
    reruns the script.

    Args:
        doc_type: Which sample to load: ``"rental"``, ``"loan"`` or
            ``"employment"``. Any other value shows a warning and leaves
            session state untouched.
    """
    sample_docs = {
        "rental": {
            "filename": "sample_rental_agreement.pdf",
            "type": "rental",
            "text": """
                RESIDENTIAL LEASE AGREEMENT

                This Lease Agreement is entered into between John Smith (Landlord) and Jane Doe (Tenant)
                for the property located at 123 Main Street, Mumbai, Maharashtra.

                RENT: Tenant agrees to pay Rs. 25,000 per month, due on the 1st of each month.
                Late payments will incur a penalty of Rs. 1,000 per day.

                SECURITY DEPOSIT: Tenant shall pay a security deposit of Rs. 75,000, which is
                non-refundable except for damage assessment.

                TERMINATION: Either party may terminate this lease with 30 days written notice.
                Early termination by Tenant results in forfeiture of security deposit.

                MAINTENANCE: Tenant is responsible for all repairs and maintenance, including
                structural repairs, regardless of cause.

                The property is leased "as-is" with no warranties. Landlord is not liable for
                any damages or injuries occurring on the premises.
            """,
        },
        "loan": {
            "filename": "sample_loan_agreement.pdf",
            "type": "loan",
            "text": """
                PERSONAL LOAN AGREEMENT

                Borrower: Rajesh Kumar
                Lender: QuickCash Financial Services Pvt Ltd
                Principal Amount: Rs. 2,00,000

                INTEREST RATE: 24% per annum (APR 28.5% including processing fees)

                REPAYMENT: 24 monthly installments of Rs. 12,500 each
                Total repayment amount: Rs. 3,00,000

                LATE PAYMENT PENALTY: Rs. 500 per day for any late payment

                DEFAULT: If payment is late by more than 7 days, the entire remaining
                balance becomes immediately due and payable.

                COLLATERAL: Borrower pledges gold ornaments worth Rs. 2,50,000 as security.
                Lender may seize collateral immediately upon default.

                ARBITRATION: All disputes shall be resolved through binding arbitration.
                Borrower waives right to jury trial.

                Processing fee: Rs. 10,000 (non-refundable)
                Documentation charges: Rs. 5,000
            """,
        },
        "employment": {
            "filename": "sample_employment_contract.pdf",
            "type": "employment",
            "text": """
                EMPLOYMENT CONTRACT

                Employee: Priya Sharma
                Company: TechCorp India Private Limited
                Position: Software Developer
                Start Date: January 1, 2024

                SALARY: Rs. 8,00,000 per annum, payable monthly

                WORKING HOURS: 45 hours per week, including mandatory weekend work when required

                NON-COMPETE: Employee shall not work for any competing company for 2 years
                after termination, within India or globally.

                CONFIDENTIALITY: Employee agrees to maintain strict confidentiality of all
                company information indefinitely, even after termination.

                TERMINATION: Company may terminate employment at any time without cause or notice.
                Employee must provide 90 days notice to resign.

                NO MOONLIGHTING: Employee shall not engage in any other work or business
                activities during employment.

                INTELLECTUAL PROPERTY: All work created by employee belongs entirely to company,
                including personal projects done outside work hours.
            """,
        },
    }

    sample = sample_docs.get(doc_type)
    if sample is None:
        # Make a bad request visible instead of silently doing nothing.
        st.warning(f"Unknown sample document type: {doc_type}")
        return

    # Function-scope imports: only needed once a valid sample was requested.
    import textwrap

    from src.utils.helpers import generate_document_id

    # Store in session state. dedent/strip keep the literal's source
    # indentation and surrounding blank lines out of the document text.
    st.session_state.current_document = {
        "id": generate_document_id(),
        "filename": sample["filename"],
        "document_type": sample["type"],
        "original_text": textwrap.dedent(sample["text"]).strip(),
        "is_sample": True,
    }

    # NOTE(review): the rerun below restarts the script right away, so this
    # message is likely visible only briefly, if at all.
    st.success(f"📄 Loaded sample {doc_type} document!")
    st.session_state.page = "📊 Analysis"
    st.rerun()
736
+
737
+
738
def show_upload_page():
    """Render the Upload page, importing its module lazily.

    An ImportError raised while importing or rendering is reported in the UI
    rather than propagated.
    """
    try:
        from src.pages.upload import show_upload_interface as render_upload

        render_upload()
    except ImportError as import_error:
        message = f"Upload page not found: {import_error}"
        st.error(message)
746
+
747
+
748
def show_analysis_page():
    """Render the Analysis page, importing its module lazily.

    An ImportError raised while importing or rendering is reported in the UI
    rather than propagated.
    """
    try:
        from src.pages.analysis import show_analysis_interface as render_analysis

        render_analysis()
    except ImportError as import_error:
        message = f"Analysis page not found: {import_error}"
        st.error(message)
756
+
757
+
758
def show_qa_page():
    """Render the Q&A page, importing its module lazily.

    An ImportError raised while importing or rendering is reported in the UI
    rather than propagated.
    """
    try:
        from src.pages.qa_assistant import show_qa_interface as render_qa

        render_qa()
    except ImportError as import_error:
        message = f"Q&A page not found: {import_error}"
        st.error(message)
766
+
767
+
768
def show_library_page():
    """Render the Library page, importing its module lazily.

    An ImportError raised while importing or rendering is reported in the UI
    rather than propagated.
    """
    try:
        from src.pages.library import show_library_interface as render_library

        render_library()
    except ImportError as import_error:
        message = f"Library page not found: {import_error}"
        st.error(message)
776
+
777
+
778
def show_settings_page():
    """Render the Settings page, importing its module lazily.

    An ImportError raised while importing or rendering is reported in the UI
    rather than propagated.
    """
    try:
        from src.pages.settings import show_settings_interface as render_settings

        render_settings()
    except ImportError as import_error:
        message = f"Settings page not found: {import_error}"
        st.error(message)
786
+
787
+
788
# Script entry point: build the UI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
pyproject.toml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "lega-ai"
3
+ version = "0.1.0"
4
+ description = "AI-powered legal document analysis and simplification"
5
+ readme = "README.md"
6
+ requires-python = ">=3.13"
7
+ dependencies = [
8
+ "blinker>=1.9.0",
9
+ "langchain-chroma>=0.2.6",
10
+ "langchain-community>=0.3.29",
11
+ "langchain-google-genai>=2.1.12",
12
+ "langchain-text-splitters>=0.3.11",
13
+ "langchain[google-genai]>=0.3.27",
14
+ "plotly>=6.3.0",
15
+ "pypdf>=6.0.0",
16
+ "python-docx>=1.2.0",
17
+ "python-dotenv>=1.1.1",
18
+ "streamlit>=1.49.1",
19
+ "streamlit-option-menu>=0.4.0",
20
+ ]
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ blinker>=1.9.0
3
+ langchain-chroma>=0.2.6
4
+ langchain-community>=0.3.29
5
+ langchain-google-genai>=2.1.12
6
+ langchain-text-splitters>=0.3.11
7
+ langchain[google-genai]>=0.3.27
8
+ plotly>=6.3.0
9
+ pypdf>=6.0.0
10
+ python-docx>=1.2.0
11
+ python-dotenv>=1.1.1
12
+ streamlit>=1.49.1
13
+ streamlit-option-menu>=0.4.0
14
+
15
+ # Additional dependencies for production deployment
16
+ uvicorn>=0.24.0
17
+ gunicorn>=21.2.0
sample/Employment_Offer_Letter.pdf ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ %PDF-1.3
2
+ %���� ReportLab Generated PDF document http://www.reportlab.com
3
+ 1 0 obj
4
+ <<
5
+ /F1 2 0 R /F2 3 0 R
6
+ >>
7
+ endobj
8
+ 2 0 obj
9
+ <<
10
+ /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
11
+ >>
12
+ endobj
13
+ 3 0 obj
14
+ <<
15
+ /BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font
16
+ >>
17
+ endobj
18
+ 4 0 obj
19
+ <<
20
+ /Contents 8 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 7 0 R /Resources <<
21
+ /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
22
+ >> /Rotate 0 /Trans <<
23
+
24
+ >>
25
+ /Type /Page
26
+ >>
27
+ endobj
28
+ 5 0 obj
29
+ <<
30
+ /PageMode /UseNone /Pages 7 0 R /Type /Catalog
31
+ >>
32
+ endobj
33
+ 6 0 obj
34
+ <<
35
+ /Author (anonymous) /CreationDate (D:20250921125755+00'00') /Creator (ReportLab PDF Library - www.reportlab.com) /Keywords () /ModDate (D:20250921125755+00'00') /Producer (ReportLab PDF Library - www.reportlab.com)
36
+ /Subject (unspecified) /Title (untitled) /Trapped /False
37
+ >>
38
+ endobj
39
+ 7 0 obj
40
+ <<
41
+ /Count 1 /Kids [ 4 0 R ] /Type /Pages
42
+ >>
43
+ endobj
44
+ 8 0 obj
45
+ <<
46
+ /Filter [ /ASCII85Decode /FlateDecode ] /Length 1799
47
+ >>
48
+ stream
49
+ GarVO;01G^%"@ZKd,a<:)p018Rl9XI9h<;\`6!Pa:5@n=0`ndrH[WNl;#^EA=rZ4XRnOH2<mmmAT;3LThtS(C^M-I8-)&(k7l7fMA5/hC_fq>=Bp--Gqb_G,rT3P>lJcPH+$RC\Nik"<.*5n>6).:2H98bNU[uU`_t<rkh'>bZrq:LjUZj$q-F;<gr1`7JP2GlM[E2'AltOW!Ze_oj4WPQQ;4^#"6N3O'_913c=qrR$_Ntt;S=^!U7t*-@b>`)T*3[F&inp>cn7cej;?`K]RP"LOk@J9!3>\:<`ff5rc8b4!O[HL#BHEaYVu;obgkUFC+[/&\Mre+5mFgOc+XL@[-k#:%A+.?#4G3u@(TB#e7ee`_!48KM+<ua(,VEKY*k($)I\S&Lgs;`@\rUPFhpBWW,aus31ZpubUYDe"6X9Ks10cJn!Eh%+6l-g$@kk/Z*7E-bV]g2KCP?l^<L3"S\s:S5#!h[6j2e#*QBSAt/ZgM+fh!Tc5-:>6\[#D_SMR67WE(m1iHWPN3"Kp9Lp9*%_UAmuF0u7#j\2A6[;3H)NlegKdPuOD,FUs5XUpQ'knDf^]0u%NJ$[U,2E7Jf<Wc1DWXp#i'C,Wn[*r^LJ4or7%<WgtP],Kl%<`"d3E)WY>?qm>M5Afa%r+Sl;uW9/e$Z1qbDY"tS`51#QG5EP;CY\4R8/l(;l4`Y=hc!*p&3^a,bC&c(+8o1d0J`3VX2#i3/V@U)!@bGr0%g0W/<&H:oX.3L.612eC^$9;QbTE1B5866#5DZbL`an:qKn%_PRHF84SS.Bs:K$HD:@7IDe,=\MQ`TpB1G"Bb.U_W'[,22H[2bqZdQ4qAe+bO-t8PUb@pVEZ6>)gECumi4+E[0>Z2E@>7XqC[GJ\$Z?q[D(s)/=1]J69:?BUCU6&fqd/P)haTO9D)@hRaj8>m;>a^+grI)J`o*hc)TT1#&@>VRaM0a;)KBr24BR4b`*O]$'rXg/g4#!<BD<%OG%?Y$Ws(AA2jV"7;Kn"$JjD*Q6<$fs3%U]u]oo\o8k[PBmR&h$b442d2HW=/Ea.jp**&Ct!?%1de@J"qbi.ZFAs7KqMlr9-qZM:u]5cUZIo6@V>63P3N`$A+WJZs__GR5]A1G\^N.V/.Pa)UL`eYlanlAIfWRE<'\`4*.q!i`RnbAPn;u!J2K.Jgj0SQ"lJOJd"TlCDaJ[C-80^7M!(Pemh>Uao<"t1Y4RcJ?WT#fH=29>F<T#S@f0OMOJ5?ObM7RFoqCqC;l28D/S5KirEo+^?B"tVZI'A>mfYh-^!/9X'JF."%C9bXs9"2t2KA08$.-aUE55+,`dJtfR'(>O!lG%uG:(gu+@YWQ]30&udATP6Y)!jT?cf1m6[6>s`g#3e)sGS=+\b//dsj*<"83/6.#M'XC`?FSoGKrf]>+u\rfXOBQ".9^e)ML@jQIdVd&-I"#E:RA:HF6nNs37PR!S'<mfDl"mh8tsVYZEGg*'H^OakQph2rc!X+3sJj<adc%lO$ic$NgRd_r0=/>+.m70q[t3]'BYL=l#'C:bVRuTMgW&GeO4I8ARV$SR@1o/\eS?`>%&'HiC^^[jpu>3:?4$C%`^h6[l;0QX=kj4&g*EVYKr4eBoC.i[/=1h>(a:8Yd'FQJQ$-Hg34@!L/n5DEp5<pfSXj%<CJK]7*rRnJ8hC>Ceq3?&)g8$H81\4_`\HE'!B*?4D;+9?NQ5r_>jXflLKWVPcENJVjgm7Nm!-c7EW-&i@^T;o8a<HS[)[-@[a)>(H9UKm5=Afg,Z=(8.QB/oskA&ea>0>53ne[iMTcW!WT#&Jc~>endstream
50
+ endobj
51
+ xref
52
+ 0 9
53
+ 0000000000 65535 f
54
+ 0000000073 00000 n
55
+ 0000000114 00000 n
56
+ 0000000221 00000 n
57
+ 0000000330 00000 n
58
+ 0000000533 00000 n
59
+ 0000000601 00000 n
60
+ 0000000897 00000 n
61
+ 0000000956 00000 n
62
+ trailer
63
+ <<
64
+ /ID
65
+ [<cc2bcd5827912acf8c3bd2324dddcc13><cc2bcd5827912acf8c3bd2324dddcc13>]
66
+ % ReportLab generated PDF document -- digest (http://www.reportlab.com)
67
+
68
+ /Info 6 0 R
69
+ /Root 5 0 R
70
+ /Size 9
71
+ >>
72
+ startxref
73
+ 2846
74
+ %%EOF
sample/Master_Services_Agreement.pdf ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ %PDF-1.3
2
+ %���� ReportLab Generated PDF document http://www.reportlab.com
3
+ 1 0 obj
4
+ <<
5
+ /F1 2 0 R /F2 3 0 R
6
+ >>
7
+ endobj
8
+ 2 0 obj
9
+ <<
10
+ /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
11
+ >>
12
+ endobj
13
+ 3 0 obj
14
+ <<
15
+ /BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font
16
+ >>
17
+ endobj
18
+ 4 0 obj
19
+ <<
20
+ /Contents 8 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 7 0 R /Resources <<
21
+ /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
22
+ >> /Rotate 0 /Trans <<
23
+
24
+ >>
25
+ /Type /Page
26
+ >>
27
+ endobj
28
+ 5 0 obj
29
+ <<
30
+ /PageMode /UseNone /Pages 7 0 R /Type /Catalog
31
+ >>
32
+ endobj
33
+ 6 0 obj
34
+ <<
35
+ /Author (anonymous) /CreationDate (D:20250921125755+00'00') /Creator (ReportLab PDF Library - www.reportlab.com) /Keywords () /ModDate (D:20250921125755+00'00') /Producer (ReportLab PDF Library - www.reportlab.com)
36
+ /Subject (unspecified) /Title (untitled) /Trapped /False
37
+ >>
38
+ endobj
39
+ 7 0 obj
40
+ <<
41
+ /Count 1 /Kids [ 4 0 R ] /Type /Pages
42
+ >>
43
+ endobj
44
+ 8 0 obj
45
+ <<
46
+ /Filter [ /ASCII85Decode /FlateDecode ] /Length 2201
47
+ >>
48
+ stream
49
+ Gas1`d;ms^&q/Z@kXsCVZ<MtkG*6-tT+i,(WX-Ebg"o^(AWR*PM56h7K*h;<n*,sr+SNL6`$Wo+GG!@?VjfES_k446R?^14;Yous9]HLQf/.q@H;A6C,5h\ka6h7b+%IEV0!<4dfA@Z>j/\5BPa8h$60#5bF?-qj97pB/UgcoP=PefHO\g#/'OiEKoQNpB;>LOf^HoHY29f\!Pbge$B:Z#jJg[TuW@CE!K/^+^Gk:\li><UaX)cXR1F-`kCN:@_@_Gg1e'EoKdOA??86.aQPhR]gdA5-h3k?:js/pK4$Yu>V%aU1<-*n%PHF&^a*T$QiR44$5E,kB-r(F/,KJPXk0cj1IR4e]fD`YPeM89R<ogT]#D3\BG>C+Kc&UCPEbj(Gu8Z#Br/="4)8fY&i,p)c4Z/:t=AH'C"W#mMTh!H)!mj`MA4Ln$;,/Nbf](N2+bh0&&bSU-]Y@YmiHfGOl`JDD+IND8$q"C-l_,jBA&("8ANj.U'-,8eVXamTUNe#i#(`tu-gMNW?\LJ;K2kmpa:c1SU41j*%W7e1EYBB#%j[3e]1iu9mPOB4Q21fJ9)ArZ<Qg_^`/e9hAXXW5tfl0VF0TRVkjIM,SEg^JSFu39'n#f.%3$0UT"tX.r4SnOtk7=Pk!6$bp9DLrA]95:WG["q'WB`T^EdpjJ,nl`)-k_a9/r42G7.i+[Y<AeMUG:t2Dq,A(4'b"C7hCuP7GT<>(MXo"<DISJlmlZ5NS\]7)S!5R"rN[[ka_j'ene@L%$jXO?SqG0m,bk']oj&NrTQsS<h-VWbDbJ[i_HRjrD_=#L2F!0a4iW':E)5<V]G?S4m0q"cFrV8n)^Q7$X,0FqH+1EX;oT[AZ=VgIH"sY4m]VSU@k[iOhUXH<:<lF5QTW)e\N%prc[V,KJd#qH8t$GOs8ha!;_o'^5lS%D7E(R;Iq>^SJAaTnIB8OCm"1+Ak:JmADn#:f:\+\efI01iR_"8C<.Goo=ZpV[i;Xq;Q8HkjJFV,&=m7QLFF@!jiZYqaT,E!BtCgfRT*/Hq7t6l'5UE,(W_B2ep(:W'GZ<:CQW_sP*IrWF]Ol5jl^4PIRF^_EZ)f0.$AA7L<1\J_O4MtW81$b>(m$ZI>>?BRRfNoaLZ*_*M?Pp6_M9]HDBc=@\o^9;+nG_m_^K.8aF'V<ZDLqI`=L';+.$q5qHs:bF+`lbp_NI2^1i1H&Q&;i[I%7QX=>j*/.`&iHHi=kc0+[@ZaQMi6u.lj3^q#d\(e2VF(\u^ScU6V<D'LSs/gJ.ML)o=FabM$rIjh$D[*tb7S#B-ZKYWV;sEO(En;^Ts7Dr7`(K95sRf$H(9\?mVk&mU*5Um^e('ek@>__GXsg%\;S6]]LQ5C/@ZG:Y]Jrh1TeJF7e(>WMEW;Gn'#nf9fu370;?"%,Ck%OKN#I?3??u/@M+;-+!d)DG6VaYHEdkj<;I^]K_.5@?U0igo/Q]=S?qm.`;"44+s8mU.7M([UUQ@,ns,rrZ@*%#/X]V4T6q6c125`^!$uh;:G`-r'9!cT*/2'Ab#YmO>2rGEWUiCT@d0JA=WH8u2Wseag-<m9>a:R\+,5l^[6at:'5pQq+4?H]<]RUc?^mO6>%NE;--nJTZZ_h/@nhFm8C5u!>?S_Ki'l73/U6upQe'<:8\<!]'eWPP$2cJ&#/8\X!L4p$.!T:e\E:hmK+(7GKs@8`0.)L)?*,\fD"]^tN:gcFr'52,e<.O;4N?:D1K?Xepi#f==O*@M`J6/HM<2+pLmDPB!Y)$l[<7S1Yc?d[Lqh*I`-UKdiO9k6*V6E*8J:l:C27/1.=MAJbp6I6neJOh6.W5dV=fG\,DMPfhJp/"3*e#KDRC)Ko+9as=S3/Vk.)^6rG]u!V6D8sdX'a1M,bs34KWtMgFY>9MpBGX)R3U#ma1im\9W1?:2Nht*3:,V@$<Qh4nqYI\I]4r]gDS/i^C3/crZ+fCqK]9UpV`?Cbo=,hDk[Y>rN]cD[N_9RAW[W#QJ^\E!'NoEP#g8<P5
%DRnM$bYBt`?YraQ]@ZflW8JH["d<*[&Ygsr/BYGO\.M;VTHi1Jfbol1Ob^rs@n'75D?`fc>k.4i6I)Y]c@WghT^LkI,GSC"PG$%DR*CfsL>9.tEoHb<&;^ljQ1<WCT'n+KkX[KR$d)ilus.fsaq[fVZ7Me3Qai40ik\K@[SK%C+r-&>E'qn:Ss8"(m)9;J9qW5J8O,n/~>endstream
50
+ endobj
51
+ xref
52
+ 0 9
53
+ 0000000000 65535 f
54
+ 0000000073 00000 n
55
+ 0000000114 00000 n
56
+ 0000000221 00000 n
57
+ 0000000330 00000 n
58
+ 0000000533 00000 n
59
+ 0000000601 00000 n
60
+ 0000000897 00000 n
61
+ 0000000956 00000 n
62
+ trailer
63
+ <<
64
+ /ID
65
+ [<1c25949f9492116853fbf25e4239dab0><1c25949f9492116853fbf25e4239dab0>]
66
+ % ReportLab generated PDF document -- digest (http://www.reportlab.com)
67
+
68
+ /Info 6 0 R
69
+ /Root 5 0 R
70
+ /Size 9
71
+ >>
72
+ startxref
73
+ 3248
74
+ %%EOF
sample/Mutual_NDA.pdf ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ %PDF-1.3
2
+ %���� ReportLab Generated PDF document http://www.reportlab.com
3
+ 1 0 obj
4
+ <<
5
+ /F1 2 0 R /F2 3 0 R
6
+ >>
7
+ endobj
8
+ 2 0 obj
9
+ <<
10
+ /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
11
+ >>
12
+ endobj
13
+ 3 0 obj
14
+ <<
15
+ /BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font
16
+ >>
17
+ endobj
18
+ 4 0 obj
19
+ <<
20
+ /Contents 8 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 7 0 R /Resources <<
21
+ /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
22
+ >> /Rotate 0 /Trans <<
23
+
24
+ >>
25
+ /Type /Page
26
+ >>
27
+ endobj
28
+ 5 0 obj
29
+ <<
30
+ /PageMode /UseNone /Pages 7 0 R /Type /Catalog
31
+ >>
32
+ endobj
33
+ 6 0 obj
34
+ <<
35
+ /Author (anonymous) /CreationDate (D:20250921125754+00'00') /Creator (ReportLab PDF Library - www.reportlab.com) /Keywords () /ModDate (D:20250921125754+00'00') /Producer (ReportLab PDF Library - www.reportlab.com)
36
+ /Subject (unspecified) /Title (untitled) /Trapped /False
37
+ >>
38
+ endobj
39
+ 7 0 obj
40
+ <<
41
+ /Count 1 /Kids [ 4 0 R ] /Type /Pages
42
+ >>
43
+ endobj
44
+ 8 0 obj
45
+ <<
46
+ /Filter [ /ASCII85Decode /FlateDecode ] /Length 2422
47
+ >>
48
+ stream
49
+ Gat=,gN)%.&q/)-FSTK1>?^F8\]f<sUp9.m[i@CSaci9q]Kg"#6:)8h!0j7nhpVS/^:[.9S2*Y9h0o.ll%WjC$i\Dfs4MtepGMp8G"/eG>&S-KDYfCEGEf2di;K`[qYnD)_r\l!N.1`mrH*5YiKDasCq?:m/8TVo]oYtOT,uPoo/jOt?ehM;a^Ugdn'^l91gf%bidUFDRrie$GJAH+-8bb49Ti1Rnh[D_dJ=q15Nh>sdM;_?6g3jHh_UhJGF&'?V*FkR1MK>aPr#Bp-bNp33U+s2C/(9M7qPE"1YHY+oD6OMUHI#f'R"?G5;JRIUKpn;Vop*/<n><P9?pW]qfcDn=_2qXF;CIo_P56gS[BYF-r.Tf=Ja]ee*.-jfp$B;gN8R\e,.+aqhFi$3kI<3<p,9.X+Jc?C[=?4D9=XqYUUZmU-O[nlNo`B):g>VWsUslm60JeM_iqK3l;ah@0q+"`XW[)A'b2bBWqt>@\eNN+pbe-0UEMs/iYt&Of:JYqOi8@:R`9$Z#`kZE,S'I?)fXnkB#/ko4gVK8u`kDQ9EfEcLu3PejVN4b>)jo-$CM'jm;V7I]9F=`h+_`i(RuD#4+?G!63;,i;EZu3b(.)Q-K$t>cQfEA/?HQ9K=hte]sWd67m;(Q6\qE@&C5u]s=Qf+K!S3E3=ud44")OZ9d&r$,$<HfH):c3IS&I"h<;\nkg-E"AsOpH"X?E4G0tt.cVHdF;S47_?.3u<Q!Y<=;b#fE,J@ZMs6G:\dHaoX/,L_!p!"-hN7hV01=Qa67SuC08m4b+LL4'f*ElP;PfV0[Q.?.C1iM#8&gg\-/(Ep=sZ`jKi1Ea^t[4BTi;bD9W:S.#<mYL)nq[$0mFkBPXR$j\dT8o-:.OSfSRhLd(9Q'.Bo/T"N_R@HE3eji"=a64@h<pEX9n.F'r`6'Y%KZhO-]P!&%ebSrt3a&C1EH4YUo\X'tDGPN>?O[GbqC>Z[\a(Q&p&2[</CP\&1.+&ub'1<-"i;mDZE<s"5-cD.jI9)a0cpTPS%f'=HcnMTb)YcNpNSaHFd<E2o=iPL?.":26lOs+H<:`YfRe>D0<^)M'lC1K3>YL_484fgpG9Y&p/L%sQJrY#aUK"2?74\GG@DXXjCOD99R\<dkYq?cG!*/)7ehp;YNWYl)/*1Onq#,g0o*-/d$jo@"E.\TM``eL'o3cj&CglAWPaI(Tc!qsC#kL#qTF0<$;c`ifnqN%VMVS<*rA%2b"0Fm8KhLbN#i=3NsPElIc?;'pX-1@_<:6qMuI]W9_ZYS>*.fps7S1.h1c`smp;H\(:4B2gJo9`XM"U*Z`JJ5EI>p1Yf3HKk7q#o^M#VaW?r#S<,!Z$S?Y8^##$S"+IU*"QY`a*-N.*_ut>YiAV8A2N8ZU8n?4t&"/1HLO=<N')$<T%bfFeKksn@bIB&gP^sR]M5TG1$kDMZafcml6&F0P,'(\N6Z0b><:I.D[#Ara4Ku):n*D>?s^g&IH0<!5o=gQHkl[D0nYW`13$tJI=kLR^85[%0?ZH"+ARnKPl@^:^e9<.Tm:JV6uN0EufTpT<3[3!Ai$/oP;R7g?m7HV1+0A$`AZ.^51U\0n18LKfI$L[_0u8_3a$2[WT<<WmPIDG>Q$#GgUU'EDA\g:-L=p'dh=RR+fE]qIRBGc8!$C1Grh\[V]e4p+'^VP)#D1Nu$o1ED_Xm)e#;;iB*p*dm.E_h57C4-\XPE'-.A3.hiP/V"gl_&Le;Q-J7"JZf%DPdVFoNn:&.ip$sh&^WM3qGNj7U(_.9PD"EIHIg?f"$KY(fE#9l;47,G3)qDa#_aN+X'6!LD73[H@e-t\F68m`FOr2*NUU0X8X\Jo8nYk,:^UP/#+]GX6/$g:P,R)sH:ch7oo3$;XkAO9k(H9I67=(G0#:@*Nq++L_k!Q,W-Q-L)/?-TaR&Tf%*X[b1)mc$)YuSlnS]HQs@>ALCbe?Q3QC55/C86^6WUUO'e>/qL1k!tP&#]iJ%[u&]o-d^rkt7s_H#IN@-he
9kR2og<=UjWabb6fWY,A]k9:eVqhR&e*mS7K:+C7h-j8GUlEfht<dbXiE(&d<(RBCGUlS5%cp'oOOc9Tt(R&[#Z@#h;*jU_$T)4N/@/4%n,+]iTP7s7<DTXWaJE9.=+'UH39I\a`;n>]J"Mjgk9C8;5iVZDNk$3gOdFK4?"keS5cYI9L$e1EAh_t:F`=:K@n)*DDY%mKhjVmi2>n,n$F7K*XX.3E&8]n6mGV$F-V(JjIc^L0fh/.M'o=0tR"q>BtdY1g'g*2(fqp)L%\;JS;8GYE*u@342]T(gP:q)<SP0(iBYQ:oJl/ZL5E3@Me?%T=+tfQ4Q>i*)rJfE,Lq'cM3&gP-\4#qA5)Td]GO1:UhIM.-lf\tC_@HBG:0o];-ERG?[p7\PK>#QN_`(<E,pqmAX8F=*1D)g_+*"HqmJ~>endstream
50
+ endobj
51
+ xref
52
+ 0 9
53
+ 0000000000 65535 f
54
+ 0000000073 00000 n
55
+ 0000000114 00000 n
56
+ 0000000221 00000 n
57
+ 0000000330 00000 n
58
+ 0000000533 00000 n
59
+ 0000000601 00000 n
60
+ 0000000897 00000 n
61
+ 0000000956 00000 n
62
+ trailer
63
+ <<
64
+ /ID
65
+ [<a017cb121e02b55d6f3b7490268d6807><a017cb121e02b55d6f3b7490268d6807>]
66
+ % ReportLab generated PDF document -- digest (http://www.reportlab.com)
67
+
68
+ /Info 6 0 R
69
+ /Root 5 0 R
70
+ /Size 9
71
+ >>
72
+ startxref
73
+ 3469
74
+ %%EOF
sample/Residential_Lease_Agreement.pdf ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ %PDF-1.3
2
+ %���� ReportLab Generated PDF document http://www.reportlab.com
3
+ 1 0 obj
4
+ <<
5
+ /F1 2 0 R /F2 3 0 R
6
+ >>
7
+ endobj
8
+ 2 0 obj
9
+ <<
10
+ /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
11
+ >>
12
+ endobj
13
+ 3 0 obj
14
+ <<
15
+ /BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font
16
+ >>
17
+ endobj
18
+ 4 0 obj
19
+ <<
20
+ /Contents 8 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 7 0 R /Resources <<
21
+ /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
22
+ >> /Rotate 0 /Trans <<
23
+
24
+ >>
25
+ /Type /Page
26
+ >>
27
+ endobj
28
+ 5 0 obj
29
+ <<
30
+ /PageMode /UseNone /Pages 7 0 R /Type /Catalog
31
+ >>
32
+ endobj
33
+ 6 0 obj
34
+ <<
35
+ /Author (anonymous) /CreationDate (D:20250921125755+00'00') /Creator (ReportLab PDF Library - www.reportlab.com) /Keywords () /ModDate (D:20250921125755+00'00') /Producer (ReportLab PDF Library - www.reportlab.com)
36
+ /Subject (unspecified) /Title (untitled) /Trapped /False
37
+ >>
38
+ endobj
39
+ 7 0 obj
40
+ <<
41
+ /Count 1 /Kids [ 4 0 R ] /Type /Pages
42
+ >>
43
+ endobj
44
+ 8 0 obj
45
+ <<
46
+ /Filter [ /ASCII85Decode /FlateDecode ] /Length 1712
47
+ >>
48
+ stream
49
+ Gas1_d;I\u&:EqJ/GdfeOOM`oRl9JoRjb69`sHSDMP/2r/ZiOV-q[!-EW,Vj9A-Tc["lGJ9p%M!Za3;akRIM\nUFCJIEq)\b7T_/-stEd?1O$H$)RG*XH0eohk)6nB.%=:qXF$'1-.K[UV+I;',lkiTsr.#*'0OboB[Zlk1OuGPB8o_Bm_d2dq^MJVYg\\Iuo&<nh8[drb"$857/p<QP3Dk?[dK!g/Fh?-=ohZ)q].%g4l&\Vg:q?3F+8J#l9DB63X7*L3AYKEb`F!c"q+2hU`5;0.0j[[4<Q;m#WU=ng%kKCqt?PaEj)tm4>rrk39O.E3tNi.jP9p,:Rt+C0d'k>N<6AZ<X[gS6d=b,lVXcQ4BPu41_oUD3C$"(I,]*!o[8m8)R_LQu>hd70F^+[2gT@P#.aqY4me85_sa+&EHp8(rW@&[KTTHl.c%-#QBE*i^:Z;mBf16Q>$Zu'J8uROGK#es#$8nOfh1nSUa(]1FmrKXYlbC^90'"GXu%lQP,H2$E0jJLU@UBO@?9]SjP\[',Y>MJ"h47IQ,I@Hb)adDrt2O^p"KbbI:?<%c;*D#pH+49kP$NW`pD`W,*\o&Up^M%ohNk2R+`4h8t<S&PI"Vhh1.n'g*;7dp^]g_\3"(.O-eQdk8]l+5r8lEPX&g.E.diD%>iIe9]dT:>*M*[G?I(d7[K=7sJ^`Ri5kC[B"hcB'!Ki!ko'hH^E+84T.Z,kN@&uj6?C<cjW"FD<W4SetXS+2@Rh5&a%RhP1RK`=pKod^B$1$3>cJ,@%1k7/_E3_>nh-#Iba5>KC&fKFV'?WGn1t`A2iM1Nd_KAJlF^qAl`GHmDU><L@5ee*bWqaSUqoMcI1He2Oj`d')sXE6ZDQ;'kgb)3F,c.PhV#I9._GLBRn+Q<>1=q%siLt_&a$I&LJgI+G'st)Xdf`TFL1d(P]C#2[\Cdr`O1RBJ#4VXg<sKZ"Q?m?mNpV4daWX?/n!<NA7.I3(`*`cVUIeKZ<V/Fu?u$)X-V\,gGpsHM&5JUe]'ZrFc_Q;1RS)_dOse1i;''Sh/<+S<g8'(]a^T.J_Tr4kP(Wd:.GRELoUmI;RL9!_Z)KP9Ghp#s/'+HoK0DcI_;LFuY5_jfV`0^M6K!`Jq3*D5qIQGB:RI_F]rtO6,)TdGoYg.SPGC9#VoMJpf_</)X?eq?RC@_53HB:J)QPeQ"oh=hp+'V?2Lj0tnF#/[^#&"V_X87TWat+fao(:Sd0X%3q!0&&q@5-Z8N?No8Yp0N@7G6=7a)ZQ,SD"sb[hO7U!]Ap-u52qPO&9Ub#Y2]#^)A$Xl8kic[P`=:42CI5%8N%Q<hQ!0h\(>?k%o)YPL/%^/-C99+1UI:Fim'/F1L4;*\#N>1lTgB\8QNf3triip$-p-;+(skGKm@25:c/7c%:Mu-6Y%gn0\5#NE(A&D\h>4J$k?&(8n;n+";1.0Fl.rIJ;n6gol&-tEi5UlhQc2.LL=$n#_+YFK#_(,e>/hPciB3gIH1p[r-D;r`3(;`QAGl*6NH:>-Rbl@td:IXV$i.MNqoSFX$!<43O5pFbp2<\jE/46e8"sJEY!3.D:manQ0'(GM!V)rjK6>-)@d6ECQpG=6`)g5(ken6''RVl5qR#=($"hch79$7Y"O]`47b0`T`kS5ooBuRtLM'Z/VXuahVs/S(?b`DCo]G_;X6%R6H7O]Y(&j'E4#Yt[Z/<5ip+U_EVAK75c\Sfd~>endstream
50
+ endobj
51
+ xref
52
+ 0 9
53
+ 0000000000 65535 f
54
+ 0000000073 00000 n
55
+ 0000000114 00000 n
56
+ 0000000221 00000 n
57
+ 0000000330 00000 n
58
+ 0000000533 00000 n
59
+ 0000000601 00000 n
60
+ 0000000897 00000 n
61
+ 0000000956 00000 n
62
+ trailer
63
+ <<
64
+ /ID
65
+ [<9d81045db1dd6bf8c79b9710d4d13b3d><9d81045db1dd6bf8c79b9710d4d13b3d>]
66
+ % ReportLab generated PDF document -- digest (http://www.reportlab.com)
67
+
68
+ /Info 6 0 R
69
+ /Root 5 0 R
70
+ /Size 9
71
+ >>
72
+ startxref
73
+ 2759
74
+ %%EOF
setup.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Lega.AI Setup Script
4
+ ===================
5
+ Interactive setup script to help configure your Lega.AI environment.
6
+ """
7
+
8
+ import os
9
+ import sys
10
+ from pathlib import Path
11
+
12
+
13
def _parse_ascii_int(raw):
    """Return *raw* as an int when it consists solely of ASCII digits, else None."""
    # str.isdigit() is also true for characters such as "²", which int()
    # rejects with ValueError, so the ASCII check has to come first.
    if raw.isascii() and raw.isdigit():
        return int(raw)
    return None


def main():
    """Interactively create ``.env`` from the ``.env.example`` template.

    Steps, in order:
      1. Ask for confirmation before replacing an existing ``.env``.
      2. Prompt for the Google AI API key and substitute it into the template.
      3. Write ``.env`` immediately, so a usable file exists even if the
         optional prompts below are interrupted.
      4. Prompt for the optional file-size limit and risk sensitivity, then
         write ``.env`` again with those values applied.

    Returns None in every case; problems are reported on stdout.
    """
    print("🚀 Welcome to Lega.AI Setup!")
    print("=" * 50)
    print()

    # Check if .env exists
    env_file = Path(".env")
    if env_file.exists():
        print("📋 Found existing .env file")
        overwrite = input("Do you want to update it? (y/N): ").lower().strip()
        if overwrite != "y":
            print("Setup cancelled.")
            return
    else:
        print("📋 Creating new .env file...")

    # Copy from template
    template_file = Path(".env.example")
    if not template_file.exists():
        print("❌ .env.example template not found!")
        return

    # Get API key from user
    print()
    print("🔑 Google AI API Key Setup")
    print("-" * 30)
    print("Get your API key from: https://makersuite.google.com/app/apikey")
    print()

    api_key = input("Enter your Google AI API key: ").strip()

    if not api_key:
        print("❌ No API key provided. You can add it later to the .env file.")
        api_key = "your_google_ai_api_key_here"
    else:
        print("✅ API key received")

    # Read the template with an explicit encoding so the result does not
    # depend on the platform's default code page.
    content = template_file.read_text(encoding="utf-8")

    # Replace the API key placeholder
    content = content.replace(
        "GOOGLE_API_KEY=your-google-api-key-here", f"GOOGLE_API_KEY={api_key}"
    )

    # First write: guarantees a valid .env before the optional questions.
    env_file.write_text(content, encoding="utf-8")

    print()
    print("✅ Environment file created successfully!")
    print()

    # Optional configuration
    print("⚙️ Optional Configuration")
    print("-" * 25)

    # File size limit
    max_size = input("Maximum file size in MB (default: 10): ").strip()
    if max_size:
        size_value = _parse_ascii_int(max_size)
        if size_value is not None and size_value > 0:
            content = content.replace(
                "MAX_FILE_SIZE_MB=10", f"MAX_FILE_SIZE_MB={size_value}"
            )
        else:
            print("Invalid file size; keeping the default.")

    # Risk sensitivity
    print()
    print("Risk sensitivity (1-5, where 5 is most sensitive):")
    risk_sens = input("Enter risk sensitivity (default: 3): ").strip()
    if risk_sens:
        risk_value = _parse_ascii_int(risk_sens)
        if risk_value is not None and 1 <= risk_value <= 5:
            content = content.replace(
                "RISK_SENSITIVITY=3", f"RISK_SENSITIVITY={risk_value}"
            )
        else:
            print("Invalid risk sensitivity; keeping the default.")

    # Second write: persist the optional settings.
    env_file.write_text(content, encoding="utf-8")

    print()
    print("🎉 Setup Complete!")
    print("=" * 20)
    print()
    print("Next steps:")
    print(
        "1. Install dependencies: uv add streamlit 'langchain[google-genai]' langchain-google-genai langchain-chroma"
    )
    print("2. Run the application: streamlit run main.py")
    print("3. Open your browser to: http://localhost:8501")
    print()
    print("Need help? Check the README.md file for detailed instructions.")


if __name__ == "__main__":
    main()
src/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Create __init__.py files to make directories proper Python packages
2
+
3
+ # src/__init__.py
src/models/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # src/models/__init__.py
src/models/document.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Optional, Dict, Any
3
+ from datetime import datetime
4
+ from enum import Enum
5
+
6
+
7
class DocumentType(str, Enum):
    """Closed set of document-type labels.

    Members subclass ``str``, so each one compares equal to its lowercase
    string value (for example ``DocumentType.NDA == "nda"``).
    """

    RENTAL = "rental"
    LOAN = "loan"
    EMPLOYMENT = "employment"
    SERVICE = "service"
    NDA = "nda"
    OTHER = "other"
14
+
15
+
16
class RiskLevel(str, Enum):
    """Severity labels for a risk; members are also plain ``str`` values."""

    # Declared from least to most severe.
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
21
+
22
+
23
class RiskCategory(str, Enum):
    """Category labels for a risk; members are also plain ``str`` values."""

    FINANCIAL = "financial"
    COMMITMENT = "commitment"
    RIGHTS = "rights"
    STANDARD = "standard"
28
+
29
+
30
class ClausePosition(BaseModel):
    """Location of a clause: a start index, an end index and an optional page."""

    # NOTE(review): presumably character offsets into the document text, and
    # whether end_index is inclusive is not shown here — confirm with the producer.
    start_index: int
    end_index: int
    page_number: Optional[int] = None  # None when no page is recorded
34
+
35
+
36
class RiskFactor(BaseModel):
    """A single risk entry: clause text, its category and severity, and an explanation."""

    id: str
    clause_text: str
    category: RiskCategory
    severity: RiskLevel
    explanation: str
    suggestion: Optional[str] = None  # optional; defaults to None
    position: Optional[ClausePosition] = None  # optional; defaults to None
44
+
45
+
46
class DocumentAnalysis(BaseModel):
    """Analysis results attached to one document, referenced by ``document_id``."""

    document_id: str
    document_type: DocumentType
    # Validation rejects values outside the inclusive range 0..100.
    risk_score: int = Field(ge=0, le=100)
    summary: str
    simplified_text: str
    risk_factors: List[RiskFactor] = []
    # NOTE(review): the dict schemas for key_dates / financial_terms are not
    # defined in this module — confirm against the code that fills them.
    key_dates: List[Dict[str, Any]] = []
    financial_terms: Dict[str, Any] = {}
    # Filled with datetime.now() (naive, local time) when the model is created.
    created_at: datetime = Field(default_factory=datetime.now)
56
+
57
+
58
class Document(BaseModel):
    """Record for a stored file plus its optional analysis."""

    id: str
    filename: str
    file_path: str
    document_type: Optional[DocumentType] = None  # None until a type is assigned
    file_size: int  # NOTE(review): presumably bytes — confirm with the uploader
    # Filled with datetime.now() (naive, local time) when the model is created.
    upload_timestamp: datetime = Field(default_factory=datetime.now)
    analysis: Optional[DocumentAnalysis] = None
    processed: bool = False
67
+
68
+
69
class QASession(BaseModel):
    """One question/answer pair tied to a document via ``document_id``."""

    id: str
    document_id: str
    question: str
    answer: str
    # Filled with datetime.now() (naive, local time) when the model is created.
    timestamp: datetime = Field(default_factory=datetime.now)
    # NOTE(review): no range constraint is declared; the expected scale
    # (e.g. 0..1) is not shown here.
    confidence_score: Optional[float] = None
76
+
77
+
78
class SimplificationRequest(BaseModel):
    """Input for a simplification call: required text plus optional hints."""

    text: str
    context: Optional[str] = None  # optional; defaults to None
    document_type: Optional[DocumentType] = None  # optional; defaults to None
82
+
83
+
84
class SimplificationResponse(BaseModel):
    """Result of a simplification call: original text, simplified text and extras."""

    original_text: str
    simplified_text: str
    key_points: List[str] = []  # empty when none are supplied
    jargon_definitions: Dict[str, str] = {}  # term -> definition; empty by default
src/pages/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # src/pages/__init__.py
src/pages/analysis.py ADDED
@@ -0,0 +1,978 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import plotly.graph_objects as go
3
+ import plotly.express as px
4
+ from typing import Dict, Any
5
+ import time
6
+
7
+ from ..utils.helpers import get_risk_color, extract_financial_terms, extract_key_dates
8
+
9
+
10
def create_advanced_highlighting(
    text: str, risk_factors: list, jargon_definitions: dict
) -> str:
    """Wrap risky clauses and jargon terms of *text* in tooltip ``<span>`` tags.

    Every span is located on the untouched input and the result is assembled
    in one pass, so the offsets of different highlights can never drift apart.

    Args:
        text: The document text. Text outside the highlights is returned
            unchanged (it is NOT HTML-escaped).
        risk_factors: Dicts read through the keys ``clause_text``,
            ``severity``, ``explanation`` and ``suggestion``. Missing or
            ``None`` values are tolerated. Only the first occurrence of the
            first 150 characters of the stripped clause is highlighted.
        jargon_definitions: Mapping of term -> definition. Terms shorter than
            three characters are ignored; matching is case-insensitive on
            word boundaries.

    Returns:
        *text* with ``<span class="tooltip risk-...">`` and
        ``<span class="tooltip jargon-term">`` wrappers inserted. A highlight
        that would overlap an already accepted one is dropped. At most five
        jargon occurrences are wrapped (those nearest the end of the text,
        matching the previous selection order).
    """
    import re

    max_jargon_highlights = 5

    def attr_safe(value, limit):
        """Truncate *value* and neutralise characters that would break title="..."."""
        cleaned = str(value or "")[:limit]
        return cleaned.replace('"', "'").replace("<", "&lt;").replace(">", "&gt;")

    # (start, end, opening_tag); offsets always refer to the original *text*.
    spans = []

    def overlaps(start, end):
        """Return True when [start, end) intersects an accepted span."""
        return any(
            start < taken_end and taken_start < end
            for taken_start, taken_end, _ in spans
        )

    # Risk clauses take priority over jargon and are accepted in list order.
    for factor in risk_factors:
        clause = (factor.get("clause_text") or "").strip()[:150]
        if not clause:
            continue

        start = text.find(clause)
        if start == -1:
            continue
        end = start + len(clause)
        if overlaps(start, end):
            # Wrapping the same characters twice would yield broken markup.
            continue

        severity = attr_safe(factor.get("severity") or "low", None)
        explanation = attr_safe(factor.get("explanation"), 200)
        suggestion = attr_safe(factor.get("suggestion"), 200)

        tooltip = f"⚠️ Risk: {severity.upper()}<br>📝 {explanation}"
        if suggestion:
            tooltip += f"<br>💡 Suggestion: {suggestion}"

        spans.append(
            (start, end, f'<span class="tooltip risk-{severity}" title="{tooltip}">')
        )

    # Collect every jargon occurrence on the original text.
    candidates = []
    for term, definition in jargon_definitions.items():
        if len(term) < 3:  # Skip very short terms
            continue

        title = f"📚 {attr_safe(term, None)}: {attr_safe(definition, 150)}"
        opening = f'<span class="tooltip jargon-term" title="{title}">'
        pattern = re.compile(r"\b" + re.escape(term) + r"\b", re.IGNORECASE)
        candidates.extend(
            (match.start(), match.end(), opening) for match in pattern.finditer(text)
        )

    # Limit jargon to avoid clutter; later occurrences are considered first.
    candidates.sort(key=lambda span: span[0], reverse=True)
    accepted = 0
    for start, end, opening in candidates:
        if accepted == max_jargon_highlights:
            break
        if overlaps(start, end):
            continue
        spans.append((start, end, opening))
        accepted += 1

    # Single left-to-right assembly pass over the non-overlapping spans.
    pieces = []
    cursor = 0
    for start, end, opening in sorted(spans, key=lambda span: span[0]):
        pieces.append(text[cursor:start])
        pieces.append(opening)
        pieces.append(text[start:end])
        pieces.append("</span>")
        cursor = end
    pieces.append(text[cursor:])

    return "".join(pieces)
112
+
113
+
114
def show_analysis_interface():
    """Display the document analysis interface.

    Without ``st.session_state["current_document"]`` the page shows navigation
    buttons and up to the last three entries of ``documents_library``. With a
    current document it renders the risk dashboard, the original and
    simplified text side by side, five analysis tabs and the action buttons.
    A sample document that has not been processed yet is processed first.
    """

    if not st.session_state.get("current_document"):
        st.info("📊 **Document Analysis Page**")
        st.markdown("### No document selected for analysis")
        st.markdown("""
        To view analysis results, you need to:
        1. **Upload a new document** for instant analysis, or
        2. **Check your library** for previously analyzed documents
        """)

        col1, col2, col3 = st.columns(3)

        with col1:
            if st.button("📄 Upload Document", type="primary", use_container_width=True):
                st.session_state.page = "📄 Upload"
                st.rerun()

        with col2:
            if st.button("📚 View Library", use_container_width=True):
                # The page key previously held a U+FFFD replacement character
                # instead of the emoji, so it could not match the library page.
                # NOTE(review): "📚 Library" is inferred from the button label
                # and the other page keys — confirm against the page router.
                st.session_state.page = "📚 Library"
                st.rerun()

        with col3:
            if st.button("🏠 Go Home", use_container_width=True):
                st.session_state.page = "🏠 Home"
                st.rerun()

        # Show recently analyzed documents if available
        if st.session_state.get("documents_library"):
            st.markdown("---")
            st.markdown("### 📋 Recently Analyzed Documents")
            st.markdown("Click on any document below to view its analysis:")

            for doc in st.session_state.documents_library[-3:]:  # Show last 3
                col1, col2 = st.columns([3, 1])
                with col1:
                    # "or" also covers a document_type stored as None.
                    doc_type_label = (doc.get("document_type") or "Unknown").title()
                    st.markdown(
                        f"**{doc.get('filename', 'Unknown')}** - {doc_type_label}"
                    )
                with col2:
                    if st.button(
                        "View Analysis",
                        key=f"view_{doc.get('id')}",
                        use_container_width=True,
                    ):
                        # Load this document for analysis
                        st.session_state.current_document = doc
                        st.rerun()

        return

    doc = st.session_state.current_document

    # Header
    st.header("📊 Document Analysis")
    doc_type_label = (doc.get("document_type") or "Unknown").title()
    st.markdown(
        f"**File:** {doc.get('filename', 'Unknown')} | **Type:** {doc_type_label}"
    )

    # If it's a sample document, process it first
    if doc.get("is_sample") and not doc.get("processed"):
        process_sample_document(doc)
        return

    # Risk Score Dashboard
    show_risk_dashboard(doc)

    # Document Content Analysis
    col1, col2 = st.columns([1, 1])

    with col1:
        show_original_document(doc)

    with col2:
        show_simplified_version(doc)

    # Additional Analysis Sections
    st.markdown("---")

    # Tabs for different analysis views
    tab1, tab2, tab3, tab4, tab5 = st.tabs(
        [
            "📋 Summary",
            "⚠️ Risk Factors",
            "📅 Key Dates",
            "💰 Financial Terms",
            "📊 Market Comparison",
        ]
    )

    with tab1:
        show_document_summary(doc)

    with tab2:
        show_risk_factors(doc)

    with tab3:
        show_key_dates(doc)

    with tab4:
        show_financial_terms(doc)

    with tab5:
        show_market_comparison(doc)

    # Action buttons
    st.markdown("---")
    col1, col2, col3 = st.columns(3)

    with col1:
        if st.button("💬 Ask Questions", use_container_width=True):
            st.session_state.page = "💬 Q&A"
            st.rerun()

    with col2:
        if st.button("📥 Export Report", use_container_width=True):
            export_report(doc)

    with col3:
        if st.button("📄 Analyze New Document", use_container_width=True):
            st.session_state.current_document = None
            st.session_state.page = "📄 Upload"
            st.rerun()
233
+
234
+
235
def process_sample_document(doc):
    """Run the simulated analysis on a sample document and store mock results.

    Shows a progress bar through five staged messages (0.5 s each), fills
    *doc* in place with generated risk factors, simplified text, summary,
    key points and jargon definitions, marks it processed, stores it as the
    current document and triggers a rerun.
    """
    st.info("🤖 Processing sample document with AI analysis...")

    bar = st.progress(0)
    status = st.empty()

    # (status message, progress percentage) for each simulated stage
    stages = [
        ("📄 Extracting text...", 20),
        ("🔍 Detecting document type...", 40),
        ("⚠️ Analyzing risks...", 60),
        ("💬 Simplifying language...", 80),
        ("📋 Generating summary...", 100),
    ]
    for message, percent in stages:
        status.text(message)
        bar.progress(percent)
        time.sleep(0.5)

    # Mock results depend only on the document type (and the original text).
    kind = doc.get("document_type", "other")
    mock_risks = generate_mock_risk_factors(kind)
    mock_simplified = generate_mock_simplified_text(doc.get("original_text", ""), kind)
    mock_summary = generate_mock_summary(kind)

    analysis = {
        "risk_data": {
            "risk_factors": mock_risks,
            "overall_assessment": f"This {kind} document contains several high-risk clauses.",
        },
        "simplified_text": mock_simplified,
        "summary": mock_summary,
        "key_points": [f"Key point {number} for {kind}" for number in (1, 2, 3)],
        "jargon_definitions": {
            "Liability": "Legal responsibility for damages",
            "Arbitration": "Dispute resolution outside of court",
        },
        "processed": True,
        "analysis_timestamp": time.time(),
    }
    doc.update(analysis)

    st.session_state.current_document = doc

    # Clear the transient widgets before announcing completion.
    bar.empty()
    status.empty()
    st.success("✅ Analysis complete!")
    time.sleep(1)
    st.rerun()
296
+
297
+
298
def show_risk_dashboard(doc):
    """Render the risk gauge, the risk-factor count and the risk level.

    The score is 15 points per risk factor, capped at 100. Levels are
    Low (<25), Medium (<50), High (<75) and Critical otherwise.
    """
    risk_data = doc.get("risk_data", {})
    factor_count = len(risk_data.get("risk_factors", []))
    score = min(factor_count * 15, 100)

    gauge_col, count_col, level_col = st.columns([2, 1, 1])

    with gauge_col:
        # Gauge chart with four shaded bands and a marker line at 90.
        gauge_style = {
            "axis": {"range": [None, 100]},
            "bar": {"color": get_risk_color(score)},
            "steps": [
                {"range": [0, 25], "color": "lightgray"},
                {"range": [25, 50], "color": "gray"},
                {"range": [50, 75], "color": "lightcoral"},
                {"range": [75, 100], "color": "red"},
            ],
            "threshold": {
                "line": {"color": "red", "width": 4},
                "thickness": 0.75,
                "value": 90,
            },
        }
        indicator = go.Indicator(
            mode="gauge+number+delta",
            value=score,
            domain={"x": [0, 1], "y": [0, 1]},
            title={"text": "Risk Score"},
            delta={"reference": 50},
            gauge=gauge_style,
        )
        fig = go.Figure(indicator)
        fig.update_layout(height=300)
        st.plotly_chart(fig, use_container_width=True)

    with count_col:
        above_average = max(0, factor_count - 3)
        st.metric(
            label="Risk Factors Found",
            value=factor_count,
            delta=f"vs average: +{above_average}",
        )

    with level_col:
        if score < 25:
            level = "Low"
        elif score < 50:
            level = "Medium"
        elif score < 75:
            level = "High"
        else:
            level = "Critical"

        colour_mode = "inverse" if score > 50 else "normal"
        st.metric(label="Risk Level", value=level, delta_color=colour_mode)

    # Risk assessment summary, shown only when one is present
    assessment = risk_data.get("overall_assessment")
    if assessment:
        st.info(f"**Assessment:** {assessment}")
365
+
366
+
367
def show_original_document(doc):
    """Display the original document with advanced highlighting and hover definitions.

    Reads ``original_text``, ``risk_data["risk_factors"]`` and
    ``jargon_definitions`` from *doc*, builds the highlighted markup with
    ``create_advanced_highlighting``, injects the CSS classes that markup
    refers to, and renders it. Texts longer than 1000 characters additionally
    get an expander holding the raw text in a disabled text area.
    """
    st.subheader("📄 Original Document")

    original_text = doc.get("original_text", "")
    risk_factors = doc.get("risk_data", {}).get("risk_factors", [])
    jargon_definitions = doc.get("jargon_definitions", {})

    # Advanced highlighting with hover tooltips
    highlighted_text = create_advanced_highlighting(
        original_text, risk_factors, jargon_definitions
    )

    # Custom CSS for hover tooltips with responsive theming.
    # The class names (.tooltip, .risk-<severity>, .jargon-term) must match
    # the ones emitted by create_advanced_highlighting.
    st.markdown(
        """
    <style>
    .tooltip {
        position: relative;
        display: inline;
        cursor: help;
        border-radius: 4px;
        padding: 2px 4px;
        margin: 0 1px;
    }

    /* Risk highlighting with theme-aware backgrounds */
    .risk-critical {
        background-color: rgba(255, 68, 68, 0.2);
        border-left: 4px solid #ff4444;
        padding: 4px 8px;
        border-radius: 4px;
        cursor: help;
    }
    .risk-high {
        background-color: rgba(255, 136, 0, 0.2);
        border-left: 4px solid #ff8800;
        padding: 4px 8px;
        border-radius: 4px;
        cursor: help;
    }
    .risk-medium {
        background-color: rgba(255, 204, 0, 0.2);
        border-left: 4px solid #ffcc00;
        padding: 4px 8px;
        border-radius: 4px;
        cursor: help;
    }
    .risk-low {
        background-color: rgba(68, 170, 68, 0.2);
        border-left: 4px solid #44aa44;
        padding: 4px 8px;
        border-radius: 4px;
        cursor: help;
    }

    /* Jargon term highlighting */
    .jargon-term {
        background-color: rgba(46, 134, 171, 0.2);
        text-decoration: underline dotted #2e86ab;
        padding: 2px 4px;
        border-radius: 3px;
        cursor: help;
    }

    /* Enhanced tooltips */
    .tooltip:hover {
        opacity: 0.8;
    }
    </style>
    """,
        unsafe_allow_html=True,
    )

    # NOTE(review): the document text is rendered with raw HTML enabled;
    # confirm that uploaded content cannot inject unwanted markup here.
    st.markdown(highlighted_text, unsafe_allow_html=True)

    # Scroll area for long documents
    if len(original_text) > 1000:
        with st.expander("View Full Document"):
            st.text_area("Full Text", original_text, height=400, disabled=True)
447
+
448
+
449
def show_simplified_version(doc):
    """Render the simplified text, then the key points and the jargon glossary.

    The two trailing sections are shown only when *doc* carries a non-empty
    ``key_points`` list or ``jargon_definitions`` mapping respectively.
    """
    st.subheader("💬 Simplified Version")
    st.markdown(doc.get("simplified_text", "Processing..."))

    # Bullet list of key points
    points = doc.get("key_points", [])
    if points:
        st.markdown("**Key Points:**")
        for entry in points:
            st.markdown(f"• {entry}")

    # Glossary of legal terms
    glossary = doc.get("jargon_definitions", {})
    if glossary:
        st.markdown("**Legal Terms Explained:**")
        for word in glossary:
            st.markdown(f"**{word}:** {glossary[word]}")
469
+
470
+
471
def show_document_summary(doc):
    """Render the summary text followed by a two-column metadata section.

    Left column: document type and filename. Right column: file size and
    analysis time, each shown only when the corresponding key is set.
    """
    st.markdown(doc.get("summary", "Generating summary..."))

    # Document metadata
    st.markdown("### 📊 Document Information")
    left, right = st.columns(2)

    with left:
        type_label = doc.get("document_type", "Unknown").title()
        st.markdown(f"**Type:** {type_label}")
        name_label = doc.get("filename", "Unknown")
        st.markdown(f"**Filename:** {name_label}")

    with right:
        if doc.get("file_size"):
            from ..utils.helpers import format_file_size

            size_label = format_file_size(doc["file_size"])
            st.markdown(f"**Size:** {size_label}")

        if doc.get("analysis_timestamp"):
            import datetime

            # analysis_timestamp is converted with fromtimestamp (local time).
            analysed_at = datetime.datetime.fromtimestamp(doc["analysis_timestamp"])
            stamp = analysed_at.strftime("%Y-%m-%d %H:%M")
            st.markdown(f"**Analyzed:** {stamp}")
495
+
496
+
497
def show_risk_factors(doc):
    """List every risk factor with a severity banner and its details.

    Shows an info message and returns when the document has no risk factors.
    Severities other than critical/high/medium use the "low" banner.
    """
    factors = doc.get("risk_data", {}).get("risk_factors", [])

    if not factors:
        st.info("No significant risk factors identified in this document.")
        return

    # (severity, banner function, heading prefix); anything else is "low".
    banner_styles = (
        ("critical", st.error, "🚨 **Critical Risk"),
        ("high", st.warning, "⚠️ **High Risk"),
        ("medium", st.info, "🟡 **Medium Risk"),
    )
    fallback_style = (st.success, "🟢 **Low Risk")

    for number, factor in enumerate(factors, start=1):
        level = factor.get("severity", "low")

        banner, heading = next(
            ((fn, prefix) for name, fn, prefix in banner_styles if level == name),
            fallback_style,
        )
        banner(f"{heading} #{number}**")

        clause = factor.get("clause_text", "N/A")
        st.markdown(f"**Clause:** {clause}")
        category = factor.get("category", "N/A").title()
        st.markdown(f"**Category:** {category}")
        explanation = factor.get("explanation", "N/A")
        st.markdown(f"**Explanation:** {explanation}")

        if factor.get("suggestion"):
            st.markdown(f"**Suggestion:** {factor['suggestion']}")

        st.markdown("---")
526
+
527
+
528
def show_key_dates(doc):
    """Show dates extracted from the document plus a mock obligations timeline.

    Dates come from ``extract_key_dates`` applied to ``doc["original_text"]``.
    The obligations list and the chart are hard-coded mock data selected by
    ``doc["document_type"]`` (rental, loan, employment); other types get no
    timeline entries and no chart.
    """
    found_dates = extract_key_dates(doc.get("original_text", ""))
    if not found_dates:
        st.info("No specific dates found in this document.")
        return

    # Mock timeline data based on document type
    mock_timelines = {
        "rental": [
            {
                "date": "1st of every month",
                "event": "Rent Payment Due",
                "type": "recurring",
            },
            {
                "date": "30 days notice",
                "event": "Termination Notice Required",
                "type": "condition",
            },
            {
                "date": "End of lease",
                "event": "Security Deposit Return",
                "type": "deadline",
            },
        ],
        "loan": [
            {
                "date": "15th of every month",
                "event": "EMI Payment Due",
                "type": "recurring",
            },
            {
                "date": "7 days after due",
                "event": "Late Fee Applicable",
                "type": "penalty",
            },
            {"date": "24 months", "event": "Loan Maturity", "type": "deadline"},
        ],
        "employment": [
            {
                "date": "Last day of month",
                "event": "Salary Payment",
                "type": "recurring",
            },
            {
                "date": "90 days",
                "event": "Resignation Notice Period",
                "type": "condition",
            },
            {
                "date": "2 years post-termination",
                "event": "Non-compete Expires",
                "type": "deadline",
            },
        ],
    }
    # Bullet marker per obligation type; anything else gets the pin marker.
    marker_by_kind = {"recurring": "🔄", "penalty": "⚠️", "deadline": "📅"}

    # Enhanced date analysis with timeline
    dates_col, timeline_col = st.columns([1, 1])

    with dates_col:
        st.markdown("**Important Dates Found:**")
        for entry in found_dates:
            st.markdown(f"• **{entry['date']}** - Context: {entry['context']}")

    with timeline_col:
        st.markdown("**Timeline & Obligations:**")
        timeline_items = mock_timelines.get(doc.get("document_type", "other"), [])
        for obligation in timeline_items:
            marker = marker_by_kind.get(obligation["type"], "📌")
            st.markdown(f"{marker} **{obligation['date']}**: {obligation['event']}")

    # Visual timeline chart
    if not timeline_items:
        return

    st.markdown("---")
    st.markdown("**📊 Visual Timeline**")

    import pandas as pd

    df = pd.DataFrame(
        [
            {
                "Event": obligation["event"],
                "Timeline": obligation["date"],
                "Type": obligation["type"].title(),
                "Order": position,
            }
            for position, obligation in enumerate(timeline_items)
        ]
    )

    # Color code by type
    color_map = {
        "Recurring": "#2e86ab",
        "Penalty": "#ff4444",
        "Deadline": "#ff8800",
        "Condition": "#44aa44",
    }

    # Every bar spans the same placeholder interval (0 to 1); the chart only
    # lists the events by type, it does not plot real dates.
    fig = px.timeline(
        df,
        x_start=[0] * len(df),
        x_end=[1] * len(df),
        y="Event",
        color="Type",
        color_discrete_map=color_map,
        title="Contract Timeline & Obligations",
    )
    st.plotly_chart(fig, use_container_width=True)
654
+
655
+
656
def show_financial_terms(doc):
    """List the financial terms extracted from ``doc["original_text"]``.

    ``extract_financial_terms`` is expected to return a mapping; the keys
    ``amounts``, ``percentages`` and ``interest_rates`` are each rendered as a
    bulleted section when present.
    """
    terms = extract_financial_terms(doc.get("original_text", ""))
    if not terms:
        st.info("No financial terms identified in this document.")
        return

    def _bullet_section(key, heading):
        # Render one heading plus a bullet per entry, only if the key exists.
        if key in terms:
            st.markdown(heading)
            for entry in terms[key]:
                st.markdown(f"• {entry}")

    amounts_col, rates_col = st.columns(2)

    with amounts_col:
        _bullet_section("amounts", "**Monetary Amounts:**")

    with rates_col:
        _bullet_section("percentages", "**Percentages/Rates:**")

    _bullet_section("interest_rates", "**Interest Rates:**")
683
+
684
+
685
def export_report(doc):
    """Build a plain-text analysis report and offer it as a download.

    The report contains the filename, document type, the current time, the
    summary, the overall risk assessment, every risk factor, the simplified
    text and the key points found in ``doc``.

    The payload is plain text, so it is offered as a ``.txt`` file with the
    ``text/plain`` MIME type. Labelling it ``.pdf`` / ``application/pdf``
    yields a file that PDF viewers cannot open.
    """
    risk_data = doc.get("risk_data", {})

    # Create a simple text report
    sections = [
        f"""
LEGA.AI DOCUMENT ANALYSIS REPORT
{'='*50}

Document: {doc.get('filename', 'Unknown')}
Type: {doc.get('document_type', 'Unknown').title()}
Analysis Date: {time.strftime('%Y-%m-%d %H:%M:%S')}

SUMMARY:
{doc.get('summary', 'No summary available')}

RISK ASSESSMENT:
{risk_data.get('overall_assessment', 'No risk assessment available')}

RISK FACTORS:
"""
    ]

    for number, factor in enumerate(risk_data.get("risk_factors", []), start=1):
        sections.append(
            f"""
{number}. {factor.get('severity', 'Unknown').upper()} RISK
Category: {factor.get('category', 'N/A').title()}
Clause: {factor.get('clause_text', 'N/A')}
Explanation: {factor.get('explanation', 'N/A')}
"""
        )

    sections.append(
        f"""

SIMPLIFIED VERSION:
{doc.get('simplified_text', 'No simplified version available')}

KEY POINTS:
"""
    )

    sections.extend(f"• {point}\n" for point in doc.get("key_points", []))
    sections.append("\n\nGenerated by Lega.AI - Making legal documents accessible")
    report = "".join(sections)

    # Strip a known source extension so the download is not named
    # e.g. "contract.pdf.txt".
    filename = doc.get("filename", "document")
    for extension in (".pdf", ".docx", ".txt"):
        if filename.endswith(extension):
            filename = filename[: -len(extension)]

    # Offer download
    st.download_button(
        label="📥 Download Report",
        data=report,
        file_name=f"lega_ai_report_{filename}.txt",
        mime="text/plain",
    )

    st.success("✅ Report prepared for download!")
745
+
746
+
747
def generate_mock_risk_factors(doc_type):
    """Return hard-coded sample risk factors for ``doc_type``.

    Known types are ``"rental"``, ``"loan"`` and ``"employment"``; each yields
    two factor dicts with ``clause_text``, ``category``, ``severity``,
    ``explanation`` and ``suggestion``. Any other type yields an empty list.
    """
    # Built on every call so callers always receive fresh objects.
    samples = {
        "rental": [
            {
                "clause_text": "Late payments will incur a penalty of Rs. 1,000 per day",
                "category": "financial",
                "severity": "high",
                "explanation": "Daily penalties can quickly escalate to substantial amounts",
                "suggestion": "Negotiate a more reasonable penalty structure",
            },
            {
                "clause_text": "Tenant is responsible for all repairs and maintenance",
                "category": "financial",
                "severity": "medium",
                "explanation": "This places unusual burden on tenant for structural repairs",
                "suggestion": "Clarify that structural repairs remain landlord responsibility",
            },
        ],
        "loan": [
            {
                "clause_text": "24% per annum (APR 28.5% including processing fees)",
                "category": "financial",
                "severity": "critical",
                "explanation": "Interest rate is significantly above market rates",
                "suggestion": "Shop around for better rates from other lenders",
            },
            {
                "clause_text": "Lender may seize collateral immediately upon default",
                "category": "rights",
                "severity": "high",
                "explanation": "No grace period or notice before asset seizure",
                "suggestion": "Negotiate for notice period and cure opportunity",
            },
        ],
        "employment": [
            {
                "clause_text": "Employee shall not work for any competing company for 2 years",
                "category": "commitment",
                "severity": "high",
                "explanation": "Non-compete period is unusually long and broad",
                "suggestion": "Negotiate shorter period and narrower scope",
            },
            {
                "clause_text": "Company may terminate employment at any time without cause",
                "category": "rights",
                "severity": "medium",
                "explanation": "No job security or notice period for termination",
                "suggestion": "Request notice period and severance terms",
            },
        ],
    }
    return samples.get(doc_type, [])
802
+
803
+
804
def generate_mock_simplified_text(original_text, doc_type):
    """Return a canned plain-language rewrite for ``doc_type``.

    ``original_text`` is accepted but not used: the text is hard-coded mock
    content for ``"rental"``, ``"loan"`` and ``"employment"``. Any other type
    returns a short placeholder sentence.
    """
    canned = {
        "rental": """
**What this rental agreement means in simple terms:**

You're renting a property in Mumbai for ₹25,000 per month. Here are the key things to know:

• **Payment:** You must pay rent by the 1st of each month. If you're late, you'll be charged ₹1,000 for each day you're late.

• **Security deposit:** You need to pay ₹75,000 upfront as security. This money is hard to get back.

• **Repairs:** You're responsible for fixing everything that breaks, even major structural problems.

• **Leaving early:** If you want to leave before the lease ends, you lose your security deposit.

**Watch out for:** The daily late fees and your responsibility for all repairs are unusual and costly.
""",
        "loan": """
**What this loan agreement means in simple terms:**

You're borrowing ₹2,00,000 but will pay back ₹3,00,000 total - that's ₹1,00,000 extra in interest and fees.

• **Monthly payment:** ₹12,500 every month for 2 years

• **Interest rate:** 24% per year (very high - normal rates are 10-15%)

• **Late fees:** ₹500 per day if you're late

• **Your gold jewelry:** The lender can take it immediately if you miss payments

• **Total cost:** You'll pay 50% more than you borrowed

**Warning:** This is an expensive loan. The interest rate is much higher than banks typically charge.
""",
        "employment": """
**What this employment contract means in simple terms:**

You're being hired as a Software Developer for ₹8,00,000 per year. Here's what you need to know:

• **Working hours:** 45 hours per week, including weekends when needed

• **Salary:** ₹66,667 per month

• **If you quit:** You must give 90 days notice

• **If they fire you:** They can fire you anytime without reason or notice

• **After leaving:** You can't work for competing companies for 2 years

• **Side work:** You can't do any other work while employed

**Concerns:** The 2-year non-compete and ability to fire without notice are harsh terms.
""",
    }
    fallback = "Document simplified version will appear here after analysis."
    return canned.get(doc_type, fallback)
862
+
863
+
864
def show_market_comparison(doc):
    """Render the market benchmarking section for the document's type.

    Delegates to the rental, loan or employment comparison view according to
    ``doc["document_type"]``; every other type gets an informational note.
    """
    st.markdown("**Market Context & Benchmarking**")

    views = {
        "rental": show_rental_market_comparison,
        "loan": show_loan_market_comparison,
        "employment": show_employment_market_comparison,
    }
    view = views.get(doc.get("document_type", "other"))

    if view is None:
        st.info(
            "Market comparison data available for rental, loan, and employment contracts."
        )
        return

    view(doc)
880
+
881
+
882
def show_rental_market_comparison(doc):
    """Render the rental benchmarking view.

    All figures shown are hard-coded mock values; ``doc`` is not read.
    """
    analysis_col, benchmark_col = st.columns(2)

    with analysis_col:
        st.markdown("#### 🏠 Rental Market Analysis")

        st.markdown("**Security Deposit:** ₹75,000")
        st.success("✅ Standard: Typically 2-3 months rent")

        st.markdown("**Late Penalty:** ₹1,000/day")
        st.error("❌ Above Market: Typical penalties are ₹100-500/day")

        st.markdown("**Maintenance Responsibility:** Tenant")
        st.warning("⚠️ Unusual: Structural repairs typically landlord's responsibility")

    with benchmark_col:
        st.markdown("#### 📊 Mumbai Rental Benchmarks")

        # Mock market data
        benchmarks = (
            ("Average Rent (2BHK)", "₹28,000"),
            ("Security Deposit Range", "₹50,000 - ₹84,000"),
            ("Standard Late Fee", "₹200/day"),
            ("Tenant Maintenance", "10% of agreements"),
        )
        for label, figure in benchmarks:
            st.metric(label, figure)
910
+
911
+
912
def show_loan_market_comparison(doc):
    """Render the loan benchmarking view.

    All figures shown are hard-coded mock values; ``doc`` is not read.
    """
    col1, col2 = st.columns(2)

    with col1:
        st.markdown("#### 💰 Loan Market Analysis")
        st.markdown("**Interest Rate:** 24% per annum")
        st.error("❌ Well Above Market: Bank rates typically 10-15%")

        st.markdown("**Processing Fee:** ₹10,000")
        st.warning("⚠️ High: Typical processing fees 1-2% of loan amount")

        st.markdown("**Total Repayment:** ₹3,00,000 for ₹2,00,000")
        st.error("❌ Very High: 50% more than principal")

    with col2:
        st.markdown("#### 📊 Personal Loan Benchmarks")

        # Create comparison chart.
        # Without color_discrete_map="identity" Plotly Express treats the
        # `color` values as category labels: bars get default palette colours
        # and the legend shows "red"/"green"/"orange". "identity" makes the
        # values be used directly as the bar colours.
        fig = px.bar(
            x=["Your Loan", "Bank Average", "NBFC Average"],
            y=[24, 12, 18],
            title="Interest Rate Comparison (%)",
            color=["red", "green", "orange"],
            color_discrete_map="identity",
        )
        st.plotly_chart(fig, use_container_width=True)
938
+
939
+
940
def show_employment_market_comparison(doc):
    """Render the employment benchmarking view.

    All figures shown are hard-coded mock values; ``doc`` is not read.
    """
    analysis_col, standards_col = st.columns(2)

    with analysis_col:
        st.markdown("#### 💼 Employment Market Analysis")

        st.markdown("**Non-compete Period:** 2 years")
        st.error("❌ Excessive: Typical non-compete is 6-12 months")

        st.markdown("**Notice Period:** 90 days")
        st.warning("⚠️ Long: Standard notice is 30-60 days")

        st.markdown("**At-will Termination:** Yes")
        st.error("❌ Unfavorable: Most contracts provide notice period")

    with standards_col:
        st.markdown("#### 📊 IT Industry Standards")

        industry_norms = (
            ("Average Salary (3-5 YOE)", "₹8-12 lakhs"),
            ("Standard Notice Period", "30-60 days"),
            ("Typical Non-compete", "6-12 months"),
            ("Weekend Work", "Occasionally, not mandatory"),
        )
        for label, figure in industry_norms:
            st.metric(label, figure)
967
+
968
+
969
def generate_mock_summary(doc_type):
    """Return a canned one-paragraph summary for ``doc_type``.

    Hard-coded text exists for ``"rental"``, ``"loan"`` and ``"employment"``;
    every other type returns a placeholder sentence.
    """
    summaries = {
        "rental": "This is a residential lease agreement for a property in Mumbai with rent of ₹25,000/month. The agreement contains several tenant-unfavorable terms including high daily late fees, tenant responsibility for all repairs, and forfeiture of security deposit for early termination.",
        "loan": "This is a personal loan agreement for ₹2,00,000 with very high interest rates (24% APR, 28.5% effective). The loan requires gold jewelry as collateral and includes harsh default terms with immediate asset seizure rights.",
        "employment": "This is an employment contract for a Software Developer position with ₹8,00,000 annual salary. The contract includes restrictive terms like a 2-year non-compete clause, at-will termination by employer, and prohibition on side work.",
    }
    placeholder = "Document summary will appear here after analysis."
    return summaries.get(doc_type, placeholder)
src/pages/library.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from typing import List, Dict
4
+ import time
5
+
6
+ from ..utils.helpers import format_file_size, format_timestamp
7
+
8
+
9
def show_library_interface():
    """Render the document library page.

    Reads ``documents_library`` from the session state. An empty library
    shows the empty-state view; otherwise the statistics, the filter controls
    and the document grid are rendered in that order.
    """
    st.header("📚 Document Library")
    st.markdown("Manage and review all your analyzed documents")

    library = st.session_state.get("documents_library", [])

    if library:
        show_library_stats(library)    # Library statistics
        show_library_filters(library)  # Filter and search
        show_document_grid(library)    # Document grid
    else:
        show_empty_library()
30
+
31
+
32
def show_empty_library():
    """Render the empty-library placeholder.

    Shows a centered message, a button that switches the session ``page`` to
    the upload page and reruns, and the sample-documents section.
    """
    st.markdown("---")

    # Only the wide middle column is used, which centers the content.
    _, center, _ = st.columns([1, 2, 1])

    with center:
        st.markdown(
            """
<div style="text-align: center; padding: 3rem;">
<h3>📚 Your Library is Empty</h3>
<p style="color: var(--text-color, #666); opacity: 0.7;">Upload and analyze documents to build your personal legal document library.</p>
</div>
""",
            unsafe_allow_html=True,
        )

        upload_clicked = st.button(
            "📄 Upload Your First Document", type="primary", use_container_width=True
        )
        if upload_clicked:
            st.session_state.page = "📄 Upload"
            st.rerun()

    # Add sample documents section
    st.markdown("---")
    show_sample_documents_section()
58
+
59
+
60
def show_library_stats(documents: List[Dict]):
    """Render four summary metrics for the library.

    Shows the document count, the most frequent ``document_type``, the number
    of documents whose ``risk_score`` exceeds 60 (with its share of the
    total), and the summed ``file_size``.
    """
    total_docs = len(documents)

    # Tally documents per type; entries without a type count as "other".
    type_counts = {}
    for entry in documents:
        kind = entry.get("document_type", "other")
        type_counts[kind] = type_counts.get(kind, 0) + 1

    high_risk_docs = sum(1 for entry in documents if entry.get("risk_score", 0) > 60)

    count_col, type_col, risk_col, size_col = st.columns(4)

    with count_col:
        st.metric(label="Total Documents", value=total_docs)

    with type_col:
        if type_counts:
            most_common_type = max(type_counts, key=type_counts.get)
        else:
            most_common_type = "None"
        st.metric(label="Most Common Type", value=most_common_type.title())

    with risk_col:
        if total_docs > 0:
            share = f"{high_risk_docs/total_docs*100:.0f}% of total"
        else:
            share = "0%"
        st.metric(label="High Risk Documents", value=high_risk_docs, delta=share)

    with size_col:
        total_size = sum(entry.get("file_size", 0) for entry in documents)
        st.metric(label="Total Storage", value=format_file_size(total_size))
98
+
99
+
100
def show_library_filters(documents: List[Dict]):
    """Render the type / risk / search controls and apply them.

    The filtered list is stored in ``st.session_state.filtered_documents``
    for the document grid to display.

    Risk bands match the thresholds used on the document cards:
    low is ``score <= 30``, medium is ``30 < score <= 60``, high is
    ``score > 60``. Every score therefore lands in exactly one band,
    including non-integer scores between 30 and 31.
    """
    st.markdown("---")

    type_col, risk_col, search_col = st.columns(3)

    with type_col:
        # Document type filter. Sorted so the option order is stable instead
        # of depending on set iteration order.
        known_types = {doc.get("document_type", "other") for doc in documents}
        selected_type = st.selectbox(
            "Filter by Type", ["All"] + sorted(known_types, key=str)
        )

    with risk_col:
        # Risk level filter
        risk_levels = [
            "All",
            "Low Risk (0-30)",
            "Medium Risk (31-60)",
            "High Risk (61+)",
        ]
        selected_risk = st.selectbox("Filter by Risk", risk_levels)

    with search_col:
        # Search
        search_term = st.text_input(
            "Search documents", placeholder="Enter filename or content..."
        )

    # Apply filters
    filtered_docs = documents

    if selected_type != "All":
        # Use the same "other" default as the option list above, so documents
        # without a document_type are found under "other".
        filtered_docs = [
            doc
            for doc in filtered_docs
            if doc.get("document_type", "other") == selected_type
        ]

    if selected_risk != "All":
        if "Low Risk" in selected_risk:
            filtered_docs = [
                doc for doc in filtered_docs if doc.get("risk_score", 0) <= 30
            ]
        elif "Medium Risk" in selected_risk:
            filtered_docs = [
                doc for doc in filtered_docs if 30 < doc.get("risk_score", 0) <= 60
            ]
        elif "High Risk" in selected_risk:
            filtered_docs = [
                doc for doc in filtered_docs if doc.get("risk_score", 0) > 60
            ]

    if search_term:
        # Case-insensitive match; only the filename is searched.
        needle = search_term.lower()
        filtered_docs = [
            doc for doc in filtered_docs if needle in doc.get("filename", "").lower()
        ]

    # Store filtered docs for grid display
    st.session_state.filtered_documents = filtered_docs
160
+
161
+
162
def show_document_grid(documents: List[Dict]):
    """Render the filtered documents as cards, two per row.

    Uses ``st.session_state.filtered_documents`` when present and falls back
    to ``documents`` otherwise. An empty result shows an info message.
    """
    visible = st.session_state.get("filtered_documents", documents)
    if not visible:
        st.info("No documents match your filter criteria.")
        return

    st.markdown("---")
    st.subheader(f"📄 Documents ({len(visible)})")

    # Display documents in cards: one row of two columns per pair.
    count = len(visible)
    for row_start in range(0, count, 2):
        for offset, column in enumerate(st.columns(2)):
            with column:
                index = row_start + offset
                # The last row has an empty second column when count is odd.
                if index < count:
                    show_document_card(visible[index])
186
+
187
+
188
def show_document_card(doc: Dict):
    """Render one library card: title, risk badge, details and actions.

    The risk badge is high above 60, medium above 30 and low otherwise.
    The View, Q&A and Delete buttons are keyed by ``doc["id"]`` and call the
    matching loader or ``delete_document``.
    """
    # Risk color
    score = doc.get("risk_score", 0)
    if score > 60:
        badge = ("🔴", "High Risk")
    elif score > 30:
        badge = ("🟠", "Medium Risk")
    else:
        badge = ("🟢", "Low Risk")
    risk_color, risk_label = badge

    # Use container for card styling
    with st.container():
        # Header row with filename and risk
        title_col, badge_col = st.columns([3, 1])
        with title_col:
            st.markdown(f"**📄 {doc.get('filename', 'Unknown')}**")
        with badge_col:
            st.markdown(f"{risk_color} {risk_label}")

        # Document details
        type_label = doc.get("document_type", "other").title()
        uploaded_on = doc.get("upload_date", "Unknown")
        size_label = format_file_size(doc.get("file_size", 0))
        st.markdown(f"📋 {type_label} • 📅 {uploaded_on} • 💾 {size_label}")

        # Add some spacing
        st.markdown("---")

        # Action buttons
        view_col, qa_col, delete_col = st.columns(3)
        doc_id = doc["id"]

        with view_col:
            if st.button("📊 View", key=f"view_{doc_id}", use_container_width=True):
                load_document_for_analysis(doc_id)

        with qa_col:
            if st.button("💬 Q&A", key=f"qa_{doc_id}", use_container_width=True):
                load_document_for_qa(doc_id)

        with delete_col:
            if st.button("🗑️ Delete", key=f"delete_{doc_id}", use_container_width=True):
                delete_document(doc_id)
235
+
236
+
237
def load_document_for_analysis(doc_id: str):
    """Make the library entry with ``doc_id`` current and open Analysis.

    Does nothing when no entry has that id. The stored ``original_text`` is a
    placeholder string and the document is flagged ``is_sample``.
    """
    library = st.session_state.get("documents_library", [])
    entry = next((item for item in library if item["id"] == doc_id), None)
    if entry is None:
        return

    # Simulate loading the full document data
    st.session_state.current_document = {
        "id": entry["id"],
        "filename": entry["filename"],
        "document_type": entry["document_type"],
        "original_text": f"Sample content for {entry['filename']}...",  # In real app, load from storage
        "is_sample": True,  # Mark as sample for demo
        "risk_score": entry.get("risk_score", 0),
    }

    st.session_state.page = "📊 Analysis"
    st.rerun()
256
+
257
+
258
def load_document_for_qa(doc_id: str):
    """Make the library entry with ``doc_id`` current and open Q&A.

    Does nothing when no entry has that id. The stored ``original_text`` is a
    placeholder string and the document is flagged ``is_sample``.
    """
    library = st.session_state.get("documents_library", [])
    entry = next((item for item in library if item["id"] == doc_id), None)
    if entry is None:
        return

    # Simulate loading the full document data
    st.session_state.current_document = {
        "id": entry["id"],
        "filename": entry["filename"],
        "document_type": entry["document_type"],
        "original_text": f"Sample content for {entry['filename']}...",  # In real app, load from storage
        "is_sample": True,  # Mark as sample for demo
    }

    st.session_state.page = "💬 Q&A"
    st.rerun()
276
+
277
+
278
def delete_document(doc_id: str):
    """Delete a document from the library after a two-step confirmation.

    The first call for a given ``doc_id`` only arms a
    ``confirm_delete_<doc_id>`` flag in the session state and asks the user
    to confirm. The next call removes the document from
    ``st.session_state.documents_library`` and clears the flag.

    Nothing else in this module sets the confirmation flag. If this function
    did not arm it, the deletion branch could never run and the Delete button
    would do nothing.
    """
    confirm_key = f"confirm_delete_{doc_id}"

    if not st.session_state.get(confirm_key):
        # First click: remember the intent and wait for a second click.
        st.session_state[confirm_key] = True
        st.warning("⚠️ Click Delete again to confirm removing this document.")
        return

    documents = st.session_state.get("documents_library", [])
    st.session_state.documents_library = [
        doc for doc in documents if doc["id"] != doc_id
    ]

    # Clear confirmation state
    del st.session_state[confirm_key]

    st.success("✅ Document deleted from library")
291
+
292
+
293
def show_sample_documents_section():
    """Show available sample documents for testing.

    Lists the ``.pdf``, ``.docx`` and ``.txt`` files found in ``./sample``.
    Each file gets an expander with a short description and an
    "Analyze Now" button that stores the filename in
    ``st.session_state.load_sample``, switches to the upload page and reruns.
    """
    import os

    st.subheader("🎯 Try Sample Documents")
    st.markdown("Get started by analyzing our sample legal documents:")

    # Get available sample documents. os.listdir() returns entries in
    # arbitrary order, so sort to keep the list stable between reruns.
    sample_dir = "./sample"
    sample_files = []
    if os.path.exists(sample_dir):
        sample_files = sorted(
            f for f in os.listdir(sample_dir) if f.endswith((".pdf", ".docx", ".txt"))
        )

    if not sample_files:
        st.info("No sample documents available.")
        return

    # Create description mapping for better UX
    descriptions = {
        "Employment_Offer_Letter.pdf": "📋 Analyze employment terms, benefits, and obligations",
        "Master_Services_Agreement.pdf": "🤝 Review service agreements and contract terms",
        "Mutual_NDA.pdf": "🔒 Examine confidentiality and non-disclosure clauses",
        "Residential_Lease_Agreement.pdf": "🏠 Check rental terms, deposits, and tenant rights",
    }

    for filename in sample_files:
        # Drop whichever extension the file has (not only ".pdf") so .docx
        # and .txt samples also get a clean title.
        title = os.path.splitext(filename)[0].replace("_", " ")

        with st.expander(f"📄 {title}", expanded=False):
            col1, col2 = st.columns([2, 1])

            with col1:
                description = descriptions.get(
                    filename, "📊 Analyze this legal document for risks and terms"
                )
                st.markdown(description)

            with col2:
                if st.button(
                    "Analyze Now",
                    key=f"sample_lib_{filename}",
                    use_container_width=True,
                ):
                    # Set this as the sample to load and redirect to upload page
                    st.session_state.load_sample = filename
                    st.session_state.page = "📄 Upload"
                    st.rerun()
src/pages/qa_assistant.py ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from typing import List, Dict
3
+ import time
4
+
5
+ from ..services.ai_analyzer import AIAnalyzer
6
+ from ..services.vector_store import VectorStoreService
7
+
8
+
9
def show_qa_interface():
    """Render the Q&A assistant page for the current document.

    Without a ``current_document`` in the session state, a warning and a link
    to the upload page are shown. Otherwise the page shows the chat history,
    suggested questions for the document type, a free-text question form, and
    quick actions (back to analysis, clear chat, export chat).
    """
    doc = st.session_state.get("current_document")
    if not doc:
        st.warning("⚠️ No document loaded. Please upload and analyze a document first.")
        if st.button("📄 Go to Upload"):
            st.session_state.page = "📄 Upload"
            st.rerun()
        return

    # Header
    st.header("💬 Q&A Assistant")
    st.markdown(f"Ask questions about **{doc.get('filename', 'your document')}**")

    # Initialize chat history
    if "qa_history" not in st.session_state:
        st.session_state.qa_history = []

    # Chat interface: replay every stored exchange, question then answer.
    with st.container():
        for exchange in st.session_state.qa_history:
            with st.chat_message("user"):
                st.markdown(exchange["question"])

            with st.chat_message("assistant"):
                st.markdown(exchange["answer"])

    # Suggested questions, alternating between two columns.
    st.markdown("### 💡 Suggested Questions")

    suggestions = get_suggested_questions(doc.get("document_type", "other"))
    columns = st.columns(2)

    for index, suggestion in enumerate(suggestions):
        with columns[index % 2]:
            if st.button(suggestion, key=f"suggested_{index}", use_container_width=True):
                ask_question(suggestion, doc)

    # Chat input
    st.markdown("### ❓ Ask Your Question")

    with st.form("question_form", clear_on_submit=True):
        user_question = st.text_input(
            "Type your question here...",
            placeholder="e.g., What happens if I terminate this contract early?",
            label_visibility="collapsed",
        )

        submitted = st.form_submit_button("Send", use_container_width=True)

        # Whitespace-only input is ignored.
        if submitted and user_question.strip():
            ask_question(user_question, doc)

    # Quick actions
    st.markdown("---")
    back_col, clear_col, export_col = st.columns(3)

    with back_col:
        if st.button("📊 Back to Analysis", use_container_width=True):
            st.session_state.page = "📊 Analysis"
            st.rerun()

    with clear_col:
        if st.button("🗑️ Clear Chat", use_container_width=True):
            st.session_state.qa_history = []
            st.rerun()

    with export_col:
        if st.button("📥 Export Chat", use_container_width=True):
            export_chat_history()
89
+
90
+
91
def ask_question(question: str, doc: Dict):
    """Send ``question`` about ``doc`` to the AI analyzer and record the reply.

    The answer is appended to ``st.session_state.qa_history`` together with
    the question and the current time, then the script is rerun so the new
    exchange is displayed. Any ``Exception`` is reported with ``st.error``.
    """
    try:
        # Show thinking indicator
        with st.spinner("🤔 Thinking..."):
            analyzer = AIAnalyzer()

            from ..models.document import DocumentType

            kind = DocumentType(doc.get("document_type", "other"))
            source_text = doc.get("original_text", "")

            # Get answer from AI
            reply = analyzer.answer_question(
                question=question,
                document_text=source_text,
                document_type=kind,
            )

            # Add to chat history
            record = {"question": question, "answer": reply, "timestamp": time.time()}
            st.session_state.qa_history.append(record)

            # Rerun to show the new Q&A
            st.rerun()

    except Exception as e:
        st.error(f"❌ Error processing question: {str(e)}")
121
+
122
+
123
def get_suggested_questions(doc_type: str) -> List[str]:
    """Return the suggested questions for ``doc_type``.

    Dedicated lists exist for ``rental``, ``loan``, ``employment``, ``nda``
    and ``service``; every other type gets a generic list.
    """
    generic = [
        "What are the main obligations for each party?",
        "What are the key financial terms?",
        "How can this agreement be terminated?",
        "What are the potential risks for me?",
        "What should I be most careful about?",
        "Are there any unusual or concerning clauses?",
    ]

    catalog = {
        "rental": [
            "What is the monthly rent amount?",
            "What happens if I pay rent late?",
            "How much is the security deposit?",
            "Can I terminate the lease early?",
            "Who is responsible for repairs?",
            "What are the landlord's obligations?",
            "Are pets allowed in the property?",
            "What happens if I damage the property?",
        ],
        "loan": [
            "What is the total amount I will repay?",
            "What is the effective interest rate?",
            "What happens if I miss a payment?",
            "What collateral is required?",
            "Can I repay the loan early?",
            "What are the processing fees?",
            "How is the interest calculated?",
            "What happens in case of default?",
        ],
        "employment": [
            "What is my total compensation package?",
            "How many hours am I expected to work?",
            "Can the company terminate me without notice?",
            "What are the non-compete restrictions?",
            "Am I allowed to work other jobs?",
            "What benefits am I entitled to?",
            "How much notice must I give to resign?",
            "Who owns the intellectual property I create?",
        ],
        "nda": [
            "What information is considered confidential?",
            "How long does the confidentiality last?",
            "What are the penalties for disclosure?",
            "Can I discuss this agreement with others?",
            "What happens after the agreement ends?",
            "Are there any exceptions to confidentiality?",
        ],
        "service": [
            "What services are included in this agreement?",
            "What is the payment schedule?",
            "How can this agreement be terminated?",
            "What are the deliverables and deadlines?",
            "Who is responsible for what costs?",
            "What happens if the work is unsatisfactory?",
        ],
    }

    if doc_type in catalog:
        return catalog[doc_type]
    return generic
186
+
187
+
188
def export_chat_history():
    """Export the chat history as a text file.

    Builds a plain-text transcript of ``st.session_state.qa_history`` for the
    current document and offers it through a download button. When there is
    no history, a warning is shown and nothing is offered.

    The payload is plain text, so it is offered as a ``.txt`` file with the
    ``text/plain`` MIME type. Labelling it ``.pdf`` / ``application/pdf``
    yields a file that PDF viewers cannot open.
    """
    if not st.session_state.qa_history:
        st.warning("No chat history to export.")
        return

    doc = st.session_state.current_document

    # Create chat export
    sections = [
        f"""
LEGA.AI Q&A SESSION EXPORT
{'='*50}

Document: {doc.get('filename', 'Unknown')}
Document Type: {doc.get('document_type', 'Unknown').title()}
Export Date: {time.strftime('%Y-%m-%d %H:%M:%S')}

QUESTIONS & ANSWERS:
{'='*50}

"""
    ]

    for number, qa in enumerate(st.session_state.qa_history, start=1):
        sections.append(
            f"""
Q{number}: {qa['question']}

A{number}: {qa['answer']}

{'-'*30}

"""
        )

    sections.append("\nGenerated by Lega.AI - Making legal documents accessible")
    export_text = "".join(sections)

    # Strip a known source extension so the download is not named
    # e.g. "contract.pdf.txt".
    filename = doc.get("filename", "document")
    for extension in (".pdf", ".docx", ".txt"):
        if filename.endswith(extension):
            filename = filename[: -len(extension)]

    # Offer download
    st.download_button(
        label="📥 Download Chat History",
        data=export_text,
        file_name=f"lega_ai_qa_{filename}.txt",
        mime="text/plain",
    )

    st.success("✅ Chat history prepared for download!")
src/pages/settings.py ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from src.utils.config import config
3
+
4
+
5
def show_settings_interface():
    """Render the settings page: a header followed by one tab per category."""
    st.header("⚙️ Settings")
    st.markdown("Configure your Lega.AI experience")

    # Tab labels and their renderers are paired positionally.
    tab_labels = ["🔑 API Keys", "🎨 Preferences", "📊 Usage", "ℹ️ About"]
    tabs = st.tabs(tab_labels)
    renderers = (
        show_api_settings,
        show_preference_settings,
        show_usage_stats,
        show_about_info,
    )

    for tab, render in zip(tabs, renderers):
        with tab:
            render()
27
+
28
+
29
def show_api_settings():
    """Render the API-key tab: configuration status plus a key entry form.

    The form only acknowledges the submitted key with on-screen messages; this
    function does not write the key anywhere.
    """
    st.subheader("🔑 API Configuration")

    # A key counts as configured when it is set and is not the template placeholder.
    placeholder_key = "your-google-api-key-here"
    current_key = config.GOOGLE_API_KEY
    if current_key and current_key != placeholder_key:
        st.success("✅ Google AI API key is configured")
    else:
        st.warning("⚠️ Google AI API key not configured")
        setup_steps = """
To use Lega.AI's AI features, you need to configure your Google AI API key:

1. Go to [Google AI Studio](https://makersuite.google.com/)
2. Create a new API key
3. Copy the key and add it to your `.env` file
4. Set `GOOGLE_API_KEY=your_actual_api_key`
5. Restart the application
"""
        st.markdown(setup_steps)

    # Key entry form (demonstration only).
    st.markdown("---")
    st.subheader("🔧 Update API Key")

    with st.form("api_key_form"):
        new_api_key = st.text_input(
            "Google AI API Key",
            type="password",
            placeholder="Enter your Google AI API key",
            help="This will be saved to your environment configuration",
        )

        if st.form_submit_button("Update API Key"):
            if not new_api_key.strip():
                st.error("❌ Please enter a valid API key")
            else:
                st.success(
                    "✅ API key updated! Please restart the application for changes to take effect."
                )
                st.info("💡 Don't forget to update your `.env` file with the new key.")
76
+
77
+
78
def show_preference_settings():
    """Render the preferences tab (language, analysis and notification options).

    The widget values are displayed but not persisted: pressing the save
    button only shows a confirmation message.
    """
    st.subheader("🎨 User Preferences")

    # --- Language & region -------------------------------------------------
    st.markdown("#### 🌐 Language & Region")
    language_col, region_col = st.columns(2)

    with language_col:
        st.selectbox(
            "Interface Language",
            ["English", "Hindi", "Tamil", "Telugu", "Gujarati"],
            help="Language for the user interface",
        )

    with region_col:
        st.selectbox(
            "Legal Region",
            ["India", "Maharashtra", "Delhi", "Karnataka", "Tamil Nadu"],
            help="Legal jurisdiction for document analysis",
        )

    # --- Analysis ----------------------------------------------------------
    st.markdown("#### 📊 Analysis Preferences")

    st.slider(
        "Risk Detection Sensitivity",
        min_value=1,
        max_value=5,
        value=3,
        help="1 = Only critical risks, 5 = All potential concerns",
    )
    st.selectbox(
        "Text Simplification Level",
        ["Basic", "Intermediate", "Advanced"],
        index=1,
        help="How much to simplify legal language",
    )
    st.checkbox(
        "Show improvement suggestions",
        value=True,
        help="Display suggestions for problematic clauses",
    )

    # --- Notifications -----------------------------------------------------
    st.markdown("#### 🔔 Notifications")

    st.checkbox("Email notifications for analysis completion", value=False)
    st.checkbox("Browser notifications", value=True)

    # In a real app the selections would be written to a user profile/database.
    save_clicked = st.button("💾 Save Preferences", type="primary")
    if save_clicked:
        st.success("✅ Preferences saved successfully!")
138
+
139
+
140
def show_usage_stats():
    """Render the usage tab with hard-coded (mock) statistics.

    Shows three headline metrics, a per-document-type breakdown with progress
    bars scaled to the busiest type, storage usage, and plan information.
    """
    st.subheader("📊 Usage Statistics")

    # Mock usage data: (label, value, delta) for each headline metric.
    headline_metrics = [
        ("Documents Analyzed", "47", "12 this month"),
        ("Questions Asked", "156", "23 this week"),
        ("Risks Identified", "89", "High: 12, Medium: 31"),
    ]
    for column, (label, value, delta) in zip(st.columns(3), headline_metrics):
        with column:
            st.metric(label=label, value=value, delta=delta)

    # Usage by document type
    st.markdown("#### 📄 Analysis by Document Type")

    usage_data = {
        "Rental Agreements": 18,
        "Loan Contracts": 12,
        "Employment Contracts": 8,
        "Service Agreements": 6,
        "NDAs": 3,
    }

    # The scale is the same for every bar, so compute it once outside the loop.
    peak_count = max(usage_data.values())
    for doc_type, count in usage_data.items():
        st.markdown(f"**{doc_type}**: {count} documents")
        st.progress(count / peak_count)

    # Storage usage
    st.markdown("#### 💾 Storage Usage")

    storage_used = 2.4  # GB
    storage_limit = 5.0  # GB

    st.progress(storage_used / storage_limit)
    st.markdown(
        f"**{storage_used:.1f} GB** used of **{storage_limit:.1f} GB** available"
    )

    # Account tier
    st.markdown("#### 👤 Account Information")

    plan_col, upgrade_col = st.columns(2)

    with plan_col:
        st.info("**Plan**: Free Tier")
        st.markdown(
            """
- 10 documents per month
- Basic AI analysis
- Email support
"""
        )

    with upgrade_col:
        st.markdown("**Upgrade Benefits**:")
        st.markdown(
            """
- Unlimited documents
- Advanced AI features
- Priority support
- Bulk processing
"""
        )

        # Placeholder action: no upgrade flow exists yet.
        if st.button("🚀 Upgrade to Pro", type="primary"):
            st.info("Upgrade functionality would be implemented here")
211
+
212
+
213
def show_about_info():
    """Render the about tab: product description, version info, legal notice
    and a feedback form.

    Submitted feedback is only acknowledged on screen; it is not stored or
    sent anywhere by this function.
    """
    st.subheader("ℹ️ About Lega.AI")

    product_text = """
**Lega.AI** is an AI-powered platform that makes legal documents accessible to everyone.

### 🎯 Mission
To democratize legal document understanding by providing instant AI analysis,
risk assessment, and plain language explanations.

### ✨ Features
- **Document Analysis**: Upload and analyze any legal document
- **Risk Assessment**: Color-coded risk scoring with explanations
- **Plain Language**: Convert legal jargon to simple English
- **Q&A Assistant**: Ask questions about your documents
- **Smart Search**: Find similar clauses and documents
- **Export Reports**: Generate comprehensive analysis reports

### 🛡️ Privacy & Security
- Your documents are processed securely
- No data is shared with third parties
- Local vector storage for document similarity
- GDPR compliant data handling
"""

    version_text = """
### 📊 Version Info
**Version**: 1.0.0
**Build**: 2025.09.21
**Engine**: Google Gemini

### 🔧 Tech Stack
- **Frontend**: Streamlit
- **AI/ML**: LangChain + Gemini
- **Vector DB**: Chroma
- **Embeddings**: Google Embeddings

### 📞 Support
- **Email**: support@lega.ai
- **Docs**: github.com/codernoahx/Lega.AI/README.md
- **GitHub**: github.com/codernoahx/Lega.AI
"""

    legal_text = """
### ⚖️ Legal Notice

**Disclaimer**: Lega.AI provides AI-powered analysis for informational purposes only.
This is not legal advice. Always consult with qualified legal professionals for
important legal matters.

**Data Usage**: By using this service, you agree to our Terms of Service and Privacy Policy.
Your documents are processed to provide analysis but are not used to train AI models.

© 2025 Lega.AI. All rights reserved.
"""

    # App info: wide description column next to a narrow facts column.
    description_col, facts_col = st.columns([2, 1])
    with description_col:
        st.markdown(product_text)
    with facts_col:
        st.markdown(version_text)

    # Legal notice
    st.markdown("---")
    st.markdown(legal_text)

    # Feedback section
    st.markdown("---")
    st.subheader("💬 Feedback")

    with st.form("feedback_form"):
        st.selectbox(
            "Feedback Type",
            ["General Feedback", "Bug Report", "Feature Request", "Question"],
        )
        feedback_text = st.text_area(
            "Your Feedback",
            placeholder="Tell us what you think or report any issues...",
            height=100,
        )

        if st.form_submit_button("Send Feedback"):
            # Whitespace-only feedback is treated as empty.
            if feedback_text.strip():
                st.success("✅ Thank you for your feedback! We'll review it soon.")
            else:
                st.error("❌ Please enter your feedback before submitting.")
src/pages/upload.py ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from typing import Optional
4
+ import time
5
+
6
+ from ..services.document_processor import DocumentProcessor
7
+ from ..services.ai_analyzer import AIAnalyzer
8
+ from ..services.vector_store import VectorStoreService
9
+ from ..models.document import DocumentType
10
+ from ..utils.helpers import generate_document_id, sanitize_filename, format_file_size
11
+ from ..utils.logger import log_document_upload
12
+
13
+
14
def show_upload_interface():
    """Render the upload page: file uploader plus a grid of sample documents.

    If ``st.session_state`` carries a ``load_sample`` entry, that sample is
    processed immediately (the entry is removed first) and nothing else is
    rendered. Otherwise an uploaded file is size-checked (10MB limit) and
    analysed when the user presses the analyse button.
    """
    st.header("📄 Upload Legal Document")
    st.markdown(
        "Upload your legal document for instant AI analysis and risk assessment."
    )

    # Check if we should auto-load a sample document
    if st.session_state.get("load_sample"):
        filename = st.session_state.load_sample
        del st.session_state.load_sample  # Clear the flag
        load_sample_document_from_file(filename)
        return

    # File uploader
    uploaded_file = st.file_uploader(
        "Choose a file",
        type=["pdf", "txt", "docx"],
        help="Supported formats: PDF, TXT, DOCX (Max 10MB)",
        key="document_uploader",
    )

    if uploaded_file is not None:
        file_size = len(uploaded_file.getvalue())

        # Check file size limit
        max_size = 10 * 1024 * 1024  # 10MB
        if file_size > max_size:
            st.error(f"❌ File too large. Maximum size is {format_file_size(max_size)}")
            return

        st.success(f"📁 **{uploaded_file.name}** ({format_file_size(file_size)})")

        # Process button
        if st.button("🔍 Analyze Document", type="primary", use_container_width=True):
            process_uploaded_document(uploaded_file)

    # Sample documents section
    st.markdown("---")
    st.subheader("📋 Try Sample Documents")
    st.markdown("Don't have a document handy? Try one of our real sample documents:")

    sample_dir = "./sample"
    sample_extensions = (".pdf", ".docx", ".txt")
    sample_files = []
    # isdir (not exists) so a stray file named "sample" cannot break listdir.
    if os.path.isdir(sample_dir):
        # Sorted because os.listdir order is arbitrary and the button keys
        # below are derived from the list index.
        sample_files = sorted(
            name for name in os.listdir(sample_dir) if name.endswith(sample_extensions)
        )

    if not sample_files:
        st.info("No sample documents found in the sample directory.")
        return

    left_col, right_col = st.columns(2)

    for i, filename in enumerate(sample_files):
        # Alternate buttons between the two columns.
        target_col = left_col if i % 2 == 0 else right_col

        with target_col:
            # Human-readable label: drop only the trailing extension, then
            # turn underscores into spaces.
            stem = os.path.splitext(filename)[0]
            display_name = stem.replace("_", " ").title()

            if st.button(f"📄 {display_name}", use_container_width=True, key=f"sample_{i}"):
                load_sample_document_from_file(filename)
78
+
79
+
80
def process_uploaded_document(uploaded_file):
    """Run the full analysis pipeline on an uploaded document.

    Steps: extract text, detect the document type, run risk analysis, text
    simplification and summarisation, index the text in the vector store, then
    store the results in ``st.session_state`` (``current_document`` and
    ``documents_library``), switch the page to the analysis view and rerun.

    Args:
        uploaded_file: Object exposing ``name`` and ``getvalue()`` (returning
            the raw bytes), e.g. a Streamlit uploaded file.

    Any exception raised along the way is reported with ``st.error`` and the
    progress widgets are cleared.
    """
    # Created before the try block so the error handler below can always clear
    # them, even when one of the service constructors raises.
    progress_bar = st.progress(0)
    status_text = st.empty()

    try:
        # Initialize processors
        doc_processor = DocumentProcessor()
        ai_analyzer = AIAnalyzer()
        vector_store = VectorStoreService()

        # Step 1: Extract text
        status_text.text("📄 Extracting text from document...")
        progress_bar.progress(20)

        file_content = uploaded_file.getvalue()
        file_size = len(file_content)
        text = doc_processor.extract_text(file_content, uploaded_file.name)

        if not text.strip():
            st.error(
                "❌ Could not extract text from the document. Please try a different file."
            )
            progress_bar.empty()
            status_text.empty()
            return

        progress_bar.progress(40)

        # Step 2: Detect document type
        status_text.text("🔍 Analyzing document type...")
        document_type = doc_processor.detect_document_type(text)
        progress_bar.progress(50)

        # Step 3: Risk analysis
        status_text.text("⚠️ Performing risk assessment...")
        risk_data = ai_analyzer.analyze_document_risk(text, document_type)
        progress_bar.progress(70)

        # Step 4: Text simplification
        status_text.text("💬 Simplifying legal language...")
        simplified_data = ai_analyzer.simplify_text(text, document_type)
        progress_bar.progress(85)

        # Step 5: Generate summary
        status_text.text("📋 Generating summary...")
        summary = ai_analyzer.generate_summary(text, document_type)

        # Step 6: Add to vector store
        status_text.text("💾 Storing document for search...")
        doc_id = generate_document_id()
        # One timestamp shared by the vector store and the library entry so the
        # two records cannot disagree.
        upload_date = time.strftime("%Y-%m-%d %H:%M:%S")
        vector_store.add_document(
            document_id=doc_id,
            text=text,
            metadata={
                "filename": uploaded_file.name,
                "document_type": document_type.value,
                "upload_date": upload_date,
            },
        )

        progress_bar.progress(100)

        # Complete: leave the final status visible briefly before clearing.
        status_text.text("✅ Analysis complete!")
        time.sleep(1)
        progress_bar.empty()
        status_text.empty()

        # Store results in session state
        st.session_state.current_document = {
            "id": doc_id,
            "filename": uploaded_file.name,
            "document_type": document_type.value,
            "original_text": text,
            "simplified_text": simplified_data.get("simplified_text", ""),
            "summary": summary,
            "risk_data": risk_data,
            "key_points": simplified_data.get("key_points", []),
            "jargon_definitions": simplified_data.get("jargon_definitions", {}),
            "analysis_timestamp": time.time(),
            "file_size": file_size,
        }

        # Add to documents library
        if "documents_library" not in st.session_state:
            st.session_state.documents_library = []

        st.session_state.documents_library.append(
            {
                "id": doc_id,
                "filename": uploaded_file.name,
                "document_type": document_type.value,
                "upload_date": upload_date,
                "file_size": file_size,
                # Simple risk score: 10 points per identified risk factor.
                "risk_score": len(risk_data.get("risk_factors", [])) * 10,
            }
        )

        # Log the upload
        log_document_upload(uploaded_file.name, file_size)

        # Show success and redirect to analysis page
        st.success("🎉 Document analysis completed! Redirecting to results...")

        # Set page state for redirection
        st.session_state.page = "📊 Analysis"

        time.sleep(2)
        st.rerun()

    except Exception as e:
        st.error(f"❌ Error processing document: {str(e)}")
        progress_bar.empty()
        status_text.empty()
196
+
197
+
198
def load_sample_document_from_file(filename: str):
    """Read ``filename`` from the ``./sample`` directory and analyse it.

    The file's bytes are wrapped in a minimal stand-in exposing ``name`` and
    ``getvalue()`` and handed to ``process_uploaded_document``. A missing file
    or any error while loading is reported with ``st.error``.
    """

    class _SampleFile:
        """Stand-in for an uploaded file: just a name and its raw bytes."""

        def __init__(self, content, name):
            self._content = content
            self.name = name

        def getvalue(self):
            return self._content

    try:
        sample_path = os.path.join("./sample", filename)
        if not os.path.exists(sample_path):
            st.error(f"❌ Sample file not found: {filename}")
            return

        # Binary mode: the extractor receives the raw bytes.
        with open(sample_path, "rb") as handle:
            raw_bytes = handle.read()

        sample_file = _SampleFile(raw_bytes, filename)
        st.success(f"📄 Loading sample document: **{filename}**")

        # Run the sample through the same pipeline as a user upload.
        process_uploaded_document(sample_file)
    except Exception as e:
        st.error(f"❌ Error loading sample document: {str(e)}")
229
+
230
+
231
def load_sample_document(doc_type: str):
    """Load one of the built-in demo documents into the session.

    ``doc_type`` selects among ``"rental"``, ``"loan"`` and ``"employment"``;
    any other value does nothing. The chosen sample is stored in
    ``st.session_state.current_document`` (flagged ``is_sample``), a short
    simulated processing delay is shown, and the script is rerun.
    """
    rental_text = """
RESIDENTIAL LEASE AGREEMENT

This Lease Agreement is entered into between John Smith (Landlord) and Jane Doe (Tenant)
for the property located at 123 Main Street, Mumbai, Maharashtra.

RENT: Tenant agrees to pay Rs. 25,000 per month, due on the 1st of each month.
Late payments will incur a penalty of Rs. 1,000 per day.

SECURITY DEPOSIT: Tenant shall pay a security deposit of Rs. 75,000, which is
non-refundable except for damage assessment.

TERMINATION: Either party may terminate this lease with 30 days written notice.
Early termination by Tenant results in forfeiture of security deposit.

MAINTENANCE: Tenant is responsible for all repairs and maintenance, including
structural repairs, regardless of cause.

The property is leased "as-is" with no warranties. Landlord is not liable for
any damages or injuries occurring on the premises.
"""

    loan_text = """
PERSONAL LOAN AGREEMENT

Borrower: Rajesh Kumar
Lender: QuickCash Financial Services Pvt Ltd
Principal Amount: Rs. 2,00,000

INTEREST RATE: 24% per annum (APR 28.5% including processing fees)

REPAYMENT: 24 monthly installments of Rs. 12,500 each
Total repayment amount: Rs. 3,00,000

LATE PAYMENT PENALTY: Rs. 500 per day for any late payment

DEFAULT: If payment is late by more than 7 days, the entire remaining
balance becomes immediately due and payable.

COLLATERAL: Borrower pledges gold ornaments worth Rs. 2,50,000 as security.
Lender may seize collateral immediately upon default.

ARBITRATION: All disputes shall be resolved through binding arbitration.
Borrower waives right to jury trial.

Processing fee: Rs. 10,000 (non-refundable)
Documentation charges: Rs. 5,000
"""

    employment_text = """
EMPLOYMENT CONTRACT

Employee: Priya Sharma
Company: TechCorp India Private Limited
Position: Software Developer
Start Date: January 1, 2024

SALARY: Rs. 8,00,000 per annum, payable monthly

WORKING HOURS: 45 hours per week, including mandatory weekend work when required

NON-COMPETE: Employee shall not work for any competing company for 2 years
after termination, within India or globally.

CONFIDENTIALITY: Employee agrees to maintain strict confidentiality of all
company information indefinitely, even after termination.

TERMINATION: Company may terminate employment at any time without cause or notice.
Employee must provide 90 days notice to resign.

NO MOONLIGHTING: Employee shall not engage in any other work or business
activities during employment.

INTELLECTUAL PROPERTY: All work created by employee belongs entirely to company,
including personal projects done outside work hours.
"""

    sample_docs = {
        "rental": {
            "filename": "sample_rental_agreement.pdf",
            "type": "rental",
            "text": rental_text,
        },
        "loan": {
            "filename": "sample_loan_agreement.pdf",
            "type": "loan",
            "text": loan_text,
        },
        "employment": {
            "filename": "sample_employment_contract.docx",
            "type": "employment",
            "text": employment_text,
        },
    }

    # Unknown sample types are ignored silently.
    chosen = sample_docs.get(doc_type)
    if chosen is None:
        return

    from ..utils.helpers import generate_document_id

    # Store in session state
    st.session_state.current_document = {
        "id": generate_document_id(),
        "filename": chosen["filename"],
        "document_type": chosen["type"],
        "original_text": chosen["text"],
        "is_sample": True,
    }

    st.success(f"📄 Loaded sample {doc_type} document. Processing...")

    # Simulate processing for demo
    with st.spinner("Analyzing sample document..."):
        time.sleep(2)

    st.rerun()
src/services/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # src/services/__init__.py
src/services/ai_analyzer.py ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Any, Optional
2
+ from langchain_google_genai import ChatGoogleGenerativeAI
3
+ from langchain.prompts import PromptTemplate
4
+ from langchain.chains import LLMChain
5
+ import json
6
+ import time
7
+
8
+ from src.utils.config import config
9
+ from src.utils.logger import log_error, log_analysis_start, log_analysis_complete
10
+ from src.models.document import (
11
+ DocumentType,
12
+ RiskLevel,
13
+ RiskCategory,
14
+ RiskFactor,
15
+ ClausePosition,
16
+ )
17
+ from src.utils.helpers import (
18
+ calculate_risk_score,
19
+ extract_key_dates,
20
+ extract_financial_terms,
21
+ )
22
+
23
+
24
class AIAnalyzer:
    """LLM-backed analysis of legal documents.

    Wraps a ``ChatGoogleGenerativeAI`` chat model configured from ``config``
    and offers risk analysis, plain-language simplification, summarisation and
    question answering. Every public analysis method catches its own errors,
    logs them via ``log_error`` and returns a fallback value instead of
    raising.
    """

    # Character budgets applied to document text before it is sent to the model.
    RISK_TEXT_LIMIT = 4000
    CONTEXT_TEXT_LIMIT = 3000
    # Length of the original-text excerpt returned when simplification fails.
    FALLBACK_EXCERPT_LIMIT = 500
    # Upper bound on the number of risks produced by the keyword fallback.
    MAX_FALLBACK_RISKS = 5

    def __init__(self):
        self.llm = ChatGoogleGenerativeAI(
            model=config.CHAT_MODEL,
            google_api_key=config.GOOGLE_API_KEY,
            temperature=config.TEMPERATURE,
            max_output_tokens=config.MAX_TOKENS,
        )

        # Initialize prompt templates
        self._setup_prompts()

    def _setup_prompts(self):
        """Set up prompt templates for different analysis tasks."""

        # Risk analysis prompt. Doubled braces are literal braces in the
        # template; single braces are input variables.
        self.risk_analysis_prompt = PromptTemplate(
            input_variables=["text", "document_type"],
            template="""
Analyze the following {document_type} document for potential risks and problematic clauses.

Document text:
{text}

Please identify:
1. High-risk clauses that could be problematic for the signer
2. Financial risks (hidden fees, penalties, high costs)
3. Commitment risks (long-term obligations, difficult exit clauses)
4. Rights risks (waived protections, limited recourse)

For each risk, provide:
- The exact clause text (keep it concise, max 100 words)
- Risk category (financial, commitment, rights, or standard)
- Severity level (low, medium, high, critical)
- Clear explanation of why it's risky
- Suggestion for improvement

IMPORTANT: Return ONLY valid JSON in the exact format below. Do not include any explanatory text before or after the JSON:

{{
"risk_factors": [
{{
"clause_text": "exact text from document",
"category": "financial",
"severity": "medium",
"explanation": "why this is risky",
"suggestion": "how to improve or what to watch for"
}}
],
"overall_assessment": "brief summary of document risk level"
}}
""",
        )

        # Plain language translation prompt
        self.simplification_prompt = PromptTemplate(
            input_variables=["text", "document_type"],
            template="""
Convert the following legal text into plain, simple English that anyone can understand.

Document type: {document_type}
Legal text: {text}

Rules for simplification:
1. Use everyday language instead of legal jargon
2. Break down complex sentences into shorter ones
3. Explain what actions or obligations mean in practical terms
4. Keep the essential meaning intact
5. Use "you" to make it personal and clear
6. Focus on the most important points

IMPORTANT: Return ONLY valid JSON in the exact format below. Do not include any explanatory text:

{{
"simplified_text": "the simplified version in plain English",
"key_points": ["main point 1", "main point 2", "main point 3"],
"jargon_definitions": {{"legal term": "simple definition"}}
}}
""",
        )

        # Document summary prompt
        self.summary_prompt = PromptTemplate(
            input_variables=["text", "document_type"],
            template="""
Create a concise summary of this {document_type} document.

Document: {text}

Provide a summary that includes:
1. What type of agreement this is
2. Who are the main parties involved
3. Key obligations for each party
4. Important terms (dates, amounts, conditions)
5. Major benefits and risks

Keep it under 200 words and focus on what matters most to the person signing.
""",
        )

    @staticmethod
    def _parse_json_response(raw: str) -> Dict[str, Any]:
        """Parse a model reply that should contain a single JSON object.

        Strips an optional Markdown code fence (```json ... ``` or a bare
        ``` ... ``` wrapper) before decoding.

        Raises:
            ValueError: if the cleaned text is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError`` subclass) or
                does not decode to a dictionary.
        """
        cleaned = raw.strip()

        fence = "```json"
        if fence in cleaned:
            start = cleaned.find(fence) + len(fence)
            end = cleaned.find("```", start)
            # Only unwrap when the closing fence exists; otherwise leave the
            # text alone and let json.loads report the problem.
            if end != -1:
                cleaned = cleaned[start:end].strip()

        if cleaned.startswith("```") and cleaned.endswith("```"):
            cleaned = cleaned[3:-3].strip()

        data = json.loads(cleaned)
        if not isinstance(data, dict):
            raise ValueError("Response is not a dictionary")
        return data

    def analyze_document_risk(
        self, text: str, document_type: DocumentType
    ) -> Dict[str, Any]:
        """Analyze document for risks and problematic clauses.

        Returns a dict with ``risk_factors`` (list of dicts) and
        ``overall_assessment`` (str). When the model reply is not parseable
        JSON the keyword-based fallback is used; on any other error an empty
        result with assessment "Analysis failed" is returned.
        """
        try:
            log_analysis_start("risk_analysis")
            start_time = time.time()

            # Create and run the risk analysis chain
            risk_chain = LLMChain(llm=self.llm, prompt=self.risk_analysis_prompt)
            result = risk_chain.run(
                text=text[: self.RISK_TEXT_LIMIT],  # Limit text size for API
                document_type=document_type.value,
            )

            try:
                risk_data = self._parse_json_response(result)
                # Fill in any key the model left out.
                risk_data.setdefault("risk_factors", [])
                risk_data.setdefault("overall_assessment", "Analysis completed")
            except (json.JSONDecodeError, ValueError) as e:
                log_error(f"JSON parsing error in risk analysis: {str(e)}")
                log_error(f"Raw AI response: {result[:500]}...")

                # Try to extract risk information manually if JSON fails
                risk_data = self._extract_risk_fallback(result, text)

            processing_time = time.time() - start_time
            log_analysis_complete("risk_analysis", processing_time)

            return risk_data

        except Exception as e:
            log_error(f"Error in risk analysis: {str(e)}")
            return {"risk_factors": [], "overall_assessment": "Analysis failed"}

    def _extract_risk_fallback(
        self, response: str, original_text: str
    ) -> Dict[str, Any]:
        """Fallback method to extract risk information when JSON parsing fails.

        Splits ``response`` on ``"."`` and keeps up to ``MAX_FALLBACK_RISKS``
        fragments longer than 20 characters that mention a risk keyword.
        ``original_text`` is accepted for interface compatibility and is not
        used.
        """
        try:
            # Look for common risk indicators in the response
            risk_keywords = [
                "risk",
                "problematic",
                "concern",
                "warning",
                "caution",
                "penalty",
                "fee",
            ]
            risk_factors = []

            for sentence in response.split("."):
                sentence = sentence.strip()
                if len(sentence) <= 20:
                    continue

                lowered = sentence.lower()
                if not any(keyword in lowered for keyword in risk_keywords):
                    continue

                risk_factors.append(
                    {
                        "clause_text": sentence[:200],  # Limit length
                        "category": "standard",
                        "severity": "medium",
                        "explanation": "Potential risk identified by text analysis",
                        "suggestion": "Review this clause carefully with legal counsel",
                    }
                )
                if len(risk_factors) >= self.MAX_FALLBACK_RISKS:
                    break

            return {
                "risk_factors": risk_factors,
                "overall_assessment": "Risk analysis completed with limited parsing. Please review manually.",
            }

        except Exception as e:
            log_error(f"Error in fallback risk extraction: {str(e)}")
            return {
                "risk_factors": [],
                "overall_assessment": "Unable to analyze risks - please try again",
            }

    def simplify_text(self, text: str, document_type: DocumentType) -> Dict[str, Any]:
        """Convert legal text to plain language.

        Returns a dict with ``simplified_text``, ``key_points`` and
        ``jargon_definitions``. If the model reply cannot be parsed, or the
        call fails, a truncated copy of the original text is returned instead.
        """
        excerpt = text[: self.FALLBACK_EXCERPT_LIMIT]
        try:
            simplification_chain = LLMChain(
                llm=self.llm, prompt=self.simplification_prompt
            )
            result = simplification_chain.run(
                text=text[: self.CONTEXT_TEXT_LIMIT],  # Limit text size
                document_type=document_type.value,
            )

            try:
                simplified_data = self._parse_json_response(result)
                # Ensure required keys exist
                simplified_data.setdefault("simplified_text", excerpt + "...")
                simplified_data.setdefault(
                    "key_points", ["Unable to extract key points"]
                )
                simplified_data.setdefault("jargon_definitions", {})
            except (json.JSONDecodeError, ValueError) as e:
                log_error(f"JSON parsing error in text simplification: {str(e)}")
                simplified_data = {
                    "simplified_text": excerpt + "... (Full simplification unavailable)",
                    "key_points": ["Document content requires legal review"],
                    "jargon_definitions": {},
                }

            return simplified_data

        except Exception as e:
            log_error(f"Error in text simplification: {str(e)}")
            return {
                # Return truncated original if simplification fails
                "simplified_text": excerpt + "...",
                "key_points": ["Simplification failed - showing original text"],
                "jargon_definitions": {},
            }

    def generate_summary(self, text: str, document_type: DocumentType) -> str:
        """Generate a concise document summary.

        Returns the stripped model reply, or "Unable to generate summary" if
        the call fails.
        """
        try:
            summary_chain = LLMChain(llm=self.llm, prompt=self.summary_prompt)

            summary = summary_chain.run(
                text=text[: self.CONTEXT_TEXT_LIMIT],  # Limit text size
                document_type=document_type.value,
            )

            return summary.strip()

        except Exception as e:
            log_error(f"Error generating summary: {str(e)}")
            return "Unable to generate summary"

    def answer_question(
        self, question: str, document_text: str, document_type: DocumentType
    ) -> str:
        """Answer a question about the document.

        Only the first ``CONTEXT_TEXT_LIMIT`` characters of ``document_text``
        are given to the model. Returns an apology message if the call fails.
        """
        try:
            qa_prompt = PromptTemplate(
                input_variables=["question", "document", "doc_type"],
                template="""
Answer the following question about this {doc_type} document.
Be specific and cite relevant parts of the document.

Document: {document}

Question: {question}

Provide a clear, helpful answer based only on the document content.
If the answer isn't in the document, say so clearly.
""",
            )

            qa_chain = LLMChain(llm=self.llm, prompt=qa_prompt)

            answer = qa_chain.run(
                question=question,
                document=document_text[: self.CONTEXT_TEXT_LIMIT],  # Limit context size
                doc_type=document_type.value,
            )

            return answer.strip()

        except Exception as e:
            log_error(f"Error answering question: {str(e)}")
            return "Sorry, I couldn't process your question. Please try again."

    def create_risk_factors(
        self, risk_data: Dict[str, Any], text: str
    ) -> List[RiskFactor]:
        """Convert AI analysis results to RiskFactor objects.

        Each entry of ``risk_data["risk_factors"]`` becomes one ``RiskFactor``;
        its position is filled in when the clause text occurs verbatim in
        ``text``. Entries that cannot be converted are logged and skipped.
        """
        risk_factors = []

        for factor_data in risk_data.get("risk_factors", []):
            try:
                # Find clause position in text
                clause_text = factor_data.get("clause_text", "")
                position = None

                if clause_text:
                    start_index = text.find(clause_text)
                    if start_index != -1:
                        position = ClausePosition(
                            start_index=start_index,
                            end_index=start_index + len(clause_text),
                        )

                risk_factor = RiskFactor(
                    # Ids are numbered by successfully converted factors only.
                    id=f"risk_{len(risk_factors) + 1}",
                    clause_text=clause_text,
                    category=RiskCategory(factor_data.get("category", "standard")),
                    severity=RiskLevel(factor_data.get("severity", "low")),
                    explanation=factor_data.get("explanation", ""),
                    suggestion=factor_data.get("suggestion"),
                    position=position,
                )

                risk_factors.append(risk_factor)

            except Exception as e:
                # Deliberate best effort: one malformed entry must not discard
                # the remaining factors.
                log_error(f"Error creating risk factor: {str(e)}")
                continue

        return risk_factors
src/services/document_processor.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import BinaryIO, Optional
2
+ from langchain_community.document_loaders import PyPDFLoader, TextLoader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ import tempfile
5
+ import os
6
+ from docx import Document
7
+
8
+ from src.utils.config import config
9
+ from src.utils.logger import log_error
10
+ from src.models.document import DocumentType
11
+
12
+
13
class DocumentProcessor:
    """Turn uploaded file bytes into plain text and derive simple facts from it.

    PDF files are read with LangChain's ``PyPDFLoader``, DOCX files with
    ``python-docx`` and text files by decoding the raw bytes.  Every
    ``extract_*`` method is best-effort: on failure it reports through
    ``log_error`` and returns an empty string instead of raising.
    """

    def __init__(self):
        # Splitter behind ``split_text_into_chunks``: 1000-character chunks
        # with a 200-character overlap, measured with ``len``.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )

    @staticmethod
    def _remove_temp_file(path: Optional[str]) -> None:
        """Delete the temporary file at *path*, logging instead of raising."""
        if not path:
            return
        try:
            os.unlink(path)
        except FileNotFoundError:
            pass
        except OSError as e:
            log_error(f"Could not remove temporary file {path}: {str(e)}")

    def extract_text_from_pdf(self, file_content: bytes, filename: str) -> str:
        """Extract text from PDF using LangChain PyPDFLoader.

        Args:
            file_content: Raw bytes of the uploaded PDF.
            filename: Original file name (not used for the extraction itself).

        Returns:
            The text of all pages joined with newlines, or ``""`` on failure.
        """
        temp_file_path = None
        try:
            # PyPDFLoader is given a path, so the upload is spooled to disk.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
                # Remember the path before writing so ``finally`` can clean up
                # even when the write itself fails.
                temp_file_path = temp_file.name
                temp_file.write(file_content)

            documents = PyPDFLoader(temp_file_path).load()
            return "\n".join(doc.page_content for doc in documents)

        except Exception as e:
            log_error(f"Error extracting text from PDF: {str(e)}")
            return ""

        finally:
            # Runs on success and on failure, so a broken PDF no longer leaks
            # its temporary copy.
            self._remove_temp_file(temp_file_path)

    def extract_text_from_txt(self, file_content: bytes, filename: str) -> str:
        """Extract text from TXT file.

        Decoding order: UTF-16 only when the data starts with a UTF-16 byte
        order mark, then UTF-8 (a leading UTF-8 BOM is stripped), then
        cp1252, then latin-1.  UTF-16 is not guessed for BOM-less data
        because almost any even-length byte string decodes as UTF-16 and
        would turn single-byte text into garbage.

        Args:
            file_content: Raw bytes of the uploaded text file.
            filename: Original file name (not used for the extraction itself).

        Returns:
            The decoded text, or ``""`` if an unexpected error occurs.
        """
        try:
            encodings = ["utf-8-sig", "cp1252"]
            if file_content.startswith((b"\xff\xfe", b"\xfe\xff")):
                encodings.insert(0, "utf-16")

            for encoding in encodings:
                try:
                    return file_content.decode(encoding)
                except UnicodeDecodeError:
                    continue

            # latin-1 maps every byte to a code point, so this cannot fail.
            return file_content.decode("latin-1")

        except Exception as e:
            log_error(f"Error extracting text from TXT: {str(e)}")
            return ""

    def extract_text_from_docx(self, file_content: bytes, filename: str) -> str:
        """Extract text from DOCX file.

        Paragraph text comes first, followed by the text of every table
        cell; blank paragraphs and cells are skipped.

        Args:
            file_content: Raw bytes of the uploaded DOCX.
            filename: Original file name (not used for the extraction itself).

        Returns:
            The collected text joined with newlines, or ``""`` on failure.
        """
        temp_file_path = None
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as temp_file:
                temp_file_path = temp_file.name
                temp_file.write(file_content)

            doc = Document(temp_file_path)

            text_parts = [
                paragraph.text for paragraph in doc.paragraphs if paragraph.text.strip()
            ]
            text_parts.extend(
                cell.text
                for table in doc.tables
                for row in table.rows
                for cell in row.cells
                if cell.text.strip()
            )

            return "\n".join(text_parts)

        except Exception as e:
            log_error(f"Error extracting text from DOCX: {str(e)}")
            return ""

        finally:
            self._remove_temp_file(temp_file_path)

    def extract_text(self, file_content: bytes, filename: str) -> str:
        """Extract text based on file extension.

        The extension is whatever follows the last ``.`` in *filename*,
        compared case-insensitively.  Unsupported extensions are logged and
        yield ``""``.
        """
        file_ext = filename.lower().split(".")[-1]

        if file_ext == "pdf":
            return self.extract_text_from_pdf(file_content, filename)
        if file_ext == "txt":
            return self.extract_text_from_txt(file_content, filename)
        if file_ext in ["docx", "doc"]:
            # NOTE(review): legacy binary ``.doc`` files are routed to the
            # DOCX reader, which presumably cannot parse them; they would
            # then be logged as a DOCX error and return "" -- confirm.
            return self.extract_text_from_docx(file_content, filename)

        log_error(f"Unsupported file type: {file_ext}")
        return ""

    def split_text_into_chunks(self, text: str) -> list:
        """Split text into manageable chunks for processing."""
        return self.text_splitter.split_text(text)

    def detect_document_type(self, text: str) -> DocumentType:
        """Detect document type based on content.

        Each candidate type scores one point per keyword that occurs as a
        substring of the lower-cased text.  The best-scoring type wins when
        it has more than two hits (ties go to the type listed first);
        otherwise ``DocumentType.OTHER`` is returned.
        """
        text_lower = text.lower()

        # Insertion order doubles as the tie-break order.
        keywords_by_type = {
            DocumentType.RENTAL: [
                "lease",
                "rent",
                "tenant",
                "landlord",
                "property",
                "premises",
                "deposit",
            ],
            DocumentType.LOAN: [
                "loan",
                "borrow",
                "lender",
                "principal",
                "interest",
                "repayment",
                "credit",
            ],
            DocumentType.EMPLOYMENT: [
                "employment",
                "employee",
                "employer",
                "salary",
                "wages",
                "position",
                "job",
            ],
            DocumentType.NDA: [
                "confidential",
                "non-disclosure",
                "proprietary",
                "trade secret",
            ],
            DocumentType.SERVICE: [
                "service",
                "provider",
                "client",
                "deliverables",
                "scope of work",
            ],
        }

        scores = {
            doc_type: sum(1 for keyword in keywords if keyword in text_lower)
            for doc_type, keywords in keywords_by_type.items()
        }

        best_type = max(scores, key=scores.get)
        if scores[best_type] > 2:
            return best_type
        return DocumentType.OTHER

    def extract_metadata(self, text: str) -> dict:
        """Extract metadata from document text.

        Returns:
            A dict with ``word_count`` (whitespace-separated tokens),
            ``character_count`` and ``estimated_reading_time`` in whole
            minutes, assuming 200 words per minute.
        """
        word_count = len(text.split())
        return {
            "word_count": word_count,
            "character_count": len(text),
            "estimated_reading_time": word_count // 200,  # Assuming 200 WPM
        }
src/services/vector_store.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Any, Optional
2
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
3
+ from langchain_chroma import Chroma
4
+ from langchain.schema import Document
5
+ import os
6
+
7
+ from src.utils.config import config
8
+ from src.utils.logger import log_error
9
+ from src.models.document import Document as DocModel
10
+
11
+
12
class VectorStoreService:
    """Chroma-backed store of document chunks with similarity search.

    Documents are split into overlapping character chunks, embedded with
    ``GoogleGenerativeAIEmbeddings`` and kept in the ``lega_documents``
    collection under ``config.CHROMA_PERSIST_DIR``.  Public methods are
    best-effort: failures are reported through ``log_error`` and surface as
    ``False`` / empty results rather than exceptions.
    """

    def __init__(self):
        # Initialize embeddings
        self.embeddings = GoogleGenerativeAIEmbeddings(
            model=config.EMBEDDING_MODEL, google_api_key=config.GOOGLE_API_KEY
        )

        # Ensure Chroma directory exists
        os.makedirs(config.CHROMA_PERSIST_DIR, exist_ok=True)

        # Initialize Chroma vector store
        self.vector_store = Chroma(
            persist_directory=config.CHROMA_PERSIST_DIR,
            embedding_function=self.embeddings,
            collection_name="lega_documents",
        )

    @staticmethod
    def _format_results(results) -> List[Dict[str, Any]]:
        """Convert ``(document, score)`` pairs into plain dictionaries."""
        return [
            {
                "content": doc.page_content,
                "metadata": doc.metadata,
                "similarity_score": score,
            }
            for doc, score in results
        ]

    def add_document(
        self, document_id: str, text: str, metadata: Optional[Dict[str, Any]] = None
    ) -> bool:
        """Add a document to the vector store.

        Args:
            document_id: Identifier stored on every chunk and used to build
                the chunk ids (``<document_id>_chunk_<n>``).
            text: Full document text; it is chunked before embedding.
            metadata: Optional extra metadata copied onto every chunk.

        Returns:
            ``True`` when the chunks were stored, ``False`` if anything failed.
        """
        try:
            # Create document chunks for better retrieval
            chunks = self._chunk_document(text)

            metadatas = []
            ids = []
            for i in range(len(chunks)):
                # Reserved keys are written last so caller-supplied metadata
                # cannot overwrite the fields the per-document filters rely on.
                metadatas.append(
                    {
                        **(metadata or {}),
                        "document_id": document_id,
                        "chunk_id": i,
                        "chunk_type": "text",
                    }
                )
                ids.append(f"{document_id}_chunk_{i}")

            # Add to vector store
            self.vector_store.add_texts(texts=chunks, metadatas=metadatas, ids=ids)

            return True

        except Exception as e:
            log_error(f"Error adding document to vector store: {str(e)}")
            return False

    def search_similar_documents(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
        """Search for similar documents based on query.

        Returns:
            Up to *k* dictionaries with ``content``, ``metadata`` and
            ``similarity_score``; an empty list on failure.
        """
        try:
            results = self.vector_store.similarity_search_with_score(query=query, k=k)
            return self._format_results(results)

        except Exception as e:
            log_error(f"Error searching vector store: {str(e)}")
            return []

    def search_document_clauses(
        self, document_id: str, query: str, k: int = 3
    ) -> List[Dict[str, Any]]:
        """Search for specific clauses within a document.

        Same result shape as ``search_similar_documents``, restricted to
        chunks whose ``document_id`` metadata equals *document_id*.
        """
        try:
            # Filter by document_id
            results = self.vector_store.similarity_search_with_score(
                query=query, k=k, filter={"document_id": document_id}
            )
            return self._format_results(results)

        except Exception as e:
            log_error(f"Error searching document clauses: {str(e)}")
            return []

    def get_document_context(
        self, document_id: str, query: str, max_chunks: int = 5
    ) -> str:
        """Get relevant context from a document for Q&A.

        Returns:
            The contents of the matching chunks whose score is below 0.8,
            joined by blank lines; ``""`` when nothing qualifies or on error.
        """
        try:
            results = self.search_document_clauses(document_id, query, k=max_chunks)

            # NOTE(review): the threshold keeps LOW scores, which presumes the
            # store reports a distance (smaller = closer) -- confirm.
            context_parts = [
                result["content"]
                for result in results
                if result["similarity_score"] < 0.8  # Only use highly relevant chunks
            ]

            return "\n\n".join(context_parts)

        except Exception as e:
            log_error(f"Error getting document context: {str(e)}")
            return ""

    def remove_document(self, document_id: str) -> bool:
        """Remove a document and all its chunks from the vector store.

        Returns:
            ``True`` when the lookup/delete completed (including when no
            chunks existed), ``False`` on error.
        """
        try:
            # Get all chunks for this document
            results = self.vector_store.get(where={"document_id": document_id})

            if results and results.get("ids"):
                # Delete all chunks
                self.vector_store.delete(ids=results["ids"])

            return True

        except Exception as e:
            log_error(f"Error removing document from vector store: {str(e)}")
            return False

    def get_document_stats(self) -> Dict[str, Any]:
        """Get statistics about the vector store.

        ``total_documents`` is the collection's item count; since
        ``add_document`` stores one item per chunk, it counts chunks rather
        than whole documents.
        """
        try:
            # Get collection info (private attribute of the Chroma wrapper)
            collection = self.vector_store._collection
            count = collection.count()

            return {
                "total_documents": count,
                "collection_name": "lega_documents",
                "persist_directory": config.CHROMA_PERSIST_DIR,
            }

        except Exception as e:
            log_error(f"Error getting vector store stats: {str(e)}")
            return {"total_documents": 0}

    def _chunk_document(
        self, text: str, chunk_size: int = 1000, overlap: int = 200
    ) -> List[str]:
        """Split document into chunks for embedding.

        Chunks are at most *chunk_size* characters.  A chunk that is not the
        last one is cut after its final ``.`` when that period lies in the
        second half of the window.  Consecutive chunks share *overlap*
        characters.  Chunks are stripped and empty ones are dropped.

        Raises:
            ValueError: If *chunk_size* is not positive.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")

        chunks = []
        text_length = len(text)
        start = 0

        while start < text_length:
            end = start + chunk_size
            chunk = text[start:end]

            # Try to break at sentence boundary
            if end < text_length:
                last_period = chunk.rfind(".")
                if last_period > chunk_size // 2:
                    chunk = chunk[: last_period + 1]
                    end = start + last_period + 1

            chunk = chunk.strip()
            if chunk:
                chunks.append(chunk)

            # The window reached the end of the text: stepping back by
            # ``overlap`` would only re-emit a suffix of this chunk.
            if end >= text_length:
                break

            # Always move forward, even when overlap >= the chunk just taken.
            start = max(end - overlap, start + 1)

        return chunks

    def find_similar_clauses(
        self, clause_text: str, exclude_document_id: Optional[str] = None, k: int = 3
    ) -> List[Dict[str, Any]]:
        """Find similar clauses across all documents.

        Args:
            clause_text: Clause to look up.
            exclude_document_id: When given, chunks of this document are
                filtered out.
            k: Maximum number of matches.

        Returns:
            Same result shape as ``search_similar_documents``; an empty list
            on failure.
        """
        try:
            filter_dict = None
            if exclude_document_id:
                # This is a simplified filter - Chroma might need different syntax
                filter_dict = {"document_id": {"$ne": exclude_document_id}}

            results = self.vector_store.similarity_search_with_score(
                query=clause_text, k=k, filter=filter_dict
            )
            return self._format_results(results)

        except Exception as e:
            log_error(f"Error finding similar clauses: {str(e)}")
            return []
src/utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # src/utils/__init__.py
src/utils/config.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Optional
3
+ from dotenv import load_dotenv
4
+
5
+ # Load environment variables
6
+ load_dotenv()
7
+
8
+
9
class Config:
    """Application settings, read once from environment variables.

    Every attribute is evaluated when the class body runs (i.e. at import),
    falling back to the default shown next to it.  Boolean settings are true
    only when the variable equals ``"true"`` case-insensitively.  Integer and
    float settings raise ``ValueError`` at import when the variable cannot be
    parsed.
    """

    # =============================================================================
    # GOOGLE AI API CONFIGURATION
    # =============================================================================
    GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")

    # =============================================================================
    # APPLICATION SETTINGS
    # =============================================================================
    DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
    STREAMLIT_SERVER_PORT: int = int(os.getenv("STREAMLIT_SERVER_PORT", "8501"))
    STREAMLIT_SERVER_ADDRESS: str = os.getenv("STREAMLIT_SERVER_ADDRESS", "localhost")
    MAX_FILE_SIZE_MB: int = int(os.getenv("MAX_FILE_SIZE_MB", "10"))
    # Comma-separated list; surrounding whitespace and empty entries are
    # dropped so that "pdf, docx" does not yield the unusable type " docx".
    SUPPORTED_FILE_TYPES: list = [
        file_type.strip()
        for file_type in os.getenv("SUPPORTED_FILE_TYPES", "pdf,docx,txt").split(",")
        if file_type.strip()
    ]

    # =============================================================================
    # LOGGING
    # =============================================================================
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
    LOG_FILE: str = os.getenv("LOG_FILE", "./data/app.log")

    # =============================================================================
    # SECURITY
    # =============================================================================
    SECRET_KEY: str = os.getenv("SECRET_KEY", "development-key-change-in-production")
    SESSION_TIMEOUT_MINUTES: int = int(os.getenv("SESSION_TIMEOUT_MINUTES", "60"))

    # =============================================================================
    # AI MODEL SETTINGS
    # =============================================================================
    CHAT_MODEL: str = os.getenv("CHAT_MODEL", "gemini-1.5-flash")
    TEMPERATURE: float = float(os.getenv("TEMPERATURE", "0.2"))
    MAX_TOKENS: int = int(os.getenv("MAX_TOKENS", "2048"))
    EMBEDDING_MODEL: str = os.getenv("EMBEDDING_MODEL", "models/embedding-001")

    # =============================================================================
    # VECTOR STORE CONFIGURATION
    # =============================================================================
    # Note: the environment variable is CHROMA_PERSIST_DIRECTORY, the
    # attribute is CHROMA_PERSIST_DIR.
    CHROMA_PERSIST_DIR: str = os.getenv("CHROMA_PERSIST_DIRECTORY", "./data/chroma_db")

    # =============================================================================
    # STORAGE CONFIGURATION
    # =============================================================================
    UPLOAD_DIR: str = os.getenv("UPLOAD_DIR", "./uploads")
    DATA_DIR: str = os.getenv("DATA_DIR", "./data")
    DATABASE_URL: str = os.getenv("DATABASE_URL", "sqlite:///./data/lega.db")

    # =============================================================================
    # PERFORMANCE SETTINGS
    # =============================================================================
    MAX_CONCURRENT_UPLOADS: int = int(os.getenv("MAX_CONCURRENT_UPLOADS", "5"))
    DOCUMENT_PROCESSING_TIMEOUT: int = int(
        os.getenv("DOCUMENT_PROCESSING_TIMEOUT", "300")
    )
    ENABLE_CACHE: bool = os.getenv("ENABLE_CACHE", "True").lower() == "true"
    CACHE_TTL_SECONDS: int = int(os.getenv("CACHE_TTL_SECONDS", "3600"))

    # =============================================================================
    # FEATURE FLAGS
    # =============================================================================
    ENABLE_DOCUMENT_LIBRARY: bool = (
        os.getenv("ENABLE_DOCUMENT_LIBRARY", "True").lower() == "true"
    )
    ENABLE_QA_ASSISTANT: bool = (
        os.getenv("ENABLE_QA_ASSISTANT", "True").lower() == "true"
    )
    ENABLE_MARKET_COMPARISON: bool = (
        os.getenv("ENABLE_MARKET_COMPARISON", "True").lower() == "true"
    )
    ENABLE_TIMELINE_TRACKER: bool = (
        os.getenv("ENABLE_TIMELINE_TRACKER", "True").lower() == "true"
    )
    ENABLE_EXPORT_FEATURES: bool = (
        os.getenv("ENABLE_EXPORT_FEATURES", "True").lower() == "true"
    )

    # =============================================================================
    # ANALYTICS & MONITORING
    # =============================================================================
    ENABLE_ANALYTICS: bool = os.getenv("ENABLE_ANALYTICS", "False").lower() == "true"
    ANALYTICS_API_KEY: str = os.getenv("ANALYTICS_API_KEY", "")
    ENABLE_ERROR_TRACKING: bool = (
        os.getenv("ENABLE_ERROR_TRACKING", "False").lower() == "true"
    )
    SENTRY_DSN: str = os.getenv("SENTRY_DSN", "")

    # =============================================================================
    # REGIONAL SETTINGS
    # =============================================================================
    DEFAULT_REGION: str = os.getenv("DEFAULT_REGION", "India")
    DEFAULT_CURRENCY: str = os.getenv("DEFAULT_CURRENCY", "INR")
    TIMEZONE: str = os.getenv("TIMEZONE", "Asia/Kolkata")

    # =============================================================================
    # ADVANCED AI SETTINGS
    # =============================================================================
    RISK_SENSITIVITY: int = int(os.getenv("RISK_SENSITIVITY", "3"))
    SIMPLIFICATION_LEVEL: str = os.getenv("SIMPLIFICATION_LEVEL", "intermediate")
    MAX_RISK_FACTORS: int = int(os.getenv("MAX_RISK_FACTORS", "10"))

    # =============================================================================
    # API RATE LIMITING
    # =============================================================================
    API_REQUESTS_PER_MINUTE: int = int(os.getenv("API_REQUESTS_PER_MINUTE", "60"))
    API_REQUESTS_PER_DAY: int = int(os.getenv("API_REQUESTS_PER_DAY", "1000"))

    # =============================================================================
    # BACKUP & MAINTENANCE
    # =============================================================================
    ENABLE_AUTO_BACKUP: bool = (
        os.getenv("ENABLE_AUTO_BACKUP", "False").lower() == "true"
    )
    BACKUP_INTERVAL_HOURS: int = int(os.getenv("BACKUP_INTERVAL_HOURS", "24"))
    BACKUP_RETENTION_DAYS: int = int(os.getenv("BACKUP_RETENTION_DAYS", "30"))
    AUTO_CLEANUP_TEMP_FILES: bool = (
        os.getenv("AUTO_CLEANUP_TEMP_FILES", "True").lower() == "true"
    )
    CLEANUP_INTERVAL_HOURS: int = int(os.getenv("CLEANUP_INTERVAL_HOURS", "6"))

    @classmethod
    def validate_config(cls) -> bool:
        """Validate that required configuration is present.

        A required variable counts as missing when it is empty or still
        holds the ``your_google_ai_api_key_here`` placeholder.

        Returns:
            ``True`` when everything required is set; otherwise prints the
            missing names and returns ``False``.
        """
        required_vars = ["GOOGLE_API_KEY"]

        missing_vars = []
        for var in required_vars:
            value = getattr(cls, var, "")
            if not value or value == "your_google_ai_api_key_here":
                missing_vars.append(var)

        if missing_vars:
            print(
                f"⚠️ Missing required environment variables: {', '.join(missing_vars)}"
            )
            print("📝 Please update your .env file with valid values")
            return False

        return True

    @classmethod
    def get_config_summary(cls) -> dict:
        """Get a summary of current configuration for debugging.

        The API key itself is never included; ``api_configured`` only says
        whether a non-placeholder key is present.
        """
        return {
            "api_configured": bool(
                cls.GOOGLE_API_KEY
                and cls.GOOGLE_API_KEY != "your_google_ai_api_key_here"
            ),
            "debug_mode": cls.DEBUG,
            "features_enabled": {
                "document_library": cls.ENABLE_DOCUMENT_LIBRARY,
                "qa_assistant": cls.ENABLE_QA_ASSISTANT,
                "market_comparison": cls.ENABLE_MARKET_COMPARISON,
                "timeline_tracker": cls.ENABLE_TIMELINE_TRACKER,
                "export_features": cls.ENABLE_EXPORT_FEATURES,
            },
            "supported_file_types": cls.SUPPORTED_FILE_TYPES,
            "max_file_size_mb": cls.MAX_FILE_SIZE_MB,
            "risk_sensitivity": cls.RISK_SENSITIVITY,
            "region": cls.DEFAULT_REGION,
            "currency": cls.DEFAULT_CURRENCY,
        }
172
+
173
+
174
+ # Create singleton instance
175
+ config = Config()
src/utils/helpers.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import hashlib
2
+ import os
3
+ import uuid
4
+ from datetime import datetime, timedelta
5
+ from typing import List, Dict, Any, Optional
6
+ import re
7
+
8
+
9
def generate_document_id() -> str:
    """Return a new random (version 4) UUID in its string form."""
    fresh_id = uuid.uuid4()
    return str(fresh_id)
12
+
13
+
14
def generate_session_id() -> str:
    """Return a new random (version 4) UUID string to identify a session."""
    return f"{uuid.uuid4()}"
17
+
18
+
19
def calculate_file_hash(file_content: bytes) -> str:
    """Return the SHA-256 digest of *file_content* as lowercase hex."""
    digest = hashlib.sha256()
    digest.update(file_content)
    return digest.hexdigest()
22
+
23
+
24
def sanitize_filename(filename: str) -> str:
    """Sanitize filename for safe storage.

    Every character other than word characters, ``-`` and ``.`` becomes
    ``_`` (so path separators cannot survive).  Names that would be empty or
    consist only of dots (``""``, ``"."``, ``".."``) are replaced by
    ``"unnamed_file"`` because they would resolve to a directory when joined
    to a storage path.  The result is capped at 255 characters, keeping the
    extension whenever it fits.

    Args:
        filename: Name supplied with the upload.

    Returns:
        A name of at most 255 characters that is safe to use as a single
        path component.
    """
    max_length = 255

    # Remove or replace dangerous characters
    sanitized = re.sub(r"[^\w\-_\.]", "_", filename)

    # "", "." and ".." are not usable file names.
    if not sanitized.strip("."):
        return "unnamed_file"

    # Ensure it's not too long
    if len(sanitized) > max_length:
        name, ext = os.path.splitext(sanitized)
        if len(ext) >= max_length:
            # The "extension" alone exceeds the limit; a plain cut is the
            # only way to honour the cap.
            sanitized = sanitized[:max_length]
        else:
            sanitized = name[: max_length - len(ext)] + ext

    return sanitized
33
+
34
+
35
def format_file_size(size_bytes: int) -> str:
    """Render a byte count with one decimal and a B/KB/MB/GB unit.

    Zero is special-cased as ``"0 B"``.  Values are divided by 1024 until
    they drop below 1024 or the largest unit (GB) is reached.
    """
    if size_bytes == 0:
        return "0 B"

    units = ("B", "KB", "MB", "GB")
    value = size_bytes
    unit = units[0]

    # Step up one unit at a time; GB is the ceiling.
    for bigger_unit in units[1:]:
        if value < 1024:
            break
        value = value / 1024.0
        unit = bigger_unit

    return f"{value:.1f} {unit}"
47
+
48
+
49
def extract_key_dates(text: str) -> List[Dict[str, Any]]:
    """Find date-like strings in *text*.

    Four formats are recognised (case-insensitively): ``N/N/YYYY``,
    ``N-N-YYYY``, ``YYYY-N-N`` and ``<Month name> D[,] YYYY``.

    Returns:
        One dict per match with ``date`` (matched text), ``position`` (start
        offset) and ``context`` (the match plus up to 50 characters on each
        side).  Results are grouped by pattern, in the order listed above,
        not sorted by position.
    """
    numeric_slash = r"\b\d{1,2}/\d{1,2}/\d{4}\b"  # MM/DD/YYYY
    numeric_dash = r"\b\d{1,2}-\d{1,2}-\d{4}\b"  # MM-DD-YYYY
    iso_like = r"\b\d{4}-\d{1,2}-\d{1,2}\b"  # YYYY-MM-DD
    month_name = r"\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b"

    return [
        {
            "date": hit.group(),
            "position": hit.start(),
            "context": text[max(0, hit.start() - 50) : hit.end() + 50],
        }
        for pattern in (numeric_slash, numeric_dash, iso_like, month_name)
        for hit in re.finditer(pattern, text, re.IGNORECASE)
    ]
71
+
72
+
73
def extract_financial_terms(text: str) -> Dict[str, Any]:
    """Collect monetary amounts, percentages and interest rates from *text*.

    Returns:
        A dict containing only the keys that had matches:
        ``amounts`` (``₹``, ``Rs``/``Rs.`` and ``$`` amounts, grouped by
        currency pattern in that order), ``percentages`` and
        ``interest_rates`` (the first percentage following "interest rate",
        "APR" or "annual percentage rate" on the same line).
    """
    rupee_symbol = r"₹[\d,]+(?:\.\d{2})?"  # Indian Rupees
    rupee_abbrev = r"Rs\.?\s*[\d,]+(?:\.\d{2})?"  # Rs. format
    dollar = r"\$[\d,]+(?:\.\d{2})?"  # USD

    found_amounts = [
        amount
        for pattern in (rupee_symbol, rupee_abbrev, dollar)
        for amount in re.findall(pattern, text)
    ]
    found_percentages = re.findall(r"\d+(?:\.\d+)?%", text)
    found_rates = re.findall(
        r"(?:interest rate|APR|annual percentage rate).*?(\d+(?:\.\d+)?%)",
        text,
        re.IGNORECASE,
    )

    # Only report the categories that actually occurred.
    summary = {}
    if found_amounts:
        summary["amounts"] = found_amounts
    if found_percentages:
        summary["percentages"] = found_percentages
    if found_rates:
        summary["interest_rates"] = found_rates
    return summary
106
+
107
+
108
def calculate_risk_score(risk_factors: List[Dict[str, Any]]) -> int:
    """Calculate overall risk score from individual risk factors.

    Each factor contributes a weight chosen by its ``severity``
    (critical 25, high 15, medium 8, low 3); the sum is capped at 100.
    A missing or ``None`` severity counts as ``"low"``; an unrecognised
    severity contributes nothing.  Severities are matched
    case-insensitively, and enum members are accepted through their
    ``value``.

    Args:
        risk_factors: Mappings that may carry a ``"severity"`` entry.

    Returns:
        An integer between 0 and 100.
    """
    if not risk_factors:
        return 0

    risk_weights = {"critical": 25, "high": 15, "medium": 8, "low": 3}

    total_score = 0
    for factor in risk_factors:
        # ``or`` also covers an explicit None, which ``.get`` with a default
        # would pass through and crash on ``.lower()``.
        severity = factor.get("severity") or "low"
        # Enum members expose their raw string through ``.value``.
        severity = getattr(severity, "value", severity)
        total_score += risk_weights.get(str(severity).strip().lower(), 0)

    # Cap at 100
    return min(total_score, 100)
122
+
123
+
124
def get_risk_color(risk_score: int) -> str:
    """Map a risk score to a hex colour.

    75 and above is red, 50-74 orange, 25-49 yellow, anything lower green.
    """
    # (lowest score in the band, colour), highest band first.
    bands = (
        (75, "#FF4444"),  # Red
        (50, "#FF8800"),  # Orange
        (25, "#FFCC00"),  # Yellow
    )
    for floor, colour in bands:
        if risk_score >= floor:
            return colour
    return "#44AA44"  # Green
134
+
135
+
136
def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 100) -> List[str]:
    """Split text into overlapping chunks for processing.

    Chunks are at most *chunk_size* characters.  A chunk that is not the
    last one is cut after its final ``.`` when that period lies in the
    second half of the window.  Consecutive chunks share *overlap*
    characters.  Chunks are returned unstripped.

    Args:
        text: Text to split; ``""`` yields an empty list.
        chunk_size: Maximum chunk length in characters.
        overlap: Number of characters repeated at the start of the next chunk.

    Returns:
        The chunks in document order.

    Raises:
        ValueError: If *chunk_size* is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    chunks = []
    text_length = len(text)
    start = 0

    while start < text_length:
        end = start + chunk_size
        chunk = text[start:end]

        # Try to break at sentence boundary
        if end < text_length:
            last_period = chunk.rfind(".")
            if last_period > chunk_size // 2:
                chunk = chunk[: last_period + 1]
                end = start + last_period + 1

        chunks.append(chunk)

        # The window reached the end of the text: stepping back by
        # ``overlap`` would only re-emit a suffix of this chunk.
        if end >= text_length:
            break

        # Always move forward, even when overlap >= the chunk just taken;
        # otherwise the loop would never terminate.
        start = max(end - overlap, start + 1)

    return chunks
156
+
157
+
158
def format_timestamp(timestamp: datetime) -> str:
    """Format timestamp for display as a relative age.

    Returns ``"N day(s) ago"``, ``"N hour(s) ago"`` or ``"N minute(s) ago"``
    using the largest unit that is at least 1, with the singular form for
    exactly one.  Anything younger than a minute -- including timestamps
    that lie in the future -- is ``"Just now"``.

    Args:
        timestamp: Moment to describe.  Naive values are compared with the
            naive local time, timezone-aware values with the current time in
            the same timezone.
    """

    def _ago(count: int, unit: str) -> str:
        plural = "" if count == 1 else "s"
        return f"{count} {unit}{plural} ago"

    # Matching the timestamp's tzinfo avoids the TypeError raised when a
    # naive and an aware datetime are subtracted.
    now = datetime.now(tz=timestamp.tzinfo)
    elapsed_seconds = int((now - timestamp).total_seconds())

    if elapsed_seconds < 60:
        return "Just now"

    days, remainder = divmod(elapsed_seconds, 86400)
    if days:
        return _ago(days, "day")

    hours = remainder // 3600
    if hours:
        return _ago(hours, "hour")

    return _ago(remainder // 60, "minute")
src/utils/logger.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ from datetime import datetime
4
+ from src.utils.config import config
5
+
6
+
7
def setup_logging():
    """Set up logging configuration.

    Configures the root logger to write to ``config.LOG_FILE`` and to the
    console, using ``config.LOG_LEVEL`` as the threshold.  The level name is
    matched case-insensitively; an unknown name falls back to ``INFO``
    instead of failing at import time.

    Returns:
        The logger named after this module.
    """
    # Create the log directory if the path has one; ``os.makedirs("")``
    # raises, so a bare file name such as "app.log" must skip this step.
    log_dir = os.path.dirname(config.LOG_FILE)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    # Only accept attributes that really are numeric levels.
    level = getattr(logging, str(config.LOG_LEVEL).upper(), None)
    if not isinstance(level, int):
        level = logging.INFO

    # Configure logging
    logging.basicConfig(
        level=level,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        handlers=[
            # Explicit UTF-8 keeps non-ASCII log text writable regardless of
            # the platform's default encoding.
            logging.FileHandler(config.LOG_FILE, encoding="utf-8"),
            logging.StreamHandler(),
        ],
    )

    return logging.getLogger(__name__)
20
+
21
+
22
def log_document_upload(filename: str, file_size: int) -> None:
    """Record, at INFO level, that *filename* of *file_size* bytes was uploaded."""
    message = f"Document uploaded: {filename} ({file_size} bytes)"
    logging.getLogger(__name__).info(message)
26
+
27
+
28
def log_analysis_start(document_id: str) -> None:
    """Record, at INFO level, that analysis of *document_id* has begun."""
    message = f"Starting analysis for document: {document_id}"
    logging.getLogger(__name__).info(message)
32
+
33
+
34
def log_analysis_complete(document_id: str, processing_time: float) -> None:
    """Record, at INFO level, that analysis finished and how long it took.

    *processing_time* is rendered in seconds with two decimals.
    """
    message = f"Analysis completed for document: {document_id} in {processing_time:.2f}s"
    logging.getLogger(__name__).info(message)
40
+
41
+
42
def log_error(error_message: str, document_id: str = None) -> None:
    """Record an error at ERROR level.

    With a truthy *document_id* the message names the document; otherwise
    it is reported as a general application error.
    """
    message = (
        f"Error processing document {document_id}: {error_message}"
        if document_id
        else f"Application error: {error_message}"
    )
    logging.getLogger(__name__).error(message)
49
+
50
+
51
def log_qa_interaction(document_id: str, question: str) -> None:
    """Record a Q&A question at INFO level.

    Only the first 100 characters of *question* are logged, always followed
    by ``...``.
    """
    message = f"Q&A interaction for document {document_id}: {question[:100]}..."
    logging.getLogger(__name__).info(message)
55
+
56
+
57
+ # Initialize logging when module is imported
58
+ setup_logging()
start.sh ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# Hugging Face Spaces startup script for Lega.AI
#
# Creates the runtime directories, exports the settings the app reads from
# the environment, warns when the Google API key is missing, and then
# replaces this shell with the Streamlit server.

echo "🚀 Starting Lega.AI on Hugging Face Spaces..."

# Create necessary directories if they don't exist
mkdir -p data/chroma_db
mkdir -p uploads
mkdir -p .streamlit

# Set default environment variables for Hugging Face deployment.
# The port comes from $PORT when the platform provides it, else 7860.
export STREAMLIT_SERVER_PORT="${PORT:-7860}"
export STREAMLIT_SERVER_ADDRESS="0.0.0.0"
export DEBUG=False
export LOG_LEVEL=INFO
export STREAMLIT_CONFIG_DIR=/app/.streamlit
export XDG_CONFIG_HOME=/app

# Check if GOOGLE_API_KEY is set (warn only; the app still starts)
if [ -z "${GOOGLE_API_KEY:-}" ]; then
    echo "⚠️ WARNING: GOOGLE_API_KEY environment variable is not set!" >&2
    echo "Please set it in your Hugging Face Space settings for the app to work properly." >&2
fi

# Start the Streamlit application.
# Expansions are quoted so an unexpected value cannot be word-split into
# extra arguments.
# NOTE(review): CORS and XSRF protection are disabled below, presumably
# because the Space is served from an embedding page -- confirm this is
# still required before deploying elsewhere.
echo "🌐 Starting Streamlit on port $STREAMLIT_SERVER_PORT..."
exec streamlit run main.py \
    --server.port="$STREAMLIT_SERVER_PORT" \
    --server.address="$STREAMLIT_SERVER_ADDRESS" \
    --server.headless=true \
    --server.fileWatcherType=none \
    --server.enableCORS=false \
    --server.enableXsrfProtection=false
uv.lock ADDED
The diff for this file is too large to render. See raw diff