CoderNoah committed on
Commit ·
8b7e8f0
0
Parent(s):
Initial commit
Browse files- .dockerignore +75 -0
- .env.example +92 -0
- .env.hf +51 -0
- .gitignore +32 -0
- .python-version +1 -0
- .streamlit/config.toml +17 -0
- Dockerfile +47 -0
- HUGGINGFACE_DEPLOYMENT.md +83 -0
- README.md +484 -0
- main.py +789 -0
- pyproject.toml +20 -0
- requirements.txt +17 -0
- sample/Employment_Offer_Letter.pdf +74 -0
- sample/Master_Services_Agreement.pdf +74 -0
- sample/Mutual_NDA.pdf +74 -0
- sample/Residential_Lease_Agreement.pdf +74 -0
- setup.py +102 -0
- src/__init__.py +3 -0
- src/models/__init__.py +1 -0
- src/models/document.py +88 -0
- src/pages/__init__.py +1 -0
- src/pages/analysis.py +978 -0
- src/pages/library.py +340 -0
- src/pages/qa_assistant.py +239 -0
- src/pages/settings.py +304 -0
- src/pages/upload.py +342 -0
- src/services/__init__.py +1 -0
- src/services/ai_analyzer.py +378 -0
- src/services/document_processor.py +202 -0
- src/services/vector_store.py +212 -0
- src/utils/__init__.py +1 -0
- src/utils/config.py +175 -0
- src/utils/helpers.py +172 -0
- src/utils/logger.py +58 -0
- start.sh +34 -0
- uv.lock +0 -0
.dockerignore
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python cache files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
env/
|
| 8 |
+
build/
|
| 9 |
+
develop-eggs/
|
| 10 |
+
dist/
|
| 11 |
+
downloads/
|
| 12 |
+
eggs/
|
| 13 |
+
.eggs/
|
| 14 |
+
lib/
|
| 15 |
+
lib64/
|
| 16 |
+
parts/
|
| 17 |
+
sdist/
|
| 18 |
+
var/
|
| 19 |
+
wheels/
|
| 20 |
+
*.egg-info/
|
| 21 |
+
.installed.cfg
|
| 22 |
+
*.egg
|
| 23 |
+
|
| 24 |
+
# Virtual environments
|
| 25 |
+
.venv/
|
| 26 |
+
venv/
|
| 27 |
+
ENV/
|
| 28 |
+
env/
|
| 29 |
+
|
| 30 |
+
# IDE files
|
| 31 |
+
.vscode/
|
| 32 |
+
.idea/
|
| 33 |
+
*.swp
|
| 34 |
+
*.swo
|
| 35 |
+
*~
|
| 36 |
+
|
| 37 |
+
# OS files
|
| 38 |
+
.DS_Store
|
| 39 |
+
Thumbs.db
|
| 40 |
+
|
| 41 |
+
# Git files
|
| 42 |
+
.git/
|
| 43 |
+
.gitignore
|
| 44 |
+
|
| 45 |
+
# Project specific
|
| 46 |
+
data/chroma_db/
|
| 47 |
+
data/*.log
|
| 48 |
+
uploads/*
|
| 49 |
+
!uploads/.gitkeep
|
| 50 |
+
|
| 51 |
+
# Environment files
|
| 52 |
+
.env
|
| 53 |
+
|
| 54 |
+
# Documentation
|
| 55 |
+
*.md
|
| 56 |
+
!README.md
|
| 57 |
+
|
| 58 |
+
# Lock files
|
| 59 |
+
uv.lock
|
| 60 |
+
poetry.lock
|
| 61 |
+
Pipfile.lock
|
| 62 |
+
|
| 63 |
+
# Testing
|
| 64 |
+
.pytest_cache/
|
| 65 |
+
.coverage
|
| 66 |
+
htmlcov/
|
| 67 |
+
|
| 68 |
+
# Jupyter notebooks
|
| 69 |
+
*.ipynb
|
| 70 |
+
.ipynb_checkpoints/
|
| 71 |
+
|
| 72 |
+
# Local development
|
| 73 |
+
local/
|
| 74 |
+
tmp/
|
| 75 |
+
temp/
|
.env.example
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# =============================================================================
|
| 2 |
+
# LEGA.AI CONFIGURATION TEMPLATE
|
| 3 |
+
# =============================================================================
|
| 4 |
+
# Copy this file to .env and update the values below
|
| 5 |
+
# This is a template file - safe to commit to version control
|
| 6 |
+
|
| 7 |
+
# =============================================================================
|
| 8 |
+
# GOOGLE AI API CONFIGURATION (REQUIRED)
|
| 9 |
+
# =============================================================================
|
| 10 |
+
# Get your API key from: https://aistudio.google.com/
|
| 11 |
+
GOOGLE_API_KEY=your-google-api-key-here
|
| 12 |
+
|
| 13 |
+
# =============================================================================
|
| 14 |
+
# APPLICATION SETTINGS
|
| 15 |
+
# =============================================================================
|
| 16 |
+
DEBUG=True
|
| 17 |
+
LOG_LEVEL=INFO
|
| 18 |
+
|
| 19 |
+
# Streamlit server configuration
|
| 20 |
+
STREAMLIT_SERVER_PORT=8501
|
| 21 |
+
STREAMLIT_SERVER_ADDRESS=localhost
|
| 22 |
+
|
| 23 |
+
# File upload settings
|
| 24 |
+
MAX_FILE_SIZE_MB=10
|
| 25 |
+
SUPPORTED_FILE_TYPES=pdf,docx,txt
|
| 26 |
+
|
| 27 |
+
# =============================================================================
|
| 28 |
+
# AI MODEL SETTINGS
|
| 29 |
+
# =============================================================================
|
| 30 |
+
TEMPERATURE=0.2
|
| 31 |
+
MAX_TOKENS=2048
|
| 32 |
+
EMBEDDING_MODEL=models/text-embedding-004
|
| 33 |
+
|
| 34 |
+
# =============================================================================
|
| 35 |
+
# VECTOR STORE CONFIGURATION
|
| 36 |
+
# =============================================================================
|
| 37 |
+
CHROMA_PERSIST_DIRECTORY=./data/chroma_db
|
| 38 |
+
|
| 39 |
+
# =============================================================================
|
| 40 |
+
# STORAGE CONFIGURATION
|
| 41 |
+
# =============================================================================
|
| 42 |
+
UPLOAD_DIR=./uploads
|
| 43 |
+
DATA_DIR=./data
|
| 44 |
+
LOG_FILE=./data/app.log
|
| 45 |
+
|
| 46 |
+
# =============================================================================
|
| 47 |
+
# SECURITY SETTINGS
|
| 48 |
+
# =============================================================================
|
| 49 |
+
SECRET_KEY=your-secret-key-here
|
| 50 |
+
SESSION_TIMEOUT_MINUTES=60
|
| 51 |
+
|
| 52 |
+
# =============================================================================
|
| 53 |
+
# DATABASE CONFIGURATION
|
| 54 |
+
# =============================================================================
|
| 55 |
+
DATABASE_URL=sqlite:///./data/lega.db
|
| 56 |
+
|
| 57 |
+
# =============================================================================
|
| 58 |
+
# PERFORMANCE SETTINGS
|
| 59 |
+
# =============================================================================
|
| 60 |
+
MAX_CONCURRENT_UPLOADS=5
|
| 61 |
+
DOCUMENT_PROCESSING_TIMEOUT=300
|
| 62 |
+
ENABLE_CACHE=True
|
| 63 |
+
CACHE_TTL_SECONDS=3600
|
| 64 |
+
|
| 65 |
+
# =============================================================================
|
| 66 |
+
# FEATURE FLAGS
|
| 67 |
+
# =============================================================================
|
| 68 |
+
ENABLE_DOCUMENT_LIBRARY=True
|
| 69 |
+
ENABLE_QA_ASSISTANT=True
|
| 70 |
+
ENABLE_MARKET_COMPARISON=True
|
| 71 |
+
ENABLE_TIMELINE_TRACKER=True
|
| 72 |
+
ENABLE_EXPORT_FEATURES=True
|
| 73 |
+
|
| 74 |
+
# =============================================================================
|
| 75 |
+
# REGIONAL SETTINGS
|
| 76 |
+
# =============================================================================
|
| 77 |
+
DEFAULT_REGION=India
|
| 78 |
+
DEFAULT_CURRENCY=INR
|
| 79 |
+
TIMEZONE=Asia/Kolkata
|
| 80 |
+
|
| 81 |
+
# =============================================================================
|
| 82 |
+
# ADVANCED AI SETTINGS
|
| 83 |
+
# =============================================================================
|
| 84 |
+
RISK_SENSITIVITY=3
|
| 85 |
+
SIMPLIFICATION_LEVEL=intermediate
|
| 86 |
+
MAX_RISK_FACTORS=10
|
| 87 |
+
|
| 88 |
+
# =============================================================================
|
| 89 |
+
# API RATE LIMITING
|
| 90 |
+
# =============================================================================
|
| 91 |
+
API_REQUESTS_PER_MINUTE=60
|
| 92 |
+
API_REQUESTS_PER_DAY=1000
|
.env.hf
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# =============================================================================
|
| 2 |
+
# HUGGING FACE SPACES CONFIGURATION
|
| 3 |
+
# =============================================================================
|
| 4 |
+
# This file contains default configuration for Hugging Face Spaces deployment
|
| 5 |
+
# Set these environment variables in your Hugging Face Space settings
|
| 6 |
+
|
| 7 |
+
# =============================================================================
|
| 8 |
+
# REQUIRED: GOOGLE AI API CONFIGURATION
|
| 9 |
+
# =============================================================================
|
| 10 |
+
# Get your API key from: https://aistudio.google.com/
|
| 11 |
+
# Set this as a Secret in Hugging Face Spaces under Settings -> Variables and secrets (Variables are publicly visible)
|
| 12 |
+
GOOGLE_API_KEY=
|
| 13 |
+
|
| 14 |
+
# =============================================================================
|
| 15 |
+
# HUGGING FACE SPACES SETTINGS (Automatically configured)
|
| 16 |
+
# =============================================================================
|
| 17 |
+
# These are set automatically by the startup script
|
| 18 |
+
PORT=7860
|
| 19 |
+
STREAMLIT_SERVER_PORT=7860
|
| 20 |
+
STREAMLIT_SERVER_ADDRESS=0.0.0.0
|
| 21 |
+
|
| 22 |
+
# =============================================================================
|
| 23 |
+
# APPLICATION SETTINGS FOR PRODUCTION
|
| 24 |
+
# =============================================================================
|
| 25 |
+
DEBUG=False
|
| 26 |
+
LOG_LEVEL=INFO
|
| 27 |
+
|
| 28 |
+
# File upload settings (conservative for cloud deployment)
|
| 29 |
+
MAX_FILE_SIZE_MB=5
|
| 30 |
+
SUPPORTED_FILE_TYPES=pdf,docx,txt
|
| 31 |
+
|
| 32 |
+
# =============================================================================
|
| 33 |
+
# AI MODEL SETTINGS (Optimized for Hugging Face)
|
| 34 |
+
# =============================================================================
|
| 35 |
+
TEMPERATURE=0.2
|
| 36 |
+
MAX_TOKENS=2048
|
| 37 |
+
EMBEDDING_MODEL=models/text-embedding-004
|
| 38 |
+
|
| 39 |
+
# =============================================================================
|
| 40 |
+
# STORAGE CONFIGURATION (Cloud optimized)
|
| 41 |
+
# =============================================================================
|
| 42 |
+
CHROMA_PERSIST_DIRECTORY=./data/chroma_db
|
| 43 |
+
UPLOAD_DIR=./uploads
|
| 44 |
+
DATA_DIR=./data
|
| 45 |
+
LOG_FILE=./data/app.log
|
| 46 |
+
|
| 47 |
+
# =============================================================================
|
| 48 |
+
# SECURITY SETTINGS FOR CLOUD DEPLOYMENT
|
| 49 |
+
# =============================================================================
|
| 50 |
+
SECRET_KEY=huggingface-lega-ai-deployment
|
| 51 |
+
SESSION_TIMEOUT_MINUTES=30
|
.gitignore
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python-generated files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[oc]
|
| 4 |
+
build/
|
| 5 |
+
dist/
|
| 6 |
+
wheels/
|
| 7 |
+
*.egg-info
|
| 8 |
+
|
| 9 |
+
# Virtual environments
|
| 10 |
+
.venv
|
| 11 |
+
|
| 12 |
+
# Environment variables (contains secrets)
|
| 13 |
+
.env
|
| 14 |
+
|
| 15 |
+
# Local data and uploads
|
| 16 |
+
data/
|
| 17 |
+
uploads/
|
| 18 |
+
*.log
|
| 19 |
+
|
| 20 |
+
# Temporary files
|
| 21 |
+
temp/
|
| 22 |
+
tmp/
|
| 23 |
+
.tmp
|
| 24 |
+
|
| 25 |
+
# IDE files
|
| 26 |
+
.vscode/
|
| 27 |
+
.idea/
|
| 28 |
+
*.swp
|
| 29 |
+
*.swo
|
| 30 |
+
|
| 31 |
+
# OS files
|
| 32 |
+
Thumbs.db
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.13
|
.streamlit/config.toml
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[server]
|
| 2 |
+
port = 7860
|
| 3 |
+
address = "0.0.0.0"
|
| 4 |
+
headless = true
|
| 5 |
+
fileWatcherType = "none"
|
| 6 |
+
enableCORS = false
|
| 7 |
+
enableXsrfProtection = false
|
| 8 |
+
|
| 9 |
+
[browser]
|
| 10 |
+
gatherUsageStats = false
|
| 11 |
+
|
| 12 |
+
[global]
|
| 13 |
+
dataFrameSerialization = "legacy"
|
| 14 |
+
|
| 15 |
+
[client]
|
| 16 |
+
caching = false
|
| 17 |
+
displayEnabled = false
|
Dockerfile
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use Python 3.11 as Hugging Face Spaces supports it well
|
| 2 |
+
FROM python:3.11-slim
|
| 3 |
+
|
| 4 |
+
# Set working directory
|
| 5 |
+
WORKDIR /app
|
| 6 |
+
|
| 7 |
+
# Set environment variables for Python
|
| 8 |
+
ENV PYTHONUNBUFFERED=1 \
|
| 9 |
+
PYTHONDONTWRITEBYTECODE=1 \
|
| 10 |
+
PIP_NO_CACHE_DIR=1 \
|
| 11 |
+
PIP_DISABLE_PIP_VERSION_CHECK=1
|
| 12 |
+
|
| 13 |
+
# Install system dependencies
|
| 14 |
+
RUN apt-get update && apt-get install -y \
|
| 15 |
+
build-essential \
|
| 16 |
+
curl \
|
| 17 |
+
git \
|
| 18 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 19 |
+
|
| 20 |
+
# Copy requirements first for better layer caching
|
| 21 |
+
COPY requirements.txt .
|
| 22 |
+
|
| 23 |
+
# Install Python dependencies
|
| 24 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 25 |
+
|
| 26 |
+
# Copy the application code
|
| 27 |
+
COPY . .
|
| 28 |
+
|
| 29 |
+
# Create necessary directories
|
| 30 |
+
RUN mkdir -p data/chroma_db uploads data .streamlit && \
|
| 31 |
+
touch data/app.log && \
|
| 32 |
+
chmod -R 777 data uploads .streamlit
|
| 33 |
+
|
| 34 |
+
# Expose the port that Streamlit runs on
|
| 35 |
+
EXPOSE 7860
|
| 36 |
+
|
| 37 |
+
# Set the default port for Hugging Face Spaces
|
| 38 |
+
ENV STREAMLIT_SERVER_PORT=7860
|
| 39 |
+
ENV STREAMLIT_SERVER_ADDRESS=0.0.0.0
|
| 40 |
+
ENV STREAMLIT_CONFIG_DIR=/app/.streamlit
|
| 41 |
+
ENV XDG_CONFIG_HOME=/app
|
| 42 |
+
|
| 43 |
+
# Health check
|
| 44 |
+
HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health
|
| 45 |
+
|
| 46 |
+
# Command to run the application
|
| 47 |
+
CMD ["streamlit", "run", "main.py", "--server.port=7860", "--server.address=0.0.0.0", "--server.headless=true", "--server.fileWatcherType=none"]
|
HUGGINGFACE_DEPLOYMENT.md
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Hugging Face Spaces Deployment Guide for Lega.AI
|
| 2 |
+
|
| 3 |
+
## 🚀 Quick Deployment to Hugging Face Spaces
|
| 4 |
+
|
| 5 |
+
### Step 1: Create a New Space
|
| 6 |
+
|
| 7 |
+
1. Go to [Hugging Face Spaces](https://huggingface.co/spaces)
|
| 8 |
+
2. Click "Create new Space"
|
| 9 |
+
3. Choose:
|
| 10 |
+
- **Space name**: `lega-ai` (or your preferred name)
|
| 11 |
+
- **License**: `MIT`
|
| 12 |
+
- **SDK**: `Docker`
|
| 13 |
+
- **Hardware**: `CPU basic` (sufficient for this app)
|
| 14 |
+
|
| 15 |
+
### Step 2: Upload the Code
|
| 16 |
+
|
| 17 |
+
1. Clone or download this repository
|
| 18 |
+
2. Upload all files to your Hugging Face Space repository
|
| 19 |
+
3. Ensure the `README.md` has the correct frontmatter:
|
| 20 |
+
```yaml
|
| 21 |
+
---
|
| 22 |
+
title: Lega.AI
|
| 23 |
+
emoji: ⚖️
|
| 24 |
+
colorFrom: pink
|
| 25 |
+
colorTo: indigo
|
| 26 |
+
sdk: docker
|
| 27 |
+
pinned: false
|
| 28 |
+
---
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
### Step 3: Configure Environment Variables
|
| 32 |
+
|
| 33 |
+
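1. In your Space, go to **Settings** → **Variables and secrets**
|
| 34 |
+
2. Add the required environment variable as a **Secret** (Variables are publicly visible, so do not store the key as a Variable):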
|
| 35 |
+
- **Name**: `GOOGLE_API_KEY`
|
| 36 |
+
- **Value**: Your Google AI API key from [Google AI Studio](https://aistudio.google.com/)
|
| 37 |
+
|
| 38 |
+
### Step 4: Deploy
|
| 39 |
+
|
| 40 |
+
1. Commit and push your changes to the Space repository
|
| 41 |
+
2. Hugging Face will automatically build and deploy your Docker container
|
| 42 |
+
3. Wait for the build to complete (usually 5-10 minutes)
|
| 43 |
+
4. Your app will be available at `https://huggingface.co/spaces/[username]/[space-name]`
|
| 44 |
+
|
| 45 |
+
## 🔧 Customization Options
|
| 46 |
+
|
| 47 |
+
### Environment Variables You Can Set:
|
| 48 |
+
|
| 49 |
+
- `GOOGLE_API_KEY` (required)
|
| 50 |
+
- `MAX_FILE_SIZE_MB` (default: 5)
|
| 51 |
+
- `TEMPERATURE` (default: 0.2)
|
| 52 |
+
- `LOG_LEVEL` (default: INFO)
|
| 53 |
+
|
| 54 |
+
### Hardware Requirements:
|
| 55 |
+
|
| 56 |
+
- **CPU Basic**: Sufficient for most use cases
|
| 57 |
+
- **CPU Upgrade**: Recommended for heavy usage
|
| 58 |
+
- **GPU**: Not required for this application
|
| 59 |
+
|
| 60 |
+
## 📋 Troubleshooting
|
| 61 |
+
|
| 62 |
+
### Common Issues:
|
| 63 |
+
|
| 64 |
+
1. **Build fails**: Check that all files are uploaded correctly
|
| 65 |
+
2. **API errors**: Ensure `GOOGLE_API_KEY` is set correctly
|
| 66 |
+
3. **Timeout**: Consider upgrading to CPU Upgrade hardware
|
| 67 |
+
|
| 68 |
+
### Logs:
|
| 69 |
+
|
| 70 |
+
- Check the Space logs in the Hugging Face interface
|
| 71 |
+
- Look for startup messages and error information
|
| 72 |
+
|
| 73 |
+
## 🔒 Security Considerations
|
| 74 |
+
|
| 75 |
+
- Never commit your API key to the repository
|
| 76 |
+
- Use Hugging Face Spaces secrets (not public variables) for sensitive data such as API keys
|
| 77 |
+
- The application runs in a sandboxed environment on Hugging Face
|
| 78 |
+
|
| 79 |
+
## 📊 Usage Limits
|
| 80 |
+
|
| 81 |
+
- Hugging Face Spaces has usage limits for free tiers
|
| 82 |
+
- Consider upgrading for production use
|
| 83 |
+
- Monitor usage in your Hugging Face account dashboard
|
README.md
ADDED
|
@@ -0,0 +1,484 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Lega.AI
|
| 3 |
+
emoji: ⚖️
|
| 4 |
+
colorFrom: pink
|
| 5 |
+
colorTo: indigo
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
# Lega.AI
|
| 11 |
+
|
| 12 |
+
AI-powered legal document analysis and simplification platform that makes complex legal documents accessible to everyone.
|
| 13 |
+
|
| 14 |
+

|
| 15 |
+

|
| 16 |
+

|
| 17 |
+

|
| 18 |
+
|
| 19 |
+
## 📋 Table of Contents
|
| 20 |
+
|
| 21 |
+
- [🚀 Features](#-features)
|
| 22 |
+
- [🛠️ Tech Stack](#️-tech-stack)
|
| 23 |
+
- [📋 Prerequisites](#-prerequisites)
|
| 24 |
+
- [🚀 Quick Start](#-quick-start)
|
| 25 |
+
- [🐳 Docker Deployment](#-docker-deployment)
|
| 26 |
+
- [📁 Project Structure](#-project-structure)
|
| 27 |
+
- [🎯 Usage Guide](#-usage-guide)
|
| 28 |
+
- [📄 Sample Documents](#-sample-documents)
|
| 29 |
+
- [🚨 Document Types Supported](#-document-types-supported)
|
| 30 |
+
- [⚡ Key Features Deep Dive](#-key-features-deep-dive)
|
| 31 |
+
- [🔧 Configuration Options](#-configuration-options)
|
| 32 |
+
- [🔒 Privacy & Security](#-privacy--security)
|
| 33 |
+
- [🤝 Contributing](#-contributing)
|
| 34 |
+
- [🆘 Support](#-support)
|
| 35 |
+
- [🎯 Roadmap](#-roadmap)
|
| 36 |
+
|
| 37 |
+
## 🚀 Features
|
| 38 |
+
|
| 39 |
+
- **🔍 Advanced Document Analysis**: Upload PDF/DOCX/TXT files and get comprehensive AI-powered analysis using Google's Gemini
|
| 40 |
+
- **📝 Plain Language Translation**: Convert complex legal jargon into clear, understandable language with context-aware explanations
|
| 41 |
+
- **⚠️ Intelligent Risk Assessment**: Multi-dimensional risk scoring with color-coded severity levels and detailed explanations
|
| 42 |
+
- **💬 Interactive Q&A Assistant**: Ask specific questions about your documents and get instant, context-aware AI responses
|
| 43 |
+
- **🎯 Smart Clause Highlighting**: Visual highlighting of risky clauses with interactive tooltips and improvement suggestions
|
| 44 |
+
- **📊 Vector-Powered Similarity Search**: Find similar clauses across documents using Chroma vector database
|
| 45 |
+
- **📚 Persistent Document Library**: Organize, search, and manage all analyzed documents with metadata
|
| 46 |
+
- **⚠️ Risk Visualization**: Interactive charts and gauges showing risk distribution and severity
|
| 47 |
+
- **🗓️ Key Information Extraction**: Automatically identify important dates, deadlines, and financial terms
|
| 48 |
+
- **💾 Local Data Persistence**: Secure local storage of analysis results and vector embeddings
|
| 49 |
+
- **🎨 Modern UI/UX**: Responsive Streamlit interface with custom CSS and intuitive navigation
|
| 50 |
+
|
| 51 |
+
## 🛠️ Tech Stack
|
| 52 |
+
|
| 53 |
+
- **Frontend**: Streamlit with multi-page navigation and custom CSS styling
|
| 54 |
+
- **AI/ML**: LangChain + Google Generative AI (Gemini Pro)
|
| 55 |
+
- **Embeddings**: Google Generative AI Embeddings (models/text-embedding-004)
|
| 56 |
+
- **Vector Store**: Chroma for document similarity search and persistence
|
| 57 |
+
- **Document Processing**: PyPDF for PDF extraction, python-docx for Word documents
|
| 58 |
+
- **Package Management**: UV (modern Python package manager)
|
| 59 |
+
- **Configuration**: Python-dotenv for environment management
|
| 60 |
+
- **Visualization**: Plotly for interactive charts and analytics
|
| 61 |
+
- **UI Components**: Streamlit-option-menu for enhanced navigation
|
| 62 |
+
|
| 63 |
+
## 📋 Prerequisites
|
| 64 |
+
|
| 65 |
+
- Python 3.13+ for local development (matches `.python-version`; the provided Docker image runs on Python 3.11)
|
| 66 |
+
- Google AI API key (get from [Google AI Studio](https://aistudio.google.com/))
|
| 67 |
+
- UV package manager (recommended for fast, reliable dependency management)
|
| 68 |
+
|
| 69 |
+
## 🚀 Quick Start
|
| 70 |
+
|
| 71 |
+
### 1. **Clone and navigate to the project**:
|
| 72 |
+
|
| 73 |
+
```bash
|
| 74 |
+
git clone <repository-url>
|
| 75 |
+
cd Lega.AI
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
### 2. **Install UV (if not already installed)**:
|
| 79 |
+
|
| 80 |
+
```bash
|
| 81 |
+
# On macOS/Linux
|
| 82 |
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
| 83 |
+
|
| 84 |
+
# On Windows (PowerShell)
|
| 85 |
+
powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
|
| 86 |
+
|
| 87 |
+
# Or using pip
|
| 88 |
+
pip install uv
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
### 3. **Set up environment and install dependencies**:
|
| 92 |
+
|
| 93 |
+
```bash
|
| 94 |
+
# Create and activate virtual environment with dependencies
|
| 95 |
+
uv sync
|
| 96 |
+
|
| 97 |
+
# Or if you prefer traditional approach:
|
| 98 |
+
# uv venv
|
| 99 |
+
# source .venv/bin/activate # On Windows: .venv\Scripts\activate
|
| 100 |
+
# uv pip install -r pyproject.toml
|
| 101 |
+
```
|
| 102 |
+
|
| 103 |
+
### 4. **Configure environment**:
|
| 104 |
+
|
| 105 |
+
```bash
|
| 106 |
+
# Copy the template file
|
| 107 |
+
cp .env.example .env
|
| 108 |
+
|
| 109 |
+
# Edit .env file and update the following required settings:
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
**Required Configuration:**
|
| 113 |
+
|
| 114 |
+
```env
|
| 115 |
+
# Get your API key from: https://aistudio.google.com/
|
| 116 |
+
GOOGLE_API_KEY=your-google-api-key-here
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
**Optional Configuration (with sensible defaults):**
|
| 120 |
+
|
| 121 |
+
```env
|
| 122 |
+
# Application Settings
|
| 123 |
+
DEBUG=True
|
| 124 |
+
LOG_LEVEL=INFO
|
| 125 |
+
STREAMLIT_SERVER_PORT=8501
|
| 126 |
+
STREAMLIT_SERVER_ADDRESS=localhost
|
| 127 |
+
|
| 128 |
+
# File Upload Settings
|
| 129 |
+
MAX_FILE_SIZE_MB=10
|
| 130 |
+
SUPPORTED_FILE_TYPES=pdf,docx,txt
|
| 131 |
+
|
| 132 |
+
# AI Model Settings
|
| 133 |
+
TEMPERATURE=0.2
|
| 134 |
+
MAX_TOKENS=2048
|
| 135 |
+
EMBEDDING_MODEL=models/text-embedding-004
|
| 136 |
+
|
| 137 |
+
# Storage Configuration
|
| 138 |
+
CHROMA_PERSIST_DIRECTORY=./data/chroma_db
|
| 139 |
+
UPLOAD_DIR=./uploads
|
| 140 |
+
DATA_DIR=./data
|
| 141 |
+
LOG_FILE=./data/app.log
|
| 142 |
+
|
| 143 |
+
# Security Settings
|
| 144 |
+
SECRET_KEY=your-secret-key-here
|
| 145 |
+
SESSION_TIMEOUT_MINUTES=60
|
| 146 |
+
```
|
| 147 |
+
|
| 148 |
+
### 5. **Run the application**:
|
| 149 |
+
|
| 150 |
+
```bash
|
| 151 |
+
# If using UV (recommended)
|
| 152 |
+
uv run streamlit run main.py
|
| 153 |
+
|
| 154 |
+
# Or with activated virtual environment
|
| 155 |
+
streamlit run main.py
|
| 156 |
+
```
|
| 157 |
+
|
| 158 |
+
### 6. **Open your browser** to `http://localhost:8501`
|
| 159 |
+
|
| 160 |
+
### 🎯 Try the Demo
|
| 161 |
+
|
| 162 |
+
Once running, you can immediately test the application with the included sample documents:
|
| 163 |
+
|
| 164 |
+
- Navigate to **📄 Upload** page
|
| 165 |
+
- Try the sample documents: Employment contracts, NDAs, Lease agreements, Service agreements
|
| 166 |
+
- Experience the full analysis workflow without needing your own documents
|
| 167 |
+
|
| 168 |
+
## 🐳 Docker Deployment
|
| 169 |
+
|
| 170 |
+
### Local Docker Deployment
|
| 171 |
+
|
| 172 |
+
```bash
|
| 173 |
+
# Build the Docker image
|
| 174 |
+
docker build -t lega-ai .
|
| 175 |
+
|
| 176 |
+
# Run the container
|
| 177 |
+
docker run -p 7860:7860 -e GOOGLE_API_KEY=your_api_key_here lega-ai
|
| 178 |
+
```
|
| 179 |
+
|
| 180 |
+
### Hugging Face Spaces Deployment
|
| 181 |
+
|
| 182 |
+
Deploy Lega.AI to Hugging Face Spaces with one click!
|
| 183 |
+
|
| 184 |
+
[](https://huggingface.co/spaces)
|
| 185 |
+
|
| 186 |
+
**Quick Setup:**
|
| 187 |
+
|
| 188 |
+
1. Create a new [Hugging Face Space](https://huggingface.co/spaces) with SDK: Docker
|
| 189 |
+
2. Upload this repository to your Space
|
| 190 |
+
3. Set `GOOGLE_API_KEY` as a Secret in Space Settings → Variables and secrets
|
| 191 |
+
4. Your app will be live at `https://huggingface.co/spaces/[username]/[space-name]`
|
| 192 |
+
|
| 193 |
+
📋 **Detailed Instructions**: See [HUGGINGFACE_DEPLOYMENT.md](./HUGGINGFACE_DEPLOYMENT.md) for complete setup guide.
|
| 194 |
+
|
| 195 |
+
## 📁 Project Structure
|
| 196 |
+
|
| 197 |
+
```
|
| 198 |
+
Lega.AI/
|
| 199 |
+
├── main.py # Main Streamlit application entry point
|
| 200 |
+
├── pyproject.toml # UV/pip package configuration and dependencies
|
| 201 |
+
├── requirements.txt # Docker-compatible requirements file
|
| 202 |
+
├── uv.lock # UV lockfile for reproducible builds
|
| 203 |
+
├── setup.py # Legacy Python package setup
|
| 204 |
+
├── Dockerfile # Docker container configuration
|
| 205 |
+
├── .dockerignore # Docker build optimization
|
| 206 |
+
├── start.sh # Hugging Face Spaces startup script
|
| 207 |
+
├── .env.example # Environment variables template
|
| 208 |
+
├── .env.hf # Hugging Face Spaces configuration
|
| 209 |
+
├── README.md # Project documentation
|
| 210 |
+
├── HUGGINGFACE_DEPLOYMENT.md # HF Spaces deployment guide
|
| 211 |
+
├── src/ # Main application source code
|
| 212 |
+
│ ├── __init__.py
|
| 213 |
+
│ ├── models/
|
| 214 |
+
│ │ ├── __init__.py
|
| 215 |
+
│ │ └── document.py # Document data models and schemas
|
| 216 |
+
│ ├── services/
|
| 217 |
+
│ │ ├── __init__.py
|
| 218 |
+
│ │ ├── document_processor.py # PDF/DOCX text extraction
|
| 219 |
+
│ │ ├── ai_analyzer.py # AI analysis and risk assessment
|
| 220 |
+
│ │ └── vector_store.py # Chroma vector database management
|
| 221 |
+
│ ├── pages/
|
| 222 |
+
│ │ ├── __init__.py
|
| 223 |
+
│ │ ├── upload.py # Document upload interface
|
| 224 |
+
│ │ ├── analysis.py # Document analysis dashboard
|
| 225 |
+
│ │ ├── qa_assistant.py # Interactive Q&A chat interface
|
| 226 |
+
│ │ ├── library.py # Document library management
|
| 227 |
+
│ │ └── settings.py # Application settings and configuration
|
| 228 |
+
│ └── utils/
|
| 229 |
+
│ ├── __init__.py
|
| 230 |
+
│ ├── config.py # Environment configuration management
|
| 231 |
+
│ ├── logger.py # Logging utilities and setup
|
| 232 |
+
│ └── helpers.py # Common helper functions
|
| 233 |
+
├── sample/ # Sample legal documents for testing
|
| 234 |
+
│ ├── Employment_Offer_Letter.pdf
|
| 235 |
+
│ ├── Master_Services_Agreement.pdf
|
| 236 |
+
│ ├── Mutual_NDA.pdf
|
| 237 |
+
│ └── Residential_Lease_Agreement.pdf
|
| 238 |
+
├── data/ # Local data storage and persistence
|
| 239 |
+
│ ├── app.log # Application logs
|
| 240 |
+
│ └── chroma_db/ # Vector database storage
|
| 241 |
+
└── uploads/ # Temporary file uploads directory
|
| 242 |
+
```
|
| 243 |
+
|
| 244 |
+
## 🎯 Usage Guide
|
| 245 |
+
|
| 246 |
+
### 1. Document Upload & Processing
|
| 247 |
+
|
| 248 |
+
- Navigate to **📄 Upload** page
|
| 249 |
+
- Upload PDF, DOCX, or TXT files (max 10MB per file)
|
| 250 |
+
- Try the included sample documents for immediate testing
|
| 251 |
+
- Automatic document type detection and text extraction
|
| 252 |
+
|
| 253 |
+
### 2. Comprehensive Analysis Dashboard
|
| 254 |
+
|
| 255 |
+
Visit **📊 Analysis** to explore:
|
| 256 |
+
|
| 257 |
+
- **Risk Score Gauge**: Interactive 0-100 risk assessment with color coding
|
| 258 |
+
- **Side-by-Side Comparison**: Original text vs. simplified plain language
|
| 259 |
+
- **Risk Factor Breakdown**: Detailed explanations of identified risks with severity levels
|
| 260 |
+
- **Interactive Clause Highlighting**: Hover over highlighted text for tooltips with suggestions
|
| 261 |
+
- **Financial & Date Extraction**: Automatic identification of monetary amounts and key dates
|
| 262 |
+
- **Risk Visualization Charts**: Visual distribution of risk categories and severity
|
| 263 |
+
|
| 264 |
+
### 3. Interactive Q&A Assistant
|
| 265 |
+
|
| 266 |
+
- Use **💬 Q&A** for document-specific questions and analysis
|
| 267 |
+
- Get context-aware answers powered by vector similarity search
|
| 268 |
+
- Access suggested questions based on document type and content
|
| 269 |
+
- Chat history preservation for reference and record-keeping
|
| 270 |
+
|
| 271 |
+
### 4. Document Library Management
|
| 272 |
+
|
| 273 |
+
- **📚 Library** provides persistent storage of all analyzed documents
|
| 274 |
+
- Advanced filtering by document type, risk level, upload date
|
| 275 |
+
- Full-text search across document content and analysis results
|
| 276 |
+
- Quick re-analysis and direct access to Q&A for stored documents
|
| 277 |
+
- Document metadata and analysis summary views
|
| 278 |
+
|
| 279 |
+
### 5. Settings & Configuration
|
| 280 |
+
|
| 281 |
+
- **⚙️ Settings** for API key management and validation
|
| 282 |
+
- Application configuration and performance monitoring
|
| 283 |
+
- Usage statistics and system health information
|
| 284 |
+
|
| 285 |
+
## 🔧 Configuration Options
|
| 286 |
+
|
| 287 |
+
The application uses environment variables for configuration. All settings can be customized in the `.env` file based on the `.env.example` template.
|
| 288 |
+
|
| 289 |
+
### 🔑 Required Settings
|
| 290 |
+
|
| 291 |
+
| Variable | Description | Example |
|
| 292 |
+
| ---------------- | -------------------------------- | ----------------------------- |
|
| 293 |
+
| `GOOGLE_API_KEY` | Google AI API key for Gemini Pro | `xyz` (from AI Studio) |
|
| 294 |
+
|
| 295 |
+
### ⚙️ Application Settings
|
| 296 |
+
|
| 297 |
+
| Variable | Default | Description |
|
| 298 |
+
| -------------------------- | -------------- | ---------------------------------- |
|
| 299 |
+
| `DEBUG` | `True` | Enable debug mode and verbose logs |
|
| 300 |
+
| `LOG_LEVEL` | `INFO` | Logging level (DEBUG/INFO/WARNING) |
|
| 301 |
+
| `STREAMLIT_SERVER_PORT` | `8501` | Port for Streamlit server |
|
| 302 |
+
| `STREAMLIT_SERVER_ADDRESS` | `localhost` | Server address binding |
|
| 303 |
+
| `MAX_FILE_SIZE_MB` | `10` | Maximum upload file size |
|
| 304 |
+
| `SUPPORTED_FILE_TYPES` | `pdf,docx,txt` | Allowed file extensions |
|
| 305 |
+
|
| 306 |
+
### 🤖 AI Model Settings
|
| 307 |
+
|
| 308 |
+
| Variable | Default | Description |
|
| 309 |
+
| ----------------- | ---------------------- | -------------------------------- |
|
| 310 |
+
| `TEMPERATURE` | `0.2` | AI response creativity (0.0-1.0) |
|
| 311 |
+
| `MAX_TOKENS` | `2048` | Maximum response length |
|
| 312 |
+
| `EMBEDDING_MODEL` | `models/embedding-001` | Google AI embedding model |
|
| 313 |
+
|
| 314 |
+
### 💾 Storage Configuration
|
| 315 |
+
|
| 316 |
+
| Variable | Default | Description |
|
| 317 |
+
| -------------------------- | ------------------ | ---------------------------- |
|
| 318 |
+
| `CHROMA_PERSIST_DIRECTORY` | `./data/chroma_db` | Vector database storage path |
|
| 319 |
+
| `UPLOAD_DIR` | `./uploads` | Temporary file uploads |
|
| 320 |
+
| `DATA_DIR` | `./data` | Application data directory |
|
| 321 |
+
| `LOG_FILE` | `./data/app.log` | Application log file path |
|
| 322 |
+
|
| 323 |
+
### 🔒 Security Settings
|
| 324 |
+
|
| 325 |
+
| Variable | Default | Description |
|
| 326 |
+
| ------------------------- | ------- | ------------------------ |
|
| 327 |
+
| `SECRET_KEY` | None | Application secret key |
|
| 328 |
+
| `SESSION_TIMEOUT_MINUTES` | `60` | Session timeout duration |
|
| 329 |
+
|
| 330 |
+
### Example .env configuration:
|
| 331 |
+
|
| 332 |
+
```bash
|
| 333 |
+
# Required
|
| 334 |
+
GOOGLE_API_KEY=your-google-ai-api-key
|
| 335 |
+
|
| 336 |
+
# Optional (with defaults shown)
|
| 337 |
+
DEBUG=True
|
| 338 |
+
LOG_LEVEL=INFO
|
| 339 |
+
MAX_FILE_SIZE_MB=10
|
| 340 |
+
SUPPORTED_FILE_TYPES=pdf,docx,txt
|
| 341 |
+
CHROMA_PERSIST_DIRECTORY=./data/chroma_db
|
| 342 |
+
TEMPERATURE=0.2
|
| 343 |
+
```
|
| 344 |
+
|
| 345 |
+
## 📁 Sample Documents
|
| 346 |
+
|
| 347 |
+
The project includes professionally-crafted sample legal documents for testing and demonstration:
|
| 348 |
+
|
| 349 |
+
| Document Type | Filename | Purpose |
|
| 350 |
+
| ---------------------------- | --------------------------------- | ---------------------------------------- |
|
| 351 |
+
| **Employment Contract** | `Employment_Offer_Letter.pdf` | Test employment-related clause analysis |
|
| 352 |
+
| **Service Agreement** | `Master_Services_Agreement.pdf` | Demonstrate commercial contract analysis |
|
| 353 |
+
| **Non-Disclosure Agreement** | `Mutual_NDA.pdf` | Show confidentiality clause assessment |
|
| 354 |
+
| **Lease Agreement** | `Residential_Lease_Agreement.pdf` | Test rental/property contract analysis |
|
| 355 |
+
|
| 356 |
+
These documents are located in the `sample/` directory and can be uploaded directly through the application to:
|
| 357 |
+
|
| 358 |
+
- Experience the complete analysis workflow
|
| 359 |
+
- Test different document types and complexity levels
|
| 360 |
+
- Understand risk assessment capabilities
|
| 361 |
+
- Explore Q&A functionality with real legal content
|
| 362 |
+
|
| 363 |
+
## 🚨 Document Types Supported
|
| 364 |
+
|
| 365 |
+
Currently optimized for:
|
| 366 |
+
|
| 367 |
+
- **🏠 Rental/Lease Agreements**
|
| 368 |
+
- **💰 Loan Contracts**
|
| 369 |
+
- **💼 Employment Contracts**
|
| 370 |
+
- **🤝 Service Agreements**
|
| 371 |
+
- **🔒 Non-Disclosure Agreements (NDAs)**
|
| 372 |
+
- **📄 General Legal Documents**
|
| 373 |
+
|
| 374 |
+
## ⚡ Key Features Deep Dive
|
| 375 |
+
|
| 376 |
+
### 🔍 Advanced Risk Assessment Engine
|
| 377 |
+
|
| 378 |
+
- **Multi-dimensional Analysis**: Evaluates financial, legal commitment, and rights-related risks
|
| 379 |
+
- **Intelligent Severity Classification**: Categorizes risks as Low, Medium, High, or Critical
|
| 380 |
+
- **Contextual Risk Scoring**: Dynamic 0-100 scale based on document type and complexity
|
| 381 |
+
- **Actionable Recommendations**: Specific suggestions for improving problematic clauses
|
| 382 |
+
|
| 383 |
+
### 📝 AI-Powered Plain Language Translation
|
| 384 |
+
|
| 385 |
+
- **Context-Aware Simplification**: Maintains legal accuracy while improving readability
|
| 386 |
+
- **Jargon Definition System**: Interactive tooltips for complex legal terms
|
| 387 |
+
- **Document Type Optimization**: Tailored simplification based on contract category
|
| 388 |
+
- **Preservation of Legal Intent**: Ensures meaning is not lost in translation
|
| 389 |
+
|
| 390 |
+
### 🎯 Interactive Clause Analysis
|
| 391 |
+
|
| 392 |
+
- **Smart Highlighting System**: Visual identification of risky and important clauses
|
| 393 |
+
- **Hover Tooltips**: Immediate access to explanations and suggestions
|
| 394 |
+
- **Clause Categorization**: Organized by risk type and legal significance
|
| 395 |
+
- **Improvement Suggestions**: Specific recommendations for clause modifications
|
| 396 |
+
|
| 397 |
+
### 🔍 Vector-Powered Document Intelligence
|
| 398 |
+
|
| 399 |
+
- **Semantic Search**: Find similar clauses across your document library
|
| 400 |
+
- **Context-Aware Q&A**: Answers grounded in actual document content
|
| 401 |
+
- **Document Similarity**: Compare clauses against known patterns and standards
|
| 402 |
+
- **Persistent Knowledge Base**: Chroma vector database for fast, accurate retrieval
|
| 403 |
+
|
| 404 |
+
### 📊 Advanced Visualization & Analytics
|
| 405 |
+
|
| 406 |
+
- **Interactive Risk Gauges**: Real-time visual risk assessment
|
| 407 |
+
- **Risk Distribution Charts**: Breakdown of risk categories and severity
|
| 408 |
+
- **Financial Terms Extraction**: Automatic identification of monetary obligations
|
| 409 |
+
- **Timeline Analysis**: Key dates and deadline extraction with visualization
|
| 410 |
+
|
| 411 |
+
### 💾 Enterprise-Grade Data Management
|
| 412 |
+
|
| 413 |
+
- **Local Data Persistence**: Secure storage of documents and analysis results
|
| 414 |
+
- **Document Library**: Organized management with search and filtering
|
| 415 |
+
- **Analysis History**: Complete audit trail of document processing
|
| 416 |
+
- **Metadata Extraction**: Automatic tagging and categorization
|
| 417 |
+
|
| 418 |
+
## 🔒 Privacy & Security
|
| 419 |
+
|
| 420 |
+
### 🛡️ Data Protection
|
| 421 |
+
|
| 422 |
+
- **Local Processing**: Documents analyzed locally with secure API calls to Google AI
|
| 423 |
+
- **No Data Sharing**: No third-party data sharing or storage outside your environment, other than the Google AI API calls used for analysis
|
| 424 |
+
- **Secure Storage**: Vector embeddings and analysis results stored locally in Chroma database
|
| 425 |
+
- **Environment Security**: API keys managed through secure environment variables
|
| 426 |
+
|
| 427 |
+
### 🔐 Security Best Practices
|
| 428 |
+
|
| 429 |
+
- **API Key Protection**: Secure credential management with environment-based configuration
|
| 430 |
+
- **Local Vector Storage**: Document embeddings stored exclusively on your local system
|
| 431 |
+
- **Session Management**: Configurable session timeouts and secure state management
|
| 432 |
+
- **Input Validation**: Comprehensive file type and size validation for uploads
|
| 433 |
+
|
| 434 |
+
### 📋 Data Handling
|
| 435 |
+
|
| 436 |
+
- **Temporary Upload Storage**: Uploaded files processed and optionally removed from temp storage
|
| 437 |
+
- **Persistent Analysis**: Analysis results retained locally for document library functionality
|
| 438 |
+
- **User Control**: Complete control over data retention and deletion
|
| 439 |
+
- **Audit Trail**: Transparent logging of all document processing activities
|
| 440 |
+
|
| 441 |
+
## 🤝 Contributing
|
| 442 |
+
|
| 443 |
+
1. Fork the repository
|
| 444 |
+
2. Create a feature branch
|
| 445 |
+
3. Make your changes
|
| 446 |
+
4. Test thoroughly
|
| 447 |
+
5. Submit a pull request
|
| 448 |
+
|
| 449 |
+
## 📄 License
|
| 450 |
+
|
| 451 |
+
MIT License - see LICENSE file for details.
|
| 452 |
+
|
| 453 |
+
## 🆘 Support
|
| 454 |
+
|
| 455 |
+
### 📚 Documentation & Resources
|
| 456 |
+
|
| 457 |
+
- **In-Code Documentation**: Comprehensive docstrings and code comments throughout the project
|
| 458 |
+
- **Configuration Guide**: Detailed environment setup and configuration options above
|
| 459 |
+
- **Sample Documents**: Use included sample contracts to understand features and capabilities
|
| 460 |
+
|
| 461 |
+
### 🐛 Issues & Bug Reports
|
| 462 |
+
|
| 463 |
+
- **GitHub Issues**: Report bugs, request features, or ask questions via [GitHub Issues](https://github.com/your-repo/Lega.AI/issues)
|
| 464 |
+
- **Bug Reports**: Include system info, error logs, and steps to reproduce
|
| 465 |
+
- **Feature Requests**: Describe use cases and expected functionality
|
| 466 |
+
|
| 467 |
+
### 🛠️ Development & API References
|
| 468 |
+
|
| 469 |
+
- **Google AI Documentation**: [Google AI Developer Guide](https://ai.google.dev/) for Gemini API details
|
| 470 |
+
- **LangChain Documentation**: [LangChain Docs](https://python.langchain.com/) for framework reference
|
| 471 |
+
- **Streamlit Documentation**: [Streamlit Docs](https://docs.streamlit.io/) for UI framework guidance
|
| 472 |
+
- **Chroma Documentation**: [Chroma Docs](https://docs.trychroma.com/) for vector database operations
|
| 473 |
+
|
| 474 |
+
### 💡 Getting Help
|
| 475 |
+
|
| 476 |
+
1. **Check Documentation**: Review this README and in-code comments first
|
| 477 |
+
2. **Try Sample Documents**: Use provided samples to test functionality
|
| 478 |
+
3. **Check Logs**: Review `data/app.log` for detailed error information
|
| 479 |
+
4. **Environment Issues**: Verify `.env` configuration and API key validity
|
| 480 |
+
5. **Community Support**: Open GitHub discussions for general questions
|
| 481 |
+
|
| 482 |
+
---
|
| 483 |
+
|
| 484 |
+
**Made with ❤️ using Streamlit, LangChain, and Google AI**
|
main.py
ADDED
|
@@ -0,0 +1,789 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from streamlit_option_menu import option_menu
|
| 3 |
+
import os
|
| 4 |
+
import sys
|
| 5 |
+
|
| 6 |
+
# Add src directory to Python path
|
| 7 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), "src"))
|
| 8 |
+
|
| 9 |
+
from src.utils.config import config
|
| 10 |
+
from src.utils.logger import setup_logging
|
| 11 |
+
|
| 12 |
+
# Page config
|
| 13 |
+
st.set_page_config(
|
| 14 |
+
page_title="Lega.AI", page_icon="⚖️", layout="wide", initial_sidebar_state="expanded"
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
# Custom CSS for responsive dark/light theme
|
| 18 |
+
st.markdown(
|
| 19 |
+
"""
|
| 20 |
+
<style>
|
| 21 |
+
/* Main header with gradient text */
|
| 22 |
+
.main-header {
|
| 23 |
+
font-size: 3rem;
|
| 24 |
+
font-weight: bold;
|
| 25 |
+
text-align: center;
|
| 26 |
+
margin-bottom: 2rem;
|
| 27 |
+
background: linear-gradient(90deg, #1f4e79, #2e86ab);
|
| 28 |
+
-webkit-background-clip: text;
|
| 29 |
+
-webkit-text-fill-color: transparent;
|
| 30 |
+
background-clip: text;
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
/* Responsive feature cards that adapt to theme */
|
| 34 |
+
.feature-card {
|
| 35 |
+
background: var(--background-color);
|
| 36 |
+
padding: 1.5rem;
|
| 37 |
+
border-radius: 12px;
|
| 38 |
+
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
|
| 39 |
+
margin: 1rem 0;
|
| 40 |
+
border-left: 4px solid #2e86ab;
|
| 41 |
+
border: 1px solid var(--border-color);
|
| 42 |
+
color: var(--text-color);
|
| 43 |
+
transition: all 0.3s ease;
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
.feature-card:hover {
|
| 47 |
+
transform: translateY(-2px);
|
| 48 |
+
box-shadow: 0 6px 12px rgba(0, 0, 0, 0.15);
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
/* Risk color indicators */
|
| 52 |
+
.risk-critical { color: #ff4444; font-weight: bold; }
|
| 53 |
+
.risk-high { color: #ff6666; font-weight: bold; }
|
| 54 |
+
.risk-medium { color: #ffaa00; font-weight: bold; }
|
| 55 |
+
.risk-low { color: #ffcc00; font-weight: bold; }
|
| 56 |
+
.risk-safe { color: #44aa44; font-weight: bold; }
|
| 57 |
+
|
| 58 |
+
/* Responsive metric cards */
|
| 59 |
+
.metric-card {
|
| 60 |
+
background: var(--secondary-background-color);
|
| 61 |
+
padding: 1.5rem;
|
| 62 |
+
border-radius: 10px;
|
| 63 |
+
text-align: center;
|
| 64 |
+
margin: 0.5rem 0;
|
| 65 |
+
border: 1px solid var(--border-color);
|
| 66 |
+
color: var(--text-color);
|
| 67 |
+
transition: all 0.3s ease;
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
.metric-card:hover {
|
| 71 |
+
background: var(--hover-background-color);
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
/* Enhanced button styling */
|
| 75 |
+
.stButton > button {
|
| 76 |
+
width: 100%;
|
| 77 |
+
background: linear-gradient(135deg, #2e86ab, #1f4e79);
|
| 78 |
+
color: white !important;
|
| 79 |
+
border: none;
|
| 80 |
+
border-radius: 10px;
|
| 81 |
+
padding: 0.75rem 1rem;
|
| 82 |
+
font-weight: 600;
|
| 83 |
+
transition: all 0.3s ease;
|
| 84 |
+
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
.stButton > button:hover {
|
| 88 |
+
background: linear-gradient(135deg, #1f4e79, #2e86ab);
|
| 89 |
+
transform: translateY(-1px);
|
| 90 |
+
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
/* Enhanced sidebar styling for dark/light theme */
|
| 94 |
+
.css-1d391kg {
|
| 95 |
+
background: var(--background-color) !important;
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
/* Streamlit sidebar container */
|
| 99 |
+
section[data-testid="stSidebar"] {
|
| 100 |
+
background: var(--background-color) !important;
|
| 101 |
+
border-right: 1px solid var(--border-color) !important;
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
/* Sidebar content */
|
| 105 |
+
section[data-testid="stSidebar"] > div {
|
| 106 |
+
background: var(--background-color) !important;
|
| 107 |
+
color: var(--text-color) !important;
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
/* Sidebar header */
|
| 111 |
+
section[data-testid="stSidebar"] .block-container {
|
| 112 |
+
background: var(--background-color) !important;
|
| 113 |
+
color: var(--text-color) !important;
|
| 114 |
+
}
|
| 115 |
+
|
| 116 |
+
/* Option menu in sidebar */
|
| 117 |
+
section[data-testid="stSidebar"] .nav-link {
|
| 118 |
+
background: var(--secondary-background-color) !important;
|
| 119 |
+
color: var(--text-color) !important;
|
| 120 |
+
border: 1px solid var(--border-color) !important;
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
/* Active option in sidebar */
|
| 124 |
+
section[data-testid="stSidebar"] .nav-link.active {
|
| 125 |
+
background: linear-gradient(135deg, #2e86ab, #1f4e79) !important;
|
| 126 |
+
color: white !important;
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
/* Streamlit Option Menu specific styling - Force override */
|
| 130 |
+
.nav-link {
|
| 131 |
+
background: var(--secondary-background-color) !important;
|
| 132 |
+
color: var(--text-color) !important;
|
| 133 |
+
border: 1px solid var(--border-color) !important;
|
| 134 |
+
border-radius: 8px !important;
|
| 135 |
+
margin: 2px 0 !important;
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
.nav-link:hover {
|
| 139 |
+
background: var(--hover-background-color) !important;
|
| 140 |
+
color: var(--text-color) !important;
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
.nav-link.active {
|
| 144 |
+
background: linear-gradient(135deg, #2e86ab, #1f4e79) !important;
|
| 145 |
+
color: white !important;
|
| 146 |
+
border: 1px solid #2e86ab !important;
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
/* Option menu container */
|
| 150 |
+
.nav {
|
| 151 |
+
background: transparent !important;
|
| 152 |
+
}
|
| 153 |
+
|
| 154 |
+
/* Fix option menu wrapper */
|
| 155 |
+
div[data-testid="stVerticalBlock"] > div > div {
|
| 156 |
+
background: transparent !important;
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
/* More specific selectors for option menu */
|
| 160 |
+
section[data-testid="stSidebar"] .nav-link {
|
| 161 |
+
background-color: var(--secondary-background-color) !important;
|
| 162 |
+
color: var(--text-color) !important;
|
| 163 |
+
border: 1px solid var(--border-color) !important;
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
section[data-testid="stSidebar"] .nav-link:hover {
|
| 167 |
+
background-color: var(--hover-background-color) !important;
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
section[data-testid="stSidebar"] .nav-link.active {
|
| 171 |
+
background-color: #2e86ab !important;
|
| 172 |
+
color: white !important;
|
| 173 |
+
}
|
| 174 |
+
|
| 175 |
+
/* Force override any white backgrounds in sidebar */
|
| 176 |
+
section[data-testid="stSidebar"] * {
|
| 177 |
+
background-color: inherit !important;
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
section[data-testid="stSidebar"] .nav-link {
|
| 181 |
+
background-color: var(--secondary-background-color) !important;
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
/* File uploader styling */
|
| 185 |
+
.uploadedFile {
|
| 186 |
+
background: var(--secondary-background-color) !important;
|
| 187 |
+
border: 2px dashed var(--border-color) !important;
|
| 188 |
+
border-radius: 10px !important;
|
| 189 |
+
color: var(--text-color) !important;
|
| 190 |
+
}
|
| 191 |
+
|
| 192 |
+
/* Tab styling */
|
| 193 |
+
.stTabs [data-baseweb="tab-list"] {
|
| 194 |
+
gap: 8px;
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
+
.stTabs [data-baseweb="tab"] {
|
| 198 |
+
background: var(--secondary-background-color);
|
| 199 |
+
border-radius: 8px;
|
| 200 |
+
color: var(--text-color);
|
| 201 |
+
border: 1px solid var(--border-color);
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
.stTabs [aria-selected="true"] {
|
| 205 |
+
background: linear-gradient(135deg, #2e86ab, #1f4e79) !important;
|
| 206 |
+
color: white !important;
|
| 207 |
+
}
|
| 208 |
+
|
| 209 |
+
/* Tooltip styling for risk factors */
|
| 210 |
+
.tooltip {
|
| 211 |
+
position: relative;
|
| 212 |
+
display: inline;
|
| 213 |
+
cursor: help;
|
| 214 |
+
border-radius: 4px;
|
| 215 |
+
padding: 2px 4px;
|
| 216 |
+
margin: 0 1px;
|
| 217 |
+
}
|
| 218 |
+
|
| 219 |
+
.tooltip .tooltiptext {
|
| 220 |
+
visibility: hidden;
|
| 221 |
+
width: 300px;
|
| 222 |
+
background-color: var(--tooltip-background);
|
| 223 |
+
color: var(--tooltip-text);
|
| 224 |
+
text-align: left;
|
| 225 |
+
border-radius: 8px;
|
| 226 |
+
padding: 12px;
|
| 227 |
+
position: absolute;
|
| 228 |
+
z-index: 1000;
|
| 229 |
+
bottom: 125%;
|
| 230 |
+
left: 50%;
|
| 231 |
+
margin-left: -150px;
|
| 232 |
+
opacity: 0;
|
| 233 |
+
transition: opacity 0.3s, visibility 0.3s;
|
| 234 |
+
font-size: 13px;
|
| 235 |
+
line-height: 1.4;
|
| 236 |
+
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
|
| 237 |
+
border: 1px solid var(--border-color);
|
| 238 |
+
}
|
| 239 |
+
|
| 240 |
+
.tooltip:hover .tooltiptext {
|
| 241 |
+
visibility: visible;
|
| 242 |
+
opacity: 1;
|
| 243 |
+
}
|
| 244 |
+
|
| 245 |
+
/* Risk highlighting */
|
| 246 |
+
.risk-critical {
|
| 247 |
+
background-color: rgba(255, 68, 68, 0.2);
|
| 248 |
+
border-left: 4px solid #ff4444;
|
| 249 |
+
padding: 4px 8px;
|
| 250 |
+
border-radius: 4px;
|
| 251 |
+
}
|
| 252 |
+
.risk-high {
|
| 253 |
+
background-color: rgba(255, 136, 0, 0.2);
|
| 254 |
+
border-left: 4px solid #ff8800;
|
| 255 |
+
padding: 4px 8px;
|
| 256 |
+
border-radius: 4px;
|
| 257 |
+
}
|
| 258 |
+
.risk-medium {
|
| 259 |
+
background-color: rgba(255, 204, 0, 0.2);
|
| 260 |
+
border-left: 4px solid #ffcc00;
|
| 261 |
+
padding: 4px 8px;
|
| 262 |
+
border-radius: 4px;
|
| 263 |
+
}
|
| 264 |
+
.risk-low {
|
| 265 |
+
background-color: rgba(68, 170, 68, 0.2);
|
| 266 |
+
border-left: 4px solid #44aa44;
|
| 267 |
+
padding: 4px 8px;
|
| 268 |
+
border-radius: 4px;
|
| 269 |
+
}
|
| 270 |
+
|
| 271 |
+
/* Jargon term highlighting */
|
| 272 |
+
.jargon-term {
|
| 273 |
+
background-color: rgba(46, 134, 171, 0.2);
|
| 274 |
+
text-decoration: underline dotted #2e86ab;
|
| 275 |
+
padding: 2px 4px;
|
| 276 |
+
border-radius: 3px;
|
| 277 |
+
}
|
| 278 |
+
|
| 279 |
+
/* Dark theme variables */
|
| 280 |
+
[data-theme="dark"] {
|
| 281 |
+
--background-color: #0e1117;
|
| 282 |
+
--secondary-background-color: #262730;
|
| 283 |
+
--text-color: #fafafa;
|
| 284 |
+
--border-color: #464a5a;
|
| 285 |
+
--hover-background-color: #3d4354;
|
| 286 |
+
--tooltip-background: #262730;
|
| 287 |
+
--tooltip-text: #fafafa;
|
| 288 |
+
}
|
| 289 |
+
|
| 290 |
+
/* Light theme variables */
|
| 291 |
+
[data-theme="light"], :root {
|
| 292 |
+
--background-color: #ffffff;
|
| 293 |
+
--secondary-background-color: #f8f9fa;
|
| 294 |
+
--text-color: #262626;
|
| 295 |
+
--border-color: #e0e0e0;
|
| 296 |
+
--hover-background-color: #f0f0f0;
|
| 297 |
+
--tooltip-background: #333333;
|
| 298 |
+
--tooltip-text: #ffffff;
|
| 299 |
+
}
|
| 300 |
+
|
| 301 |
+
/* Auto-detect system theme */
|
| 302 |
+
@media (prefers-color-scheme: dark) {
|
| 303 |
+
:root {
|
| 304 |
+
--background-color: #0e1117;
|
| 305 |
+
--secondary-background-color: #262730;
|
| 306 |
+
--text-color: #fafafa;
|
| 307 |
+
--border-color: #464a5a;
|
| 308 |
+
--hover-background-color: #3d4354;
|
| 309 |
+
--tooltip-background: #262730;
|
| 310 |
+
--tooltip-text: #fafafa;
|
| 311 |
+
}
|
| 312 |
+
}
|
| 313 |
+
|
| 314 |
+
/* Hide Streamlit default elements */
|
| 315 |
+
#MainMenu {visibility: hidden;}
|
| 316 |
+
footer {visibility: hidden;}
|
| 317 |
+
.stDeployButton {visibility: hidden;}
|
| 318 |
+
|
| 319 |
+
/* Fix all Streamlit components for dark theme */
|
| 320 |
+
.stApp {
|
| 321 |
+
background: var(--background-color) !important;
|
| 322 |
+
color: var(--text-color) !important;
|
| 323 |
+
}
|
| 324 |
+
|
| 325 |
+
/* Main content area */
|
| 326 |
+
.main .block-container {
|
| 327 |
+
background: var(--background-color) !important;
|
| 328 |
+
color: var(--text-color) !important;
|
| 329 |
+
padding-top: 2rem;
|
| 330 |
+
padding-bottom: 2rem;
|
| 331 |
+
}
|
| 332 |
+
|
| 333 |
+
/* Text input fields */
|
| 334 |
+
.stTextInput > div > div > input {
|
| 335 |
+
background: var(--secondary-background-color) !important;
|
| 336 |
+
color: var(--text-color) !important;
|
| 337 |
+
border: 1px solid var(--border-color) !important;
|
| 338 |
+
}
|
| 339 |
+
|
| 340 |
+
/* Text areas */
|
| 341 |
+
.stTextArea > div > div > textarea {
|
| 342 |
+
background: var(--secondary-background-color) !important;
|
| 343 |
+
color: var(--text-color) !important;
|
| 344 |
+
border: 1px solid var(--border-color) !important;
|
| 345 |
+
}
|
| 346 |
+
|
| 347 |
+
/* Select boxes */
|
| 348 |
+
.stSelectbox > div > div > select {
|
| 349 |
+
background: var(--secondary-background-color) !important;
|
| 350 |
+
color: var(--text-color) !important;
|
| 351 |
+
border: 1px solid var(--border-color) !important;
|
| 352 |
+
}
|
| 353 |
+
|
| 354 |
+
/* Info, warning, error boxes */
|
| 355 |
+
.stAlert {
|
| 356 |
+
background: var(--secondary-background-color) !important;
|
| 357 |
+
color: var(--text-color) !important;
|
| 358 |
+
border: 1px solid var(--border-color) !important;
|
| 359 |
+
}
|
| 360 |
+
|
| 361 |
+
/* Expander */
|
| 362 |
+
.streamlit-expanderHeader {
|
| 363 |
+
background: var(--secondary-background-color) !important;
|
| 364 |
+
color: var(--text-color) !important;
|
| 365 |
+
border: 1px solid var(--border-color) !important;
|
| 366 |
+
}
|
| 367 |
+
|
| 368 |
+
/* Columns */
|
| 369 |
+
.element-container {
|
| 370 |
+
background: transparent !important;
|
| 371 |
+
}
|
| 372 |
+
|
| 373 |
+
/* Status indicators */
|
| 374 |
+
.status-success {
|
| 375 |
+
background: rgba(68, 170, 68, 0.1);
|
| 376 |
+
border: 1px solid #44aa44;
|
| 377 |
+
border-radius: 6px;
|
| 378 |
+
padding: 8px 12px;
|
| 379 |
+
color: #44aa44;
|
| 380 |
+
}
|
| 381 |
+
|
| 382 |
+
.status-warning {
|
| 383 |
+
background: rgba(255, 136, 0, 0.1);
|
| 384 |
+
border: 1px solid #ff8800;
|
| 385 |
+
border-radius: 6px;
|
| 386 |
+
padding: 8px 12px;
|
| 387 |
+
color: #ff8800;
|
| 388 |
+
}
|
| 389 |
+
|
| 390 |
+
.status-error {
|
| 391 |
+
background: rgba(255, 68, 68, 0.1);
|
| 392 |
+
border: 1px solid #ff4444;
|
| 393 |
+
border-radius: 6px;
|
| 394 |
+
padding: 8px 12px;
|
| 395 |
+
color: #ff4444;
|
| 396 |
+
}
|
| 397 |
+
</style>
|
| 398 |
+
""",
|
| 399 |
+
unsafe_allow_html=True,
|
| 400 |
+
)
|
| 401 |
+
|
| 402 |
+
|
| 403 |
+
def main():
    """Run the Lega.AI app: initialise state, draw the sidebar, render a page.

    The sidebar menu decides which page is shown. Other code can request a
    one-shot redirect by storing a page label in ``st.session_state.page``
    (the home page does this before calling ``st.rerun()``); that request
    overrides the menu selection for the current run only.
    """
    # Initialize logging
    setup_logging()

    # Initialize session state
    for key, default in (("current_document", None), ("documents_library", [])):
        if key not in st.session_state:
            st.session_state[key] = default

    # Single source of truth for the navigation: label -> (menu icon, renderer).
    # Keeping label, icon and handler together avoids three parallel lists that
    # have to be edited in lock-step.
    pages = {
        "🏠 Home": ("house", show_home_page),
        "📄 Upload": ("upload", show_upload_page),
        "📊 Analysis": ("graph-up", show_analysis_page),
        "💬 Q&A": ("chat-dots", show_qa_page),
        "📚 Library": ("folder", show_library_page),
        "⚙️ Settings": ("gear", show_settings_page),
    }

    # Sidebar navigation
    with st.sidebar:
        st.markdown("### ⚖️ Lega.AI")
        st.markdown("*Making legal documents accessible*")

        selected = option_menu(
            menu_title=None,
            options=list(pages),
            icons=[icon for icon, _ in pages.values()],
            menu_icon="list",
            default_index=0,
            styles={
                "container": {
                    "padding": "0!important",
                    "background-color": "transparent",
                },
                "icon": {"color": "#2e86ab", "font-size": "18px"},
                "nav-link": {
                    "font-size": "16px",
                    "text-align": "left",
                    "margin": "2px 0px",
                    "padding": "8px 12px",
                    "border-radius": "8px",
                    "background-color": "transparent",
                    "color": "inherit",
                    "border": "1px solid transparent",
                    "--hover-color": "transparent",
                },
                "nav-link-selected": {
                    "background-color": "#2e86ab",
                    "color": "white",
                    "border": "1px solid #2e86ab",
                },
            },
        )

    # Handle page redirections from session state.
    # The request is consumed unconditionally so that a stale or unrecognised
    # value cannot linger and be re-evaluated on every subsequent run.
    if "page" in st.session_state:
        requested = st.session_state["page"]
        del st.session_state["page"]
        if requested in pages:
            selected = requested

    # Main content area: look the renderer up instead of walking an if/elif
    # chain; an unknown selection renders nothing.
    entry = pages.get(selected)
    if entry is not None:
        _, render = entry
        render()
|
| 485 |
+
|
| 486 |
+
|
| 487 |
+
def _card_html(css_class, heading_tag, heading, caption):
    """Return the HTML for one home-page card: a heading plus a paragraph."""
    return (
        f'<div class="{css_class}">\n'
        f"<{heading_tag}>{heading}</{heading_tag}>\n"
        f"<p>{caption}</p>\n"
        "</div>"
    )


def show_home_page():
    """Display the home page with overview and features.

    Renders, top to bottom: the title and tagline, three feature cards, four
    statistic cards (hard-coded literals, not computed values), the usage
    steps, buttons for up to four files found in ``./sample``, and a
    call-to-action button. Clicking a sample button stores the file name in
    ``st.session_state.load_sample``; both kinds of button request the upload
    page through ``st.session_state.page`` and then rerun the script.
    """
    st.markdown('<h1 class="main-header">⚖️ Lega.AI</h1>', unsafe_allow_html=True)
    st.markdown(
        '<p style="text-align: center; font-size: 1.2rem; color: #666;">AI-powered legal document analysis and simplification</p>',
        unsafe_allow_html=True,
    )

    # Key benefits
    feature_cards = (
        (
            "🚀 Instant Analysis",
            "Upload any legal document and get comprehensive analysis in under 60 seconds using Google's Gemini AI.",
        ),
        (
            "🎯 Risk Assessment",
            "Color-coded risk scoring helps you identify problematic clauses at a glance with detailed explanations.",
        ),
        (
            "💬 Plain Language",
            "Convert complex legal jargon into clear, understandable language that anyone can comprehend.",
        ),
    )
    for column, (heading, caption) in zip(st.columns(3), feature_cards):
        with column:
            st.markdown(
                _card_html("feature-card", "h3", heading, caption),
                unsafe_allow_html=True,
            )

    # Quick stats
    st.markdown("---")
    st.subheader("📊 Platform Statistics")

    metric_cards = (
        ("1,247", "Documents Analyzed"),
        ("95%", "Accuracy Rate"),
        ("₹2,000", "Avg. Saved per User"),
        ("45 sec", "Avg. Processing Time"),
    )
    for column, (value, label) in zip(st.columns(4), metric_cards):
        with column:
            st.markdown(
                _card_html("metric-card", "h2", value, label),
                unsafe_allow_html=True,
            )

    # Getting started
    st.markdown("---")
    st.subheader("🎯 Getting Started")

    steps_col, samples_col = st.columns([2, 1])

    with steps_col:
        st.markdown(
            "**How to use Lega.AI:**\n"
            "\n"
            "1. **Upload** your legal document (PDF, DOCX, or TXT)\n"
            "2. **Wait** for AI analysis (typically 30-60 seconds)\n"
            "3. **Review** risk assessment and simplified explanations\n"
            "4. **Ask questions** about specific clauses or terms\n"
            "5. **Export** summary for your records\n"
        )

    with samples_col:
        st.markdown("### 📄 Try Real Sample Documents")
        st.markdown("Get started with actual legal documents:")

        # Get available sample documents. Sorting gives a stable order, which
        # matters because the button keys below are index-based and
        # os.listdir() makes no ordering promise.
        sample_dir = "./sample"
        sample_files = []
        if os.path.isdir(sample_dir):
            sample_files = sorted(
                name
                for name in os.listdir(sample_dir)
                if name.endswith((".pdf", ".docx", ".txt"))
            )

        if sample_files:
            for i, filename in enumerate(sample_files[:4]):  # Show first 4
                # Drop only the real extension; a chained str.replace would
                # also remove ".pdf"/".txt" occurring in the middle of a name.
                stem = os.path.splitext(filename)[0]
                display_name = stem.replace("_", " ").title()

                if st.button(f"📄 {display_name}", key=f"home_sample_{i}"):
                    st.session_state.load_sample = filename
                    st.session_state.page = "📄 Upload"
                    st.rerun()
        else:
            st.info("Sample documents loading...")

    # CTA button
    st.markdown("---")
    if st.button("📄 Analyze Your First Document", type="primary"):
        st.session_state.page = "📄 Upload"
        st.rerun()
|
| 627 |
+
|
| 628 |
+
|
| 629 |
+
# Built-in demonstration documents, keyed by the ``doc_type`` accepted by
# load_sample_document().
_SAMPLE_DOCUMENTS = {
    "rental": {
        "filename": "sample_rental_agreement.pdf",
        "type": "rental",
        "text": """
RESIDENTIAL LEASE AGREEMENT

This Lease Agreement is entered into between John Smith (Landlord) and Jane Doe (Tenant)
for the property located at 123 Main Street, Mumbai, Maharashtra.

RENT: Tenant agrees to pay Rs. 25,000 per month, due on the 1st of each month.
Late payments will incur a penalty of Rs. 1,000 per day.

SECURITY DEPOSIT: Tenant shall pay a security deposit of Rs. 75,000, which is
non-refundable except for damage assessment.

TERMINATION: Either party may terminate this lease with 30 days written notice.
Early termination by Tenant results in forfeiture of security deposit.

MAINTENANCE: Tenant is responsible for all repairs and maintenance, including
structural repairs, regardless of cause.

The property is leased "as-is" with no warranties. Landlord is not liable for
any damages or injuries occurring on the premises.
""",
    },
    "loan": {
        "filename": "sample_loan_agreement.pdf",
        "type": "loan",
        "text": """
PERSONAL LOAN AGREEMENT

Borrower: Rajesh Kumar
Lender: QuickCash Financial Services Pvt Ltd
Principal Amount: Rs. 2,00,000

INTEREST RATE: 24% per annum (APR 28.5% including processing fees)

REPAYMENT: 24 monthly installments of Rs. 12,500 each
Total repayment amount: Rs. 3,00,000

LATE PAYMENT PENALTY: Rs. 500 per day for any late payment

DEFAULT: If payment is late by more than 7 days, the entire remaining
balance becomes immediately due and payable.

COLLATERAL: Borrower pledges gold ornaments worth Rs. 2,50,000 as security.
Lender may seize collateral immediately upon default.

ARBITRATION: All disputes shall be resolved through binding arbitration.
Borrower waives right to jury trial.

Processing fee: Rs. 10,000 (non-refundable)
Documentation charges: Rs. 5,000
""",
    },
    "employment": {
        "filename": "sample_employment_contract.pdf",
        "type": "employment",
        "text": """
EMPLOYMENT CONTRACT

Employee: Priya Sharma
Company: TechCorp India Private Limited
Position: Software Developer
Start Date: January 1, 2024

SALARY: Rs. 8,00,000 per annum, payable monthly

WORKING HOURS: 45 hours per week, including mandatory weekend work when required

NON-COMPETE: Employee shall not work for any competing company for 2 years
after termination, within India or globally.

CONFIDENTIALITY: Employee agrees to maintain strict confidentiality of all
company information indefinitely, even after termination.

TERMINATION: Company may terminate employment at any time without cause or notice.
Employee must provide 90 days notice to resign.

NO MOONLIGHTING: Employee shall not engage in any other work or business
activities during employment.

INTELLECTUAL PROPERTY: All work created by employee belongs entirely to company,
including personal projects done outside work hours.
""",
    },
}


def load_sample_document(doc_type: str):
    """Load a sample document for demonstration.

    Stores the chosen built-in sample in ``st.session_state.current_document``
    (flagged with ``is_sample``), requests the analysis page through
    ``st.session_state.page`` and reruns the script.

    Args:
        doc_type: One of ``"rental"``, ``"loan"`` or ``"employment"``. Any
            other value shows a warning and leaves the session untouched.
    """
    sample = _SAMPLE_DOCUMENTS.get(doc_type)
    if sample is None:
        # Previously an unknown type was ignored without any feedback.
        st.warning(f"Unknown sample document type: {doc_type}")
        return

    # Imported at call time, as the original code did.
    from src.utils.helpers import generate_document_id

    # Store in session state
    st.session_state.current_document = {
        "id": generate_document_id(),
        "filename": sample["filename"],
        "document_type": sample["type"],
        "original_text": sample["text"],
        "is_sample": True,
    }

    # NOTE(review): st.rerun() follows immediately, so this message may not
    # remain visible to the user - confirm whether it is still wanted.
    st.success(f"📄 Loaded sample {doc_type} document!")
    st.session_state.page = "📊 Analysis"
    st.rerun()
|
| 736 |
+
|
| 737 |
+
|
| 738 |
+
def show_upload_page():
    """Import the upload page at call time and render it.

    Only the import is guarded: if ``src.pages.upload`` cannot be imported an
    error box is shown instead. An ``ImportError`` raised while the page is
    rendering is left to propagate, so it is no longer misreported as a
    missing page.
    """
    try:
        from src.pages.upload import show_upload_interface
    except ImportError as e:
        st.error(f"Upload page not found: {e}")
    else:
        show_upload_interface()
|
| 746 |
+
|
| 747 |
+
|
| 748 |
+
def show_analysis_page():
    """Import the analysis page at call time and render it.

    Only the import is guarded: if ``src.pages.analysis`` cannot be imported
    an error box is shown instead. An ``ImportError`` raised while the page is
    rendering is left to propagate, so it is no longer misreported as a
    missing page.
    """
    try:
        from src.pages.analysis import show_analysis_interface
    except ImportError as e:
        st.error(f"Analysis page not found: {e}")
    else:
        show_analysis_interface()
|
| 756 |
+
|
| 757 |
+
|
| 758 |
+
def show_qa_page():
    """Import the Q&A page at call time and render it.

    Only the import is guarded: if ``src.pages.qa_assistant`` cannot be
    imported an error box is shown instead. An ``ImportError`` raised while
    the page is rendering is left to propagate, so it is no longer misreported
    as a missing page.
    """
    try:
        from src.pages.qa_assistant import show_qa_interface
    except ImportError as e:
        st.error(f"Q&A page not found: {e}")
    else:
        show_qa_interface()
|
| 766 |
+
|
| 767 |
+
|
| 768 |
+
def show_library_page():
    """Import the library page at call time and render it.

    Only the import is guarded: if ``src.pages.library`` cannot be imported an
    error box is shown instead. An ``ImportError`` raised while the page is
    rendering is left to propagate, so it is no longer misreported as a
    missing page.
    """
    try:
        from src.pages.library import show_library_interface
    except ImportError as e:
        st.error(f"Library page not found: {e}")
    else:
        show_library_interface()
|
| 776 |
+
|
| 777 |
+
|
| 778 |
+
def show_settings_page():
    """Import the settings page at call time and render it.

    Only the import is guarded: if ``src.pages.settings`` cannot be imported
    an error box is shown instead. An ``ImportError`` raised while the page is
    rendering is left to propagate, so it is no longer misreported as a
    missing page.
    """
    try:
        from src.pages.settings import show_settings_interface
    except ImportError as e:
        st.error(f"Settings page not found: {e}")
    else:
        show_settings_interface()
|
| 786 |
+
|
| 787 |
+
|
| 788 |
+
# Start the app only when this file is run as the main module, not on import.
if __name__ == "__main__":
    main()
|
pyproject.toml
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "lega-ai"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "AI-powered legal document analysis and simplification"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.13"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"blinker>=1.9.0",
|
| 9 |
+
"langchain-chroma>=0.2.6",
|
| 10 |
+
"langchain-community>=0.3.29",
|
| 11 |
+
"langchain-google-genai>=2.1.12",
|
| 12 |
+
"langchain-text-splitters>=0.3.11",
|
| 13 |
+
"langchain[google-genai]>=0.3.27",
|
| 14 |
+
"plotly>=6.3.0",
|
| 15 |
+
"pypdf>=6.0.0",
|
| 16 |
+
"python-docx>=1.2.0",
|
| 17 |
+
"python-dotenv>=1.1.1",
|
| 18 |
+
"streamlit>=1.49.1",
|
| 19 |
+
"streamlit-option-menu>=0.4.0",
|
| 20 |
+
]
|
requirements.txt
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core dependencies
|
| 2 |
+
blinker>=1.9.0
|
| 3 |
+
langchain-chroma>=0.2.6
|
| 4 |
+
langchain-community>=0.3.29
|
| 5 |
+
langchain-google-genai>=2.1.12
|
| 6 |
+
langchain-text-splitters>=0.3.11
|
| 7 |
+
langchain[google-genai]>=0.3.27
|
| 8 |
+
plotly>=6.3.0
|
| 9 |
+
pypdf>=6.0.0
|
| 10 |
+
python-docx>=1.2.0
|
| 11 |
+
python-dotenv>=1.1.1
|
| 12 |
+
streamlit>=1.49.1
|
| 13 |
+
streamlit-option-menu>=0.4.0
|
| 14 |
+
|
| 15 |
+
# Additional dependencies for production deployment
|
| 16 |
+
uvicorn>=0.24.0
|
| 17 |
+
gunicorn>=21.2.0
|
sample/Employment_Offer_Letter.pdf
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
%PDF-1.3
|
| 2 |
+
%���� ReportLab Generated PDF document http://www.reportlab.com
|
| 3 |
+
1 0 obj
|
| 4 |
+
<<
|
| 5 |
+
/F1 2 0 R /F2 3 0 R
|
| 6 |
+
>>
|
| 7 |
+
endobj
|
| 8 |
+
2 0 obj
|
| 9 |
+
<<
|
| 10 |
+
/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
|
| 11 |
+
>>
|
| 12 |
+
endobj
|
| 13 |
+
3 0 obj
|
| 14 |
+
<<
|
| 15 |
+
/BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font
|
| 16 |
+
>>
|
| 17 |
+
endobj
|
| 18 |
+
4 0 obj
|
| 19 |
+
<<
|
| 20 |
+
/Contents 8 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 7 0 R /Resources <<
|
| 21 |
+
/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
|
| 22 |
+
>> /Rotate 0 /Trans <<
|
| 23 |
+
|
| 24 |
+
>>
|
| 25 |
+
/Type /Page
|
| 26 |
+
>>
|
| 27 |
+
endobj
|
| 28 |
+
5 0 obj
|
| 29 |
+
<<
|
| 30 |
+
/PageMode /UseNone /Pages 7 0 R /Type /Catalog
|
| 31 |
+
>>
|
| 32 |
+
endobj
|
| 33 |
+
6 0 obj
|
| 34 |
+
<<
|
| 35 |
+
/Author (anonymous) /CreationDate (D:20250921125755+00'00') /Creator (ReportLab PDF Library - www.reportlab.com) /Keywords () /ModDate (D:20250921125755+00'00') /Producer (ReportLab PDF Library - www.reportlab.com)
|
| 36 |
+
/Subject (unspecified) /Title (untitled) /Trapped /False
|
| 37 |
+
>>
|
| 38 |
+
endobj
|
| 39 |
+
7 0 obj
|
| 40 |
+
<<
|
| 41 |
+
/Count 1 /Kids [ 4 0 R ] /Type /Pages
|
| 42 |
+
>>
|
| 43 |
+
endobj
|
| 44 |
+
8 0 obj
|
| 45 |
+
<<
|
| 46 |
+
/Filter [ /ASCII85Decode /FlateDecode ] /Length 1799
|
| 47 |
+
>>
|
| 48 |
+
stream
|
| 49 |
+
GarVO;01G^%"@ZKd,a<:)p018Rl9XI9h<;\`6!Pa:5@n=0`ndrH[WNl;#^EA=rZ4XRnOH2<mmmAT;3LThtS(C^M-I8-)&(k7l7fMA5/hC_fq>=Bp--Gqb_G,rT3P>lJcPH+$RC\Nik"<.*5n>6).:2H98bNU[uU`_t<rkh'>bZrq:LjUZj$q-F;<gr1`7JP2GlM[E2'AltOW!Ze_oj4WPQQ;4^#"6N3O'_913c=qrR$_Ntt;S=^!U7t*-@b>`)T*3[F&inp>cn7cej;?`K]RP"LOk@J9!3>\:<`ff5rc8b4!O[HL#BHEaYVu;obgkUFC+[/&\Mre+5mFgOc+XL@[-k#:%A+.?#4G3u@(TB#e7ee`_!48KM+<ua(,VEKY*k($)I\S&Lgs;`@\rUPFhpBWW,aus31ZpubUYDe"6X9Ks10cJn!Eh%+6l-g$@kk/Z*7E-bV]g2KCP?l^<L3"S\s:S5#!h[6j2e#*QBSAt/ZgM+fh!Tc5-:>6\[#D_SMR67WE(m1iHWPN3"Kp9Lp9*%_UAmuF0u7#j\2A6[;3H)NlegKdPuOD,FUs5XUpQ'knDf^]0u%NJ$[U,2E7Jf<Wc1DWXp#i'C,Wn[*r^LJ4or7%<WgtP],Kl%<`"d3E)WY>?qm>M5Afa%r+Sl;uW9/e$Z1qbDY"tS`51#QG5EP;CY\4R8/l(;l4`Y=hc!*p&3^a,bC&c(+8o1d0J`3VX2#i3/V@U)!@bGr0%g0W/<&H:oX.3L.612eC^$9;QbTE1B5866#5DZbL`an:qKn%_PRHF84SS.Bs:K$HD:@7IDe,=\MQ`TpB1G"Bb.U_W'[,22H[2bqZdQ4qAe+bO-t8PUb@pVEZ6>)gECumi4+E[0>Z2E@>7XqC[GJ\$Z?q[D(s)/=1]J69:?BUCU6&fqd/P)haTO9D)@hRaj8>m;>a^+grI)J`o*hc)TT1#&@>VRaM0a;)KBr24BR4b`*O]$'rXg/g4#!<BD<%OG%?Y$Ws(AA2jV"7;Kn"$JjD*Q6<$fs3%U]u]oo\o8k[PBmR&h$b442d2HW=/Ea.jp**&Ct!?%1de@J"qbi.ZFAs7KqMlr9-qZM:u]5cUZIo6@V>63P3N`$A+WJZs__GR5]A1G\^N.V/.Pa)UL`eYlanlAIfWRE<'\`4*.q!i`RnbAPn;u!J2K.Jgj0SQ"lJOJd"TlCDaJ[C-80^7M!(Pemh>Uao<"t1Y4RcJ?WT#fH=29>F<T#S@f0OMOJ5?ObM7RFoqCqC;l28D/S5KirEo+^?B"tVZI'A>mfYh-^!/9X'JF."%C9bXs9"2t2KA08$.-aUE55+,`dJtfR'(>O!lG%uG:(gu+@YWQ]30&udATP6Y)!jT?cf1m6[6>s`g#3e)sGS=+\b//dsj*<"83/6.#M'XC`?FSoGKrf]>+u\rfXOBQ".9^e)ML@jQIdVd&-I"#E:RA:HF6nNs37PR!S'<mfDl"mh8tsVYZEGg*'H^OakQph2rc!X+3sJj<adc%lO$ic$NgRd_r0=/>+.m70q[t3]'BYL=l#'C:bVRuTMgW&GeO4I8ARV$SR@1o/\eS?`>%&'HiC^^[jpu>3:?4$C%`^h6[l;0QX=kj4&g*EVYKr4eBoC.i[/=1h>(a:8Yd'FQJQ$-Hg34@!L/n5DEp5<pfSXj%<CJK]7*rRnJ8hC>Ceq3?&)g8$H81\4_`\HE'!B*?4D;+9?NQ5r_>jXflLKWVPcENJVjgm7Nm!-c7EW-&i@^T;o8a<HS[)[-@[a)>(H9UKm5=Afg,Z=(8.QB/oskA&ea>0>53ne[iMTcW!WT#&Jc~>endstream
|
| 50 |
+
endobj
|
| 51 |
+
xref
|
| 52 |
+
0 9
|
| 53 |
+
0000000000 65535 f
|
| 54 |
+
0000000073 00000 n
|
| 55 |
+
0000000114 00000 n
|
| 56 |
+
0000000221 00000 n
|
| 57 |
+
0000000330 00000 n
|
| 58 |
+
0000000533 00000 n
|
| 59 |
+
0000000601 00000 n
|
| 60 |
+
0000000897 00000 n
|
| 61 |
+
0000000956 00000 n
|
| 62 |
+
trailer
|
| 63 |
+
<<
|
| 64 |
+
/ID
|
| 65 |
+
[<cc2bcd5827912acf8c3bd2324dddcc13><cc2bcd5827912acf8c3bd2324dddcc13>]
|
| 66 |
+
% ReportLab generated PDF document -- digest (http://www.reportlab.com)
|
| 67 |
+
|
| 68 |
+
/Info 6 0 R
|
| 69 |
+
/Root 5 0 R
|
| 70 |
+
/Size 9
|
| 71 |
+
>>
|
| 72 |
+
startxref
|
| 73 |
+
2846
|
| 74 |
+
%%EOF
|
sample/Master_Services_Agreement.pdf
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
%PDF-1.3
|
| 2 |
+
%���� ReportLab Generated PDF document http://www.reportlab.com
|
| 3 |
+
1 0 obj
|
| 4 |
+
<<
|
| 5 |
+
/F1 2 0 R /F2 3 0 R
|
| 6 |
+
>>
|
| 7 |
+
endobj
|
| 8 |
+
2 0 obj
|
| 9 |
+
<<
|
| 10 |
+
/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
|
| 11 |
+
>>
|
| 12 |
+
endobj
|
| 13 |
+
3 0 obj
|
| 14 |
+
<<
|
| 15 |
+
/BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font
|
| 16 |
+
>>
|
| 17 |
+
endobj
|
| 18 |
+
4 0 obj
|
| 19 |
+
<<
|
| 20 |
+
/Contents 8 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 7 0 R /Resources <<
|
| 21 |
+
/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
|
| 22 |
+
>> /Rotate 0 /Trans <<
|
| 23 |
+
|
| 24 |
+
>>
|
| 25 |
+
/Type /Page
|
| 26 |
+
>>
|
| 27 |
+
endobj
|
| 28 |
+
5 0 obj
|
| 29 |
+
<<
|
| 30 |
+
/PageMode /UseNone /Pages 7 0 R /Type /Catalog
|
| 31 |
+
>>
|
| 32 |
+
endobj
|
| 33 |
+
6 0 obj
|
| 34 |
+
<<
|
| 35 |
+
/Author (anonymous) /CreationDate (D:20250921125755+00'00') /Creator (ReportLab PDF Library - www.reportlab.com) /Keywords () /ModDate (D:20250921125755+00'00') /Producer (ReportLab PDF Library - www.reportlab.com)
|
| 36 |
+
/Subject (unspecified) /Title (untitled) /Trapped /False
|
| 37 |
+
>>
|
| 38 |
+
endobj
|
| 39 |
+
7 0 obj
|
| 40 |
+
<<
|
| 41 |
+
/Count 1 /Kids [ 4 0 R ] /Type /Pages
|
| 42 |
+
>>
|
| 43 |
+
endobj
|
| 44 |
+
8 0 obj
|
| 45 |
+
<<
|
| 46 |
+
/Filter [ /ASCII85Decode /FlateDecode ] /Length 2201
|
| 47 |
+
>>
|
| 48 |
+
stream
|
| 49 |
+
Gas1`d;ms^&q/Z@kXsCVZ<MtkG*6-tT+i,(WX-Ebg"o^(AWR*PM56h7K*h;<n*,sr+SNL6`$Wo+GG!@?VjfES_k446R?^14;Yous9]HLQf/.q@H;A6C,5h\ka6h7b+%IEV0!<4dfA@Z>j/\5BPa8h$60#5bF?-qj97pB/UgcoP=PefHO\g#/'OiEKoQNpB;>LOf^HoHY29f\!Pbge$B:Z#jJg[TuW@CE!K/^+^Gk:\li><UaX)cXR1F-`kCN:@_@_Gg1e'EoKdOA??86.aQPhR]gdA5-h3k?:js/pK4$Yu>V%aU1<-*n%PHF&^a*T$QiR44$5E,kB-r(F/,KJPXk0cj1IR4e]fD`YPeM89R<ogT]#D3\BG>C+Kc&UCPEbj(Gu8Z#Br/="4)8fY&i,p)c4Z/:t=AH'C"W#mMTh!H)!mj`MA4Ln$;,/Nbf](N2+bh0&&bSU-]Y@YmiHfGOl`JDD+IND8$q"C-l_,jBA&("8ANj.U'-,8eVXamTUNe#i#(`tu-gMNW?\LJ;K2kmpa:c1SU41j*%W7e1EYBB#%j[3e]1iu9mPOB4Q21fJ9)ArZ<Qg_^`/e9hAXXW5tfl0VF0TRVkjIM,SEg^JSFu39'n#f.%3$0UT"tX.r4SnOtk7=Pk!6$bp9DLrA]95:WG["q'WB`T^EdpjJ,nl`)-k_a9/r42G7.i+[Y<AeMUG:t2Dq,A(4'b"C7hCuP7GT<>(MXo"<DISJlmlZ5NS\]7)S!5R"rN[[ka_j'ene@L%$jXO?SqG0m,bk']oj&NrTQsS<h-VWbDbJ[i_HRjrD_=#L2F!0a4iW':E)5<V]G?S4m0q"cFrV8n)^Q7$X,0FqH+1EX;oT[AZ=VgIH"sY4m]VSU@k[iOhUXH<:<lF5QTW)e\N%prc[V,KJd#qH8t$GOs8ha!;_o'^5lS%D7E(R;Iq>^SJAaTnIB8OCm"1+Ak:JmADn#:f:\+\efI01iR_"8C<.Goo=ZpV[i;Xq;Q8HkjJFV,&=m7QLFF@!jiZYqaT,E!BtCgfRT*/Hq7t6l'5UE,(W_B2ep(:W'GZ<:CQW_sP*IrWF]Ol5jl^4PIRF^_EZ)f0.$AA7L<1\J_O4MtW81$b>(m$ZI>>?BRRfNoaLZ*_*M?Pp6_M9]HDBc=@\o^9;+nG_m_^K.8aF'V<ZDLqI`=L';+.$q5qHs:bF+`lbp_NI2^1i1H&Q&;i[I%7QX=>j*/.`&iHHi=kc0+[@ZaQMi6u.lj3^q#d\(e2VF(\u^ScU6V<D'LSs/gJ.ML)o=FabM$rIjh$D[*tb7S#B-ZKYWV;sEO(En;^Ts7Dr7`(K95sRf$H(9\?mVk&mU*5Um^e('ek@>__GXsg%\;S6]]LQ5C/@ZG:Y]Jrh1TeJF7e(>WMEW;Gn'#nf9fu370;?"%,Ck%OKN#I?3??u/@M+;-+!d)DG6VaYHEdkj<;I^]K_.5@?U0igo/Q]=S?qm.`;"44+s8mU.7M([UUQ@,ns,rrZ@*%#/X]V4T6q6c125`^!$uh;:G`-r'9!cT*/2'Ab#YmO>2rGEWUiCT@d0JA=WH8u2Wseag-<m9>a:R\+,5l^[6at:'5pQq+4?H]<]RUc?^mO6>%NE;--nJTZZ_h/@nhFm8C5u!>?S_Ki'l73/U6upQe'<:8\<!]'eWPP$2cJ&#/8\X!L4p$.!T:e\E:hmK+(7GKs@8`0.)L)?*,\fD"]^tN:gcFr'52,e<.O;4N?:D1K?Xepi#f==O*@M`J6/HM<2+pLmDPB!Y)$l[<7S1Yc?d[Lqh*I`-UKdiO9k6*V6E*8J:l:C27/1.=MAJbp6I6neJOh6.W5dV=fG\,DMPfhJp/"3*e#KDRC)Ko+9as=S3/Vk.)^6rG]u!V6D8sdX'a1M,bs34KWtMgFY>9MpBGX)R3U#ma1im\9W1?:2Nht*3:,V@$<Qh4nqYI\I]4r]gDS/i^C3/crZ+fCqK]9UpV`?Cbo=,hDk[Y>rN]cD[N_9RAW[W#QJ^\E!'NoEP#g8<P5%D
RnM$bYBt`?YraQ]@ZflW8JH["d<*[&Ygsr/BYGO\.M;VTHi1Jfbol1Ob^rs@n'75D?`fc>k.4i6I)Y]c@WghT^LkI,GSC"PG$%DR*CfsL>9.tEoHb<&;^ljQ1<WCT'n+KkX[KR$d)ilus.fsaq[fVZ7Me3Qai40ik\K@[SK%C+r-&>E'qn:Ss8"(m)9;J9qW5J8O,n/~>endstream
|
| 50 |
+
endobj
|
| 51 |
+
xref
|
| 52 |
+
0 9
|
| 53 |
+
0000000000 65535 f
|
| 54 |
+
0000000073 00000 n
|
| 55 |
+
0000000114 00000 n
|
| 56 |
+
0000000221 00000 n
|
| 57 |
+
0000000330 00000 n
|
| 58 |
+
0000000533 00000 n
|
| 59 |
+
0000000601 00000 n
|
| 60 |
+
0000000897 00000 n
|
| 61 |
+
0000000956 00000 n
|
| 62 |
+
trailer
|
| 63 |
+
<<
|
| 64 |
+
/ID
|
| 65 |
+
[<1c25949f9492116853fbf25e4239dab0><1c25949f9492116853fbf25e4239dab0>]
|
| 66 |
+
% ReportLab generated PDF document -- digest (http://www.reportlab.com)
|
| 67 |
+
|
| 68 |
+
/Info 6 0 R
|
| 69 |
+
/Root 5 0 R
|
| 70 |
+
/Size 9
|
| 71 |
+
>>
|
| 72 |
+
startxref
|
| 73 |
+
3248
|
| 74 |
+
%%EOF
|
sample/Mutual_NDA.pdf
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
%PDF-1.3
|
| 2 |
+
%���� ReportLab Generated PDF document http://www.reportlab.com
|
| 3 |
+
1 0 obj
|
| 4 |
+
<<
|
| 5 |
+
/F1 2 0 R /F2 3 0 R
|
| 6 |
+
>>
|
| 7 |
+
endobj
|
| 8 |
+
2 0 obj
|
| 9 |
+
<<
|
| 10 |
+
/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
|
| 11 |
+
>>
|
| 12 |
+
endobj
|
| 13 |
+
3 0 obj
|
| 14 |
+
<<
|
| 15 |
+
/BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font
|
| 16 |
+
>>
|
| 17 |
+
endobj
|
| 18 |
+
4 0 obj
|
| 19 |
+
<<
|
| 20 |
+
/Contents 8 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 7 0 R /Resources <<
|
| 21 |
+
/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
|
| 22 |
+
>> /Rotate 0 /Trans <<
|
| 23 |
+
|
| 24 |
+
>>
|
| 25 |
+
/Type /Page
|
| 26 |
+
>>
|
| 27 |
+
endobj
|
| 28 |
+
5 0 obj
|
| 29 |
+
<<
|
| 30 |
+
/PageMode /UseNone /Pages 7 0 R /Type /Catalog
|
| 31 |
+
>>
|
| 32 |
+
endobj
|
| 33 |
+
6 0 obj
|
| 34 |
+
<<
|
| 35 |
+
/Author (anonymous) /CreationDate (D:20250921125754+00'00') /Creator (ReportLab PDF Library - www.reportlab.com) /Keywords () /ModDate (D:20250921125754+00'00') /Producer (ReportLab PDF Library - www.reportlab.com)
|
| 36 |
+
/Subject (unspecified) /Title (untitled) /Trapped /False
|
| 37 |
+
>>
|
| 38 |
+
endobj
|
| 39 |
+
7 0 obj
|
| 40 |
+
<<
|
| 41 |
+
/Count 1 /Kids [ 4 0 R ] /Type /Pages
|
| 42 |
+
>>
|
| 43 |
+
endobj
|
| 44 |
+
8 0 obj
|
| 45 |
+
<<
|
| 46 |
+
/Filter [ /ASCII85Decode /FlateDecode ] /Length 2422
|
| 47 |
+
>>
|
| 48 |
+
stream
|
| 49 |
+
Gat=,gN)%.&q/)-FSTK1>?^F8\]f<sUp9.m[i@CSaci9q]Kg"#6:)8h!0j7nhpVS/^:[.9S2*Y9h0o.ll%WjC$i\Dfs4MtepGMp8G"/eG>&S-KDYfCEGEf2di;K`[qYnD)_r\l!N.1`mrH*5YiKDasCq?:m/8TVo]oYtOT,uPoo/jOt?ehM;a^Ugdn'^l91gf%bidUFDRrie$GJAH+-8bb49Ti1Rnh[D_dJ=q15Nh>sdM;_?6g3jHh_UhJGF&'?V*FkR1MK>aPr#Bp-bNp33U+s2C/(9M7qPE"1YHY+oD6OMUHI#f'R"?G5;JRIUKpn;Vop*/<n><P9?pW]qfcDn=_2qXF;CIo_P56gS[BYF-r.Tf=Ja]ee*.-jfp$B;gN8R\e,.+aqhFi$3kI<3<p,9.X+Jc?C[=?4D9=XqYUUZmU-O[nlNo`B):g>VWsUslm60JeM_iqK3l;ah@0q+"`XW[)A'b2bBWqt>@\eNN+pbe-0UEMs/iYt&Of:JYqOi8@:R`9$Z#`kZE,S'I?)fXnkB#/ko4gVK8u`kDQ9EfEcLu3PejVN4b>)jo-$CM'jm;V7I]9F=`h+_`i(RuD#4+?G!63;,i;EZu3b(.)Q-K$t>cQfEA/?HQ9K=hte]sWd67m;(Q6\qE@&C5u]s=Qf+K!S3E3=ud44")OZ9d&r$,$<HfH):c3IS&I"h<;\nkg-E"AsOpH"X?E4G0tt.cVHdF;S47_?.3u<Q!Y<=;b#fE,J@ZMs6G:\dHaoX/,L_!p!"-hN7hV01=Qa67SuC08m4b+LL4'f*ElP;PfV0[Q.?.C1iM#8&gg\-/(Ep=sZ`jKi1Ea^t[4BTi;bD9W:S.#<mYL)nq[$0mFkBPXR$j\dT8o-:.OSfSRhLd(9Q'.Bo/T"N_R@HE3eji"=a64@h<pEX9n.F'r`6'Y%KZhO-]P!&%ebSrt3a&C1EH4YUo\X'tDGPN>?O[GbqC>Z[\a(Q&p&2[</CP\&1.+&ub'1<-"i;mDZE<s"5-cD.jI9)a0cpTPS%f'=HcnMTb)YcNpNSaHFd<E2o=iPL?.":26lOs+H<:`YfRe>D0<^)M'lC1K3>YL_484fgpG9Y&p/L%sQJrY#aUK"2?74\GG@DXXjCOD99R\<dkYq?cG!*/)7ehp;YNWYl)/*1Onq#,g0o*-/d$jo@"E.\TM``eL'o3cj&CglAWPaI(Tc!qsC#kL#qTF0<$;c`ifnqN%VMVS<*rA%2b"0Fm8KhLbN#i=3NsPElIc?;'pX-1@_<:6qMuI]W9_ZYS>*.fps7S1.h1c`smp;H\(:4B2gJo9`XM"U*Z`JJ5EI>p1Yf3HKk7q#o^M#VaW?r#S<,!Z$S?Y8^##$S"+IU*"QY`a*-N.*_ut>YiAV8A2N8ZU8n?4t&"/1HLO=<N')$<T%bfFeKksn@bIB&gP^sR]M5TG1$kDMZafcml6&F0P,'(\N6Z0b><:I.D[#Ara4Ku):n*D>?s^g&IH0<!5o=gQHkl[D0nYW`13$tJI=kLR^85[%0?ZH"+ARnKPl@^:^e9<.Tm:JV6uN0EufTpT<3[3!Ai$/oP;R7g?m7HV1+0A$`AZ.^51U\0n18LKfI$L[_0u8_3a$2[WT<<WmPIDG>Q$#GgUU'EDA\g:-L=p'dh=RR+fE]qIRBGc8!$C1Grh\[V]e4p+'^VP)#D1Nu$o1ED_Xm)e#;;iB*p*dm.E_h57C4-\XPE'-.A3.hiP/V"gl_&Le;Q-J7"JZf%DPdVFoNn:&.ip$sh&^WM3qGNj7U(_.9PD"EIHIg?f"$KY(fE#9l;47,G3)qDa#_aN+X'6!LD73[H@e-t\F68m`FOr2*NUU0X8X\Jo8nYk,:^UP/#+]GX6/$g:P,R)sH:ch7oo3$;XkAO9k(H9I67=(G0#:@*Nq++L_k!Q,W-Q-L)/?-TaR&Tf%*X[b1)mc$)YuSlnS]HQs@>ALCbe?Q3QC55/C86^6WUUO'e>/qL1k!tP&#]iJ%[u&]o-d^rkt7s_H#IN@-he9k
R2og<=UjWabb6fWY,A]k9:eVqhR&e*mS7K:+C7h-j8GUlEfht<dbXiE(&d<(RBCGUlS5%cp'oOOc9Tt(R&[#Z@#h;*jU_$T)4N/@/4%n,+]iTP7s7<DTXWaJE9.=+'UH39I\a`;n>]J"Mjgk9C8;5iVZDNk$3gOdFK4?"keS5cYI9L$e1EAh_t:F`=:K@n)*DDY%mKhjVmi2>n,n$F7K*XX.3E&8]n6mGV$F-V(JjIc^L0fh/.M'o=0tR"q>BtdY1g'g*2(fqp)L%\;JS;8GYE*u@342]T(gP:q)<SP0(iBYQ:oJl/ZL5E3@Me?%T=+tfQ4Q>i*)rJfE,Lq'cM3&gP-\4#qA5)Td]GO1:UhIM.-lf\tC_@HBG:0o];-ERG?[p7\PK>#QN_`(<E,pqmAX8F=*1D)g_+*"HqmJ~>endstream
|
| 50 |
+
endobj
|
| 51 |
+
xref
|
| 52 |
+
0 9
|
| 53 |
+
0000000000 65535 f
|
| 54 |
+
0000000073 00000 n
|
| 55 |
+
0000000114 00000 n
|
| 56 |
+
0000000221 00000 n
|
| 57 |
+
0000000330 00000 n
|
| 58 |
+
0000000533 00000 n
|
| 59 |
+
0000000601 00000 n
|
| 60 |
+
0000000897 00000 n
|
| 61 |
+
0000000956 00000 n
|
| 62 |
+
trailer
|
| 63 |
+
<<
|
| 64 |
+
/ID
|
| 65 |
+
[<a017cb121e02b55d6f3b7490268d6807><a017cb121e02b55d6f3b7490268d6807>]
|
| 66 |
+
% ReportLab generated PDF document -- digest (http://www.reportlab.com)
|
| 67 |
+
|
| 68 |
+
/Info 6 0 R
|
| 69 |
+
/Root 5 0 R
|
| 70 |
+
/Size 9
|
| 71 |
+
>>
|
| 72 |
+
startxref
|
| 73 |
+
3469
|
| 74 |
+
%%EOF
|
sample/Residential_Lease_Agreement.pdf
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
%PDF-1.3
|
| 2 |
+
%���� ReportLab Generated PDF document http://www.reportlab.com
|
| 3 |
+
1 0 obj
|
| 4 |
+
<<
|
| 5 |
+
/F1 2 0 R /F2 3 0 R
|
| 6 |
+
>>
|
| 7 |
+
endobj
|
| 8 |
+
2 0 obj
|
| 9 |
+
<<
|
| 10 |
+
/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
|
| 11 |
+
>>
|
| 12 |
+
endobj
|
| 13 |
+
3 0 obj
|
| 14 |
+
<<
|
| 15 |
+
/BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font
|
| 16 |
+
>>
|
| 17 |
+
endobj
|
| 18 |
+
4 0 obj
|
| 19 |
+
<<
|
| 20 |
+
/Contents 8 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 7 0 R /Resources <<
|
| 21 |
+
/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
|
| 22 |
+
>> /Rotate 0 /Trans <<
|
| 23 |
+
|
| 24 |
+
>>
|
| 25 |
+
/Type /Page
|
| 26 |
+
>>
|
| 27 |
+
endobj
|
| 28 |
+
5 0 obj
|
| 29 |
+
<<
|
| 30 |
+
/PageMode /UseNone /Pages 7 0 R /Type /Catalog
|
| 31 |
+
>>
|
| 32 |
+
endobj
|
| 33 |
+
6 0 obj
|
| 34 |
+
<<
|
| 35 |
+
/Author (anonymous) /CreationDate (D:20250921125755+00'00') /Creator (ReportLab PDF Library - www.reportlab.com) /Keywords () /ModDate (D:20250921125755+00'00') /Producer (ReportLab PDF Library - www.reportlab.com)
|
| 36 |
+
/Subject (unspecified) /Title (untitled) /Trapped /False
|
| 37 |
+
>>
|
| 38 |
+
endobj
|
| 39 |
+
7 0 obj
|
| 40 |
+
<<
|
| 41 |
+
/Count 1 /Kids [ 4 0 R ] /Type /Pages
|
| 42 |
+
>>
|
| 43 |
+
endobj
|
| 44 |
+
8 0 obj
|
| 45 |
+
<<
|
| 46 |
+
/Filter [ /ASCII85Decode /FlateDecode ] /Length 1712
|
| 47 |
+
>>
|
| 48 |
+
stream
|
| 49 |
+
Gas1_d;I\u&:EqJ/GdfeOOM`oRl9JoRjb69`sHSDMP/2r/ZiOV-q[!-EW,Vj9A-Tc["lGJ9p%M!Za3;akRIM\nUFCJIEq)\b7T_/-stEd?1O$H$)RG*XH0eohk)6nB.%=:qXF$'1-.K[UV+I;',lkiTsr.#*'0OboB[Zlk1OuGPB8o_Bm_d2dq^MJVYg\\Iuo&<nh8[drb"$857/p<QP3Dk?[dK!g/Fh?-=ohZ)q].%g4l&\Vg:q?3F+8J#l9DB63X7*L3AYKEb`F!c"q+2hU`5;0.0j[[4<Q;m#WU=ng%kKCqt?PaEj)tm4>rrk39O.E3tNi.jP9p,:Rt+C0d'k>N<6AZ<X[gS6d=b,lVXcQ4BPu41_oUD3C$"(I,]*!o[8m8)R_LQu>hd70F^+[2gT@P#.aqY4me85_sa+&EHp8(rW@&[KTTHl.c%-#QBE*i^:Z;mBf16Q>$Zu'J8uROGK#es#$8nOfh1nSUa(]1FmrKXYlbC^90'"GXu%lQP,H2$E0jJLU@UBO@?9]SjP\[',Y>MJ"h47IQ,I@Hb)adDrt2O^p"KbbI:?<%c;*D#pH+49kP$NW`pD`W,*\o&Up^M%ohNk2R+`4h8t<S&PI"Vhh1.n'g*;7dp^]g_\3"(.O-eQdk8]l+5r8lEPX&g.E.diD%>iIe9]dT:>*M*[G?I(d7[K=7sJ^`Ri5kC[B"hcB'!Ki!ko'hH^E+84T.Z,kN@&uj6?C<cjW"FD<W4SetXS+2@Rh5&a%RhP1RK`=pKod^B$1$3>cJ,@%1k7/_E3_>nh-#Iba5>KC&fKFV'?WGn1t`A2iM1Nd_KAJlF^qAl`GHmDU><L@5ee*bWqaSUqoMcI1He2Oj`d')sXE6ZDQ;'kgb)3F,c.PhV#I9._GLBRn+Q<>1=q%siLt_&a$I&LJgI+G'st)Xdf`TFL1d(P]C#2[\Cdr`O1RBJ#4VXg<sKZ"Q?m?mNpV4daWX?/n!<NA7.I3(`*`cVUIeKZ<V/Fu?u$)X-V\,gGpsHM&5JUe]'ZrFc_Q;1RS)_dOse1i;''Sh/<+S<g8'(]a^T.J_Tr4kP(Wd:.GRELoUmI;RL9!_Z)KP9Ghp#s/'+HoK0DcI_;LFuY5_jfV`0^M6K!`Jq3*D5qIQGB:RI_F]rtO6,)TdGoYg.SPGC9#VoMJpf_</)X?eq?RC@_53HB:J)QPeQ"oh=hp+'V?2Lj0tnF#/[^#&"V_X87TWat+fao(:Sd0X%3q!0&&q@5-Z8N?No8Yp0N@7G6=7a)ZQ,SD"sb[hO7U!]Ap-u52qPO&9Ub#Y2]#^)A$Xl8kic[P`=:42CI5%8N%Q<hQ!0h\(>?k%o)YPL/%^/-C99+1UI:Fim'/F1L4;*\#N>1lTgB\8QNf3triip$-p-;+(skGKm@25:c/7c%:Mu-6Y%gn0\5#NE(A&D\h>4J$k?&(8n;n+";1.0Fl.rIJ;n6gol&-tEi5UlhQc2.LL=$n#_+YFK#_(,e>/hPciB3gIH1p[r-D;r`3(;`QAGl*6NH:>-Rbl@td:IXV$i.MNqoSFX$!<43O5pFbp2<\jE/46e8"sJEY!3.D:manQ0'(GM!V)rjK6>-)@d6ECQpG=6`)g5(ken6''RVl5qR#=($"hch79$7Y"O]`47b0`T`kS5ooBuRtLM'Z/VXuahVs/S(?b`DCo]G_;X6%R6H7O]Y(&j'E4#Yt[Z/<5ip+U_EVAK75c\Sfd~>endstream
|
| 50 |
+
endobj
|
| 51 |
+
xref
|
| 52 |
+
0 9
|
| 53 |
+
0000000000 65535 f
|
| 54 |
+
0000000073 00000 n
|
| 55 |
+
0000000114 00000 n
|
| 56 |
+
0000000221 00000 n
|
| 57 |
+
0000000330 00000 n
|
| 58 |
+
0000000533 00000 n
|
| 59 |
+
0000000601 00000 n
|
| 60 |
+
0000000897 00000 n
|
| 61 |
+
0000000956 00000 n
|
| 62 |
+
trailer
|
| 63 |
+
<<
|
| 64 |
+
/ID
|
| 65 |
+
[<9d81045db1dd6bf8c79b9710d4d13b3d><9d81045db1dd6bf8c79b9710d4d13b3d>]
|
| 66 |
+
% ReportLab generated PDF document -- digest (http://www.reportlab.com)
|
| 67 |
+
|
| 68 |
+
/Info 6 0 R
|
| 69 |
+
/Root 5 0 R
|
| 70 |
+
/Size 9
|
| 71 |
+
>>
|
| 72 |
+
startxref
|
| 73 |
+
2759
|
| 74 |
+
%%EOF
|
setup.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Lega.AI Setup Script
|
| 4 |
+
===================
|
| 5 |
+
Interactive setup script to help configure your Lega.AI environment.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import sys
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def main():
    """Interactively create or refresh the local ``.env`` file for Lega.AI.

    The contents of ``.env`` are always derived from ``.env.example``: the
    Google AI API key placeholder is replaced with the key the user types,
    and the maximum file size / risk sensitivity defaults are optionally
    overridden. An existing ``.env`` is overwritten from the template once
    the user confirms with ``y``.

    Returns early, without writing anything, when the user declines to update
    an existing ``.env`` or when ``.env.example`` is missing.
    """
    print("🚀 Welcome to Lega.AI Setup!")
    print("=" * 50)
    print()

    # Check if .env exists
    env_file = Path(".env")
    if env_file.exists():
        print("📋 Found existing .env file")
        overwrite = input("Do you want to update it? (y/N): ").lower().strip()
        if overwrite != "y":
            print("Setup cancelled.")
            return
    else:
        print("📋 Creating new .env file...")

    # Copy from template
    template_file = Path(".env.example")
    if not template_file.exists():
        print("❌ .env.example template not found!")
        return

    # Get API key from user
    print()
    print("🔑 Google AI API Key Setup")
    print("-" * 30)
    print("Get your API key from: https://makersuite.google.com/app/apikey")
    print()

    api_key = input("Enter your Google AI API key: ").strip()

    if not api_key:
        print("❌ No API key provided. You can add it later to the .env file.")
        api_key = "your_google_ai_api_key_here"
    else:
        print("✅ API key received")

    # Read template and replace the API key placeholder. The encoding is
    # explicit so the result does not depend on the platform's locale.
    content = template_file.read_text(encoding="utf-8")
    content = content.replace(
        "GOOGLE_API_KEY=your-google-api-key-here", f"GOOGLE_API_KEY={api_key}"
    )

    # Write to .env right away so a usable file exists even if the optional
    # prompts below are interrupted.
    env_file.write_text(content, encoding="utf-8")

    print()
    print("✅ Environment file created successfully!")
    print()

    # Optional configuration
    print("⚙️ Optional Configuration")
    print("-" * 25)

    # File size limit. isascii() rejects digit-like characters such as "²"
    # that str.isdigit() accepts but that are not plain decimal numbers.
    max_size = input("Maximum file size in MB (default: 10): ").strip()
    if max_size.isascii() and max_size.isdigit():
        content = content.replace("MAX_FILE_SIZE_MB=10", f"MAX_FILE_SIZE_MB={max_size}")

    # Risk sensitivity
    print()
    print("Risk sensitivity (1-5, where 5 is most sensitive):")
    risk_sens = input("Enter risk sensitivity (default: 3): ").strip()
    if risk_sens.isascii() and risk_sens.isdigit() and 1 <= int(risk_sens) <= 5:
        content = content.replace("RISK_SENSITIVITY=3", f"RISK_SENSITIVITY={risk_sens}")

    # Write updated content
    env_file.write_text(content, encoding="utf-8")

    print()
    print("🎉 Setup Complete!")
    print("=" * 20)
    print()
    print("Next steps:")
    print(
        "1. Install dependencies: uv add streamlit 'langchain[google-genai]' langchain-google-genai langchain-chroma"
    )
    print("2. Run the application: streamlit run main.py")
    print("3. Open your browser to: http://localhost:8501")
    print()
    print("Need help? Check the README.md file for detailed instructions.")


if __name__ == "__main__":
    main()
|
src/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Create __init__.py files to make directories proper Python packages
|
| 2 |
+
|
| 3 |
+
# src/__init__.py
|
src/models/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# src/models/__init__.py
|
src/models/document.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field
|
| 2 |
+
from typing import List, Optional, Dict, Any
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
from enum import Enum
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class DocumentType(str, Enum):
    """Closed set of document kinds recognised by the application.

    Because the enum also derives from ``str``, each member compares equal to
    its lowercase string value.
    """

    RENTAL = "rental"
    LOAN = "loan"
    EMPLOYMENT = "employment"
    SERVICE = "service"
    NDA = "nda"
    OTHER = "other"
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class RiskLevel(str, Enum):
    """Severity grades that can be attached to a risk factor.

    Members are ``str`` subclasses and compare equal to their lowercase value.
    """

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class RiskCategory(str, Enum):
    """Categories a risk factor can be filed under.

    Members are ``str`` subclasses and compare equal to their lowercase value.
    """

    FINANCIAL = "financial"
    COMMITMENT = "commitment"
    RIGHTS = "rights"
    STANDARD = "standard"
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class ClausePosition(BaseModel):
    """Where a clause sits inside a document."""

    # Start/end of the clause; presumably character offsets into the
    # extracted text -- confirm against whatever produces these values.
    start_index: int
    end_index: int
    # Optional page reference; defaults to None when not supplied.
    page_number: Optional[int] = None
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class RiskFactor(BaseModel):
    """A single risky clause together with its classification and advice."""

    id: str
    # The clause wording this risk refers to.
    clause_text: str
    category: RiskCategory
    severity: RiskLevel
    # Human-readable reasoning for the flag.
    explanation: str
    # Both of the following are optional and default to None.
    suggestion: Optional[str] = None
    position: Optional[ClausePosition] = None
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class DocumentAnalysis(BaseModel):
    """Analysis results recorded for one document."""

    document_id: str
    document_type: DocumentType
    # Validated to lie in the inclusive range 0..100.
    risk_score: int = Field(ge=0, le=100)
    summary: str
    simplified_text: str
    # Collection fields default to empty containers.
    risk_factors: List[RiskFactor] = []
    key_dates: List[Dict[str, Any]] = []
    financial_terms: Dict[str, Any] = {}
    # Filled with datetime.now() (naive local time) when not provided.
    created_at: datetime = Field(default_factory=datetime.now)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
class Document(BaseModel):
    """An uploaded file plus its (optional) analysis."""

    id: str
    filename: str
    file_path: str
    # None until a type has been assigned.
    document_type: Optional[DocumentType] = None
    file_size: int
    # Filled with datetime.now() (naive local time) when not provided.
    upload_timestamp: datetime = Field(default_factory=datetime.now)
    # None until an analysis is attached.
    analysis: Optional[DocumentAnalysis] = None
    processed: bool = False
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
class QASession(BaseModel):
    """One question/answer exchange tied to a document."""

    id: str
    document_id: str
    question: str
    answer: str
    # Filled with datetime.now() (naive local time) when not provided.
    timestamp: datetime = Field(default_factory=datetime.now)
    # Optional; defaults to None when no score is supplied.
    confidence_score: Optional[float] = None
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
class SimplificationRequest(BaseModel):
    """Input payload for a text-simplification request."""

    # The text to simplify (required).
    text: str
    # Optional extras; both default to None.
    context: Optional[str] = None
    document_type: Optional[DocumentType] = None
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
class SimplificationResponse(BaseModel):
    """Result payload of a text-simplification request."""

    original_text: str
    simplified_text: str
    # Default to an empty list / empty mapping when not supplied.
    key_points: List[str] = []
    jargon_definitions: Dict[str, str] = {}
|
src/pages/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# src/pages/__init__.py
|
src/pages/analysis.py
ADDED
|
@@ -0,0 +1,978 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import plotly.graph_objects as go
|
| 3 |
+
import plotly.express as px
|
| 4 |
+
from typing import Dict, Any
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
+
from ..utils.helpers import get_risk_color, extract_financial_terms, extract_key_dates
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def create_advanced_highlighting(
    text: str, risk_factors: list, jargon_definitions: dict
) -> str:
    """Wrap risky clauses and legal jargon in ``<span>`` elements with tooltips.

    All match positions are computed against the unmodified ``text`` and the
    result is assembled in a single pass, so inserting one highlight can never
    shift or corrupt another.

    Args:
        text: Document text to annotate.
        risk_factors: Dicts read through the keys ``clause_text``,
            ``severity``, ``explanation`` and ``suggestion``. Only the first
            150 characters of a (stripped) clause are searched for and only
            its first occurrence is highlighted. Factors whose clause is
            empty, not found, or overlaps an already highlighted clause are
            skipped.
        jargon_definitions: Mapping of term -> definition. Terms shorter than
            three characters are ignored; matching is whole-word and
            case-insensitive.

    Returns:
        ``text`` with ``<span class="tooltip risk-<severity>">`` around each
        located clause and ``<span class="tooltip jargon-term">`` around at
        most five jargon occurrences. As before, when more than five
        occurrences qualify the ones closest to the end of the text are kept.
        Jargon inside a highlighted clause is left alone.
    """
    import html
    import re

    max_jargon_highlights = 5

    def _attr(value, limit):
        # Tooltip text is placed inside a double-quoted title="" attribute,
        # so it must be truncated first and then HTML-escaped (quotes too).
        return html.escape((value or "")[:limit], quote=True)

    def _overlaps(start, end, taken):
        return any(start < t_end and t_start < end for t_start, t_end, _ in taken)

    # --- Risk clauses -----------------------------------------------------
    risk_spans = []  # (start, end, markup)
    for factor in risk_factors:
        # Clean and limit clause text; a whitespace-only clause is ignored.
        clause_text = (factor.get("clause_text") or "").strip()[:150]
        if not clause_text:
            continue

        start = text.find(clause_text)
        if start == -1:
            continue
        end = start + len(clause_text)
        if _overlaps(start, end, risk_spans):
            continue  # e.g. the same clause reported twice

        severity = html.escape(factor.get("severity") or "low", quote=True)
        tooltip = f"⚠️ Risk: {severity.upper()}<br>📝 {_attr(factor.get('explanation'), 200)}"
        suggestion = _attr(factor.get("suggestion"), 200)
        if suggestion:
            tooltip += f"<br>💡 Suggestion: {suggestion}"

        risk_spans.append(
            (
                start,
                end,
                f'<span class="tooltip risk-{severity}" title="{tooltip}">{clause_text}</span>',
            )
        )

    # --- Jargon terms -----------------------------------------------------
    candidates = []
    for term, definition in jargon_definitions.items():
        if len(term) < 3:  # Skip very short terms
            continue

        title = f"📚 {html.escape(term, quote=True)}: {_attr(definition, 150)}"
        pattern = re.compile(r"\b" + re.escape(term) + r"\b", re.IGNORECASE)

        for match in pattern.finditer(text):
            start, end = match.span()
            if _overlaps(start, end, risk_spans):
                continue
            candidates.append(
                (
                    start,
                    end,
                    f'<span class="tooltip jargon-term" title="{title}">{match.group()}</span>',
                )
            )

    # Latest occurrences first; keep a handful to avoid clutter and never
    # accept two jargon highlights that cover the same characters.
    candidates.sort(key=lambda span: span[0], reverse=True)
    jargon_spans = []
    for candidate in candidates:
        if len(jargon_spans) == max_jargon_highlights:
            break
        if not _overlaps(candidate[0], candidate[1], jargon_spans):
            jargon_spans.append(candidate)

    # --- Assemble ---------------------------------------------------------
    pieces = []
    cursor = 0
    for start, end, markup in sorted(risk_spans + jargon_spans, key=lambda span: span[0]):
        pieces.append(text[cursor:start])
        pieces.append(markup)
        cursor = end
    pieces.append(text[cursor:])
    return "".join(pieces)
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def _show_no_document_selected():
    """Render the placeholder page shown when no document is loaded."""
    st.info("📊 **Document Analysis Page**")
    st.markdown("### No document selected for analysis")
    st.markdown("""
    To view analysis results, you need to:
    1. **Upload a new document** for instant analysis, or
    2. **Check your library** for previously analyzed documents
    """)

    col1, col2, col3 = st.columns(3)

    with col1:
        if st.button("📄 Upload Document", type="primary", use_container_width=True):
            st.session_state.page = "📄 Upload"
            st.rerun()

    with col2:
        if st.button("📚 View Library", use_container_width=True):
            # The original literal held a mis-encoded character in place of
            # the emoji; restored to match the button label.
            # NOTE(review): confirm this equals the page name used by the
            # application's navigation.
            st.session_state.page = "📚 Library"
            st.rerun()

    with col3:
        if st.button("🏠 Go Home", use_container_width=True):
            st.session_state.page = "🏠 Home"
            st.rerun()

    # Show recently analyzed documents if available
    if st.session_state.get("documents_library"):
        st.markdown("---")
        st.markdown("### 📋 Recently Analyzed Documents")
        st.markdown("Click on any document below to view its analysis:")

        for doc in st.session_state.documents_library[-3:]:  # Show last 3
            # A stored document_type of None must not break .title().
            doc_type = (doc.get("document_type") or "Unknown").title()
            col1, col2 = st.columns([3, 1])
            with col1:
                st.markdown(f"**{doc.get('filename', 'Unknown')}** - {doc_type}")
            with col2:
                if st.button("View Analysis", key=f"view_{doc.get('id')}", use_container_width=True):
                    # Load this document for analysis
                    st.session_state.current_document = doc
                    st.rerun()


def show_analysis_interface():
    """Display the document analysis interface.

    With no ``current_document`` in the session state a placeholder page with
    navigation buttons (and up to three recent library entries) is rendered.
    Otherwise the page shows, in order: the header, the risk dashboard, the
    original and simplified text side by side, five analysis tabs and a row of
    action buttons. An unprocessed sample document is processed first and the
    function returns without rendering the rest.
    """
    if not st.session_state.get("current_document"):
        _show_no_document_selected()
        return

    doc = st.session_state.current_document

    # Header
    doc_type = (doc.get("document_type") or "Unknown").title()
    st.header("📊 Document Analysis")
    st.markdown(
        f"**File:** {doc.get('filename', 'Unknown')} | **Type:** {doc_type}"
    )

    # If it's a sample document, process it first
    if doc.get("is_sample") and not doc.get("processed"):
        process_sample_document(doc)
        return

    # Risk Score Dashboard
    show_risk_dashboard(doc)

    # Document Content Analysis
    col1, col2 = st.columns([1, 1])

    with col1:
        show_original_document(doc)

    with col2:
        show_simplified_version(doc)

    # Additional Analysis Sections
    st.markdown("---")

    # Tabs for different analysis views
    tab1, tab2, tab3, tab4, tab5 = st.tabs(
        [
            "📋 Summary",
            "⚠️ Risk Factors",
            "📅 Key Dates",
            "💰 Financial Terms",
            "📊 Market Comparison",
        ]
    )

    with tab1:
        show_document_summary(doc)

    with tab2:
        show_risk_factors(doc)

    with tab3:
        show_key_dates(doc)

    with tab4:
        show_financial_terms(doc)

    with tab5:
        show_market_comparison(doc)

    # Action buttons
    st.markdown("---")
    col1, col2, col3 = st.columns(3)

    with col1:
        if st.button("💬 Ask Questions", use_container_width=True):
            st.session_state.page = "💬 Q&A"
            st.rerun()

    with col2:
        if st.button("📥 Export Report", use_container_width=True):
            export_report(doc)

    with col3:
        if st.button("📄 Analyze New Document", use_container_width=True):
            st.session_state.current_document = None
            st.session_state.page = "📄 Upload"
            st.rerun()
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
def process_sample_document(doc):
    """Run the simulated analysis for a sample document and reload the page.

    Shows a progress bar stepping through five fake stages, fills ``doc`` in
    place with mock analysis results (marking it ``processed``), stores it as
    the session's ``current_document`` and finally triggers a rerun.
    """
    st.info("🤖 Processing sample document with AI analysis...")

    bar = st.progress(0)
    status = st.empty()

    # Simulated pipeline: (percent complete, message shown to the user)
    stages = (
        (20, "📄 Extracting text..."),
        (40, "🔍 Detecting document type..."),
        (60, "⚠️ Analyzing risks..."),
        (80, "💬 Simplifying language..."),
        (100, "📋 Generating summary..."),
    )
    for percent, message in stages:
        status.text(message)
        bar.progress(percent)
        time.sleep(0.5)

    # Mock results are derived from the document type only
    kind = doc.get("document_type", "other")
    mock_risks = generate_mock_risk_factors(kind)
    mock_simplified = generate_mock_simplified_text(doc.get("original_text", ""), kind)
    mock_summary = generate_mock_summary(kind)

    analysis = {
        "risk_data": {
            "risk_factors": mock_risks,
            "overall_assessment": f"This {kind} document contains several high-risk clauses.",
        },
        "simplified_text": mock_simplified,
        "summary": mock_summary,
        "key_points": [f"Key point {number} for {kind}" for number in (1, 2, 3)],
        "jargon_definitions": {
            "Liability": "Legal responsibility for damages",
            "Arbitration": "Dispute resolution outside of court",
        },
        "processed": True,
        "analysis_timestamp": time.time(),
    }

    # Merge into the caller's dict and publish it to the session
    doc.update(analysis)
    st.session_state.current_document = doc

    bar.empty()
    status.empty()
    st.success("✅ Analysis complete!")
    time.sleep(1)  # leave the success message visible briefly before rerunning
    st.rerun()
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
def show_risk_dashboard(doc):
    """Render the risk gauge, the two summary metrics and the assessment note.

    The score is 15 points per risk factor, capped at 100. It maps to a level
    of Low (< 25), Medium (< 50), High (< 75) or Critical (otherwise).
    """
    assessment_data = doc.get("risk_data", {})
    factors = assessment_data.get("risk_factors", [])
    factor_count = len(factors)

    # Calculate risk score
    risk_score = min(factor_count * 15, 100)

    gauge_col, count_col, level_col = st.columns([2, 1, 1])

    with gauge_col:
        # Gauge chart: coloured bands per quarter, marker line at 90
        gauge_config = {
            "axis": {"range": [None, 100]},
            "bar": {"color": get_risk_color(risk_score)},
            "steps": [
                {"range": [0, 25], "color": "lightgray"},
                {"range": [25, 50], "color": "gray"},
                {"range": [50, 75], "color": "lightcoral"},
                {"range": [75, 100], "color": "red"},
            ],
            "threshold": {
                "line": {"color": "red", "width": 4},
                "thickness": 0.75,
                "value": 90,
            },
        }
        indicator = go.Indicator(
            mode="gauge+number+delta",
            value=risk_score,
            domain={"x": [0, 1], "y": [0, 1]},
            title={"text": "Risk Score"},
            delta={"reference": 50},
            gauge=gauge_config,
        )
        fig = go.Figure(indicator)
        fig.update_layout(height=300)
        st.plotly_chart(fig, use_container_width=True)

    with count_col:
        above_average = max(0, factor_count - 3)
        st.metric(
            label="Risk Factors Found",
            value=factor_count,
            delta=f"vs average: +{above_average}",
        )

    with level_col:
        if risk_score < 25:
            risk_level = "Low"
        elif risk_score < 50:
            risk_level = "Medium"
        elif risk_score < 75:
            risk_level = "High"
        else:
            risk_level = "Critical"

        colour_mode = "inverse" if risk_score > 50 else "normal"
        st.metric(
            label="Risk Level",
            value=risk_level,
            delta_color=colour_mode,
        )

    # Risk assessment summary
    if assessment_data.get("overall_assessment"):
        st.info(f"**Assessment:** {assessment_data['overall_assessment']}")
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
def show_original_document(doc):
    """Render the original text with risk/jargon highlights and tooltips.

    Injects the tooltip stylesheet, then the highlighted text as raw HTML.
    Texts longer than 1000 characters additionally get an expander holding the
    plain, unhighlighted text in a read-only text area.
    """
    st.subheader("📄 Original Document")

    source_text = doc.get("original_text", "")
    factors = doc.get("risk_data", {}).get("risk_factors", [])
    glossary = doc.get("jargon_definitions", {})

    # Advanced highlighting with hover tooltips
    annotated = create_advanced_highlighting(source_text, factors, glossary)

    # Custom CSS for hover tooltips with responsive theming
    tooltip_css = """
    <style>
    .tooltip {
    position: relative;
    display: inline;
    cursor: help;
    border-radius: 4px;
    padding: 2px 4px;
    margin: 0 1px;
    }

    /* Risk highlighting with theme-aware backgrounds */
    .risk-critical {
    background-color: rgba(255, 68, 68, 0.2);
    border-left: 4px solid #ff4444;
    padding: 4px 8px;
    border-radius: 4px;
    cursor: help;
    }
    .risk-high {
    background-color: rgba(255, 136, 0, 0.2);
    border-left: 4px solid #ff8800;
    padding: 4px 8px;
    border-radius: 4px;
    cursor: help;
    }
    .risk-medium {
    background-color: rgba(255, 204, 0, 0.2);
    border-left: 4px solid #ffcc00;
    padding: 4px 8px;
    border-radius: 4px;
    cursor: help;
    }
    .risk-low {
    background-color: rgba(68, 170, 68, 0.2);
    border-left: 4px solid #44aa44;
    padding: 4px 8px;
    border-radius: 4px;
    cursor: help;
    }

    /* Jargon term highlighting */
    .jargon-term {
    background-color: rgba(46, 134, 171, 0.2);
    text-decoration: underline dotted #2e86ab;
    padding: 2px 4px;
    border-radius: 3px;
    cursor: help;
    }

    /* Enhanced tooltips */
    .tooltip:hover {
    opacity: 0.8;
    }
    </style>
    """
    st.markdown(tooltip_css, unsafe_allow_html=True)

    st.markdown(annotated, unsafe_allow_html=True)

    # Scroll area for long documents
    is_long = len(source_text) > 1000
    if is_long:
        with st.expander("View Full Document"):
            st.text_area("Full Text", source_text, height=400, disabled=True)
|
| 447 |
+
|
| 448 |
+
|
| 449 |
+
def show_simplified_version(doc):
    """Render the simplified text, then key points and term explanations.

    The key-point and glossary sections are emitted only when the document
    carries non-empty ``key_points`` / ``jargon_definitions``.
    """
    st.subheader("💬 Simplified Version")

    # Falls back to a placeholder while no simplified text is stored
    st.markdown(doc.get("simplified_text", "Processing..."))

    # Key points
    bullet_points = doc.get("key_points", [])
    if bullet_points:
        st.markdown("**Key Points:**")
        for entry in bullet_points:
            st.markdown(f"• {entry}")

    # Jargon definitions
    glossary = doc.get("jargon_definitions", {})
    if glossary:
        st.markdown("**Legal Terms Explained:**")
        for word, meaning in glossary.items():
            st.markdown(f"**{word}:** {meaning}")
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
def show_document_summary(doc):
    """Render the summary text followed by a two-column metadata section.

    The left column shows type and filename; the right column shows the file
    size and analysis time, each only when the document has a truthy value for
    it.
    """
    st.markdown(doc.get("summary", "Generating summary..."))

    # Document metadata
    st.markdown("### 📊 Document Information")
    left, right = st.columns(2)

    with left:
        type_label = doc.get('document_type', 'Unknown').title()
        st.markdown(f"**Type:** {type_label}")
        st.markdown(f"**Filename:** {doc.get('filename', 'Unknown')}")

    with right:
        if doc.get("file_size"):
            from ..utils.helpers import format_file_size

            readable_size = format_file_size(doc['file_size'])
            st.markdown(f"**Size:** {readable_size}")

        if doc.get("analysis_timestamp"):
            import datetime

            # Stored as a POSIX timestamp; shown in local time
            analysed_at = datetime.datetime.fromtimestamp(doc["analysis_timestamp"])
            st.markdown(f"**Analyzed:** {analysed_at.strftime('%Y-%m-%d %H:%M')}")
|
| 495 |
+
|
| 496 |
+
|
| 497 |
+
def show_risk_factors(doc):
    """Render every risk factor found under ``doc["risk_data"]["risk_factors"]``.

    Each factor gets a severity banner (error / warning / info / success),
    followed by its clause, category, explanation and, when present, a
    suggestion. An info notice is shown instead when there are no factors.
    """
    factors = doc.get("risk_data", {}).get("risk_factors", [])
    if not factors:
        st.info("No significant risk factors identified in this document.")
        return

    # severity -> (banner function, banner title); anything else is "low".
    banners = {
        "critical": (st.error, "🚨 **Critical Risk #"),
        "high": (st.warning, "⚠️ **High Risk #"),
        "medium": (st.info, "🟡 **Medium Risk #"),
    }
    fallback = (st.success, "🟢 **Low Risk #")

    for number, item in enumerate(factors, start=1):
        announce, title = banners.get(item.get("severity", "low"), fallback)
        announce(f"{title}{number}**")

        clause = item.get("clause_text", "N/A")
        st.markdown(f"**Clause:** {clause}")
        category = item.get("category", "N/A").title()
        st.markdown(f"**Category:** {category}")
        explanation = item.get("explanation", "N/A")
        st.markdown(f"**Explanation:** {explanation}")

        if item.get("suggestion"):
            st.markdown(f"**Suggestion:** {item['suggestion']}")

        st.markdown("---")
|
| 526 |
+
|
| 527 |
+
|
| 528 |
+
def show_key_dates(doc):
    """Render dates extracted from the document plus a canned obligations timeline.

    The left column lists what ``extract_key_dates`` found in
    ``doc["original_text"]``. The right column shows hard-coded (mock)
    timeline entries chosen by ``doc["document_type"]``; when such entries
    exist they are also drawn as a Plotly timeline chart.
    """
    found = extract_key_dates(doc.get("original_text", ""))
    if not found:
        st.info("No specific dates found in this document.")
        return

    dates_col, timeline_col = st.columns([1, 1])

    with dates_col:
        st.markdown("**Important Dates Found:**")
        for entry in found:
            st.markdown(f"• **{entry['date']}** - Context: {entry['context']}")

    with timeline_col:
        st.markdown("**Timeline & Obligations:**")

        # Mock timeline data, keyed by document type.
        presets = {
            "rental": [
                {
                    "date": "1st of every month",
                    "event": "Rent Payment Due",
                    "type": "recurring",
                },
                {
                    "date": "30 days notice",
                    "event": "Termination Notice Required",
                    "type": "condition",
                },
                {
                    "date": "End of lease",
                    "event": "Security Deposit Return",
                    "type": "deadline",
                },
            ],
            "loan": [
                {
                    "date": "15th of every month",
                    "event": "EMI Payment Due",
                    "type": "recurring",
                },
                {
                    "date": "7 days after due",
                    "event": "Late Fee Applicable",
                    "type": "penalty",
                },
                {"date": "24 months", "event": "Loan Maturity", "type": "deadline"},
            ],
            "employment": [
                {
                    "date": "Last day of month",
                    "event": "Salary Payment",
                    "type": "recurring",
                },
                {
                    "date": "90 days",
                    "event": "Resignation Notice Period",
                    "type": "condition",
                },
                {
                    "date": "2 years post-termination",
                    "event": "Non-compete Expires",
                    "type": "deadline",
                },
            ],
        }
        timeline_items = presets.get(doc.get("document_type", "other"), [])

        # Entry types without a dedicated icon fall back to the pin.
        icons = {"recurring": "🔄", "penalty": "⚠️", "deadline": "📅"}
        for item in timeline_items:
            marker = icons.get(item["type"], "📌")
            st.markdown(f"{marker} **{item['date']}**: {item['event']}")

    # Visual timeline chart (only when there is something to plot).
    if not timeline_items:
        return

    st.markdown("---")
    st.markdown("**📊 Visual Timeline**")

    import pandas as pd

    df = pd.DataFrame(
        [
            {
                "Event": item["event"],
                "Timeline": item["date"],
                "Type": item["type"].title(),
                "Order": position,
            }
            for position, item in enumerate(timeline_items)
        ]
    )

    # Colour per entry type.
    color_map = {
        "Recurring": "#2e86ab",
        "Penalty": "#ff4444",
        "Deadline": "#ff8800",
        "Condition": "#44aa44",
    }

    row_count = len(df)
    fig = px.timeline(
        df,
        x_start=[0] * row_count,
        x_end=[1] * row_count,
        y="Event",
        color="Type",
        color_discrete_map=color_map,
        title="Contract Timeline & Obligations",
    )
    st.plotly_chart(fig, use_container_width=True)
|
| 654 |
+
|
| 655 |
+
|
| 656 |
+
def show_financial_terms(doc):
    """Render the financial terms extracted from ``doc["original_text"]``.

    Monetary amounts go in the left column; percentages and interest rates
    go in the right column. A section is written only when its key is
    present in the result of ``extract_financial_terms``.
    """
    terms = extract_financial_terms(doc.get("original_text", ""))
    if not terms:
        st.info("No financial terms identified in this document.")
        return

    def bullet_section(key, heading):
        # Heading plus one bullet per value, skipped when the key is absent.
        if key in terms:
            st.markdown(heading)
            for value in terms[key]:
                st.markdown(f"• {value}")

    amounts_col, rates_col = st.columns(2)

    with amounts_col:
        bullet_section("amounts", "**Monetary Amounts:**")

    with rates_col:
        bullet_section("percentages", "**Percentages/Rates:**")
        bullet_section("interest_rates", "**Interest Rates:**")
|
| 683 |
+
|
| 684 |
+
|
| 685 |
+
def _build_report_text(doc):
    """Assemble the plain-text analysis report for ``doc``."""
    risk_data = doc.get("risk_data", {})

    parts = [
        f"""
LEGA.AI DOCUMENT ANALYSIS REPORT
{'='*50}

Document: {doc.get('filename', 'Unknown')}
Type: {doc.get('document_type', 'Unknown').title()}
Analysis Date: {time.strftime('%Y-%m-%d %H:%M:%S')}

SUMMARY:
{doc.get('summary', 'No summary available')}

RISK ASSESSMENT:
{risk_data.get('overall_assessment', 'No risk assessment available')}

RISK FACTORS:
"""
    ]

    for number, factor in enumerate(risk_data.get("risk_factors", []), start=1):
        parts.append(
            f"""
{number}. {factor.get('severity', 'Unknown').upper()} RISK
Category: {factor.get('category', 'N/A').title()}
Clause: {factor.get('clause_text', 'N/A')}
Explanation: {factor.get('explanation', 'N/A')}
"""
        )

    parts.append(
        f"""

SIMPLIFIED VERSION:
{doc.get('simplified_text', 'No simplified version available')}

KEY POINTS:
"""
    )
    parts.extend(f"• {point}\n" for point in doc.get("key_points", []))
    parts.append("\n\nGenerated by Lega.AI - Making legal documents accessible")
    return "".join(parts)


def _strip_document_extension(filename):
    """Drop one trailing .pdf/.docx/.txt suffix (case-insensitive), if present."""
    lowered = filename.lower()
    for suffix in (".pdf", ".docx", ".txt"):
        if lowered.endswith(suffix):
            return filename[: -len(suffix)]
    return filename


def export_report(doc):
    """Offer the analysis of ``doc`` as a downloadable text report.

    The report is plain text, so it is served as ``.txt`` with the
    ``text/plain`` MIME type. Labelling the same bytes as
    ``application/pdf`` (as before) produced a file that PDF readers
    refuse to open.

    Args:
        doc: Document dict; reads ``filename``, ``document_type``,
            ``summary``, ``risk_data``, ``simplified_text`` and
            ``key_points``, all optional.
    """
    report = _build_report_text(doc)

    # A missing or None filename falls back to a generic name.
    base_name = _strip_document_extension(doc.get("filename") or "document")

    st.download_button(
        label="📥 Download Report",
        data=report,
        file_name=f"lega_ai_report_{base_name}.txt",
        mime="text/plain",
    )

    st.success("✅ Report prepared for download!")
|
| 745 |
+
|
| 746 |
+
|
| 747 |
+
def generate_mock_risk_factors(doc_type):
    """Return canned risk factors for the sample documents.

    Known types are ``"rental"``, ``"loan"`` and ``"employment"``; any
    other value yields an empty list. The table is rebuilt on every call,
    so callers always receive fresh objects.
    """

    def factor(clause_text, category, severity, explanation, suggestion):
        # One risk-factor record in the shape the risk views read.
        return {
            "clause_text": clause_text,
            "category": category,
            "severity": severity,
            "explanation": explanation,
            "suggestion": suggestion,
        }

    samples = {
        "rental": [
            factor(
                "Late payments will incur a penalty of Rs. 1,000 per day",
                "financial",
                "high",
                "Daily penalties can quickly escalate to substantial amounts",
                "Negotiate a more reasonable penalty structure",
            ),
            factor(
                "Tenant is responsible for all repairs and maintenance",
                "financial",
                "medium",
                "This places unusual burden on tenant for structural repairs",
                "Clarify that structural repairs remain landlord responsibility",
            ),
        ],
        "loan": [
            factor(
                "24% per annum (APR 28.5% including processing fees)",
                "financial",
                "critical",
                "Interest rate is significantly above market rates",
                "Shop around for better rates from other lenders",
            ),
            factor(
                "Lender may seize collateral immediately upon default",
                "rights",
                "high",
                "No grace period or notice before asset seizure",
                "Negotiate for notice period and cure opportunity",
            ),
        ],
        "employment": [
            factor(
                "Employee shall not work for any competing company for 2 years",
                "commitment",
                "high",
                "Non-compete period is unusually long and broad",
                "Negotiate shorter period and narrower scope",
            ),
            factor(
                "Company may terminate employment at any time without cause",
                "rights",
                "medium",
                "No job security or notice period for termination",
                "Request notice period and severance terms",
            ),
        ],
    }
    return samples.get(doc_type, [])
|
| 802 |
+
|
| 803 |
+
|
| 804 |
+
def generate_mock_simplified_text(original_text, doc_type):
    """Return a canned plain-language explanation for a sample document.

    ``original_text`` is accepted but not used; the text is chosen by
    ``doc_type`` alone (``"rental"``, ``"loan"`` or ``"employment"``).
    Any other type yields a one-line placeholder.
    """
    rental_text = """
**What this rental agreement means in simple terms:**

You're renting a property in Mumbai for ₹25,000 per month. Here are the key things to know:

• **Payment:** You must pay rent by the 1st of each month. If you're late, you'll be charged ₹1,000 for each day you're late.

• **Security deposit:** You need to pay ₹75,000 upfront as security. This money is hard to get back.

• **Repairs:** You're responsible for fixing everything that breaks, even major structural problems.

• **Leaving early:** If you want to leave before the lease ends, you lose your security deposit.

**Watch out for:** The daily late fees and your responsibility for all repairs are unusual and costly.
"""
    loan_text = """
**What this loan agreement means in simple terms:**

You're borrowing ₹2,00,000 but will pay back ₹3,00,000 total - that's ₹1,00,000 extra in interest and fees.

• **Monthly payment:** ₹12,500 every month for 2 years

• **Interest rate:** 24% per year (very high - normal rates are 10-15%)

• **Late fees:** ₹500 per day if you're late

• **Your gold jewelry:** The lender can take it immediately if you miss payments

• **Total cost:** You'll pay 50% more than you borrowed

**Warning:** This is an expensive loan. The interest rate is much higher than banks typically charge.
"""
    employment_text = """
**What this employment contract means in simple terms:**

You're being hired as a Software Developer for ₹8,00,000 per year. Here's what you need to know:

• **Working hours:** 45 hours per week, including weekends when needed

• **Salary:** ₹66,667 per month

• **If you quit:** You must give 90 days notice

• **If they fire you:** They can fire you anytime without reason or notice

• **After leaving:** You can't work for competing companies for 2 years

• **Side work:** You can't do any other work while employed

**Concerns:** The 2-year non-compete and ability to fire without notice are harsh terms.
"""
    by_type = {
        "rental": rental_text,
        "loan": loan_text,
        "employment": employment_text,
    }
    return by_type.get(
        doc_type, "Document simplified version will appear here after analysis."
    )
|
| 862 |
+
|
| 863 |
+
|
| 864 |
+
def show_market_comparison(doc):
    """Render the market-benchmark section that matches the document type.

    Rental, loan and employment documents are handed to their dedicated
    comparison view; every other type gets an informational notice.
    """
    kind = doc.get("document_type", "other")

    st.markdown("**Market Context & Benchmarking**")

    views = {
        "rental": show_rental_market_comparison,
        "loan": show_loan_market_comparison,
        "employment": show_employment_market_comparison,
    }
    view = views.get(kind)
    if view is None:
        st.info(
            "Market comparison data available for rental, loan, and employment contracts."
        )
        return
    view(doc)
|
| 880 |
+
|
| 881 |
+
|
| 882 |
+
def show_rental_market_comparison(doc):
    """Render the rental market comparison.

    All figures are hard-coded mock data; ``doc`` is not read.
    """
    analysis_col, benchmark_col = st.columns(2)

    with analysis_col:
        st.markdown("#### 🏠 Rental Market Analysis")
        # (contract term, verdict banner, verdict text)
        findings = (
            (
                "**Security Deposit:** ₹75,000",
                st.success,
                "✅ Standard: Typically 2-3 months rent",
            ),
            (
                "**Late Penalty:** ₹1,000/day",
                st.error,
                "❌ Above Market: Typical penalties are ₹100-500/day",
            ),
            (
                "**Maintenance Responsibility:** Tenant",
                st.warning,
                "⚠️ Unusual: Structural repairs typically landlord's responsibility",
            ),
        )
        for term, verdict, note in findings:
            st.markdown(term)
            verdict(note)

    with benchmark_col:
        st.markdown("#### 📊 Mumbai Rental Benchmarks")

        # Mock market data
        benchmarks = (
            ("Average Rent (2BHK)", "₹28,000"),
            ("Security Deposit Range", "₹50,000 - ₹84,000"),
            ("Standard Late Fee", "₹200/day"),
            ("Tenant Maintenance", "10% of agreements"),
        )
        for label, figure in benchmarks:
            st.metric(label, figure)
|
| 910 |
+
|
| 911 |
+
|
| 912 |
+
def show_loan_market_comparison(doc):
    """Render the loan market comparison.

    The left column lists the loan's headline terms with a verdict on
    each; the right column draws a bar chart comparing the interest rate
    with bank and NBFC averages. All figures are hard-coded mock data;
    ``doc`` is not read.
    """
    col1, col2 = st.columns(2)

    with col1:
        st.markdown("#### 💰 Loan Market Analysis")
        st.markdown("**Interest Rate:** 24% per annum")
        st.error("❌ Well Above Market: Bank rates typically 10-15%")

        st.markdown("**Processing Fee:** ₹10,000")
        st.warning("⚠️ High: Typical processing fees 1-2% of loan amount")

        st.markdown("**Total Repayment:** ₹3,00,000 for ₹2,00,000")
        st.error("❌ Very High: 50% more than principal")

    with col2:
        st.markdown("#### 📊 Personal Loan Benchmarks")

        # Plotly Express treats `color` values as category labels unless
        # told otherwise, so "red"/"green"/"orange" used to appear as legend
        # names drawn in the default palette. The "identity" map makes
        # them be used directly as the bar colours.
        fig = px.bar(
            x=["Your Loan", "Bank Average", "NBFC Average"],
            y=[24, 12, 18],
            title="Interest Rate Comparison (%)",
            color=["red", "green", "orange"],
            color_discrete_map="identity",
        )
        st.plotly_chart(fig, use_container_width=True)
|
| 938 |
+
|
| 939 |
+
|
| 940 |
+
def show_employment_market_comparison(doc):
    """Render the employment market comparison.

    All figures are hard-coded mock data; ``doc`` is not read.
    """
    analysis_col, standards_col = st.columns(2)

    with analysis_col:
        st.markdown("#### 💼 Employment Market Analysis")
        # (contract term, verdict banner, verdict text)
        findings = (
            (
                "**Non-compete Period:** 2 years",
                st.error,
                "❌ Excessive: Typical non-compete is 6-12 months",
            ),
            (
                "**Notice Period:** 90 days",
                st.warning,
                "⚠️ Long: Standard notice is 30-60 days",
            ),
            (
                "**At-will Termination:** Yes",
                st.error,
                "❌ Unfavorable: Most contracts provide notice period",
            ),
        )
        for term, verdict, note in findings:
            st.markdown(term)
            verdict(note)

    with standards_col:
        st.markdown("#### 📊 IT Industry Standards")

        norms = (
            ("Average Salary (3-5 YOE)", "₹8-12 lakhs"),
            ("Standard Notice Period", "30-60 days"),
            ("Typical Non-compete", "6-12 months"),
            ("Weekend Work", "Occasionally, not mandatory"),
        )
        for label, figure in norms:
            st.metric(label, figure)
|
| 967 |
+
|
| 968 |
+
|
| 969 |
+
def generate_mock_summary(doc_type):
    """Return a canned one-paragraph summary for a sample document type.

    Types other than ``"rental"``, ``"loan"`` and ``"employment"`` get a
    placeholder sentence.
    """
    summaries = {
        "rental": "This is a residential lease agreement for a property in Mumbai with rent of ₹25,000/month. The agreement contains several tenant-unfavorable terms including high daily late fees, tenant responsibility for all repairs, and forfeiture of security deposit for early termination.",
        "loan": "This is a personal loan agreement for ₹2,00,000 with very high interest rates (24% APR, 28.5% effective). The loan requires gold jewelry as collateral and includes harsh default terms with immediate asset seizure rights.",
        "employment": "This is an employment contract for a Software Developer position with ₹8,00,000 annual salary. The contract includes restrictive terms like a 2-year non-compete clause, at-will termination by employer, and prohibition on side work.",
    }
    placeholder = "Document summary will appear here after analysis."
    return summaries.get(doc_type, placeholder)
|
src/pages/library.py
ADDED
|
@@ -0,0 +1,340 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from typing import List, Dict
|
| 4 |
+
import time
|
| 5 |
+
|
| 6 |
+
from ..utils.helpers import format_file_size, format_timestamp
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def show_library_interface():
    """Render the Document Library page.

    Reads the library from ``st.session_state["documents_library"]``. An
    empty library shows the empty-state view; otherwise the statistics,
    filters and document grid are rendered in that order.
    """
    st.header("📚 Document Library")
    st.markdown("Manage and review all your analyzed documents")

    library = st.session_state.get("documents_library", [])

    if library:
        show_library_stats(library)
        # The filters store their result in session state for the grid.
        show_library_filters(library)
        show_document_grid(library)
    else:
        show_empty_library()
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def show_empty_library():
    """Render the empty-library state.

    Shows a centred notice and an upload button that switches to the
    Upload page, followed by the sample-documents section.
    """
    empty_notice = """
<div style="text-align: center; padding: 3rem;">
<h3>📚 Your Library is Empty</h3>
<p style="color: var(--text-color, #666); opacity: 0.7;">Upload and analyze documents to build your personal legal document library.</p>
</div>
"""
    st.markdown("---")

    # Only the wide middle column is used, which centres the content.
    _, middle, _ = st.columns([1, 2, 1])

    with middle:
        st.markdown(empty_notice, unsafe_allow_html=True)

        upload_clicked = st.button(
            "📄 Upload Your First Document", type="primary", use_container_width=True
        )
        if upload_clicked:
            st.session_state.page = "📄 Upload"
            st.rerun()

    # Sample documents to try out.
    st.markdown("---")
    show_sample_documents_section()
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def show_library_stats(documents: List[Dict]):
    """Render four summary metrics for the library.

    Shows the document count, the most frequent document type, the number
    of documents with ``risk_score`` above 60 (with its share of the
    total), and the combined ``file_size``.
    """
    doc_count = len(documents)

    # Tally per type; insertion order decides ties for the most common type.
    type_counts = {}
    for entry in documents:
        kind = entry.get("document_type", "other")
        type_counts[kind] = type_counts.get(kind, 0) + 1

    risky_count = sum(1 for entry in documents if entry.get("risk_score", 0) > 60)

    total_col, type_col, risk_col, size_col = st.columns(4)

    with total_col:
        st.metric(label="Total Documents", value=doc_count)

    with type_col:
        if type_counts:
            top_type = max(type_counts, key=type_counts.get)
        else:
            top_type = "None"
        st.metric(label="Most Common Type", value=top_type.title())

    with risk_col:
        if doc_count > 0:
            share = f"{risky_count/doc_count*100:.0f}% of total"
        else:
            share = "0%"
        st.metric(label="High Risk Documents", value=risky_count, delta=share)

    with size_col:
        combined_size = sum(entry.get("file_size", 0) for entry in documents)
        st.metric(label="Total Storage", value=format_file_size(combined_size))
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def _matches_risk_filter(score, selected_risk):
    """Return True when ``score`` falls in the bucket named by ``selected_risk``.

    Buckets use the same boundaries as the document cards (above 60 is
    high, above 30 is medium, the rest is low), so fractional scores such
    as 30.5 land in exactly one bucket.
    """
    if "Low Risk" in selected_risk:
        return score <= 30
    if "Medium Risk" in selected_risk:
        return 30 < score <= 60
    if "High Risk" in selected_risk:
        return score > 60
    return True


def show_library_filters(documents: List[Dict]):
    """Render the type / risk / search filters and store the filtered list.

    The result is written to ``st.session_state.filtered_documents``.

    Args:
        documents: Library entries; reads ``document_type`` (default
            ``"other"``), ``risk_score`` (default 0) and ``filename``.
    """
    st.markdown("---")

    col1, col2, col3 = st.columns(3)

    with col1:
        # Sorted so the option order is stable across reruns and restarts.
        known_types = {doc.get("document_type", "other") for doc in documents}
        doc_types = ["All"] + sorted(known_types, key=str)
        selected_type = st.selectbox("Filter by Type", doc_types)

    with col2:
        risk_levels = [
            "All",
            "Low Risk (0-30)",
            "Medium Risk (31-60)",
            "High Risk (61+)",
        ]
        selected_risk = st.selectbox("Filter by Risk", risk_levels)

    with col3:
        search_term = st.text_input(
            "Search documents", placeholder="Enter filename or content..."
        )

    # Lower-case the search term once; an empty needle matches everything.
    needle = search_term.lower() if search_term else ""

    def keep(doc):
        # Use the same "other" default as the option list, so documents
        # without a type can actually be selected.
        if selected_type != "All" and doc.get("document_type", "other") != selected_type:
            return False
        if selected_risk != "All" and not _matches_risk_filter(
            doc.get("risk_score") or 0, selected_risk
        ):
            return False
        # Only the filename is searched; a missing/None name is treated as "".
        return needle in (doc.get("filename") or "").lower()

    # Store filtered docs for grid display
    st.session_state.filtered_documents = [doc for doc in documents if keep(doc)]
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def show_document_grid(documents: List[Dict]):
    """Render the library as a two-column grid of document cards.

    Uses ``st.session_state["filtered_documents"]`` when it is set and
    falls back to ``documents`` otherwise. Shows a notice when nothing
    matches.
    """
    visible = st.session_state.get("filtered_documents", documents)

    if not visible:
        st.info("No documents match your filter criteria.")
        return

    st.markdown("---")
    st.subheader(f"📄 Documents ({len(visible)})")

    # Two cards per row; the last row may hold a single card.
    for row_start in range(0, len(visible), 2):
        row_docs = visible[row_start : row_start + 2]
        for column, entry in zip(st.columns(2), row_docs):
            with column:
                show_document_card(entry)
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
def show_document_card(doc: Dict):
    """Render one library card: title, risk badge, details and action buttons.

    The risk badge is high above 60, medium above 30 and low otherwise.
    The View, Q&A and Delete buttons are keyed by ``doc["id"]`` and call
    the matching handler with that id when clicked.
    """
    score = doc.get("risk_score", 0)
    if score > 60:
        badge = "🔴 High Risk"
    elif score > 30:
        badge = "🟠 Medium Risk"
    else:
        badge = "🟢 Low Risk"

    # Container groups the card's elements.
    with st.container():
        title_col, badge_col = st.columns([3, 1])
        with title_col:
            st.markdown(f"**📄 {doc.get('filename', 'Unknown')}**")
        with badge_col:
            st.markdown(badge)

        kind = doc.get("document_type", "other").title()
        uploaded = doc.get("upload_date", "Unknown")
        size = format_file_size(doc.get("file_size", 0))
        st.markdown(f"📋 {kind} • 📅 {uploaded} • 💾 {size}")

        # Separator before the buttons.
        st.markdown("---")

        # (button label, widget-key prefix, click handler)
        actions = (
            ("📊 View", "view", load_document_for_analysis),
            ("💬 Q&A", "qa", load_document_for_qa),
            ("🗑️ Delete", "delete", delete_document),
        )
        for column, (label, prefix, handler) in zip(st.columns(3), actions):
            with column:
                if st.button(
                    label, key=f"{prefix}_{doc['id']}", use_container_width=True
                ):
                    handler(doc["id"])
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
def load_document_for_analysis(doc_id: str):
    """Make a library document current and switch to the Analysis page.

    Finds the first library entry whose ``id`` equals ``doc_id``. Does
    nothing when there is no such entry. The stored text is a placeholder
    and the document is flagged as a sample.
    """
    library = st.session_state.get("documents_library", [])
    match = next((entry for entry in library if entry["id"] == doc_id), None)
    if match is None:
        return

    # Placeholder payload; a real app would load the content from storage.
    st.session_state.current_document = {
        "id": match["id"],
        "filename": match["filename"],
        "document_type": match["document_type"],
        "original_text": f"Sample content for {match['filename']}...",
        "is_sample": True,
        "risk_score": match.get("risk_score", 0),
    }

    st.session_state.page = "📊 Analysis"
    st.rerun()
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
def load_document_for_qa(doc_id: str):
    """Make a library document current and switch to the Q&A page.

    Finds the first library entry whose ``id`` equals ``doc_id``. Does
    nothing when there is no such entry. The stored text is a placeholder
    and the document is flagged as a sample.
    """
    library = st.session_state.get("documents_library", [])
    match = next((entry for entry in library if entry["id"] == doc_id), None)
    if match is None:
        return

    # Placeholder payload; a real app would load the content from storage.
    st.session_state.current_document = {
        "id": match["id"],
        "filename": match["filename"],
        "document_type": match["document_type"],
        "original_text": f"Sample content for {match['filename']}...",
        "is_sample": True,
    }

    st.session_state.page = "💬 Q&A"
    st.rerun()
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
def delete_document(doc_id: str):
    """Delete a document from the library after a two-click confirmation.

    The first call for a given ``doc_id`` only sets the
    ``confirm_delete_<doc_id>`` flag in session state and asks the user to
    click again; the next call removes the document and clears the flag.
    Nothing in this module set that flag before, so the Delete button
    never removed anything.

    Args:
        doc_id: Value of the ``id`` key of the library entry to remove.
    """
    confirm_key = f"confirm_delete_{doc_id}"

    if not st.session_state.get(confirm_key):
        # First click: arm the confirmation instead of deleting right away.
        st.session_state[confirm_key] = True
        st.warning("⚠️ Click Delete again to confirm removing this document.")
        return

    documents = st.session_state.get("documents_library", [])
    st.session_state.documents_library = [
        doc for doc in documents if doc["id"] != doc_id
    ]

    # Clear confirmation state
    del st.session_state[confirm_key]

    st.success("✅ Document deleted from library")
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
def show_sample_documents_section():
    """List the bundled sample documents, each with an "Analyze Now" button.

    Looks for ``.pdf``, ``.docx`` and ``.txt`` files in ``./sample``
    (relative to the working directory). Clicking a button stores the
    filename in ``st.session_state.load_sample`` and switches to the
    Upload page. A notice is shown when no samples are found.
    """
    import os

    st.subheader("🎯 Try Sample Documents")
    st.markdown("Get started by analyzing our sample legal documents:")

    sample_dir = "./sample"
    supported = (".pdf", ".docx", ".txt")

    sample_files = []
    # isdir (not exists) so a stray file named "sample" cannot break listdir.
    if os.path.isdir(sample_dir):
        # listdir order is arbitrary; sort for a stable listing.
        sample_files = sorted(
            f for f in os.listdir(sample_dir) if f.endswith(supported)
        )

    if not sample_files:
        st.info("No sample documents available.")
        return

    # Friendlier descriptions for the known samples.
    descriptions = {
        "Employment_Offer_Letter.pdf": "📋 Analyze employment terms, benefits, and obligations",
        "Master_Services_Agreement.pdf": "🤝 Review service agreements and contract terms",
        "Mutual_NDA.pdf": "🔒 Examine confidentiality and non-disclosure clauses",
        "Residential_Lease_Agreement.pdf": "🏠 Check rental terms, deposits, and tenant rights",
    }

    for filename in sample_files:
        # Strip whichever extension the file has, not only ".pdf".
        title = os.path.splitext(filename)[0].replace("_", " ")
        with st.expander(f"📄 {title}", expanded=False):
            col1, col2 = st.columns([2, 1])

            with col1:
                description = descriptions.get(
                    filename, "📊 Analyze this legal document for risks and terms"
                )
                st.markdown(description)

            with col2:
                if st.button(
                    "Analyze Now",
                    key=f"sample_lib_{filename}",
                    use_container_width=True,
                ):
                    # Hand the chosen sample to the upload page.
                    st.session_state.load_sample = filename
                    st.session_state.page = "📄 Upload"
                    st.rerun()
|
src/pages/qa_assistant.py
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from typing import List, Dict
|
| 3 |
+
import time
|
| 4 |
+
|
| 5 |
+
from ..services.ai_analyzer import AIAnalyzer
|
| 6 |
+
from ..services.vector_store import VectorStoreService
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def show_qa_interface() -> None:
    """Display the Q&A assistant interface.

    Renders, in order: a guard that points the user to the upload page when
    no document is loaded, the stored chat history, one button per suggested
    question, a free-text question form, and a row of quick actions (back to
    analysis, clear chat, export chat).
    """

    # Nothing to ask about without a loaded document: offer a shortcut to the
    # upload page and stop rendering the rest of the page.
    if not st.session_state.get("current_document"):
        st.warning("⚠️ No document loaded. Please upload and analyze a document first.")
        if st.button("📄 Go to Upload"):
            st.session_state.page = "📄 Upload"
            st.rerun()
        return

    doc = st.session_state.current_document

    # Header
    st.header("💬 Q&A Assistant")
    st.markdown(f"Ask questions about **{doc.get('filename', 'your document')}**")

    # Initialize chat history
    if "qa_history" not in st.session_state:
        st.session_state.qa_history = []

    # Chat interface
    chat_container = st.container()

    with chat_container:
        # Display chat history
        # NOTE(review): the enumerate index ``i`` is not used in this loop.
        for i, qa in enumerate(st.session_state.qa_history):
            # User message
            with st.chat_message("user"):
                st.markdown(qa["question"])

            # Assistant response
            with st.chat_message("assistant"):
                st.markdown(qa["answer"])

    # Suggested questions
    st.markdown("### 💡 Suggested Questions")

    doc_type = doc.get("document_type", "other")
    suggested_questions = get_suggested_questions(doc_type)

    col1, col2 = st.columns(2)

    # Alternate the suggestion buttons between the two columns.
    for i, question in enumerate(suggested_questions):
        col = col1 if i % 2 == 0 else col2
        with col:
            if st.button(question, key=f"suggested_{i}", use_container_width=True):
                ask_question(question, doc)

    # Chat input
    st.markdown("### ❓ Ask Your Question")

    with st.form("question_form", clear_on_submit=True):
        user_question = st.text_input(
            "Type your question here...",
            placeholder="e.g., What happens if I terminate this contract early?",
            label_visibility="collapsed",
        )

        submitted = st.form_submit_button("Send", use_container_width=True)

        # Blank or whitespace-only submissions are ignored.
        if submitted and user_question.strip():
            ask_question(user_question, doc)

    # Quick actions
    st.markdown("---")
    col1, col2, col3 = st.columns(3)

    with col1:
        if st.button("📊 Back to Analysis", use_container_width=True):
            st.session_state.page = "📊 Analysis"
            st.rerun()

    with col2:
        if st.button("🗑️ Clear Chat", use_container_width=True):
            st.session_state.qa_history = []
            st.rerun()

    with col3:
        if st.button("📥 Export Chat", use_container_width=True):
            export_chat_history()
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def ask_question(question: str, doc: Dict):
    """Answer ``question`` about ``doc`` and record the exchange.

    An ``AIAnalyzer`` is asked for an answer using the document's
    ``"original_text"`` (empty string when absent) and its
    ``"document_type"`` (``"other"`` when absent). The question, the answer
    and the current timestamp are appended to
    ``st.session_state.qa_history`` and a rerun is requested so the new
    exchange is displayed. Any ``Exception`` raised along the way is shown
    to the user as an error message instead of propagating.
    """
    try:
        # Show thinking indicator
        with st.spinner("🤔 Thinking..."):
            analyzer = AIAnalyzer()

            from ..models.document import DocumentType

            kind = DocumentType(doc.get("document_type", "other"))
            source_text = doc.get("original_text", "")

            reply = analyzer.answer_question(
                question=question,
                document_text=source_text,
                document_type=kind,
            )

        exchange = {"question": question, "answer": reply, "timestamp": time.time()}
        st.session_state.qa_history.append(exchange)

        # Rerun to show the new Q&A
        st.rerun()

    except Exception as err:
        st.error(f"❌ Error processing question: {err}")
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def get_suggested_questions(doc_type: str) -> List[str]:
    """Return starter questions suited to ``doc_type``.

    Known types are ``"rental"``, ``"loan"``, ``"employment"``, ``"nda"`` and
    ``"service"``; any other value yields a generic list of questions.
    """
    generic_questions = [
        "What are the main obligations for each party?",
        "What are the key financial terms?",
        "How can this agreement be terminated?",
        "What are the potential risks for me?",
        "What should I be most careful about?",
        "Are there any unusual or concerning clauses?",
    ]

    catalogue = {
        "rental": [
            "What is the monthly rent amount?",
            "What happens if I pay rent late?",
            "How much is the security deposit?",
            "Can I terminate the lease early?",
            "Who is responsible for repairs?",
            "What are the landlord's obligations?",
            "Are pets allowed in the property?",
            "What happens if I damage the property?",
        ],
        "loan": [
            "What is the total amount I will repay?",
            "What is the effective interest rate?",
            "What happens if I miss a payment?",
            "What collateral is required?",
            "Can I repay the loan early?",
            "What are the processing fees?",
            "How is the interest calculated?",
            "What happens in case of default?",
        ],
        "employment": [
            "What is my total compensation package?",
            "How many hours am I expected to work?",
            "Can the company terminate me without notice?",
            "What are the non-compete restrictions?",
            "Am I allowed to work other jobs?",
            "What benefits am I entitled to?",
            "How much notice must I give to resign?",
            "Who owns the intellectual property I create?",
        ],
        "nda": [
            "What information is considered confidential?",
            "How long does the confidentiality last?",
            "What are the penalties for disclosure?",
            "Can I discuss this agreement with others?",
            "What happens after the agreement ends?",
            "Are there any exceptions to confidentiality?",
        ],
        "service": [
            "What services are included in this agreement?",
            "What is the payment schedule?",
            "How can this agreement be terminated?",
            "What are the deliverables and deadlines?",
            "Who is responsible for what costs?",
            "What happens if the work is unsatisfactory?",
        ],
    }

    if doc_type in catalogue:
        return catalogue[doc_type]
    return generic_questions
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
def export_chat_history():
    """Offer the current Q&A session as a downloadable plain-text file.

    Builds a text report (document name and type, export time, then every
    question/answer pair in ``st.session_state.qa_history``) and renders a
    download button for it. Shows a warning and returns early when there is
    no chat history.

    The report is plain text, so it is served as ``.txt`` with the
    ``text/plain`` MIME type. Labelling the same bytes as
    ``application/pdf`` produced a file that PDF readers cannot open.
    """
    # ``.get`` keeps this safe even if the history key was never initialised.
    history = st.session_state.get("qa_history", [])
    if not history:
        st.warning("No chat history to export.")
        return

    doc = st.session_state.current_document

    # Create chat export
    export_text = f"""
LEGA.AI Q&A SESSION EXPORT
{'='*50}

Document: {doc.get('filename', 'Unknown')}
Document Type: {doc.get('document_type', 'Unknown').title()}
Export Date: {time.strftime('%Y-%m-%d %H:%M:%S')}

QUESTIONS & ANSWERS:
{'='*50}

"""

    for number, qa in enumerate(history, start=1):
        export_text += f"""
Q{number}: {qa['question']}

A{number}: {qa['answer']}

{'-'*30}

"""

    export_text += "\nGenerated by Lega.AI - Making legal documents accessible"

    # Clean filename - drop a known document extension so the download name
    # does not read "contract.pdf.txt".
    filename = doc.get("filename", "document")
    for extension in (".pdf", ".docx", ".txt"):
        if filename.endswith(extension):
            filename = filename[: -len(extension)]

    # Offer download
    st.download_button(
        label="📥 Download Chat History",
        data=export_text,
        file_name=f"lega_ai_qa_{filename}.txt",
        mime="text/plain",
    )

    st.success("✅ Chat history prepared for download!")
|
src/pages/settings.py
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from src.utils.config import config
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def show_settings_interface():
    """Render the settings page: a header and one tab per settings category.

    The four tabs (API keys, preferences, usage, about) each delegate to the
    matching ``show_*`` function in this module.
    """
    st.header("⚙️ Settings")
    st.markdown("Configure your Lega.AI experience")

    # Tabs for different settings categories
    tab_labels = ["🔑 API Keys", "🎨 Preferences", "📊 Usage", "ℹ️ About"]
    tab_renderers = (
        show_api_settings,
        show_preference_settings,
        show_usage_stats,
        show_about_info,
    )

    for tab, render in zip(st.tabs(tab_labels), tab_renderers):
        with tab:
            render()
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def show_api_settings() -> None:
    """Display API key configuration.

    Shows whether ``config.GOOGLE_API_KEY`` is set to something other than
    the placeholder value, with setup instructions when it is not, followed
    by a form for entering a new key.

    NOTE(review): the form handler below only shows status messages; nothing
    in this function stores or applies the submitted key, even though the
    input's help text says it will be saved.
    """
    st.subheader("🔑 API Configuration")

    # Check current API key status
    # The literal placeholder string counts as "not configured".
    api_key_configured = bool(
        config.GOOGLE_API_KEY and config.GOOGLE_API_KEY != "your-google-api-key-here"
    )

    if api_key_configured:
        st.success("✅ Google AI API key is configured")
    else:
        st.warning("⚠️ Google AI API key not configured")
        st.markdown(
            """
        To use Lega.AI's AI features, you need to configure your Google AI API key:

        1. Go to [Google AI Studio](https://makersuite.google.com/)
        2. Create a new API key
        3. Copy the key and add it to your `.env` file
        4. Set `GOOGLE_API_KEY=your_actual_api_key`
        5. Restart the application
        """
        )

    # API key input (for demonstration)
    st.markdown("---")
    st.subheader("🔧 Update API Key")

    with st.form("api_key_form"):
        new_api_key = st.text_input(
            "Google AI API Key",
            type="password",
            placeholder="Enter your Google AI API key",
            help="This will be saved to your environment configuration",
        )

        submitted = st.form_submit_button("Update API Key")

        if submitted:
            # Only a non-blank check is performed on the entered value.
            if new_api_key.strip():
                st.success(
                    "✅ API key updated! Please restart the application for changes to take effect."
                )
                st.info("💡 Don't forget to update your `.env` file with the new key.")
            else:
                st.error("❌ Please enter a valid API key")
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def show_preference_settings():
    """Render the user-preference controls.

    Shows language/region selectors, analysis options (risk sensitivity,
    simplification level, suggestion toggle), notification toggles and a
    save button. The widget values are not read anywhere in this function;
    pressing save only displays a confirmation message.
    """
    st.subheader("🎨 User Preferences")

    # Language settings
    st.markdown("#### 🌐 Language & Region")

    interface_languages = ["English", "Hindi", "Tamil", "Telugu", "Gujarati"]
    legal_regions = ["India", "Maharashtra", "Delhi", "Karnataka", "Tamil Nadu"]

    language_col, region_col = st.columns(2)

    with language_col:
        st.selectbox(
            "Interface Language",
            interface_languages,
            help="Language for the user interface",
        )

    with region_col:
        st.selectbox(
            "Legal Region",
            legal_regions,
            help="Legal jurisdiction for document analysis",
        )

    # Analysis preferences
    st.markdown("#### 📊 Analysis Preferences")

    st.slider(
        "Risk Detection Sensitivity",
        min_value=1,
        max_value=5,
        value=3,
        help="1 = Only critical risks, 5 = All potential concerns",
    )

    st.selectbox(
        "Text Simplification Level",
        ["Basic", "Intermediate", "Advanced"],
        index=1,
        help="How much to simplify legal language",
    )

    st.checkbox(
        "Show improvement suggestions",
        value=True,
        help="Display suggestions for problematic clauses",
    )

    # Notification preferences
    st.markdown("#### 🔔 Notifications")

    st.checkbox("Email notifications for analysis completion", value=False)
    st.checkbox("Browser notifications", value=True)

    # Save preferences
    save_clicked = st.button("💾 Save Preferences", type="primary")
    if save_clicked:
        # In a real app, save to user profile/database
        st.success("✅ Preferences saved successfully!")
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
def show_usage_stats() -> None:
    """Display usage statistics.

    Every figure shown here (metrics, per-type counts, storage numbers and
    plan details) is a hard-coded mock value defined in this function; none
    of it is read from session state or any service.
    """
    st.subheader("📊 Usage Statistics")

    # Mock usage data
    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric(label="Documents Analyzed", value="47", delta="12 this month")

    with col2:
        st.metric(label="Questions Asked", value="156", delta="23 this week")

    with col3:
        st.metric(label="Risks Identified", value="89", delta="High: 12, Medium: 31")

    # Usage by document type
    st.markdown("#### 📄 Analysis by Document Type")

    usage_data = {
        "Rental Agreements": 18,
        "Loan Contracts": 12,
        "Employment Contracts": 8,
        "Service Agreements": 6,
        "NDAs": 3,
    }

    for doc_type, count in usage_data.items():
        # Bar length is relative to the most frequent document type, so the
        # largest entry fills the bar completely.
        progress = count / max(usage_data.values())
        st.markdown(f"**{doc_type}**: {count} documents")
        st.progress(progress)

    # Storage usage
    st.markdown("#### 💾 Storage Usage")

    storage_used = 2.4  # GB
    storage_limit = 5.0  # GB

    st.progress(storage_used / storage_limit)
    st.markdown(
        f"**{storage_used:.1f} GB** used of **{storage_limit:.1f} GB** available"
    )

    # Account tier
    st.markdown("#### 👤 Account Information")

    col1, col2 = st.columns(2)

    with col1:
        st.info("**Plan**: Free Tier")
        st.markdown(
            """
        - 10 documents per month
        - Basic AI analysis
        - Email support
        """
        )

    with col2:
        st.markdown("**Upgrade Benefits**:")
        st.markdown(
            """
        - Unlimited documents
        - Advanced AI features
        - Priority support
        - Bulk processing
        """
        )

        # Placeholder action: clicking only shows an informational message.
        if st.button("🚀 Upgrade to Pro", type="primary"):
            st.info("Upgrade functionality would be implemented here")
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
def show_about_info() -> None:
    """Display about information.

    Renders static markdown (product description, version and tech stack,
    legal notice) followed by a feedback form.

    NOTE(review): the feedback form only acknowledges a submission on screen;
    nothing in this function stores or sends ``feedback_type`` or
    ``feedback_text``.
    """
    st.subheader("ℹ️ About Lega.AI")

    # App info
    # Wide left column for the description, narrow right column for metadata.
    col1, col2 = st.columns([2, 1])

    with col1:
        st.markdown(
            """
        **Lega.AI** is an AI-powered platform that makes legal documents accessible to everyone.

        ### 🎯 Mission
        To democratize legal document understanding by providing instant AI analysis,
        risk assessment, and plain language explanations.

        ### ✨ Features
        - **Document Analysis**: Upload and analyze any legal document
        - **Risk Assessment**: Color-coded risk scoring with explanations
        - **Plain Language**: Convert legal jargon to simple English
        - **Q&A Assistant**: Ask questions about your documents
        - **Smart Search**: Find similar clauses and documents
        - **Export Reports**: Generate comprehensive analysis reports

        ### 🛡️ Privacy & Security
        - Your documents are processed securely
        - No data is shared with third parties
        - Local vector storage for document similarity
        - GDPR compliant data handling
        """
        )

    with col2:
        st.markdown(
            """
        ### 📊 Version Info
        **Version**: 1.0.0
        **Build**: 2025.09.21
        **Engine**: Google Gemini

        ### 🔧 Tech Stack
        - **Frontend**: Streamlit
        - **AI/ML**: LangChain + Gemini
        - **Vector DB**: Chroma
        - **Embeddings**: Google Embeddings

        ### 📞 Support
        - **Email**: support@lega.ai
        - **Docs**: github.com/codernoahx/Lega.AI/README.md
        - **GitHub**: github.com/codernoahx/Lega.AI
        """
        )

    # Legal notice
    st.markdown("---")
    st.markdown(
        """
    ### ⚖️ Legal Notice

    **Disclaimer**: Lega.AI provides AI-powered analysis for informational purposes only.
    This is not legal advice. Always consult with qualified legal professionals for
    important legal matters.

    **Data Usage**: By using this service, you agree to our Terms of Service and Privacy Policy.
    Your documents are processed to provide analysis but are not used to train AI models.

    © 2025 Lega.AI. All rights reserved.
    """
    )

    # Feedback section
    st.markdown("---")
    st.subheader("💬 Feedback")

    with st.form("feedback_form"):
        feedback_type = st.selectbox(
            "Feedback Type",
            ["General Feedback", "Bug Report", "Feature Request", "Question"],
        )

        feedback_text = st.text_area(
            "Your Feedback",
            placeholder="Tell us what you think or report any issues...",
            height=100,
        )

        submitted = st.form_submit_button("Send Feedback")

        # Blank or whitespace-only feedback is rejected with an error.
        if submitted and feedback_text.strip():
            st.success("✅ Thank you for your feedback! We'll review it soon.")
        elif submitted:
            st.error("❌ Please enter your feedback before submitting.")
|
src/pages/upload.py
ADDED
|
@@ -0,0 +1,342 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import os
|
| 3 |
+
from typing import Optional
|
| 4 |
+
import time
|
| 5 |
+
|
| 6 |
+
from ..services.document_processor import DocumentProcessor
|
| 7 |
+
from ..services.ai_analyzer import AIAnalyzer
|
| 8 |
+
from ..services.vector_store import VectorStoreService
|
| 9 |
+
from ..models.document import DocumentType
|
| 10 |
+
from ..utils.helpers import generate_document_id, sanitize_filename, format_file_size
|
| 11 |
+
from ..utils.logger import log_document_upload
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def show_upload_interface():
    """Render the upload page.

    If ``st.session_state.load_sample`` holds a file name, that flag is
    cleared and the sample is loaded instead of showing the page. Otherwise
    the page shows a file uploader (files above 10MB are rejected), an
    analyze button for the uploaded file, and one button per sample file
    found in ``./sample``.
    """
    st.header("📄 Upload Legal Document")
    st.markdown(
        "Upload your legal document for instant AI analysis and risk assessment."
    )

    # Check if we should auto-load a sample document
    if st.session_state.get("load_sample"):
        requested_sample = st.session_state.load_sample
        del st.session_state.load_sample  # Clear the flag
        load_sample_document_from_file(requested_sample)
        return

    # File uploader
    uploaded_file = st.file_uploader(
        "Choose a file",
        type=["pdf", "txt", "docx"],  # Added docx support
        help="Supported formats: PDF, TXT, DOCX (Max 10MB)",
        key="document_uploader",
    )

    if uploaded_file is not None:
        size_limit = 10 * 1024 * 1024  # 10MB
        size_in_bytes = len(uploaded_file.getvalue())

        if size_in_bytes > size_limit:
            st.error(f"❌ File too large. Maximum size is {format_file_size(size_limit)}")
            return

        st.success(f"📁 **{uploaded_file.name}** ({format_file_size(size_in_bytes)})")

        # Process button
        analyze_clicked = st.button(
            "🔍 Analyze Document", type="primary", use_container_width=True
        )
        if analyze_clicked:
            process_uploaded_document(uploaded_file)

    # Sample documents section
    st.markdown("---")
    st.subheader("📋 Try Sample Documents")
    st.markdown("Don't have a document handy? Try one of our real sample documents:")

    # Get available sample documents
    sample_dir = "./sample"
    supported_suffixes = ('.pdf', '.docx', '.txt')
    if os.path.exists(sample_dir):
        sample_files = [
            entry for entry in os.listdir(sample_dir) if entry.endswith(supported_suffixes)
        ]
    else:
        sample_files = []

    if not sample_files:
        st.info("No sample documents found in the sample directory.")
        return

    columns = st.columns(2)

    for position, filename in enumerate(sample_files):
        # Alternate buttons between the left and right column.
        with columns[position % 2]:
            # Create descriptive button names
            display_name = filename.replace('_', ' ')
            for suffix in supported_suffixes:
                display_name = display_name.replace(suffix, '')
            display_name = display_name.title()

            if st.button(
                f"📄 {display_name}", use_container_width=True, key=f"sample_{position}"
            ):
                load_sample_document_from_file(filename)
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def process_uploaded_document(uploaded_file):
    """Run the full analysis pipeline on an uploaded document.

    The pipeline extracts the text, detects the document type, runs the risk
    analysis, simplification and summary steps, stores the text in the vector
    store, and finally records the results in ``st.session_state`` before
    switching to the analysis page.

    Args:
        uploaded_file: Object exposing ``name`` and ``getvalue()`` (a Streamlit
            upload, or the mock built by ``load_sample_document_from_file``).

    Side effects:
        Writes ``st.session_state.current_document``, appends to
        ``st.session_state.documents_library``, sets ``st.session_state.page``
        and triggers ``st.rerun()``. Any exception is reported with
        ``st.error`` instead of being propagated.
    """
    # Bound before the ``try`` so the error handler can reference them even when
    # one of the service constructors below raises (e.g. bad configuration).
    # Previously that case produced a NameError that hid the real error.
    progress_bar = None
    status_text = None

    try:
        # Initialize processors
        doc_processor = DocumentProcessor()
        ai_analyzer = AIAnalyzer()
        vector_store = VectorStoreService()

        # Create progress tracking
        progress_bar = st.progress(0)
        status_text = st.empty()

        # Step 1: Extract text
        status_text.text("📄 Extracting text from document...")
        progress_bar.progress(20)

        file_content = uploaded_file.getvalue()
        file_size = len(file_content)
        text = doc_processor.extract_text(file_content, uploaded_file.name)

        if not text.strip():
            st.error(
                "❌ Could not extract text from the document. Please try a different file."
            )
            progress_bar.empty()
            status_text.empty()
            return

        progress_bar.progress(40)

        # Step 2: Detect document type
        status_text.text("🔍 Analyzing document type...")
        document_type = doc_processor.detect_document_type(text)
        progress_bar.progress(50)

        # Step 3: Risk analysis
        status_text.text("⚠️ Performing risk assessment...")
        risk_data = ai_analyzer.analyze_document_risk(text, document_type)
        progress_bar.progress(70)

        # Step 4: Text simplification
        status_text.text("💬 Simplifying legal language...")
        simplified_data = ai_analyzer.simplify_text(text, document_type)
        progress_bar.progress(85)

        # Step 5: Generate summary
        status_text.text("📋 Generating summary...")
        summary = ai_analyzer.generate_summary(text, document_type)

        # Step 6: Add to vector store
        status_text.text("💾 Storing document for search...")
        doc_id = generate_document_id()
        # Computed once so the vector-store metadata and the library entry
        # always carry the same upload timestamp.
        upload_date = time.strftime("%Y-%m-%d %H:%M:%S")
        vector_store.add_document(
            document_id=doc_id,
            text=text,
            metadata={
                "filename": uploaded_file.name,
                "document_type": document_type.value,
                "upload_date": upload_date,
            },
        )

        progress_bar.progress(100)

        # Complete
        status_text.text("✅ Analysis complete!")
        time.sleep(1)
        progress_bar.empty()
        status_text.empty()

        # Store results in session state
        st.session_state.current_document = {
            "id": doc_id,
            "filename": uploaded_file.name,
            "document_type": document_type.value,
            "original_text": text,
            "simplified_text": simplified_data.get("simplified_text", ""),
            "summary": summary,
            "risk_data": risk_data,
            "key_points": simplified_data.get("key_points", []),
            "jargon_definitions": simplified_data.get("jargon_definitions", {}),
            "analysis_timestamp": time.time(),
            "file_size": file_size,
        }

        # Add to documents library
        if "documents_library" not in st.session_state:
            st.session_state.documents_library = []

        st.session_state.documents_library.append(
            {
                "id": doc_id,
                "filename": uploaded_file.name,
                "document_type": document_type.value,
                "upload_date": upload_date,
                "file_size": file_size,
                # Simple risk score: 10 points per reported risk factor.
                "risk_score": len(risk_data.get("risk_factors", [])) * 10,
            }
        )

        # Log the upload
        log_document_upload(uploaded_file.name, file_size)

        # Show success and redirect to analysis page
        st.success("🎉 Document analysis completed! Redirecting to results...")

        # Set page state for redirection
        st.session_state.page = "📊 Analysis"

        time.sleep(2)
        st.rerun()

    except Exception as e:
        st.error(f"❌ Error processing document: {str(e)}")
        # The widgets only exist if the failure happened after they were created.
        if progress_bar is not None:
            progress_bar.empty()
        if status_text is not None:
            status_text.empty()
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
def load_sample_document_from_file(filename: str):
    """Read a bundled sample from ``./sample`` and run it through the pipeline.

    The file bytes are wrapped in a minimal stand-in offering the two members
    the pipeline uses (``name`` and ``getvalue()``) and handed to
    ``process_uploaded_document``. Problems are reported via ``st.error``.

    Args:
        filename: Name of the file inside the ``./sample`` directory.
    """

    class MockUploadedFile:
        """In-memory substitute for an uploaded file."""

        def __init__(self, content, name):
            self._content = content
            self.name = name

        def getvalue(self):
            return self._content

    try:
        path_on_disk = os.path.join("./sample", filename)

        # Guard: nothing to do when the sample has gone missing.
        if not os.path.exists(path_on_disk):
            st.error(f"❌ Sample file not found: {filename}")
            return

        with open(path_on_disk, 'rb') as handle:
            raw_bytes = handle.read()

        stand_in = MockUploadedFile(raw_bytes, filename)

        st.success(f"📄 Loading sample document: **{filename}**")

        # Same pipeline as a real upload.
        process_uploaded_document(stand_in)

    except Exception as exc:
        st.error(f"❌ Error loading sample document: {str(exc)}")
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
def load_sample_document(doc_type: str):
    """Load one of the built-in demo documents into the session.

    Recognised values for ``doc_type`` are ``"rental"``, ``"loan"`` and
    ``"employment"``; any other value is ignored. For a recognised type the
    sample is stored in ``st.session_state.current_document`` (flagged with
    ``is_sample``), a short simulated processing delay is shown, and the app
    is rerun.

    Args:
        doc_type: Key selecting which demo document to load.
    """
    sample_docs = {
        "rental": {
            "filename": "sample_rental_agreement.pdf",
            "type": "rental",
            "text": """
RESIDENTIAL LEASE AGREEMENT

This Lease Agreement is entered into between John Smith (Landlord) and Jane Doe (Tenant)
for the property located at 123 Main Street, Mumbai, Maharashtra.

RENT: Tenant agrees to pay Rs. 25,000 per month, due on the 1st of each month.
Late payments will incur a penalty of Rs. 1,000 per day.

SECURITY DEPOSIT: Tenant shall pay a security deposit of Rs. 75,000, which is
non-refundable except for damage assessment.

TERMINATION: Either party may terminate this lease with 30 days written notice.
Early termination by Tenant results in forfeiture of security deposit.

MAINTENANCE: Tenant is responsible for all repairs and maintenance, including
structural repairs, regardless of cause.

The property is leased "as-is" with no warranties. Landlord is not liable for
any damages or injuries occurring on the premises.
""",
        },
        "loan": {
            "filename": "sample_loan_agreement.pdf",
            "type": "loan",
            "text": """
PERSONAL LOAN AGREEMENT

Borrower: Rajesh Kumar
Lender: QuickCash Financial Services Pvt Ltd
Principal Amount: Rs. 2,00,000

INTEREST RATE: 24% per annum (APR 28.5% including processing fees)

REPAYMENT: 24 monthly installments of Rs. 12,500 each
Total repayment amount: Rs. 3,00,000

LATE PAYMENT PENALTY: Rs. 500 per day for any late payment

DEFAULT: If payment is late by more than 7 days, the entire remaining
balance becomes immediately due and payable.

COLLATERAL: Borrower pledges gold ornaments worth Rs. 2,50,000 as security.
Lender may seize collateral immediately upon default.

ARBITRATION: All disputes shall be resolved through binding arbitration.
Borrower waives right to jury trial.

Processing fee: Rs. 10,000 (non-refundable)
Documentation charges: Rs. 5,000
""",
        },
        "employment": {
            "filename": "sample_employment_contract.docx",
            "type": "employment",
            "text": """
EMPLOYMENT CONTRACT

Employee: Priya Sharma
Company: TechCorp India Private Limited
Position: Software Developer
Start Date: January 1, 2024

SALARY: Rs. 8,00,000 per annum, payable monthly

WORKING HOURS: 45 hours per week, including mandatory weekend work when required

NON-COMPETE: Employee shall not work for any competing company for 2 years
after termination, within India or globally.

CONFIDENTIALITY: Employee agrees to maintain strict confidentiality of all
company information indefinitely, even after termination.

TERMINATION: Company may terminate employment at any time without cause or notice.
Employee must provide 90 days notice to resign.

NO MOONLIGHTING: Employee shall not engage in any other work or business
activities during employment.

INTELLECTUAL PROPERTY: All work created by employee belongs entirely to company,
including personal projects done outside work hours.
""",
        },
    }

    # Unknown sample keys are silently ignored.
    if doc_type not in sample_docs:
        return

    chosen = sample_docs[doc_type]
    from ..utils.helpers import generate_document_id

    # Publish the sample as the current document.
    new_id = generate_document_id()
    st.session_state.current_document = {
        "id": new_id,
        "filename": chosen["filename"],
        "document_type": chosen["type"],
        "original_text": chosen["text"],
        "is_sample": True,
    }

    st.success(f"📄 Loaded sample {doc_type} document. Processing...")

    # Demo only: pause briefly behind a spinner, no real analysis runs here.
    with st.spinner("Analyzing sample document..."):
        time.sleep(2)

    st.rerun()
|
src/services/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# src/services/__init__.py
|
src/services/ai_analyzer.py
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Dict, Any, Optional
|
| 2 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 3 |
+
from langchain.prompts import PromptTemplate
|
| 4 |
+
from langchain.chains import LLMChain
|
| 5 |
+
import json
|
| 6 |
+
import time
|
| 7 |
+
|
| 8 |
+
from src.utils.config import config
|
| 9 |
+
from src.utils.logger import log_error, log_analysis_start, log_analysis_complete
|
| 10 |
+
from src.models.document import (
|
| 11 |
+
DocumentType,
|
| 12 |
+
RiskLevel,
|
| 13 |
+
RiskCategory,
|
| 14 |
+
RiskFactor,
|
| 15 |
+
ClausePosition,
|
| 16 |
+
)
|
| 17 |
+
from src.utils.helpers import (
|
| 18 |
+
calculate_risk_score,
|
| 19 |
+
extract_key_dates,
|
| 20 |
+
extract_financial_terms,
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class AIAnalyzer:
    """LLM-backed analysis of legal documents.

    Wraps a ``ChatGoogleGenerativeAI`` chat model and exposes risk analysis,
    plain-language simplification, summarisation and question answering.
    Every public method catches its own failures, logs them with
    ``log_error`` and returns a safe placeholder instead of raising.
    """

    def __init__(self):
        """Create the chat model from ``config`` and build the prompt templates."""
        self.llm = ChatGoogleGenerativeAI(
            model=config.CHAT_MODEL,
            google_api_key=config.GOOGLE_API_KEY,
            temperature=config.TEMPERATURE,
            max_output_tokens=config.MAX_TOKENS,
        )

        # Initialize prompt templates
        self._setup_prompts()

    def _setup_prompts(self):
        """Set up prompt templates for different analysis tasks."""

        # Risk analysis prompt
        self.risk_analysis_prompt = PromptTemplate(
            input_variables=["text", "document_type"],
            template="""
Analyze the following {document_type} document for potential risks and problematic clauses.

Document text:
{text}

Please identify:
1. High-risk clauses that could be problematic for the signer
2. Financial risks (hidden fees, penalties, high costs)
3. Commitment risks (long-term obligations, difficult exit clauses)
4. Rights risks (waived protections, limited recourse)

For each risk, provide:
- The exact clause text (keep it concise, max 100 words)
- Risk category (financial, commitment, rights, or standard)
- Severity level (low, medium, high, critical)
- Clear explanation of why it's risky
- Suggestion for improvement

IMPORTANT: Return ONLY valid JSON in the exact format below. Do not include any explanatory text before or after the JSON:

{{
"risk_factors": [
{{
"clause_text": "exact text from document",
"category": "financial",
"severity": "medium",
"explanation": "why this is risky",
"suggestion": "how to improve or what to watch for"
}}
],
"overall_assessment": "brief summary of document risk level"
}}
""",
        )

        # Plain language translation prompt
        self.simplification_prompt = PromptTemplate(
            input_variables=["text", "document_type"],
            template="""
Convert the following legal text into plain, simple English that anyone can understand.

Document type: {document_type}
Legal text: {text}

Rules for simplification:
1. Use everyday language instead of legal jargon
2. Break down complex sentences into shorter ones
3. Explain what actions or obligations mean in practical terms
4. Keep the essential meaning intact
5. Use "you" to make it personal and clear
6. Focus on the most important points

IMPORTANT: Return ONLY valid JSON in the exact format below. Do not include any explanatory text:

{{
"simplified_text": "the simplified version in plain English",
"key_points": ["main point 1", "main point 2", "main point 3"],
"jargon_definitions": {{"legal term": "simple definition"}}
}}
""",
        )

        # Document summary prompt
        self.summary_prompt = PromptTemplate(
            input_variables=["text", "document_type"],
            template="""
Create a concise summary of this {document_type} document.

Document: {text}

Provide a summary that includes:
1. What type of agreement this is
2. Who are the main parties involved
3. Key obligations for each party
4. Important terms (dates, amounts, conditions)
5. Major benefits and risks

Keep it under 200 words and focus on what matters most to the person signing.
""",
        )

    @staticmethod
    def _parse_json_response(raw: str) -> Dict[str, Any]:
        """Extract the JSON object from a model reply.

        Handles three reply shapes: bare JSON, JSON inside a Markdown code
        fence (with or without a language tag, with or without surrounding
        prose), and JSON embedded in a sentence.

        Args:
            raw: The text returned by the model.

        Returns:
            The decoded JSON object.

        Raises:
            ValueError: If no JSON object can be decoded, or the decoded value
                is not a dictionary. ``json.JSONDecodeError`` is a subclass of
                ``ValueError``.
        """
        cleaned = raw.strip()

        # Prefer the contents of the first fenced block, if there is one.
        fence_start = cleaned.find("```")
        if fence_start != -1:
            # Skip the rest of the fence line (it may carry a tag such as "json").
            body_start = cleaned.find("\n", fence_start)
            if body_start != -1:
                fence_end = cleaned.find("```", body_start)
                # An unclosed fence (truncated reply) runs to the end of the text.
                if fence_end == -1:
                    fence_end = len(cleaned)
                cleaned = cleaned[body_start + 1:fence_end].strip()

        try:
            parsed = json.loads(cleaned)
        except json.JSONDecodeError:
            # Last resort: take the outermost {...} span out of surrounding prose.
            first_brace = cleaned.find("{")
            last_brace = cleaned.rfind("}")
            if first_brace == -1 or last_brace <= first_brace:
                raise
            parsed = json.loads(cleaned[first_brace:last_brace + 1])

        if not isinstance(parsed, dict):
            raise ValueError("Response is not a dictionary")

        return parsed

    def analyze_document_risk(
        self, text: str, document_type: DocumentType
    ) -> Dict[str, Any]:
        """Analyze document for risks and problematic clauses.

        Args:
            text: Full document text; only the first 4000 characters are sent
                to the model.
            document_type: Detected type; its ``value`` is inserted in the prompt.

        Returns:
            A dict with ``risk_factors`` (list of dicts) and
            ``overall_assessment`` (str). If the reply cannot be parsed as JSON
            a keyword-based fallback is used; on any other failure an empty
            result with ``"Analysis failed"`` is returned.
        """
        try:
            log_analysis_start("risk_analysis")
            start_time = time.time()

            # Create and run the risk analysis chain
            risk_chain = LLMChain(llm=self.llm, prompt=self.risk_analysis_prompt)

            result = risk_chain.run(
                text=text[:4000],  # Limit text size for API
                document_type=document_type.value,
            )

            try:
                risk_data = self._parse_json_response(result)

                # Callers take len() of risk_factors, so it must be a list.
                if not isinstance(risk_data.get("risk_factors"), list):
                    risk_data["risk_factors"] = []

                if "overall_assessment" not in risk_data:
                    risk_data["overall_assessment"] = "Analysis completed"

            except (json.JSONDecodeError, ValueError) as e:
                log_error(f"JSON parsing error in risk analysis: {str(e)}")
                log_error(f"Raw AI response: {result[:500]}...")

                # Try to extract risk information manually if JSON fails
                risk_data = self._extract_risk_fallback(result, text)

            processing_time = time.time() - start_time
            log_analysis_complete("risk_analysis", processing_time)

            return risk_data

        except Exception as e:
            log_error(f"Error in risk analysis: {str(e)}")
            return {"risk_factors": [], "overall_assessment": "Analysis failed"}

    def _extract_risk_fallback(
        self, response: str, original_text: str
    ) -> Dict[str, Any]:
        """Fallback method to extract risk information when JSON parsing fails.

        Splits the reply on ``"."`` and keeps up to five sentences longer than
        20 characters that mention a risk-related keyword.

        Args:
            response: The unparseable model reply.
            original_text: The document text (currently unused; kept for
                interface compatibility).

        Returns:
            A dict with ``risk_factors`` and ``overall_assessment``.
        """
        try:
            risk_factors = []

            # Look for common risk indicators in the response
            risk_keywords = (
                "risk",
                "problematic",
                "concern",
                "warning",
                "caution",
                "penalty",
                "fee",
            )

            for sentence in response.split("."):
                sentence = sentence.strip()
                lowered = sentence.lower()
                if len(sentence) > 20 and any(
                    keyword in lowered for keyword in risk_keywords
                ):
                    risk_factors.append(
                        {
                            "clause_text": sentence[:200],  # Limit length
                            "category": "standard",
                            "severity": "medium",
                            "explanation": "Potential risk identified by text analysis",
                            "suggestion": "Review this clause carefully with legal counsel",
                        }
                    )

                if len(risk_factors) >= 5:  # Limit to 5 fallback risks
                    break

            return {
                "risk_factors": risk_factors,
                "overall_assessment": "Risk analysis completed with limited parsing. Please review manually.",
            }

        except Exception as e:
            log_error(f"Error in fallback risk extraction: {str(e)}")
            return {
                "risk_factors": [],
                "overall_assessment": "Unable to analyze risks - please try again",
            }

    def simplify_text(self, text: str, document_type: DocumentType) -> Dict[str, Any]:
        """Convert legal text to plain language.

        Args:
            text: Full document text; only the first 3000 characters are sent
                to the model.
            document_type: Detected type; its ``value`` is inserted in the prompt.

        Returns:
            A dict with ``simplified_text``, ``key_points`` and
            ``jargon_definitions``. Missing keys are filled with placeholders;
            on failure a truncated copy of the original text is returned.
        """
        try:
            simplification_chain = LLMChain(
                llm=self.llm, prompt=self.simplification_prompt
            )

            result = simplification_chain.run(
                text=text[:3000], document_type=document_type.value  # Limit text size
            )

            try:
                simplified_data = self._parse_json_response(result)

                # Ensure required keys exist
                if "simplified_text" not in simplified_data:
                    simplified_data["simplified_text"] = text[:500] + "..."
                if "key_points" not in simplified_data:
                    simplified_data["key_points"] = ["Unable to extract key points"]
                if "jargon_definitions" not in simplified_data:
                    simplified_data["jargon_definitions"] = {}

            except (json.JSONDecodeError, ValueError) as e:
                log_error(f"JSON parsing error in text simplification: {str(e)}")
                simplified_data = {
                    "simplified_text": text[:500]
                    + "... (Full simplification unavailable)",
                    "key_points": ["Document content requires legal review"],
                    "jargon_definitions": {},
                }

            return simplified_data

        except Exception as e:
            log_error(f"Error in text simplification: {str(e)}")
            return {
                "simplified_text": text[:500]
                + "...",  # Return truncated original if simplification fails
                "key_points": ["Simplification failed - showing original text"],
                "jargon_definitions": {},
            }

    def generate_summary(self, text: str, document_type: DocumentType) -> str:
        """Generate a concise document summary.

        Args:
            text: Full document text; only the first 3000 characters are sent
                to the model.
            document_type: Detected type; its ``value`` is inserted in the prompt.

        Returns:
            The stripped model reply, or ``"Unable to generate summary"`` on
            failure.
        """
        try:
            summary_chain = LLMChain(llm=self.llm, prompt=self.summary_prompt)

            summary = summary_chain.run(
                text=text[:3000], document_type=document_type.value  # Limit text size
            )

            return summary.strip()

        except Exception as e:
            log_error(f"Error generating summary: {str(e)}")
            return "Unable to generate summary"

    def answer_question(
        self, question: str, document_text: str, document_type: DocumentType
    ) -> str:
        """Answer a question about the document.

        Args:
            question: The user's question.
            document_text: Document text; only the first 3000 characters are
                given to the model as context.
            document_type: Detected type; its ``value`` is inserted in the prompt.

        Returns:
            The stripped model reply, or an apology message on failure.
        """
        try:
            qa_prompt = PromptTemplate(
                input_variables=["question", "document", "doc_type"],
                template="""
Answer the following question about this {doc_type} document.
Be specific and cite relevant parts of the document.

Document: {document}

Question: {question}

Provide a clear, helpful answer based only on the document content.
If the answer isn't in the document, say so clearly.
""",
            )

            qa_chain = LLMChain(llm=self.llm, prompt=qa_prompt)

            answer = qa_chain.run(
                question=question,
                document=document_text[:3000],  # Limit context size
                doc_type=document_type.value,
            )

            return answer.strip()

        except Exception as e:
            log_error(f"Error answering question: {str(e)}")
            return "Sorry, I couldn't process your question. Please try again."

    def create_risk_factors(
        self, risk_data: Dict[str, Any], text: str
    ) -> List[RiskFactor]:
        """Convert AI analysis results to RiskFactor objects.

        Args:
            risk_data: Result dict as returned by ``analyze_document_risk``.
            text: The document text, searched for each clause to compute its
                position.

        Returns:
            One ``RiskFactor`` per usable entry. Entries that cannot be
            converted (e.g. an unknown category or severity) are logged and
            skipped.
        """
        risk_factors = []

        for factor_data in risk_data.get("risk_factors", []):
            try:
                # Find clause position in text
                clause_text = factor_data.get("clause_text", "")
                position = None

                if clause_text:
                    start_index = text.find(clause_text)
                    # Position stays None when the clause is not a verbatim match.
                    if start_index != -1:
                        position = ClausePosition(
                            start_index=start_index,
                            end_index=start_index + len(clause_text),
                        )

                risk_factor = RiskFactor(
                    id=f"risk_{len(risk_factors) + 1}",
                    clause_text=clause_text,
                    category=RiskCategory(factor_data.get("category", "standard")),
                    severity=RiskLevel(factor_data.get("severity", "low")),
                    explanation=factor_data.get("explanation", ""),
                    suggestion=factor_data.get("suggestion"),
                    position=position,
                )

                risk_factors.append(risk_factor)

            except Exception as e:
                log_error(f"Error creating risk factor: {str(e)}")
                continue

        return risk_factors
|
src/services/document_processor.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import BinaryIO, Optional
|
| 2 |
+
from langchain_community.document_loaders import PyPDFLoader, TextLoader
|
| 3 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 4 |
+
import tempfile
|
| 5 |
+
import os
|
| 6 |
+
from docx import Document
|
| 7 |
+
|
| 8 |
+
from src.utils.config import config
|
| 9 |
+
from src.utils.logger import log_error
|
| 10 |
+
from src.models.document import DocumentType
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class DocumentProcessor:
|
| 14 |
+
def __init__(self):
    """Prepare the text splitter used for chunking document text."""
    # Chunks of 1000 characters overlapping by 200, measured with ``len``.
    splitter_options = {
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    self.text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
|
| 20 |
+
|
| 21 |
+
def extract_text_from_pdf(self, file_content: bytes, filename: str) -> str:
    """Extract text from PDF using LangChain PyPDFLoader.

    The bytes are written to a temporary ``.pdf`` file because the loader is
    given a file path. The temporary file is removed whether or not loading
    succeeds.

    Args:
        file_content: Raw bytes of the PDF.
        filename: Original file name (not used by the extraction itself).

    Returns:
        The page texts joined with newlines, or ``""`` if extraction fails
        (the error is logged).
    """
    temp_file_path = None
    try:
        # Save uploaded file to temporary location
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            # Record the path first so cleanup still happens if the write fails.
            temp_file_path = temp_file.name
            temp_file.write(file_content)

        # Use LangChain PyPDFLoader
        documents = PyPDFLoader(temp_file_path).load()

        # Combine all pages
        return "\n".join(doc.page_content for doc in documents)

    except Exception as e:
        log_error(f"Error extracting text from PDF: {str(e)}")
        return ""

    finally:
        # Previously the file was only removed on success, leaking one temp
        # file per failed extraction.
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except OSError as cleanup_error:
                log_error(f"Error removing temporary PDF file: {str(cleanup_error)}")
|
| 44 |
+
|
| 45 |
+
def extract_text_from_txt(self, file_content: bytes, filename: str) -> str:
    """Extract text from TXT file.

    Decoding order:

    1. UTF-16, only when the data starts with a UTF-16 byte-order mark.
       Without a BOM almost any even-length byte string "decodes" as UTF-16
       and yields unreadable text, so it is not tried blindly.
    2. UTF-8 (a leading UTF-8 BOM is dropped).
    3. Windows-1252, the usual legacy encoding for Western text files.
    4. Latin-1, which accepts every byte and therefore always succeeds.

    Args:
        file_content: Raw bytes of the text file.
        filename: Original file name (not used by the extraction itself).

    Returns:
        The decoded text, or ``""`` for empty input or on an unexpected error
        (the error is logged).
    """
    try:
        if not file_content:
            return ""

        if file_content.startswith((b"\xff\xfe", b"\xfe\xff")):
            try:
                return file_content.decode("utf-16")
            except UnicodeDecodeError:
                # Looked like a BOM but is not valid UTF-16; keep trying.
                pass

        for encoding in ("utf-8-sig", "cp1252"):
            try:
                return file_content.decode(encoding)
            except UnicodeDecodeError:
                continue

        # cp1252 leaves a few byte values undefined; latin-1 maps all 256.
        return file_content.decode("latin-1")

    except Exception as e:
        log_error(f"Error extracting text from TXT: {str(e)}")
        return ""
|
| 64 |
+
|
| 65 |
+
def extract_text_from_docx(self, file_content: bytes, filename: str) -> str:
    """Extract text from DOCX file.

    The bytes are written to a temporary ``.docx`` file, opened with
    python-docx, and the non-empty paragraph texts followed by the non-empty
    table-cell texts are joined with newlines. The temporary file is removed
    whether or not parsing succeeds.

    Args:
        file_content: Raw bytes of the DOCX file.
        filename: Original file name (not used by the extraction itself).

    Returns:
        The extracted text, or ``""`` if extraction fails (the error is
        logged).
    """
    temp_file_path = None
    try:
        # Save uploaded file to temporary location
        with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as temp_file:
            # Record the path first so cleanup still happens if the write fails.
            temp_file_path = temp_file.name
            temp_file.write(file_content)

        # Use python-docx to extract text
        from docx import Document as DocxDocument

        doc = DocxDocument(temp_file_path)

        # Paragraph text first, in document order
        text_parts = [
            paragraph.text for paragraph in doc.paragraphs if paragraph.text.strip()
        ]

        # Then every non-empty table cell
        for table in doc.tables:
            for row in table.rows:
                text_parts.extend(
                    cell.text for cell in row.cells if cell.text.strip()
                )

        # Join all text parts
        return "\n".join(text_parts)

    except Exception as e:
        log_error(f"Error extracting text from DOCX: {str(e)}")
        return ""

    finally:
        # Previously the file was only removed on success, leaking one temp
        # file per failed extraction.
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except OSError as cleanup_error:
                log_error(f"Error removing temporary DOCX file: {str(cleanup_error)}")
|
| 101 |
+
|
| 102 |
+
def extract_text(self, file_content: bytes, filename: str) -> str:
    """Route an upload to the extractor matching its file extension.

    The extension is whatever follows the last ``.`` in the lower-cased
    filename. ``pdf`` and ``txt`` have dedicated extractors; ``docx`` and
    ``doc`` both go to the DOCX extractor. Any other extension is logged as
    unsupported and yields ``""``.
    """
    file_ext = filename.lower().rpartition(".")[2]

    # Extension -> name of the extractor method on this object.
    extractor_names = {
        "pdf": "extract_text_from_pdf",
        "txt": "extract_text_from_txt",
        "docx": "extract_text_from_docx",
        "doc": "extract_text_from_docx",
    }

    extractor_name = extractor_names.get(file_ext)
    if extractor_name is None:
        log_error(f"Unsupported file type: {file_ext}")
        return ""

    return getattr(self, extractor_name)(file_content, filename)
|
| 115 |
+
|
| 116 |
+
def split_text_into_chunks(self, text: str) -> list:
    """Return ``text`` split into chunks by this object's ``text_splitter``."""
    splitter = self.text_splitter
    chunks = splitter.split_text(text)
    return chunks
|
| 119 |
+
|
| 120 |
+
def detect_document_type(self, text: str) -> DocumentType:
    """Guess the document category from keyword hits in the text.

    Each category has a fixed keyword list. A keyword counts once if it
    occurs anywhere in the lower-cased text (plain substring match). The
    category with the most distinct hits wins, provided it has more than two;
    otherwise ``DocumentType.OTHER`` is returned. Ties go to the category
    listed first below.
    """
    haystack = text.lower()

    # Insertion order matters: it is the tie-break order.
    keyword_table = {
        DocumentType.RENTAL: (
            "lease",
            "rent",
            "tenant",
            "landlord",
            "property",
            "premises",
            "deposit",
        ),
        DocumentType.LOAN: (
            "loan",
            "borrow",
            "lender",
            "principal",
            "interest",
            "repayment",
            "credit",
        ),
        DocumentType.EMPLOYMENT: (
            "employment",
            "employee",
            "employer",
            "salary",
            "wages",
            "position",
            "job",
        ),
        DocumentType.NDA: (
            "confidential",
            "non-disclosure",
            "proprietary",
            "trade secret",
        ),
        DocumentType.SERVICE: (
            "service",
            "provider",
            "client",
            "deliverables",
            "scope of work",
        ),
    }

    hit_counts = {
        doc_type: len([keyword for keyword in keywords if keyword in haystack])
        for doc_type, keywords in keyword_table.items()
    }

    best_type = max(hit_counts, key=hit_counts.get)
    if hit_counts[best_type] > 2:
        return best_type
    return DocumentType.OTHER
|
| 193 |
+
|
| 194 |
+
def extract_metadata(self, text: str) -> dict:
    """Compute simple size statistics for a document's text.

    Returns a dict with ``word_count`` (whitespace-separated tokens),
    ``character_count`` and ``estimated_reading_time`` (whole minutes at
    200 words per minute, rounded down).
    """
    word_total = len(text.split())
    words_per_minute = 200

    return {
        "word_count": word_total,
        "character_count": len(text),
        "estimated_reading_time": word_total // words_per_minute,
    }
|
src/services/vector_store.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Dict, Any, Optional
|
| 2 |
+
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
| 3 |
+
from langchain_chroma import Chroma
|
| 4 |
+
from langchain.schema import Document
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
from src.utils.config import config
|
| 8 |
+
from src.utils.logger import log_error
|
| 9 |
+
from src.models.document import Document as DocModel
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class VectorStoreService:
    """Chunked document storage and similarity search on a Chroma collection.

    Documents are split into overlapping text chunks; every chunk is stored
    with ``document_id`` and ``chunk_id`` metadata so results can be filtered
    per document and a document's chunks can be removed together.

    Search methods return dicts with ``content``, ``metadata`` and
    ``similarity_score``. NOTE(review): per the LangChain Chroma docs the
    value from ``similarity_search_with_score`` is a distance (lower means
    more similar), which matches the ``< 0.8`` test in
    ``get_document_context`` - confirm against the installed version.
    """

    # Name of the Chroma collection used for all documents.
    COLLECTION_NAME = "lega_documents"

    def __init__(self):
        """Create the embedding client and open the persistent collection."""
        self.embeddings = GoogleGenerativeAIEmbeddings(
            model=config.EMBEDDING_MODEL, google_api_key=config.GOOGLE_API_KEY
        )

        # Ensure the Chroma directory exists before opening the store.
        os.makedirs(config.CHROMA_PERSIST_DIR, exist_ok=True)

        self.vector_store = Chroma(
            persist_directory=config.CHROMA_PERSIST_DIR,
            embedding_function=self.embeddings,
            collection_name=self.COLLECTION_NAME,
        )

    @staticmethod
    def _format_results(results) -> List[Dict[str, Any]]:
        """Convert ``(document, score)`` pairs into plain result dicts."""
        return [
            {
                "content": doc.page_content,
                "metadata": doc.metadata,
                "similarity_score": score,
            }
            for doc, score in results
        ]

    def add_document(
        self, document_id: str, text: str, metadata: Optional[Dict[str, Any]] = None
    ) -> bool:
        """Chunk ``text`` and add the chunks to the vector store.

        Args:
            document_id: Identifier stored on every chunk; chunk ids are
                ``"{document_id}_chunk_{i}"``.
            text: Full document text.
            metadata: Extra metadata copied onto every chunk. It may override
                the default ``chunk_type`` but never ``document_id`` or
                ``chunk_id``, which filtering and removal depend on.

        Returns:
            True on success, False if anything raised (the error is logged).
        """
        try:
            chunks = self._chunk_document(text)
            extra_metadata = metadata or {}

            metadatas = []
            ids = []
            for i in range(len(chunks)):
                metadatas.append(
                    {
                        "chunk_type": "text",
                        **extra_metadata,
                        # Reserved keys go last so caller metadata cannot
                        # clobber them.
                        "document_id": document_id,
                        "chunk_id": i,
                    }
                )
                ids.append(f"{document_id}_chunk_{i}")

            self.vector_store.add_texts(texts=chunks, metadatas=metadatas, ids=ids)

            return True

        except Exception as e:
            log_error(f"Error adding document to vector store: {str(e)}")
            return False

    def search_similar_documents(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
        """Return up to ``k`` chunks across all documents closest to ``query``.

        Returns an empty list if the search raises (the error is logged).
        """
        try:
            results = self.vector_store.similarity_search_with_score(query=query, k=k)
            return self._format_results(results)

        except Exception as e:
            log_error(f"Error searching vector store: {str(e)}")
            return []

    def search_document_clauses(
        self, document_id: str, query: str, k: int = 3
    ) -> List[Dict[str, Any]]:
        """Return up to ``k`` chunks of one document closest to ``query``.

        The search is restricted with a ``document_id`` metadata filter.
        Returns an empty list if the search raises (the error is logged).
        """
        try:
            results = self.vector_store.similarity_search_with_score(
                query=query, k=k, filter={"document_id": document_id}
            )
            return self._format_results(results)

        except Exception as e:
            log_error(f"Error searching document clauses: {str(e)}")
            return []

    def get_document_context(
        self, document_id: str, query: str, max_chunks: int = 5
    ) -> str:
        """Build Q&A context text from the chunks most relevant to ``query``.

        Up to ``max_chunks`` chunks of the document are retrieved; only those
        whose score is below 0.8 are kept and joined with blank lines.
        Returns ``""`` when nothing qualifies or an error occurs.
        """
        try:
            results = self.search_document_clauses(document_id, query, k=max_chunks)

            # Only use highly relevant chunks (score below the cut-off).
            context_parts = [
                result["content"]
                for result in results
                if result["similarity_score"] < 0.8
            ]

            return "\n\n".join(context_parts)

        except Exception as e:
            log_error(f"Error getting document context: {str(e)}")
            return ""

    def remove_document(self, document_id: str) -> bool:
        """Delete every stored chunk whose ``document_id`` metadata matches.

        Returns True when the call completes (including when no chunks were
        found), False if anything raised (the error is logged).
        """
        try:
            results = self.vector_store.get(where={"document_id": document_id})

            if results and results.get("ids"):
                self.vector_store.delete(ids=results["ids"])

            return True

        except Exception as e:
            log_error(f"Error removing document from vector store: {str(e)}")
            return False

    def get_document_stats(self) -> Dict[str, Any]:
        """Return basic statistics about the vector store.

        ``total_documents`` is the collection's item count; since items are
        stored per chunk, this is the number of chunks, not of source
        documents. On error only ``{"total_documents": 0}`` is returned.
        """
        try:
            # Uses the wrapper's underlying collection object directly.
            collection = self.vector_store._collection
            count = collection.count()

            return {
                "total_documents": count,
                "collection_name": self.COLLECTION_NAME,
                "persist_directory": config.CHROMA_PERSIST_DIR,
            }

        except Exception as e:
            log_error(f"Error getting vector store stats: {str(e)}")
            return {"total_documents": 0}

    def _chunk_document(
        self, text: str, chunk_size: int = 1000, overlap: int = 200
    ) -> List[str]:
        """Split ``text`` into overlapping, whitespace-stripped chunks.

        A chunk is at most ``chunk_size`` characters. When more text follows
        and the chunk contains a ``.`` past its midpoint, the chunk is cut
        after that period. The next chunk starts ``overlap`` characters
        before the end of the previous one.

        Chunking stops once a chunk reaches the end of the text, so no
        trailing chunk that is wholly contained in the previous one is
        emitted, and the start position always advances, so large ``overlap``
        values cannot cause an endless loop.

        Raises:
            ValueError: If ``chunk_size`` is less than 1.
        """
        if chunk_size < 1:
            raise ValueError("chunk_size must be at least 1")

        chunks = []
        text_length = len(text)
        start = 0

        while start < text_length:
            end = start + chunk_size
            chunk = text[start:end]

            if end >= text_length:
                # Final chunk: it already covers the rest of the text.
                chunks.append(chunk.strip())
                break

            # Try to break at a sentence boundary in the second half.
            last_period = chunk.rfind(".")
            if last_period > chunk_size // 2:
                chunk = chunk[: last_period + 1]
                end = start + last_period + 1

            chunks.append(chunk.strip())
            # Step back by the overlap, but always move forward.
            start = max(end - overlap, start + 1)

        return [chunk for chunk in chunks if chunk]

    def find_similar_clauses(
        self, clause_text: str, exclude_document_id: Optional[str] = None, k: int = 3
    ) -> List[Dict[str, Any]]:
        """Return up to ``k`` stored chunks closest to ``clause_text``.

        When ``exclude_document_id`` is given, a ``$ne`` metadata filter is
        applied to leave that document's chunks out.
        NOTE(review): the original author was unsure this filter syntax is
        what Chroma expects - verify.
        Returns an empty list if the search raises (the error is logged).
        """
        try:
            search_filter = None
            if exclude_document_id:
                search_filter = {"document_id": {"$ne": exclude_document_id}}

            results = self.vector_store.similarity_search_with_score(
                query=clause_text, k=k, filter=search_filter
            )
            return self._format_results(results)

        except Exception as e:
            log_error(f"Error finding similar clauses: {str(e)}")
            return []
|
src/utils/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# src/utils/__init__.py
|
src/utils/config.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import Optional
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
# Load environment variables from a .env file, if one is present, before the
# Config class below reads them with os.getenv.
load_dotenv()
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def _env_flag(name: str, default: bool) -> bool:
    """Return True when environment variable ``name`` is "true" (any case)."""
    return os.getenv(name, str(default)).strip().lower() == "true"


def _env_csv(name: str, default: str) -> list:
    """Return the comma-separated variable ``name`` as clean lower-case items."""
    raw_items = os.getenv(name, default).split(",")
    return [item.strip().lower() for item in raw_items if item.strip()]


class Config:
    """Application settings read from environment variables at import time.

    Every attribute has a default used when its variable is unset. Integer and
    float settings are parsed eagerly, so a malformed value raises
    ``ValueError`` when this module is imported.
    """

    # Placeholder value treated the same as a missing API key.
    _PLACEHOLDER_API_KEY = "your_google_ai_api_key_here"

    # --- Google AI API ------------------------------------------------------
    GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")

    # --- Application settings -----------------------------------------------
    DEBUG: bool = _env_flag("DEBUG", True)
    STREAMLIT_SERVER_PORT: int = int(os.getenv("STREAMLIT_SERVER_PORT", "8501"))
    STREAMLIT_SERVER_ADDRESS: str = os.getenv("STREAMLIT_SERVER_ADDRESS", "localhost")
    MAX_FILE_SIZE_MB: int = int(os.getenv("MAX_FILE_SIZE_MB", "10"))
    # Whitespace and empty entries are dropped so "pdf, docx," parses cleanly.
    SUPPORTED_FILE_TYPES: list = _env_csv("SUPPORTED_FILE_TYPES", "pdf,docx,txt")

    # --- Logging ------------------------------------------------------------
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
    LOG_FILE: str = os.getenv("LOG_FILE", "./data/app.log")

    # --- Security -----------------------------------------------------------
    # NOTE(review): the default secret is a development placeholder.
    SECRET_KEY: str = os.getenv("SECRET_KEY", "development-key-change-in-production")
    SESSION_TIMEOUT_MINUTES: int = int(os.getenv("SESSION_TIMEOUT_MINUTES", "60"))

    # --- AI model settings --------------------------------------------------
    CHAT_MODEL: str = os.getenv("CHAT_MODEL", "gemini-1.5-flash")
    TEMPERATURE: float = float(os.getenv("TEMPERATURE", "0.2"))
    MAX_TOKENS: int = int(os.getenv("MAX_TOKENS", "2048"))
    EMBEDDING_MODEL: str = os.getenv("EMBEDDING_MODEL", "models/embedding-001")

    # --- Vector store -------------------------------------------------------
    # Note the variable name differs from the attribute name.
    CHROMA_PERSIST_DIR: str = os.getenv("CHROMA_PERSIST_DIRECTORY", "./data/chroma_db")

    # --- Storage ------------------------------------------------------------
    UPLOAD_DIR: str = os.getenv("UPLOAD_DIR", "./uploads")
    DATA_DIR: str = os.getenv("DATA_DIR", "./data")
    DATABASE_URL: str = os.getenv("DATABASE_URL", "sqlite:///./data/lega.db")

    # --- Performance --------------------------------------------------------
    MAX_CONCURRENT_UPLOADS: int = int(os.getenv("MAX_CONCURRENT_UPLOADS", "5"))
    DOCUMENT_PROCESSING_TIMEOUT: int = int(
        os.getenv("DOCUMENT_PROCESSING_TIMEOUT", "300")
    )
    ENABLE_CACHE: bool = _env_flag("ENABLE_CACHE", True)
    CACHE_TTL_SECONDS: int = int(os.getenv("CACHE_TTL_SECONDS", "3600"))

    # --- Feature flags ------------------------------------------------------
    ENABLE_DOCUMENT_LIBRARY: bool = _env_flag("ENABLE_DOCUMENT_LIBRARY", True)
    ENABLE_QA_ASSISTANT: bool = _env_flag("ENABLE_QA_ASSISTANT", True)
    ENABLE_MARKET_COMPARISON: bool = _env_flag("ENABLE_MARKET_COMPARISON", True)
    ENABLE_TIMELINE_TRACKER: bool = _env_flag("ENABLE_TIMELINE_TRACKER", True)
    ENABLE_EXPORT_FEATURES: bool = _env_flag("ENABLE_EXPORT_FEATURES", True)

    # --- Analytics & monitoring ---------------------------------------------
    ENABLE_ANALYTICS: bool = _env_flag("ENABLE_ANALYTICS", False)
    ANALYTICS_API_KEY: str = os.getenv("ANALYTICS_API_KEY", "")
    ENABLE_ERROR_TRACKING: bool = _env_flag("ENABLE_ERROR_TRACKING", False)
    SENTRY_DSN: str = os.getenv("SENTRY_DSN", "")

    # --- Regional settings --------------------------------------------------
    DEFAULT_REGION: str = os.getenv("DEFAULT_REGION", "India")
    DEFAULT_CURRENCY: str = os.getenv("DEFAULT_CURRENCY", "INR")
    TIMEZONE: str = os.getenv("TIMEZONE", "Asia/Kolkata")

    # --- Advanced AI settings -----------------------------------------------
    RISK_SENSITIVITY: int = int(os.getenv("RISK_SENSITIVITY", "3"))
    SIMPLIFICATION_LEVEL: str = os.getenv("SIMPLIFICATION_LEVEL", "intermediate")
    MAX_RISK_FACTORS: int = int(os.getenv("MAX_RISK_FACTORS", "10"))

    # --- API rate limiting --------------------------------------------------
    API_REQUESTS_PER_MINUTE: int = int(os.getenv("API_REQUESTS_PER_MINUTE", "60"))
    API_REQUESTS_PER_DAY: int = int(os.getenv("API_REQUESTS_PER_DAY", "1000"))

    # --- Backup & maintenance -----------------------------------------------
    ENABLE_AUTO_BACKUP: bool = _env_flag("ENABLE_AUTO_BACKUP", False)
    BACKUP_INTERVAL_HOURS: int = int(os.getenv("BACKUP_INTERVAL_HOURS", "24"))
    BACKUP_RETENTION_DAYS: int = int(os.getenv("BACKUP_RETENTION_DAYS", "30"))
    AUTO_CLEANUP_TEMP_FILES: bool = _env_flag("AUTO_CLEANUP_TEMP_FILES", True)
    CLEANUP_INTERVAL_HOURS: int = int(os.getenv("CLEANUP_INTERVAL_HOURS", "6"))

    @classmethod
    def validate_config(cls) -> bool:
        """Check that required settings are present.

        A required setting counts as missing when it is empty or still holds
        the placeholder API key. Missing names are printed to stdout.

        Returns:
            True when nothing is missing, otherwise False.
        """
        required_vars = ["GOOGLE_API_KEY"]

        missing_vars = [
            var
            for var in required_vars
            if not getattr(cls, var, "")
            or getattr(cls, var, "") == cls._PLACEHOLDER_API_KEY
        ]

        if missing_vars:
            print(
                f"⚠️ Missing required environment variables: {', '.join(missing_vars)}"
            )
            print("📝 Please update your .env file with valid values")
            return False

        return True

    @classmethod
    def get_config_summary(cls) -> dict:
        """Return a debugging summary of the configuration.

        The API key is reported only as a boolean (``api_configured``), never
        as its value.
        """
        return {
            "api_configured": bool(
                cls.GOOGLE_API_KEY
                and cls.GOOGLE_API_KEY != cls._PLACEHOLDER_API_KEY
            ),
            "debug_mode": cls.DEBUG,
            "features_enabled": {
                "document_library": cls.ENABLE_DOCUMENT_LIBRARY,
                "qa_assistant": cls.ENABLE_QA_ASSISTANT,
                "market_comparison": cls.ENABLE_MARKET_COMPARISON,
                "timeline_tracker": cls.ENABLE_TIMELINE_TRACKER,
                "export_features": cls.ENABLE_EXPORT_FEATURES,
            },
            "supported_file_types": cls.SUPPORTED_FILE_TYPES,
            "max_file_size_mb": cls.MAX_FILE_SIZE_MB,
            "risk_sensitivity": cls.RISK_SENSITIVITY,
            "region": cls.DEFAULT_REGION,
            "currency": cls.DEFAULT_CURRENCY,
        }
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
# Shared module-level Config instance; other modules import this `config`
# object rather than instantiating Config themselves.
config = Config()
|
src/utils/helpers.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import hashlib
|
| 2 |
+
import os
|
| 3 |
+
import uuid
|
| 4 |
+
from datetime import datetime, timedelta
|
| 5 |
+
from typing import List, Dict, Any, Optional
|
| 6 |
+
import re
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def generate_document_id() -> str:
    """Return a new random (version 4) UUID string to identify a document."""
    new_id = uuid.uuid4()
    return str(new_id)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def generate_session_id() -> str:
    """Return a new random (version 4) UUID string to identify a session."""
    new_id = uuid.uuid4()
    return str(new_id)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def calculate_file_hash(file_content: bytes) -> str:
    """Return the SHA-256 digest of ``file_content`` as a lowercase hex string."""
    digest = hashlib.sha256()
    digest.update(file_content)
    return digest.hexdigest()
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def sanitize_filename(filename: str) -> str:
    """Sanitize a filename for safe storage.

    Every character that is not a word character, ``-``, ``_`` or ``.`` is
    replaced with ``_`` (so path separators never survive). Names made only of
    dots (``.``, ``..``) have their dots replaced too, since they would refer
    to directories. The result is limited to 255 characters, keeping the
    extension when it fits.

    Args:
        filename: The untrusted, user-supplied file name.

    Returns:
        The sanitized name; an empty input yields an empty string.
    """
    max_length = 255

    # Remove or replace dangerous characters.
    sanitized = re.sub(r"[^\w\-_\.]", "_", filename)

    # "." and ".." are directory references, not usable file names.
    if sanitized and not sanitized.strip("."):
        sanitized = "_" * len(sanitized)

    if len(sanitized) > max_length:
        name, ext = os.path.splitext(sanitized)
        if len(ext) >= max_length:
            # The extension alone exceeds the limit; a negative slice on the
            # stem would leave the result too long, so hard-truncate instead.
            sanitized = sanitized[:max_length]
        else:
            sanitized = name[: max_length - len(ext)] + ext

    return sanitized
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def format_file_size(size_bytes: int) -> str:
    """Render a byte count as a short human-readable string.

    Zero is shown as ``"0 B"``. Other values are divided by 1024 until they
    drop below 1024 or the largest unit (GB) is reached, then shown with one
    decimal place, e.g. ``"1.5 KB"``.
    """
    if size_bytes == 0:
        return "0 B"

    units = ("B", "KB", "MB", "GB")
    largest_unit = len(units) - 1

    value = size_bytes
    unit_index = 0
    while value >= 1024 and unit_index < largest_unit:
        value = value / 1024.0
        unit_index += 1

    return f"{value:.1f} {units[unit_index]}"
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def extract_key_dates(text: str) -> List[Dict[str, Any]]:
    """Find date-like strings in ``text``.

    Four formats are searched, case-insensitively: ``M/D/YYYY``, ``M-D-YYYY``,
    ``YYYY-M-D`` and ``<Month name> D, YYYY``. Results are grouped by format in
    that order and, within a format, by position.

    Returns:
        One dict per match with ``date`` (matched text), ``position`` (start
        offset) and ``context`` (the match plus up to 50 characters on each
        side).
    """
    date_patterns = (
        r"\b\d{1,2}/\d{1,2}/\d{4}\b",  # MM/DD/YYYY
        r"\b\d{1,2}-\d{1,2}-\d{4}\b",  # MM-DD-YYYY
        r"\b\d{4}-\d{1,2}-\d{1,2}\b",  # YYYY-MM-DD
        r"\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b",
    )
    context_margin = 50

    return [
        {
            "date": hit.group(),
            "position": hit.start(),
            "context": text[
                max(0, hit.start() - context_margin) : hit.end() + context_margin
            ],
        }
        for pattern in date_patterns
        for hit in re.finditer(pattern, text, re.IGNORECASE)
    ]
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def extract_financial_terms(text: str) -> Dict[str, Any]:
    """Collect monetary amounts, percentages and interest rates from ``text``.

    Returns a dict that contains only the categories with at least one match:
    ``amounts`` (rupee sign, ``Rs``/``Rs.`` and dollar amounts, grouped in
    that order), ``percentages`` and ``interest_rates`` (the first percentage
    following "interest rate", "APR" or "annual percentage rate" on the same
    line, case-insensitive).
    """
    money_patterns = (
        r"₹[\d,]+(?:\.\d{2})?",  # Indian Rupees
        r"Rs\.?\s*[\d,]+(?:\.\d{2})?",  # Rs. format
        r"\$[\d,]+(?:\.\d{2})?",  # USD
    )
    amounts = [
        amount for pattern in money_patterns for amount in re.findall(pattern, text)
    ]

    percentages = re.findall(r"\d+(?:\.\d+)?%", text)

    interest_rates = re.findall(
        r"(?:interest rate|APR|annual percentage rate).*?(\d+(?:\.\d+)?%)",
        text,
        re.IGNORECASE,
    )

    candidates = (
        ("amounts", amounts),
        ("percentages", percentages),
        ("interest_rates", interest_rates),
    )
    # Categories with no matches are left out entirely.
    return {label: values for label, values in candidates if values}
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def calculate_risk_score(risk_factors: List[Dict[str, Any]]) -> int:
    """Calculate an overall risk score from individual risk factors.

    Each factor contributes a weight chosen by its ``severity`` (critical 25,
    high 15, medium 8, low 3). A missing or ``None`` severity counts as
    "low"; an unrecognised severity contributes nothing. Matching ignores
    case and surrounding whitespace.

    Args:
        risk_factors: Factor dicts, each optionally holding ``severity``.

    Returns:
        The summed weights, capped at 100; 0 for an empty list.
    """
    if not risk_factors:
        return 0

    risk_weights = {"critical": 25, "high": 15, "medium": 8, "low": 3}

    total_score = 0
    for factor in risk_factors:
        severity = factor.get("severity", "low")
        if severity is None:
            # An explicit null (e.g. from parsed JSON) is treated like a
            # missing value rather than crashing on ``.lower()``.
            severity = "low"
        total_score += risk_weights.get(str(severity).strip().lower(), 0)

    # Cap at 100.
    return min(total_score, 100)
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def get_risk_color(risk_score: int) -> str:
    """Map a risk score to a hex colour.

    75 and above is red, 50-74 orange, 25-49 yellow, anything lower green.
    """
    # (minimum score, colour), checked from the highest band down.
    bands = (
        (75, "#FF4444"),  # Red
        (50, "#FF8800"),  # Orange
        (25, "#FFCC00"),  # Yellow
    )
    for minimum, colour in bands:
        if risk_score >= minimum:
            return colour
    return "#44AA44"  # Green
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 100) -> List[str]:
    """Split text into overlapping chunks for processing.

    A chunk is at most ``chunk_size`` characters. When more text follows and
    the chunk contains a ``.`` past its midpoint, the chunk is cut after that
    period. The next chunk starts ``overlap`` characters before the end of the
    previous one.

    Chunking stops once a chunk reaches the end of the text, so no trailing
    chunk that merely repeats the tail of the previous one is produced. The
    start position always advances, so ``overlap`` values close to or above
    ``chunk_size`` cannot cause an endless loop.

    Args:
        text: The text to split.
        chunk_size: Maximum chunk length in characters (at least 1).
        overlap: Characters shared between consecutive chunks.

    Returns:
        The chunks in order; an empty list for empty text.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    chunks = []
    text_length = len(text)
    start = 0

    while start < text_length:
        end = start + chunk_size
        chunk = text[start:end]

        if end >= text_length:
            # Final chunk: it already covers the rest of the text.
            chunks.append(chunk)
            break

        # Try to break at a sentence boundary in the second half.
        last_period = chunk.rfind(".")
        if last_period > chunk_size // 2:
            chunk = chunk[: last_period + 1]
            end = start + last_period + 1

        chunks.append(chunk)
        # Step back by the overlap, but always move forward.
        start = max(end - overlap, start + 1)

    return chunks
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def format_timestamp(timestamp: datetime) -> str:
    """Format a timestamp as a relative age such as ``"3 hours ago"``.

    The age is measured against the current time, taken in the timestamp's
    own timezone when it is timezone-aware (so aware and naive values both
    work). Ages under a minute, and timestamps in the future, are shown as
    ``"Just now"``. Units are singular for a count of one.

    Args:
        timestamp: The moment to describe.

    Returns:
        ``"N day(s) ago"``, ``"N hour(s) ago"``, ``"N minute(s) ago"`` or
        ``"Just now"``.
    """
    now = datetime.now(timestamp.tzinfo)
    # Total seconds avoids timedelta's normalisation of negative values, which
    # made slightly-future timestamps look like "23 hours ago".
    elapsed_seconds = int((now - timestamp).total_seconds())

    if elapsed_seconds < 60:
        return "Just now"

    for unit_seconds, unit_name in ((86400, "day"), (3600, "hour"), (60, "minute")):
        if elapsed_seconds >= unit_seconds:
            count = elapsed_seconds // unit_seconds
            plural = "" if count == 1 else "s"
            return f"{count} {unit_name}{plural} ago"

    return "Just now"
|
src/utils/logger.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import os
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
from src.utils.config import config
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def setup_logging():
    """Set up logging configuration.

    Creates the log file's directory if needed, then configures the root
    logger through ``logging.basicConfig`` with a file handler and a stream
    handler.

    ``config.LOG_LEVEL`` is matched case-insensitively against the logging
    level names; an unknown name falls back to ``INFO`` instead of raising.

    Returns:
        The logger named after this module.
    """
    # A bare file name has no directory part, and os.makedirs("") would raise.
    log_dir = os.path.dirname(config.LOG_FILE)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    level = getattr(logging, str(config.LOG_LEVEL).strip().upper(), None)
    if not isinstance(level, int):
        level = logging.INFO

    logging.basicConfig(
        level=level,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        handlers=[logging.FileHandler(config.LOG_FILE), logging.StreamHandler()],
    )

    return logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def log_document_upload(filename: str, file_size: int) -> None:
    """Log a document upload event at INFO level.

    Args:
        filename: Name of the uploaded file.
        file_size: Size of the uploaded file, reported in bytes.
    """
    logger = logging.getLogger(__name__)
    # Pass values as logging arguments so the message is only formatted when
    # the record is actually emitted.
    logger.info("Document uploaded: %s (%s bytes)", filename, file_size)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def log_analysis_start(document_id: str) -> None:
    """Log the start of an analysis run at INFO level.

    Args:
        document_id: Identifier of the document being analyzed.
    """
    logger = logging.getLogger(__name__)
    # Lazy %-style arguments defer formatting until the record is emitted.
    logger.info("Starting analysis for document: %s", document_id)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def log_analysis_complete(document_id: str, processing_time: float) -> None:
    """Log the completion of an analysis run at INFO level.

    Args:
        document_id: Identifier of the document that was analyzed.
        processing_time: Duration of the analysis; it is rendered with two
            decimal places followed by ``s``.
    """
    logger = logging.getLogger(__name__)
    # Lazy %-style arguments defer formatting until the record is emitted.
    logger.info(
        "Analysis completed for document: %s in %.2fs",
        document_id,
        processing_time,
    )
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def log_error(error_message: str, document_id: str = None) -> None:
    """Log an error event at ERROR level.

    Args:
        error_message: Description of what went wrong.
        document_id: Identifier of the document being processed when the
            error occurred. When omitted (or otherwise falsy) the error is
            reported as a general application error.
    """
    logger = logging.getLogger(__name__)
    # Lazy %-style arguments defer formatting until the record is emitted.
    if document_id:
        logger.error("Error processing document %s: %s", document_id, error_message)
    else:
        logger.error("Application error: %s", error_message)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def log_qa_interaction(document_id: str, question: str) -> None:
    """Log a Q&A interaction at INFO level.

    Only the first 100 characters of the question are logged. An ellipsis is
    appended only when the question was actually cut, so a short question is
    not made to look truncated.

    Args:
        document_id: Identifier of the document the question refers to.
        question: The question text asked by the user.
    """
    logger = logging.getLogger(__name__)
    max_preview = 100
    if len(question) > max_preview:
        preview = question[:max_preview] + "..."
    else:
        preview = question
    # Lazy %-style arguments defer formatting until the record is emitted.
    logger.info("Q&A interaction for document %s: %s", document_id, preview)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# Initialize logging when module is imported: importing this module calls
# setup_logging() as a side effect, so no explicit call is needed by importers.
|
| 58 |
+
setup_logging()
|
start.sh
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash

# Hugging Face Spaces startup script for Lega.AI
#
# Creates the working directories, exports the Streamlit/runtime settings,
# warns when GOOGLE_API_KEY is missing, then replaces this shell with the
# Streamlit server process.

echo "🚀 Starting Lega.AI on Hugging Face Spaces..."

# Create necessary directories if they don't exist
mkdir -p data/chroma_db
mkdir -p uploads
mkdir -p .streamlit

# Set default environment variables for Hugging Face deployment
# PORT is honoured when it is set; otherwise the server listens on 7860.
export STREAMLIT_SERVER_PORT="${PORT:-7860}"
export STREAMLIT_SERVER_ADDRESS="0.0.0.0"
export DEBUG=False
export LOG_LEVEL=INFO
export STREAMLIT_CONFIG_DIR=/app/.streamlit
export XDG_CONFIG_HOME=/app

# Check if GOOGLE_API_KEY is set
# ${VAR:-} keeps the test valid even if the script is ever run with `set -u`.
# The warning goes to stderr so it is not mixed into regular output.
if [ -z "${GOOGLE_API_KEY:-}" ]; then
    echo "⚠️ WARNING: GOOGLE_API_KEY environment variable is not set!" >&2
    echo "Please set it in your Hugging Face Space settings for the app to work properly." >&2
fi

# Start the Streamlit application
# Expansions are quoted so an unusual value cannot be word-split or globbed
# into extra command-line arguments. `exec` replaces the shell with the
# Streamlit process.
echo "🌐 Starting Streamlit on port $STREAMLIT_SERVER_PORT..."
exec streamlit run main.py \
    --server.port="$STREAMLIT_SERVER_PORT" \
    --server.address="$STREAMLIT_SERVER_ADDRESS" \
    --server.headless=true \
    --server.fileWatcherType=none \
    --server.enableCORS=false \
    --server.enableXsrfProtection=false
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|