jameszokah committed on
Commit
9243bca
·
0 Parent(s):

Initial commit

Browse files
Files changed (8) hide show
  1. .github/workflows/quality_gate.yml +32 -0
  2. .gitignore +207 -0
  3. Dockerfile +21 -0
  4. LICENSE +21 -0
  5. README.md +112 -0
  6. app.py +421 -0
  7. requirements.txt +6 -0
  8. styles.css +695 -0
.github/workflows/quality_gate.yml ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Production Quality Gate
2
+
3
+ on:
4
+ push:
5
+ branches: [ "main" ]
6
+ pull_request:
7
+ branches: [ "main" ]
8
+
9
+ jobs:
10
+ build-and-audit:
11
+ runs-on: ubuntu-latest
12
+
13
+ steps:
14
+ - uses: actions/checkout@v3
15
+
16
+ - name: Set up Python 3.10
17
+ uses: actions/setup-python@v3
18
+ with:
19
+ python-version: "3.10"
20
+
21
+ - name: Install Dependencies
22
+ run: |
23
+ python -m pip install --upgrade pip
24
+ pip install flake8
25
+ if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
26
+
27
+ - name: Lint with Flake8 (Style Enforcer)
28
+ run: |
29
+ # stop the build if there are Python syntax errors or undefined names
30
+ flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
31
+ # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
32
+ flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
.gitignore ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 1. Base Image
2
+ FROM python:3.10-slim
3
+
4
+ # 2. INSTALL GIT (Crucial Fix)
5
+ RUN apt-get update && \
6
+ apt-get install -y git && \
7
+ rm -rf /var/lib/apt/lists/*
8
+
9
+ # 3. Setup App
10
+ WORKDIR /app
11
+ COPY requirements.txt .
12
+ RUN pip install --no-cache-dir -r requirements.txt
13
+ COPY . .
14
+
15
+ # 4. Permissions
16
+ RUN useradd -m -u 1000 user
17
+ USER user
18
+ ENV PATH="/home/user/.local/bin:$PATH"
19
+
20
+ # 5. Run Streamlit
21
+ CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0", "--server.enableCORS=false", "--server.enableXsrfProtection=false"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2026 James Zokah
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+ <!-- HERO IMAGE -->
3
+ <img src="assets/DocuTrace_hero.jpg" width="100%" alt="DocuTrace header hero" style="border-radius: 8px; box-shadow: 0px 4px 12px rgba(0,0,0,0.3); border: 1px solid #30363d;">
4
+
5
+
6
+ <br/><br/>
7
+
8
+ # DocuTrace: Verifiable AI Auditor
9
+ ### *Precision Data Extraction with Pixel-Perfect Source Grounding*
10
+
11
+ [![Streamlit](https://img.shields.io/badge/Frontend-Streamlit-FF4B4B?style=for-the-badge&logo=streamlit&logoColor=white)](https://streamlit.io/)
12
+ [![LangExtract](https://img.shields.io/badge/Core-LangExtract-4285F4?style=for-the-badge&logo=google&logoColor=white)](https://github.com/google/langextract)
13
+ [![Gemini](https://img.shields.io/badge/Intelligence-Gemini%202.5%20Flash-8E75B2?style=for-the-badge&logo=googlebard&logoColor=white)](https://ai.google.dev/)
14
+ [![Status](https://img.shields.io/badge/Status-Production-success?style=for-the-badge)]()
15
+
16
+ [View Live Demo](https://huggingface.co/spaces/EATosin/DocuTrace-10K-Analyst) • [Engineering Journey](#-engineering-journey-challenges--solutions) • [Architecture](#-system-architecture)
17
+
18
+ </div>
19
+
20
+ ---
21
+
22
+ ## Executive Summary
23
+ **DocuTrace** is a specialized auditing tool designed for high-stakes domains (Legal, Finance, Compliance) where AI hallucinations are unacceptable.
24
+
25
+ Unlike standard RAG (Retrieval Augmented Generation) pipelines that summarize text, DocuTrace utilizes **Google's LangExtract** library to perform **Source Grounding**. Every extracted data point is cryptographically linked to its specific coordinates in the source PDF, generating an interactive HTML audit trail.
26
+
27
+ ---
28
+
29
+ ## Engineering Journey: Challenges & Solutions
30
+ Building a verifiable extraction engine involves overcoming significant "dependency hell" and rate-limiting barriers.
31
+
32
+ ### 1. The "Slim Image" Dependency Conflict
33
+ * **The Challenge:** We deployed using the standard `python:3.10-slim` Docker image to minimize boot time. However, the `LangExtract` library requires installation directly from GitHub source, which depends on `git`. The build failed with `ExecutableNotFound: git`.
34
+ * **The Solution:** Wrote a custom Dockerfile that installs the required system-level dependency (`apt-get install git`) before the Python packages are installed, then removes the apt package lists so the final image stays small.
35
+
36
+ ### 2. The PDF Parsing Bottleneck
37
+ * **The Challenge:** Financial 10-K reports often contain scanned pages or complex layouts that break standard OCR tools.
38
+ * **The Solution:** Implemented a robust `pypdf` pre-processing layer with a "Fail-Fast" mechanism. The system validates text density before passing data to the LLM, preventing wasted API tokens on unreadable files.
39
+
40
+ ### 3. API Rate Limiting (The "Thundering Herd")
41
+ * **The Challenge:** LangExtract is designed for enterprise usage (Vertex AI) and attempts parallel chunk processing. On the Gemini Free Tier, this triggered `429 Resource Exhausted` errors immediately.
42
+ * **The Solution:** Implemented an "Extraction Window" logic in the UI. Users select specific page ranges (e.g., "Risk Factors, Pages 15-20") rather than processing the entire 100-page document at once, keeping the request volume within the 15 RPM limit.
43
+
44
+ ---
45
+
46
+ ## System Architecture
47
+
48
+ ```mermaid
49
+ graph TD
50
+ User[Financial Analyst] -->|Upload 10-K PDF| UI[Streamlit Interface]
51
+ User -->|Define Schema| UI
52
+
53
+ subgraph Extraction Engine
54
+ UI -->|Raw Text| Preprocessor[PyPDF Chunker]
55
+ Preprocessor -->|Context Blocks| LangExtract[LangExtract Library]
56
+ LangExtract -->|Inference Req| Gemini[Gemini 2.5 Flash]
57
+ Gemini -->|Structured Data| LangExtract
58
+ end
59
+
60
+ LangExtract -->|HTML Generation| Visualizer[Interactive Highlighter]
61
+ Visualizer -->|Iframe Render| UI
62
+ ```
63
+
64
+ ### Core Components
65
+ 1. **Frontend (Streamlit):** Handles file ingestion and renders the output within a secure container.
66
+ 2. **Intelligence (Gemini 2.5 Flash):** Selected for its 1M token context window, allowing it to hold large document sections in working memory without RAG retrieval loss.
67
+ 3. **Grounding (LangExtract):** Maps the LLM's JSON output back to the original PDF text spans using fuzzy matching algorithms.
68
+
69
+ ---
70
+
71
+ ## ⚠️ Limitations & Constraints
72
+ This is a production-grade Proof of Concept (PoC) with specific constraints:
73
+
74
+ * **Text-Only Extraction:** The current pipeline extracts text. It does not parse charts, graphs, or tables (requires multimodal vision upgrade).
75
+ * **API Quotas:** The Live Demo operates on the Google Gemini Free Tier. Heavy usage may trigger temporary cooldowns (429 Errors).
76
+ * **Session State:** For privacy, all files are processed in ephemeral memory and discarded immediately after the session ends. No data is persisted.
77
+
78
+ ---
79
+
80
+ ## 🚀 Installation & Local Development
81
+
82
+ ### Prerequisites
83
+ * Python 3.10+
84
+ * Google Gemini API Key
85
+
86
+ ### Setup
87
+ ```bash
88
+ # 1. Clone the repository
89
+ git clone https://github.com/eatosin/DocuTrace-AI-Auditor.git
90
+ cd DocuTrace-AI-Auditor
91
+
92
+ # 2. Install dependencies (requires Git installed)
93
+ pip install -r requirements.txt
94
+
95
+ # 3. Configure Environment
96
+ export GEMINI_API_KEY="your_key_here"
97
+
98
+ # 4. Run the App
99
+ streamlit run app.py
100
+ ```
101
+
102
+ ---
103
+
104
+ ## Author
105
+ **Owadokun Tosin Tobi**
106
+ *Senior AI Engineer | Specialist in MLOps & LLM Evaluation*
107
+
108
+ * **Portfolio:** [ReasonBench](https://github.com/eatosin/ReasonBench), [Sentinel](https://github.com/eatosin/Sentinel-MLOps)
109
+ * **Connect:** [LinkedIn](https://www.linkedin.com/in/owadokun-tosin-tobi-6159091a3?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=android_app)
110
+
111
+ ---
112
+ *Built with Python, Google Cloud AI, and Engineering Rigor.*
app.py ADDED
@@ -0,0 +1,421 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import streamlit.components.v1 as components
4
+ from datetime import datetime
5
+
6
+ # --- Try to import optional dependencies ---
7
+ try:
8
+ import google.generativeai as genai
9
+ GENAI_AVAILABLE = True
10
+ except ImportError:
11
+ GENAI_AVAILABLE = False
12
+
13
+ try:
14
+ import langextract as lx
15
+ LANGEXTRACT_AVAILABLE = True
16
+ except ImportError:
17
+ LANGEXTRACT_AVAILABLE = False
18
+
19
+ try:
20
+ from pypdf import PdfReader
21
+ PYPDF_AVAILABLE = True
22
+ except ImportError:
23
+ PYPDF_AVAILABLE = False
24
+
25
+ # --- CONFIG ---
26
+ st.set_page_config(
27
+ page_title="DocuTrace AI",
28
+ layout="wide",
29
+ page_icon="🔍",
30
+ initial_sidebar_state="expanded"
31
+ )
32
+
33
+ # --- LOAD CUSTOM CSS ---
34
def load_css():
    """Inject the ``styles.css`` that sits next to this script into the page.

    The stylesheet text is wrapped in a ``<style>`` tag and emitted through
    ``st.markdown`` with ``unsafe_allow_html=True``. When the file does not
    exist the function returns without doing anything.
    """
    css_file = os.path.join(os.path.dirname(__file__), "styles.css")
    if not os.path.exists(css_file):
        return
    # Decode explicitly as UTF-8 so the result does not depend on the host
    # locale's default text encoding.
    with open(css_file, "r", encoding="utf-8") as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
39
+
40
+ load_css()
41
+
42
+ # --- SESSION STATE INIT ---
43
+ if "messages" not in st.session_state:
44
+ st.session_state.messages = []
45
+ if "extraction_count" not in st.session_state:
46
+ st.session_state.extraction_count = 0
47
+ if "pages_processed" not in st.session_state:
48
+ st.session_state.pages_processed = 0
49
+
50
+ # Load API Key
51
+ api_key = os.getenv("GEMINI_API_KEY")
52
+ if api_key and GENAI_AVAILABLE:
53
+ os.environ["LANGEXTRACT_API_KEY"] = api_key
54
+
55
+ # --- HELPER FUNCTIONS ---
56
def add_message(role: str, content: str, msg_type: str = "text"):
    """Append one entry to the chat history held in ``st.session_state.messages``.

    Args:
        role: Stored under the ``"role"`` key of the entry.
        content: Stored under the ``"content"`` key of the entry.
        msg_type: Stored under the ``"type"`` key; defaults to ``"text"``.

    The entry also records the current local time as ``HH:MM`` under the
    ``"timestamp"`` key.
    """
    entry = dict(
        role=role,
        content=content,
        type=msg_type,
        timestamp=datetime.now().strftime("%H:%M"),
    )
    st.session_state.messages.append(entry)
64
+
65
def render_chat_message(message):
    """Build the HTML fragment for a single chat message.

    Args:
        message: Mapping with the keys ``"role"`` and ``"content"``; the
            ``"timestamp"`` key is optional and defaults to an empty string.

    Returns:
        A string of HTML containing the avatar, the message bubble and the
        timestamp. A ``"user"`` role gets the user avatar and bubble class;
        any other role is rendered with the assistant avatar and bubble class.

    Raises:
        KeyError: If ``"role"`` or ``"content"`` is missing from ``message``.
    """
    # Local import keeps this block self-contained.
    from html import escape

    # The returned markup is rendered with unsafe_allow_html=True, and message
    # content can carry user-typed text or raw exception messages. Escape every
    # interpolated value so it cannot inject or break markup.
    role = escape(str(message["role"]))
    content = escape(str(message["content"]))
    timestamp = escape(str(message.get("timestamp", "")))

    if message["role"] == "user":
        avatar = "👤"
        bubble_class = "user"
    else:
        avatar = "🔍"
        bubble_class = "assistant"

    markup = f"""
    <div class="chat-message {role}">
        <div class="chat-avatar {role}">{avatar}</div>
        <div class="chat-bubble {bubble_class}">
            <div style="margin-bottom: 4px;">{content}</div>
            <div style="font-size: 0.7rem; opacity: 0.6; text-align: right;">{timestamp}</div>
        </div>
    </div>
    """
    return markup
88
+
89
def render_result_card(title, content, icon="📄"):
    """Return the HTML fragment for one extraction result card.

    Args:
        title: Text placed in the card's title element.
        content: Text placed in the card's content element.
        icon: Text placed in the card's icon element; defaults to "📄".

    Returns:
        A string of HTML: a header (icon and title), the content, and a fixed
        "✓ Source Verified" tag. Values are inserted as given, with no escaping.
    """
    card_template = """
    <div class="result-card">
        <div class="result-card-header">
            <div class="result-card-icon">{icon}</div>
            <div class="result-card-title">{title}</div>
        </div>
        <div class="result-card-content">{content}</div>
        <span class="evidence-tag">✓ Source Verified</span>
    </div>
    """
    return card_template.format(icon=icon, title=title, content=content)
102
+
103
+ # --- SIDEBAR ---
104
+ with st.sidebar:
105
+ # Logo / Brand
106
+ st.markdown("""
107
+ <div style="text-align: center; padding: 1rem 0 2rem 0;">
108
+ <div style="font-size: 3rem; margin-bottom: 0.5rem;">🔍</div>
109
+ <div style="font-family: 'Outfit', sans-serif; font-size: 1.5rem; font-weight: 700;
110
+ background: linear-gradient(135deg, #f1f5f9 0%, #6366f1 100%);
111
+ -webkit-background-clip: text; -webkit-text-fill-color: transparent;">
112
+ DocuTrace
113
+ </div>
114
+ <div style="font-size: 0.8rem; color: #64748b; margin-top: 0.25rem;">
115
+ Verifiable AI Auditor
116
+ </div>
117
+ </div>
118
+ """, unsafe_allow_html=True)
119
+
120
+ st.markdown("### ⚙️ Configuration")
121
+
122
+ # Model Selection with icons
123
+ model_choice = st.selectbox(
124
+ "AI Model",
125
+ ["gemini-2.5-flash", "gemini-1.5-flash"],
126
+ help="Select the Gemini model for extraction"
127
+ )
128
+
129
+ # Status indicators
130
+ all_deps_ready = GENAI_AVAILABLE and LANGEXTRACT_AVAILABLE and PYPDF_AVAILABLE and api_key
131
+
132
+ if all_deps_ready:
133
+ st.markdown("""
134
+ <div style="display: flex; align-items: center; gap: 8px; padding: 0.75rem;
135
+ background: rgba(16, 185, 129, 0.15); border-radius: 8px; margin: 1rem 0;">
136
+ <div style="width: 8px; height: 8px; background: #10b981; border-radius: 50%;"></div>
137
+ <span style="color: #10b981; font-size: 0.85rem;">System Ready</span>
138
+ </div>
139
+ """, unsafe_allow_html=True)
140
+ else:
141
+ missing = []
142
+ if not api_key:
143
+ missing.append("API Key")
144
+ if not GENAI_AVAILABLE:
145
+ missing.append("google-generativeai")
146
+ if not LANGEXTRACT_AVAILABLE:
147
+ missing.append("langextract")
148
+ if not PYPDF_AVAILABLE:
149
+ missing.append("pypdf")
150
+
151
+ st.markdown(f"""
152
+ <div style="display: flex; align-items: center; gap: 8px; padding: 0.75rem;
153
+ background: rgba(245, 158, 11, 0.15); border-radius: 8px; margin: 1rem 0;">
154
+ <div style="width: 8px; height: 8px; background: #f59e0b; border-radius: 50%;"></div>
155
+ <span style="color: #f59e0b; font-size: 0.85rem;">Demo Mode</span>
156
+ </div>
157
+ """, unsafe_allow_html=True)
158
+
159
+ st.divider()
160
+
161
+ # File Upload
162
+ st.markdown("### 📁 Document")
163
+ uploaded_file = st.file_uploader(
164
+ "Upload PDF",
165
+ type=["pdf"],
166
+ help="Upload your document for analysis"
167
+ )
168
+
169
+ st.divider()
170
+
171
+ # Stats
172
+ st.markdown("### 📊 Session Stats")
173
+ col1, col2 = st.columns(2)
174
+ with col1:
175
+ st.markdown(f"""
176
+ <div class="metric-container">
177
+ <div class="metric-value">{st.session_state.extraction_count}</div>
178
+ <div class="metric-label">Extractions</div>
179
+ </div>
180
+ """, unsafe_allow_html=True)
181
+ with col2:
182
+ st.markdown(f"""
183
+ <div class="metric-container">
184
+ <div class="metric-value">{st.session_state.pages_processed}</div>
185
+ <div class="metric-label">Pages</div>
186
+ </div>
187
+ """, unsafe_allow_html=True)
188
+
189
+ st.divider()
190
+
191
+ # Footer
192
+ st.markdown("""
193
+ <div style="text-align: center; padding: 1rem 0; color: #64748b; font-size: 0.75rem;">
194
+ Powered by <strong>Google LangExtract</strong><br/>
195
+ & Gemini AI
196
+ </div>
197
+ """, unsafe_allow_html=True)
198
+
199
+ # --- MAIN CONTENT ---
200
+ # Hero Section
201
+ st.markdown("""
202
+ <div class="hero-section">
203
+ <h1 class="hero-title">📜 DocuTrace AI</h1>
204
+ <p class="hero-subtitle">Extract structured data from documents with <strong>Source Grounding</strong> & verifiable evidence</p>
205
+ <div class="hero-badge">
206
+ <span>●</span> Production Ready
207
+ </div>
208
+ </div>
209
+ """, unsafe_allow_html=True)
210
+
211
+ # Main content area
212
+ if uploaded_file:
213
+ # Read PDF
214
+ if PYPDF_AVAILABLE:
215
+ with st.spinner(""):
216
+ try:
217
+ reader = PdfReader(uploaded_file)
218
+ text = ""
219
+ page_count = min(5, len(reader.pages))
220
+ for i in range(page_count):
221
+ text += reader.pages[i].extract_text() + "\n"
222
+
223
+ st.session_state.pages_processed = page_count
224
+
225
+ # Success notification
226
+ st.markdown(f"""
227
+ <div style="display: flex; align-items: center; gap: 12px; padding: 1rem 1.5rem;
228
+ background: rgba(16, 185, 129, 0.1); border: 1px solid rgba(16, 185, 129, 0.3);
229
+ border-radius: 12px; margin-bottom: 1.5rem;">
230
+ <span style="font-size: 1.5rem;">✓</span>
231
+ <div>
232
+ <div style="color: #10b981; font-weight: 600;">Document Loaded</div>
233
+ <div style="color: #94a3b8; font-size: 0.85rem;">
234
+ {len(reader.pages)} pages total • Analyzing first {page_count} pages
235
+ </div>
236
+ </div>
237
+ </div>
238
+ """, unsafe_allow_html=True)
239
+
240
+ except Exception as e:
241
+ st.error(f"Error reading PDF: {e}")
242
+ st.stop()
243
+ else:
244
+ st.warning("PyPDF module not installed. PDF parsing disabled.")
245
+ text = ""
246
+
247
+ # Query Section
248
+ st.markdown("### 💬 What would you like to extract?")
249
+
250
+ col1, col2 = st.columns([2, 1])
251
+ with col1:
252
+ topic = st.text_input(
253
+ "Search Topic",
254
+ placeholder="e.g., Risk Factors, Financial Data, Legal Terms...",
255
+ label_visibility="collapsed"
256
+ )
257
+ with col2:
258
+ fields = st.text_input(
259
+ "Fields",
260
+ placeholder="category, summary, impact",
261
+ label_visibility="collapsed"
262
+ )
263
+
264
+ # Chat History Display
265
+ if st.session_state.messages:
266
+ st.markdown("### 📝 Conversation")
267
+ chat_html = '<div class="chat-container">'
268
+ for msg in st.session_state.messages:
269
+ chat_html += render_chat_message(msg)
270
+ chat_html += '</div>'
271
+ st.markdown(chat_html, unsafe_allow_html=True)
272
+
273
+ # Action Button
274
+ col1, col2, col3 = st.columns([1, 1, 1])
275
+ with col2:
276
+ run_audit = st.button("🚀 Run Audit", type="primary", use_container_width=True)
277
+
278
+ if run_audit:
279
+ if not all_deps_ready:
280
+ # Demo mode - show simulated results
281
+ add_message("user", f"Extract **{topic or 'Key Information'}** with fields: {fields or 'auto-detect'}")
282
+ add_message("assistant", "⚠️ Running in Demo Mode. Install required dependencies (google-generativeai, langextract, pypdf) and set GEMINI_API_KEY for full functionality.")
283
+
284
+ # Show demo result cards
285
+ st.markdown("### 🔍 Demo Results")
286
+ st.markdown(render_result_card(
287
+ "Sample Extraction",
288
+ "This is a demonstration of the DocuTrace extraction interface. In production mode with proper dependencies installed, real AI-powered extractions with source grounding would appear here.",
289
+ "📊"
290
+ ), unsafe_allow_html=True)
291
+ st.session_state.extraction_count += 1
292
+ st.rerun()
293
+ elif not topic:
294
+ st.warning("Please enter a search topic")
295
+ else:
296
+ # Full extraction mode
297
+ add_message("user", f"Extract **{topic}** with fields: {fields or 'auto-detect'}")
298
+
299
+ with st.status("🕵️ Analyzing Document...", expanded=True) as status:
300
+ try:
301
+ prompt = f"Extract '{topic}'. Fields: {fields}."
302
+ status.write("🔍 Scanning document content...")
303
+
304
+ examples = [
305
+ lx.data.ExampleData(
306
+ text="The company faces regulatory risks.",
307
+ extractions=[lx.data.Extraction(
308
+ extraction_class="item",
309
+ extraction_text="The company faces regulatory risks",
310
+ attributes={"category": "Legal"}
311
+ )]
312
+ )
313
+ ]
314
+
315
+ status.write("🤖 AI is extracting data...")
316
+
317
+ result = lx.extract(
318
+ text_or_documents=text,
319
+ prompt_description=prompt,
320
+ examples=examples,
321
+ model_id=model_choice
322
+ )
323
+
324
+ extraction_count = len(result.extractions)
325
+ st.session_state.extraction_count += extraction_count
326
+
327
+ status.write(f"✅ Found {extraction_count} items")
328
+ status.write("🎨 Generating evidence visualization...")
329
+ lx.io.save_annotated_documents([result], output_name="data.jsonl", output_dir=".")
330
+ html_obj = lx.visualize("data.jsonl")
331
+ html_content = html_obj.data
332
+
333
+ status.update(label="✅ Audit Complete!", state="complete")
334
+ add_message("assistant", f"Found **{extraction_count}** relevant items for '{topic}'. Evidence is highlighted below with source verification.")
335
+ st.rerun()
336
+
337
+ except Exception as e:
338
+ status.update(label="❌ Error", state="error")
339
+ add_message("assistant", f"⚠️ Extraction failed: {str(e)}")
340
+ st.error(f"Extraction Failed: {e}")
341
+
342
+ # Results Display (after extraction)
343
+ if os.path.exists("data.jsonl") and LANGEXTRACT_AVAILABLE:
344
+ st.markdown("---")
345
+ st.markdown("""
346
+ <div style="display: flex; align-items: center; gap: 12px; margin-bottom: 1rem;">
347
+ <div style="font-size: 1.5rem;">🔍</div>
348
+ <div>
349
+ <div style="font-family: 'Outfit', sans-serif; font-size: 1.25rem; font-weight: 600; color: #f1f5f9;">
350
+ Verified Evidence
351
+ </div>
352
+ <div style="color: #64748b; font-size: 0.85rem;">
353
+ Click highlights to see source text
354
+ </div>
355
+ </div>
356
+ </div>
357
+ """, unsafe_allow_html=True)
358
+
359
+ try:
360
+ html_obj = lx.visualize("data.jsonl")
361
+ components.html(html_obj.data, height=600, scrolling=True)
362
+ except:
363
+ pass
364
+
365
+ else:
366
+ # Empty State
367
+ st.markdown("""
368
+ <div style="text-align: center; padding: 4rem 2rem;">
369
+ <div style="font-size: 4rem; margin-bottom: 1.5rem; opacity: 0.5;">📄</div>
370
+ <div style="font-family: 'Outfit', sans-serif; font-size: 1.5rem; font-weight: 600; color: #94a3b8; margin-bottom: 0.5rem;">
371
+ No Document Loaded
372
+ </div>
373
+ <div style="color: #64748b; max-width: 400px; margin: 0 auto;">
374
+ Upload a PDF from the sidebar to begin extracting structured data with verifiable source grounding.
375
+ </div>
376
+ </div>
377
+ """, unsafe_allow_html=True)
378
+
379
+ # Feature Cards
380
+ st.markdown("<br>", unsafe_allow_html=True)
381
+
382
+ col1, col2, col3 = st.columns(3)
383
+
384
+ with col1:
385
+ st.markdown("""
386
+ <div class="glass-card" style="text-align: center;">
387
+ <div style="font-size: 2.5rem; margin-bottom: 1rem;">🎯</div>
388
+ <div style="font-family: 'Outfit', sans-serif; font-size: 1.1rem; font-weight: 600; color: #f1f5f9; margin-bottom: 0.5rem;">
389
+ Precision Extraction
390
+ </div>
391
+ <div style="color: #64748b; font-size: 0.9rem;">
392
+ Extract exactly what you need with AI-powered document understanding
393
+ </div>
394
+ </div>
395
+ """, unsafe_allow_html=True)
396
+
397
+ with col2:
398
+ st.markdown("""
399
+ <div class="glass-card" style="text-align: center;">
400
+ <div style="font-size: 2.5rem; margin-bottom: 1rem;">✓</div>
401
+ <div style="font-family: 'Outfit', sans-serif; font-size: 1.1rem; font-weight: 600; color: #f1f5f9; margin-bottom: 0.5rem;">
402
+ Source Grounding
403
+ </div>
404
+ <div style="color: #64748b; font-size: 0.9rem;">
405
+ Every extraction is linked to its exact source location in the document
406
+ </div>
407
+ </div>
408
+ """, unsafe_allow_html=True)
409
+
410
+ with col3:
411
+ st.markdown("""
412
+ <div class="glass-card" style="text-align: center;">
413
+ <div style="font-size: 2.5rem; margin-bottom: 1rem;">🔒</div>
414
+ <div style="font-family: 'Outfit', sans-serif; font-size: 1.1rem; font-weight: 600; color: #f1f5f9; margin-bottom: 0.5rem;">
415
+ Enterprise Ready
416
+ </div>
417
+ <div style="color: #64748b; font-size: 0.9rem;">
418
+ Built for high-stakes domains: Legal, Finance, and Compliance
419
+ </div>
420
+ </div>
421
+ """, unsafe_allow_html=True)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ google-generativeai
3
+ pypdf
4
+ python-dotenv
5
+ pandas
6
+ git+https://github.com/google/langextract.git
styles.css ADDED
@@ -0,0 +1,695 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* DocuTrace Premium Theme */
2
+ /* Custom CSS for Streamlit Premium UI */
3
+
4
+ @import url("https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Outfit:wght@400;500;600;700;800&family=JetBrains+Mono:wght@400;500&display=swap");
5
+
6
+ /* === ROOT VARIABLES === */
7
+ :root {
8
+ --primary: #6366f1;
9
+ --primary-dark: #4f46e5;
10
+ --primary-light: #818cf8;
11
+ --accent: #f59e0b;
12
+ --accent-glow: #fbbf24;
13
+ --bg-dark: #0a0a1a;
14
+ --bg-gradient-1: #0f0f23;
15
+ --bg-gradient-2: #1a1a2e;
16
+ --surface: rgba(30, 30, 60, 0.6);
17
+ --surface-hover: rgba(40, 40, 80, 0.8);
18
+ --glass: rgba(255, 255, 255, 0.05);
19
+ --glass-border: rgba(255, 255, 255, 0.1);
20
+ --text-primary: #f1f5f9;
21
+ --text-secondary: #94a3b8;
22
+ --text-muted: #64748b;
23
+ --success: #10b981;
24
+ --success-bg: rgba(16, 185, 129, 0.15);
25
+ --error: #ef4444;
26
+ --error-bg: rgba(239, 68, 68, 0.15);
27
+ --warning: #f59e0b;
28
+ --info: #3b82f6;
29
+ --border-radius: 16px;
30
+ --border-radius-sm: 8px;
31
+ --border-radius-lg: 24px;
32
+ --shadow-sm: 0 2px 8px rgba(0, 0, 0, 0.3);
33
+ --shadow-md: 0 4px 20px rgba(0, 0, 0, 0.4);
34
+ --shadow-lg: 0 8px 40px rgba(0, 0, 0, 0.5);
35
+ --shadow-glow: 0 0 40px rgba(99, 102, 241, 0.3);
36
+ --transition-fast: 0.15s ease;
37
+ --transition-normal: 0.3s ease;
38
+ --transition-slow: 0.5s ease;
39
+ }
40
+
41
+ /* === BASE STYLES === */
42
+ .stApp {
43
+ background: linear-gradient(
44
+ 135deg,
45
+ var(--bg-dark) 0%,
46
+ var(--bg-gradient-1) 50%,
47
+ var(--bg-gradient-2) 100%
48
+ ) !important;
49
+ background-attachment: fixed !important;
50
+ }
51
+
52
+ .stApp::before {
53
+ content: "";
54
+ position: fixed;
55
+ top: 0;
56
+ left: 0;
57
+ right: 0;
58
+ bottom: 0;
59
+ background:
60
+ radial-gradient(
61
+ ellipse at 20% 20%,
62
+ rgba(99, 102, 241, 0.15) 0%,
63
+ transparent 50%
64
+ ),
65
+ radial-gradient(
66
+ ellipse at 80% 80%,
67
+ rgba(245, 158, 11, 0.1) 0%,
68
+ transparent 50%
69
+ ),
70
+ radial-gradient(
71
+ ellipse at 50% 50%,
72
+ rgba(139, 92, 246, 0.08) 0%,
73
+ transparent 60%
74
+ );
75
+ pointer-events: none;
76
+ z-index: -1;
77
+ }
78
+
79
+ /* === TYPOGRAPHY === */
80
+ h1,
81
+ h2,
82
+ h3,
83
+ h4,
84
+ h5,
85
+ h6,
86
+ .stMarkdown h1,
87
+ .stMarkdown h2,
88
+ .stMarkdown h3 {
89
+ font-family: "Outfit", sans-serif !important;
90
+ color: var(--text-primary) !important;
91
+ font-weight: 600 !important;
92
+ }
93
+
94
+ /* Body text. div/span inside st.markdown output are excluded so that class-level and inline font/color (cards, chat bubbles, badges) are not overridden by !important; they inherit from .stMarkdown instead. */
95
+ p, label,
96
+ span:not(.stMarkdown *),
97
+ div:not(.stMarkdown *),
98
+ .stMarkdown, .stMarkdown p {
99
+ font-family: "Inter", sans-serif !important;
100
+ color: var(--text-secondary) !important;
101
+ }
102
+
103
+ code,
104
+ pre,
105
+ .stCode {
106
+ font-family: "JetBrains Mono", monospace !important;
107
+ }
108
+
109
+ /* === CUSTOM SCROLLBAR === */
110
+ ::-webkit-scrollbar {
111
+ width: 8px;
112
+ height: 8px;
113
+ }
114
+
115
+ ::-webkit-scrollbar-track {
116
+ background: var(--bg-gradient-1);
117
+ border-radius: 4px;
118
+ }
119
+
120
+ ::-webkit-scrollbar-thumb {
121
+ background: var(--primary);
122
+ border-radius: 4px;
123
+ }
124
+
125
+ ::-webkit-scrollbar-thumb:hover {
126
+ background: var(--primary-light);
127
+ }
128
+
129
+ /* === SIDEBAR === */
130
+ section[data-testid="stSidebar"] {
131
+ background: linear-gradient(
132
+ 180deg,
133
+ rgba(15, 15, 35, 0.95) 0%,
134
+ rgba(20, 20, 50, 0.98) 100%
135
+ ) !important;
136
+ border-right: 1px solid var(--glass-border) !important;
137
+ backdrop-filter: blur(20px) !important;
138
+ }
139
+
140
+ section[data-testid="stSidebar"] > div {
141
+ padding: 2rem 1.5rem !important;
142
+ }
143
+
144
+ section[data-testid="stSidebar"] .stMarkdown h2,
145
+ section[data-testid="stSidebar"] .stMarkdown h3 {
146
+ color: var(--text-primary) !important;
147
+ font-size: 1.1rem !important;
148
+ text-transform: uppercase;
149
+ letter-spacing: 0.1em;
150
+ margin-bottom: 1.5rem;
151
+ padding-bottom: 0.75rem;
152
+ border-bottom: 2px solid var(--primary);
153
+ display: inline-block;
154
+ }
155
+
156
+ /* === BUTTONS === */
157
+ .stButton > button {
158
+ background: linear-gradient(
159
+ 135deg,
160
+ var(--primary) 0%,
161
+ var(--primary-dark) 100%
162
+ ) !important;
163
+ color: white !important;
164
+ border: none !important;
165
+ border-radius: var(--border-radius) !important;
166
+ padding: 0.75rem 2rem !important;
167
+ font-family: "Inter", sans-serif !important;
168
+ font-weight: 600 !important;
169
+ font-size: 1rem !important;
170
+ letter-spacing: 0.02em;
171
+ box-shadow: var(--shadow-md), var(--shadow-glow) !important;
172
+ transition: all var(--transition-normal) !important;
173
+ position: relative;
174
+ overflow: hidden;
175
+ }
176
+
177
+ .stButton > button::before {
178
+ content: "";
179
+ position: absolute;
180
+ top: 0;
181
+ left: -100%;
182
+ width: 100%;
183
+ height: 100%;
184
+ background: linear-gradient(
185
+ 90deg,
186
+ transparent,
187
+ rgba(255, 255, 255, 0.2),
188
+ transparent
189
+ );
190
+ transition: left 0.5s ease;
191
+ }
192
+
193
+ .stButton > button:hover::before {
194
+ left: 100%;
195
+ }
196
+
197
+ .stButton > button:hover {
198
+ transform: translateY(-2px) !important;
199
+ box-shadow:
200
+ var(--shadow-lg),
201
+ 0 0 60px rgba(99, 102, 241, 0.4) !important;
202
+ }
203
+
204
+ .stButton > button:active {
205
+ transform: translateY(0) !important;
206
+ }
207
+
208
+ /* Primary button variant */
209
+ .stButton > button[kind="primary"] {
210
+ background: linear-gradient(
211
+ 135deg,
212
+ var(--accent) 0%,
213
+ #d97706 100%
214
+ ) !important;
215
+ box-shadow:
216
+ var(--shadow-md),
217
+ 0 0 40px rgba(245, 158, 11, 0.3) !important;
218
+ }
219
+
220
+ .stButton > button[kind="primary"]:hover {
221
+ box-shadow:
222
+ var(--shadow-lg),
223
+ 0 0 60px rgba(245, 158, 11, 0.5) !important;
224
+ }
225
+
226
+ /* === FILE UPLOADER === */
227
+ section[data-testid="stFileUploader"] {
228
+ background: var(--surface) !important;
229
+ border: 2px dashed var(--glass-border) !important;
230
+ border-radius: var(--border-radius) !important;
231
+ padding: 2rem !important;
232
+ transition: all var(--transition-normal) !important;
233
+ }
234
+
235
+ section[data-testid="stFileUploader"]:hover {
236
+ border-color: var(--primary) !important;
237
+ background: var(--surface-hover) !important;
238
+ box-shadow: var(--shadow-glow) !important;
239
+ }
240
+
241
+ section[data-testid="stFileUploader"] label {
242
+ color: var(--text-primary) !important;
243
+ font-weight: 500 !important;
244
+ }
245
+
246
+ /* === TEXT INPUTS === */
247
+ .stTextInput > div > div {
248
+ background: var(--surface) !important;
249
+ border: 1px solid var(--glass-border) !important;
250
+ border-radius: var(--border-radius-sm) !important;
251
+ transition: all var(--transition-fast) !important;
252
+ }
253
+
254
+ .stTextInput > div > div:focus-within {
255
+ border-color: var(--primary) !important;
256
+ box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2) !important;
257
+ }
258
+
259
+ .stTextInput input {
260
+ color: var(--text-primary) !important;
261
+ font-family: "Inter", sans-serif !important;
262
+ }
263
+
264
+ .stTextInput input::placeholder {
265
+ color: var(--text-muted) !important;
266
+ }
267
+
268
+ /* === SELECT BOXES === */
269
+ .stSelectbox > div > div {
270
+ background: var(--surface) !important;
271
+ border: 1px solid var(--glass-border) !important;
272
+ border-radius: var(--border-radius-sm) !important;
273
+ }
274
+
275
+ .stSelectbox [data-baseweb="select"] {
276
+ background: transparent !important;
277
+ }
278
+
279
+ .stSelectbox [data-baseweb="select"] > div {
280
+ background: var(--surface) !important;
281
+ border-color: var(--glass-border) !important;
282
+ }
283
+
284
+ /* === ALERTS & INFO BOXES === */
285
+ .stAlert {
286
+ background: var(--surface) !important;
287
+ border: 1px solid var(--glass-border) !important;
288
+ border-radius: var(--border-radius) !important;
289
+ backdrop-filter: blur(10px) !important;
290
+ }
291
+
292
+ div[data-testid="stNotification"] {
293
+ background: var(--surface) !important;
294
+ border-left: 4px solid var(--primary) !important;
295
+ border-radius: var(--border-radius-sm) !important;
296
+ }
297
+
298
+ .stSuccess {
299
+ background: var(--success-bg) !important;
300
+ border-color: var(--success) !important;
301
+ }
302
+
303
+ .stError {
304
+ background: var(--error-bg) !important;
305
+ border-color: var(--error) !important;
306
+ }
307
+
308
+ /* === EXPANDER === */
309
+ .streamlit-expanderHeader {
310
+ background: var(--surface) !important;
311
+ border: 1px solid var(--glass-border) !important;
312
+ border-radius: var(--border-radius-sm) !important;
313
+ color: var(--text-primary) !important;
314
+ font-weight: 500 !important;
315
+ }
316
+
317
+ /* === DIVIDER === */
318
+ .stDivider {
319
+ border-color: var(--glass-border) !important;
320
+ }
321
+
322
+ hr {
323
+ border-color: var(--glass-border) !important;
324
+ opacity: 0.5;
325
+ }
326
+
327
+ /* === STATUS WIDGET === */
328
+ div[data-testid="stStatusWidget"] {
329
+ background: var(--surface) !important;
330
+ border: 1px solid var(--glass-border) !important;
331
+ border-radius: var(--border-radius) !important;
332
+ backdrop-filter: blur(10px) !important;
333
+ }
334
+
335
+ /* === SPINNER === */
336
+ .stSpinner > div {
337
+ border-color: var(--primary) transparent transparent transparent !important;
338
+ }
339
+
340
+ /* === CHAT MESSAGE STYLES === */
341
+ .chat-container {
342
+ display: flex;
343
+ flex-direction: column;
344
+ gap: 1rem;
345
+ padding: 1rem 0;
346
+ }
347
+
348
+ .chat-message {
349
+ display: flex;
350
+ gap: 0.75rem;
351
+ animation: fadeInUp 0.3s ease;
352
+ }
353
+
354
+ .chat-message.user {
355
+ flex-direction: row-reverse;
356
+ }
357
+
358
+ .chat-avatar {
359
+ width: 40px;
360
+ height: 40px;
361
+ border-radius: 50%;
362
+ display: flex;
363
+ align-items: center;
364
+ justify-content: center;
365
+ font-size: 1.25rem;
366
+ flex-shrink: 0;
367
+ }
368
+
369
+ .chat-avatar.user {
370
+ background: linear-gradient(
371
+ 135deg,
372
+ var(--primary) 0%,
373
+ var(--primary-dark) 100%
374
+ );
375
+ }
376
+
377
+ .chat-avatar.assistant {
378
+ background: linear-gradient(135deg, var(--accent) 0%, #d97706 100%);
379
+ }
380
+
381
+ .chat-bubble {
382
+ max-width: 80%;
383
+ padding: 1rem 1.25rem;
384
+ border-radius: var(--border-radius);
385
+ line-height: 1.6;
386
+ }
387
+
388
+ .chat-bubble.user {
389
+ background: linear-gradient(
390
+ 135deg,
391
+ var(--primary) 0%,
392
+ var(--primary-dark) 100%
393
+ );
394
+ color: white;
395
+ border-bottom-right-radius: 4px;
396
+ }
397
+
398
+ .chat-bubble.assistant {
399
+ background: var(--surface);
400
+ border: 1px solid var(--glass-border);
401
+ color: var(--text-primary);
402
+ border-bottom-left-radius: 4px;
403
+ backdrop-filter: blur(10px);
404
+ }
405
+
406
+ /* === RESULT CARDS === */
407
+ .result-card {
408
+ background: var(--surface);
409
+ border: 1px solid var(--glass-border);
410
+ border-radius: var(--border-radius);
411
+ padding: 1.5rem;
412
+ margin-bottom: 1rem;
413
+ backdrop-filter: blur(10px);
414
+ transition: all var(--transition-normal);
415
+ animation: fadeInUp 0.4s ease;
416
+ }
417
+
418
+ .result-card:hover {
419
+ border-color: var(--primary);
420
+ box-shadow: var(--shadow-glow);
421
+ transform: translateY(-2px);
422
+ }
423
+
424
+ .result-card-header {
425
+ display: flex;
426
+ align-items: center;
427
+ gap: 0.75rem;
428
+ margin-bottom: 1rem;
429
+ padding-bottom: 0.75rem;
430
+ border-bottom: 1px solid var(--glass-border);
431
+ }
432
+
433
+ .result-card-icon {
434
+ width: 36px;
435
+ height: 36px;
436
+ border-radius: 10px;
437
+ display: flex;
438
+ align-items: center;
439
+ justify-content: center;
440
+ background: linear-gradient(
441
+ 135deg,
442
+ var(--primary) 0%,
443
+ var(--primary-dark) 100%
444
+ );
445
+ font-size: 1.1rem;
446
+ }
447
+
448
+ .result-card-title {
449
+ font-family: "Outfit", sans-serif;
450
+ font-size: 1.1rem;
451
+ font-weight: 600;
452
+ color: var(--text-primary);
453
+ }
454
+
455
+ .result-card-content {
456
+ color: var(--text-secondary);
457
+ line-height: 1.7;
458
+ }
459
+
460
+ .evidence-tag {
461
+ display: inline-block;
462
+ padding: 0.25rem 0.75rem;
463
+ background: var(--success-bg);
464
+ color: var(--success);
465
+ border-radius: 20px;
466
+ font-size: 0.75rem;
467
+ font-weight: 500;
468
+ margin-top: 0.75rem;
469
+ }
470
+
471
+ /* === HERO SECTION === */
472
+ .hero-section {
473
+ text-align: center;
474
+ padding: 3rem 2rem;
475
+ margin-bottom: 2rem;
476
+ background: var(--surface);
477
+ border: 1px solid var(--glass-border);
478
+ border-radius: var(--border-radius-lg);
479
+ backdrop-filter: blur(20px);
480
+ position: relative;
481
+ overflow: hidden;
482
+ }
483
+
484
+ .hero-section::before {
485
+ content: "";
486
+ position: absolute;
487
+ top: -50%;
488
+ left: -50%;
489
+ width: 200%;
490
+ height: 200%;
491
+ background: conic-gradient(
492
+ from 0deg at 50% 50%,
493
+ transparent,
494
+ var(--primary),
495
+ transparent,
496
+ var(--accent),
497
+ transparent
498
+ );
499
+ animation: rotate 20s linear infinite;
500
+ opacity: 0.1;
501
+ }
502
+
503
+ .hero-title {
504
+ font-family: "Outfit", sans-serif;
505
+ font-size: 3rem;
506
+ font-weight: 800;
507
+ background: linear-gradient(
508
+ 135deg,
509
+ var(--text-primary) 0%,
510
+ var(--primary-light) 50%,
511
+ var(--accent) 100%
512
+ );
513
+ -webkit-background-clip: text;
514
+ -webkit-text-fill-color: transparent;
515
+ background-clip: text;
516
+ margin-bottom: 0.5rem;
517
+ position: relative;
518
+ z-index: 1;
519
+ }
520
+
521
+ .hero-subtitle {
522
+ font-size: 1.1rem;
523
+ color: var(--text-secondary);
524
+ position: relative;
525
+ z-index: 1;
526
+ }
527
+
528
+ .hero-badge {
529
+ display: inline-flex;
530
+ align-items: center;
531
+ gap: 0.5rem;
532
+ padding: 0.5rem 1rem;
533
+ background: var(--success-bg);
534
+ color: var(--success);
535
+ border-radius: 20px;
536
+ font-size: 0.85rem;
537
+ font-weight: 500;
538
+ margin-top: 1rem;
539
+ position: relative;
540
+ z-index: 1;
541
+ }
542
+
543
+ /* === STATS BAR === */
544
+ .stats-bar {
545
+ display: flex;
546
+ justify-content: center;
547
+ gap: 3rem;
548
+ padding: 1.5rem;
549
+ background: var(--surface);
550
+ border: 1px solid var(--glass-border);
551
+ border-radius: var(--border-radius);
552
+ margin-bottom: 2rem;
553
+ backdrop-filter: blur(10px);
554
+ }
555
+
556
+ .stat-item {
557
+ text-align: center;
558
+ }
559
+
560
+ .stat-value {
561
+ font-family: "Outfit", sans-serif;
562
+ font-size: 1.75rem;
563
+ font-weight: 700;
564
+ color: var(--primary-light);
565
+ }
566
+
567
+ .stat-label {
568
+ font-size: 0.85rem;
569
+ color: var(--text-muted);
570
+ text-transform: uppercase;
571
+ letter-spacing: 0.05em;
572
+ }
573
+
574
+ /* === ANIMATIONS === */
575
+ @keyframes fadeInUp {
576
+ from {
577
+ opacity: 0;
578
+ transform: translateY(20px);
579
+ }
580
+ to {
581
+ opacity: 1;
582
+ transform: translateY(0);
583
+ }
584
+ }
585
+
586
+ @keyframes rotate {
587
+ from {
588
+ transform: rotate(0deg);
589
+ }
590
+ to {
591
+ transform: rotate(360deg);
592
+ }
593
+ }
594
+
595
+ @keyframes pulse {
596
+ 0%,
597
+ 100% {
598
+ opacity: 1;
599
+ }
600
+ 50% {
601
+ opacity: 0.5;
602
+ }
603
+ }
604
+
605
+ @keyframes shimmer {
606
+ 0% {
607
+ background-position: -200% 0;
608
+ }
609
+ 100% {
610
+ background-position: 200% 0;
611
+ }
612
+ }
613
+
614
+ .loading-shimmer {
615
+ background: linear-gradient(
616
+ 90deg,
617
+ var(--surface) 25%,
618
+ var(--surface-hover) 50%,
619
+ var(--surface) 75%
620
+ );
621
+ background-size: 200% 100%;
622
+ animation: shimmer 1.5s infinite;
623
+ }
624
+
625
+ /* === TYPING INDICATOR === */
626
+ .typing-indicator {
627
+ display: flex;
628
+ gap: 4px;
629
+ padding: 1rem;
630
+ }
631
+
632
+ .typing-dot {
633
+ width: 8px;
634
+ height: 8px;
635
+ background: var(--primary);
636
+ border-radius: 50%;
637
+ animation: pulse 1s infinite;
638
+ }
639
+
640
+ .typing-dot:nth-child(2) {
641
+ animation-delay: 0.2s;
642
+ }
643
+
644
+ .typing-dot:nth-child(3) {
645
+ animation-delay: 0.4s;
646
+ }
647
+
648
+ /* === GLASSMORPHISM CARD === */
649
+ .glass-card {
650
+ background: var(--surface);
651
+ border: 1px solid var(--glass-border);
652
+ border-radius: var(--border-radius);
653
+ padding: 1.5rem;
654
+ backdrop-filter: blur(20px);
655
+ box-shadow: var(--shadow-md);
656
+ }
657
+
658
+ /* === METRIC DISPLAY === */
659
+ .metric-container {
660
+ background: var(--surface);
661
+ border: 1px solid var(--glass-border);
662
+ border-radius: var(--border-radius);
663
+ padding: 1.25rem;
664
+ text-align: center;
665
+ transition: all var(--transition-normal);
666
+ }
667
+
668
+ .metric-container:hover {
669
+ border-color: var(--primary);
670
+ box-shadow: var(--shadow-glow);
671
+ }
672
+
673
+ .metric-value {
674
+ font-family: "Outfit", sans-serif;
675
+ font-size: 2rem;
676
+ font-weight: 700;
677
+ color: var(--primary-light);
678
+ }
679
+
680
+ .metric-label {
681
+ font-size: 0.9rem;
682
+ color: var(--text-muted);
683
+ margin-top: 0.25rem;
684
+ }
685
+
686
+ /* === HIDE STREAMLIT BRANDING === */
687
+ #MainMenu {
688
+ visibility: hidden;
689
+ }
690
+ footer {
691
+ visibility: hidden;
692
+ }
693
+ header {
694
+ visibility: hidden;
695
+ }