Spaces:
Sleeping
Sleeping
Commit ·
78046e4
0
Parent(s):
Configure Hugging Face Space
Browse files
- .dockerignore +21 -0
- .env.example +6 -0
- .gitattributes +20 -0
- .gitignore +44 -0
- Dockerfile +37 -0
- LICENSE +21 -0
- README.md +23 -0
- SECURITY.md +27 -0
.dockerignore
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
.env.*
|
| 3 |
+
.git
|
| 4 |
+
.gitignore
|
| 5 |
+
|
| 6 |
+
motor_policy_venv/
|
| 7 |
+
.venv/
|
| 8 |
+
venv/
|
| 9 |
+
__pycache__/
|
| 10 |
+
.pytest_cache/
|
| 11 |
+
|
| 12 |
+
ui/node_modules/
|
| 13 |
+
ui/dist/
|
| 14 |
+
|
| 15 |
+
output/
|
| 16 |
+
src/output/
|
| 17 |
+
docs/*.pdf
|
| 18 |
+
|
| 19 |
+
*.log
|
| 20 |
+
.DS_Store
|
| 21 |
+
Thumbs.db
|
.env.example
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Required: create a Groq API key at https://console.groq.com/keys
|
| 2 |
+
GROQ_API_KEY="replace_with_your_groq_api_key"
|
| 3 |
+
|
| 4 |
+
# Optional model overrides
|
| 5 |
+
# GROQ_MODEL="meta-llama/llama-4-scout-17b-16e-instruct"
|
| 6 |
+
# GROQ_CLASSIFIER_MODEL="llama-3.1-8b-instant"
|
.gitattributes
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
* text=auto
|
| 2 |
+
|
| 3 |
+
*.py text eol=lf
|
| 4 |
+
*.ts text eol=lf
|
| 5 |
+
*.tsx text eol=lf
|
| 6 |
+
*.js text eol=lf
|
| 7 |
+
*.json text eol=lf
|
| 8 |
+
*.md text eol=lf
|
| 9 |
+
*.yaml text eol=lf
|
| 10 |
+
*.yml text eol=lf
|
| 11 |
+
*.css text eol=lf
|
| 12 |
+
*.html text eol=lf
|
| 13 |
+
Dockerfile text eol=lf
|
| 14 |
+
|
| 15 |
+
*.pdf binary
|
| 16 |
+
*.png binary
|
| 17 |
+
*.jpg binary
|
| 18 |
+
*.jpeg binary
|
| 19 |
+
*.gif binary
|
| 20 |
+
*.webp binary
|
.gitignore
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Secrets and local environment
|
| 2 |
+
.env
|
| 3 |
+
.env.*
|
| 4 |
+
!.env.example
|
| 5 |
+
|
| 6 |
+
# Python
|
| 7 |
+
__pycache__/
|
| 8 |
+
*.py[cod]
|
| 9 |
+
*.pyo
|
| 10 |
+
.pytest_cache/
|
| 11 |
+
.ruff_cache/
|
| 12 |
+
.mypy_cache/
|
| 13 |
+
.coverage
|
| 14 |
+
htmlcov/
|
| 15 |
+
motor_policy_venv/
|
| 16 |
+
.venv/
|
| 17 |
+
venv/
|
| 18 |
+
|
| 19 |
+
# Node / Vite
|
| 20 |
+
ui/node_modules/
|
| 21 |
+
ui/dist/
|
| 22 |
+
ui/.vite/
|
| 23 |
+
npm-debug.log*
|
| 24 |
+
yarn-debug.log*
|
| 25 |
+
yarn-error.log*
|
| 26 |
+
|
| 27 |
+
# Generated runtime output
|
| 28 |
+
output/
|
| 29 |
+
src/output/
|
| 30 |
+
*.log
|
| 31 |
+
|
| 32 |
+
# Private source documents
|
| 33 |
+
docs/*.pdf
|
| 34 |
+
|
| 35 |
+
# OS / editor noise
|
| 36 |
+
.DS_Store
|
| 37 |
+
Thumbs.db
|
| 38 |
+
.vscode/*
|
| 39 |
+
!.vscode/extensions.json
|
| 40 |
+
!.vscode/settings.json.example
|
| 41 |
+
|
| 42 |
+
# Large model/cache artifacts
|
| 43 |
+
.cache/
|
| 44 |
+
models/
|
Dockerfile
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# syntax=docker/dockerfile:1
|
| 2 |
+
|
| 3 |
+
FROM node:20-slim AS ui-build
|
| 4 |
+
|
| 5 |
+
WORKDIR /app/ui
|
| 6 |
+
COPY ui/package*.json ./
|
| 7 |
+
RUN npm ci
|
| 8 |
+
COPY ui/ ./
|
| 9 |
+
RUN npm run build
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
FROM python:3.11-slim AS app
|
| 13 |
+
|
| 14 |
+
ENV PYTHONUNBUFFERED=1 \
|
| 15 |
+
PORT=7860
|
| 16 |
+
|
| 17 |
+
WORKDIR /app
|
| 18 |
+
|
| 19 |
+
RUN apt-get update \
|
| 20 |
+
&& apt-get install -y --no-install-recommends build-essential curl \
|
| 21 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 22 |
+
|
| 23 |
+
COPY requirements.txt ./
|
| 24 |
+
RUN pip install --no-cache-dir -r requirements.txt \
|
| 25 |
+
&& python -m spacy download en_core_web_sm
|
| 26 |
+
|
| 27 |
+
COPY config/ ./config/
|
| 28 |
+
COPY src/ ./src/
|
| 29 |
+
COPY scripts/ ./scripts/
|
| 30 |
+
COPY sample_data/ ./sample_data/
|
| 31 |
+
COPY --from=ui-build /app/ui/dist ./ui/dist
|
| 32 |
+
|
| 33 |
+
RUN mkdir -p output/sessions output/debug
|
| 34 |
+
|
| 35 |
+
EXPOSE 7860
|
| 36 |
+
|
| 37 |
+
CMD ["sh", "-c", "uvicorn api:app --app-dir src --host 0.0.0.0 --port ${PORT:-7860}"]
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2026 Teja Sagiraju
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: PolicyTrace
|
| 3 |
+
sdk: docker
|
| 4 |
+
app_port: 7860
|
| 5 |
+
pinned: false
|
| 6 |
+
license: mit
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
# PolicyTrace
|
| 10 |
+
|
| 11 |
+
PolicyTrace is a Document AI workflow for UK motor insurance PDFs.
|
| 12 |
+
|
| 13 |
+
It uses a FastAPI backend and a React review UI to extract a structured Golden Record, match fields back to source PDF locations, and support human review.
|
| 14 |
+
|
| 15 |
+
GitHub repository:
|
| 16 |
+
|
| 17 |
+
https://github.com/AItoolstack/ai-policytrace
|
| 18 |
+
|
| 19 |
+
Demo PDFs are available in the GitHub repo under:
|
| 20 |
+
|
| 21 |
+
sample_data/policytrace_demo_pack/
|
| 22 |
+
|
| 23 |
+
For public demos, use only synthetic or redacted PDFs.
|
SECURITY.md
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Security And Privacy
|
| 2 |
+
|
| 3 |
+
PolicyTrace is designed for document extraction workflows that may involve sensitive personal data.
|
| 4 |
+
|
| 5 |
+
## Public Demo Rules
|
| 6 |
+
|
| 7 |
+
- Use only synthetic or redacted PDFs.
|
| 8 |
+
- Do not commit real customer documents.
|
| 9 |
+
- Do not commit `.env` or API keys.
|
| 10 |
+
- Do not commit `output/`, session folders, debug artifacts, or generated extraction JSON from real documents.
|
| 11 |
+
- Before publishing the repository, rotate any key that was ever stored in a local file.
|
| 12 |
+
|
| 13 |
+
## Secrets
|
| 14 |
+
|
| 15 |
+
Local development uses `.env`, which is ignored by Git.
|
| 16 |
+
|
| 17 |
+
Production deployments should use platform secrets:
|
| 18 |
+
|
| 19 |
+
- Hugging Face Spaces: Space Secrets
|
| 20 |
+
- GitHub Actions: Repository Secrets
|
| 21 |
+
- Cloud platforms: managed secret stores
|
| 22 |
+
|
| 23 |
+
## Uploaded Files
|
| 24 |
+
|
| 25 |
+
The local API stores uploaded PDFs under `output/sessions/`. Session folders are deleted on startup when they are older than `pipeline.session_ttl_days`.
|
| 26 |
+
|
| 27 |
+
For production use, add authentication, stronger retention controls, audit logging, and external storage policies before accepting real documents.
|