Spaces:
Running
Running
Merge branch 'dev' into Hugging-Face-Token-Input
Browse files- .github/ISSUE_TEMPLATE/bug_report.yml +1 -1
- .github/ISSUE_TEMPLATE/feature_request.yml +1 -1
- .github/workflows/sync-issue-labels.yml +164 -0
- README.md +20 -2
- backend/app/auth.py +35 -7
- backend/app/config.py +4 -1
- backend/app/database.py +1 -0
- backend/app/models.py +132 -14
- backend/app/rag/agent.py +13 -21
- backend/app/rag/retriever.py +149 -8
- backend/app/routes/admin.py +17 -4
- backend/app/routes/auth.py +2 -1
- backend/app/routes/chat.py +6 -4
- backend/app/routes/documents.py +135 -3
- backend/app/schemas.py +7 -0
- backend/migrate_add_role.py +39 -0
- backend/requirements.txt +3 -1
- backend/scripts/migrate_sqlite_to_postgres.py +524 -0
- backend/tests/test_auth.py +7 -0
- backend/tests/test_chat.py +32 -1
- backend/tests/test_retriever.py +77 -0
- frontend/package-lock.json +26 -1
- frontend/package.json +1 -0
- frontend/src/app/dashboard/page.tsx +66 -70
- frontend/src/app/globals.css +1 -0
- frontend/src/app/page.tsx +12 -2
- frontend/src/app/privacy/page.tsx +450 -0
- frontend/src/app/terms/page.tsx +435 -0
- frontend/src/components/document/PDFViewer.tsx +78 -25
- frontend/src/components/layout/Header.tsx +150 -70
.github/ISSUE_TEMPLATE/bug_report.yml
CHANGED
|
@@ -59,7 +59,7 @@ body:
|
|
| 59 |
- type: checkboxes
|
| 60 |
id: gssoc
|
| 61 |
attributes:
|
| 62 |
-
label: "GSSoC '
|
| 63 |
description: "Are you a GSSoC contributor?"
|
| 64 |
options:
|
| 65 |
- label: "Yes, I am participating in GirlScript Summer of Code and would like to fix this."
|
|
|
|
| 59 |
- type: checkboxes
|
| 60 |
id: gssoc
|
| 61 |
attributes:
|
| 62 |
+
label: "GSSoC '26"
|
| 63 |
description: "Are you a GSSoC contributor?"
|
| 64 |
options:
|
| 65 |
- label: "Yes, I am participating in GirlScript Summer of Code and would like to fix this."
|
.github/ISSUE_TEMPLATE/feature_request.yml
CHANGED
|
@@ -42,7 +42,7 @@ body:
|
|
| 42 |
- type: checkboxes
|
| 43 |
id: gssoc
|
| 44 |
attributes:
|
| 45 |
-
label: "GSSoC '
|
| 46 |
description: "Are you a GSSoC contributor?"
|
| 47 |
options:
|
| 48 |
- label: "Yes, I am participating in GirlScript Summer of Code and would like to build this."
|
|
|
|
| 42 |
- type: checkboxes
|
| 43 |
id: gssoc
|
| 44 |
attributes:
|
| 45 |
+
label: "GSSoC '26"
|
| 46 |
description: "Are you a GSSoC contributor?"
|
| 47 |
options:
|
| 48 |
- label: "Yes, I am participating in GirlScript Summer of Code and would like to build this."
|
.github/workflows/sync-issue-labels.yml
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Sync Labels β Issue to PR
|
| 2 |
+
|
| 3 |
+
# ──────────────────────────────────────────────────────────
|
| 4 |
+
# Auto-syncs labels from referenced issue(s) to the PR when
|
| 5 |
+
# a PR targeting `dev` is closed as merged.
|
| 6 |
+
#
|
| 7 |
+
# Why pull_request_target:
|
| 8 |
+
# Label operations need write permissions on the target
|
| 9 |
+
# repo. pull_request_target runs in the context of the
|
| 10 |
+
# base repo with access to secrets and write token.
|
| 11 |
+
# Since we only read issue data and apply labels, there
|
| 12 |
+
# is no security concern.
|
| 13 |
+
# ──────────────────────────────────────────────────────────
|
| 14 |
+
|
| 15 |
+
on:
|
| 16 |
+
pull_request_target:
|
| 17 |
+
types: [closed]
|
| 18 |
+
branches: ["dev"]
|
| 19 |
+
|
| 20 |
+
permissions:
|
| 21 |
+
contents: read
|
| 22 |
+
issues: read
|
| 23 |
+
pull-requests: write
|
| 24 |
+
|
| 25 |
+
jobs:
|
| 26 |
+
sync-labels:
|
| 27 |
+
name: Sync labels from referenced issue
|
| 28 |
+
runs-on: ubuntu-latest
|
| 29 |
+
if: github.event.pull_request.merged == true
|
| 30 |
+
|
| 31 |
+
steps:
|
| 32 |
+
- name: Extract issue numbers from PR body
|
| 33 |
+
id: extract
|
| 34 |
+
env:
|
| 35 |
+
PR_BODY: ${{ github.event.pull_request.body }}
|
| 36 |
+
run: |
|
| 37 |
+
# Match patterns:
|
| 38 |
+
# "Closes #123"
|
| 39 |
+
# "Fixes #456, #789" (comma-separated)
|
| 40 |
+
# "Resolves #111, #222, #333"
|
| 41 |
+
#
|
| 42 |
+
# Approach: grab lines containing a keyword, then
|
| 43 |
+
# extract every NNN from those lines.
|
| 44 |
+
# We place '|| true' at the very end of the pipeline so it doesn't short-circuit.
|
| 45 |
+
ISSUES=$(
|
| 46 |
+
echo "${PR_BODY:-}" \
|
| 47 |
+
| grep -ioE '.*(closes|fixes|resolves).*' \
|
| 48 |
+
| grep -oE '#[0-9]+' \
|
| 49 |
+
| grep -oE '[0-9]+' \
|
| 50 |
+
| sort -un \
|
| 51 |
+
| xargs \
|
| 52 |
+
|| true
|
| 53 |
+
)
|
| 54 |
+
echo "Found issues: [$ISSUES]"
|
| 55 |
+
echo "issues=$ISSUES" >> "$GITHUB_OUTPUT"
|
| 56 |
+
|
| 57 |
+
- name: Fetch and apply labels
|
| 58 |
+
env:
|
| 59 |
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
| 60 |
+
ISSUES: ${{ steps.extract.outputs.issues }}
|
| 61 |
+
PR_NUMBER: ${{ github.event.pull_request.number }}
|
| 62 |
+
REPO: ${{ github.repository }}
|
| 63 |
+
run: |
|
| 64 |
+
set -euo pipefail
|
| 65 |
+
|
| 66 |
+
ALL_LABELS="gssoc"$'\n'"gssoc:approved"$'\n'"mentor:param20h"$'\n'
|
| 67 |
+
|
| 68 |
+
for ISSUE in $ISSUES; do
|
| 69 |
+
echo "--- Fetching labels for #$ISSUE ---"
|
| 70 |
+
|
| 71 |
+
LABELS=$(gh issue view "$ISSUE" --repo "$REPO" --json labels --jq '.labels[].name' 2>/dev/null || true)
|
| 72 |
+
|
| 73 |
+
if [ -z "$LABELS" ]; then
|
| 74 |
+
echo " β No labels on #$ISSUE, skipping"
|
| 75 |
+
continue
|
| 76 |
+
fi
|
| 77 |
+
|
| 78 |
+
echo " β Labels: $(echo "$LABELS" | tr '\n' ' ')"
|
| 79 |
+
|
| 80 |
+
# Accumulate labels (newline-separated, deduplicated later)
|
| 81 |
+
ALL_LABELS="${ALL_LABELS}${LABELS}"$'\n'
|
| 82 |
+
done
|
| 83 |
+
|
| 84 |
+
if [ -z "$ALL_LABELS" ]; then
|
| 85 |
+
echo "No labels to apply. Exiting."
|
| 86 |
+
exit 0
|
| 87 |
+
fi
|
| 88 |
+
|
| 89 |
+
# Deduplicate and remove empty lines
|
| 90 |
+
UNIQUE_LABELS=$(echo "$ALL_LABELS" | sort -u | grep -v '^$')
|
| 91 |
+
|
| 92 |
+
echo ""
|
| 93 |
+
echo "=== Applying labels to PR #$PR_NUMBER ==="
|
| 94 |
+
echo "$UNIQUE_LABELS"
|
| 95 |
+
|
| 96 |
+
# Get labels already on the PR
|
| 97 |
+
EXISTING=$(gh pr view "$PR_NUMBER" --repo "$REPO" --json labels --jq '.labels[].name' 2>/dev/null || true)
|
| 98 |
+
|
| 99 |
+
MISSING=0
|
| 100 |
+
while IFS= read -r LABEL; do
|
| 101 |
+
[ -z "$LABEL" ] && continue
|
| 102 |
+
if echo "$EXISTING" | grep -qxF "$LABEL"; then
|
| 103 |
+
echo " β Already present: $LABEL"
|
| 104 |
+
else
|
| 105 |
+
echo " + Adding: $LABEL"
|
| 106 |
+
gh label create "$LABEL" --repo "$REPO" 2>/dev/null || true # create if not exists
|
| 107 |
+
gh pr edit "$PR_NUMBER" --repo "$REPO" --add-label "$LABEL"
|
| 108 |
+
MISSING=$((MISSING + 1))
|
| 109 |
+
fi
|
| 110 |
+
done <<< "$UNIQUE_LABELS"
|
| 111 |
+
|
| 112 |
+
if [ "$MISSING" -eq 0 ]; then
|
| 113 |
+
echo "All labels already synced β nothing to add."
|
| 114 |
+
else
|
| 115 |
+
echo "Done. Added $MISSING label(s) to PR #$PR_NUMBER."
|
| 116 |
+
fi
|
| 117 |
+
|
| 118 |
+
- name: Calculate GSSoC Points and Comment
|
| 119 |
+
env:
|
| 120 |
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
| 121 |
+
PR_NUMBER: ${{ github.event.pull_request.number }}
|
| 122 |
+
REPO: ${{ github.repository }}
|
| 123 |
+
run: |
|
| 124 |
+
set -euo pipefail
|
| 125 |
+
|
| 126 |
+
echo "Calculating GSSoC points for PR #$PR_NUMBER..."
|
| 127 |
+
|
| 128 |
+
# Fetch all labels currently on the PR (including the ones we just synced)
|
| 129 |
+
PR_LABELS=$(gh pr view "$PR_NUMBER" --repo "$REPO" --json labels --jq '.labels[].name' 2>/dev/null || true)
|
| 130 |
+
|
| 131 |
+
POINTS=0
|
| 132 |
+
|
| 133 |
+
while IFS= read -r LABEL; do
|
| 134 |
+
[ -z "$LABEL" ] && continue
|
| 135 |
+
case "$LABEL" in
|
| 136 |
+
"level:beginner") POINTS=$((POINTS + 20)) ;;
|
| 137 |
+
"level:intermediate") POINTS=$((POINTS + 35)) ;;
|
| 138 |
+
"level:advanced") POINTS=$((POINTS + 55)) ;;
|
| 139 |
+
"level:critical") POINTS=$((POINTS + 80)) ;;
|
| 140 |
+
"type:accessibility") POINTS=$((POINTS + 15)) ;;
|
| 141 |
+
"type:bug") POINTS=$((POINTS + 10)) ;;
|
| 142 |
+
"type:design") POINTS=$((POINTS + 10)) ;;
|
| 143 |
+
"type:devops") POINTS=$((POINTS + 15)) ;;
|
| 144 |
+
"type:docs") POINTS=$((POINTS + 5)) ;;
|
| 145 |
+
"type:feature") POINTS=$((POINTS + 10)) ;;
|
| 146 |
+
"type:performance") POINTS=$((POINTS + 15)) ;;
|
| 147 |
+
"type:refactor") POINTS=$((POINTS + 10)) ;;
|
| 148 |
+
"type:security") POINTS=$((POINTS + 20)) ;;
|
| 149 |
+
"type:testing") POINTS=$((POINTS + 10)) ;;
|
| 150 |
+
esac
|
| 151 |
+
done <<< "$PR_LABELS"
|
| 152 |
+
|
| 153 |
+
echo "Total Points Calculated: $POINTS"
|
| 154 |
+
|
| 155 |
+
if [ "$POINTS" -gt 0 ]; then
|
| 156 |
+
printf -v COMMENT "π **Congratulations on getting your Pull Request merged!** π\n\nThank you for contributing to PDF-Assistant-RAG as part of GSSoC '26! π\n\nKeep up the great work! β¨"
|
| 157 |
+
|
| 158 |
+
# Post the comment to the PR
|
| 159 |
+
echo "Posting comment..."
|
| 160 |
+
gh pr comment "$PR_NUMBER" --repo "$REPO" --body "$COMMENT"
|
| 161 |
+
else
|
| 162 |
+
echo "No scorable labels found. No comment posted."
|
| 163 |
+
fi
|
| 164 |
+
|
README.md
CHANGED
|
@@ -362,7 +362,25 @@ CHROMA_PERSIST_DIR=./data/chroma_db
|
|
| 362 |
|
| 363 |
> Get your free HuggingFace token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
|
| 364 |
|
| 365 |
-
### 3.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
|
| 367 |
Open **two terminals**:
|
| 368 |
|
|
@@ -384,7 +402,7 @@ npm run dev
|
|
| 384 |
# β App running at http://localhost:3000
|
| 385 |
```
|
| 386 |
|
| 387 |
-
###
|
| 388 |
|
| 389 |
```bash
|
| 390 |
docker compose up --build
|
|
|
|
| 362 |
|
| 363 |
> Get your free HuggingFace token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
|
| 364 |
|
| 365 |
+
### 3. Set up crawl4ai (URL Upload Feature)
|
| 366 |
+
|
| 367 |
+
The URL upload feature (`POST /api/v1/documents/urlupload`) uses **crawl4ai** with a Playwright browser to crawl web pages. `crawl4ai-setup` handles the Playwright browser installation automatically — run it once after `pip install`:
|
| 368 |
+
|
| 369 |
+
```bash
|
| 370 |
+
crawl4ai-setup
|
| 371 |
+
```
|
| 372 |
+
|
| 373 |
+
> **Linux / Docker users:** If Chromium fails to launch due to missing system libraries, also run:
|
| 374 |
+
> ```bash
|
| 375 |
+
> playwright install-deps chromium
|
| 376 |
+
> ```
|
| 377 |
+
> This installs OS-level dependencies (libnss, libatk, etc.) on fresh Ubuntu/Debian servers.
|
| 378 |
+
|
| 379 |
+
> **Windows users:** No extra steps — the `NotImplementedError` (SelectorEventLoop + subprocess) is already handled in the backend automatically.
|
| 380 |
+
|
| 381 |
+
---
|
| 382 |
+
|
| 383 |
+
### 4. Run Locally
|
| 384 |
|
| 385 |
Open **two terminals**:
|
| 386 |
|
|
|
|
| 402 |
# β App running at http://localhost:3000
|
| 403 |
```
|
| 404 |
|
| 405 |
+
### 5. Run with Docker
|
| 406 |
|
| 407 |
```bash
|
| 408 |
docker compose up --build
|
backend/app/auth.py
CHANGED
|
@@ -12,7 +12,7 @@ from sqlalchemy.orm import Session
|
|
| 12 |
|
| 13 |
from app.config import get_settings
|
| 14 |
from app.database import get_db
|
| 15 |
-
from app.models import User
|
| 16 |
|
| 17 |
settings = get_settings()
|
| 18 |
security = HTTPBearer()
|
|
@@ -30,10 +30,10 @@ def verify_password(plain: str, hashed: str) -> bool:
|
|
| 30 |
|
| 31 |
# ββ JWT Token ββββββββββββββββββββββββββββββββββββββββ
|
| 32 |
|
| 33 |
-
def create_access_token(user_id
|
| 34 |
"""Create a JWT access token with user_id as the subject."""
|
| 35 |
payload = {
|
| 36 |
-
"sub": user_id,
|
| 37 |
"type": "access",
|
| 38 |
"exp": datetime.now(timezone.utc) + timedelta(minutes=settings.JWT_ACCESS_EXPIRY_MINUTES),
|
| 39 |
"iat": datetime.now(timezone.utc),
|
|
@@ -41,10 +41,10 @@ def create_access_token(user_id: str) -> str:
|
|
| 41 |
return jwt.encode(payload, settings.SECRET_KEY, algorithm=settings.JWT_ALGORITHM)
|
| 42 |
|
| 43 |
|
| 44 |
-
def create_refresh_token(user_id
|
| 45 |
"""Create a JWT refresh token with user_id as the subject."""
|
| 46 |
payload = {
|
| 47 |
-
"sub": user_id,
|
| 48 |
"type": "refresh",
|
| 49 |
"exp": datetime.now(timezone.utc) + timedelta(days=settings.JWT_REFRESH_EXPIRY_DAYS),
|
| 50 |
"iat": datetime.now(timezone.utc),
|
|
@@ -120,11 +120,39 @@ def get_current_user(
|
|
| 120 |
return user
|
| 121 |
|
| 122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
def get_admin_user(user: User = Depends(get_current_user)) -> User:
|
| 124 |
-
"""
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
| 126 |
raise HTTPException(
|
| 127 |
status_code=status.HTTP_403_FORBIDDEN,
|
| 128 |
detail="Admin access required",
|
| 129 |
)
|
| 130 |
return user
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
from app.config import get_settings
|
| 14 |
from app.database import get_db
|
| 15 |
+
from app.models import User, UserRole
|
| 16 |
|
| 17 |
settings = get_settings()
|
| 18 |
security = HTTPBearer()
|
|
|
|
| 30 |
|
| 31 |
# ββ JWT Token ββββββββββββββββββββββββββββββββββββββββ
|
| 32 |
|
| 33 |
+
def create_access_token(user_id) -> str:
|
| 34 |
"""Create a JWT access token with user_id as the subject."""
|
| 35 |
payload = {
|
| 36 |
+
"sub": str(user_id),
|
| 37 |
"type": "access",
|
| 38 |
"exp": datetime.now(timezone.utc) + timedelta(minutes=settings.JWT_ACCESS_EXPIRY_MINUTES),
|
| 39 |
"iat": datetime.now(timezone.utc),
|
|
|
|
| 41 |
return jwt.encode(payload, settings.SECRET_KEY, algorithm=settings.JWT_ALGORITHM)
|
| 42 |
|
| 43 |
|
| 44 |
+
def create_refresh_token(user_id) -> str:
|
| 45 |
"""Create a JWT refresh token with user_id as the subject."""
|
| 46 |
payload = {
|
| 47 |
+
"sub": str(user_id),
|
| 48 |
"type": "refresh",
|
| 49 |
"exp": datetime.now(timezone.utc) + timedelta(days=settings.JWT_REFRESH_EXPIRY_DAYS),
|
| 50 |
"iat": datetime.now(timezone.utc),
|
|
|
|
| 120 |
return user
|
| 121 |
|
| 122 |
|
| 123 |
+
def _is_admin_user(user: User) -> bool:
|
| 124 |
+
"""
|
| 125 |
+
Check if a user has administrative privileges.
|
| 126 |
+
Supports both the modern 'role' field and the legacy 'is_admin' boolean.
|
| 127 |
+
"""
|
| 128 |
+
if not user:
|
| 129 |
+
return False
|
| 130 |
+
|
| 131 |
+
# We check the role first (it can be an Enum or a plain string depending on the environment)
|
| 132 |
+
role_check = (user.role == UserRole.admin) or (str(user.role) == "admin")
|
| 133 |
+
|
| 134 |
+
# Fallback to the legacy is_admin flag
|
| 135 |
+
return role_check or bool(user.is_admin)
|
| 136 |
+
|
| 137 |
+
|
| 138 |
def get_admin_user(user: User = Depends(get_current_user)) -> User:
|
| 139 |
+
"""
|
| 140 |
+
FastAPI dependency that restricts access to administrators only.
|
| 141 |
+
Raises 403 Forbidden if the user lacks sufficient permissions.
|
| 142 |
+
"""
|
| 143 |
+
if not _is_admin_user(user):
|
| 144 |
raise HTTPException(
|
| 145 |
status_code=status.HTTP_403_FORBIDDEN,
|
| 146 |
detail="Admin access required",
|
| 147 |
)
|
| 148 |
return user
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
async def get_current_admin(
|
| 152 |
+
current_user: User = Depends(get_current_user)
|
| 153 |
+
) -> User:
|
| 154 |
+
"""
|
| 155 |
+
Alias for get_admin_user to maintain compatibility with existing routes.
|
| 156 |
+
Ensures the requesting user has administrative rights.
|
| 157 |
+
"""
|
| 158 |
+
return get_admin_user(current_user)
|
backend/app/config.py
CHANGED
|
@@ -33,7 +33,10 @@ class Settings(BaseSettings):
|
|
| 33 |
".docx": [
|
| 34 |
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
| 35 |
"application/zip",
|
| 36 |
-
]
|
|
|
|
|
|
|
|
|
|
| 37 |
}
|
| 38 |
|
| 39 |
# ββ RAG Pipeline βββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 33 |
".docx": [
|
| 34 |
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
| 35 |
"application/zip",
|
| 36 |
+
],
|
| 37 |
+
".txt": ["text/plain"],
|
| 38 |
+
".md": ["text/markdown"],
|
| 39 |
+
|
| 40 |
}
|
| 41 |
|
| 42 |
# ββ RAG Pipeline βββββββββββββββββββββββββββββββββββββ
|
backend/app/database.py
CHANGED
|
@@ -48,6 +48,7 @@ def _migrate_schema():
|
|
| 48 |
|
| 49 |
migrations = [
|
| 50 |
("users", "hf_token", "ALTER TABLE users ADD COLUMN hf_token VARCHAR(255)"),
|
|
|
|
| 51 |
]
|
| 52 |
|
| 53 |
for table, column, ddl in migrations:
|
|
|
|
| 48 |
|
| 49 |
migrations = [
|
| 50 |
("users", "hf_token", "ALTER TABLE users ADD COLUMN hf_token VARCHAR(255)"),
|
| 51 |
+
("users", "role", "ALTER TABLE users ADD COLUMN role VARCHAR(20) DEFAULT 'user'"),
|
| 52 |
]
|
| 53 |
|
| 54 |
for table, column, ddl in migrations:
|
backend/app/models.py
CHANGED
|
@@ -2,27 +2,128 @@
|
|
| 2 |
SQLAlchemy ORM models for users, documents, and chat messages.
|
| 3 |
"""
|
| 4 |
import uuid
|
|
|
|
|
|
|
|
|
|
| 5 |
from datetime import datetime, timezone
|
|
|
|
|
|
|
| 6 |
from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, Text, Boolean
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
from sqlalchemy.orm import relationship
|
|
|
|
| 8 |
from app.database import Base
|
| 9 |
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
def generate_uuid():
|
|
|
|
| 12 |
return str(uuid.uuid4())
|
| 13 |
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
class User(Base):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
__tablename__ = "users"
|
| 17 |
|
| 18 |
-
id = Column(
|
| 19 |
username = Column(String(80), unique=True, nullable=False, index=True)
|
| 20 |
email = Column(String(120), unique=True, nullable=False, index=True)
|
| 21 |
hashed_password = Column(String(255), nullable=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
is_admin = Column(Boolean, default=False)
|
|
|
|
| 23 |
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
|
| 24 |
last_login = Column(DateTime, nullable=True, index=True)
|
| 25 |
-
hf_token = Column(
|
| 26 |
|
| 27 |
# Relationships
|
| 28 |
documents = relationship("Document", back_populates="owner", cascade="all, delete-orphan")
|
|
@@ -31,10 +132,13 @@ class User(Base):
|
|
| 31 |
|
| 32 |
|
| 33 |
class ApiKey(Base):
|
|
|
|
|
|
|
|
|
|
| 34 |
__tablename__ = "api_keys"
|
| 35 |
|
| 36 |
-
id = Column(
|
| 37 |
-
user_id = Column(
|
| 38 |
key_prefix = Column(String(10), nullable=False)
|
| 39 |
hashed_key = Column(String(255), nullable=False, unique=True, index=True)
|
| 40 |
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
|
|
@@ -45,19 +149,27 @@ class ApiKey(Base):
|
|
| 45 |
|
| 46 |
|
| 47 |
class Document(Base):
|
|
|
|
|
|
|
|
|
|
| 48 |
__tablename__ = "documents"
|
| 49 |
|
| 50 |
-
id = Column(
|
| 51 |
-
user_id = Column(
|
| 52 |
filename = Column(String(255), nullable=False) # Stored filename (UUID-based)
|
| 53 |
original_name = Column(String(255), nullable=False) # User's original filename
|
| 54 |
file_size = Column(Integer, default=0) # Size in bytes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
page_count = Column(Integer, default=0)
|
| 56 |
chunk_count = Column(Integer, default=0)
|
| 57 |
-
status = Column(String(20), default="pending")
|
| 58 |
error_message = Column(Text, nullable=True)
|
| 59 |
uploaded_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
|
| 60 |
-
summary = Column(Text, nullable=True)
|
| 61 |
|
| 62 |
# Relationships
|
| 63 |
owner = relationship("User", back_populates="documents")
|
|
@@ -65,14 +177,17 @@ class Document(Base):
|
|
| 65 |
|
| 66 |
|
| 67 |
class ChatMessage(Base):
|
|
|
|
|
|
|
|
|
|
| 68 |
__tablename__ = "chat_messages"
|
| 69 |
|
| 70 |
-
id = Column(
|
| 71 |
-
user_id = Column(
|
| 72 |
-
document_id = Column(
|
| 73 |
role = Column(String(20), nullable=False) # "user" | "assistant"
|
| 74 |
content = Column(Text, nullable=False)
|
| 75 |
-
sources_json = Column(Text, nullable=True) # JSON
|
| 76 |
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
|
| 77 |
|
| 78 |
# Relationships
|
|
@@ -82,10 +197,13 @@ class ChatMessage(Base):
|
|
| 82 |
|
| 83 |
|
| 84 |
class SharedMessage(Base):
|
|
|
|
|
|
|
|
|
|
| 85 |
__tablename__ = "shared_messages"
|
| 86 |
|
| 87 |
-
id = Column(
|
| 88 |
-
message_id = Column(
|
| 89 |
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
|
| 90 |
|
| 91 |
# Relationships
|
|
|
|
| 2 |
SQLAlchemy ORM models for users, documents, and chat messages.
|
| 3 |
"""
|
| 4 |
import uuid
|
| 5 |
+
import enum
|
| 6 |
+
import base64
|
| 7 |
+
import hashlib
|
| 8 |
from datetime import datetime, timezone
|
| 9 |
+
|
| 10 |
+
from cryptography.fernet import Fernet
|
| 11 |
from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, Text, Boolean
|
| 12 |
+
from sqlalchemy.types import TypeDecorator, CHAR
|
| 13 |
+
from sqlalchemy.dialects.postgresql import UUID as PG_UUID
|
| 14 |
+
from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, Text, Boolean, Enum as SQLAlchemyEnum
|
| 15 |
+
from sqlalchemy.types import TypeDecorator
|
| 16 |
from sqlalchemy.orm import relationship
|
| 17 |
+
|
| 18 |
from app.database import Base
|
| 19 |
|
| 20 |
|
| 21 |
+
class GUID(TypeDecorator):
|
| 22 |
+
"""Platform-independent GUID type.
|
| 23 |
+
Uses PostgreSQL's UUID type, otherwise uses CHAR(36).
|
| 24 |
+
"""
|
| 25 |
+
impl = CHAR
|
| 26 |
+
cache_ok = True
|
| 27 |
+
|
| 28 |
+
def load_dialect_impl(self, dialect):
|
| 29 |
+
if dialect.name == 'postgresql':
|
| 30 |
+
return dialect.type_descriptor(PG_UUID(as_uuid=True))
|
| 31 |
+
else:
|
| 32 |
+
return dialect.type_descriptor(CHAR(36))
|
| 33 |
+
|
| 34 |
+
def process_bind_param(self, value, dialect):
|
| 35 |
+
if value is None:
|
| 36 |
+
return value
|
| 37 |
+
if isinstance(value, uuid.UUID):
|
| 38 |
+
return value if dialect.name == 'postgresql' else str(value)
|
| 39 |
+
try:
|
| 40 |
+
val_uuid = uuid.UUID(value)
|
| 41 |
+
return val_uuid if dialect.name == 'postgresql' else str(val_uuid)
|
| 42 |
+
except ValueError:
|
| 43 |
+
if dialect.name == 'postgresql':
|
| 44 |
+
return uuid.UUID(int=0)
|
| 45 |
+
return value
|
| 46 |
+
|
| 47 |
+
def process_result_value(self, value, dialect):
|
| 48 |
+
if value is None:
|
| 49 |
+
return value
|
| 50 |
+
return str(value)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
class EncryptedString(TypeDecorator):
|
| 54 |
+
"""
|
| 55 |
+
A custom SQLAlchemy type that transparently encrypts strings in the database
|
| 56 |
+
using Fernet (AES). This ensures sensitive tokens aren't stored in plain text
|
| 57 |
+
while remaining easily accessible in code.
|
| 58 |
+
"""
|
| 59 |
+
impl = Text
|
| 60 |
+
cache_ok = False
|
| 61 |
+
|
| 62 |
+
def _get_cipher(self):
|
| 63 |
+
from app.config import get_settings
|
| 64 |
+
settings = get_settings()
|
| 65 |
+
# Derive a 32-byte key from the SECRET_KEY for Fernet encryption
|
| 66 |
+
key = base64.urlsafe_b64encode(hashlib.sha256(settings.SECRET_KEY.encode()).digest())
|
| 67 |
+
return Fernet(key)
|
| 68 |
+
|
| 69 |
+
def process_bind_param(self, value, dialect):
|
| 70 |
+
"""Encrypt the value before saving to the database."""
|
| 71 |
+
if value is None:
|
| 72 |
+
return value
|
| 73 |
+
cipher = self._get_cipher()
|
| 74 |
+
return cipher.encrypt(value.encode()).decode()
|
| 75 |
+
|
| 76 |
+
def process_result_value(self, value, dialect):
|
| 77 |
+
"""Decrypt the value after reading from the database."""
|
| 78 |
+
if value is None:
|
| 79 |
+
return value
|
| 80 |
+
cipher = self._get_cipher()
|
| 81 |
+
try:
|
| 82 |
+
return cipher.decrypt(value.encode()).decode()
|
| 83 |
+
except Exception:
|
| 84 |
+
# Fallback for unencrypted data or if decryption fails
|
| 85 |
+
return value
|
| 86 |
+
|
| 87 |
+
|
| 88 |
def generate_uuid():
|
| 89 |
+
"""Generates a standard unique string identifier for database records."""
|
| 90 |
return str(uuid.uuid4())
|
| 91 |
|
| 92 |
|
| 93 |
+
class UserRole(str, enum.Enum):
|
| 94 |
+
"""
|
| 95 |
+
Defines the available user roles for Role-Based Access Control (RBAC).
|
| 96 |
+
- 'admin': Full access to system statistics and user management.
|
| 97 |
+
- 'user': Standard access for uploading and chatting with documents.
|
| 98 |
+
"""
|
| 99 |
+
user = "user"
|
| 100 |
+
admin = "admin"
|
| 101 |
+
|
| 102 |
+
|
| 103 |
class User(Base):
|
| 104 |
+
"""
|
| 105 |
+
Represents a registered user within the system.
|
| 106 |
+
Supports both legacy 'is_admin' flags and the modern 'role' enum for permissions.
|
| 107 |
+
"""
|
| 108 |
__tablename__ = "users"
|
| 109 |
|
| 110 |
+
id = Column(GUID, primary_key=True, default=uuid.uuid4)
|
| 111 |
username = Column(String(80), unique=True, nullable=False, index=True)
|
| 112 |
email = Column(String(120), unique=True, nullable=False, index=True)
|
| 113 |
hashed_password = Column(String(255), nullable=False)
|
| 114 |
+
|
| 115 |
+
# Permission fields: transitioning towards 'role', keeping 'is_admin' for compatibility
|
| 116 |
+
role = Column(
|
| 117 |
+
SQLAlchemyEnum(UserRole),
|
| 118 |
+
default=UserRole.user,
|
| 119 |
+
nullable=False,
|
| 120 |
+
server_default="user"
|
| 121 |
+
)
|
| 122 |
is_admin = Column(Boolean, default=False)
|
| 123 |
+
|
| 124 |
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
|
| 125 |
last_login = Column(DateTime, nullable=True, index=True)
|
| 126 |
+
hf_token = Column(EncryptedString, nullable=True)
|
| 127 |
|
| 128 |
# Relationships
|
| 129 |
documents = relationship("Document", back_populates="owner", cascade="all, delete-orphan")
|
|
|
|
| 132 |
|
| 133 |
|
| 134 |
class ApiKey(Base):
|
| 135 |
+
"""
|
| 136 |
+
Stores secure hashes of API keys used for programmatic interaction with the system.
|
| 137 |
+
"""
|
| 138 |
__tablename__ = "api_keys"
|
| 139 |
|
| 140 |
+
id = Column(GUID, primary_key=True, default=uuid.uuid4)
|
| 141 |
+
user_id = Column(GUID, ForeignKey("users.id"), nullable=False, index=True)
|
| 142 |
key_prefix = Column(String(10), nullable=False)
|
| 143 |
hashed_key = Column(String(255), nullable=False, unique=True, index=True)
|
| 144 |
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
|
|
|
|
| 149 |
|
| 150 |
|
| 151 |
class Document(Base):
|
| 152 |
+
"""
|
| 153 |
+
Metadata and processing status for files uploaded by users.
|
| 154 |
+
"""
|
| 155 |
__tablename__ = "documents"
|
| 156 |
|
| 157 |
+
id = Column(GUID, primary_key=True, default=uuid.uuid4)
|
| 158 |
+
user_id = Column(GUID, ForeignKey("users.id"), nullable=False, index=True)
|
| 159 |
filename = Column(String(255), nullable=False) # Stored filename (UUID-based)
|
| 160 |
original_name = Column(String(255), nullable=False) # User's original filename
|
| 161 |
file_size = Column(Integer, default=0) # Size in bytes
|
| 162 |
+
id = Column(String, primary_key=True, default=generate_uuid)
|
| 163 |
+
user_id = Column(String, ForeignKey("users.id"), nullable=False, index=True)
|
| 164 |
+
filename = Column(String(255), nullable=False) # Internal UUID-based filename
|
| 165 |
+
original_name = Column(String(255), nullable=False) # Original name for user display
|
| 166 |
+
file_size = Column(Integer, default=0) # Size in bytes
|
| 167 |
page_count = Column(Integer, default=0)
|
| 168 |
chunk_count = Column(Integer, default=0)
|
| 169 |
+
status = Column(String(20), default="pending") # pending | processing | ready | failed
|
| 170 |
error_message = Column(Text, nullable=True)
|
| 171 |
uploaded_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
|
| 172 |
+
summary = Column(Text, nullable=True)
|
| 173 |
|
| 174 |
# Relationships
|
| 175 |
owner = relationship("User", back_populates="documents")
|
|
|
|
| 177 |
|
| 178 |
|
| 179 |
class ChatMessage(Base):
|
| 180 |
+
"""
|
| 181 |
+
Persistent log of conversations between users and the AI analyst.
|
| 182 |
+
"""
|
| 183 |
__tablename__ = "chat_messages"
|
| 184 |
|
| 185 |
+
id = Column(GUID, primary_key=True, default=uuid.uuid4)
|
| 186 |
+
user_id = Column(GUID, ForeignKey("users.id"), nullable=False, index=True)
|
| 187 |
+
document_id = Column(GUID, ForeignKey("documents.id"), nullable=True, index=True)
|
| 188 |
role = Column(String(20), nullable=False) # "user" | "assistant"
|
| 189 |
content = Column(Text, nullable=False)
|
| 190 |
+
sources_json = Column(Text, nullable=True) # JSON representation of retrieved sources
|
| 191 |
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
|
| 192 |
|
| 193 |
# Relationships
|
|
|
|
| 197 |
|
| 198 |
|
| 199 |
class SharedMessage(Base):
|
| 200 |
+
"""
|
| 201 |
+
Links specific chat messages to public sharing URLs.
|
| 202 |
+
"""
|
| 203 |
__tablename__ = "shared_messages"
|
| 204 |
|
| 205 |
+
id = Column(GUID, primary_key=True, default=uuid.uuid4)
|
| 206 |
+
message_id = Column(GUID, ForeignKey("chat_messages.id"), nullable=False, unique=True, index=True)
|
| 207 |
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
|
| 208 |
|
| 209 |
# Relationships
|
backend/app/rag/agent.py
CHANGED
|
@@ -15,21 +15,11 @@ from app.rag.tracing import trace_function
|
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
settings = get_settings()
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
"""Get or create HuggingFace InferenceClient (singleton)."""
|
| 24 |
-
global _llm_client
|
| 25 |
-
|
| 26 |
-
if _llm_client is None:
|
| 27 |
-
_llm_client = InferenceClient(
|
| 28 |
-
token=settings.HF_TOKEN,
|
| 29 |
-
)
|
| 30 |
-
logger.info(f"LLM client initialized for model: {settings.LLM_MODEL}")
|
| 31 |
-
|
| 32 |
-
return _llm_client
|
| 33 |
|
| 34 |
|
| 35 |
def is_greeting(question: str) -> bool:
|
|
@@ -68,7 +58,7 @@ def _chat_messages(system: str, user_content: str) -> list:
|
|
| 68 |
|
| 69 |
@trace_function(
|
| 70 |
"generate_answer",
|
| 71 |
-
metadata_factory=lambda question, user_id, document_id=None: {
|
| 72 |
"user_id": user_id,
|
| 73 |
"document_id": document_id,
|
| 74 |
"llm_model": settings.LLM_MODEL,
|
|
@@ -78,13 +68,14 @@ def generate_answer(
|
|
| 78 |
question: str,
|
| 79 |
user_id: str,
|
| 80 |
document_id: Optional[str] = None,
|
|
|
|
| 81 |
) -> Dict[str, Any]:
|
| 82 |
"""
|
| 83 |
Full RAG pipeline: retrieve → build context → generate answer.
|
| 84 |
Returns dict with 'answer' and 'sources'.
|
| 85 |
"""
|
| 86 |
-
# Get HuggingFace InferenceClient
|
| 87 |
-
client = get_llm_client()
|
| 88 |
|
| 89 |
# ββ Handle greetings βββββββββββββββββββββββββββββ
|
| 90 |
# Short-circuit: if user just says "hello", skip RAG entirely
|
|
@@ -156,7 +147,7 @@ def generate_answer(
|
|
| 156 |
|
| 157 |
@trace_function(
|
| 158 |
"generate_answer_stream",
|
| 159 |
-
metadata_factory=lambda question, user_id, document_id=None: {
|
| 160 |
"user_id": user_id,
|
| 161 |
"document_id": document_id,
|
| 162 |
"llm_model": settings.LLM_MODEL,
|
|
@@ -166,13 +157,14 @@ def generate_answer_stream(
|
|
| 166 |
question: str,
|
| 167 |
user_id: str,
|
| 168 |
document_id: Optional[str] = None,
|
|
|
|
| 169 |
) -> Generator[str, None, None]:
|
| 170 |
"""
|
| 171 |
Streaming RAG pipeline — yields SSE-formatted chunks.
|
| 172 |
First yields sources, then streams answer tokens.
|
| 173 |
"""
|
| 174 |
-
# Get HuggingFace InferenceClient
|
| 175 |
-
client = get_llm_client()
|
| 176 |
|
| 177 |
# ββ Handle greetings βββββββββββββββββββββββββββββ
|
| 178 |
# Short-circuit: if user just says "hello", skip RAG entirely
|
|
|
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
settings = get_settings()
|
| 17 |
|
| 18 |
+
def get_llm_client(hf_token: Optional[str] = None) -> InferenceClient:
    """Build a fresh HuggingFace InferenceClient for the current request.

    A caller-supplied ``hf_token`` takes precedence; when it is missing or
    empty, the server-wide ``settings.HF_TOKEN`` is used instead.
    """
    effective_token = hf_token if hf_token else settings.HF_TOKEN
    return InferenceClient(token=effective_token)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
def is_greeting(question: str) -> bool:
|
|
|
|
| 58 |
|
| 59 |
@trace_function(
|
| 60 |
"generate_answer",
|
| 61 |
+
metadata_factory=lambda question, user_id, document_id=None, **kwargs: {
|
| 62 |
"user_id": user_id,
|
| 63 |
"document_id": document_id,
|
| 64 |
"llm_model": settings.LLM_MODEL,
|
|
|
|
| 68 |
question: str,
|
| 69 |
user_id: str,
|
| 70 |
document_id: Optional[str] = None,
|
| 71 |
+
hf_token: Optional[str] = None,
|
| 72 |
) -> Dict[str, Any]:
|
| 73 |
"""
|
| 74 |
Full RAG pipeline: retrieve → build context → generate answer.
|
| 75 |
Returns dict with 'answer' and 'sources'.
|
| 76 |
"""
|
| 77 |
+
# Get HuggingFace InferenceClient per-request
|
| 78 |
+
client = get_llm_client(hf_token)
|
| 79 |
|
| 80 |
# ββ Handle greetings βββββββββββββββββββββββββββββ
|
| 81 |
# Short-circuit: if user just says "hello", skip RAG entirely
|
|
|
|
| 147 |
|
| 148 |
@trace_function(
|
| 149 |
"generate_answer_stream",
|
| 150 |
+
metadata_factory=lambda question, user_id, document_id=None, **kwargs: {
|
| 151 |
"user_id": user_id,
|
| 152 |
"document_id": document_id,
|
| 153 |
"llm_model": settings.LLM_MODEL,
|
|
|
|
| 157 |
question: str,
|
| 158 |
user_id: str,
|
| 159 |
document_id: Optional[str] = None,
|
| 160 |
+
hf_token: Optional[str] = None,
|
| 161 |
) -> Generator[str, None, None]:
|
| 162 |
"""
|
| 163 |
Streaming RAG pipeline — yields SSE-formatted chunks.
|
| 164 |
First yields sources, then streams answer tokens.
|
| 165 |
"""
|
| 166 |
+
# Get HuggingFace InferenceClient per-request
|
| 167 |
+
client = get_llm_client(hf_token)
|
| 168 |
|
| 169 |
# ββ Handle greetings βββββββββββββββββββββββββββββ
|
| 170 |
# Short-circuit: if user just says "hello", skip RAG entirely
|
backend/app/rag/retriever.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
| 1 |
"""
|
| 2 |
Two-stage retrieval: ChromaDB similarity search + cross-encoder reranking.
|
| 3 |
"""
|
|
|
|
| 4 |
import logging
|
|
|
|
| 5 |
from typing import List, Dict, Any, Optional
|
| 6 |
from app.config import get_settings
|
| 7 |
from app.rag.embeddings import embed_query
|
|
@@ -10,6 +12,7 @@ from app.rag.vectorstore import query_chunks
|
|
| 10 |
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
settings = get_settings()
|
|
|
|
| 13 |
|
| 14 |
# ββ Singleton reranker βββββββββββββββββββββββββββββββ
|
| 15 |
_reranker = None
|
|
@@ -32,6 +35,136 @@ def get_reranker():
|
|
| 32 |
return _reranker if _reranker != "disabled" else None
|
| 33 |
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
@trace_function(
|
| 36 |
"retrieve",
|
| 37 |
metadata_factory=lambda query, user_id, document_id=None: {
|
|
@@ -55,18 +188,24 @@ def retrieve(
|
|
| 55 |
|
| 56 |
Returns chunks with confidence scores.
|
| 57 |
"""
|
| 58 |
-
# ββ Stage 1:
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
if not candidates:
|
| 68 |
return []
|
| 69 |
|
|
|
|
|
|
|
| 70 |
# ββ Stage 2: Cross-encoder reranking βββββββββββββ
|
| 71 |
reranker = get_reranker()
|
| 72 |
|
|
@@ -86,6 +225,8 @@ def retrieve(
|
|
| 86 |
except Exception as e:
|
| 87 |
logger.warning(f"Reranking failed, using embedding scores: {e}")
|
| 88 |
|
|
|
|
|
|
|
| 89 |
# ββ Take top-K after reranking βββββββββββββββββββ
|
| 90 |
top_chunks = candidates[:settings.TOP_K_RERANK]
|
| 91 |
|
|
|
|
| 1 |
"""
|
| 2 |
Two-stage retrieval: ChromaDB similarity search + cross-encoder reranking.
|
| 3 |
"""
|
| 4 |
+
import json
|
| 5 |
import logging
|
| 6 |
+
import re
|
| 7 |
from typing import List, Dict, Any, Optional
|
| 8 |
from app.config import get_settings
|
| 9 |
from app.rag.embeddings import embed_query
|
|
|
|
| 12 |
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
settings = get_settings()
|
| 15 |
+
MAX_QUERY_VARIANTS = 4
|
| 16 |
|
| 17 |
# ββ Singleton reranker βββββββββββββββββββββββββββββββ
|
| 18 |
_reranker = None
|
|
|
|
| 35 |
return _reranker if _reranker != "disabled" else None
|
| 36 |
|
| 37 |
|
| 38 |
+
def transform_query(query: str) -> List[str]:
    """Expand a user question into a small set of retrieval queries.

    The stripped original question always comes first, followed by any
    LLM-generated rewrites. Duplicates are removed and the result is capped
    at MAX_QUERY_VARIANTS entries. A blank question yields an empty list.
    """
    base_query = query.strip()
    if base_query == "":
        return []

    variants: List[str] = []
    try:
        variants = _generate_query_variants(base_query)
    except Exception as e:
        # Rewriting is optional: on any failure, search with the original only.
        logger.warning(f"Query transformation failed, using original query only: {e}")

    unique_queries = _dedupe_queries([base_query] + list(variants))
    return unique_queries[:MAX_QUERY_VARIANTS]
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def _generate_query_variants(query: str) -> List[str]:
    """Ask the configured LLM for search-oriented rewrites of ``query``.

    Returns an empty list when ``settings.HF_TOKEN`` is not set or when the
    model response carries no choices. Errors raised by the client propagate
    to the caller.
    """
    token = settings.HF_TOKEN
    if not token:
        return []

    # Imported inside the function, as in the original.
    from huggingface_hub import InferenceClient

    system_message = {
        "role": "system",
        "content": "You create optimized search queries for a RAG retriever.",
    }
    user_message = {
        "role": "user",
        "content": (
            "Rewrite the user question into concise semantic search queries for document retrieval. "
            "Split independent topics into separate queries. Return a JSON array of strings only. "
            f"User question: {query}"
        ),
    }

    completion = InferenceClient(token=token).chat_completion(
        messages=[system_message, user_message],
        model=settings.LLM_MODEL,
        max_tokens=256,
        temperature=0.2,
    )

    choices = completion.choices
    if not choices:
        return []
    return _parse_query_variants(choices[0].message.content or "")
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def _parse_query_variants(content: str) -> List[str]:
|
| 87 |
+
"""Parse LLM output into a list even when it adds light prose around JSON."""
|
| 88 |
+
content = content.strip()
|
| 89 |
+
if not content:
|
| 90 |
+
return []
|
| 91 |
+
|
| 92 |
+
parsed = _try_parse_query_json(content)
|
| 93 |
+
if parsed is not None:
|
| 94 |
+
return parsed
|
| 95 |
+
|
| 96 |
+
match = re.search(r"\[[\s\S]*\]", content)
|
| 97 |
+
if match:
|
| 98 |
+
parsed = _try_parse_query_json(match.group(0))
|
| 99 |
+
if parsed is not None:
|
| 100 |
+
return parsed
|
| 101 |
+
|
| 102 |
+
queries = []
|
| 103 |
+
for line in content.splitlines():
|
| 104 |
+
cleaned = re.sub(r"^\s*[-*\d.)]+\s*", "", line).strip().strip('"')
|
| 105 |
+
if cleaned:
|
| 106 |
+
queries.append(cleaned)
|
| 107 |
+
return queries
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def _try_parse_query_json(content: str) -> Optional[List[str]]:
|
| 111 |
+
try:
|
| 112 |
+
parsed = json.loads(content)
|
| 113 |
+
except json.JSONDecodeError:
|
| 114 |
+
return None
|
| 115 |
+
|
| 116 |
+
if isinstance(parsed, dict):
|
| 117 |
+
parsed = parsed.get("queries", [])
|
| 118 |
+
|
| 119 |
+
if not isinstance(parsed, list):
|
| 120 |
+
return []
|
| 121 |
+
|
| 122 |
+
return [item.strip() for item in parsed if isinstance(item, str) and item.strip()]
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def _dedupe_queries(queries: List[str]) -> List[str]:
|
| 126 |
+
deduped = []
|
| 127 |
+
seen = set()
|
| 128 |
+
for query in queries:
|
| 129 |
+
normalized = " ".join(query.split())
|
| 130 |
+
key = normalized.lower()
|
| 131 |
+
if normalized and key not in seen:
|
| 132 |
+
seen.add(key)
|
| 133 |
+
deduped.append(normalized)
|
| 134 |
+
return deduped
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def _candidate_key(chunk: Dict[str, Any]) -> str:
|
| 138 |
+
for key in ("id", "chunk_id"):
|
| 139 |
+
if chunk.get(key):
|
| 140 |
+
return str(chunk[key])
|
| 141 |
+
|
| 142 |
+
text = str(chunk.get("text", ""))
|
| 143 |
+
return "|".join(
|
| 144 |
+
str(part)
|
| 145 |
+
for part in (
|
| 146 |
+
chunk.get("document_id", ""),
|
| 147 |
+
chunk.get("filename", ""),
|
| 148 |
+
chunk.get("page", ""),
|
| 149 |
+
text[:200],
|
| 150 |
+
)
|
| 151 |
+
)
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def _merge_candidates(candidates: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Collapse candidates that share a key, keeping the highest-scoring copy.

    Each candidate is shallow-copied, so the input dicts are not the objects
    returned. A missing ``score`` counts as 0; on a tie the earlier candidate
    is kept.
    """
    best_by_key: Dict[str, Dict[str, Any]] = {}
    for item in candidates:
        clone = dict(item)
        identity = _candidate_key(clone)
        current = best_by_key.get(identity)
        # Replace only when strictly better than what is already stored.
        if current is not None and not (clone.get("score", 0) > current.get("score", 0)):
            continue
        best_by_key[identity] = clone
    return list(best_by_key.values())
|
| 166 |
+
|
| 167 |
+
|
| 168 |
@trace_function(
|
| 169 |
"retrieve",
|
| 170 |
metadata_factory=lambda query, user_id, document_id=None: {
|
|
|
|
| 188 |
|
| 189 |
Returns chunks with confidence scores.
|
| 190 |
"""
|
| 191 |
+
# ββ Stage 1: Query transformation + embedding search βββββββββββββ
|
| 192 |
+
candidates = []
|
| 193 |
+
for search_query in transform_query(query):
|
| 194 |
+
query_vector = embed_query(search_query)
|
| 195 |
+
candidates.extend(
|
| 196 |
+
query_chunks(
|
| 197 |
+
query_embedding=query_vector,
|
| 198 |
+
user_id=user_id,
|
| 199 |
+
document_id=document_id,
|
| 200 |
+
top_k=settings.TOP_K_RETRIEVAL,
|
| 201 |
+
)
|
| 202 |
+
)
|
| 203 |
|
| 204 |
if not candidates:
|
| 205 |
return []
|
| 206 |
|
| 207 |
+
candidates = _merge_candidates(candidates)
|
| 208 |
+
|
| 209 |
# ββ Stage 2: Cross-encoder reranking βββββββββββββ
|
| 210 |
reranker = get_reranker()
|
| 211 |
|
|
|
|
| 225 |
except Exception as e:
|
| 226 |
logger.warning(f"Reranking failed, using embedding scores: {e}")
|
| 227 |
|
| 228 |
+
candidates.sort(key=lambda x: x.get("rerank_score", x.get("score", 0)), reverse=True)
|
| 229 |
+
|
| 230 |
# ββ Take top-K after reranking βββββββββββββββββββ
|
| 231 |
top_chunks = candidates[:settings.TOP_K_RERANK]
|
| 232 |
|
backend/app/routes/admin.py
CHANGED
|
@@ -3,17 +3,18 @@ Admin-only operational statistics routes.
|
|
| 3 |
"""
|
| 4 |
import shutil
|
| 5 |
from pathlib import Path
|
|
|
|
| 6 |
|
| 7 |
from fastapi import APIRouter, Depends
|
| 8 |
from sqlalchemy import func
|
| 9 |
from sqlalchemy.orm import Session
|
| 10 |
|
| 11 |
-
from app.auth import
|
| 12 |
from app.config import get_settings
|
| 13 |
from app.database import get_db
|
| 14 |
from app.metrics import get_query_metrics
|
| 15 |
-
from app.models import Document, User
|
| 16 |
-
from app.schemas import AdminStatsResponse, DiskUsageResponse
|
| 17 |
|
| 18 |
router = APIRouter(prefix="/admin", tags=["Admin"])
|
| 19 |
settings = get_settings()
|
|
@@ -35,8 +36,8 @@ def _directory_size(path: Path) -> int:
|
|
| 35 |
|
| 36 |
@router.get("/stats", response_model=AdminStatsResponse)
|
| 37 |
def get_admin_stats(
|
| 38 |
-
_admin: User = Depends(get_admin_user),
|
| 39 |
db: Session = Depends(get_db),
|
|
|
|
| 40 |
):
|
| 41 |
"""Return aggregate system statistics for administrators."""
|
| 42 |
upload_dir = Path(settings.UPLOAD_DIR).resolve()
|
|
@@ -59,6 +60,8 @@ def get_admin_stats(
|
|
| 59 |
return AdminStatsResponse(
|
| 60 |
total_users=db.query(User).count(),
|
| 61 |
total_pdfs_uploaded=total_pdfs_uploaded,
|
|
|
|
|
|
|
| 62 |
average_query_response_time_ms=float(
|
| 63 |
query_metrics["average_query_response_time_ms"]
|
| 64 |
),
|
|
@@ -70,4 +73,14 @@ def get_admin_stats(
|
|
| 70 |
usage_percent=used_percent,
|
| 71 |
upload_dir_bytes=_directory_size(upload_dir),
|
| 72 |
),
|
|
|
|
| 73 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"""
|
| 4 |
import shutil
|
| 5 |
from pathlib import Path
|
| 6 |
+
from typing import List
|
| 7 |
|
| 8 |
from fastapi import APIRouter, Depends
|
| 9 |
from sqlalchemy import func
|
| 10 |
from sqlalchemy.orm import Session
|
| 11 |
|
| 12 |
+
from app.auth import get_current_admin
|
| 13 |
from app.config import get_settings
|
| 14 |
from app.database import get_db
|
| 15 |
from app.metrics import get_query_metrics
|
| 16 |
+
from app.models import Document, User, ChatMessage
|
| 17 |
+
from app.schemas import AdminStatsResponse, DiskUsageResponse, UserResponse
|
| 18 |
|
| 19 |
router = APIRouter(prefix="/admin", tags=["Admin"])
|
| 20 |
settings = get_settings()
|
|
|
|
| 36 |
|
| 37 |
@router.get("/stats", response_model=AdminStatsResponse)
|
| 38 |
def get_admin_stats(
|
|
|
|
| 39 |
db: Session = Depends(get_db),
|
| 40 |
+
_admin: User = Depends(get_current_admin),
|
| 41 |
):
|
| 42 |
"""Return aggregate system statistics for administrators."""
|
| 43 |
upload_dir = Path(settings.UPLOAD_DIR).resolve()
|
|
|
|
| 60 |
return AdminStatsResponse(
|
| 61 |
total_users=db.query(User).count(),
|
| 62 |
total_pdfs_uploaded=total_pdfs_uploaded,
|
| 63 |
+
total_documents=db.query(Document).count(),
|
| 64 |
+
total_messages=db.query(ChatMessage).count(),
|
| 65 |
average_query_response_time_ms=float(
|
| 66 |
query_metrics["average_query_response_time_ms"]
|
| 67 |
),
|
|
|
|
| 73 |
usage_percent=used_percent,
|
| 74 |
upload_dir_bytes=_directory_size(upload_dir),
|
| 75 |
),
|
| 76 |
+
users=db.query(User).all()
|
| 77 |
)
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
@router.get("/users", response_model=List[UserResponse])
def list_all_users(
    db: Session = Depends(get_db),
    _admin: User = Depends(get_current_admin),
):
    """Admin-only endpoint returning every row of the users table."""
    # NOTE(review): UserResponse declares an `hf_token` field; confirm that
    # exposing each user's token to administrators is intended.
    all_users = db.query(User).all()
    return all_users
|
backend/app/routes/auth.py
CHANGED
|
@@ -11,7 +11,7 @@ from sqlalchemy.orm import Session
|
|
| 11 |
from sqlalchemy import select
|
| 12 |
from app.config import get_settings
|
| 13 |
from app.database import get_db
|
| 14 |
-
from app.models import User, ApiKey
|
| 15 |
from app.schemas import (
|
| 16 |
GoogleLoginRequest,
|
| 17 |
HFTokenUpdate,
|
|
@@ -140,6 +140,7 @@ def register(payload: UserRegister, db: Session = Depends(get_db)):
|
|
| 140 |
username=payload.username,
|
| 141 |
email=payload.email,
|
| 142 |
hashed_password=hash_password(payload.password),
|
|
|
|
| 143 |
)
|
| 144 |
db.add(user)
|
| 145 |
db.commit()
|
|
|
|
| 11 |
from sqlalchemy import select
|
| 12 |
from app.config import get_settings
|
| 13 |
from app.database import get_db
|
| 14 |
+
from app.models import User, ApiKey, UserRole
|
| 15 |
from app.schemas import (
|
| 16 |
GoogleLoginRequest,
|
| 17 |
HFTokenUpdate,
|
|
|
|
| 140 |
username=payload.username,
|
| 141 |
email=payload.email,
|
| 142 |
hashed_password=hash_password(payload.password),
|
| 143 |
+
role=UserRole.user,
|
| 144 |
)
|
| 145 |
db.add(user)
|
| 146 |
db.commit()
|
backend/app/routes/chat.py
CHANGED
|
@@ -82,16 +82,16 @@ def create_share_link(
|
|
| 82 |
)
|
| 83 |
|
| 84 |
|
| 85 |
-
def generate_answer(question: str, user_id: str, document_id: Optional[str] = None):
|
| 86 |
from app.rag.agent import generate_answer as _generate_answer
|
| 87 |
|
| 88 |
-
return _generate_answer(question=question, user_id=user_id, document_id=document_id)
|
| 89 |
|
| 90 |
|
| 91 |
-
def generate_answer_stream(question: str, user_id: str, document_id: Optional[str] = None):
|
| 92 |
from app.rag.agent import generate_answer_stream as _generate_answer_stream
|
| 93 |
|
| 94 |
-
return _generate_answer_stream(question=question, user_id=user_id, document_id=document_id)
|
| 95 |
|
| 96 |
|
| 97 |
@router.post("/ask", response_model=ChatResponse)
|
|
@@ -151,6 +151,7 @@ def ask_question(
|
|
| 151 |
question=payload.question,
|
| 152 |
user_id=user.id,
|
| 153 |
document_id=payload.document_id,
|
|
|
|
| 154 |
)
|
| 155 |
|
| 156 |
# Save to chat history
|
|
@@ -240,6 +241,7 @@ def ask_question_stream(
|
|
| 240 |
question=payload.question,
|
| 241 |
user_id=user.id,
|
| 242 |
document_id=payload.document_id,
|
|
|
|
| 243 |
):
|
| 244 |
yield chunk
|
| 245 |
|
|
|
|
| 82 |
)
|
| 83 |
|
| 84 |
|
| 85 |
+
def generate_answer(question: str, user_id: str, document_id: Optional[str] = None, hf_token: Optional[str] = None):
    """Forward to ``app.rag.agent.generate_answer``, importing it at call time."""
    from app.rag.agent import generate_answer as _generate_answer

    return _generate_answer(
        question=question,
        user_id=user_id,
        document_id=document_id,
        hf_token=hf_token,
    )
|
| 89 |
|
| 90 |
|
| 91 |
+
def generate_answer_stream(question: str, user_id: str, document_id: Optional[str] = None, hf_token: Optional[str] = None):
    """Forward to ``app.rag.agent.generate_answer_stream``, importing it at call time."""
    from app.rag.agent import generate_answer_stream as _generate_answer_stream

    return _generate_answer_stream(
        question=question,
        user_id=user_id,
        document_id=document_id,
        hf_token=hf_token,
    )
|
| 95 |
|
| 96 |
|
| 97 |
@router.post("/ask", response_model=ChatResponse)
|
|
|
|
| 151 |
question=payload.question,
|
| 152 |
user_id=user.id,
|
| 153 |
document_id=payload.document_id,
|
| 154 |
+
hf_token=user.hf_token,
|
| 155 |
)
|
| 156 |
|
| 157 |
# Save to chat history
|
|
|
|
| 241 |
question=payload.question,
|
| 242 |
user_id=user.id,
|
| 243 |
document_id=payload.document_id,
|
| 244 |
+
hf_token=user.hf_token,
|
| 245 |
):
|
| 246 |
yield chunk
|
| 247 |
|
backend/app/routes/documents.py
CHANGED
|
@@ -3,24 +3,32 @@ Document management routes — upload, list, delete, and serve PDF files.
|
|
| 3 |
Background ingestion via FastAPI BackgroundTasks.
|
| 4 |
"""
|
| 5 |
import os
|
|
|
|
| 6 |
import uuid
|
| 7 |
import logging
|
|
|
|
|
|
|
| 8 |
from typing import Optional
|
| 9 |
from pathlib import Path
|
| 10 |
import shutil
|
| 11 |
import tempfile
|
| 12 |
-
|
| 13 |
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, BackgroundTasks, status, Query
|
| 14 |
from fastapi.responses import FileResponse
|
| 15 |
from sqlalchemy.orm import Session
|
| 16 |
|
| 17 |
from app.database import get_db
|
| 18 |
from app.models import User, Document
|
| 19 |
-
from app.schemas import DocumentResponse, DocumentListResponse, DocumentStatusResponse
|
| 20 |
from app.auth import get_current_user
|
| 21 |
from app.config import get_settings
|
| 22 |
from app.rag.chunker import chunk_document, get_page_count
|
| 23 |
from app.rag.vectorstore import store_chunks, delete_document_chunks
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
from sqlalchemy import select
|
| 25 |
logger = logging.getLogger(__name__)
|
| 26 |
settings = get_settings()
|
|
@@ -62,7 +70,7 @@ async def validate_upload(file: UploadFile):
|
|
| 62 |
|
| 63 |
# extension without leading dot in settings
|
| 64 |
if ext.lstrip(".") not in settings.ALLOWED_EXTENSIONS:
|
| 65 |
-
raise HTTPException(status_code=400, detail="Only PDF
|
| 66 |
|
| 67 |
# save to a temporary file
|
| 68 |
with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
|
|
@@ -205,6 +213,36 @@ def _ingest_document(document_id: str, filepath: str, original_name: str, user_i
|
|
| 205 |
db.close()
|
| 206 |
|
| 207 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
@router.post("/upload", response_model=DocumentResponse, status_code=status.HTTP_202_ACCEPTED)
|
| 209 |
async def upload_document(
|
| 210 |
background_tasks: BackgroundTasks,
|
|
@@ -287,6 +325,100 @@ async def upload_document(
|
|
| 287 |
|
| 288 |
return DocumentResponse.model_validate(document)
|
| 289 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
|
| 291 |
@router.get("/{document_id}/status", response_model=DocumentStatusResponse)
|
| 292 |
def get_document_status(
|
|
|
|
| 3 |
Background ingestion via FastAPI BackgroundTasks.
|
| 4 |
"""
|
| 5 |
import os
|
| 6 |
+
import sys
|
| 7 |
import uuid
|
| 8 |
import logging
|
| 9 |
+
import asyncio
|
| 10 |
+
import concurrent.futures
|
| 11 |
from typing import Optional
|
| 12 |
from pathlib import Path
|
| 13 |
import shutil
|
| 14 |
import tempfile
|
| 15 |
+
from urllib.parse import urlparse
|
| 16 |
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, BackgroundTasks, status, Query
|
| 17 |
from fastapi.responses import FileResponse
|
| 18 |
from sqlalchemy.orm import Session
|
| 19 |
|
| 20 |
from app.database import get_db
|
| 21 |
from app.models import User, Document
|
| 22 |
+
from app.schemas import DocumentResponse, DocumentListResponse, DocumentStatusResponse, UploadUrl
|
| 23 |
from app.auth import get_current_user
|
| 24 |
from app.config import get_settings
|
| 25 |
from app.rag.chunker import chunk_document, get_page_count
|
| 26 |
from app.rag.vectorstore import store_chunks, delete_document_chunks
|
| 27 |
+
|
| 28 |
+
import crawl4ai
|
| 29 |
+
from crawl4ai import AsyncWebCrawler
|
| 30 |
+
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig, CacheMode
|
| 31 |
+
|
| 32 |
from sqlalchemy import select
|
| 33 |
logger = logging.getLogger(__name__)
|
| 34 |
settings = get_settings()
|
|
|
|
| 70 |
|
| 71 |
# extension without leading dot in settings
|
| 72 |
if ext.lstrip(".") not in settings.ALLOWED_EXTENSIONS:
|
| 73 |
+
raise HTTPException(status_code=400, detail="Only PDF, DOCX, TEXT, AND MARKDOWN files are allowed")
|
| 74 |
|
| 75 |
# save to a temporary file
|
| 76 |
with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
|
|
|
|
| 213 |
db.close()
|
| 214 |
|
| 215 |
|
| 216 |
+
|
| 217 |
+
def _crawl_in_new_loop(url: str) -> str:
    """Crawl ``url`` with crawl4ai on a private event loop and return its markdown.

    Meant to run on a worker thread. On Windows a ProactorEventLoop is created
    explicitly because subprocess support is required; elsewhere a plain new
    event loop is used.

    Args:
        url: Address of the page to crawl.

    Returns:
        The crawl result's markdown, or ``""`` when it is empty/None.
    """
    async def _crawl() -> str:
        browser_config = BrowserConfig()
        run_config = CrawlerRunConfig(
            excluded_tags=['form', 'header'],

            # Content processing
            process_iframes=True,
            # remove_overlay_elements=True,

            # Cache control
            # cache_mode=CacheMode.ENABLED
        )
        async with AsyncWebCrawler(config=browser_config) as crawler:
            result = await crawler.arun(url=url, config=run_config)
            return result.markdown or ""

    if sys.platform == "win32":
        loop = asyncio.ProactorEventLoop()
    else:
        loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(_crawl())
    finally:
        # Same teardown asyncio.run() performs: finalize async generators,
        # then unregister the loop so the thread is not left pointing at a
        # closed loop.
        try:
            loop.run_until_complete(loop.shutdown_asyncgens())
        finally:
            asyncio.set_event_loop(None)
            loop.close()
|
| 245 |
+
|
| 246 |
@router.post("/upload", response_model=DocumentResponse, status_code=status.HTTP_202_ACCEPTED)
|
| 247 |
async def upload_document(
|
| 248 |
background_tasks: BackgroundTasks,
|
|
|
|
| 325 |
|
| 326 |
return DocumentResponse.model_validate(document)
|
| 327 |
|
| 328 |
+
@router.post("/urlupload", response_model=DocumentResponse, status_code=status.HTTP_202_ACCEPTED)
async def upload_document_url(
    payload: UploadUrl,
    background_tasks: BackgroundTasks,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Crawl a web page, store its markdown as a text document and queue ingestion.

    Uses crawl4ai's AsyncWebCrawler in a dedicated thread with its own
    event loop. This is required on Windows because uvicorn's default
    SelectorEventLoop does not support subprocess creation (used by
    Playwright/crawl4ai), which causes a NotImplementedError.
    On Linux (production) a plain new_event_loop() is used instead.

    Args:
        payload: Request body carrying the URL to crawl.
        background_tasks: FastAPI task queue used to schedule ingestion.
        user: Authenticated owner of the new document.
        db: Database session.

    Returns:
        DocumentResponse for the newly created, still-pending document.

    Raises:
        HTTPException: 400 for an invalid or non-HTTP(S) URL or a processing
            failure; 422 when the page yields no content.
    """
    temp_path: Optional[str] = None
    stored_path: Optional[str] = None
    record_committed = False
    try:
        parsed = urlparse(payload.url)
        # Only fetch web URLs: the crawl runs server-side on user input, so
        # schemes such as file:// must never reach the crawler.
        # NOTE(review): hosts are not restricted here; consider rejecting
        # loopback/private addresses as well.
        if parsed.scheme not in ("http", "https") or not parsed.netloc:
            raise HTTPException(status_code=400, detail="Invalid URL")

        # Run in a worker thread with its own event loop to avoid
        # NotImplementedError on Windows (SelectorEventLoop can't spawn subprocesses)
        loop = asyncio.get_running_loop()
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            markdown = await loop.run_in_executor(
                pool, _crawl_in_new_loop, payload.url
            )

        if not markdown:
            raise HTTPException(status_code=422, detail="No content could be extracted from the URL")

        with tempfile.NamedTemporaryFile(
            mode="w",
            suffix=".txt",
            delete=False,
            encoding="utf-8",
        ) as tmp:
            tmp.write(markdown)
            temp_path = tmp.name

        # ── Move temp file to permanent user upload directory ──
        ext = "txt"
        user_dir = os.path.join(settings.UPLOAD_DIR, user.id)
        os.makedirs(user_dir, exist_ok=True)

        stored_filename = f"{uuid.uuid4().hex}.{ext}"
        filepath = os.path.join(user_dir, stored_filename)
        shutil.move(temp_path, filepath)
        temp_path = None  # file is now at filepath; no longer a temp to clean up
        stored_path = filepath  # remove again if the DB record is never committed

        file_size = Path(filepath).stat().st_size

        # ── Derive a human-readable name from the URL ─────────
        url_path = parsed.path.rstrip("/")
        original_name = f"{parsed.netloc}{url_path or ''}.txt"

        # ── Create database record ─────────────────────────────
        document = Document(
            user_id=user.id,
            filename=stored_filename,
            original_name=original_name,
            file_size=file_size,
            status="pending",
        )
        db.add(document)
        db.commit()
        record_committed = True
        db.refresh(document)

        # ── Trigger background ingestion ───────────────────────
        background_tasks.add_task(
            _ingest_document,
            document_id=document.id,
            filepath=filepath,
            original_name=original_name,
            user_id=user.id,
        )

        return DocumentResponse.model_validate(document)

    except HTTPException:
        raise
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid URL")
    except Exception as e:
        # exc_info keeps the traceback in the server log.
        logger.error(f"URL upload error: {e}", exc_info=True)
        # NOTE(review): the raw exception text is returned to the client.
        raise HTTPException(status_code=400, detail=f"Something went wrong with URL processing: {str(e)}")
    finally:
        # Runs whether the request succeeded, raised an HTTPException,
        # or hit an unexpected error — no temp files are ever left behind.
        if temp_path is not None:
            Path(temp_path).unlink(missing_ok=True)
        # A stored file without a committed Document row is unreachable.
        if stored_path is not None and not record_committed:
            Path(stored_path).unlink(missing_ok=True)
|
| 420 |
+
|
| 421 |
+
|
| 422 |
|
| 423 |
@router.get("/{document_id}/status", response_model=DocumentStatusResponse)
|
| 424 |
def get_document_status(
|
backend/app/schemas.py
CHANGED
|
@@ -4,6 +4,7 @@ Pydantic schemas for API request/response validation.
|
|
| 4 |
from pydantic import BaseModel, EmailStr, Field
|
| 5 |
from typing import Optional, List
|
| 6 |
from datetime import datetime
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
# ββ Auth βββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -75,6 +76,7 @@ class UserResponse(BaseModel):
|
|
| 75 |
id: str
|
| 76 |
username: str
|
| 77 |
email: str
|
|
|
|
| 78 |
is_admin: bool
|
| 79 |
hf_token: Optional[str] = None
|
| 80 |
created_at: datetime
|
|
@@ -131,9 +133,12 @@ class DiskUsageResponse(BaseModel):
|
|
| 131 |
class AdminStatsResponse(BaseModel):
|
| 132 |
total_users: int
|
| 133 |
total_pdfs_uploaded: int
|
|
|
|
|
|
|
| 134 |
average_query_response_time_ms: float
|
| 135 |
query_count: int
|
| 136 |
disk_space_usage: DiskUsageResponse
|
|
|
|
| 137 |
|
| 138 |
|
| 139 |
# ββ Chat βββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -172,6 +177,8 @@ class ChatHistoryResponse(BaseModel):
|
|
| 172 |
messages: List[ChatMessageResponse]
|
| 173 |
document_id: Optional[str] = None
|
| 174 |
|
|
|
|
|
|
|
| 175 |
|
| 176 |
class ShareAnswerResponse(BaseModel):
|
| 177 |
id: str
|
|
|
|
| 4 |
from pydantic import BaseModel, EmailStr, Field
|
| 5 |
from typing import Optional, List
|
| 6 |
from datetime import datetime
|
| 7 |
+
from app.models import UserRole
|
| 8 |
|
| 9 |
|
| 10 |
# ββ Auth βββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 76 |
id: str
|
| 77 |
username: str
|
| 78 |
email: str
|
| 79 |
+
role: UserRole
|
| 80 |
is_admin: bool
|
| 81 |
hf_token: Optional[str] = None
|
| 82 |
created_at: datetime
|
|
|
|
| 133 |
class AdminStatsResponse(BaseModel):
|
| 134 |
total_users: int
|
| 135 |
total_pdfs_uploaded: int
|
| 136 |
+
total_documents: int
|
| 137 |
+
total_messages: int
|
| 138 |
average_query_response_time_ms: float
|
| 139 |
query_count: int
|
| 140 |
disk_space_usage: DiskUsageResponse
|
| 141 |
+
users: List[UserResponse]
|
| 142 |
|
| 143 |
|
| 144 |
# ββ Chat βββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 177 |
messages: List[ChatMessageResponse]
|
| 178 |
document_id: Optional[str] = None
|
| 179 |
|
| 180 |
+
class UploadUrl(BaseModel):
    """Request body carrying a single URL string."""

    # Kept as a plain string; no URL validation happens at the schema level.
    url: str
|
| 182 |
|
| 183 |
class ShareAnswerResponse(BaseModel):
|
| 184 |
id: str
|
backend/migrate_add_role.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
One-time migration script to add the 'role' column to the 'users' table.
|
| 3 |
+
Run this from the 'backend' directory.
|
| 4 |
+
"""
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
# Add the current directory to sys.path to allow importing 'app'
|
| 9 |
+
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
| 10 |
+
|
| 11 |
+
from app.database import engine
|
| 12 |
+
from sqlalchemy import text
|
| 13 |
+
|
| 14 |
+
def migrate():
    """Add the ``role`` column to ``users`` and backfill it from ``is_admin``.

    The column is created with a default of ``'user'``; rows whose
    ``is_admin`` flag is set are then promoted to ``'admin'``.

    Returns:
        bool: ``True`` when the column was added or was already present,
        ``False`` when the migration failed (the error is printed).
    """
    # Local import: the module only imports ``text`` from sqlalchemy at the top.
    from sqlalchemy import inspect

    print("🚀 Starting migration: adding 'role' column to 'users' table...")
    try:
        # Ask the database which columns exist instead of matching a
        # backend-specific "duplicate column name" error message.
        existing_columns = {column["name"] for column in inspect(engine).get_columns("users")}
        if "role" in existing_columns:
            print("ℹ️ Column 'role' already exists. Skipping migration.")
            return True

        # engine.begin() commits when the block succeeds and rolls back when
        # any statement raises, so a half-applied backfill is never committed.
        with engine.begin() as conn:
            # SQLite doesn't support adding a column with NOT NULL without a
            # default value if there are already rows, so a default is provided.
            conn.execute(text("ALTER TABLE users ADD COLUMN role VARCHAR DEFAULT 'user'"))
            # Update existing rows to have the 'user' role
            conn.execute(text("UPDATE users SET role = 'user' WHERE role IS NULL"))
            # A bound boolean works on both SQLite (stored as 0/1) and
            # PostgreSQL, where comparing a boolean column to the integer 1 fails.
            conn.execute(
                text("UPDATE users SET role = 'admin' WHERE is_admin = :is_admin"),
                {"is_admin": True},
            )
        print("✅ Migration successful!")
        return True
    except Exception as e:
        print(f"❌ Migration failed: {e}")
        return False


if __name__ == "__main__":
    # Propagate failure to the shell so deploy scripts can detect it.
    sys.exit(0 if migrate() else 1)
|
backend/requirements.txt
CHANGED
|
@@ -8,6 +8,7 @@ python-multipart
|
|
| 8 |
# Database
|
| 9 |
sqlalchemy
|
| 10 |
aiosqlite
|
|
|
|
| 11 |
|
| 12 |
# Auth
|
| 13 |
pyjwt
|
|
@@ -50,8 +51,9 @@ slowapi
|
|
| 50 |
# File Validation
|
| 51 |
#sudo apt-get install libmagic1 // for Debian/Ubuntu
|
| 52 |
#brew install libmagic // for OSX
|
| 53 |
-
python-magic-bin
|
| 54 |
python-magic; sys_platform != "win32"
|
| 55 |
python-docx
|
| 56 |
pypdf
|
| 57 |
reportlab
|
|
|
|
|
|
| 8 |
# Database
|
| 9 |
sqlalchemy
|
| 10 |
aiosqlite
|
| 11 |
+
psycopg[binary]
|
| 12 |
|
| 13 |
# Auth
|
| 14 |
pyjwt
|
|
|
|
| 51 |
# File Validation
|
| 52 |
#sudo apt-get install libmagic1 // for Debian/Ubuntu
|
| 53 |
#brew install libmagic // for OSX
|
| 54 |
+
python-magic-bin; sys_platform == "win32" # for windows
|
| 55 |
python-magic; sys_platform != "win32"
|
| 56 |
python-docx
|
| 57 |
pypdf
|
| 58 |
reportlab
|
| 59 |
+
crawl4ai
|
backend/scripts/migrate_sqlite_to_postgres.py
ADDED
|
@@ -0,0 +1,524 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Migrate SQLite app data into a Supabase/Postgres database.
|
| 2 |
+
|
| 3 |
+
The script supports both the current FastAPI SQLite schema
|
| 4 |
+
(`users`, `documents`, `chat_messages`) and the older legacy
|
| 5 |
+
`instance/users.db` schema (`user` only).
|
| 6 |
+
"""
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import argparse
|
| 10 |
+
import logging
|
| 11 |
+
import os
|
| 12 |
+
import sys
|
| 13 |
+
import uuid
|
| 14 |
+
from dataclasses import dataclass, field
|
| 15 |
+
from datetime import datetime, timezone
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
from typing import Any
|
| 18 |
+
|
| 19 |
+
from sqlalchemy import (
|
| 20 |
+
Boolean,
|
| 21 |
+
Column,
|
| 22 |
+
DateTime,
|
| 23 |
+
ForeignKey,
|
| 24 |
+
Integer,
|
| 25 |
+
MetaData,
|
| 26 |
+
String,
|
| 27 |
+
Table,
|
| 28 |
+
Text,
|
| 29 |
+
create_engine,
|
| 30 |
+
inspect,
|
| 31 |
+
select,
|
| 32 |
+
)
|
| 33 |
+
from sqlalchemy.engine import Engine
|
| 34 |
+
from sqlalchemy.exc import IntegrityError
|
| 35 |
+
from sqlalchemy.orm import Session, sessionmaker
|
| 36 |
+
|
| 37 |
+
LOGGER = logging.getLogger("sqlite_to_postgres")
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def generate_uuid() -> str:
    """Return a freshly generated random (version 4) UUID as a string."""
    fresh = uuid.uuid4()
    return str(fresh)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# Target-side schema. These definitions are used both to create missing
# tables (metadata.create_all) and to build the INSERT/SELECT statements.
metadata = MetaData()


def _utc_now() -> datetime:
    """Column default: the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


# NOTE(review): this table has no `role` column, while the sibling
# migrate_add_role.py script adds `users.role` — confirm whether the
# Postgres schema and copy_users() should carry it as well.
users = Table(
    "users",
    metadata,
    Column("id", String, primary_key=True, default=generate_uuid),
    Column("username", String(80), unique=True, nullable=False, index=True),
    Column("email", String(120), unique=True, nullable=False, index=True),
    Column("hashed_password", String(255), nullable=False),
    Column("is_admin", Boolean, default=False),
    Column("created_at", DateTime, default=_utc_now),
    Column("last_login", DateTime, nullable=True, index=True),
    Column("hf_token", String(255), nullable=True),
)

# API keys belong to a user; only a prefix and a hash of the key are stored.
api_keys = Table(
    "api_keys",
    metadata,
    Column("id", String, primary_key=True, default=generate_uuid),
    Column("user_id", String, ForeignKey("users.id"), nullable=False, index=True),
    Column("key_prefix", String(10), nullable=False),
    Column("hashed_key", String(255), nullable=False, unique=True, index=True),
    Column("created_at", DateTime, default=_utc_now),
    Column("last_used", DateTime, nullable=True),
)

# Uploaded documents, each owned by a user.
documents = Table(
    "documents",
    metadata,
    Column("id", String, primary_key=True, default=generate_uuid),
    Column("user_id", String, ForeignKey("users.id"), nullable=False, index=True),
    Column("filename", String(255), nullable=False),
    Column("original_name", String(255), nullable=False),
    Column("file_size", Integer, default=0),
    Column("page_count", Integer, default=0),
    Column("chunk_count", Integer, default=0),
    Column("status", String(20), default="pending"),
    Column("error_message", Text, nullable=True),
    Column("uploaded_at", DateTime, default=_utc_now),
    Column("summary", Text, nullable=True),
)

# Chat history; document_id is optional (nullable foreign key).
chat_messages = Table(
    "chat_messages",
    metadata,
    Column("id", String, primary_key=True, default=generate_uuid),
    Column("user_id", String, ForeignKey("users.id"), nullable=False, index=True),
    Column("document_id", String, ForeignKey("documents.id"), nullable=True, index=True),
    Column("role", String(20), nullable=False),
    Column("content", Text, nullable=False),
    Column("sources_json", Text, nullable=True),
    Column("created_at", DateTime, default=_utc_now),
)

# At most one share record per chat message (message_id is unique).
shared_messages = Table(
    "shared_messages",
    metadata,
    Column("id", String, primary_key=True, default=generate_uuid),
    Column(
        "message_id",
        String,
        ForeignKey("chat_messages.id"),
        nullable=False,
        unique=True,
        index=True,
    ),
    Column("created_at", DateTime, default=_utc_now),
)
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
@dataclass
class MigrationStats:
    """Per-table row counters collected while a migration runs."""

    # Each mapping goes from table name to the number of rows with that outcome.
    inserted: dict[str, int] = field(default_factory=dict)
    reused: dict[str, int] = field(default_factory=dict)
    skipped: dict[str, int] = field(default_factory=dict)

    def add(self, table_name: str, action: str) -> None:
        """Increment the ``action`` counter for ``table_name`` by one.

        ``action`` is the name of one of the counter attributes
        (``"inserted"``, ``"reused"`` or ``"skipped"``).
        """
        counters = getattr(self, action)
        counters[table_name] = counters.get(table_name, 0) + 1
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def normalize_postgres_url(url: str) -> str:
    """Prefer psycopg v3 when callers pass Supabase's common URL forms.

    ``postgres://`` and ``postgresql://`` URLs are rewritten to use the
    ``postgresql+psycopg://`` scheme; any other URL is returned unchanged.
    """
    for scheme in ("postgres://", "postgresql://"):
        if url.startswith(scheme):
            remainder = url[len(scheme):]
            return "postgresql+psycopg://" + remainder
    return url
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def sqlite_url_from_path(path: str) -> str:
    """Build a SQLAlchemy SQLite URL from a filesystem path.

    The path is resolved to an absolute path and rendered with forward
    slashes before being appended to the ``sqlite:///`` prefix.
    """
    absolute = Path(path).resolve()
    return "sqlite:///" + absolute.as_posix()
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def make_engine(url: str) -> Engine:
    """Create a SQLAlchemy engine for ``url`` with ``future=True``."""
    engine = create_engine(url, future=True)
    return engine
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def make_session(engine: Engine) -> Session:
    """Open a new session bound to ``engine``.

    The session is created with autocommit and autoflush both disabled.
    """
    factory = sessionmaker(bind=engine, autocommit=False, autoflush=False, future=True)
    return factory()
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def reflected_table(engine: Engine, table_name: str) -> Table | None:
    """Reflect ``table_name`` from the database behind ``engine``.

    Returns ``None`` when the table does not exist; otherwise a ``Table``
    loaded into its own throwaway ``MetaData``.
    """
    inspector = inspect(engine)
    if inspector.has_table(table_name):
        scratch_metadata = MetaData()
        return Table(table_name, scratch_metadata, autoload_with=engine)
    return None
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def fetch_rows(session: Session, table: Table) -> list[dict[str, Any]]:
    """Load every row of ``table`` as a plain dict keyed by column name.

    Rows are ordered by ``id`` when the table has such a column.
    """
    query = select(table)
    if "id" in table.c:
        query = query.order_by(table.c.id)
    mappings = session.execute(query).mappings().all()
    return [dict(mapping) for mapping in mappings]
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def existing_id(session: Session, table: Table, source_id: str | None) -> str | None:
    """Return ``source_id`` if a row with that id exists in ``table``.

    A falsy ``source_id`` (``None`` or an empty string) short-circuits to
    ``None`` without querying the database.
    """
    if source_id:
        lookup = select(table.c.id).where(table.c.id == source_id)
        return session.execute(lookup).scalar_one_or_none()
    return None
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def available_id(session: Session, table: Table, source_id: Any) -> str:
    """Pick an id for a new row in ``table`` that is not already taken.

    The stringified ``source_id`` is preferred; if it is ``None`` or
    already present in the table, fresh UUIDs are tried until one is free.
    """
    candidate = generate_uuid() if source_id is None else str(source_id)
    while existing_id(session, table, candidate) is not None:
        candidate = generate_uuid()
    return candidate
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def first_existing_user(session: Session, row: dict[str, Any]) -> str | None:
    """Find a target user matching ``row`` by email, then by username.

    Returns the matching user's id, or ``None`` when neither lookup hits.
    """

    def _lookup(column, value):
        # Single-column equality lookup returning the id or None.
        return session.execute(select(users.c.id).where(column == value)).scalar_one_or_none()

    email = row.get("email")
    username = row.get("username")

    by_email = _lookup(users.c.email, email) if email else None
    if by_email:
        return by_email
    return _lookup(users.c.username, username) if username else None
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
def copy_users(
    source_session: Session,
    target_session: Session,
    source_table: Table,
    stats: MigrationStats,
) -> dict[str, str]:
    """Copy users from the source table into the target ``users`` table.

    A source user is mapped onto an existing target user when the id, the
    email or the username already exists there; otherwise a new row is
    inserted. Rows from the legacy ``user`` table always get a fresh id.

    Returns:
        Mapping from stringified source id to the target user id.
    """
    translated: dict[str, str] = {}
    fallback_created_at = datetime.now(timezone.utc)
    legacy_schema = source_table.name == "user"

    for record in fetch_rows(source_session, source_table):
        source_key = str(record.get("id"))

        match = existing_id(target_session, users, source_key)
        if not match:
            match = first_existing_user(target_session, record)

        if match:
            translated[source_key] = match
            stats.add("users", "reused")
            continue

        preferred_id = None if legacy_schema else source_key
        target_key = available_id(target_session, users, preferred_id)
        insert_stmt = users.insert().values(
            id=target_key,
            username=record["username"],
            email=record["email"],
            # Legacy rows keep the hash under "password"; fall back to "".
            hashed_password=record.get("hashed_password") or record.get("password") or "",
            is_admin=bool(record.get("is_admin") or False),
            created_at=record.get("created_at") or fallback_created_at,
            last_login=record.get("last_login"),
            hf_token=record.get("hf_token"),
        )
        target_session.execute(insert_stmt)
        translated[source_key] = target_key
        stats.add("users", "inserted")

    return translated
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
def copy_api_keys(
    source_session: Session,
    target_session: Session,
    source_table: Table | None,
    user_id_map: dict[str, str],
    stats: MigrationStats,
) -> dict[str, str]:
    """Copy API keys, re-pointing each one at its migrated owner.

    Keys whose owner is not in ``user_id_map`` are skipped. A key is
    treated as already migrated when its id or its ``hashed_key`` exists
    in the target.

    Returns:
        Mapping from stringified source id to the target key id (empty when
        the source has no ``api_keys`` table).
    """
    translated: dict[str, str] = {}
    if source_table is None:
        return translated

    for record in fetch_rows(source_session, source_table):
        source_key = str(record.get("id"))
        owner_id = user_id_map.get(str(record.get("user_id")))
        if not owner_id:
            stats.add("api_keys", "skipped")
            continue

        match = existing_id(target_session, api_keys, source_key)
        if not match:
            by_hash = select(api_keys.c.id).where(api_keys.c.hashed_key == record.get("hashed_key"))
            match = target_session.execute(by_hash).scalar_one_or_none()

        if match:
            translated[source_key] = match
            stats.add("api_keys", "reused")
            continue

        target_key = available_id(target_session, api_keys, source_key)
        insert_stmt = api_keys.insert().values(
            id=target_key,
            user_id=owner_id,
            key_prefix=record["key_prefix"],
            hashed_key=record["hashed_key"],
            created_at=record.get("created_at") or datetime.now(timezone.utc),
            last_used=record.get("last_used"),
        )
        target_session.execute(insert_stmt)
        translated[source_key] = target_key
        stats.add("api_keys", "inserted")

    return translated
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
def copy_documents(
    source_session: Session,
    target_session: Session,
    source_table: Table | None,
    user_id_map: dict[str, str],
    stats: MigrationStats,
) -> dict[str, str]:
    """Copy document rows, re-pointing each one at its migrated owner.

    Documents whose owner is not in ``user_id_map`` are skipped; documents
    whose id already exists in the target are reused as-is.

    Returns:
        Mapping from stringified source id to the target document id (empty
        when the source has no ``documents`` table).
    """
    translated: dict[str, str] = {}
    if source_table is None:
        return translated

    for record in fetch_rows(source_session, source_table):
        source_key = str(record.get("id"))
        owner_id = user_id_map.get(str(record.get("user_id")))
        if not owner_id:
            stats.add("documents", "skipped")
            continue

        match = existing_id(target_session, documents, source_key)
        if match:
            translated[source_key] = match
            stats.add("documents", "reused")
            continue

        target_key = available_id(target_session, documents, source_key)
        insert_stmt = documents.insert().values(
            id=target_key,
            user_id=owner_id,
            filename=record["filename"],
            original_name=record["original_name"],
            # Missing/NULL counters and status fall back to the column defaults.
            file_size=record.get("file_size") or 0,
            page_count=record.get("page_count") or 0,
            chunk_count=record.get("chunk_count") or 0,
            status=record.get("status") or "pending",
            error_message=record.get("error_message"),
            uploaded_at=record.get("uploaded_at") or datetime.now(timezone.utc),
            summary=record.get("summary"),
        )
        target_session.execute(insert_stmt)
        translated[source_key] = target_key
        stats.add("documents", "inserted")

    return translated
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def copy_chat_messages(
    source_session: Session,
    target_session: Session,
    source_table: Table | None,
    user_id_map: dict[str, str],
    document_id_map: dict[str, str],
    stats: MigrationStats,
) -> dict[str, str]:
    """Copy chat messages, remapping their user and document references.

    A message is skipped when its user was not migrated, or when it
    references a document that was not migrated. Messages without a
    document are copied with ``document_id`` left as ``None``.

    Returns:
        Mapping from stringified source id to the target message id (empty
        when the source has no ``chat_messages`` table).
    """
    translated: dict[str, str] = {}
    if source_table is None:
        return translated

    for record in fetch_rows(source_session, source_table):
        source_key = str(record.get("id"))
        owner_id = user_id_map.get(str(record.get("user_id")))

        source_document = record.get("document_id")
        if source_document:
            target_document = document_id_map.get(str(source_document))
        else:
            target_document = None

        dangling_document = bool(source_document) and not target_document
        if not owner_id or dangling_document:
            stats.add("chat_messages", "skipped")
            continue

        match = existing_id(target_session, chat_messages, source_key)
        if match:
            translated[source_key] = match
            stats.add("chat_messages", "reused")
            continue

        target_key = available_id(target_session, chat_messages, source_key)
        insert_stmt = chat_messages.insert().values(
            id=target_key,
            user_id=owner_id,
            document_id=target_document,
            role=record["role"],
            content=record["content"],
            sources_json=record.get("sources_json"),
            created_at=record.get("created_at") or datetime.now(timezone.utc),
        )
        target_session.execute(insert_stmt)
        translated[source_key] = target_key
        stats.add("chat_messages", "inserted")

    return translated
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
def copy_shared_messages(
    source_session: Session,
    target_session: Session,
    source_table: Table | None,
    message_id_map: dict[str, str],
    stats: MigrationStats,
) -> None:
    """Copy share records for chat messages that were migrated.

    A share is skipped when its message is not in ``message_id_map`` and
    counted as reused when a share with the same id, or one for the same
    target message, already exists.
    """
    if source_table is None:
        return

    for record in fetch_rows(source_session, source_table):
        source_key = str(record.get("id"))
        target_message = message_id_map.get(str(record.get("message_id")))
        if not target_message:
            stats.add("shared_messages", "skipped")
            continue

        match = existing_id(target_session, shared_messages, source_key)
        if not match:
            by_message = select(shared_messages.c.id).where(
                shared_messages.c.message_id == target_message
            )
            match = target_session.execute(by_message).scalar_one_or_none()

        if match:
            stats.add("shared_messages", "reused")
            continue

        target_key = available_id(target_session, shared_messages, source_key)
        insert_stmt = shared_messages.insert().values(
            id=target_key,
            message_id=target_message,
            created_at=record.get("created_at") or datetime.now(timezone.utc),
        )
        target_session.execute(insert_stmt)
        stats.add("shared_messages", "inserted")
|
| 389 |
+
|
| 390 |
+
|
| 391 |
+
def migrate(
    sqlite_url: str,
    postgres_url: str,
    create_tables: bool,
    dry_run: bool,
) -> MigrationStats:
    """Copy users, API keys, documents and chat data from SQLite to Postgres.

    Everything written to the target — including table creation when
    ``create_tables`` is set — happens inside one target transaction. The
    transaction is committed at the end, or rolled back when ``dry_run`` is
    set or any step fails.

    Args:
        sqlite_url: SQLAlchemy URL of the source SQLite database.
        postgres_url: URL of the target database; normalized with
            ``normalize_postgres_url`` before use.
        create_tables: Create missing target tables before copying.
        dry_run: Perform the whole migration, then roll it back.

    Returns:
        Per-table counts of inserted, reused and skipped rows.

    Raises:
        RuntimeError: The source has neither a ``users`` nor a ``user`` table.
        IntegrityError: The target database rejected a row.
    """
    source_engine = make_engine(sqlite_url)
    target_engine = make_engine(normalize_postgres_url(postgres_url))

    source_session = make_session(source_engine)
    target_session = make_session(target_engine)
    stats = MigrationStats()

    try:
        if create_tables:
            # Issue the DDL on the session's own connection so it is part of
            # the migration transaction: a dry run or a failed run then rolls
            # the table creation back instead of leaving new tables behind.
            metadata.create_all(target_session.connection())

        current_users = reflected_table(source_engine, "users")
        legacy_users = reflected_table(source_engine, "user")
        source_users = current_users if current_users is not None else legacy_users
        if source_users is None:
            raise RuntimeError("No users table found. Expected 'users' or legacy 'user'.")

        # Parents are copied before children so every foreign key can be
        # remapped through the id maps returned by the earlier steps.
        user_id_map = copy_users(source_session, target_session, source_users, stats)
        copy_api_keys(source_session, target_session, reflected_table(source_engine, "api_keys"), user_id_map, stats)
        document_id_map = copy_documents(
            source_session,
            target_session,
            reflected_table(source_engine, "documents"),
            user_id_map,
            stats,
        )
        message_id_map = copy_chat_messages(
            source_session,
            target_session,
            reflected_table(source_engine, "chat_messages"),
            user_id_map,
            document_id_map,
            stats,
        )
        copy_shared_messages(
            source_session,
            target_session,
            reflected_table(source_engine, "shared_messages"),
            message_id_map,
            stats,
        )

        if dry_run:
            target_session.rollback()
            LOGGER.info("Dry run complete; rolled back target transaction.")
        else:
            target_session.commit()
            LOGGER.info("Migration committed.")

        return stats
    except IntegrityError:
        target_session.rollback()
        LOGGER.exception("Migration failed because the target database rejected a row.")
        raise
    except Exception:
        target_session.rollback()
        LOGGER.exception("Migration failed; rolled back target transaction.")
        raise
    finally:
        source_session.close()
        target_session.close()
        source_engine.dispose()
        target_engine.dispose()
|
| 460 |
+
|
| 461 |
+
|
| 462 |
+
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the migration script.

    The target URL defaults to the first set value among the
    ``SUPABASE_DB_URL``, ``POSTGRES_DATABASE_URL`` and ``DATABASE_URL``
    environment variables.
    """
    cli = argparse.ArgumentParser(
        description="Migrate SQLite users/documents/chat history to Supabase Postgres."
    )

    env_postgres_url = (
        os.getenv("SUPABASE_DB_URL")
        or os.getenv("POSTGRES_DATABASE_URL")
        or os.getenv("DATABASE_URL")
    )

    cli.add_argument(
        "--sqlite-path",
        default="instance/users.db",
        help="Path to the SQLite database file. Defaults to instance/users.db.",
    )
    cli.add_argument(
        "--sqlite-url",
        help="Full SQLite SQLAlchemy URL. Overrides --sqlite-path.",
    )
    cli.add_argument(
        "--postgres-url",
        default=env_postgres_url,
        help="Supabase/Postgres SQLAlchemy URL. Also read from SUPABASE_DB_URL, POSTGRES_DATABASE_URL, or DATABASE_URL.",
    )

    # The remaining options are plain on/off switches.
    switches = (
        ("--no-create-tables", "Do not create missing target tables before migrating."),
        ("--dry-run", "Run the migration and roll back the target transaction."),
        ("--verbose", "Enable debug logging."),
    )
    for flag, description in switches:
        cli.add_argument(flag, action="store_true", help=description)

    return cli.parse_args()
|
| 490 |
+
|
| 491 |
+
|
| 492 |
+
def main() -> int:
    """Run the migration from the command line.

    Returns:
        ``0`` after a completed run, ``2`` when no usable Postgres URL was
        given (missing, or pointing at SQLite).
    """
    args = parse_args()

    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=log_level, format="%(levelname)s %(message)s")

    target_url = args.postgres_url
    if not target_url or target_url.startswith("sqlite"):
        LOGGER.error("Provide a Supabase/Postgres URL with --postgres-url or SUPABASE_DB_URL.")
        return 2

    # An explicit --sqlite-url wins over --sqlite-path.
    source_url = args.sqlite_url or sqlite_url_from_path(args.sqlite_path)
    stats = migrate(
        sqlite_url=source_url,
        postgres_url=target_url,
        create_tables=not args.no_create_tables,
        dry_run=args.dry_run,
    )

    # Report every table that appears in at least one counter.
    touched_tables = set(stats.inserted) | set(stats.reused) | set(stats.skipped)
    for table_name in sorted(touched_tables):
        LOGGER.info(
            "%s: inserted=%s reused=%s skipped=%s",
            table_name,
            stats.inserted.get(table_name, 0),
            stats.reused.get(table_name, 0),
            stats.skipped.get(table_name, 0),
        )
    return 0


if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)
|
backend/tests/test_auth.py
CHANGED
|
@@ -115,3 +115,10 @@ def test_hf_token_appears_in_user_response(client, auth_headers, user, db_sessio
|
|
| 115 |
me_resp = client.get("/api/v1/auth/me", headers=auth_headers)
|
| 116 |
assert me_resp.status_code == 200
|
| 117 |
assert me_resp.json()["hf_token"] == "hf_persist_token"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
me_resp = client.get("/api/v1/auth/me", headers=auth_headers)
|
| 116 |
assert me_resp.status_code == 200
|
| 117 |
assert me_resp.json()["hf_token"] == "hf_persist_token"
|
| 118 |
+
|
| 119 |
+
# Verify encryption at rest in the database directly
|
| 120 |
+
from sqlalchemy import text
|
| 121 |
+
row = db_session.execute(text("SELECT hf_token FROM users WHERE id = :id"), {"id": user.id}).fetchone()
|
| 122 |
+
stored_token = row[0]
|
| 123 |
+
assert stored_token is not None
|
| 124 |
+
assert stored_token != "hf_persist_token"
|
backend/tests/test_chat.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
def test_chat_ask_success(client, auth_headers, ready_document, monkeypatch):
|
| 2 |
monkeypatch.setattr(
|
| 3 |
"app.routes.chat.generate_answer",
|
| 4 |
-
lambda question, user_id, document_id=None: {
|
| 5 |
"answer": "Mocked answer",
|
| 6 |
"sources": [
|
| 7 |
{
|
|
@@ -48,3 +48,34 @@ def test_chat_ask_document_not_ready(client, auth_headers, pending_document):
|
|
| 48 |
|
| 49 |
assert response.status_code == 400
|
| 50 |
assert "Document is still pending" in response.json()["detail"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
def test_chat_ask_success(client, auth_headers, ready_document, monkeypatch):
|
| 2 |
monkeypatch.setattr(
|
| 3 |
"app.routes.chat.generate_answer",
|
| 4 |
+
lambda question, user_id, document_id=None, **kwargs: {
|
| 5 |
"answer": "Mocked answer",
|
| 6 |
"sources": [
|
| 7 |
{
|
|
|
|
| 48 |
|
| 49 |
assert response.status_code == 400
|
| 50 |
assert "Document is still pending" in response.json()["detail"]
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def test_agent_dynamic_token(monkeypatch):
    """generate_answer passes the per-user HF token, else the configured one."""
    import app.rag.agent
    from app.config import get_settings
    from app.rag.agent import generate_answer

    # Holds the token the most recently constructed client received.
    captured = {"token": None}

    class _EmptyResponse:
        choices = []

    class MockInferenceClient:
        def __init__(self, token=None, **kwargs):
            captured["token"] = token

        def chat_completion(self, *args, **kwargs):
            return _EmptyResponse()

    # Mock the InferenceClient in app.rag.agent
    monkeypatch.setattr(app.rag.agent, "InferenceClient", MockInferenceClient)
    # Mock retrieval to return empty chunks
    monkeypatch.setattr("app.rag.agent.retrieve", lambda **kwargs: [])

    # Test with custom token
    generate_answer(question="hello?", user_id="some-user", hf_token="my-custom-hf-token")
    assert captured["token"] == "my-custom-hf-token"

    # Test with None (should fallback to global token in config)
    generate_answer(question="hello?", user_id="some-user", hf_token=None)
    assert captured["token"] == get_settings().HF_TOKEN
|
backend/tests/test_retriever.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.rag import retriever
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def test_transform_query_includes_original_and_dedupes(monkeypatch):
    """transform_query puts the original first and drops case-only duplicates."""
    original_question = "How do taxes and healthcare work?"
    generated_variants = [
        "How do taxes work?",
        "how do taxes work?",
        "How does healthcare work?",
        "healthcare overview",
    ]

    def fake_variants(_query):
        return generated_variants

    monkeypatch.setattr(retriever, "_generate_query_variants", fake_variants)

    queries = retriever.transform_query(original_question)

    expected = [
        "How do taxes and healthcare work?",
        "How do taxes work?",
        "How does healthcare work?",
        "healthcare overview",
    ]
    assert queries == expected
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def test_retrieve_fans_out_transformed_queries_and_merges_duplicates(monkeypatch):
    """retrieve searches once per transformed query and merges duplicate chunks."""
    searched_queries = []

    def make_chunk(chunk_id, text, page, score):
        # All fake chunks come from the same file; only these fields vary.
        return {
            "id": chunk_id,
            "text": text,
            "filename": "policy.pdf",
            "page": page,
            "score": score,
        }

    def fake_query_chunks(query_embedding, user_id, document_id=None, top_k=10):
        searched_queries.append(query_embedding)
        if query_embedding == "embedding:taxes":
            return [
                make_chunk("shared", "Shared chunk", 1, 0.2),
                make_chunk("taxes", "Tax chunk", 2, 0.7),
            ]

        # Any other query returns the "shared" chunk again with a higher score.
        return [
            make_chunk("shared", "Shared chunk", 1, 0.9),
            make_chunk("healthcare", "Healthcare chunk", 3, 0.8),
        ]

    monkeypatch.setattr(retriever, "transform_query", lambda _query: ["taxes", "healthcare"])
    monkeypatch.setattr(retriever, "embed_query", lambda query: f"embedding:{query}")
    monkeypatch.setattr(retriever, "get_reranker", lambda: None)
    monkeypatch.setattr(retriever, "query_chunks", fake_query_chunks)

    chunks = retriever.retrieve("How do taxes and healthcare work?", user_id="user-1")

    assert searched_queries == ["embedding:taxes", "embedding:healthcare"]
    returned_ids = [chunk["id"] for chunk in chunks]
    assert returned_ids == ["shared", "healthcare", "taxes"]

    top_chunk = chunks[0]
    assert top_chunk["score"] == 0.9
    assert top_chunk["confidence"] == 100.0
|
frontend/package-lock.json
CHANGED
|
@@ -9,6 +9,7 @@
|
|
| 9 |
"version": "0.1.0",
|
| 10 |
"dependencies": {
|
| 11 |
"@base-ui/react": "^1.4.1",
|
|
|
|
| 12 |
"class-variance-authority": "^0.7.1",
|
| 13 |
"clsx": "^2.1.1",
|
| 14 |
"i18next": "^26.3.0",
|
|
@@ -2532,6 +2533,31 @@
|
|
| 2532 |
"tailwindcss": "4.2.2"
|
| 2533 |
}
|
| 2534 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2535 |
"node_modules/@ts-morph/common": {
|
| 2536 |
"version": "0.27.0",
|
| 2537 |
"resolved": "https://registry.npmjs.org/@ts-morph/common/-/common-0.27.0.tgz",
|
|
@@ -10845,7 +10871,6 @@
|
|
| 10845 |
"version": "4.2.2",
|
| 10846 |
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.2.2.tgz",
|
| 10847 |
"integrity": "sha512-KWBIxs1Xb6NoLdMVqhbhgwZf2PGBpPEiwOqgI4pFIYbNTfBXiKYyWoTsXgBQ9WFg/OlhnvHaY+AEpW7wSmFo2Q==",
|
| 10848 |
-
"dev": true,
|
| 10849 |
"license": "MIT"
|
| 10850 |
},
|
| 10851 |
"node_modules/tapable": {
|
|
|
|
| 9 |
"version": "0.1.0",
|
| 10 |
"dependencies": {
|
| 11 |
"@base-ui/react": "^1.4.1",
|
| 12 |
+
"@tailwindcss/typography": "^0.5.19",
|
| 13 |
"class-variance-authority": "^0.7.1",
|
| 14 |
"clsx": "^2.1.1",
|
| 15 |
"i18next": "^26.3.0",
|
|
|
|
| 2533 |
"tailwindcss": "4.2.2"
|
| 2534 |
}
|
| 2535 |
},
|
| 2536 |
+
"node_modules/@tailwindcss/typography": {
|
| 2537 |
+
"version": "0.5.19",
|
| 2538 |
+
"resolved": "https://registry.npmjs.org/@tailwindcss/typography/-/typography-0.5.19.tgz",
|
| 2539 |
+
"integrity": "sha512-w31dd8HOx3k9vPtcQh5QHP9GwKcgbMp87j58qi6xgiBnFFtKEAgCWnDw4qUT8aHwkCp8bKvb/KGKWWHedP0AAg==",
|
| 2540 |
+
"license": "MIT",
|
| 2541 |
+
"dependencies": {
|
| 2542 |
+
"postcss-selector-parser": "6.0.10"
|
| 2543 |
+
},
|
| 2544 |
+
"peerDependencies": {
|
| 2545 |
+
"tailwindcss": ">=3.0.0 || insiders || >=4.0.0-alpha.20 || >=4.0.0-beta.1"
|
| 2546 |
+
}
|
| 2547 |
+
},
|
| 2548 |
+
"node_modules/@tailwindcss/typography/node_modules/postcss-selector-parser": {
|
| 2549 |
+
"version": "6.0.10",
|
| 2550 |
+
"resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.0.10.tgz",
|
| 2551 |
+
"integrity": "sha512-IQ7TZdoaqbT+LCpShg46jnZVlhWD2w6iQYAcYXfHARZ7X1t/UGhhceQDs5X0cGqKvYlHNOuv7Oa1xmb0oQuA3w==",
|
| 2552 |
+
"license": "MIT",
|
| 2553 |
+
"dependencies": {
|
| 2554 |
+
"cssesc": "^3.0.0",
|
| 2555 |
+
"util-deprecate": "^1.0.2"
|
| 2556 |
+
},
|
| 2557 |
+
"engines": {
|
| 2558 |
+
"node": ">=4"
|
| 2559 |
+
}
|
| 2560 |
+
},
|
| 2561 |
"node_modules/@ts-morph/common": {
|
| 2562 |
"version": "0.27.0",
|
| 2563 |
"resolved": "https://registry.npmjs.org/@ts-morph/common/-/common-0.27.0.tgz",
|
|
|
|
| 10871 |
"version": "4.2.2",
|
| 10872 |
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.2.2.tgz",
|
| 10873 |
"integrity": "sha512-KWBIxs1Xb6NoLdMVqhbhgwZf2PGBpPEiwOqgI4pFIYbNTfBXiKYyWoTsXgBQ9WFg/OlhnvHaY+AEpW7wSmFo2Q==",
|
|
|
|
| 10874 |
"license": "MIT"
|
| 10875 |
},
|
| 10876 |
"node_modules/tapable": {
|
frontend/package.json
CHANGED
|
@@ -12,6 +12,7 @@
|
|
| 12 |
},
|
| 13 |
"dependencies": {
|
| 14 |
"@base-ui/react": "^1.4.1",
|
|
|
|
| 15 |
"class-variance-authority": "^0.7.1",
|
| 16 |
"clsx": "^2.1.1",
|
| 17 |
"i18next": "^26.3.0",
|
|
|
|
| 12 |
},
|
| 13 |
"dependencies": {
|
| 14 |
"@base-ui/react": "^1.4.1",
|
| 15 |
+
"@tailwindcss/typography": "^0.5.19",
|
| 16 |
"class-variance-authority": "^0.7.1",
|
| 17 |
"clsx": "^2.1.1",
|
| 18 |
"i18next": "^26.3.0",
|
frontend/src/app/dashboard/page.tsx
CHANGED
|
@@ -3,17 +3,41 @@
|
|
| 3 |
import { useEffect, useState, useCallback } from "react";
|
| 4 |
import { useRouter } from "next/navigation";
|
| 5 |
import { useAuth } from "@/lib/auth";
|
| 6 |
-
import {
|
| 7 |
-
api,
|
| 8 |
-
CONNECTION_ERROR_BANNER_MESSAGE,
|
| 9 |
-
CONNECTION_ERROR_MESSAGE,
|
| 10 |
-
} from "@/lib/api";
|
| 11 |
-
|
| 12 |
import Header from "@/components/layout/Header";
|
| 13 |
import DocumentSidebar from "@/components/document/DocumentSidebar";
|
| 14 |
import ChatPanel from "@/components/chat/ChatPanel";
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
export interface DocInfo {
|
| 19 |
summary: string;
|
|
@@ -27,23 +51,6 @@ export interface DocInfo {
|
|
| 27 |
uploaded_at: string;
|
| 28 |
}
|
| 29 |
|
| 30 |
-
function DocumentSkeleton() {
|
| 31 |
-
return (
|
| 32 |
-
<div className="w-72 flex-shrink-0 border-r border-border/50 p-4 space-y-4">
|
| 33 |
-
{[1, 2, 3, 4].map((item) => (
|
| 34 |
-
<div
|
| 35 |
-
key={item}
|
| 36 |
-
className="rounded-lg border border-border/50 p-4 space-y-3"
|
| 37 |
-
>
|
| 38 |
-
<Skeleton className="h-4 w-[180px]" />
|
| 39 |
-
<Skeleton className="h-3 w-[120px]" />
|
| 40 |
-
<Skeleton className="h-3 w-[90px]" />
|
| 41 |
-
</div>
|
| 42 |
-
))}
|
| 43 |
-
</div>
|
| 44 |
-
);
|
| 45 |
-
}
|
| 46 |
-
|
| 47 |
export default function DashboardPage() {
|
| 48 |
const { user, loading } = useAuth();
|
| 49 |
const router = useRouter();
|
|
@@ -54,7 +61,6 @@ export default function DashboardPage() {
|
|
| 54 |
const [sidebarOpen, setSidebarOpen] = useState(true);
|
| 55 |
const [viewerOpen, setViewerOpen] = useState(true);
|
| 56 |
const [connectionError, setConnectionError] = useState("");
|
| 57 |
-
const [documentsLoading, setDocumentsLoading] = useState(true);
|
| 58 |
|
| 59 |
// Auth guard
|
| 60 |
useEffect(() => {
|
|
@@ -76,31 +82,23 @@ export default function DashboardPage() {
|
|
| 76 |
// Load documents
|
| 77 |
const loadDocuments = useCallback(async () => {
|
| 78 |
try {
|
| 79 |
-
setDocumentsLoading(true);
|
| 80 |
-
|
| 81 |
const data = await api.get<{ documents?: DocInfo[]; items?: DocInfo[] }>(
|
| 82 |
"/api/v1/documents/"
|
| 83 |
);
|
| 84 |
-
|
| 85 |
setDocuments(data?.documents ?? data?.items ?? []);
|
| 86 |
setConnectionError("");
|
| 87 |
} catch (err) {
|
| 88 |
-
const message =
|
| 89 |
-
err instanceof Error ? err.message : CONNECTION_ERROR_MESSAGE;
|
| 90 |
-
|
| 91 |
setConnectionError(
|
| 92 |
message === CONNECTION_ERROR_MESSAGE
|
| 93 |
? CONNECTION_ERROR_BANNER_MESSAGE
|
| 94 |
: `⚠️ ${message}`
|
| 95 |
);
|
| 96 |
-
} finally {
|
| 97 |
-
setDocumentsLoading(false);
|
| 98 |
}
|
| 99 |
}, []);
|
| 100 |
|
| 101 |
useEffect(() => {
|
| 102 |
if (!user) return;
|
| 103 |
-
|
| 104 |
void (async () => {
|
| 105 |
await loadDocuments();
|
| 106 |
})();
|
|
@@ -111,11 +109,9 @@ export default function DashboardPage() {
|
|
| 111 |
const hasPending = (documents || []).some(
|
| 112 |
(d) => d.status === "pending" || d.status === "processing"
|
| 113 |
);
|
| 114 |
-
|
| 115 |
if (!hasPending) return;
|
| 116 |
|
| 117 |
const interval = setInterval(loadDocuments, 3000);
|
| 118 |
-
|
| 119 |
return () => clearInterval(interval);
|
| 120 |
}, [documents, loadDocuments]);
|
| 121 |
|
|
@@ -127,6 +123,19 @@ export default function DashboardPage() {
|
|
| 127 |
);
|
| 128 |
}
|
| 129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
return (
|
| 131 |
<div className="h-screen flex flex-col overflow-hidden">
|
| 132 |
<Header
|
|
@@ -134,6 +143,7 @@ export default function DashboardPage() {
|
|
| 134 |
onToggleSidebar={() => setSidebarOpen(!sidebarOpen)}
|
| 135 |
viewerOpen={viewerOpen}
|
| 136 |
onToggleViewer={() => setViewerOpen(!viewerOpen)}
|
|
|
|
| 137 |
/>
|
| 138 |
|
| 139 |
{connectionError && (
|
|
@@ -146,49 +156,35 @@ export default function DashboardPage() {
|
|
| 146 |
)}
|
| 147 |
|
| 148 |
<div className="flex-1 flex overflow-hidden">
|
| 149 |
-
{/* ββ Left: Document Sidebar
|
| 150 |
-
{sidebarOpen &&
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
activeDoc={activeDoc}
|
| 158 |
-
onSelectDoc={(doc) => {
|
| 159 |
-
setActiveDoc(doc);
|
| 160 |
-
setPdfPage(1);
|
| 161 |
-
}}
|
| 162 |
-
onDocumentsChange={loadDocuments}
|
| 163 |
-
/>
|
| 164 |
-
</div>
|
| 165 |
-
))}
|
| 166 |
-
|
| 167 |
-
{/* ββ Center: Chat Panel βββββββββββββββββββ */}
|
| 168 |
<div className="flex-1 min-w-0 flex flex-col">
|
| 169 |
<ChatPanel
|
| 170 |
activeDoc={activeDoc}
|
| 171 |
onCitationClick={(page) => {
|
| 172 |
setPdfPage(page);
|
| 173 |
-
|
| 174 |
if (!viewerOpen) setViewerOpen(true);
|
| 175 |
}}
|
| 176 |
/>
|
| 177 |
</div>
|
| 178 |
|
| 179 |
-
{/* ββ Right: PDF Viewer ββββββββββββββββββ
|
| 180 |
-
{viewerOpen &&
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
</div>
|
| 191 |
-
)}
|
| 192 |
</div>
|
| 193 |
</div>
|
| 194 |
);
|
|
|
|
| 3 |
import { useEffect, useState, useCallback } from "react";
|
| 4 |
import { useRouter } from "next/navigation";
|
| 5 |
import { useAuth } from "@/lib/auth";
|
| 6 |
+
import { api, CONNECTION_ERROR_BANNER_MESSAGE, CONNECTION_ERROR_MESSAGE } from "@/lib/api";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
import Header from "@/components/layout/Header";
|
| 8 |
import DocumentSidebar from "@/components/document/DocumentSidebar";
|
| 9 |
import ChatPanel from "@/components/chat/ChatPanel";
|
| 10 |
+
|
| 11 |
+
function PDFViewerSkeleton() {
|
| 12 |
+
return (
|
| 13 |
+
<div
|
| 14 |
+
className="h-full flex flex-col bg-background"
|
| 15 |
+
aria-busy="true"
|
| 16 |
+
aria-label="Loading PDF viewer"
|
| 17 |
+
>
|
| 18 |
+
<div className="flex items-center justify-between px-3 py-2 border-b border-border/50 bg-card/50 shrink-0">
|
| 19 |
+
<div className="flex items-center gap-2">
|
| 20 |
+
<div className="h-7 w-7 rounded-md bg-muted/70 animate-pulse" />
|
| 21 |
+
<div className="h-7 w-20 rounded-md bg-muted/70 animate-pulse" />
|
| 22 |
+
<div className="h-7 w-7 rounded-md bg-muted/70 animate-pulse" />
|
| 23 |
+
</div>
|
| 24 |
+
<div className="flex items-center gap-2">
|
| 25 |
+
<div className="h-7 w-7 rounded-md bg-muted/70 animate-pulse" />
|
| 26 |
+
<div className="h-4 w-10 rounded bg-muted/70 animate-pulse" />
|
| 27 |
+
<div className="h-7 w-7 rounded-md bg-muted/70 animate-pulse" />
|
| 28 |
+
</div>
|
| 29 |
+
</div>
|
| 30 |
+
<div className="flex-1 p-4">
|
| 31 |
+
<div className="h-full rounded-lg border border-border/50 bg-muted/40 animate-pulse" />
|
| 32 |
+
</div>
|
| 33 |
+
</div>
|
| 34 |
+
);
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
const PDFViewer = dynamic(() => import("@/components/document/PDFViewer"), {
|
| 38 |
+
ssr: false,
|
| 39 |
+
loading: () => <PDFViewerSkeleton />,
|
| 40 |
+
});
|
| 41 |
|
| 42 |
export interface DocInfo {
|
| 43 |
summary: string;
|
|
|
|
| 51 |
uploaded_at: string;
|
| 52 |
}
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
export default function DashboardPage() {
|
| 55 |
const { user, loading } = useAuth();
|
| 56 |
const router = useRouter();
|
|
|
|
| 61 |
const [sidebarOpen, setSidebarOpen] = useState(true);
|
| 62 |
const [viewerOpen, setViewerOpen] = useState(true);
|
| 63 |
const [connectionError, setConnectionError] = useState("");
|
|
|
|
| 64 |
|
| 65 |
// Auth guard
|
| 66 |
useEffect(() => {
|
|
|
|
| 82 |
// Load documents
|
| 83 |
const loadDocuments = useCallback(async () => {
|
| 84 |
try {
|
|
|
|
|
|
|
| 85 |
const data = await api.get<{ documents?: DocInfo[]; items?: DocInfo[] }>(
|
| 86 |
"/api/v1/documents/"
|
| 87 |
);
|
|
|
|
| 88 |
setDocuments(data?.documents ?? data?.items ?? []);
|
| 89 |
setConnectionError("");
|
| 90 |
} catch (err) {
|
| 91 |
+
const message = err instanceof Error ? err.message : CONNECTION_ERROR_MESSAGE;
|
|
|
|
|
|
|
| 92 |
setConnectionError(
|
| 93 |
message === CONNECTION_ERROR_MESSAGE
|
| 94 |
? CONNECTION_ERROR_BANNER_MESSAGE
|
| 95 |
: `⚠️ ${message}`
|
| 96 |
);
|
|
|
|
|
|
|
| 97 |
}
|
| 98 |
}, []);
|
| 99 |
|
| 100 |
useEffect(() => {
|
| 101 |
if (!user) return;
|
|
|
|
| 102 |
void (async () => {
|
| 103 |
await loadDocuments();
|
| 104 |
})();
|
|
|
|
| 109 |
const hasPending = (documents || []).some(
|
| 110 |
(d) => d.status === "pending" || d.status === "processing"
|
| 111 |
);
|
|
|
|
| 112 |
if (!hasPending) return;
|
| 113 |
|
| 114 |
const interval = setInterval(loadDocuments, 3000);
|
|
|
|
| 115 |
return () => clearInterval(interval);
|
| 116 |
}, [documents, loadDocuments]);
|
| 117 |
|
|
|
|
| 123 |
);
|
| 124 |
}
|
| 125 |
|
| 126 |
+
// Shared sidebar content — used by both desktop panel and mobile sheet
|
| 127 |
+
const sidebarContent = (
|
| 128 |
+
<DocumentSidebar
|
| 129 |
+
documents={documents}
|
| 130 |
+
activeDoc={activeDoc}
|
| 131 |
+
onSelectDoc={(doc) => {
|
| 132 |
+
setActiveDoc(doc);
|
| 133 |
+
setPdfPage(1);
|
| 134 |
+
}}
|
| 135 |
+
onDocumentsChange={loadDocuments}
|
| 136 |
+
/>
|
| 137 |
+
);
|
| 138 |
+
|
| 139 |
return (
|
| 140 |
<div className="h-screen flex flex-col overflow-hidden">
|
| 141 |
<Header
|
|
|
|
| 143 |
onToggleSidebar={() => setSidebarOpen(!sidebarOpen)}
|
| 144 |
viewerOpen={viewerOpen}
|
| 145 |
onToggleViewer={() => setViewerOpen(!viewerOpen)}
|
| 146 |
+
mobileSheetContent={sidebarContent}
|
| 147 |
/>
|
| 148 |
|
| 149 |
{connectionError && (
|
|
|
|
| 156 |
)}
|
| 157 |
|
| 158 |
<div className="flex-1 flex overflow-hidden">
|
| 159 |
+
{/* ── Left: Document Sidebar — desktop only (md+) ─────────── */}
|
| 160 |
+
{sidebarOpen && (
|
| 161 |
+
<div className="hidden md:block w-72 flex-shrink-0 border-r border-border/50 overflow-hidden animate-fade-in-up">
|
| 162 |
+
{sidebarContent}
|
| 163 |
+
</div>
|
| 164 |
+
)}
|
| 165 |
+
|
| 166 |
+
{/* ── Center: Chat Panel ──────────────────────────────────── */}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
<div className="flex-1 min-w-0 flex flex-col">
|
| 168 |
<ChatPanel
|
| 169 |
activeDoc={activeDoc}
|
| 170 |
onCitationClick={(page) => {
|
| 171 |
setPdfPage(page);
|
|
|
|
| 172 |
if (!viewerOpen) setViewerOpen(true);
|
| 173 |
}}
|
| 174 |
/>
|
| 175 |
</div>
|
| 176 |
|
| 177 |
+
{/* ── Right: PDF Viewer — hidden on mobile ────────────────── */}
|
| 178 |
+
{viewerOpen && activeDoc && activeDoc.original_name.endsWith(".pdf") && (
|
| 179 |
+
<div className="hidden md:block w-[480px] flex-shrink-0 border-l border-border/50 overflow-hidden animate-fade-in-up">
|
| 180 |
+
<PDFViewer
|
| 181 |
+
documentId={activeDoc.id}
|
| 182 |
+
currentPage={pdfPage}
|
| 183 |
+
onPageChange={setPdfPage}
|
| 184 |
+
totalPages={activeDoc.page_count}
|
| 185 |
+
/>
|
| 186 |
+
</div>
|
| 187 |
+
)}
|
|
|
|
|
|
|
| 188 |
</div>
|
| 189 |
</div>
|
| 190 |
);
|
frontend/src/app/globals.css
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
@import "tailwindcss";
|
| 2 |
@import "tw-animate-css";
|
| 3 |
@import "shadcn/tailwind.css";
|
|
|
|
| 4 |
|
| 5 |
@custom-variant dark (&:is(.dark *));
|
| 6 |
|
|
|
|
| 1 |
@import "tailwindcss";
|
| 2 |
@import "tw-animate-css";
|
| 3 |
@import "shadcn/tailwind.css";
|
| 4 |
+
@plugin "@tailwindcss/typography";
|
| 5 |
|
| 6 |
@custom-variant dark (&:is(.dark *));
|
| 7 |
|
frontend/src/app/page.tsx
CHANGED
|
@@ -128,8 +128,18 @@ export default function HomePage() {
|
|
| 128 |
</div>
|
| 129 |
|
| 130 |
{/* ── Footer ──────────────────────────────────── */}
|
| 131 |
-
<footer className="
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
</footer>
|
| 134 |
|
| 135 |
{/* Hall of Fame Modal */}
|
|
|
|
| 128 |
</div>
|
| 129 |
|
| 130 |
{/* ── Footer ──────────────────────────────────── */}
|
| 131 |
+
<footer className="py-8 text-xs text-muted-foreground border-t border-border/50">
|
| 132 |
+
<div className="max-w-4xl mx-auto px-6 flex flex-col sm:flex-row items-center justify-between gap-4">
|
| 133 |
+
<span>Built with FastAPI • LangChain • ChromaDB • HuggingFace • Next.js</span>
|
| 134 |
+
<div className="flex items-center gap-4">
|
| 135 |
+
<Link href="/privacy" className="hover:text-foreground transition-colors">
|
| 136 |
+
Privacy Policy
|
| 137 |
+
</Link>
|
| 138 |
+
<Link href="/terms" className="hover:text-foreground transition-colors">
|
| 139 |
+
Terms of Service
|
| 140 |
+
</Link>
|
| 141 |
+
</div>
|
| 142 |
+
</div>
|
| 143 |
</footer>
|
| 144 |
|
| 145 |
{/* Hall of Fame Modal */}
|
frontend/src/app/privacy/page.tsx
ADDED
|
@@ -0,0 +1,450 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { Metadata } from "next";
|
| 2 |
+
import Link from "next/link";
|
| 3 |
+
import { ArrowLeft, Shield, Brain, FileText, Database, Cookie, UserCheck, Mail } from "lucide-react";
|
| 4 |
+
|
| 5 |
+
export const metadata: Metadata = {
|
| 6 |
+
title: "Privacy Policy — Document AI Analyst",
|
| 7 |
+
description:
|
| 8 |
+
"How PDF-Assistant-RAG collects, uses, and protects your data. Learn about our privacy practices for document uploads, AI processing, and account information.",
|
| 9 |
+
openGraph: {
|
| 10 |
+
title: "Privacy Policy — Document AI Analyst",
|
| 11 |
+
description:
|
| 12 |
+
"How PDF-Assistant-RAG collects, uses, and protects your data.",
|
| 13 |
+
},
|
| 14 |
+
};
|
| 15 |
+
|
| 16 |
+
const sections = [
|
| 17 |
+
{
|
| 18 |
+
id: "information-we-collect",
|
| 19 |
+
icon: FileText,
|
| 20 |
+
title: "1. Information We Collect",
|
| 21 |
+
content: (
|
| 22 |
+
<>
|
| 23 |
+
<p>
|
| 24 |
+
When you use PDF-Assistant-RAG, we collect the following categories of information
|
| 25 |
+
to provide and improve our service:
|
| 26 |
+
</p>
|
| 27 |
+
<h3>Account Information</h3>
|
| 28 |
+
<ul>
|
| 29 |
+
<li>
|
| 30 |
+
<strong>Registration data:</strong> username, email address, and a securely hashed
|
| 31 |
+
password when you create an account.
|
| 32 |
+
</li>
|
| 33 |
+
<li>
|
| 34 |
+
<strong>Profile information:</strong> any optional details you choose to provide.
|
| 35 |
+
</li>
|
| 36 |
+
</ul>
|
| 37 |
+
<h3>Document Data</h3>
|
| 38 |
+
<ul>
|
| 39 |
+
<li>
|
| 40 |
+
<strong>Uploaded files:</strong> PDFs, DOCX, TXT, Markdown, and other documents you
|
| 41 |
+
upload for analysis.
|
| 42 |
+
</li>
|
| 43 |
+
<li>
|
| 44 |
+
<strong>Extracted content:</strong> text, embeddings, and metadata extracted from your
|
| 45 |
+
documents to enable semantic search and AI-powered question answering.
|
| 46 |
+
</li>
|
| 47 |
+
<li>
|
| 48 |
+
<strong>Chat history:</strong> questions you ask and the AI-generated responses, stored
|
| 49 |
+
to maintain conversation context.
|
| 50 |
+
</li>
|
| 51 |
+
</ul>
|
| 52 |
+
<h3>Usage Data</h3>
|
| 53 |
+
<ul>
|
| 54 |
+
<li>
|
| 55 |
+
<strong>Technical metadata:</strong> page views, feature interactions, query timestamps,
|
| 56 |
+
and performance metrics to improve the platform.
|
| 57 |
+
</li>
|
| 58 |
+
<li>
|
| 59 |
+
<strong>Device & browser info:</strong> browser type, operating system, and basic
|
| 60 |
+
device information for compatibility optimization.
|
| 61 |
+
</li>
|
| 62 |
+
</ul>
|
| 63 |
+
</>
|
| 64 |
+
),
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
id: "how-we-use-data",
|
| 68 |
+
icon: Brain,
|
| 69 |
+
title: "2. How We Use Your Data",
|
| 70 |
+
content: (
|
| 71 |
+
<>
|
| 72 |
+
<p>Your data is used solely for the core functionality of the platform:</p>
|
| 73 |
+
<ul>
|
| 74 |
+
<li>
|
| 75 |
+
<strong>AI-powered document analysis:</strong> Your documents are processed by
|
| 76 |
+
open-source large language models (LLMs) hosted on HuggingFace to generate insights,
|
| 77 |
+
summaries, and answers to your questions.
|
| 78 |
+
</li>
|
| 79 |
+
<li>
|
| 80 |
+
<strong>Semantic search & retrieval:</strong> Document embeddings are stored in
|
| 81 |
+
vector databases (ChromaDB) to enable fast, accurate retrieval of relevant content.
|
| 82 |
+
</li>
|
| 83 |
+
<li>
|
| 84 |
+
<strong>Conversation continuity:</strong> Chat history is stored per session so you
|
| 85 |
+
can refer back to previous interactions.
|
| 86 |
+
</li>
|
| 87 |
+
<li>
|
| 88 |
+
<strong>Service improvement:</strong> Aggregated, anonymized usage patterns help us
|
| 89 |
+
identify bugs, optimize performance, and prioritize features.
|
| 90 |
+
</li>
|
| 91 |
+
</ul>
|
| 92 |
+
<p>
|
| 93 |
+
We <strong>do not</strong> use your uploaded documents or chat data to train or fine-tune
|
| 94 |
+
any AI models. Your content remains private to your account.
|
| 95 |
+
</p>
|
| 96 |
+
</>
|
| 97 |
+
),
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
id: "data-storage-security",
|
| 101 |
+
icon: Shield,
|
| 102 |
+
title: "3. Data Storage & Security",
|
| 103 |
+
content: (
|
| 104 |
+
<>
|
| 105 |
+
<p>We take data protection seriously and implement multiple layers of security:</p>
|
| 106 |
+
<h3>Encryption</h3>
|
| 107 |
+
<ul>
|
| 108 |
+
<li>
|
| 109 |
+
<strong>In transit:</strong> All communications between your browser and our servers
|
| 110 |
+
are encrypted using TLS 1.3.
|
| 111 |
+
</li>
|
| 112 |
+
<li>
|
| 113 |
+
<strong>At rest:</strong> Document files, embeddings, and user data are stored in
|
| 114 |
+
encrypted storage volumes.
|
| 115 |
+
</li>
|
| 116 |
+
<li>
|
| 117 |
+
<strong>Passwords:</strong> Never stored in plain text — we use bcrypt hashing with
|
| 118 |
+
per-user salts.
|
| 119 |
+
</li>
|
| 120 |
+
</ul>
|
| 121 |
+
<h3>Data Isolation</h3>
|
| 122 |
+
<ul>
|
| 123 |
+
<li>
|
| 124 |
+
Each user's documents and embeddings are stored in isolated vector collections.
|
| 125 |
+
</li>
|
| 126 |
+
<li>
|
| 127 |
+
Authentication is enforced at every API endpoint — users can only access their own
|
| 128 |
+
data.
|
| 129 |
+
</li>
|
| 130 |
+
<li>
|
| 131 |
+
JWT tokens with short expiration and refresh token rotation prevent unauthorized
|
| 132 |
+
access.
|
| 133 |
+
</li>
|
| 134 |
+
</ul>
|
| 135 |
+
<h3>Infrastructure</h3>
|
| 136 |
+
<ul>
|
| 137 |
+
<li>
|
| 138 |
+
Servers are hosted on secure cloud infrastructure with strict access controls.
|
| 139 |
+
</li>
|
| 140 |
+
<li>
|
| 141 |
+
Regular security audits and dependency updates are performed.
|
| 142 |
+
</li>
|
| 143 |
+
</ul>
|
| 144 |
+
</>
|
| 145 |
+
),
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
id: "data-retention",
|
| 149 |
+
icon: Database,
|
| 150 |
+
title: "4. Data Retention",
|
| 151 |
+
content: (
|
| 152 |
+
<>
|
| 153 |
+
<p>We retain your data only as long as necessary to provide the service:</p>
|
| 154 |
+
<ul>
|
| 155 |
+
<li>
|
| 156 |
+
<strong>Account data:</strong> Retained until you delete your account. You can request
|
| 157 |
+
account deletion at any time.
|
| 158 |
+
</li>
|
| 159 |
+
<li>
|
| 160 |
+
<strong>Uploaded documents & embeddings:</strong> Retained until you delete them
|
| 161 |
+
or close your account. Documents can be removed individually from the dashboard.
|
| 162 |
+
</li>
|
| 163 |
+
<li>
|
| 164 |
+
<strong>Chat history:</strong> Retained per conversation. You can clear individual
|
| 165 |
+
chats or your entire history from the settings page.
|
| 166 |
+
</li>
|
| 167 |
+
<li>
|
| 168 |
+
<strong>Logs & analytics:</strong> Aggregated usage data may be retained longer
|
| 169 |
+
in anonymized form for service improvement.
|
| 170 |
+
</li>
|
| 171 |
+
</ul>
|
| 172 |
+
<p>
|
| 173 |
+
When you delete your account, all associated documents, embeddings, chat histories, and
|
| 174 |
+
personal information are permanently deleted within 30 days.
|
| 175 |
+
</p>
|
| 176 |
+
</>
|
| 177 |
+
),
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
id: "third-party-services",
|
| 181 |
+
icon: Database,
|
| 182 |
+
title: "5. Third-Party Services",
|
| 183 |
+
content: (
|
| 184 |
+
<>
|
| 185 |
+
<p>
|
| 186 |
+
PDF-Assistant-RAG integrates with the following third-party services to deliver its
|
| 187 |
+
functionality:
|
| 188 |
+
</p>
|
| 189 |
+
<ul>
|
| 190 |
+
<li>
|
| 191 |
+
<strong>HuggingFace Inference API:</strong> Used to run open-source LLMs for document
|
| 192 |
+
analysis. Document snippets may be sent to HuggingFace for inference; they are not
|
| 193 |
+
stored or used for training. See{" "}
|
| 194 |
+
<a
|
| 195 |
+
href="https://huggingface.co/privacy"
|
| 196 |
+
target="_blank"
|
| 197 |
+
rel="noopener noreferrer"
|
| 198 |
+
>
|
| 199 |
+
HuggingFace's Privacy Policy
|
| 200 |
+
</a>.
|
| 201 |
+
</li>
|
| 202 |
+
<li>
|
| 203 |
+
<strong>Google OAuth (optional):</strong> If you choose to sign in with Google, we
|
| 204 |
+
receive only your name and email address from your Google profile. See{" "}
|
| 205 |
+
<a
|
| 206 |
+
href="https://policies.google.com/privacy"
|
| 207 |
+
target="_blank"
|
| 208 |
+
rel="noopener noreferrer"
|
| 209 |
+
>
|
| 210 |
+
Google's Privacy Policy
|
| 211 |
+
</a>.
|
| 212 |
+
</li>
|
| 213 |
+
</ul>
|
| 214 |
+
<p>
|
| 215 |
+
We do not sell your personal information or document data to any third party.
|
| 216 |
+
</p>
|
| 217 |
+
</>
|
| 218 |
+
),
|
| 219 |
+
},
|
| 220 |
+
{
|
| 221 |
+
id: "cookies",
|
| 222 |
+
icon: Cookie,
|
| 223 |
+
title: "6. Cookies",
|
| 224 |
+
content: (
|
| 225 |
+
<>
|
| 226 |
+
<p>We use only essential cookies required for the platform to function:</p>
|
| 227 |
+
<ul>
|
| 228 |
+
<li>
|
| 229 |
+
<strong>Authentication cookies:</strong> JWT refresh tokens stored securely as
|
| 230 |
+
HTTP-only cookies to maintain your login session.
|
| 231 |
+
</li>
|
| 232 |
+
<li>
|
| 233 |
+
<strong>Local storage:</strong> Access tokens and UI preferences (theme, language)
|
| 234 |
+
are stored in your browser's local storage. No tracking or advertising cookies
|
| 235 |
+
are used.
|
| 236 |
+
</li>
|
| 237 |
+
</ul>
|
| 238 |
+
<p>
|
| 239 |
+
You can clear these at any time via your browser settings. Note that clearing
|
| 240 |
+
authentication data will sign you out of your session.
|
| 241 |
+
</p>
|
| 242 |
+
</>
|
| 243 |
+
),
|
| 244 |
+
},
|
| 245 |
+
{
|
| 246 |
+
id: "your-rights",
|
| 247 |
+
icon: UserCheck,
|
| 248 |
+
title: "7. Your Rights",
|
| 249 |
+
content: (
|
| 250 |
+
<>
|
| 251 |
+
<p>You have the following rights regarding your data:</p>
|
| 252 |
+
<ul>
|
| 253 |
+
<li>
|
| 254 |
+
<strong>Access:</strong> View all documents and data associated with your account at
|
| 255 |
+
any time from your dashboard.
|
| 256 |
+
</li>
|
| 257 |
+
<li>
|
| 258 |
+
<strong>Deletion:</strong> Delete individual documents or your entire account and
|
| 259 |
+
associated data.
|
| 260 |
+
</li>
|
| 261 |
+
<li>
|
| 262 |
+
<strong>Export:</strong> Request a copy of your data in a machine-readable format.
|
| 263 |
+
</li>
|
| 264 |
+
<li>
|
| 265 |
+
<strong>Correction:</strong> Update your account information (username, email) from
|
| 266 |
+
your profile settings.
|
| 267 |
+
</li>
|
| 268 |
+
<li>
|
| 269 |
+
<strong>Withdraw consent:</strong> Stop using the service and delete your account at
|
| 270 |
+
any time.
|
| 271 |
+
</li>
|
| 272 |
+
</ul>
|
| 273 |
+
<p>
|
| 274 |
+
To exercise any of these rights, please contact us using the information in the
|
| 275 |
+
“Contact” section below.
|
| 276 |
+
</p>
|
| 277 |
+
</>
|
| 278 |
+
),
|
| 279 |
+
},
|
| 280 |
+
{
|
| 281 |
+
id: "changes",
|
| 282 |
+
icon: Shield,
|
| 283 |
+
title: "8. Changes to This Policy",
|
| 284 |
+
content: (
|
| 285 |
+
<>
|
| 286 |
+
<p>
|
| 287 |
+
We may update this Privacy Policy from time to time. Changes will be communicated by:
|
| 288 |
+
</p>
|
| 289 |
+
<ul>
|
| 290 |
+
<li>Posting the updated policy on this page with a new “Last updated” date.</li>
|
| 291 |
+
<li>
|
| 292 |
+
Sending a notification to your registered email address for material changes.
|
| 293 |
+
</li>
|
| 294 |
+
</ul>
|
| 295 |
+
<p>
|
| 296 |
+
Your continued use of the platform after changes constitutes acceptance of the updated
|
| 297 |
+
policy. We encourage you to review this page periodically.
|
| 298 |
+
</p>
|
| 299 |
+
</>
|
| 300 |
+
),
|
| 301 |
+
},
|
| 302 |
+
{
|
| 303 |
+
id: "contact",
|
| 304 |
+
icon: Mail,
|
| 305 |
+
title: "9. Contact Us",
|
| 306 |
+
content: (
|
| 307 |
+
<>
|
| 308 |
+
<p>
|
| 309 |
+
If you have any questions, concerns, or requests regarding this Privacy Policy or your
|
| 310 |
+
data, please reach out through the project’s official channels:
|
| 311 |
+
</p>
|
| 312 |
+
<ul>
|
| 313 |
+
<li>
|
| 314 |
+
<strong>GitHub Issues:</strong>{" "}
|
| 315 |
+
<a
|
| 316 |
+
href="https://github.com/param20h/PDF-Assistant-RAG/issues"
|
| 317 |
+
target="_blank"
|
| 318 |
+
rel="noopener noreferrer"
|
| 319 |
+
>
|
| 320 |
+
github.com/param20h/PDF-Assistant-RAG/issues
|
| 321 |
+
</a>
|
| 322 |
+
</li>
|
| 323 |
+
<li>
|
| 324 |
+
<strong>GitHub Discussions:</strong>{" "}
|
| 325 |
+
<a
|
| 326 |
+
href="https://github.com/param20h/PDF-Assistant-RAG/discussions"
|
| 327 |
+
target="_blank"
|
| 328 |
+
rel="noopener noreferrer"
|
| 329 |
+
>
|
| 330 |
+
github.com/param20h/PDF-Assistant-RAG/discussions
|
| 331 |
+
</a>
|
| 332 |
+
</li>
|
| 333 |
+
<li>
|
| 334 |
+
<strong>LinkedIn:</strong>{" "}
|
| 335 |
+
<a
|
| 336 |
+
href="https://www.linkedin.com/in/param20h/"
|
| 337 |
+
target="_blank"
|
| 338 |
+
rel="noopener noreferrer"
|
| 339 |
+
>
|
| 340 |
+
linkedin.com/in/param20h
|
| 341 |
+
</a>
|
| 342 |
+
</li>
|
| 343 |
+
</ul>
|
| 344 |
+
</>
|
| 345 |
+
),
|
| 346 |
+
},
|
| 347 |
+
];
|
| 348 |
+
|
| 349 |
+
export default function PrivacyPage() {
|
| 350 |
+
return (
|
| 351 |
+
<div className="min-h-screen bg-background">
|
| 352 |
+
{/* ── Header ────────────────────────────────────── */}
|
| 353 |
+
<header className="sticky top-0 z-50 border-b border-border/50 bg-card/50 backdrop-blur-md">
|
| 354 |
+
<div className="mx-auto max-w-4xl flex items-center justify-between px-6 h-14">
|
| 355 |
+
<Link
|
| 356 |
+
href="/"
|
| 357 |
+
className="flex items-center gap-2 text-sm text-muted-foreground hover:text-foreground transition-colors"
|
| 358 |
+
>
|
| 359 |
+
<ArrowLeft className="w-4 h-4" />
|
| 360 |
+
Back to Home
|
| 361 |
+
</Link>
|
| 362 |
+
<div className="flex items-center gap-2">
|
| 363 |
+
<div className="w-7 h-7 rounded-lg bg-primary/15 flex items-center justify-center">
|
| 364 |
+
<Shield className="w-4 h-4 text-primary" />
|
| 365 |
+
</div>
|
| 366 |
+
<span className="font-semibold text-sm">Privacy Policy</span>
|
| 367 |
+
</div>
|
| 368 |
+
</div>
|
| 369 |
+
</header>
|
| 370 |
+
|
| 371 |
+
{/* ── Hero ──────────────────────────────────────── */}
|
| 372 |
+
<section className="border-b border-border/50">
|
| 373 |
+
<div className="mx-auto max-w-4xl px-6 py-16 sm:py-20 text-center">
|
| 374 |
+
<div className="inline-flex items-center gap-2 px-4 py-1.5 rounded-full bg-primary/10 border border-primary/20 text-sm text-primary mb-6">
|
| 375 |
+
<Shield className="w-4 h-4" />
|
| 376 |
+
Your data matters
|
| 377 |
+
</div>
|
| 378 |
+
<h1 className="text-4xl sm:text-5xl font-bold tracking-tight mb-4">
|
| 379 |
+
Privacy Policy
|
| 380 |
+
</h1>
|
| 381 |
+
<p className="text-lg text-muted-foreground max-w-2xl mx-auto">
|
| 382 |
+
How we collect, use, and protect your data when you use PDF-Assistant-RAG.
|
| 383 |
+
</p>
|
| 384 |
+
<p className="mt-4 text-sm text-muted-foreground">
|
| 385 |
+
<em>Last updated: May 30, 2026</em>
|
| 386 |
+
</p>
|
| 387 |
+
</div>
|
| 388 |
+
</section>
|
| 389 |
+
|
| 390 |
+
{/* ── Content ───────────────────────────────────── */}
|
| 391 |
+
<div className="mx-auto max-w-4xl px-6 py-12 sm:py-16">
|
| 392 |
+
{/* Table of Contents */}
|
| 393 |
+
<nav className="mb-12 p-6 rounded-xl border border-border/50 bg-card/30" aria-label="Table of contents">
|
| 394 |
+
<h2 className="text-sm font-semibold uppercase tracking-wider text-muted-foreground mb-4">
|
| 395 |
+
On this page
|
| 396 |
+
</h2>
|
| 397 |
+
<ul className="space-y-2">
|
| 398 |
+
{sections.map((section) => (
|
| 399 |
+
<li key={section.id}>
|
| 400 |
+
<a
|
| 401 |
+
href={`#${section.id}`}
|
| 402 |
+
className="flex items-center gap-2 text-sm text-muted-foreground hover:text-foreground transition-colors"
|
| 403 |
+
>
|
| 404 |
+
<section.icon className="w-3.5 h-3.5 shrink-0 text-primary" />
|
| 405 |
+
{section.title}
|
| 406 |
+
</a>
|
| 407 |
+
</li>
|
| 408 |
+
))}
|
| 409 |
+
</ul>
|
| 410 |
+
</nav>
|
| 411 |
+
|
| 412 |
+
{/* Sections */}
|
| 413 |
+
<div className="prose prose-sm sm:prose-base dark:prose-invert max-w-none prose-headings:font-semibold prose-headings:tracking-tight prose-h2:text-foreground prose-h3:text-foreground prose-p:text-muted-foreground prose-p:leading-relaxed prose-a:text-primary prose-a:no-underline hover:prose-a:underline prose-strong:text-foreground prose-li:text-muted-foreground prose-li:marker:text-primary/60">
|
| 414 |
+
{sections.map((section) => (
|
| 415 |
+
<section key={section.id} id={section.id} className="mb-12 scroll-mt-20">
|
| 416 |
+
<div className="flex items-center gap-3 mb-6">
|
| 417 |
+
<div className="w-8 h-8 rounded-lg bg-primary/10 flex items-center justify-center shrink-0">
|
| 418 |
+
<section.icon className="w-4 h-4 text-primary" />
|
| 419 |
+
</div>
|
| 420 |
+
<h2 className="text-xl sm:text-2xl !my-0">{section.title}</h2>
|
| 421 |
+
</div>
|
| 422 |
+
{section.content}
|
| 423 |
+
<hr className="mt-8 border-border/30" />
|
| 424 |
+
</section>
|
| 425 |
+
))}
|
| 426 |
+
</div>
|
| 427 |
+
|
| 428 |
+
{/* Footer note */}
|
| 429 |
+
<div className="mt-8 text-center">
|
| 430 |
+
<p className="text-sm text-muted-foreground">
|
| 431 |
+
Have questions?{" "}
|
| 432 |
+
<a
|
| 433 |
+
href="https://github.com/param20h/PDF-Assistant-RAG/discussions"
|
| 434 |
+
target="_blank"
|
| 435 |
+
rel="noopener noreferrer"
|
| 436 |
+
className="text-primary hover:underline"
|
| 437 |
+
>
|
| 438 |
+
Start a discussion
|
| 439 |
+
</a>
|
| 440 |
+
</p>
|
| 441 |
+
</div>
|
| 442 |
+
</div>
|
| 443 |
+
|
| 444 |
+
{/* ── Footer ────────────────────────────────── */}
|
| 445 |
+
<footer className="text-center py-6 text-xs text-muted-foreground border-t border-border/50">
|
| 446 |
+
Built with FastAPI • LangChain • ChromaDB • HuggingFace • Next.js
|
| 447 |
+
</footer>
|
| 448 |
+
</div>
|
| 449 |
+
);
|
| 450 |
+
}
|
frontend/src/app/terms/page.tsx
ADDED
|
@@ -0,0 +1,435 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { Metadata } from "next";
|
| 2 |
+
import Link from "next/link";
|
| 3 |
+
import {
|
| 4 |
+
ArrowLeft,
|
| 5 |
+
Shield,
|
| 6 |
+
CheckCircle,
|
| 7 |
+
FileText,
|
| 8 |
+
AlertTriangle,
|
| 9 |
+
UserCheck,
|
| 10 |
+
Scale,
|
| 11 |
+
Ban,
|
| 12 |
+
RefreshCw,
|
| 13 |
+
Mail,
|
| 14 |
+
} from "lucide-react";
|
| 15 |
+
|
| 16 |
+
const sections = [
|
| 17 |
+
{
|
| 18 |
+
id: "acceptance",
|
| 19 |
+
icon: CheckCircle,
|
| 20 |
+
title: "1. Acceptance of Terms",
|
| 21 |
+
content: (
|
| 22 |
+
<>
|
| 23 |
+
<p>
|
| 24 |
+
By accessing or using PDF-Assistant-RAG (“the Platform”), you agree to be
|
| 25 |
+
bound by these Terms of Service (“Terms”). If you do not agree to all terms,
|
| 26 |
+
you must not use the Platform.
|
| 27 |
+
</p>
|
| 28 |
+
<p>
|
| 29 |
+
These Terms apply to all visitors, users, and contributors to the Platform. By creating
|
| 30 |
+
an account, uploading documents, or interacting with the service in any way, you signify
|
| 31 |
+
your acceptance of these Terms.
|
| 32 |
+
</p>
|
| 33 |
+
</>
|
| 34 |
+
),
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
id: "service-description",
|
| 38 |
+
icon: FileText,
|
| 39 |
+
title: "2. Description of Service",
|
| 40 |
+
content: (
|
| 41 |
+
<>
|
| 42 |
+
<p>
|
| 43 |
+
PDF-Assistant-RAG is an open-source document analysis platform that allows users to upload
|
| 44 |
+
documents (PDF, DOCX, TXT, Markdown) and interact with them through AI-powered semantic
|
| 45 |
+
search and chat, using Retrieval-Augmented Generation (RAG) and open-source large language
|
| 46 |
+
models (LLMs).
|
| 47 |
+
</p>
|
| 48 |
+
<p>The core features include:</p>
|
| 49 |
+
<ul>
|
| 50 |
+
<li>Document upload, storage, and management</li>
|
| 51 |
+
<li>AI-powered question answering and document analysis</li>
|
| 52 |
+
<li>Semantic search across uploaded documents</li>
|
| 53 |
+
<li>Conversation history and context retention</li>
|
| 54 |
+
<li>Multi-language support (English, Hindi, Spanish, French)</li>
|
| 55 |
+
</ul>
|
| 56 |
+
<p>
|
| 57 |
+
The Platform is provided “as is” and “as available” for
|
| 58 |
+
educational and productivity purposes. The maintainers make no guarantees about the
|
| 59 |
+
accuracy, completeness, or reliability of AI-generated responses.
|
| 60 |
+
</p>
|
| 61 |
+
</>
|
| 62 |
+
),
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
id: "accounts",
|
| 66 |
+
icon: UserCheck,
|
| 67 |
+
title: "3. User Accounts & Registration",
|
| 68 |
+
content: (
|
| 69 |
+
<>
|
| 70 |
+
<p>To use certain features of the Platform, you must register for an account:</p>
|
| 71 |
+
<ul>
|
| 72 |
+
<li>
|
| 73 |
+
<strong>Accuracy:</strong> You agree to provide accurate, current, and complete
|
| 74 |
+
information during registration and to update it as necessary.
|
| 75 |
+
</li>
|
| 76 |
+
<li>
|
| 77 |
+
<strong>Security:</strong> You are responsible for safeguarding your password and for
|
| 78 |
+
all activities under your account. Notify the maintainers immediately of any
|
| 79 |
+
unauthorized use.
|
| 80 |
+
</li>
|
| 81 |
+
<li>
|
| 82 |
+
<strong>Account types:</strong> The Platform supports email/password registration and
|
| 83 |
+
optional Google OAuth sign-in.
|
| 84 |
+
</li>
|
| 85 |
+
<li>
|
| 86 |
+
<strong>One account per person:</strong> You may not create multiple accounts for the
|
| 87 |
+
same individual unless explicitly permitted.
|
| 88 |
+
</li>
|
| 89 |
+
<li>
|
| 90 |
+
<strong>No shared accounts:</strong> Account sharing with unauthorized users is
|
| 91 |
+
prohibited.
|
| 92 |
+
</li>
|
| 93 |
+
</ul>
|
| 94 |
+
</>
|
| 95 |
+
),
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
id: "acceptable-use",
|
| 99 |
+
icon: Ban,
|
| 100 |
+
title: "4. Acceptable Use",
|
| 101 |
+
content: (
|
| 102 |
+
<>
|
| 103 |
+
<p>You agree to use the Platform only for lawful purposes and in accordance with these Terms. Prohibited activities include:</p>
|
| 104 |
+
<ul>
|
| 105 |
+
<li>
|
| 106 |
+
Uploading malware, viruses, or any malicious code
|
| 107 |
+
</li>
|
| 108 |
+
<li>
|
| 109 |
+
Uploading illegal, obscene, defamatory, or infringing content
|
| 110 |
+
</li>
|
| 111 |
+
<li>
|
| 112 |
+
Attempting to bypass authentication, access other users' data, or exploit the
|
| 113 |
+
system
|
| 114 |
+
</li>
|
| 115 |
+
<li>
|
| 116 |
+
Using the Platform for automated scraping, data mining, or high-volume API abuse
|
| 117 |
+
</li>
|
| 118 |
+
<li>
|
| 119 |
+
Reverse-engineering, decompiling, or attempting to extract the source code of
|
| 120 |
+
proprietary components
|
| 121 |
+
</li>
|
| 122 |
+
<li>
|
| 123 |
+
Interfering with the operation of the Platform or its underlying infrastructure
|
| 124 |
+
</li>
|
| 125 |
+
</ul>
|
| 126 |
+
</>
|
| 127 |
+
),
|
| 128 |
+
},
|
| 129 |
+
{
|
| 130 |
+
id: "content-data",
|
| 131 |
+
icon: Shield,
|
| 132 |
+
title: "5. Uploaded Content & Data",
|
| 133 |
+
content: (
|
| 134 |
+
<>
|
| 135 |
+
<p>
|
| 136 |
+
You retain full ownership of all documents and content you upload to the Platform
|
| 137 |
+
(“Your Content”). By uploading, you grant the Platform a limited, temporary
|
| 138 |
+
license to process, store, and analyze Your Content solely for the purpose of providing
|
| 139 |
+
the service.
|
| 140 |
+
</p>
|
| 141 |
+
<h3>Data Handling</h3>
|
| 142 |
+
<ul>
|
| 143 |
+
<li>
|
| 144 |
+
Your documents are processed by open-source LLMs hosted on HuggingFace. Document
|
| 145 |
+
snippets may be sent for inference but are not stored or used for training.
|
| 146 |
+
</li>
|
| 147 |
+
<li>
|
| 148 |
+
Document embeddings are stored in per-user isolated vector collections (ChromaDB).
|
| 149 |
+
</li>
|
| 150 |
+
<li>
|
| 151 |
+
Chat history is stored per session to maintain conversation context.
|
| 152 |
+
</li>
|
| 153 |
+
</ul>
|
| 154 |
+
<h3>Your Responsibilities</h3>
|
| 155 |
+
<ul>
|
| 156 |
+
<li>
|
| 157 |
+
You represent that you own or have the necessary rights to upload and process Your
|
| 158 |
+
Content.
|
| 159 |
+
</li>
|
| 160 |
+
<li>
|
| 161 |
+
You must not upload documents containing sensitive personal information, trade secrets,
|
| 162 |
+
or classified data unless you have the legal right to do so.
|
| 163 |
+
</li>
|
| 164 |
+
<li>
|
| 165 |
+
You are solely responsible for the legality, reliability, and accuracy of Your Content.
|
| 166 |
+
</li>
|
| 167 |
+
</ul>
|
| 168 |
+
<p>
|
| 169 |
+
See our{" "}
|
| 170 |
+
<Link href="/privacy" className="text-primary hover:underline">
|
| 171 |
+
Privacy Policy
|
| 172 |
+
</Link>{" "}
|
| 173 |
+
for more details on how we handle your data.
|
| 174 |
+
</p>
|
| 175 |
+
</>
|
| 176 |
+
),
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
id: "intellectual-property",
|
| 180 |
+
icon: Scale,
|
| 181 |
+
title: "6. Intellectual Property",
|
| 182 |
+
content: (
|
| 183 |
+
<>
|
| 184 |
+
<p>
|
| 185 |
+
The Platform codebase is open-source and licensed under the{" "}
|
| 186 |
+
<a
|
| 187 |
+
href="https://opensource.org/licenses/MIT"
|
| 188 |
+
target="_blank"
|
| 189 |
+
rel="noopener noreferrer"
|
| 190 |
+
>
|
| 191 |
+
MIT License
|
| 192 |
+
</a>. This means:
|
| 193 |
+
</p>
|
| 194 |
+
<ul>
|
| 195 |
+
<li>
|
| 196 |
+
You may freely use, modify, and distribute the source code, subject to the terms of
|
| 197 |
+
the MIT License.
|
| 198 |
+
</li>
|
| 199 |
+
<li>
|
| 200 |
+
The name “PDF-Assistant-RAG,” its logo, and branding elements may not be
|
| 201 |
+
used without explicit permission.
|
| 202 |
+
</li>
|
| 203 |
+
<li>
|
| 204 |
+
AI-generated responses produced by the Platform are provided without warranty and
|
| 205 |
+
should not be considered professional advice (legal, financial, medical, etc.).
|
| 206 |
+
</li>
|
| 207 |
+
</ul>
|
| 208 |
+
</>
|
| 209 |
+
),
|
| 210 |
+
},
|
| 211 |
+
{
|
| 212 |
+
id: "liability",
|
| 213 |
+
icon: AlertTriangle,
|
| 214 |
+
title: "7. Limitation of Liability",
|
| 215 |
+
content: (
|
| 216 |
+
<>
|
| 217 |
+
<p>
|
| 218 |
+
The Platform is provided free of charge as an open-source project. To the fullest extent
|
| 219 |
+
permitted by law:
|
| 220 |
+
</p>
|
| 221 |
+
<ul>
|
| 222 |
+
<li>
|
| 223 |
+
The maintainers shall not be liable for any indirect, incidental, special,
|
| 224 |
+
consequential, or punitive damages arising from your use of the Platform.
|
| 225 |
+
</li>
|
| 226 |
+
<li>
|
| 227 |
+
AI-generated content may contain errors, omissions, or inaccuracies. You should
|
| 228 |
+
independently verify critical information.
|
| 229 |
+
</li>
|
| 230 |
+
<li>
|
| 231 |
+
The Platform makes no guarantees about uptime, availability, or data durability,
|
| 232 |
+
though reasonable efforts are made to maintain the service.
|
| 233 |
+
</li>
|
| 234 |
+
</ul>
|
| 235 |
+
</>
|
| 236 |
+
),
|
| 237 |
+
},
|
| 238 |
+
{
|
| 239 |
+
id: "termination",
|
| 240 |
+
icon: Ban,
|
| 241 |
+
title: "8. Termination",
|
| 242 |
+
content: (
|
| 243 |
+
<>
|
| 244 |
+
<p>
|
| 245 |
+
We reserve the right to suspend or terminate your access to the Platform at any time,
|
| 246 |
+
without prior notice, for:
|
| 247 |
+
</p>
|
| 248 |
+
<ul>
|
| 249 |
+
<li>Violation of these Terms of Service</li>
|
| 250 |
+
<li>Engaging in prohibited or illegal activities</li>
|
| 251 |
+
<li>Extended inactivity of your account</li>
|
| 252 |
+
<li>At your request via account deletion</li>
|
| 253 |
+
</ul>
|
| 254 |
+
<p>
|
| 255 |
+
Upon termination, your access to documents, chat history, and account data will be
|
| 256 |
+
revoked. You may request a data export before account deletion by contacting the
|
| 257 |
+
maintainers.
|
| 258 |
+
</p>
|
| 259 |
+
</>
|
| 260 |
+
),
|
| 261 |
+
},
|
| 262 |
+
{
|
| 263 |
+
id: "changes-to-terms",
|
| 264 |
+
icon: RefreshCw,
|
| 265 |
+
title: "9. Changes to These Terms",
|
| 266 |
+
content: (
|
| 267 |
+
<>
|
| 268 |
+
<p>
|
| 269 |
+
We may revise these Terms from time to time. The most current version will always be
|
| 270 |
+
posted on this page. Material changes will be communicated via:
|
| 271 |
+
</p>
|
| 272 |
+
<ul>
|
| 273 |
+
<li>A notice on the Platform dashboard</li>
|
| 274 |
+
<li>Email notification to registered users (for significant changes)</li>
|
| 275 |
+
</ul>
|
| 276 |
+
<p>
|
| 277 |
+
Your continued use of the Platform after changes take effect constitutes acceptance of
|
| 278 |
+
the revised Terms.
|
| 279 |
+
</p>
|
| 280 |
+
</>
|
| 281 |
+
),
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
id: "contact",
|
| 285 |
+
icon: Mail,
|
| 286 |
+
title: "10. Contact Us",
|
| 287 |
+
content: (
|
| 288 |
+
<>
|
| 289 |
+
<p>
|
| 290 |
+
If you have any questions about these Terms, please reach out through the project’s
|
| 291 |
+
official channels:
|
| 292 |
+
</p>
|
| 293 |
+
<ul>
|
| 294 |
+
<li>
|
| 295 |
+
<strong>GitHub Issues:</strong>{" "}
|
| 296 |
+
<a
|
| 297 |
+
href="https://github.com/param20h/PDF-Assistant-RAG/issues"
|
| 298 |
+
target="_blank"
|
| 299 |
+
rel="noopener noreferrer"
|
| 300 |
+
>
|
| 301 |
+
github.com/param20h/PDF-Assistant-RAG/issues
|
| 302 |
+
</a>
|
| 303 |
+
</li>
|
| 304 |
+
<li>
|
| 305 |
+
<strong>GitHub Discussions:</strong>{" "}
|
| 306 |
+
<a
|
| 307 |
+
href="https://github.com/param20h/PDF-Assistant-RAG/discussions"
|
| 308 |
+
target="_blank"
|
| 309 |
+
rel="noopener noreferrer"
|
| 310 |
+
>
|
| 311 |
+
github.com/param20h/PDF-Assistant-RAG/discussions
|
| 312 |
+
</a>
|
| 313 |
+
</li>
|
| 314 |
+
<li>
|
| 315 |
+
<strong>LinkedIn:</strong>{" "}
|
| 316 |
+
<a
|
| 317 |
+
href="https://www.linkedin.com/in/param20h/"
|
| 318 |
+
target="_blank"
|
| 319 |
+
rel="noopener noreferrer"
|
| 320 |
+
>
|
| 321 |
+
linkedin.com/in/param20h
|
| 322 |
+
</a>
|
| 323 |
+
</li>
|
| 324 |
+
</ul>
|
| 325 |
+
</>
|
| 326 |
+
),
|
| 327 |
+
},
|
| 328 |
+
];
|
| 329 |
+
|
| 330 |
+
export default function TermsPage() {
|
| 331 |
+
return (
|
| 332 |
+
<div className="min-h-screen bg-background">
|
| 333 |
+
{/* ── Header ────────────────────────────────────── */}
|
| 334 |
+
<header className="sticky top-0 z-50 border-b border-border/50 bg-card/50 backdrop-blur-md">
|
| 335 |
+
<div className="mx-auto max-w-4xl flex items-center justify-between px-6 h-14">
|
| 336 |
+
<Link
|
| 337 |
+
href="/"
|
| 338 |
+
className="flex items-center gap-2 text-sm text-muted-foreground hover:text-foreground transition-colors"
|
| 339 |
+
>
|
| 340 |
+
<ArrowLeft className="w-4 h-4" />
|
| 341 |
+
Back to Home
|
| 342 |
+
</Link>
|
| 343 |
+
<div className="flex items-center gap-2">
|
| 344 |
+
<div className="w-7 h-7 rounded-lg bg-primary/15 flex items-center justify-center">
|
| 345 |
+
<Scale className="w-4 h-4 text-primary" />
|
| 346 |
+
</div>
|
| 347 |
+
<span className="font-semibold text-sm">Terms of Service</span>
|
| 348 |
+
</div>
|
| 349 |
+
</div>
|
| 350 |
+
</header>
|
| 351 |
+
|
| 352 |
+
{/* ── Hero ──────────────────────────────────────── */}
|
| 353 |
+
<section className="border-b border-border/50">
|
| 354 |
+
<div className="mx-auto max-w-4xl px-6 py-16 sm:py-20 text-center">
|
| 355 |
+
<div className="inline-flex items-center gap-2 px-4 py-1.5 rounded-full bg-primary/10 border border-primary/20 text-sm text-primary mb-6">
|
| 356 |
+
<Scale className="w-4 h-4" />
|
| 357 |
+
Know your rights
|
| 358 |
+
</div>
|
| 359 |
+
<h1 className="text-4xl sm:text-5xl font-bold tracking-tight mb-4">
|
| 360 |
+
Terms of Service
|
| 361 |
+
</h1>
|
| 362 |
+
<p className="text-lg text-muted-foreground max-w-2xl mx-auto">
|
| 363 |
+
The rules and guidelines for using PDF-Assistant-RAG, our open-source document
|
| 364 |
+
analysis platform.
|
| 365 |
+
</p>
|
| 366 |
+
<p className="mt-4 text-sm text-muted-foreground">
|
| 367 |
+
<em>Last updated: May 30, 2026</em>
|
| 368 |
+
</p>
|
| 369 |
+
</div>
|
| 370 |
+
</section>
|
| 371 |
+
|
| 372 |
+
{/* ── Content ───────────────────────────────────── */}
|
| 373 |
+
<div className="mx-auto max-w-4xl px-6 py-12 sm:py-16">
|
| 374 |
+
{/* Table of Contents */}
|
| 375 |
+
<nav
|
| 376 |
+
className="mb-12 p-6 rounded-xl border border-border/50 bg-card/30"
|
| 377 |
+
aria-label="Table of contents"
|
| 378 |
+
>
|
| 379 |
+
<h2 className="text-sm font-semibold uppercase tracking-wider text-muted-foreground mb-4">
|
| 380 |
+
On this page
|
| 381 |
+
</h2>
|
| 382 |
+
<ul className="space-y-2">
|
| 383 |
+
{sections.map((section) => (
|
| 384 |
+
<li key={section.id}>
|
| 385 |
+
<a
|
| 386 |
+
href={`#${section.id}`}
|
| 387 |
+
className="flex items-center gap-2 text-sm text-muted-foreground hover:text-foreground transition-colors"
|
| 388 |
+
>
|
| 389 |
+
<section.icon className="w-3.5 h-3.5 shrink-0 text-primary" />
|
| 390 |
+
{section.title}
|
| 391 |
+
</a>
|
| 392 |
+
</li>
|
| 393 |
+
))}
|
| 394 |
+
</ul>
|
| 395 |
+
</nav>
|
| 396 |
+
|
| 397 |
+
{/* Sections */}
|
| 398 |
+
<div className="prose prose-sm sm:prose-base dark:prose-invert max-w-none prose-headings:font-semibold prose-headings:tracking-tight prose-h2:text-foreground prose-h3:text-foreground prose-p:text-muted-foreground prose-p:leading-relaxed prose-a:text-primary prose-a:no-underline hover:prose-a:underline prose-strong:text-foreground prose-li:text-muted-foreground prose-li:marker:text-primary/60">
|
| 399 |
+
{sections.map((section) => (
|
| 400 |
+
<section key={section.id} id={section.id} className="mb-12 scroll-mt-20">
|
| 401 |
+
<div className="flex items-center gap-3 mb-6">
|
| 402 |
+
<div className="w-8 h-8 rounded-lg bg-primary/10 flex items-center justify-center shrink-0">
|
| 403 |
+
<section.icon className="w-4 h-4 text-primary" />
|
| 404 |
+
</div>
|
| 405 |
+
<h2 className="text-xl sm:text-2xl !my-0">{section.title}</h2>
|
| 406 |
+
</div>
|
| 407 |
+
{section.content}
|
| 408 |
+
<hr className="mt-8 border-border/30" />
|
| 409 |
+
</section>
|
| 410 |
+
))}
|
| 411 |
+
</div>
|
| 412 |
+
|
| 413 |
+
{/* Footer note */}
|
| 414 |
+
<div className="mt-8 text-center">
|
| 415 |
+
<p className="text-sm text-muted-foreground">
|
| 416 |
+
Have questions?{" "}
|
| 417 |
+
<a
|
| 418 |
+
href="https://github.com/param20h/PDF-Assistant-RAG/discussions"
|
| 419 |
+
target="_blank"
|
| 420 |
+
rel="noopener noreferrer"
|
| 421 |
+
className="text-primary hover:underline"
|
| 422 |
+
>
|
| 423 |
+
Start a discussion
|
| 424 |
+
</a>
|
| 425 |
+
</p>
|
| 426 |
+
</div>
|
| 427 |
+
</div>
|
| 428 |
+
|
| 429 |
+
{/* ── Footer ────────────────────────────────── */}
|
| 430 |
+
<footer className="text-center py-6 text-xs text-muted-foreground border-t border-border/50">
|
| 431 |
+
Built with FastAPI • LangChain • ChromaDB • HuggingFace • Next.js
|
| 432 |
+
</footer>
|
| 433 |
+
</div>
|
| 434 |
+
);
|
| 435 |
+
}
|
frontend/src/components/document/PDFViewer.tsx
CHANGED
|
@@ -3,8 +3,16 @@
|
|
| 3 |
import { useState } from "react";
|
| 4 |
import { Button } from "@/components/ui/button";
|
| 5 |
import { Input } from "@/components/ui/input";
|
| 6 |
-
import { ChevronLeft, ChevronRight, ZoomIn, ZoomOut, Loader2 } from "lucide-react";
|
| 7 |
import { API_BASE } from "@/lib/api";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
interface Props {
|
| 10 |
documentId: string;
|
|
@@ -15,15 +23,24 @@ interface Props {
|
|
| 15 |
|
| 16 |
export default function PDFViewer({ documentId, currentPage, onPageChange, totalPages }: Props) {
|
| 17 |
const [scale, setScale] = useState(1.0);
|
| 18 |
-
const [
|
| 19 |
-
// Local editable value — initialized from currentPage prop.
|
| 20 |
-
// The iframe key={documentId-currentPage} already forces remount on
|
| 21 |
-
// external page changes, so no useEffect sync is needed.
|
| 22 |
const [pageInput, setPageInput] = useState(String(currentPage));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
const pdfUrl = `${API_BASE}/api/v1/documents/${documentId}/pdf`;
|
|
|
|
| 24 |
|
| 25 |
-
//
|
| 26 |
-
const
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
const handlePageSubmit = (e: React.FormEvent) => {
|
| 29 |
e.preventDefault();
|
|
@@ -31,12 +48,10 @@ export default function PDFViewer({ documentId, currentPage, onPageChange, total
|
|
| 31 |
if (!isNaN(num) && num >= 1 && num <= totalPages) {
|
| 32 |
onPageChange(num);
|
| 33 |
} else {
|
| 34 |
-
// Reset to the current valid page without needing a useEffect
|
| 35 |
setPageInput(String(currentPage));
|
| 36 |
}
|
| 37 |
};
|
| 38 |
|
| 39 |
-
|
| 40 |
return (
|
| 41 |
<div className="h-full flex flex-col bg-background">
|
| 42 |
{/* ── Toolbar ─────────────────────────────────── */}
|
|
@@ -46,7 +61,11 @@ export default function PDFViewer({ documentId, currentPage, onPageChange, total
|
|
| 46 |
variant="ghost"
|
| 47 |
size="icon"
|
| 48 |
className="h-7 w-7"
|
| 49 |
-
onClick={() =>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
disabled={currentPage <= 1}
|
| 51 |
>
|
| 52 |
<ChevronLeft className="w-4 h-4" />
|
|
@@ -68,7 +87,11 @@ export default function PDFViewer({ documentId, currentPage, onPageChange, total
|
|
| 68 |
variant="ghost"
|
| 69 |
size="icon"
|
| 70 |
className="h-7 w-7"
|
| 71 |
-
onClick={() =>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
disabled={currentPage >= totalPages}
|
| 73 |
>
|
| 74 |
<ChevronRight className="w-4 h-4" />
|
|
@@ -99,20 +122,50 @@ export default function PDFViewer({ documentId, currentPage, onPageChange, total
|
|
| 99 |
</div>
|
| 100 |
|
| 101 |
{/* ── PDF Render ──────────────────────────────── */}
|
| 102 |
-
<div className="flex-1 overflow-auto relative">
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
</div>
|
| 117 |
</div>
|
| 118 |
);
|
|
|
|
| 3 |
import { useState } from "react";
|
| 4 |
import { Button } from "@/components/ui/button";
|
| 5 |
import { Input } from "@/components/ui/input";
|
| 6 |
+
import { ChevronLeft, ChevronRight, ZoomIn, ZoomOut, Loader2, AlertCircle } from "lucide-react";
|
| 7 |
import { API_BASE } from "@/lib/api";
|
| 8 |
+
import { Document, Page, pdfjs } from "react-pdf";
|
| 9 |
+
|
| 10 |
+
// Import styles for react-pdf layers
|
| 11 |
+
import "react-pdf/dist/Page/AnnotationLayer.css";
|
| 12 |
+
import "react-pdf/dist/Page/TextLayer.css";
|
| 13 |
+
|
| 14 |
+
// Configure PDF.js worker using standard unpkg URL
|
| 15 |
+
pdfjs.GlobalWorkerOptions.workerSrc = `//unpkg.com/pdfjs-dist@${pdfjs.version}/build/pdf.worker.min.mjs`;
|
| 16 |
|
| 17 |
interface Props {
|
| 18 |
documentId: string;
|
|
|
|
| 23 |
|
| 24 |
export default function PDFViewer({ documentId, currentPage, onPageChange, totalPages }: Props) {
|
| 25 |
const [scale, setScale] = useState(1.0);
|
| 26 |
+
const [, setLoading] = useState(true);
|
|
|
|
|
|
|
|
|
|
| 27 |
const [pageInput, setPageInput] = useState(String(currentPage));
|
| 28 |
+
const [prevCurrentPage, setPrevCurrentPage] = useState(currentPage);
|
| 29 |
+
|
| 30 |
+
// Sync page input state with current page prop updates during render phase
|
| 31 |
+
if (currentPage !== prevCurrentPage) {
|
| 32 |
+
setPrevCurrentPage(currentPage);
|
| 33 |
+
setPageInput(String(currentPage));
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
const pdfUrl = `${API_BASE}/api/v1/documents/${documentId}/pdf`;
|
| 37 |
+
const token = typeof window !== "undefined" ? localStorage.getItem("token") : null;
|
| 38 |
|
| 39 |
+
// Configure file object with Authorization headers
|
| 40 |
+
const fileConfig = {
|
| 41 |
+
url: pdfUrl,
|
| 42 |
+
httpHeaders: token ? { Authorization: `Bearer ${token}` } : undefined,
|
| 43 |
+
};
|
| 44 |
|
| 45 |
const handlePageSubmit = (e: React.FormEvent) => {
|
| 46 |
e.preventDefault();
|
|
|
|
| 48 |
if (!isNaN(num) && num >= 1 && num <= totalPages) {
|
| 49 |
onPageChange(num);
|
| 50 |
} else {
|
|
|
|
| 51 |
setPageInput(String(currentPage));
|
| 52 |
}
|
| 53 |
};
|
| 54 |
|
|
|
|
| 55 |
return (
|
| 56 |
<div className="h-full flex flex-col bg-background">
|
| 57 |
{/* ── Toolbar ─────────────────────────────────── */}
|
|
|
|
| 61 |
variant="ghost"
|
| 62 |
size="icon"
|
| 63 |
className="h-7 w-7"
|
| 64 |
+
onClick={() => {
|
| 65 |
+
const newPage = Math.max(1, currentPage - 1);
|
| 66 |
+
onPageChange(newPage);
|
| 67 |
+
setPageInput(String(newPage));
|
| 68 |
+
}}
|
| 69 |
disabled={currentPage <= 1}
|
| 70 |
>
|
| 71 |
<ChevronLeft className="w-4 h-4" />
|
|
|
|
| 87 |
variant="ghost"
|
| 88 |
size="icon"
|
| 89 |
className="h-7 w-7"
|
| 90 |
+
onClick={() => {
|
| 91 |
+
const newPage = Math.min(totalPages, currentPage + 1);
|
| 92 |
+
onPageChange(newPage);
|
| 93 |
+
setPageInput(String(newPage));
|
| 94 |
+
}}
|
| 95 |
disabled={currentPage >= totalPages}
|
| 96 |
>
|
| 97 |
<ChevronRight className="w-4 h-4" />
|
|
|
|
| 122 |
</div>
|
| 123 |
|
| 124 |
{/* ── PDF Render ──────────────────────────────── */}
|
| 125 |
+
<div className="flex-1 overflow-auto bg-muted/30 flex justify-center items-start p-4 relative w-full">
|
| 126 |
+
<Document
|
| 127 |
+
file={fileConfig}
|
| 128 |
+
onLoadSuccess={() => setLoading(false)}
|
| 129 |
+
onLoadError={(err) => {
|
| 130 |
+
console.error("PDF load error:", err);
|
| 131 |
+
setLoading(false);
|
| 132 |
+
}}
|
| 133 |
+
loading={
|
| 134 |
+
<div className="absolute inset-0 flex items-center justify-center bg-background/80 z-10">
|
| 135 |
+
<Loader2 className="w-6 h-6 animate-spin text-primary" />
|
| 136 |
+
</div>
|
| 137 |
+
}
|
| 138 |
+
error={
|
| 139 |
+
<div className="flex flex-col items-center justify-center p-8 text-center bg-card border border-destructive/20 rounded-lg max-w-md mx-auto my-12 shadow-sm gap-3">
|
| 140 |
+
<AlertCircle className="w-8 h-8 text-destructive animate-pulse" />
|
| 141 |
+
<div>
|
| 142 |
+
<p className="font-semibold text-sm text-foreground mb-1">Failed to load PDF</p>
|
| 143 |
+
<p className="text-xs text-muted-foreground leading-relaxed">
|
| 144 |
+
We encountered an error loading this PDF document. Please verify the document is ready or try refreshing the page.
|
| 145 |
+
</p>
|
| 146 |
+
</div>
|
| 147 |
+
</div>
|
| 148 |
+
}
|
| 149 |
+
noData={
|
| 150 |
+
<div className="flex flex-col items-center justify-center p-8 text-center bg-card border border-border rounded-lg max-w-md mx-auto my-12 shadow-sm gap-2">
|
| 151 |
+
<p className="font-semibold text-sm text-foreground">No PDF document selected</p>
|
| 152 |
+
<p className="text-xs text-muted-foreground">Select or upload a document to view it here.</p>
|
| 153 |
+
</div>
|
| 154 |
+
}
|
| 155 |
+
className="shadow-md border border-border bg-card max-w-full"
|
| 156 |
+
>
|
| 157 |
+
<Page
|
| 158 |
+
pageNumber={currentPage}
|
| 159 |
+
scale={scale}
|
| 160 |
+
renderAnnotationLayer={false}
|
| 161 |
+
renderTextLayer={true}
|
| 162 |
+
loading={
|
| 163 |
+
<div className="flex items-center justify-center p-8">
|
| 164 |
+
<Loader2 className="w-6 h-6 animate-spin text-primary" />
|
| 165 |
+
</div>
|
| 166 |
+
}
|
| 167 |
+
/>
|
| 168 |
+
</Document>
|
| 169 |
</div>
|
| 170 |
</div>
|
| 171 |
);
|
frontend/src/components/layout/Header.tsx
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
"use client";
|
| 2 |
|
|
|
|
| 3 |
import { useAuth } from "@/lib/auth";
|
| 4 |
import { useTranslation } from "react-i18next";
|
| 5 |
import { useRouter } from "next/navigation";
|
|
@@ -22,28 +23,39 @@ import {
|
|
| 22 |
Moon,
|
| 23 |
Shield,
|
| 24 |
Sun,
|
|
|
|
|
|
|
| 25 |
} from "lucide-react";
|
| 26 |
-
import { useSyncExternalStore } from "react";
|
| 27 |
import { useTheme } from "next-themes";
|
| 28 |
|
|
|
|
| 29 |
|
| 30 |
interface HeaderProps {
|
| 31 |
sidebarOpen: boolean;
|
| 32 |
onToggleSidebar: () => void;
|
| 33 |
viewerOpen: boolean;
|
| 34 |
onToggleViewer: () => void;
|
|
|
|
|
|
|
| 35 |
}
|
| 36 |
|
| 37 |
const subscribe = () => () => {};
|
| 38 |
const getSnapshot = () => true;
|
| 39 |
const getServerSnapshot = () => false;
|
| 40 |
|
| 41 |
-
export default function Header({
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
const { user, logout } = useAuth();
|
| 43 |
const { t, i18n } = useTranslation();
|
| 44 |
const router = useRouter();
|
| 45 |
const { theme, setTheme } = useTheme();
|
| 46 |
-
const mounted = useSyncExternalStore(subscribe, getSnapshot, getServerSnapshot);
|
|
|
|
| 47 |
|
| 48 |
const isDark = theme === "dark";
|
| 49 |
const toggleTheme = () => setTheme(isDark ? "light" : "dark");
|
|
@@ -67,79 +79,147 @@ export default function Header({ sidebarOpen, onToggleSidebar, viewerOpen, onTog
|
|
| 67 |
};
|
| 68 |
|
| 69 |
return (
|
| 70 |
-
<
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
<
|
| 74 |
-
{
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
-
|
| 78 |
-
<
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
</div>
|
| 81 |
-
<span className="font-semibold text-sm hidden sm:inline">{t("common.appName")}</span>
|
| 82 |
</div>
|
| 83 |
-
</div>
|
| 84 |
-
|
| 85 |
-
{/* Right */}
|
| 86 |
-
<div className="flex items-center gap-2">
|
| 87 |
-
<Button variant="ghost" size="icon" className="h-8 w-8" onClick={onToggleViewer} title={viewerOpen ? t("header.closeViewer") : t("header.openViewer")}>
|
| 88 |
-
{viewerOpen ? <PanelRightClose className="w-4 h-4" /> : <PanelRightOpen className="w-4 h-4" />}
|
| 89 |
-
</Button>
|
| 90 |
|
| 91 |
-
{
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
</Button>
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
<
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
{user?.is_admin && (
|
| 130 |
-
<DropdownMenuItem className="cursor-pointer" onClick={() => router.push("/admin")}>
|
| 131 |
-
<Shield className="w-4 h-4 mr-2" />
|
| 132 |
-
Admin metrics
|
| 133 |
</DropdownMenuItem>
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
</div>
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
);
|
| 145 |
}
|
|
|
|
| 1 |
"use client";
|
| 2 |
|
| 3 |
+
import { useState } from "react";
|
| 4 |
import { useAuth } from "@/lib/auth";
|
| 5 |
import { useTranslation } from "react-i18next";
|
| 6 |
import { useRouter } from "next/navigation";
|
|
|
|
| 23 |
Moon,
|
| 24 |
Shield,
|
| 25 |
Sun,
|
| 26 |
+
Menu,
|
| 27 |
+
X,
|
| 28 |
} from "lucide-react";
|
|
|
|
| 29 |
import { useTheme } from "next-themes";
|
| 30 |
|
| 31 |
+
import { useSyncExternalStore } from "react";
|
| 32 |
|
| 33 |
interface HeaderProps {
|
| 34 |
sidebarOpen: boolean;
|
| 35 |
onToggleSidebar: () => void;
|
| 36 |
viewerOpen: boolean;
|
| 37 |
onToggleViewer: () => void;
|
| 38 |
+
/** Pass DocumentSidebar JSX so the mobile sheet can render it */
|
| 39 |
+
mobileSheetContent?: React.ReactNode;
|
| 40 |
}
|
| 41 |
|
| 42 |
const subscribe = () => () => {};
|
| 43 |
const getSnapshot = () => true;
|
| 44 |
const getServerSnapshot = () => false;
|
| 45 |
|
| 46 |
+
export default function Header({
|
| 47 |
+
sidebarOpen,
|
| 48 |
+
onToggleSidebar,
|
| 49 |
+
viewerOpen,
|
| 50 |
+
onToggleViewer,
|
| 51 |
+
mobileSheetContent,
|
| 52 |
+
}: HeaderProps) {
|
| 53 |
const { user, logout } = useAuth();
|
| 54 |
const { t, i18n } = useTranslation();
|
| 55 |
const router = useRouter();
|
| 56 |
const { theme, setTheme } = useTheme();
|
| 57 |
+
const mounted = useSyncExternalStore(subscribe, getSnapshot, getServerSnapshot);
|
| 58 |
+
const [sheetOpen, setSheetOpen] = useState(false);
|
| 59 |
|
| 60 |
const isDark = theme === "dark";
|
| 61 |
const toggleTheme = () => setTheme(isDark ? "light" : "dark");
|
|
|
|
| 79 |
};
|
| 80 |
|
| 81 |
return (
|
| 82 |
+
<>
|
| 83 |
+
<header className="h-14 flex items-center justify-between px-4 border-b border-border/50 bg-card/50 backdrop-blur-md flex-shrink-0 z-50">
|
| 84 |
+
{/* Left */}
|
| 85 |
+
<div className="flex items-center gap-3">
|
| 86 |
+
{/* Hamburger — mobile only */}
|
| 87 |
+
<Button
|
| 88 |
+
variant="ghost"
|
| 89 |
+
size="icon"
|
| 90 |
+
className="h-8 w-8 md:hidden"
|
| 91 |
+
onClick={() => setSheetOpen(true)}
|
| 92 |
+
title="Open sidebar"
|
| 93 |
+
>
|
| 94 |
+
<Menu className="w-4 h-4" />
|
| 95 |
+
</Button>
|
| 96 |
|
| 97 |
+
{/* Desktop sidebar toggle — hidden on mobile */}
|
| 98 |
+
<Button
|
| 99 |
+
variant="ghost"
|
| 100 |
+
size="icon"
|
| 101 |
+
className="h-8 w-8 hidden md:inline-flex"
|
| 102 |
+
onClick={onToggleSidebar}
|
| 103 |
+
title={sidebarOpen ? "Close sidebar" : "Open sidebar"}
|
| 104 |
+
>
|
| 105 |
+
{sidebarOpen ? (
|
| 106 |
+
<PanelLeftClose className="w-4 h-4" />
|
| 107 |
+
) : (
|
| 108 |
+
<PanelLeftOpen className="w-4 h-4" />
|
| 109 |
+
)}
|
| 110 |
+
</Button>
|
| 111 |
+
|
| 112 |
+
<div className="flex items-center gap-2">
|
| 113 |
+
<div className="w-7 h-7 rounded-lg bg-primary/15 flex items-center justify-center">
|
| 114 |
+
<Brain className="w-4 h-4 text-primary" />
|
| 115 |
+
</div>
|
| 116 |
+
<span className="font-semibold text-sm hidden sm:inline">
|
| 117 |
+
Document AI Analyst
|
| 118 |
+
</span>
|
| 119 |
</div>
|
|
|
|
| 120 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
+
{/* Right */}
|
| 123 |
+
<div className="flex items-center gap-2">
|
| 124 |
+
<Button
|
| 125 |
+
variant="ghost"
|
| 126 |
+
size="icon"
|
| 127 |
+
className="h-8 w-8"
|
| 128 |
+
onClick={onToggleViewer}
|
| 129 |
+
title={viewerOpen ? "Close viewer" : "Open viewer"}
|
| 130 |
+
>
|
| 131 |
+
{viewerOpen ? (
|
| 132 |
+
<PanelRightClose className="w-4 h-4" />
|
| 133 |
+
) : (
|
| 134 |
+
<PanelRightOpen className="w-4 h-4" />
|
| 135 |
+
)}
|
| 136 |
</Button>
|
| 137 |
+
|
| 138 |
+
{mounted && (
|
| 139 |
+
<Button
|
| 140 |
+
variant="ghost"
|
| 141 |
+
size="icon"
|
| 142 |
+
className="h-8 w-8"
|
| 143 |
+
onClick={toggleTheme}
|
| 144 |
+
title={isDark ? "Light mode" : "Dark mode"}
|
| 145 |
+
>
|
| 146 |
+
{isDark ? <Sun className="w-4 h-4" /> : <Moon className="w-4 h-4" />}
|
| 147 |
+
</Button>
|
| 148 |
+
)}
|
| 149 |
+
|
| 150 |
+
<DropdownMenu>
|
| 151 |
+
<DropdownMenuTrigger className="flex items-center h-8 gap-2 px-2 rounded-md hover:bg-accent transition-colors cursor-pointer">
|
| 152 |
+
<Avatar className="w-6 h-6">
|
| 153 |
+
<AvatarFallback className="text-[10px] bg-primary/20 text-primary">
|
| 154 |
+
{user?.username?.slice(0, 2).toUpperCase() || "U"}
|
| 155 |
+
</AvatarFallback>
|
| 156 |
+
</Avatar>
|
| 157 |
+
<span className="text-sm hidden sm:inline">{user?.username}</span>
|
| 158 |
+
</DropdownMenuTrigger>
|
| 159 |
+
<DropdownMenuContent align="end" className="w-48">
|
| 160 |
+
<div className="px-3 py-2">
|
| 161 |
+
<p className="text-sm font-medium">{user?.username}</p>
|
| 162 |
+
<p className="text-xs text-muted-foreground truncate">{user?.email}</p>
|
| 163 |
+
</div>
|
| 164 |
+
<DropdownMenuSeparator />
|
| 165 |
+
<DropdownMenuItem
|
| 166 |
+
className="text-destructive cursor-pointer"
|
| 167 |
+
onClick={handleLogout}
|
| 168 |
+
>
|
| 169 |
+
<LogOut className="w-4 h-4 mr-2" />
|
| 170 |
+
Sign out
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
</DropdownMenuItem>
|
| 172 |
+
</DropdownMenuContent>
|
| 173 |
+
</DropdownMenu>
|
| 174 |
+
</div>
|
| 175 |
+
</header>
|
| 176 |
+
|
| 177 |
+
{/* ── Mobile Navigation Sheet ──────────────────────────────────── */}
|
| 178 |
+
{/* Backdrop */}
|
| 179 |
+
{sheetOpen && (
|
| 180 |
+
<div
|
| 181 |
+
className="fixed inset-0 z-40 bg-black/50 backdrop-blur-sm md:hidden"
|
| 182 |
+
onClick={() => setSheetOpen(false)}
|
| 183 |
+
aria-hidden="true"
|
| 184 |
+
/>
|
| 185 |
+
)}
|
| 186 |
+
|
| 187 |
+
{/* Slide-in panel */}
|
| 188 |
+
<aside
|
| 189 |
+
className={[
|
| 190 |
+
"fixed inset-y-0 left-0 z-50 w-72 flex flex-col",
|
| 191 |
+
"bg-sidebar border-r border-sidebar-border",
|
| 192 |
+
"transform transition-transform duration-300 ease-in-out md:hidden",
|
| 193 |
+
sheetOpen ? "translate-x-0" : "-translate-x-full",
|
| 194 |
+
].join(" ")}
|
| 195 |
+
aria-label="Mobile navigation"
|
| 196 |
+
aria-hidden={!sheetOpen}
|
| 197 |
+
inert={!sheetOpen ? true : undefined}
|
| 198 |
+
>
|
| 199 |
+
{/* Sheet header */}
|
| 200 |
+
<div className="h-14 flex items-center justify-between px-4 border-b border-sidebar-border flex-shrink-0">
|
| 201 |
+
<div className="flex items-center gap-2">
|
| 202 |
+
<div className="w-7 h-7 rounded-lg bg-primary/15 flex items-center justify-center">
|
| 203 |
+
<Brain className="w-4 h-4 text-primary" />
|
| 204 |
</div>
|
| 205 |
+
<span className="font-semibold text-sm">Document AI Analyst</span>
|
| 206 |
+
</div>
|
| 207 |
+
<Button
|
| 208 |
+
variant="ghost"
|
| 209 |
+
size="icon"
|
| 210 |
+
className="h-8 w-8"
|
| 211 |
+
onClick={() => setSheetOpen(false)}
|
| 212 |
+
aria-label="Close navigation"
|
| 213 |
+
>
|
| 214 |
+
<X className="w-4 h-4" />
|
| 215 |
+
</Button>
|
| 216 |
+
</div>
|
| 217 |
+
|
| 218 |
+
{/* Sidebar content */}
|
| 219 |
+
<div className="flex-1 overflow-hidden">
|
| 220 |
+
{sheetOpen ? mobileSheetContent : null}
|
| 221 |
+
</div>
|
| 222 |
+
</aside>
|
| 223 |
+
</>
|
| 224 |
);
|
| 225 |
}
|