Yuvraj Sarathe committed on
Commit
c1d1230
·
unverified ·
2 Parent(s): 465cfce ef02d76

Merge branch 'dev' into Hugging-Face-Token-Input

Browse files
.github/ISSUE_TEMPLATE/bug_report.yml CHANGED
@@ -59,7 +59,7 @@ body:
59
  - type: checkboxes
60
  id: gssoc
61
  attributes:
62
- label: "GSSoC '24"
63
  description: "Are you a GSSoC contributor?"
64
  options:
65
  - label: "Yes, I am participating in GirlScript Summer of Code and would like to fix this."
 
59
  - type: checkboxes
60
  id: gssoc
61
  attributes:
62
+ label: "GSSoC '26"
63
  description: "Are you a GSSoC contributor?"
64
  options:
65
  - label: "Yes, I am participating in GirlScript Summer of Code and would like to fix this."
.github/ISSUE_TEMPLATE/feature_request.yml CHANGED
@@ -42,7 +42,7 @@ body:
42
  - type: checkboxes
43
  id: gssoc
44
  attributes:
45
- label: "GSSoC '24"
46
  description: "Are you a GSSoC contributor?"
47
  options:
48
  - label: "Yes, I am participating in GirlScript Summer of Code and would like to build this."
 
42
  - type: checkboxes
43
  id: gssoc
44
  attributes:
45
+ label: "GSSoC '26"
46
  description: "Are you a GSSoC contributor?"
47
  options:
48
  - label: "Yes, I am participating in GirlScript Summer of Code and would like to build this."
.github/workflows/sync-issue-labels.yml ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync Labels — Issue to PR
2
+
3
+ # ──────────────────────────────────────────────────────────
4
+ # Auto-syncs labels from referenced issue(s) to the PR when
5
+ # a PR targeting `dev` is merged.
6
+ #
7
+ # Why pull_request_target:
8
+ # Label operations need write permissions on the target
9
+ # repo. pull_request_target runs in the context of the
10
+ # base repo with access to secrets and write token.
11
+ # Since we only read issue data and apply labels, there
12
+ # is no security concern.
13
+ # ──────────────────────────────────────────────────────────
14
+
15
+ on:
16
+ pull_request_target:
17
+ types: [closed]
18
+ branches: ["dev"]
19
+
20
+ permissions:
21
+ contents: read
22
+ issues: read
23
+ pull-requests: write
24
+
25
+ jobs:
26
+ sync-labels:
27
+ name: Sync labels from referenced issue
28
+ runs-on: ubuntu-latest
29
+ if: github.event.pull_request.merged == true
30
+
31
+ steps:
32
+ - name: Extract issue numbers from PR body
33
+ id: extract
34
+ env:
35
+ PR_BODY: ${{ github.event.pull_request.body }}
36
+ run: |
37
+ # Match patterns:
38
+ # "Closes #123"
39
+ # "Fixes #456, #789" (comma-separated)
40
+ # "Resolves #111, #222, #333"
41
+ #
42
+ # Approach: grab lines containing a keyword, then
43
+ # extract every NNN from those lines.
44
+ # We place '|| true' at the very end of the pipeline so it doesn't short-circuit.
45
+ ISSUES=$(
46
+ echo "${PR_BODY:-}" \
47
+ | grep -ioE '.*(closes|fixes|resolves).*' \
48
+ | grep -oE '#[0-9]+' \
49
+ | grep -oE '[0-9]+' \
50
+ | sort -un \
51
+ | xargs \
52
+ || true
53
+ )
54
+ echo "Found issues: [$ISSUES]"
55
+ echo "issues=$ISSUES" >> "$GITHUB_OUTPUT"
56
+
57
+ - name: Fetch and apply labels
58
+ env:
59
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
60
+ ISSUES: ${{ steps.extract.outputs.issues }}
61
+ PR_NUMBER: ${{ github.event.pull_request.number }}
62
+ REPO: ${{ github.repository }}
63
+ run: |
64
+ set -euo pipefail
65
+
66
+ ALL_LABELS="gssoc"$'\n'"gssoc:approved"$'\n'"mentor:param20h"$'\n'
67
+
68
+ for ISSUE in $ISSUES; do
69
+ echo "--- Fetching labels for #$ISSUE ---"
70
+
71
+ LABELS=$(gh issue view "$ISSUE" --repo "$REPO" --json labels --jq '.labels[].name' 2>/dev/null || true)
72
+
73
+ if [ -z "$LABELS" ]; then
74
+ echo " → No labels on #$ISSUE, skipping"
75
+ continue
76
+ fi
77
+
78
+ echo " → Labels: $(echo "$LABELS" | tr '\n' ' ')"
79
+
80
+ # Accumulate labels (newline-separated, deduplicated later)
81
+ ALL_LABELS="${ALL_LABELS}${LABELS}"$'\n'
82
+ done
83
+
84
+ if [ -z "$ALL_LABELS" ]; then
85
+ echo "No labels to apply. Exiting."
86
+ exit 0
87
+ fi
88
+
89
+ # Deduplicate and remove empty lines
90
+ UNIQUE_LABELS=$(echo "$ALL_LABELS" | sort -u | grep -v '^$')
91
+
92
+ echo ""
93
+ echo "=== Applying labels to PR #$PR_NUMBER ==="
94
+ echo "$UNIQUE_LABELS"
95
+
96
+ # Get labels already on the PR
97
+ EXISTING=$(gh pr view "$PR_NUMBER" --repo "$REPO" --json labels --jq '.labels[].name' 2>/dev/null || true)
98
+
99
+ MISSING=0
100
+ while IFS= read -r LABEL; do
101
+ [ -z "$LABEL" ] && continue
102
+ if echo "$EXISTING" | grep -qxF "$LABEL"; then
103
+ echo " ✓ Already present: $LABEL"
104
+ else
105
+ echo " + Adding: $LABEL"
106
+ gh label create "$LABEL" --repo "$REPO" 2>/dev/null || true # create if not exists
107
+ gh pr edit "$PR_NUMBER" --repo "$REPO" --add-label "$LABEL"
108
+ MISSING=$((MISSING + 1))
109
+ fi
110
+ done <<< "$UNIQUE_LABELS"
111
+
112
+ if [ "$MISSING" -eq 0 ]; then
113
+ echo "All labels already synced — nothing to add."
114
+ else
115
+ echo "Done. Added $MISSING label(s) to PR #$PR_NUMBER."
116
+ fi
117
+
118
+ - name: Calculate GSSoC Points and Comment
119
+ env:
120
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
121
+ PR_NUMBER: ${{ github.event.pull_request.number }}
122
+ REPO: ${{ github.repository }}
123
+ run: |
124
+ set -euo pipefail
125
+
126
+ echo "Calculating GSSoC points for PR #$PR_NUMBER..."
127
+
128
+ # Fetch all labels currently on the PR (including the ones we just synced)
129
+ PR_LABELS=$(gh pr view "$PR_NUMBER" --repo "$REPO" --json labels --jq '.labels[].name' 2>/dev/null || true)
130
+
131
+ POINTS=0
132
+
133
+ while IFS= read -r LABEL; do
134
+ [ -z "$LABEL" ] && continue
135
+ case "$LABEL" in
136
+ "level:beginner") POINTS=$((POINTS + 20)) ;;
137
+ "level:intermediate") POINTS=$((POINTS + 35)) ;;
138
+ "level:advanced") POINTS=$((POINTS + 55)) ;;
139
+ "level:critical") POINTS=$((POINTS + 80)) ;;
140
+ "type:accessibility") POINTS=$((POINTS + 15)) ;;
141
+ "type:bug") POINTS=$((POINTS + 10)) ;;
142
+ "type:design") POINTS=$((POINTS + 10)) ;;
143
+ "type:devops") POINTS=$((POINTS + 15)) ;;
144
+ "type:docs") POINTS=$((POINTS + 5)) ;;
145
+ "type:feature") POINTS=$((POINTS + 10)) ;;
146
+ "type:performance") POINTS=$((POINTS + 15)) ;;
147
+ "type:refactor") POINTS=$((POINTS + 10)) ;;
148
+ "type:security") POINTS=$((POINTS + 20)) ;;
149
+ "type:testing") POINTS=$((POINTS + 10)) ;;
150
+ esac
151
+ done <<< "$PR_LABELS"
152
+
153
+ echo "Total Points Calculated: $POINTS"
154
+
155
+ if [ "$POINTS" -gt 0 ]; then
156
+ printf -v COMMENT "🎉 **Congratulations on getting your Pull Request merged!** 🎉\n\nThank you for contributing to PDF-Assistant-RAG as part of GSSoC '26! 🚀\n\nKeep up the great work! ✨"
157
+
158
+ # Post the comment to the PR
159
+ echo "Posting comment..."
160
+ gh pr comment "$PR_NUMBER" --repo "$REPO" --body "$COMMENT"
161
+ else
162
+ echo "No scorable labels found. No comment posted."
163
+ fi
164
+
README.md CHANGED
@@ -362,7 +362,25 @@ CHROMA_PERSIST_DIR=./data/chroma_db
362
 
363
  > Get your free HuggingFace token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
364
 
365
- ### 3. Run Locally
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
  Open **two terminals**:
368
 
@@ -384,7 +402,7 @@ npm run dev
384
  # → App running at http://localhost:3000
385
  ```
386
 
387
- ### 4. Run with Docker
388
 
389
  ```bash
390
  docker compose up --build
 
362
 
363
  > Get your free HuggingFace token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
364
 
365
+ ### 3. Set up crawl4ai (URL Upload Feature)
366
+
367
+ The URL upload feature (`POST /api/v1/documents/urlupload`) uses **crawl4ai** with a Playwright browser to crawl web pages. `crawl4ai-setup` handles the Playwright browser installation automatically — run it once after `pip install`:
368
+
369
+ ```bash
370
+ crawl4ai-setup
371
+ ```
372
+
373
+ > **Linux / Docker users:** If Chromium fails to launch due to missing system libraries, also run:
374
+ > ```bash
375
+ > playwright install-deps chromium
376
+ > ```
377
+ > This installs OS-level dependencies (libnss, libatk, etc.) on fresh Ubuntu/Debian servers.
378
+
379
+ > **Windows users:** No extra steps — the `NotImplementedError` (SelectorEventLoop + subprocess) is already handled in the backend automatically.
380
+
381
+ ---
382
+
383
+ ### 4. Run Locally
384
 
385
  Open **two terminals**:
386
 
 
402
  # → App running at http://localhost:3000
403
  ```
404
 
405
+ ### 5. Run with Docker
406
 
407
  ```bash
408
  docker compose up --build
backend/app/auth.py CHANGED
@@ -12,7 +12,7 @@ from sqlalchemy.orm import Session
12
 
13
  from app.config import get_settings
14
  from app.database import get_db
15
- from app.models import User
16
 
17
  settings = get_settings()
18
  security = HTTPBearer()
@@ -30,10 +30,10 @@ def verify_password(plain: str, hashed: str) -> bool:
30
 
31
  # ── JWT Token ────────────────────────────────────────
32
 
33
- def create_access_token(user_id: str) -> str:
34
  """Create a JWT access token with user_id as the subject."""
35
  payload = {
36
- "sub": user_id,
37
  "type": "access",
38
  "exp": datetime.now(timezone.utc) + timedelta(minutes=settings.JWT_ACCESS_EXPIRY_MINUTES),
39
  "iat": datetime.now(timezone.utc),
@@ -41,10 +41,10 @@ def create_access_token(user_id: str) -> str:
41
  return jwt.encode(payload, settings.SECRET_KEY, algorithm=settings.JWT_ALGORITHM)
42
 
43
 
44
- def create_refresh_token(user_id: str) -> str:
45
  """Create a JWT refresh token with user_id as the subject."""
46
  payload = {
47
- "sub": user_id,
48
  "type": "refresh",
49
  "exp": datetime.now(timezone.utc) + timedelta(days=settings.JWT_REFRESH_EXPIRY_DAYS),
50
  "iat": datetime.now(timezone.utc),
@@ -120,11 +120,39 @@ def get_current_user(
120
  return user
121
 
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  def get_admin_user(user: User = Depends(get_current_user)) -> User:
124
- """Dependency: require admin privileges."""
125
- if not user.is_admin:
 
 
 
126
  raise HTTPException(
127
  status_code=status.HTTP_403_FORBIDDEN,
128
  detail="Admin access required",
129
  )
130
  return user
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  from app.config import get_settings
14
  from app.database import get_db
15
+ from app.models import User, UserRole
16
 
17
  settings = get_settings()
18
  security = HTTPBearer()
 
30
 
31
  # ── JWT Token ────────────────────────────────────────
32
 
33
+ def create_access_token(user_id) -> str:
34
  """Create a JWT access token with user_id as the subject."""
35
  payload = {
36
+ "sub": str(user_id),
37
  "type": "access",
38
  "exp": datetime.now(timezone.utc) + timedelta(minutes=settings.JWT_ACCESS_EXPIRY_MINUTES),
39
  "iat": datetime.now(timezone.utc),
 
41
  return jwt.encode(payload, settings.SECRET_KEY, algorithm=settings.JWT_ALGORITHM)
42
 
43
 
44
+ def create_refresh_token(user_id) -> str:
45
  """Create a JWT refresh token with user_id as the subject."""
46
  payload = {
47
+ "sub": str(user_id),
48
  "type": "refresh",
49
  "exp": datetime.now(timezone.utc) + timedelta(days=settings.JWT_REFRESH_EXPIRY_DAYS),
50
  "iat": datetime.now(timezone.utc),
 
120
  return user
121
 
122
 
123
+ def _is_admin_user(user: User) -> bool:
124
+ """
125
+ Check if a user has administrative privileges.
126
+ Supports both the modern 'role' field and the legacy 'is_admin' boolean.
127
+ """
128
+ if not user:
129
+ return False
130
+
131
+ # We check the role first (it can be an Enum or a plain string depending on the environment)
132
+ role_check = (user.role == UserRole.admin) or (str(user.role) == "admin")
133
+
134
+ # Fallback to the legacy is_admin flag
135
+ return role_check or bool(user.is_admin)
136
+
137
+
138
  def get_admin_user(user: User = Depends(get_current_user)) -> User:
139
+ """
140
+ FastAPI dependency that restricts access to administrators only.
141
+ Raises 403 Forbidden if the user lacks sufficient permissions.
142
+ """
143
+ if not _is_admin_user(user):
144
  raise HTTPException(
145
  status_code=status.HTTP_403_FORBIDDEN,
146
  detail="Admin access required",
147
  )
148
  return user
149
+
150
+
151
+ async def get_current_admin(
152
+ current_user: User = Depends(get_current_user)
153
+ ) -> User:
154
+ """
155
+ Alias for get_admin_user to maintain compatibility with existing routes.
156
+ Ensures the requesting user has administrative rights.
157
+ """
158
+ return get_admin_user(current_user)
backend/app/config.py CHANGED
@@ -33,7 +33,10 @@ class Settings(BaseSettings):
33
  ".docx": [
34
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
35
  "application/zip",
36
- ]
 
 
 
37
  }
38
 
39
  # ── RAG Pipeline ─────────────────────────────────────
 
33
  ".docx": [
34
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
35
  "application/zip",
36
+ ],
37
+ ".txt": ["text/plain"],
38
+ ".md": ["text/markdown"],
39
+
40
  }
41
 
42
  # ── RAG Pipeline ─────────────────────────────────────
backend/app/database.py CHANGED
@@ -48,6 +48,7 @@ def _migrate_schema():
48
 
49
  migrations = [
50
  ("users", "hf_token", "ALTER TABLE users ADD COLUMN hf_token VARCHAR(255)"),
 
51
  ]
52
 
53
  for table, column, ddl in migrations:
 
48
 
49
  migrations = [
50
  ("users", "hf_token", "ALTER TABLE users ADD COLUMN hf_token VARCHAR(255)"),
51
+ ("users", "role", "ALTER TABLE users ADD COLUMN role VARCHAR(20) DEFAULT 'user'"),
52
  ]
53
 
54
  for table, column, ddl in migrations:
backend/app/models.py CHANGED
@@ -2,27 +2,128 @@
2
  SQLAlchemy ORM models for users, documents, and chat messages.
3
  """
4
  import uuid
 
 
 
5
  from datetime import datetime, timezone
 
 
6
  from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, Text, Boolean
 
 
 
 
7
  from sqlalchemy.orm import relationship
 
8
  from app.database import Base
9
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def generate_uuid():
 
12
  return str(uuid.uuid4())
13
 
14
 
 
 
 
 
 
 
 
 
 
 
15
  class User(Base):
 
 
 
 
16
  __tablename__ = "users"
17
 
18
- id = Column(String, primary_key=True, default=generate_uuid)
19
  username = Column(String(80), unique=True, nullable=False, index=True)
20
  email = Column(String(120), unique=True, nullable=False, index=True)
21
  hashed_password = Column(String(255), nullable=False)
 
 
 
 
 
 
 
 
22
  is_admin = Column(Boolean, default=False)
 
23
  created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
24
  last_login = Column(DateTime, nullable=True, index=True)
25
- hf_token = Column(String(255), nullable=True)
26
 
27
  # Relationships
28
  documents = relationship("Document", back_populates="owner", cascade="all, delete-orphan")
@@ -31,10 +132,13 @@ class User(Base):
31
 
32
 
33
  class ApiKey(Base):
 
 
 
34
  __tablename__ = "api_keys"
35
 
36
- id = Column(String, primary_key=True, default=generate_uuid)
37
- user_id = Column(String, ForeignKey("users.id"), nullable=False, index=True)
38
  key_prefix = Column(String(10), nullable=False)
39
  hashed_key = Column(String(255), nullable=False, unique=True, index=True)
40
  created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
@@ -45,19 +149,27 @@ class ApiKey(Base):
45
 
46
 
47
  class Document(Base):
 
 
 
48
  __tablename__ = "documents"
49
 
50
- id = Column(String, primary_key=True, default=generate_uuid)
51
- user_id = Column(String, ForeignKey("users.id"), nullable=False, index=True)
52
  filename = Column(String(255), nullable=False) # Stored filename (UUID-based)
53
  original_name = Column(String(255), nullable=False) # User's original filename
54
  file_size = Column(Integer, default=0) # Size in bytes
 
 
 
 
 
55
  page_count = Column(Integer, default=0)
56
  chunk_count = Column(Integer, default=0)
57
- status = Column(String(20), default="pending") # pending | processing | ready | failed
58
  error_message = Column(Text, nullable=True)
59
  uploaded_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
60
- summary = Column(Text, nullable=True) # Optional summary of the document's content
61
 
62
  # Relationships
63
  owner = relationship("User", back_populates="documents")
@@ -65,14 +177,17 @@ class Document(Base):
65
 
66
 
67
  class ChatMessage(Base):
 
 
 
68
  __tablename__ = "chat_messages"
69
 
70
- id = Column(String, primary_key=True, default=generate_uuid)
71
- user_id = Column(String, ForeignKey("users.id"), nullable=False, index=True)
72
- document_id = Column(String, ForeignKey("documents.id"), nullable=True, index=True)
73
  role = Column(String(20), nullable=False) # "user" | "assistant"
74
  content = Column(Text, nullable=False)
75
- sources_json = Column(Text, nullable=True) # JSON string of source citations
76
  created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
77
 
78
  # Relationships
@@ -82,10 +197,13 @@ class ChatMessage(Base):
82
 
83
 
84
  class SharedMessage(Base):
 
 
 
85
  __tablename__ = "shared_messages"
86
 
87
- id = Column(String, primary_key=True, default=generate_uuid)
88
- message_id = Column(String, ForeignKey("chat_messages.id"), nullable=False, unique=True, index=True)
89
  created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
90
 
91
  # Relationships
 
2
  SQLAlchemy ORM models for users, documents, and chat messages.
3
  """
4
  import uuid
5
+ import enum
6
+ import base64
7
+ import hashlib
8
  from datetime import datetime, timezone
9
+
10
+ from cryptography.fernet import Fernet
11
  from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, Text, Boolean
12
+ from sqlalchemy.types import TypeDecorator, CHAR
13
+ from sqlalchemy.dialects.postgresql import UUID as PG_UUID
14
+ from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, Text, Boolean, Enum as SQLAlchemyEnum
15
+ from sqlalchemy.types import TypeDecorator
16
  from sqlalchemy.orm import relationship
17
+
18
  from app.database import Base
19
 
20
 
21
+ class GUID(TypeDecorator):
22
+ """Platform-independent GUID type.
23
+ Uses PostgreSQL's UUID type, otherwise uses CHAR(36).
24
+ """
25
+ impl = CHAR
26
+ cache_ok = True
27
+
28
+ def load_dialect_impl(self, dialect):
29
+ if dialect.name == 'postgresql':
30
+ return dialect.type_descriptor(PG_UUID(as_uuid=True))
31
+ else:
32
+ return dialect.type_descriptor(CHAR(36))
33
+
34
+ def process_bind_param(self, value, dialect):
35
+ if value is None:
36
+ return value
37
+ if isinstance(value, uuid.UUID):
38
+ return value if dialect.name == 'postgresql' else str(value)
39
+ try:
40
+ val_uuid = uuid.UUID(value)
41
+ return val_uuid if dialect.name == 'postgresql' else str(val_uuid)
42
+ except ValueError:
43
+ if dialect.name == 'postgresql':
44
+ return uuid.UUID(int=0)
45
+ return value
46
+
47
+ def process_result_value(self, value, dialect):
48
+ if value is None:
49
+ return value
50
+ return str(value)
51
+
52
+
53
+ class EncryptedString(TypeDecorator):
54
+ """
55
+ A custom SQLAlchemy type that transparently encrypts strings in the database
56
+ using Fernet (AES). This ensures sensitive tokens aren't stored in plain text
57
+ while remaining easily accessible in code.
58
+ """
59
+ impl = Text
60
+ cache_ok = False
61
+
62
+ def _get_cipher(self):
63
+ from app.config import get_settings
64
+ settings = get_settings()
65
+ # Derive a 32-byte key from the SECRET_KEY for Fernet encryption
66
+ key = base64.urlsafe_b64encode(hashlib.sha256(settings.SECRET_KEY.encode()).digest())
67
+ return Fernet(key)
68
+
69
+ def process_bind_param(self, value, dialect):
70
+ """Encrypt the value before saving to the database."""
71
+ if value is None:
72
+ return value
73
+ cipher = self._get_cipher()
74
+ return cipher.encrypt(value.encode()).decode()
75
+
76
+ def process_result_value(self, value, dialect):
77
+ """Decrypt the value after reading from the database."""
78
+ if value is None:
79
+ return value
80
+ cipher = self._get_cipher()
81
+ try:
82
+ return cipher.decrypt(value.encode()).decode()
83
+ except Exception:
84
+ # Fallback for unencrypted data or if decryption fails
85
+ return value
86
+
87
+
88
  def generate_uuid():
89
+ """Generates a standard unique string identifier for database records."""
90
  return str(uuid.uuid4())
91
 
92
 
93
+ class UserRole(str, enum.Enum):
94
+ """
95
+ Defines the available user roles for Role-Based Access Control (RBAC).
96
+ - 'admin': Full access to system statistics and user management.
97
+ - 'user': Standard access for uploading and chatting with documents.
98
+ """
99
+ user = "user"
100
+ admin = "admin"
101
+
102
+
103
  class User(Base):
104
+ """
105
+ Represents a registered user within the system.
106
+ Supports both legacy 'is_admin' flags and the modern 'role' enum for permissions.
107
+ """
108
  __tablename__ = "users"
109
 
110
+ id = Column(GUID, primary_key=True, default=uuid.uuid4)
111
  username = Column(String(80), unique=True, nullable=False, index=True)
112
  email = Column(String(120), unique=True, nullable=False, index=True)
113
  hashed_password = Column(String(255), nullable=False)
114
+
115
+ # Permission fields: transitioning towards 'role', keeping 'is_admin' for compatibility
116
+ role = Column(
117
+ SQLAlchemyEnum(UserRole),
118
+ default=UserRole.user,
119
+ nullable=False,
120
+ server_default="user"
121
+ )
122
  is_admin = Column(Boolean, default=False)
123
+
124
  created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
125
  last_login = Column(DateTime, nullable=True, index=True)
126
+ hf_token = Column(EncryptedString, nullable=True)
127
 
128
  # Relationships
129
  documents = relationship("Document", back_populates="owner", cascade="all, delete-orphan")
 
132
 
133
 
134
  class ApiKey(Base):
135
+ """
136
+ Stores secure hashes of API keys used for programmatic interaction with the system.
137
+ """
138
  __tablename__ = "api_keys"
139
 
140
+ id = Column(GUID, primary_key=True, default=uuid.uuid4)
141
+ user_id = Column(GUID, ForeignKey("users.id"), nullable=False, index=True)
142
  key_prefix = Column(String(10), nullable=False)
143
  hashed_key = Column(String(255), nullable=False, unique=True, index=True)
144
  created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
 
149
 
150
 
151
  class Document(Base):
152
+ """
153
+ Metadata and processing status for files uploaded by users.
154
+ """
155
  __tablename__ = "documents"
156
 
157
+ id = Column(GUID, primary_key=True, default=uuid.uuid4)
158
+ user_id = Column(GUID, ForeignKey("users.id"), nullable=False, index=True)
159
  filename = Column(String(255), nullable=False) # Stored filename (UUID-based)
160
  original_name = Column(String(255), nullable=False) # User's original filename
161
  file_size = Column(Integer, default=0) # Size in bytes
162
+ id = Column(String, primary_key=True, default=generate_uuid)
163
+ user_id = Column(String, ForeignKey("users.id"), nullable=False, index=True)
164
+ filename = Column(String(255), nullable=False) # Internal UUID-based filename
165
+ original_name = Column(String(255), nullable=False) # Original name for user display
166
+ file_size = Column(Integer, default=0) # Size in bytes
167
  page_count = Column(Integer, default=0)
168
  chunk_count = Column(Integer, default=0)
169
+ status = Column(String(20), default="pending") # pending | processing | ready | failed
170
  error_message = Column(Text, nullable=True)
171
  uploaded_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
172
+ summary = Column(Text, nullable=True)
173
 
174
  # Relationships
175
  owner = relationship("User", back_populates="documents")
 
177
 
178
 
179
  class ChatMessage(Base):
180
+ """
181
+ Persistent log of conversations between users and the AI analyst.
182
+ """
183
  __tablename__ = "chat_messages"
184
 
185
+ id = Column(GUID, primary_key=True, default=uuid.uuid4)
186
+ user_id = Column(GUID, ForeignKey("users.id"), nullable=False, index=True)
187
+ document_id = Column(GUID, ForeignKey("documents.id"), nullable=True, index=True)
188
  role = Column(String(20), nullable=False) # "user" | "assistant"
189
  content = Column(Text, nullable=False)
190
+ sources_json = Column(Text, nullable=True) # JSON representation of retrieved sources
191
  created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
192
 
193
  # Relationships
 
197
 
198
 
199
  class SharedMessage(Base):
200
+ """
201
+ Links specific chat messages to public sharing URLs.
202
+ """
203
  __tablename__ = "shared_messages"
204
 
205
+ id = Column(GUID, primary_key=True, default=uuid.uuid4)
206
+ message_id = Column(GUID, ForeignKey("chat_messages.id"), nullable=False, unique=True, index=True)
207
  created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
208
 
209
  # Relationships
backend/app/rag/agent.py CHANGED
@@ -15,21 +15,11 @@ from app.rag.tracing import trace_function
15
  logger = logging.getLogger(__name__)
16
  settings = get_settings()
17
 
18
- # ── Singleton LLM client ─────────────────────────────
19
- _llm_client = None
20
-
21
-
22
- def get_llm_client() -> InferenceClient:
23
- """Get or create HuggingFace InferenceClient (singleton)."""
24
- global _llm_client
25
-
26
- if _llm_client is None:
27
- _llm_client = InferenceClient(
28
- token=settings.HF_TOKEN,
29
- )
30
- logger.info(f"LLM client initialized for model: {settings.LLM_MODEL}")
31
-
32
- return _llm_client
33
 
34
 
35
  def is_greeting(question: str) -> bool:
@@ -68,7 +58,7 @@ def _chat_messages(system: str, user_content: str) -> list:
68
 
69
  @trace_function(
70
  "generate_answer",
71
- metadata_factory=lambda question, user_id, document_id=None: {
72
  "user_id": user_id,
73
  "document_id": document_id,
74
  "llm_model": settings.LLM_MODEL,
@@ -78,13 +68,14 @@ def generate_answer(
78
  question: str,
79
  user_id: str,
80
  document_id: Optional[str] = None,
 
81
  ) -> Dict[str, Any]:
82
  """
83
  Full RAG pipeline: retrieve → build context → generate answer.
84
  Returns dict with 'answer' and 'sources'.
85
  """
86
- # Get HuggingFace InferenceClient singleton (created once, reused)
87
- client = get_llm_client()
88
 
89
  # ── Handle greetings ─────────────────────────────
90
  # Short-circuit: if user just says "hello", skip RAG entirely
@@ -156,7 +147,7 @@ def generate_answer(
156
 
157
  @trace_function(
158
  "generate_answer_stream",
159
- metadata_factory=lambda question, user_id, document_id=None: {
160
  "user_id": user_id,
161
  "document_id": document_id,
162
  "llm_model": settings.LLM_MODEL,
@@ -166,13 +157,14 @@ def generate_answer_stream(
166
  question: str,
167
  user_id: str,
168
  document_id: Optional[str] = None,
 
169
  ) -> Generator[str, None, None]:
170
  """
171
  Streaming RAG pipeline — yields SSE-formatted chunks.
172
  First yields sources, then streams answer tokens.
173
  """
174
- # Get HuggingFace InferenceClient singleton (created once, reused)
175
- client = get_llm_client()
176
 
177
  # ── Handle greetings ─────────────────────────────
178
  # Short-circuit: if user just says "hello", skip RAG entirely
 
15
  logger = logging.getLogger(__name__)
16
  settings = get_settings()
17
 
18
+ def get_llm_client(hf_token: Optional[str] = None) -> InferenceClient:
19
+ """Create a HuggingFace InferenceClient per-request."""
20
+ return InferenceClient(
21
+ token=hf_token or settings.HF_TOKEN,
22
+ )
 
 
 
 
 
 
 
 
 
 
23
 
24
 
25
  def is_greeting(question: str) -> bool:
 
58
 
59
  @trace_function(
60
  "generate_answer",
61
+ metadata_factory=lambda question, user_id, document_id=None, **kwargs: {
62
  "user_id": user_id,
63
  "document_id": document_id,
64
  "llm_model": settings.LLM_MODEL,
 
68
  question: str,
69
  user_id: str,
70
  document_id: Optional[str] = None,
71
+ hf_token: Optional[str] = None,
72
  ) -> Dict[str, Any]:
73
  """
74
  Full RAG pipeline: retrieve → build context → generate answer.
75
  Returns dict with 'answer' and 'sources'.
76
  """
77
+ # Get HuggingFace InferenceClient per-request
78
+ client = get_llm_client(hf_token)
79
 
80
  # ── Handle greetings ─────────────────────────────
81
  # Short-circuit: if user just says "hello", skip RAG entirely
 
147
 
148
  @trace_function(
149
  "generate_answer_stream",
150
+ metadata_factory=lambda question, user_id, document_id=None, **kwargs: {
151
  "user_id": user_id,
152
  "document_id": document_id,
153
  "llm_model": settings.LLM_MODEL,
 
157
  question: str,
158
  user_id: str,
159
  document_id: Optional[str] = None,
160
+ hf_token: Optional[str] = None,
161
  ) -> Generator[str, None, None]:
162
  """
163
  Streaming RAG pipeline β€” yields SSE-formatted chunks.
164
  First yields sources, then streams answer tokens.
165
  """
166
+ # Get HuggingFace InferenceClient per-request
167
+ client = get_llm_client(hf_token)
168
 
169
  # ── Handle greetings ─────────────────────────────
170
  # Short-circuit: if user just says "hello", skip RAG entirely
backend/app/rag/retriever.py CHANGED
@@ -1,7 +1,9 @@
1
  """
2
  Two-stage retrieval: ChromaDB similarity search + cross-encoder reranking.
3
  """
 
4
  import logging
 
5
  from typing import List, Dict, Any, Optional
6
  from app.config import get_settings
7
  from app.rag.embeddings import embed_query
@@ -10,6 +12,7 @@ from app.rag.vectorstore import query_chunks
10
 
11
  logger = logging.getLogger(__name__)
12
  settings = get_settings()
 
13
 
14
  # ── Singleton reranker ───────────────────────────────
15
  _reranker = None
@@ -32,6 +35,136 @@ def get_reranker():
32
  return _reranker if _reranker != "disabled" else None
33
 
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  @trace_function(
36
  "retrieve",
37
  metadata_factory=lambda query, user_id, document_id=None: {
@@ -55,18 +188,24 @@ def retrieve(
55
 
56
  Returns chunks with confidence scores.
57
  """
58
- # ── Stage 1: Embedding search ────────────────────
59
- query_vector = embed_query(query)
60
- candidates = query_chunks(
61
- query_embedding=query_vector,
62
- user_id=user_id,
63
- document_id=document_id,
64
- top_k=settings.TOP_K_RETRIEVAL,
65
- )
 
 
 
 
66
 
67
  if not candidates:
68
  return []
69
 
 
 
70
  # ── Stage 2: Cross-encoder reranking ─────────────
71
  reranker = get_reranker()
72
 
@@ -86,6 +225,8 @@ def retrieve(
86
  except Exception as e:
87
  logger.warning(f"Reranking failed, using embedding scores: {e}")
88
 
 
 
89
  # ── Take top-K after reranking ───────────────────
90
  top_chunks = candidates[:settings.TOP_K_RERANK]
91
 
 
1
  """
2
  Two-stage retrieval: ChromaDB similarity search + cross-encoder reranking.
3
  """
4
+ import json
5
  import logging
6
+ import re
7
  from typing import List, Dict, Any, Optional
8
  from app.config import get_settings
9
  from app.rag.embeddings import embed_query
 
12
 
13
  logger = logging.getLogger(__name__)
14
  settings = get_settings()
15
+ MAX_QUERY_VARIANTS = 4
16
 
17
  # ── Singleton reranker ───────────────────────────────
18
  _reranker = None
 
35
  return _reranker if _reranker != "disabled" else None
36
 
37
 
38
def transform_query(query: str) -> List[str]:
    """Expand a user question into a bounded list of retrieval queries.

    The stripped original question always comes first, followed by any
    LLM-generated rewrites. Duplicates are removed and the result is capped
    at MAX_QUERY_VARIANTS entries. A blank question yields an empty list.
    """
    base_query = query.strip()
    if base_query == "":
        return []

    variants: List[str] = []
    try:
        variants = _generate_query_variants(base_query)
    except Exception as e:
        # Best effort: retrieval still works with only the original question.
        logger.warning(f"Query transformation failed, using original query only: {e}")

    combined = _dedupe_queries([base_query] + list(variants))
    return combined[:MAX_QUERY_VARIANTS]
51
+
52
+
53
def _generate_query_variants(query: str) -> List[str]:
    """Ask the configured chat model for alternative search queries.

    Returns an empty list when settings.HF_TOKEN is not set or when the
    model response has no choices; otherwise the reply text is handed to
    _parse_query_variants.
    """
    token = settings.HF_TOKEN
    if not token:
        return []

    # Deferred import: only reached when a token is configured.
    from huggingface_hub import InferenceClient

    instructions = (
        "Rewrite the user question into concise semantic search queries for document retrieval. "
        "Split independent topics into separate queries. Return a JSON array of strings only. "
        f"User question: {query}"
    )
    system_message = {
        "role": "system",
        "content": "You create optimized search queries for a RAG retriever.",
    }
    user_message = {"role": "user", "content": instructions}

    completion = InferenceClient(token=token).chat_completion(
        messages=[system_message, user_message],
        model=settings.LLM_MODEL,
        max_tokens=256,
        temperature=0.2,
    )

    choices = completion.choices
    if not choices:
        return []

    reply_text = choices[0].message.content or ""
    return _parse_query_variants(reply_text)
84
+
85
+
86
+ def _parse_query_variants(content: str) -> List[str]:
87
+ """Parse LLM output into a list even when it adds light prose around JSON."""
88
+ content = content.strip()
89
+ if not content:
90
+ return []
91
+
92
+ parsed = _try_parse_query_json(content)
93
+ if parsed is not None:
94
+ return parsed
95
+
96
+ match = re.search(r"\[[\s\S]*\]", content)
97
+ if match:
98
+ parsed = _try_parse_query_json(match.group(0))
99
+ if parsed is not None:
100
+ return parsed
101
+
102
+ queries = []
103
+ for line in content.splitlines():
104
+ cleaned = re.sub(r"^\s*[-*\d.)]+\s*", "", line).strip().strip('"')
105
+ if cleaned:
106
+ queries.append(cleaned)
107
+ return queries
108
+
109
+
110
+ def _try_parse_query_json(content: str) -> Optional[List[str]]:
111
+ try:
112
+ parsed = json.loads(content)
113
+ except json.JSONDecodeError:
114
+ return None
115
+
116
+ if isinstance(parsed, dict):
117
+ parsed = parsed.get("queries", [])
118
+
119
+ if not isinstance(parsed, list):
120
+ return []
121
+
122
+ return [item.strip() for item in parsed if isinstance(item, str) and item.strip()]
123
+
124
+
125
+ def _dedupe_queries(queries: List[str]) -> List[str]:
126
+ deduped = []
127
+ seen = set()
128
+ for query in queries:
129
+ normalized = " ".join(query.split())
130
+ key = normalized.lower()
131
+ if normalized and key not in seen:
132
+ seen.add(key)
133
+ deduped.append(normalized)
134
+ return deduped
135
+
136
+
137
+ def _candidate_key(chunk: Dict[str, Any]) -> str:
138
+ for key in ("id", "chunk_id"):
139
+ if chunk.get(key):
140
+ return str(chunk[key])
141
+
142
+ text = str(chunk.get("text", ""))
143
+ return "|".join(
144
+ str(part)
145
+ for part in (
146
+ chunk.get("document_id", ""),
147
+ chunk.get("filename", ""),
148
+ chunk.get("page", ""),
149
+ text[:200],
150
+ )
151
+ )
152
+
153
+
154
def _merge_candidates(candidates: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Deduplicate chunks by _candidate_key, keeping the highest "score".

    Each kept entry is a shallow copy of the input dict. On a tie the chunk
    seen first is kept; a missing "score" counts as 0.
    """
    best_by_key: Dict[str, Dict[str, Any]] = {}

    for raw in candidates:
        chunk = dict(raw)
        chunk_key = _candidate_key(chunk)
        current = best_by_key.get(chunk_key)

        if current is not None and not (chunk.get("score", 0) > current.get("score", 0)):
            # An equal or better-scoring copy of this chunk is already stored.
            continue
        best_by_key[chunk_key] = chunk

    return list(best_by_key.values())
166
+
167
+
168
  @trace_function(
169
  "retrieve",
170
  metadata_factory=lambda query, user_id, document_id=None: {
 
188
 
189
  Returns chunks with confidence scores.
190
  """
191
+ # ── Stage 1: Query transformation + embedding search ─────────────
192
+ candidates = []
193
+ for search_query in transform_query(query):
194
+ query_vector = embed_query(search_query)
195
+ candidates.extend(
196
+ query_chunks(
197
+ query_embedding=query_vector,
198
+ user_id=user_id,
199
+ document_id=document_id,
200
+ top_k=settings.TOP_K_RETRIEVAL,
201
+ )
202
+ )
203
 
204
  if not candidates:
205
  return []
206
 
207
+ candidates = _merge_candidates(candidates)
208
+
209
  # ── Stage 2: Cross-encoder reranking ─────────────
210
  reranker = get_reranker()
211
 
 
225
  except Exception as e:
226
  logger.warning(f"Reranking failed, using embedding scores: {e}")
227
 
228
+ candidates.sort(key=lambda x: x.get("rerank_score", x.get("score", 0)), reverse=True)
229
+
230
  # ── Take top-K after reranking ───────────────────
231
  top_chunks = candidates[:settings.TOP_K_RERANK]
232
 
backend/app/routes/admin.py CHANGED
@@ -3,17 +3,18 @@ Admin-only operational statistics routes.
3
  """
4
  import shutil
5
  from pathlib import Path
 
6
 
7
  from fastapi import APIRouter, Depends
8
  from sqlalchemy import func
9
  from sqlalchemy.orm import Session
10
 
11
- from app.auth import get_admin_user
12
  from app.config import get_settings
13
  from app.database import get_db
14
  from app.metrics import get_query_metrics
15
- from app.models import Document, User
16
- from app.schemas import AdminStatsResponse, DiskUsageResponse
17
 
18
  router = APIRouter(prefix="/admin", tags=["Admin"])
19
  settings = get_settings()
@@ -35,8 +36,8 @@ def _directory_size(path: Path) -> int:
35
 
36
  @router.get("/stats", response_model=AdminStatsResponse)
37
  def get_admin_stats(
38
- _admin: User = Depends(get_admin_user),
39
  db: Session = Depends(get_db),
 
40
  ):
41
  """Return aggregate system statistics for administrators."""
42
  upload_dir = Path(settings.UPLOAD_DIR).resolve()
@@ -59,6 +60,8 @@ def get_admin_stats(
59
  return AdminStatsResponse(
60
  total_users=db.query(User).count(),
61
  total_pdfs_uploaded=total_pdfs_uploaded,
 
 
62
  average_query_response_time_ms=float(
63
  query_metrics["average_query_response_time_ms"]
64
  ),
@@ -70,4 +73,14 @@ def get_admin_stats(
70
  usage_percent=used_percent,
71
  upload_dir_bytes=_directory_size(upload_dir),
72
  ),
 
73
  )
 
 
 
 
 
 
 
 
 
 
3
  """
4
  import shutil
5
  from pathlib import Path
6
+ from typing import List
7
 
8
  from fastapi import APIRouter, Depends
9
  from sqlalchemy import func
10
  from sqlalchemy.orm import Session
11
 
12
+ from app.auth import get_current_admin
13
  from app.config import get_settings
14
  from app.database import get_db
15
  from app.metrics import get_query_metrics
16
+ from app.models import Document, User, ChatMessage
17
+ from app.schemas import AdminStatsResponse, DiskUsageResponse, UserResponse
18
 
19
  router = APIRouter(prefix="/admin", tags=["Admin"])
20
  settings = get_settings()
 
36
 
37
  @router.get("/stats", response_model=AdminStatsResponse)
38
  def get_admin_stats(
 
39
  db: Session = Depends(get_db),
40
+ _admin: User = Depends(get_current_admin),
41
  ):
42
  """Return aggregate system statistics for administrators."""
43
  upload_dir = Path(settings.UPLOAD_DIR).resolve()
 
60
  return AdminStatsResponse(
61
  total_users=db.query(User).count(),
62
  total_pdfs_uploaded=total_pdfs_uploaded,
63
+ total_documents=db.query(Document).count(),
64
+ total_messages=db.query(ChatMessage).count(),
65
  average_query_response_time_ms=float(
66
  query_metrics["average_query_response_time_ms"]
67
  ),
 
73
  usage_percent=used_percent,
74
  upload_dir_bytes=_directory_size(upload_dir),
75
  ),
76
+ users=db.query(User).all()
77
  )
78
+
79
+
80
@router.get("/users", response_model=List[UserResponse])
def list_all_users(
    db: Session = Depends(get_db),
    _admin: User = Depends(get_current_admin),
):
    """List all registered users (admin-only)."""
    # NOTE(review): UserResponse declares an hf_token field, so this listing
    # appears to expose every user's token to admins - confirm that is intended.
    all_users = db.query(User).all()
    return all_users
backend/app/routes/auth.py CHANGED
@@ -11,7 +11,7 @@ from sqlalchemy.orm import Session
11
  from sqlalchemy import select
12
  from app.config import get_settings
13
  from app.database import get_db
14
- from app.models import User, ApiKey
15
  from app.schemas import (
16
  GoogleLoginRequest,
17
  HFTokenUpdate,
@@ -140,6 +140,7 @@ def register(payload: UserRegister, db: Session = Depends(get_db)):
140
  username=payload.username,
141
  email=payload.email,
142
  hashed_password=hash_password(payload.password),
 
143
  )
144
  db.add(user)
145
  db.commit()
 
11
  from sqlalchemy import select
12
  from app.config import get_settings
13
  from app.database import get_db
14
+ from app.models import User, ApiKey, UserRole
15
  from app.schemas import (
16
  GoogleLoginRequest,
17
  HFTokenUpdate,
 
140
  username=payload.username,
141
  email=payload.email,
142
  hashed_password=hash_password(payload.password),
143
+ role=UserRole.user,
144
  )
145
  db.add(user)
146
  db.commit()
backend/app/routes/chat.py CHANGED
@@ -82,16 +82,16 @@ def create_share_link(
82
  )
83
 
84
 
85
- def generate_answer(question: str, user_id: str, document_id: Optional[str] = None):
86
  from app.rag.agent import generate_answer as _generate_answer
87
 
88
- return _generate_answer(question=question, user_id=user_id, document_id=document_id)
89
 
90
 
91
- def generate_answer_stream(question: str, user_id: str, document_id: Optional[str] = None):
92
  from app.rag.agent import generate_answer_stream as _generate_answer_stream
93
 
94
- return _generate_answer_stream(question=question, user_id=user_id, document_id=document_id)
95
 
96
 
97
  @router.post("/ask", response_model=ChatResponse)
@@ -151,6 +151,7 @@ def ask_question(
151
  question=payload.question,
152
  user_id=user.id,
153
  document_id=payload.document_id,
 
154
  )
155
 
156
  # Save to chat history
@@ -240,6 +241,7 @@ def ask_question_stream(
240
  question=payload.question,
241
  user_id=user.id,
242
  document_id=payload.document_id,
 
243
  ):
244
  yield chunk
245
 
 
82
  )
83
 
84
 
85
def generate_answer(
    question: str,
    user_id: str,
    document_id: Optional[str] = None,
    hf_token: Optional[str] = None,
):
    """Forward to app.rag.agent.generate_answer, importing it at call time."""
    from app.rag.agent import generate_answer as _agent_generate_answer

    return _agent_generate_answer(
        question=question,
        user_id=user_id,
        document_id=document_id,
        hf_token=hf_token,
    )
89
 
90
 
91
def generate_answer_stream(
    question: str,
    user_id: str,
    document_id: Optional[str] = None,
    hf_token: Optional[str] = None,
):
    """Forward to app.rag.agent.generate_answer_stream, importing it at call time."""
    from app.rag.agent import generate_answer_stream as _agent_generate_answer_stream

    return _agent_generate_answer_stream(
        question=question,
        user_id=user_id,
        document_id=document_id,
        hf_token=hf_token,
    )
95
 
96
 
97
  @router.post("/ask", response_model=ChatResponse)
 
151
  question=payload.question,
152
  user_id=user.id,
153
  document_id=payload.document_id,
154
+ hf_token=user.hf_token,
155
  )
156
 
157
  # Save to chat history
 
241
  question=payload.question,
242
  user_id=user.id,
243
  document_id=payload.document_id,
244
+ hf_token=user.hf_token,
245
  ):
246
  yield chunk
247
 
backend/app/routes/documents.py CHANGED
@@ -3,24 +3,32 @@ Document management routes β€” upload, list, delete, and serve PDF files.
3
  Background ingestion via FastAPI BackgroundTasks.
4
  """
5
  import os
 
6
  import uuid
7
  import logging
 
 
8
  from typing import Optional
9
  from pathlib import Path
10
  import shutil
11
  import tempfile
12
-
13
  from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, BackgroundTasks, status, Query
14
  from fastapi.responses import FileResponse
15
  from sqlalchemy.orm import Session
16
 
17
  from app.database import get_db
18
  from app.models import User, Document
19
- from app.schemas import DocumentResponse, DocumentListResponse, DocumentStatusResponse
20
  from app.auth import get_current_user
21
  from app.config import get_settings
22
  from app.rag.chunker import chunk_document, get_page_count
23
  from app.rag.vectorstore import store_chunks, delete_document_chunks
 
 
 
 
 
24
  from sqlalchemy import select
25
  logger = logging.getLogger(__name__)
26
  settings = get_settings()
@@ -62,7 +70,7 @@ async def validate_upload(file: UploadFile):
62
 
63
  # extension without leading dot in settings
64
  if ext.lstrip(".") not in settings.ALLOWED_EXTENSIONS:
65
- raise HTTPException(status_code=400, detail="Only PDF and DOCX files are allowed")
66
 
67
  # save to a temporary file
68
  with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
@@ -205,6 +213,36 @@ def _ingest_document(document_id: str, filepath: str, original_name: str, user_i
205
  db.close()
206
 
207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  @router.post("/upload", response_model=DocumentResponse, status_code=status.HTTP_202_ACCEPTED)
209
  async def upload_document(
210
  background_tasks: BackgroundTasks,
@@ -287,6 +325,100 @@ async def upload_document(
287
 
288
  return DocumentResponse.model_validate(document)
289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
 
291
  @router.get("/{document_id}/status", response_model=DocumentStatusResponse)
292
  def get_document_status(
 
3
  Background ingestion via FastAPI BackgroundTasks.
4
  """
5
  import os
6
+ import sys
7
  import uuid
8
  import logging
9
+ import asyncio
10
+ import concurrent.futures
11
  from typing import Optional
12
  from pathlib import Path
13
  import shutil
14
  import tempfile
15
+ from urllib.parse import urlparse
16
  from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, BackgroundTasks, status, Query
17
  from fastapi.responses import FileResponse
18
  from sqlalchemy.orm import Session
19
 
20
  from app.database import get_db
21
  from app.models import User, Document
22
+ from app.schemas import DocumentResponse, DocumentListResponse, DocumentStatusResponse, UploadUrl
23
  from app.auth import get_current_user
24
  from app.config import get_settings
25
  from app.rag.chunker import chunk_document, get_page_count
26
  from app.rag.vectorstore import store_chunks, delete_document_chunks
27
+
28
+ import crawl4ai
29
+ from crawl4ai import AsyncWebCrawler
30
+ from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig, CacheMode
31
+
32
  from sqlalchemy import select
33
  logger = logging.getLogger(__name__)
34
  settings = get_settings()
 
70
 
71
  # extension without leading dot in settings
72
  if ext.lstrip(".") not in settings.ALLOWED_EXTENSIONS:
73
+ raise HTTPException(status_code=400, detail="Only PDF, DOCX, TEXT, AND MARKDOWN files are allowed")
74
 
75
  # save to a temporary file
76
  with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
 
213
  db.close()
214
 
215
 
216
+
217
def _crawl_in_new_loop(url: str) -> str:
    """Run the async crawler in a fresh event loop on a worker thread.

    On Windows this must be a ProactorEventLoop to support subprocesses.

    Args:
        url: Address of the page to crawl.

    Returns:
        The crawl result's markdown, or "" when the result has none.
    """
    if sys.platform == "win32":
        loop = asyncio.ProactorEventLoop()
    else:
        loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    async def _crawl():
        browser_config = BrowserConfig()
        run_config = CrawlerRunConfig(
            excluded_tags=['form', 'header'],

            # Content processing
            process_iframes=True,
            # remove_overlay_elements=True,

            # Cache control
            # cache_mode=CacheMode.ENABLED
        )
        async with AsyncWebCrawler(config=browser_config) as crawler:
            result = await crawler.arun(url=url, config=run_config)
            return result.markdown or ""

    try:
        return loop.run_until_complete(_crawl())
    finally:
        try:
            # Finalize any async generators still pending on this loop.
            loop.run_until_complete(loop.shutdown_asyncgens())
        finally:
            # Do not leave a closed loop installed as this thread's current loop.
            asyncio.set_event_loop(None)
            loop.close()
245
+
246
  @router.post("/upload", response_model=DocumentResponse, status_code=status.HTTP_202_ACCEPTED)
247
  async def upload_document(
248
  background_tasks: BackgroundTasks,
 
325
 
326
  return DocumentResponse.model_validate(document)
327
 
328
@router.post("/urlupload", response_model=DocumentResponse, status_code=status.HTTP_202_ACCEPTED)
async def upload_document_url(
    payload: UploadUrl,
    background_tasks: BackgroundTasks,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """
    Crawl a web page, store its markdown as a .txt upload, and queue ingestion.

    Uses crawl4ai's AsyncWebCrawler in a dedicated thread with its own
    event loop. This is required on Windows because uvicorn's default
    SelectorEventLoop does not support subprocess creation (used by
    Playwright/crawl4ai), which causes a NotImplementedError.
    On Linux (production) a plain new_event_loop() is used instead.

    Returns:
        DocumentResponse for the newly created "pending" document.

    Raises:
        HTTPException(400): the URL is not an absolute http(s) URL, or an
            unexpected error occurred while processing it.
        HTTPException(422): the crawl produced no content.
    """
    temp_path: Optional[str] = None
    stored_path: Optional[str] = None
    record_committed = False
    try:
        parsed = urlparse(payload.url)
        # Only web URLs are crawlable; this also rejects file:// and other
        # schemes that could read local resources through the browser.
        # NOTE(review): http(s) URLs pointing at internal hosts are still
        # accepted - consider an allow/deny list if this is internet-facing.
        if parsed.scheme not in ("http", "https") or not parsed.netloc:
            raise HTTPException(status_code=400, detail="Invalid URL")

        # Run in a worker thread with its own event loop to avoid
        # NotImplementedError on Windows (SelectorEventLoop can't spawn subprocesses)
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            markdown = await asyncio.get_running_loop().run_in_executor(
                pool, _crawl_in_new_loop, payload.url
            )

        if not markdown:
            raise HTTPException(status_code=422, detail="No content could be extracted from the URL")

        with tempfile.NamedTemporaryFile(
            mode="w",
            suffix=".txt",
            delete=False,
            encoding="utf-8",
        ) as tmp:
            tmp.write(markdown)
            temp_path = tmp.name

        # Move temp file to permanent user upload directory
        ext = "txt"
        user_dir = os.path.join(settings.UPLOAD_DIR, user.id)
        os.makedirs(user_dir, exist_ok=True)

        stored_filename = f"{uuid.uuid4().hex}.{ext}"
        filepath = os.path.join(user_dir, stored_filename)
        shutil.move(temp_path, filepath)
        temp_path = None  # file is now at filepath; no longer a temp to clean up
        stored_path = filepath

        file_size = Path(filepath).stat().st_size

        # Derive a human-readable name from the URL. The migration schema in
        # this repository declares documents.original_name as String(255),
        # so long URLs are capped to fit together with the ".txt" suffix.
        url_path = parsed.path.rstrip("/")
        original_name = f"{parsed.netloc}{url_path}"[:251] + ".txt"

        # Create database record
        document = Document(
            user_id=user.id,
            filename=stored_filename,
            original_name=original_name,
            file_size=file_size,
            status="pending",
        )
        db.add(document)
        db.commit()
        record_committed = True
        db.refresh(document)

        # Trigger background ingestion
        background_tasks.add_task(
            _ingest_document,
            document_id=document.id,
            filepath=filepath,
            original_name=original_name,
            user_id=user.id,
        )

        return DocumentResponse.model_validate(document)

    except HTTPException:
        raise
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid URL")
    except Exception as e:
        logger.error(f"URL upload error: {e}")
        raise HTTPException(status_code=400, detail=f"Something went wrong with URL processing: {str(e)}")
    finally:
        # Runs whether the request succeeded, raised an HTTPException,
        # or hit an unexpected error - no temp files are ever left behind.
        if temp_path is not None:
            Path(temp_path).unlink(missing_ok=True)
        # A file already moved into the upload directory would be orphaned
        # if its database record was never committed.
        if stored_path is not None and not record_committed:
            Path(stored_path).unlink(missing_ok=True)
420
+
421
+
422
 
423
  @router.get("/{document_id}/status", response_model=DocumentStatusResponse)
424
  def get_document_status(
backend/app/schemas.py CHANGED
@@ -4,6 +4,7 @@ Pydantic schemas for API request/response validation.
4
  from pydantic import BaseModel, EmailStr, Field
5
  from typing import Optional, List
6
  from datetime import datetime
 
7
 
8
 
9
  # ── Auth ─────────────────────────────────────────────
@@ -75,6 +76,7 @@ class UserResponse(BaseModel):
75
  id: str
76
  username: str
77
  email: str
 
78
  is_admin: bool
79
  hf_token: Optional[str] = None
80
  created_at: datetime
@@ -131,9 +133,12 @@ class DiskUsageResponse(BaseModel):
131
  class AdminStatsResponse(BaseModel):
132
  total_users: int
133
  total_pdfs_uploaded: int
 
 
134
  average_query_response_time_ms: float
135
  query_count: int
136
  disk_space_usage: DiskUsageResponse
 
137
 
138
 
139
  # ── Chat ─────────────────────────────────────────────
@@ -172,6 +177,8 @@ class ChatHistoryResponse(BaseModel):
172
  messages: List[ChatMessageResponse]
173
  document_id: Optional[str] = None
174
 
 
 
175
 
176
  class ShareAnswerResponse(BaseModel):
177
  id: str
 
4
  from pydantic import BaseModel, EmailStr, Field
5
  from typing import Optional, List
6
  from datetime import datetime
7
+ from app.models import UserRole
8
 
9
 
10
  # ── Auth ─────────────────────────────────────────────
 
76
  id: str
77
  username: str
78
  email: str
79
+ role: UserRole
80
  is_admin: bool
81
  hf_token: Optional[str] = None
82
  created_at: datetime
 
133
  class AdminStatsResponse(BaseModel):
134
  total_users: int
135
  total_pdfs_uploaded: int
136
+ total_documents: int
137
+ total_messages: int
138
  average_query_response_time_ms: float
139
  query_count: int
140
  disk_space_usage: DiskUsageResponse
141
+ users: List[UserResponse]
142
 
143
 
144
  # ── Chat ─────────────────────────────────────────────
 
177
  messages: List[ChatMessageResponse]
178
  document_id: Optional[str] = None
179
 
180
class UploadUrl(BaseModel):
    """Request body carrying the URL of a web page to ingest."""

    url: str
182
 
183
  class ShareAnswerResponse(BaseModel):
184
  id: str
backend/migrate_add_role.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ One-time migration script to add the 'role' column to the 'users' table.
3
+ Run this from the 'backend' directory.
4
+ """
5
+ import sys
6
+ import os
7
+
8
+ # Add the current directory to sys.path to allow importing 'app'
9
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
10
+
11
+ from app.database import engine
12
+ from sqlalchemy import text
13
+
14
def migrate():
    """Add a ``role`` column to ``users`` and backfill it from ``is_admin``.

    Every row gets role 'user', then rows flagged ``is_admin`` get 'admin'.
    If the column already exists the migration is skipped.

    Raises:
        Exception: any database error other than the column already existing
            is reported and then re-raised, so a failed run does not exit
            with a success status.
    """
    print("🚀 Starting migration: adding 'role' column to 'users' table...")
    try:
        with engine.connect() as conn:
            # SQLite cannot add a NOT NULL column without a default when rows
            # already exist, so the column is nullable with a default of 'user'.
            conn.execute(text(
                "ALTER TABLE users ADD COLUMN role VARCHAR DEFAULT 'user'"
            ))
            # Update existing rows to have the 'user' role
            conn.execute(text(
                "UPDATE users SET role = 'user' WHERE role IS NULL"
            ))
            # NOTE(review): "is_admin = 1" is SQLite-style; a PostgreSQL boolean
            # column would need "is_admin IS TRUE" - confirm the target database.
            conn.execute(text(
                "UPDATE users SET role = 'admin' WHERE is_admin = 1"
            ))
            conn.commit()
            print("✅ Migration successful!")
    except Exception as e:
        if "duplicate column name" in str(e).lower():
            print("ℹ️ Column 'role' already exists. Skipping migration.")
        else:
            print(f"❌ Migration failed: {e}")
            # Propagate so the script exits non-zero instead of hiding the failure.
            raise
37
+
38
+ if __name__ == "__main__":
39
+ migrate()
backend/requirements.txt CHANGED
@@ -8,6 +8,7 @@ python-multipart
8
  # Database
9
  sqlalchemy
10
  aiosqlite
 
11
 
12
  # Auth
13
  pyjwt
@@ -50,8 +51,9 @@ slowapi
50
  # File Validation
51
  #sudo apt-get install libmagic1 // for Debian/Ubuntu
52
  #brew install libmagic // for OSX
53
- python-magic-bin==0.4.27; sys_platform == "win32" # for windows
54
  python-magic; sys_platform != "win32"
55
  python-docx
56
  pypdf
57
  reportlab
 
 
8
  # Database
9
  sqlalchemy
10
  aiosqlite
11
+ psycopg[binary]
12
 
13
  # Auth
14
  pyjwt
 
51
  # File Validation
52
  #sudo apt-get install libmagic1 // for Debian/Ubuntu
53
  #brew install libmagic // for OSX
54
+ python-magic-bin; sys_platform == "win32" # for windows
55
  python-magic; sys_platform != "win32"
56
  python-docx
57
  pypdf
58
  reportlab
59
+ crawl4ai
backend/scripts/migrate_sqlite_to_postgres.py ADDED
@@ -0,0 +1,524 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Migrate SQLite app data into a Supabase/Postgres database.
2
+
3
+ The script supports both the current FastAPI SQLite schema
4
+ (`users`, `documents`, `chat_messages`) and the older legacy
5
+ `instance/users.db` schema (`user` only).
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import argparse
10
+ import logging
11
+ import os
12
+ import sys
13
+ import uuid
14
+ from dataclasses import dataclass, field
15
+ from datetime import datetime, timezone
16
+ from pathlib import Path
17
+ from typing import Any
18
+
19
+ from sqlalchemy import (
20
+ Boolean,
21
+ Column,
22
+ DateTime,
23
+ ForeignKey,
24
+ Integer,
25
+ MetaData,
26
+ String,
27
+ Table,
28
+ Text,
29
+ create_engine,
30
+ inspect,
31
+ select,
32
+ )
33
+ from sqlalchemy.engine import Engine
34
+ from sqlalchemy.exc import IntegrityError
35
+ from sqlalchemy.orm import Session, sessionmaker
36
+
37
+ LOGGER = logging.getLogger("sqlite_to_postgres")
38
+
39
+
40
def generate_uuid() -> str:
    """Return a new random (version 4) UUID in its canonical string form."""
    fresh = uuid.uuid4()
    return str(fresh)
42
+
43
+
44
+ metadata = MetaData()
45
+
46
+ users = Table(
47
+ "users",
48
+ metadata,
49
+ Column("id", String, primary_key=True, default=generate_uuid),
50
+ Column("username", String(80), unique=True, nullable=False, index=True),
51
+ Column("email", String(120), unique=True, nullable=False, index=True),
52
+ Column("hashed_password", String(255), nullable=False),
53
+ Column("is_admin", Boolean, default=False),
54
+ Column("created_at", DateTime, default=lambda: datetime.now(timezone.utc)),
55
+ Column("last_login", DateTime, nullable=True, index=True),
56
+ Column("hf_token", String(255), nullable=True),
57
+ )
58
+
59
+ api_keys = Table(
60
+ "api_keys",
61
+ metadata,
62
+ Column("id", String, primary_key=True, default=generate_uuid),
63
+ Column("user_id", String, ForeignKey("users.id"), nullable=False, index=True),
64
+ Column("key_prefix", String(10), nullable=False),
65
+ Column("hashed_key", String(255), nullable=False, unique=True, index=True),
66
+ Column("created_at", DateTime, default=lambda: datetime.now(timezone.utc)),
67
+ Column("last_used", DateTime, nullable=True),
68
+ )
69
+
70
+ documents = Table(
71
+ "documents",
72
+ metadata,
73
+ Column("id", String, primary_key=True, default=generate_uuid),
74
+ Column("user_id", String, ForeignKey("users.id"), nullable=False, index=True),
75
+ Column("filename", String(255), nullable=False),
76
+ Column("original_name", String(255), nullable=False),
77
+ Column("file_size", Integer, default=0),
78
+ Column("page_count", Integer, default=0),
79
+ Column("chunk_count", Integer, default=0),
80
+ Column("status", String(20), default="pending"),
81
+ Column("error_message", Text, nullable=True),
82
+ Column("uploaded_at", DateTime, default=lambda: datetime.now(timezone.utc)),
83
+ Column("summary", Text, nullable=True),
84
+ )
85
+
86
+ chat_messages = Table(
87
+ "chat_messages",
88
+ metadata,
89
+ Column("id", String, primary_key=True, default=generate_uuid),
90
+ Column("user_id", String, ForeignKey("users.id"), nullable=False, index=True),
91
+ Column("document_id", String, ForeignKey("documents.id"), nullable=True, index=True),
92
+ Column("role", String(20), nullable=False),
93
+ Column("content", Text, nullable=False),
94
+ Column("sources_json", Text, nullable=True),
95
+ Column("created_at", DateTime, default=lambda: datetime.now(timezone.utc)),
96
+ )
97
+
98
+ shared_messages = Table(
99
+ "shared_messages",
100
+ metadata,
101
+ Column("id", String, primary_key=True, default=generate_uuid),
102
+ Column("message_id", String, ForeignKey("chat_messages.id"), nullable=False, unique=True, index=True),
103
+ Column("created_at", DateTime, default=lambda: datetime.now(timezone.utc)),
104
+ )
105
+
106
+
107
@dataclass
class MigrationStats:
    """Per-table row counters, grouped by what happened to each row."""

    inserted: dict[str, int] = field(default_factory=dict)
    reused: dict[str, int] = field(default_factory=dict)
    skipped: dict[str, int] = field(default_factory=dict)

    def add(self, table_name: str, action: str) -> None:
        """Increment the counter for ``table_name`` in the ``action`` bucket.

        ``action`` is the attribute name of the bucket to update
        ("inserted", "reused" or "skipped").
        """
        bucket = getattr(self, action)
        bucket[table_name] = bucket.get(table_name, 0) + 1
115
+
116
+
117
def normalize_postgres_url(url: str) -> str:
    """Rewrite bare postgres:// and postgresql:// URLs to use psycopg v3.

    URLs with any other prefix (including ones that already name a driver)
    are returned unchanged.
    """
    driver_prefix = "postgresql+psycopg://"
    for bare_scheme in ("postgres://", "postgresql://"):
        if url.startswith(bare_scheme):
            return driver_prefix + url[len(bare_scheme):]
    return url
124
+
125
+
126
def sqlite_url_from_path(path: str) -> str:
    """Build a SQLAlchemy SQLite URL from the resolved form of ``path``."""
    absolute = Path(path).resolve()
    return "sqlite:///" + absolute.as_posix()
128
+
129
+
130
def make_engine(url: str) -> Engine:
    """Create a SQLAlchemy engine for ``url`` with ``future=True``."""
    engine = create_engine(url, future=True)
    return engine
132
+
133
+
134
def make_session(engine: Engine) -> Session:
    """Open a new session bound to ``engine`` with autocommit and autoflush off."""
    factory = sessionmaker(bind=engine, autocommit=False, autoflush=False, future=True)
    return factory()
136
+
137
+
138
def reflected_table(engine: Engine, table_name: str) -> Table | None:
    """Reflect ``table_name`` from the database, or return None if it is absent."""
    if inspect(engine).has_table(table_name):
        # Use a throwaway MetaData so the reflected table stays separate from
        # the module-level table definitions.
        return Table(table_name, MetaData(), autoload_with=engine)
    return None
143
+
144
+
145
def fetch_rows(session: Session, table: Table) -> list[dict[str, Any]]:
    """Load every row of ``table`` as a plain dict, ordered by id when it has one."""
    query = select(table)
    if "id" in table.c:
        query = query.order_by(table.c.id)

    records: list[dict[str, Any]] = []
    for mapping in session.execute(query).mappings().all():
        records.append(dict(mapping))
    return records
150
+
151
+
152
def existing_id(session: Session, table: Table, source_id: str | None) -> str | None:
    """Return ``source_id`` if a row with that id exists in ``table``, else None.

    A missing or empty ``source_id`` short-circuits to None without querying.
    """
    if source_id:
        lookup = select(table.c.id).where(table.c.id == source_id)
        return session.execute(lookup).scalar_one_or_none()
    return None
156
+
157
+
158
def available_id(session: Session, table: Table, source_id: Any) -> str:
    """Pick an id for a new row in ``table``.

    The source id (as a string) is kept when it is not already taken;
    otherwise, or when ``source_id`` is None, fresh UUIDs are generated until
    an unused one is found.
    """
    candidate = generate_uuid() if source_id is None else str(source_id)
    while existing_id(session, table, candidate) is not None:
        candidate = generate_uuid()
    return candidate
167
+
168
+
169
def first_existing_user(session: Session, row: dict[str, Any]) -> str | None:
    """Find a target user that matches ``row`` by email, then by username.

    Returns the matching user's id, or None when neither lookup finds one.
    """
    email, username = row.get("email"), row.get("username")

    by_email = None
    if email:
        email_lookup = select(users.c.id).where(users.c.email == email)
        by_email = session.execute(email_lookup).scalar_one_or_none()
    if by_email:
        return by_email

    if not username:
        return None
    username_lookup = select(users.c.id).where(users.c.username == username)
    return session.execute(username_lookup).scalar_one_or_none()
179
+
180
+
181
def copy_users(
    source_session: Session,
    target_session: Session,
    source_table: Table,
    stats: MigrationStats,
) -> dict[str, str]:
    """Copy user rows from the source table into the target ``users`` table.

    A source row is mapped onto an existing target user when the target
    already holds the same ``id``, or else a user with the same email, or
    else the same username; such rows are counted as "reused". All other rows
    are inserted and counted as "inserted". When the source table is named
    ``user`` (the legacy table), a fresh id is requested instead of reusing
    the source id.

    Returns:
        Mapping from stringified source user id to the target user id.
    """
    fallback_created_at = datetime.now(timezone.utc)
    legacy_source = source_table.name == "user"
    mapping: dict[str, str] = {}

    for record in fetch_rows(source_session, source_table):
        source_key = str(record.get("id"))

        target_key = existing_id(target_session, users, source_key)
        if not target_key:
            target_key = first_existing_user(target_session, record)
        if target_key:
            mapping[source_key] = target_key
            stats.add("users", "reused")
            continue

        preferred_id = None if legacy_source else source_key
        target_key = available_id(target_session, users, preferred_id)
        insert_stmt = users.insert().values(
            id=target_key,
            username=record["username"],
            email=record["email"],
            # Legacy rows may store the hash under "password".
            hashed_password=record.get("hashed_password") or record.get("password") or "",
            is_admin=bool(record.get("is_admin")),
            created_at=record.get("created_at") or fallback_created_at,
            last_login=record.get("last_login"),
            hf_token=record.get("hf_token"),
        )
        target_session.execute(insert_stmt)
        mapping[source_key] = target_key
        stats.add("users", "inserted")

    return mapping
215
+
216
+
217
def copy_api_keys(
    source_session: Session,
    target_session: Session,
    source_table: Table | None,
    user_id_map: dict[str, str],
    stats: MigrationStats,
) -> dict[str, str]:
    """Copy API key rows into the target ``api_keys`` table.

    Rows whose ``user_id`` has no entry in ``user_id_map`` are counted as
    "skipped". Rows whose ``id`` or ``hashed_key`` already exists in the
    target are counted as "reused". Everything else is inserted.

    Returns:
        Mapping from stringified source key id to target key id; empty when
        ``source_table`` is ``None``.
    """
    mapping: dict[str, str] = {}
    if source_table is None:
        return mapping

    for record in fetch_rows(source_session, source_table):
        source_key = str(record.get("id"))

        owner_id = user_id_map.get(str(record.get("user_id")))
        if not owner_id:
            stats.add("api_keys", "skipped")
            continue

        target_key = existing_id(target_session, api_keys, source_key)
        if not target_key:
            same_hash = select(api_keys.c.id).where(
                api_keys.c.hashed_key == record.get("hashed_key")
            )
            target_key = target_session.execute(same_hash).scalar_one_or_none()
        if target_key:
            mapping[source_key] = target_key
            stats.add("api_keys", "reused")
            continue

        target_key = available_id(target_session, api_keys, source_key)
        insert_stmt = api_keys.insert().values(
            id=target_key,
            user_id=owner_id,
            key_prefix=record["key_prefix"],
            hashed_key=record["hashed_key"],
            created_at=record.get("created_at") or datetime.now(timezone.utc),
            last_used=record.get("last_used"),
        )
        target_session.execute(insert_stmt)
        mapping[source_key] = target_key
        stats.add("api_keys", "inserted")

    return mapping
261
+
262
+
263
def copy_documents(
    source_session: Session,
    target_session: Session,
    source_table: Table | None,
    user_id_map: dict[str, str],
    stats: MigrationStats,
) -> dict[str, str]:
    """Copy document rows into the target ``documents`` table.

    Rows whose ``user_id`` has no entry in ``user_id_map`` are counted as
    "skipped"; rows whose ``id`` already exists in the target are counted as
    "reused"; the rest are inserted, with missing counters defaulting to 0 and
    a missing status defaulting to ``"pending"``.

    Returns:
        Mapping from stringified source document id to target document id;
        empty when ``source_table`` is ``None``.
    """
    mapping: dict[str, str] = {}
    if source_table is None:
        return mapping

    for record in fetch_rows(source_session, source_table):
        source_key = str(record.get("id"))

        owner_id = user_id_map.get(str(record.get("user_id")))
        if not owner_id:
            stats.add("documents", "skipped")
            continue

        already_there = existing_id(target_session, documents, source_key)
        if already_there:
            mapping[source_key] = already_there
            stats.add("documents", "reused")
            continue

        target_key = available_id(target_session, documents, source_key)
        insert_stmt = documents.insert().values(
            id=target_key,
            user_id=owner_id,
            filename=record["filename"],
            original_name=record["original_name"],
            file_size=record.get("file_size") or 0,
            page_count=record.get("page_count") or 0,
            chunk_count=record.get("chunk_count") or 0,
            status=record.get("status") or "pending",
            error_message=record.get("error_message"),
            uploaded_at=record.get("uploaded_at") or datetime.now(timezone.utc),
            summary=record.get("summary"),
        )
        target_session.execute(insert_stmt)
        mapping[source_key] = target_key
        stats.add("documents", "inserted")

    return mapping
307
+
308
+
309
def copy_chat_messages(
    source_session: Session,
    target_session: Session,
    source_table: Table | None,
    user_id_map: dict[str, str],
    document_id_map: dict[str, str],
    stats: MigrationStats,
) -> dict[str, str]:
    """Copy chat message rows into the target ``chat_messages`` table.

    A row is counted as "skipped" when its user has no entry in
    ``user_id_map``, or when it references a document that has no entry in
    ``document_id_map``. Rows whose ``id`` already exists in the target are
    counted as "reused"; the rest are inserted.

    Returns:
        Mapping from stringified source message id to target message id;
        empty when ``source_table`` is ``None``.
    """
    mapping: dict[str, str] = {}
    if source_table is None:
        return mapping

    for record in fetch_rows(source_session, source_table):
        source_key = str(record.get("id"))
        owner_id = user_id_map.get(str(record.get("user_id")))

        source_document = record.get("document_id")
        if source_document:
            target_document = document_id_map.get(str(source_document))
        else:
            target_document = None

        # A message that points at a document we could not map is dropped.
        document_unresolved = source_document and not target_document
        if not owner_id or document_unresolved:
            stats.add("chat_messages", "skipped")
            continue

        already_there = existing_id(target_session, chat_messages, source_key)
        if already_there:
            mapping[source_key] = already_there
            stats.add("chat_messages", "reused")
            continue

        target_key = available_id(target_session, chat_messages, source_key)
        insert_stmt = chat_messages.insert().values(
            id=target_key,
            user_id=owner_id,
            document_id=target_document,
            role=record["role"],
            content=record["content"],
            sources_json=record.get("sources_json"),
            created_at=record.get("created_at") or datetime.now(timezone.utc),
        )
        target_session.execute(insert_stmt)
        mapping[source_key] = target_key
        stats.add("chat_messages", "inserted")

    return mapping
352
+
353
+
354
def copy_shared_messages(
    source_session: Session,
    target_session: Session,
    source_table: Table | None,
    message_id_map: dict[str, str],
    stats: MigrationStats,
) -> None:
    """Copy shared-message rows into the target ``shared_messages`` table.

    Rows whose ``message_id`` has no entry in ``message_id_map`` are counted
    as "skipped". Rows are counted as "reused" when the target already has a
    row with the same ``id`` or one pointing at the same mapped message.
    Everything else is inserted. Does nothing when ``source_table`` is
    ``None``.
    """
    if source_table is None:
        return

    for record in fetch_rows(source_session, source_table):
        source_key = str(record.get("id"))

        target_message = message_id_map.get(str(record.get("message_id")))
        if not target_message:
            stats.add("shared_messages", "skipped")
            continue

        already_there = existing_id(target_session, shared_messages, source_key)
        if not already_there:
            same_message = select(shared_messages.c.id).where(
                shared_messages.c.message_id == target_message
            )
            already_there = target_session.execute(same_message).scalar_one_or_none()
        if already_there:
            stats.add("shared_messages", "reused")
            continue

        target_key = available_id(target_session, shared_messages, source_key)
        insert_stmt = shared_messages.insert().values(
            id=target_key,
            message_id=target_message,
            created_at=record.get("created_at") or datetime.now(timezone.utc),
        )
        target_session.execute(insert_stmt)
        stats.add("shared_messages", "inserted")
389
+
390
+
391
def migrate(
    sqlite_url: str,
    postgres_url: str,
    create_tables: bool,
    dry_run: bool,
) -> MigrationStats:
    """Copy users, API keys, documents, chat and shared messages to Postgres.

    Args:
        sqlite_url: SQLAlchemy URL of the source SQLite database.
        postgres_url: URL of the target database; passed through
            ``normalize_postgres_url`` before the engine is created.
        create_tables: When true, ``metadata.create_all`` is run against the
            target engine before any rows are copied.
        dry_run: When true, the target session is rolled back instead of
            committed once all copy steps have run.

    Returns:
        The ``MigrationStats`` filled in by the copy steps.

    Raises:
        RuntimeError: If the source has neither a ``users`` nor a legacy
            ``user`` table.
        IntegrityError: Re-raised after rolling back the target session.
        Exception: Any other failure is re-raised after rolling back the
            target session.
    """
    source_engine = make_engine(sqlite_url)
    target_engine = make_engine(normalize_postgres_url(postgres_url))

    # NOTE(review): create_all runs on the engine, outside target_session, so
    # it is presumably not undone by the dry-run rollback below - confirm.
    if create_tables:
        metadata.create_all(target_engine)

    source_session = make_session(source_engine)
    target_session = make_session(target_engine)
    stats = MigrationStats()

    try:
        # Prefer the current "users" table; fall back to the legacy "user" table.
        current_users = reflected_table(source_engine, "users")
        legacy_users = reflected_table(source_engine, "user")
        source_users = current_users if current_users is not None else legacy_users
        if source_users is None:
            raise RuntimeError("No users table found. Expected 'users' or legacy 'user'.")

        # Copy order follows the id maps: users feed API keys and documents,
        # users + documents feed chat messages, chat messages feed shared messages.
        user_id_map = copy_users(source_session, target_session, source_users, stats)
        copy_api_keys(source_session, target_session, reflected_table(source_engine, "api_keys"), user_id_map, stats)
        document_id_map = copy_documents(
            source_session,
            target_session,
            reflected_table(source_engine, "documents"),
            user_id_map,
            stats,
        )
        message_id_map = copy_chat_messages(
            source_session,
            target_session,
            reflected_table(source_engine, "chat_messages"),
            user_id_map,
            document_id_map,
            stats,
        )
        copy_shared_messages(
            source_session,
            target_session,
            reflected_table(source_engine, "shared_messages"),
            message_id_map,
            stats,
        )

        if dry_run:
            target_session.rollback()
            LOGGER.info("Dry run complete; rolled back target transaction.")
        else:
            target_session.commit()
            LOGGER.info("Migration committed.")

        return stats
    except IntegrityError:
        # Handled separately only to log a more specific message.
        target_session.rollback()
        LOGGER.exception("Migration failed because the target database rejected a row.")
        raise
    except Exception:
        target_session.rollback()
        LOGGER.exception("Migration failed; rolled back target transaction.")
        raise
    finally:
        # Always release sessions and engines, on success and on failure.
        source_session.close()
        target_session.close()
        source_engine.dispose()
        target_engine.dispose()
460
+
461
+
462
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the migration script.

    ``--postgres-url`` defaults to the first non-empty value among the
    ``SUPABASE_DB_URL``, ``POSTGRES_DATABASE_URL`` and ``DATABASE_URL``
    environment variables.
    """
    env_postgres_url = (
        os.getenv("SUPABASE_DB_URL")
        or os.getenv("POSTGRES_DATABASE_URL")
        or os.getenv("DATABASE_URL")
    )

    cli = argparse.ArgumentParser(
        description="Migrate SQLite users/documents/chat history to Supabase Postgres."
    )
    cli.add_argument(
        "--sqlite-path",
        default="instance/users.db",
        help="Path to the SQLite database file. Defaults to instance/users.db.",
    )
    cli.add_argument("--sqlite-url", help="Full SQLite SQLAlchemy URL. Overrides --sqlite-path.")
    cli.add_argument(
        "--postgres-url",
        default=env_postgres_url,
        help="Supabase/Postgres SQLAlchemy URL. Also read from SUPABASE_DB_URL, POSTGRES_DATABASE_URL, or DATABASE_URL.",
    )

    # The remaining options are plain boolean switches.
    switches = (
        ("--no-create-tables", "Do not create missing target tables before migrating."),
        ("--dry-run", "Run the migration and roll back the target transaction."),
        ("--verbose", "Enable debug logging."),
    )
    for flag, help_text in switches:
        cli.add_argument(flag, action="store_true", help=help_text)

    return cli.parse_args()
490
+
491
+
492
def main() -> int:
    """Run the migration from the command line and log per-table counts.

    Returns:
        ``2`` when no usable Postgres URL was supplied (missing, or starting
        with ``sqlite``); ``0`` after the migration call returns.
    """
    args = parse_args()
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=log_level, format="%(levelname)s %(message)s")

    target_url = args.postgres_url
    if not target_url or target_url.startswith("sqlite"):
        LOGGER.error("Provide a Supabase/Postgres URL with --postgres-url or SUPABASE_DB_URL.")
        return 2

    # An explicit --sqlite-url wins over the path-derived URL.
    source_url = args.sqlite_url or sqlite_url_from_path(args.sqlite_path)
    stats = migrate(
        sqlite_url=source_url,
        postgres_url=target_url,
        create_tables=not args.no_create_tables,
        dry_run=args.dry_run,
    )

    touched_tables = set(stats.inserted) | set(stats.reused) | set(stats.skipped)
    for table_name in sorted(touched_tables):
        counts = (
            stats.inserted.get(table_name, 0),
            stats.reused.get(table_name, 0),
            stats.skipped.get(table_name, 0),
        )
        LOGGER.info("%s: inserted=%s reused=%s skipped=%s", table_name, *counts)
    return 0
521
+
522
+
523
# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)
backend/tests/test_auth.py CHANGED
@@ -115,3 +115,10 @@ def test_hf_token_appears_in_user_response(client, auth_headers, user, db_sessio
115
  me_resp = client.get("/api/v1/auth/me", headers=auth_headers)
116
  assert me_resp.status_code == 200
117
  assert me_resp.json()["hf_token"] == "hf_persist_token"
 
 
 
 
 
 
 
 
115
  me_resp = client.get("/api/v1/auth/me", headers=auth_headers)
116
  assert me_resp.status_code == 200
117
  assert me_resp.json()["hf_token"] == "hf_persist_token"
118
+
119
+ # Verify encryption at rest in the database directly
120
+ from sqlalchemy import text
121
+ row = db_session.execute(text("SELECT hf_token FROM users WHERE id = :id"), {"id": user.id}).fetchone()
122
+ stored_token = row[0]
123
+ assert stored_token is not None
124
+ assert stored_token != "hf_persist_token"
backend/tests/test_chat.py CHANGED
@@ -1,7 +1,7 @@
1
  def test_chat_ask_success(client, auth_headers, ready_document, monkeypatch):
2
  monkeypatch.setattr(
3
  "app.routes.chat.generate_answer",
4
- lambda question, user_id, document_id=None: {
5
  "answer": "Mocked answer",
6
  "sources": [
7
  {
@@ -48,3 +48,34 @@ def test_chat_ask_document_not_ready(client, auth_headers, pending_document):
48
 
49
  assert response.status_code == 400
50
  assert "Document is still pending" in response.json()["detail"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  def test_chat_ask_success(client, auth_headers, ready_document, monkeypatch):
2
  monkeypatch.setattr(
3
  "app.routes.chat.generate_answer",
4
+ lambda question, user_id, document_id=None, **kwargs: {
5
  "answer": "Mocked answer",
6
  "sources": [
7
  {
 
48
 
49
  assert response.status_code == 400
50
  assert "Document is still pending" in response.json()["detail"]
51
+
52
+
53
def test_agent_dynamic_token(monkeypatch):
    """generate_answer passes the per-call HF token to InferenceClient.

    With an explicit ``hf_token`` the client must be built with that token;
    with ``hf_token=None`` it must be built with ``get_settings().HF_TOKEN``.
    """
    from app.rag.agent import generate_answer
    import app.rag.agent

    captured_token = None

    class _FakeInferenceClient:
        """Stand-in client that records the token it was constructed with."""

        def __init__(self, token=None, **kwargs):
            nonlocal captured_token
            captured_token = token

        def chat_completion(self, *args, **kwargs):
            class _EmptyResponse:
                choices = []

            return _EmptyResponse()

    # Swap in the fake client and make retrieval return no chunks.
    monkeypatch.setattr("app.rag.agent.InferenceClient", _FakeInferenceClient)
    monkeypatch.setattr(app.rag.agent, "retrieve", lambda **kwargs: [])

    # Explicit token is forwarded as-is.
    generate_answer(question="hello?", user_id="some-user", hf_token="my-custom-hf-token")
    assert captured_token == "my-custom-hf-token"

    # No token: the globally configured token is used instead.
    generate_answer(question="hello?", user_id="some-user", hf_token=None)
    from app.config import get_settings

    expected_token = get_settings().HF_TOKEN
    assert captured_token == expected_token
backend/tests/test_retriever.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.rag import retriever
2
+
3
+
4
def test_transform_query_includes_original_and_dedupes(monkeypatch):
    """transform_query keeps the original query first and drops case-only duplicates."""
    generated_variants = [
        "How do taxes work?",
        "how do taxes work?",
        "How does healthcare work?",
        "healthcare overview",
    ]
    monkeypatch.setattr(
        retriever,
        "_generate_query_variants",
        lambda _query: list(generated_variants),
    )

    expected = [
        "How do taxes and healthcare work?",
        "How do taxes work?",
        "How does healthcare work?",
        "healthcare overview",
    ]
    assert retriever.transform_query("How do taxes and healthcare work?") == expected
24
+
25
+
26
def test_retrieve_fans_out_transformed_queries_and_merges_duplicates(monkeypatch):
    """retrieve searches once per transformed query and merges duplicate chunks.

    The "shared" chunk is returned by both searches with different scores; the
    merged result is expected to list it once, first, with the higher score.
    """
    embeddings_searched = []

    def make_chunk(chunk_id, text, page, score):
        # Fresh dict per call so retrieve() may annotate results freely.
        return {
            "id": chunk_id,
            "text": text,
            "filename": "policy.pdf",
            "page": page,
            "score": score,
        }

    monkeypatch.setattr(retriever, "transform_query", lambda _query: ["taxes", "healthcare"])
    monkeypatch.setattr(retriever, "embed_query", lambda query: f"embedding:{query}")
    monkeypatch.setattr(retriever, "get_reranker", lambda: None)

    def fake_query_chunks(query_embedding, user_id, document_id=None, top_k=10):
        embeddings_searched.append(query_embedding)
        if query_embedding == "embedding:taxes":
            return [
                make_chunk("shared", "Shared chunk", 1, 0.2),
                make_chunk("taxes", "Tax chunk", 2, 0.7),
            ]
        return [
            make_chunk("shared", "Shared chunk", 1, 0.9),
            make_chunk("healthcare", "Healthcare chunk", 3, 0.8),
        ]

    monkeypatch.setattr(retriever, "query_chunks", fake_query_chunks)

    chunks = retriever.retrieve("How do taxes and healthcare work?", user_id="user-1")

    assert embeddings_searched == ["embedding:taxes", "embedding:healthcare"]
    returned_ids = [chunk["id"] for chunk in chunks]
    assert returned_ids == ["shared", "healthcare", "taxes"]
    top_chunk = chunks[0]
    assert top_chunk["score"] == 0.9
    assert top_chunk["confidence"] == 100.0
frontend/package-lock.json CHANGED
@@ -9,6 +9,7 @@
9
  "version": "0.1.0",
10
  "dependencies": {
11
  "@base-ui/react": "^1.4.1",
 
12
  "class-variance-authority": "^0.7.1",
13
  "clsx": "^2.1.1",
14
  "i18next": "^26.3.0",
@@ -2532,6 +2533,31 @@
2532
  "tailwindcss": "4.2.2"
2533
  }
2534
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2535
  "node_modules/@ts-morph/common": {
2536
  "version": "0.27.0",
2537
  "resolved": "https://registry.npmjs.org/@ts-morph/common/-/common-0.27.0.tgz",
@@ -10845,7 +10871,6 @@
10845
  "version": "4.2.2",
10846
  "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.2.2.tgz",
10847
  "integrity": "sha512-KWBIxs1Xb6NoLdMVqhbhgwZf2PGBpPEiwOqgI4pFIYbNTfBXiKYyWoTsXgBQ9WFg/OlhnvHaY+AEpW7wSmFo2Q==",
10848
- "dev": true,
10849
  "license": "MIT"
10850
  },
10851
  "node_modules/tapable": {
 
9
  "version": "0.1.0",
10
  "dependencies": {
11
  "@base-ui/react": "^1.4.1",
12
+ "@tailwindcss/typography": "^0.5.19",
13
  "class-variance-authority": "^0.7.1",
14
  "clsx": "^2.1.1",
15
  "i18next": "^26.3.0",
 
2533
  "tailwindcss": "4.2.2"
2534
  }
2535
  },
2536
+ "node_modules/@tailwindcss/typography": {
2537
+ "version": "0.5.19",
2538
+ "resolved": "https://registry.npmjs.org/@tailwindcss/typography/-/typography-0.5.19.tgz",
2539
+ "integrity": "sha512-w31dd8HOx3k9vPtcQh5QHP9GwKcgbMp87j58qi6xgiBnFFtKEAgCWnDw4qUT8aHwkCp8bKvb/KGKWWHedP0AAg==",
2540
+ "license": "MIT",
2541
+ "dependencies": {
2542
+ "postcss-selector-parser": "6.0.10"
2543
+ },
2544
+ "peerDependencies": {
2545
+ "tailwindcss": ">=3.0.0 || insiders || >=4.0.0-alpha.20 || >=4.0.0-beta.1"
2546
+ }
2547
+ },
2548
+ "node_modules/@tailwindcss/typography/node_modules/postcss-selector-parser": {
2549
+ "version": "6.0.10",
2550
+ "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.0.10.tgz",
2551
+ "integrity": "sha512-IQ7TZdoaqbT+LCpShg46jnZVlhWD2w6iQYAcYXfHARZ7X1t/UGhhceQDs5X0cGqKvYlHNOuv7Oa1xmb0oQuA3w==",
2552
+ "license": "MIT",
2553
+ "dependencies": {
2554
+ "cssesc": "^3.0.0",
2555
+ "util-deprecate": "^1.0.2"
2556
+ },
2557
+ "engines": {
2558
+ "node": ">=4"
2559
+ }
2560
+ },
2561
  "node_modules/@ts-morph/common": {
2562
  "version": "0.27.0",
2563
  "resolved": "https://registry.npmjs.org/@ts-morph/common/-/common-0.27.0.tgz",
 
10871
  "version": "4.2.2",
10872
  "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.2.2.tgz",
10873
  "integrity": "sha512-KWBIxs1Xb6NoLdMVqhbhgwZf2PGBpPEiwOqgI4pFIYbNTfBXiKYyWoTsXgBQ9WFg/OlhnvHaY+AEpW7wSmFo2Q==",
 
10874
  "license": "MIT"
10875
  },
10876
  "node_modules/tapable": {
frontend/package.json CHANGED
@@ -12,6 +12,7 @@
12
  },
13
  "dependencies": {
14
  "@base-ui/react": "^1.4.1",
 
15
  "class-variance-authority": "^0.7.1",
16
  "clsx": "^2.1.1",
17
  "i18next": "^26.3.0",
 
12
  },
13
  "dependencies": {
14
  "@base-ui/react": "^1.4.1",
15
+ "@tailwindcss/typography": "^0.5.19",
16
  "class-variance-authority": "^0.7.1",
17
  "clsx": "^2.1.1",
18
  "i18next": "^26.3.0",
frontend/src/app/dashboard/page.tsx CHANGED
@@ -3,17 +3,41 @@
3
  import { useEffect, useState, useCallback } from "react";
4
  import { useRouter } from "next/navigation";
5
  import { useAuth } from "@/lib/auth";
6
- import {
7
- api,
8
- CONNECTION_ERROR_BANNER_MESSAGE,
9
- CONNECTION_ERROR_MESSAGE,
10
- } from "@/lib/api";
11
-
12
  import Header from "@/components/layout/Header";
13
  import DocumentSidebar from "@/components/document/DocumentSidebar";
14
  import ChatPanel from "@/components/chat/ChatPanel";
15
- import PDFViewer from "@/components/document/PDFViewer";
16
- import { Skeleton } from "@/components/ui/skeleton";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  export interface DocInfo {
19
  summary: string;
@@ -27,23 +51,6 @@ export interface DocInfo {
27
  uploaded_at: string;
28
  }
29
 
30
- function DocumentSkeleton() {
31
- return (
32
- <div className="w-72 flex-shrink-0 border-r border-border/50 p-4 space-y-4">
33
- {[1, 2, 3, 4].map((item) => (
34
- <div
35
- key={item}
36
- className="rounded-lg border border-border/50 p-4 space-y-3"
37
- >
38
- <Skeleton className="h-4 w-[180px]" />
39
- <Skeleton className="h-3 w-[120px]" />
40
- <Skeleton className="h-3 w-[90px]" />
41
- </div>
42
- ))}
43
- </div>
44
- );
45
- }
46
-
47
  export default function DashboardPage() {
48
  const { user, loading } = useAuth();
49
  const router = useRouter();
@@ -54,7 +61,6 @@ export default function DashboardPage() {
54
  const [sidebarOpen, setSidebarOpen] = useState(true);
55
  const [viewerOpen, setViewerOpen] = useState(true);
56
  const [connectionError, setConnectionError] = useState("");
57
- const [documentsLoading, setDocumentsLoading] = useState(true);
58
 
59
  // Auth guard
60
  useEffect(() => {
@@ -76,31 +82,23 @@ export default function DashboardPage() {
76
  // Load documents
77
  const loadDocuments = useCallback(async () => {
78
  try {
79
- setDocumentsLoading(true);
80
-
81
  const data = await api.get<{ documents?: DocInfo[]; items?: DocInfo[] }>(
82
  "/api/v1/documents/"
83
  );
84
-
85
  setDocuments(data?.documents ?? data?.items ?? []);
86
  setConnectionError("");
87
  } catch (err) {
88
- const message =
89
- err instanceof Error ? err.message : CONNECTION_ERROR_MESSAGE;
90
-
91
  setConnectionError(
92
  message === CONNECTION_ERROR_MESSAGE
93
  ? CONNECTION_ERROR_BANNER_MESSAGE
94
  : `⚠️ ${message}`
95
  );
96
- } finally {
97
- setDocumentsLoading(false);
98
  }
99
  }, []);
100
 
101
  useEffect(() => {
102
  if (!user) return;
103
-
104
  void (async () => {
105
  await loadDocuments();
106
  })();
@@ -111,11 +109,9 @@ export default function DashboardPage() {
111
  const hasPending = (documents || []).some(
112
  (d) => d.status === "pending" || d.status === "processing"
113
  );
114
-
115
  if (!hasPending) return;
116
 
117
  const interval = setInterval(loadDocuments, 3000);
118
-
119
  return () => clearInterval(interval);
120
  }, [documents, loadDocuments]);
121
 
@@ -127,6 +123,19 @@ export default function DashboardPage() {
127
  );
128
  }
129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  return (
131
  <div className="h-screen flex flex-col overflow-hidden">
132
  <Header
@@ -134,6 +143,7 @@ export default function DashboardPage() {
134
  onToggleSidebar={() => setSidebarOpen(!sidebarOpen)}
135
  viewerOpen={viewerOpen}
136
  onToggleViewer={() => setViewerOpen(!viewerOpen)}
 
137
  />
138
 
139
  {connectionError && (
@@ -146,49 +156,35 @@ export default function DashboardPage() {
146
  )}
147
 
148
  <div className="flex-1 flex overflow-hidden">
149
- {/* ── Left: Document Sidebar / Skeleton ──────────────── */}
150
- {sidebarOpen &&
151
- (documentsLoading ? (
152
- <DocumentSkeleton />
153
- ) : (
154
- <div className="w-72 flex-shrink-0 border-r border-border/50 overflow-hidden animate-fade-in-up">
155
- <DocumentSidebar
156
- documents={documents}
157
- activeDoc={activeDoc}
158
- onSelectDoc={(doc) => {
159
- setActiveDoc(doc);
160
- setPdfPage(1);
161
- }}
162
- onDocumentsChange={loadDocuments}
163
- />
164
- </div>
165
- ))}
166
-
167
- {/* ── Center: Chat Panel ─────────────────── */}
168
  <div className="flex-1 min-w-0 flex flex-col">
169
  <ChatPanel
170
  activeDoc={activeDoc}
171
  onCitationClick={(page) => {
172
  setPdfPage(page);
173
-
174
  if (!viewerOpen) setViewerOpen(true);
175
  }}
176
  />
177
  </div>
178
 
179
- {/* ── Right: PDF Viewer ──────────────────── */}
180
- {viewerOpen &&
181
- activeDoc &&
182
- activeDoc.original_name.endsWith(".pdf") && (
183
- <div className="w-[480px] flex-shrink-0 border-l border-border/50 overflow-hidden animate-fade-in-up">
184
- <PDFViewer
185
- documentId={activeDoc.id}
186
- currentPage={pdfPage}
187
- onPageChange={setPdfPage}
188
- totalPages={activeDoc.page_count}
189
- />
190
- </div>
191
- )}
192
  </div>
193
  </div>
194
  );
 
3
  import { useEffect, useState, useCallback } from "react";
4
  import { useRouter } from "next/navigation";
5
  import { useAuth } from "@/lib/auth";
6
+ import { api, CONNECTION_ERROR_BANNER_MESSAGE, CONNECTION_ERROR_MESSAGE } from "@/lib/api";
 
 
 
 
 
7
  import Header from "@/components/layout/Header";
8
  import DocumentSidebar from "@/components/document/DocumentSidebar";
9
  import ChatPanel from "@/components/chat/ChatPanel";
10
+
11
/**
 * Placeholder layout rendered while the PDF viewer is loading.
 *
 * Draws a toolbar row with two groups of pulsing blocks and a large pulsing
 * panel below it. The root element is marked `aria-busy` and labelled
 * "Loading PDF viewer" for assistive technology.
 */
function PDFViewerSkeleton() {
  return (
    <div
      className="h-full flex flex-col bg-background"
      aria-busy="true"
      aria-label="Loading PDF viewer"
    >
      {/* Toolbar placeholder */}
      <div className="flex items-center justify-between px-3 py-2 border-b border-border/50 bg-card/50 shrink-0">
        <div className="flex items-center gap-2">
          <div className="h-7 w-7 rounded-md bg-muted/70 animate-pulse" />
          <div className="h-7 w-20 rounded-md bg-muted/70 animate-pulse" />
          <div className="h-7 w-7 rounded-md bg-muted/70 animate-pulse" />
        </div>
        <div className="flex items-center gap-2">
          <div className="h-7 w-7 rounded-md bg-muted/70 animate-pulse" />
          <div className="h-4 w-10 rounded bg-muted/70 animate-pulse" />
          <div className="h-7 w-7 rounded-md bg-muted/70 animate-pulse" />
        </div>
      </div>
      {/* Page-area placeholder */}
      <div className="flex-1 p-4">
        <div className="h-full rounded-lg border border-border/50 bg-muted/40 animate-pulse" />
      </div>
    </div>
  );
}
36
+
37
// Load the PDF viewer component lazily with server-side rendering disabled,
// showing PDFViewerSkeleton while the component is being loaded.
// NOTE(review): no import of `dynamic` is visible in this hunk - presumably
// `next/dynamic`; confirm it is imported at the top of the file.
const PDFViewer = dynamic(() => import("@/components/document/PDFViewer"), {
  ssr: false,
  loading: () => <PDFViewerSkeleton />,
});
41
 
42
  export interface DocInfo {
43
  summary: string;
 
51
  uploaded_at: string;
52
  }
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  export default function DashboardPage() {
55
  const { user, loading } = useAuth();
56
  const router = useRouter();
 
61
  const [sidebarOpen, setSidebarOpen] = useState(true);
62
  const [viewerOpen, setViewerOpen] = useState(true);
63
  const [connectionError, setConnectionError] = useState("");
 
64
 
65
  // Auth guard
66
  useEffect(() => {
 
82
  // Load documents
83
  const loadDocuments = useCallback(async () => {
84
  try {
 
 
85
  const data = await api.get<{ documents?: DocInfo[]; items?: DocInfo[] }>(
86
  "/api/v1/documents/"
87
  );
 
88
  setDocuments(data?.documents ?? data?.items ?? []);
89
  setConnectionError("");
90
  } catch (err) {
91
+ const message = err instanceof Error ? err.message : CONNECTION_ERROR_MESSAGE;
 
 
92
  setConnectionError(
93
  message === CONNECTION_ERROR_MESSAGE
94
  ? CONNECTION_ERROR_BANNER_MESSAGE
95
  : `⚠️ ${message}`
96
  );
 
 
97
  }
98
  }, []);
99
 
100
  useEffect(() => {
101
  if (!user) return;
 
102
  void (async () => {
103
  await loadDocuments();
104
  })();
 
109
  const hasPending = (documents || []).some(
110
  (d) => d.status === "pending" || d.status === "processing"
111
  );
 
112
  if (!hasPending) return;
113
 
114
  const interval = setInterval(loadDocuments, 3000);
 
115
  return () => clearInterval(interval);
116
  }, [documents, loadDocuments]);
117
 
 
123
  );
124
  }
125
 
126
+ // Shared sidebar content β€” used by both desktop panel and mobile sheet
127
+ const sidebarContent = (
128
+ <DocumentSidebar
129
+ documents={documents}
130
+ activeDoc={activeDoc}
131
+ onSelectDoc={(doc) => {
132
+ setActiveDoc(doc);
133
+ setPdfPage(1);
134
+ }}
135
+ onDocumentsChange={loadDocuments}
136
+ />
137
+ );
138
+
139
  return (
140
  <div className="h-screen flex flex-col overflow-hidden">
141
  <Header
 
143
  onToggleSidebar={() => setSidebarOpen(!sidebarOpen)}
144
  viewerOpen={viewerOpen}
145
  onToggleViewer={() => setViewerOpen(!viewerOpen)}
146
+ mobileSheetContent={sidebarContent}
147
  />
148
 
149
  {connectionError && (
 
156
  )}
157
 
158
  <div className="flex-1 flex overflow-hidden">
159
+ {/* ── Left: Document Sidebar β€” desktop only (md+) ─────────── */}
160
+ {sidebarOpen && (
161
+ <div className="hidden md:block w-72 flex-shrink-0 border-r border-border/50 overflow-hidden animate-fade-in-up">
162
+ {sidebarContent}
163
+ </div>
164
+ )}
165
+
166
+ {/* ── Center: Chat Panel ──────────────────────────────────── */}
 
 
 
 
 
 
 
 
 
 
 
167
  <div className="flex-1 min-w-0 flex flex-col">
168
  <ChatPanel
169
  activeDoc={activeDoc}
170
  onCitationClick={(page) => {
171
  setPdfPage(page);
 
172
  if (!viewerOpen) setViewerOpen(true);
173
  }}
174
  />
175
  </div>
176
 
177
+ {/* ── Right: PDF Viewer β€” hidden on mobile ────────────────── */}
178
+ {viewerOpen && activeDoc && activeDoc.original_name.endsWith(".pdf") && (
179
+ <div className="hidden md:block w-[480px] flex-shrink-0 border-l border-border/50 overflow-hidden animate-fade-in-up">
180
+ <PDFViewer
181
+ documentId={activeDoc.id}
182
+ currentPage={pdfPage}
183
+ onPageChange={setPdfPage}
184
+ totalPages={activeDoc.page_count}
185
+ />
186
+ </div>
187
+ )}
 
 
188
  </div>
189
  </div>
190
  );
frontend/src/app/globals.css CHANGED
@@ -1,6 +1,7 @@
1
  @import "tailwindcss";
2
  @import "tw-animate-css";
3
  @import "shadcn/tailwind.css";
 
4
 
5
  @custom-variant dark (&:is(.dark *));
6
 
 
1
  @import "tailwindcss";
2
  @import "tw-animate-css";
3
  @import "shadcn/tailwind.css";
4
+ @plugin "@tailwindcss/typography";
5
 
6
  @custom-variant dark (&:is(.dark *));
7
 
frontend/src/app/page.tsx CHANGED
@@ -128,8 +128,18 @@ export default function HomePage() {
128
  </div>
129
 
130
  {/* ── Footer ──────────────────────────────────── */}
131
- <footer className="text-center py-6 text-xs text-muted-foreground border-t border-border/50">
132
- Built with FastAPI β€’ LangChain β€’ ChromaDB β€’ HuggingFace β€’ Next.js
 
 
 
 
 
 
 
 
 
 
133
  </footer>
134
 
135
  {/* Hall of Fame Modal */}
 
128
  </div>
129
 
130
  {/* ── Footer ──────────────────────────────────── */}
131
+ <footer className="py-8 text-xs text-muted-foreground border-t border-border/50">
132
+ <div className="max-w-4xl mx-auto px-6 flex flex-col sm:flex-row items-center justify-between gap-4">
133
+ <span>Built with FastAPI β€’ LangChain β€’ ChromaDB β€’ HuggingFace β€’ Next.js</span>
134
+ <div className="flex items-center gap-4">
135
+ <Link href="/privacy" className="hover:text-foreground transition-colors">
136
+ Privacy Policy
137
+ </Link>
138
+ <Link href="/terms" className="hover:text-foreground transition-colors">
139
+ Terms of Service
140
+ </Link>
141
+ </div>
142
+ </div>
143
  </footer>
144
 
145
  {/* Hall of Fame Modal */}
frontend/src/app/privacy/page.tsx ADDED
@@ -0,0 +1,450 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { Metadata } from "next";
2
+ import Link from "next/link";
3
+ import { ArrowLeft, Shield, Brain, FileText, Database, Cookie, UserCheck, Mail } from "lucide-react";
4
+
5
+ export const metadata: Metadata = {
6
+ title: "Privacy Policy — Document AI Analyst",
7
+ description:
8
+ "How PDF-Assistant-RAG collects, uses, and protects your data. Learn about our privacy practices for document uploads, AI processing, and account information.",
9
+ openGraph: {
10
+ title: "Privacy Policy — Document AI Analyst",
11
+ description:
12
+ "How PDF-Assistant-RAG collects, uses, and protects your data.",
13
+ },
14
+ };
15
+
16
+ const sections = [
17
+ {
18
+ id: "information-we-collect",
19
+ icon: FileText,
20
+ title: "1. Information We Collect",
21
+ content: (
22
+ <>
23
+ <p>
24
+ When you use PDF-Assistant-RAG, we collect the following categories of information
25
+ to provide and improve our service:
26
+ </p>
27
+ <h3>Account Information</h3>
28
+ <ul>
29
+ <li>
30
+ <strong>Registration data:</strong> username, email address, and a securely hashed
31
+ password when you create an account.
32
+ </li>
33
+ <li>
34
+ <strong>Profile information:</strong> any optional details you choose to provide.
35
+ </li>
36
+ </ul>
37
+ <h3>Document Data</h3>
38
+ <ul>
39
+ <li>
40
+ <strong>Uploaded files:</strong> PDFs, DOCX, TXT, Markdown, and other documents you
41
+ upload for analysis.
42
+ </li>
43
+ <li>
44
+ <strong>Extracted content:</strong> text, embeddings, and metadata extracted from your
45
+ documents to enable semantic search and AI-powered question answering.
46
+ </li>
47
+ <li>
48
+ <strong>Chat history:</strong> questions you ask and the AI-generated responses, stored
49
+ to maintain conversation context.
50
+ </li>
51
+ </ul>
52
+ <h3>Usage Data</h3>
53
+ <ul>
54
+ <li>
55
+ <strong>Technical metadata:</strong> page views, feature interactions, query timestamps,
56
+ and performance metrics to improve the platform.
57
+ </li>
58
+ <li>
59
+ <strong>Device &amp; browser info:</strong> browser type, operating system, and basic
60
+ device information for compatibility optimization.
61
+ </li>
62
+ </ul>
63
+ </>
64
+ ),
65
+ },
66
+ {
67
+ id: "how-we-use-data",
68
+ icon: Brain,
69
+ title: "2. How We Use Your Data",
70
+ content: (
71
+ <>
72
+ <p>Your data is used solely for the core functionality of the platform:</p>
73
+ <ul>
74
+ <li>
75
+ <strong>AI-powered document analysis:</strong> Your documents are processed by
76
+ open-source large language models (LLMs) hosted on HuggingFace to generate insights,
77
+ summaries, and answers to your questions.
78
+ </li>
79
+ <li>
80
+ <strong>Semantic search &amp; retrieval:</strong> Document embeddings are stored in
81
+ vector databases (ChromaDB) to enable fast, accurate retrieval of relevant content.
82
+ </li>
83
+ <li>
84
+ <strong>Conversation continuity:</strong> Chat history is stored per session so you
85
+ can refer back to previous interactions.
86
+ </li>
87
+ <li>
88
+ <strong>Service improvement:</strong> Aggregated, anonymized usage patterns help us
89
+ identify bugs, optimize performance, and prioritize features.
90
+ </li>
91
+ </ul>
92
+ <p>
93
+ We <strong>do not</strong> use your uploaded documents or chat data to train or fine-tune
94
+ any AI models. Your content remains private to your account.
95
+ </p>
96
+ </>
97
+ ),
98
+ },
99
+ {
100
+ id: "data-storage-security",
101
+ icon: Shield,
102
+ title: "3. Data Storage & Security",
103
+ content: (
104
+ <>
105
+ <p>We take data protection seriously and implement multiple layers of security:</p>
106
+ <h3>Encryption</h3>
107
+ <ul>
108
+ <li>
109
+ <strong>In transit:</strong> All communications between your browser and our servers
110
+ are encrypted using TLS 1.3.
111
+ </li>
112
+ <li>
113
+ <strong>At rest:</strong> Document files, embeddings, and user data are stored in
114
+ encrypted storage volumes.
115
+ </li>
116
+ <li>
117
+ <strong>Passwords:</strong> Never stored in plain text — we use bcrypt hashing with
118
+ per-user salts.
119
+ </li>
120
+ </ul>
121
+ <h3>Data Isolation</h3>
122
+ <ul>
123
+ <li>
124
+ Each user&apos;s documents and embeddings are stored in isolated vector collections.
125
+ </li>
126
+ <li>
127
+ Authentication is enforced at every API endpoint — users can only access their own
128
+ data.
129
+ </li>
130
+ <li>
131
+ JWT tokens with short expiration and refresh token rotation prevent unauthorized
132
+ access.
133
+ </li>
134
+ </ul>
135
+ <h3>Infrastructure</h3>
136
+ <ul>
137
+ <li>
138
+ Servers are hosted on secure cloud infrastructure with strict access controls.
139
+ </li>
140
+ <li>
141
+ Regular security audits and dependency updates are performed.
142
+ </li>
143
+ </ul>
144
+ </>
145
+ ),
146
+ },
147
+ {
148
+ id: "data-retention",
149
+ icon: Database,
150
+ title: "4. Data Retention",
151
+ content: (
152
+ <>
153
+ <p>We retain your data only as long as necessary to provide the service:</p>
154
+ <ul>
155
+ <li>
156
+ <strong>Account data:</strong> Retained until you delete your account. You can request
157
+ account deletion at any time.
158
+ </li>
159
+ <li>
160
+ <strong>Uploaded documents &amp; embeddings:</strong> Retained until you delete them
161
+ or close your account. Documents can be removed individually from the dashboard.
162
+ </li>
163
+ <li>
164
+ <strong>Chat history:</strong> Retained per conversation. You can clear individual
165
+ chats or your entire history from the settings page.
166
+ </li>
167
+ <li>
168
+ <strong>Logs &amp; analytics:</strong> Aggregated usage data may be retained longer
169
+ in anonymized form for service improvement.
170
+ </li>
171
+ </ul>
172
+ <p>
173
+ When you delete your account, all associated documents, embeddings, chat histories, and
174
+ personal information are permanently deleted within 30 days.
175
+ </p>
176
+ </>
177
+ ),
178
+ },
179
+ {
180
+ id: "third-party-services",
181
+ icon: Database,
182
+ title: "5. Third-Party Services",
183
+ content: (
184
+ <>
185
+ <p>
186
+ PDF-Assistant-RAG integrates with the following third-party services to deliver its
187
+ functionality:
188
+ </p>
189
+ <ul>
190
+ <li>
191
+ <strong>HuggingFace Inference API:</strong> Used to run open-source LLMs for document
192
+ analysis. Document snippets may be sent to HuggingFace for inference; they are not
193
+ stored or used for training. See{" "}
194
+ <a
195
+ href="https://huggingface.co/privacy"
196
+ target="_blank"
197
+ rel="noopener noreferrer"
198
+ >
199
+ HuggingFace&apos;s Privacy Policy
200
+ </a>.
201
+ </li>
202
+ <li>
203
+ <strong>Google OAuth (optional):</strong> If you choose to sign in with Google, we
204
+ receive only your name and email address from your Google profile. See{" "}
205
+ <a
206
+ href="https://policies.google.com/privacy"
207
+ target="_blank"
208
+ rel="noopener noreferrer"
209
+ >
210
+ Google&apos;s Privacy Policy
211
+ </a>.
212
+ </li>
213
+ </ul>
214
+ <p>
215
+ We do not sell your personal information or document data to any third party.
216
+ </p>
217
+ </>
218
+ ),
219
+ },
220
+ {
221
+ id: "cookies",
222
+ icon: Cookie,
223
+ title: "6. Cookies",
224
+ content: (
225
+ <>
226
+ <p>We use only essential cookies required for the platform to function:</p>
227
+ <ul>
228
+ <li>
229
+ <strong>Authentication cookies:</strong> JWT refresh tokens stored securely as
230
+ HTTP-only cookies to maintain your login session.
231
+ </li>
232
+ <li>
233
+ <strong>Local storage:</strong> Access tokens and UI preferences (theme, language)
234
+ are stored in your browser&apos;s local storage. No tracking or advertising cookies
235
+ are used.
236
+ </li>
237
+ </ul>
238
+ <p>
239
+ You can clear these at any time via your browser settings. Note that clearing
240
+ authentication data will sign you out of your session.
241
+ </p>
242
+ </>
243
+ ),
244
+ },
245
+ {
246
+ id: "your-rights",
247
+ icon: UserCheck,
248
+ title: "7. Your Rights",
249
+ content: (
250
+ <>
251
+ <p>You have the following rights regarding your data:</p>
252
+ <ul>
253
+ <li>
254
+ <strong>Access:</strong> View all documents and data associated with your account at
255
+ any time from your dashboard.
256
+ </li>
257
+ <li>
258
+ <strong>Deletion:</strong> Delete individual documents or your entire account and
259
+ associated data.
260
+ </li>
261
+ <li>
262
+ <strong>Export:</strong> Request a copy of your data in a machine-readable format.
263
+ </li>
264
+ <li>
265
+ <strong>Correction:</strong> Update your account information (username, email) from
266
+ your profile settings.
267
+ </li>
268
+ <li>
269
+ <strong>Withdraw consent:</strong> Stop using the service and delete your account at
270
+ any time.
271
+ </li>
272
+ </ul>
273
+ <p>
274
+ To exercise any of these rights, please contact us using the information in the
275
+ &ldquo;Contact Us&rdquo; section below.
276
+ </p>
277
+ </>
278
+ ),
279
+ },
280
+ {
281
+ id: "changes",
282
+ icon: Shield,
283
+ title: "8. Changes to This Policy",
284
+ content: (
285
+ <>
286
+ <p>
287
+ We may update this Privacy Policy from time to time. Changes will be communicated by:
288
+ </p>
289
+ <ul>
290
+ <li>Posting the updated policy on this page with a new &ldquo;Last updated&rdquo; date.</li>
291
+ <li>
292
+ Sending a notification to your registered email address for material changes.
293
+ </li>
294
+ </ul>
295
+ <p>
296
+ Your continued use of the platform after changes constitutes acceptance of the updated
297
+ policy. We encourage you to review this page periodically.
298
+ </p>
299
+ </>
300
+ ),
301
+ },
302
+ {
303
+ id: "contact",
304
+ icon: Mail,
305
+ title: "9. Contact Us",
306
+ content: (
307
+ <>
308
+ <p>
309
+ If you have any questions, concerns, or requests regarding this Privacy Policy or your
310
+ data, please reach out through the project&rsquo;s official channels:
311
+ </p>
312
+ <ul>
313
+ <li>
314
+ <strong>GitHub Issues:</strong>{" "}
315
+ <a
316
+ href="https://github.com/param20h/PDF-Assistant-RAG/issues"
317
+ target="_blank"
318
+ rel="noopener noreferrer"
319
+ >
320
+ github.com/param20h/PDF-Assistant-RAG/issues
321
+ </a>
322
+ </li>
323
+ <li>
324
+ <strong>GitHub Discussions:</strong>{" "}
325
+ <a
326
+ href="https://github.com/param20h/PDF-Assistant-RAG/discussions"
327
+ target="_blank"
328
+ rel="noopener noreferrer"
329
+ >
330
+ github.com/param20h/PDF-Assistant-RAG/discussions
331
+ </a>
332
+ </li>
333
+ <li>
334
+ <strong>LinkedIn:</strong>{" "}
335
+ <a
336
+ href="https://www.linkedin.com/in/param20h/"
337
+ target="_blank"
338
+ rel="noopener noreferrer"
339
+ >
340
+ linkedin.com/in/param20h
341
+ </a>
342
+ </li>
343
+ </ul>
344
+ </>
345
+ ),
346
+ },
347
+ ];
348
+
349
+ export default function PrivacyPage() {
350
+ return (
351
+ <div className="min-h-screen bg-background">
352
+ {/* ── Header ────────────────────────────────────── */}
353
+ <header className="sticky top-0 z-50 border-b border-border/50 bg-card/50 backdrop-blur-md">
354
+ <div className="mx-auto max-w-4xl flex items-center justify-between px-6 h-14">
355
+ <Link
356
+ href="/"
357
+ className="flex items-center gap-2 text-sm text-muted-foreground hover:text-foreground transition-colors"
358
+ >
359
+ <ArrowLeft className="w-4 h-4" />
360
+ Back to Home
361
+ </Link>
362
+ <div className="flex items-center gap-2">
363
+ <div className="w-7 h-7 rounded-lg bg-primary/15 flex items-center justify-center">
364
+ <Shield className="w-4 h-4 text-primary" />
365
+ </div>
366
+ <span className="font-semibold text-sm">Privacy Policy</span>
367
+ </div>
368
+ </div>
369
+ </header>
370
+
371
+ {/* ── Hero ──────────────────────────────────────── */}
372
+ <section className="border-b border-border/50">
373
+ <div className="mx-auto max-w-4xl px-6 py-16 sm:py-20 text-center">
374
+ <div className="inline-flex items-center gap-2 px-4 py-1.5 rounded-full bg-primary/10 border border-primary/20 text-sm text-primary mb-6">
375
+ <Shield className="w-4 h-4" />
376
+ Your data matters
377
+ </div>
378
+ <h1 className="text-4xl sm:text-5xl font-bold tracking-tight mb-4">
379
+ Privacy Policy
380
+ </h1>
381
+ <p className="text-lg text-muted-foreground max-w-2xl mx-auto">
382
+ How we collect, use, and protect your data when you use PDF-Assistant-RAG.
383
+ </p>
384
+ <p className="mt-4 text-sm text-muted-foreground">
385
+ <em>Last updated: May 30, 2026</em>
386
+ </p>
387
+ </div>
388
+ </section>
389
+
390
+ {/* ── Content ───────────────────────────────────── */}
391
+ <div className="mx-auto max-w-4xl px-6 py-12 sm:py-16">
392
+ {/* Table of Contents */}
393
+ <nav className="mb-12 p-6 rounded-xl border border-border/50 bg-card/30" aria-label="Table of contents">
394
+ <h2 className="text-sm font-semibold uppercase tracking-wider text-muted-foreground mb-4">
395
+ On this page
396
+ </h2>
397
+ <ul className="space-y-2">
398
+ {sections.map((section) => (
399
+ <li key={section.id}>
400
+ <a
401
+ href={`#${section.id}`}
402
+ className="flex items-center gap-2 text-sm text-muted-foreground hover:text-foreground transition-colors"
403
+ >
404
+ <section.icon className="w-3.5 h-3.5 shrink-0 text-primary" />
405
+ {section.title}
406
+ </a>
407
+ </li>
408
+ ))}
409
+ </ul>
410
+ </nav>
411
+
412
+ {/* Sections */}
413
+ <div className="prose prose-sm sm:prose-base dark:prose-invert max-w-none prose-headings:font-semibold prose-headings:tracking-tight prose-h2:text-foreground prose-h3:text-foreground prose-p:text-muted-foreground prose-p:leading-relaxed prose-a:text-primary prose-a:no-underline hover:prose-a:underline prose-strong:text-foreground prose-li:text-muted-foreground prose-li:marker:text-primary/60">
414
+ {sections.map((section) => (
415
+ <section key={section.id} id={section.id} className="mb-12 scroll-mt-20">
416
+ <div className="flex items-center gap-3 mb-6">
417
+ <div className="w-8 h-8 rounded-lg bg-primary/10 flex items-center justify-center shrink-0">
418
+ <section.icon className="w-4 h-4 text-primary" />
419
+ </div>
420
+ <h2 className="text-xl sm:text-2xl !my-0">{section.title}</h2>
421
+ </div>
422
+ {section.content}
423
+ <hr className="mt-8 border-border/30" />
424
+ </section>
425
+ ))}
426
+ </div>
427
+
428
+ {/* Footer note */}
429
+ <div className="mt-8 text-center">
430
+ <p className="text-sm text-muted-foreground">
431
+ Have questions?{" "}
432
+ <a
433
+ href="https://github.com/param20h/PDF-Assistant-RAG/discussions"
434
+ target="_blank"
435
+ rel="noopener noreferrer"
436
+ className="text-primary hover:underline"
437
+ >
438
+ Start a discussion
439
+ </a>
440
+ </p>
441
+ </div>
442
+ </div>
443
+
444
+ {/* ── Footer ────────────────────────────────── */}
445
+ <footer className="text-center py-6 text-xs text-muted-foreground border-t border-border/50">
446
+ Built with FastAPI • LangChain • ChromaDB • HuggingFace • Next.js
447
+ </footer>
448
+ </div>
449
+ );
450
+ }
frontend/src/app/terms/page.tsx ADDED
@@ -0,0 +1,435 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { Metadata } from "next";
2
+ import Link from "next/link";
3
+ import {
4
+ ArrowLeft,
5
+ Shield,
6
+ CheckCircle,
7
+ FileText,
8
+ AlertTriangle,
9
+ UserCheck,
10
+ Scale,
11
+ Ban,
12
+ RefreshCw,
13
+ Mail,
14
+ } from "lucide-react";
15
+
16
+ const sections = [
17
+ {
18
+ id: "acceptance",
19
+ icon: CheckCircle,
20
+ title: "1. Acceptance of Terms",
21
+ content: (
22
+ <>
23
+ <p>
24
+ By accessing or using PDF-Assistant-RAG (&ldquo;the Platform&rdquo;), you agree to be
25
+ bound by these Terms of Service (&ldquo;Terms&rdquo;). If you do not agree to all terms,
26
+ you must not use the Platform.
27
+ </p>
28
+ <p>
29
+ These Terms apply to all visitors, users, and contributors to the Platform. By creating
30
+ an account, uploading documents, or interacting with the service in any way, you signify
31
+ your acceptance of these Terms.
32
+ </p>
33
+ </>
34
+ ),
35
+ },
36
+ {
37
+ id: "service-description",
38
+ icon: FileText,
39
+ title: "2. Description of Service",
40
+ content: (
41
+ <>
42
+ <p>
43
+ PDF-Assistant-RAG is an open-source document analysis platform that allows users to upload
44
+ documents (PDF, DOCX, TXT, Markdown) and interact with them through AI-powered semantic
45
+ search and chat, using Retrieval-Augmented Generation (RAG) and open-source large language
46
+ models (LLMs).
47
+ </p>
48
+ <p>The core features include:</p>
49
+ <ul>
50
+ <li>Document upload, storage, and management</li>
51
+ <li>AI-powered question answering and document analysis</li>
52
+ <li>Semantic search across uploaded documents</li>
53
+ <li>Conversation history and context retention</li>
54
+ <li>Multi-language support (English, Hindi, Spanish, French)</li>
55
+ </ul>
56
+ <p>
57
+ The Platform is provided &ldquo;as is&rdquo; and &ldquo;as available&rdquo; for
58
+ educational and productivity purposes. The maintainers make no guarantees about the
59
+ accuracy, completeness, or reliability of AI-generated responses.
60
+ </p>
61
+ </>
62
+ ),
63
+ },
64
+ {
65
+ id: "accounts",
66
+ icon: UserCheck,
67
+ title: "3. User Accounts & Registration",
68
+ content: (
69
+ <>
70
+ <p>To use certain features of the Platform, you must register for an account:</p>
71
+ <ul>
72
+ <li>
73
+ <strong>Accuracy:</strong> You agree to provide accurate, current, and complete
74
+ information during registration and to update it as necessary.
75
+ </li>
76
+ <li>
77
+ <strong>Security:</strong> You are responsible for safeguarding your password and for
78
+ all activities under your account. Notify the maintainers immediately of any
79
+ unauthorized use.
80
+ </li>
81
+ <li>
82
+ <strong>Account types:</strong> The Platform supports email/password registration and
83
+ optional Google OAuth sign-in.
84
+ </li>
85
+ <li>
86
+ <strong>One account per person:</strong> You may not create multiple accounts for the
87
+ same individual unless explicitly permitted.
88
+ </li>
89
+ <li>
90
+ <strong>No shared accounts:</strong> Account sharing with unauthorized users is
91
+ prohibited.
92
+ </li>
93
+ </ul>
94
+ </>
95
+ ),
96
+ },
97
+ {
98
+ id: "acceptable-use",
99
+ icon: Ban,
100
+ title: "4. Acceptable Use",
101
+ content: (
102
+ <>
103
+ <p>You agree to use the Platform only for lawful purposes and in accordance with these Terms. Prohibited activities include:</p>
104
+ <ul>
105
+ <li>
106
+ Uploading malware, viruses, or any malicious code
107
+ </li>
108
+ <li>
109
+ Uploading illegal, obscene, defamatory, or infringing content
110
+ </li>
111
+ <li>
112
+ Attempting to bypass authentication, access other users&apos; data, or exploit the
113
+ system
114
+ </li>
115
+ <li>
116
+ Using the Platform for automated scraping, data mining, or high-volume API abuse
117
+ </li>
118
+ <li>
119
+ Reverse-engineering, decompiling, or attempting to extract the source code of
120
+ proprietary components
121
+ </li>
122
+ <li>
123
+ Interfering with the operation of the Platform or its underlying infrastructure
124
+ </li>
125
+ </ul>
126
+ </>
127
+ ),
128
+ },
129
+ {
130
+ id: "content-data",
131
+ icon: Shield,
132
+ title: "5. Uploaded Content & Data",
133
+ content: (
134
+ <>
135
+ <p>
136
+ You retain full ownership of all documents and content you upload to the Platform
137
+ (&ldquo;Your Content&rdquo;). By uploading, you grant the Platform a limited, temporary
138
+ license to process, store, and analyze Your Content solely for the purpose of providing
139
+ the service.
140
+ </p>
141
+ <h3>Data Handling</h3>
142
+ <ul>
143
+ <li>
144
+ Your documents are processed by open-source LLMs hosted on HuggingFace. Document
145
+ snippets may be sent for inference but are not stored or used for training.
146
+ </li>
147
+ <li>
148
+ Document embeddings are stored in per-user isolated vector collections (ChromaDB).
149
+ </li>
150
+ <li>
151
+ Chat history is stored per session to maintain conversation context.
152
+ </li>
153
+ </ul>
154
+ <h3>Your Responsibilities</h3>
155
+ <ul>
156
+ <li>
157
+ You represent that you own or have the necessary rights to upload and process Your
158
+ Content.
159
+ </li>
160
+ <li>
161
+ You must not upload documents containing sensitive personal information, trade secrets,
162
+ or classified data unless you have the legal right to do so.
163
+ </li>
164
+ <li>
165
+ You are solely responsible for the legality, reliability, and accuracy of Your Content.
166
+ </li>
167
+ </ul>
168
+ <p>
169
+ See our{" "}
170
+ <Link href="/privacy" className="text-primary hover:underline">
171
+ Privacy Policy
172
+ </Link>{" "}
173
+ for more details on how we handle your data.
174
+ </p>
175
+ </>
176
+ ),
177
+ },
178
+ {
179
+ id: "intellectual-property",
180
+ icon: Scale,
181
+ title: "6. Intellectual Property",
182
+ content: (
183
+ <>
184
+ <p>
185
+ The Platform codebase is open-source and licensed under the{" "}
186
+ <a
187
+ href="https://opensource.org/licenses/MIT"
188
+ target="_blank"
189
+ rel="noopener noreferrer"
190
+ >
191
+ MIT License
192
+ </a>. This means:
193
+ </p>
194
+ <ul>
195
+ <li>
196
+ You may freely use, modify, and distribute the source code, subject to the terms of
197
+ the MIT License.
198
+ </li>
199
+ <li>
200
+ The name &ldquo;PDF-Assistant-RAG,&rdquo; its logo, and branding elements may not be
201
+ used without explicit permission.
202
+ </li>
203
+ <li>
204
+ AI-generated responses produced by the Platform are provided without warranty and
205
+ should not be considered professional advice (legal, financial, medical, etc.).
206
+ </li>
207
+ </ul>
208
+ </>
209
+ ),
210
+ },
211
+ {
212
+ id: "liability",
213
+ icon: AlertTriangle,
214
+ title: "7. Limitation of Liability",
215
+ content: (
216
+ <>
217
+ <p>
218
+ The Platform is provided free of charge as an open-source project. To the fullest extent
219
+ permitted by law:
220
+ </p>
221
+ <ul>
222
+ <li>
223
+ The maintainers shall not be liable for any indirect, incidental, special,
224
+ consequential, or punitive damages arising from your use of the Platform.
225
+ </li>
226
+ <li>
227
+ AI-generated content may contain errors, omissions, or inaccuracies. You should
228
+ independently verify critical information.
229
+ </li>
230
+ <li>
231
+ The Platform makes no guarantees about uptime, availability, or data durability,
232
+ though reasonable efforts are made to maintain the service.
233
+ </li>
234
+ </ul>
235
+ </>
236
+ ),
237
+ },
238
+ {
239
+ id: "termination",
240
+ icon: Ban,
241
+ title: "8. Termination",
242
+ content: (
243
+ <>
244
+ <p>
245
+ We reserve the right to suspend or terminate your access to the Platform at any time,
246
+ without prior notice, for:
247
+ </p>
248
+ <ul>
249
+ <li>Violation of these Terms of Service</li>
250
+ <li>Engaging in prohibited or illegal activities</li>
251
+ <li>Extended inactivity of your account</li>
252
+ <li>At your request via account deletion</li>
253
+ </ul>
254
+ <p>
255
+ Upon termination, your access to documents, chat history, and account data will be
256
+ revoked. You may request a data export before account deletion by contacting the
257
+ maintainers.
258
+ </p>
259
+ </>
260
+ ),
261
+ },
262
+ {
263
+ id: "changes-to-terms",
264
+ icon: RefreshCw,
265
+ title: "9. Changes to These Terms",
266
+ content: (
267
+ <>
268
+ <p>
269
+ We may revise these Terms from time to time. The most current version will always be
270
+ posted on this page. Material changes will be communicated via:
271
+ </p>
272
+ <ul>
273
+ <li>A notice on the Platform dashboard</li>
274
+ <li>Email notification to registered users (for significant changes)</li>
275
+ </ul>
276
+ <p>
277
+ Your continued use of the Platform after changes take effect constitutes acceptance of
278
+ the revised Terms.
279
+ </p>
280
+ </>
281
+ ),
282
+ },
283
+ {
284
+ id: "contact",
285
+ icon: Mail,
286
+ title: "10. Contact Us",
287
+ content: (
288
+ <>
289
+ <p>
290
+ If you have any questions about these Terms, please reach out through the project&rsquo;s
291
+ official channels:
292
+ </p>
293
+ <ul>
294
+ <li>
295
+ <strong>GitHub Issues:</strong>{" "}
296
+ <a
297
+ href="https://github.com/param20h/PDF-Assistant-RAG/issues"
298
+ target="_blank"
299
+ rel="noopener noreferrer"
300
+ >
301
+ github.com/param20h/PDF-Assistant-RAG/issues
302
+ </a>
303
+ </li>
304
+ <li>
305
+ <strong>GitHub Discussions:</strong>{" "}
306
+ <a
307
+ href="https://github.com/param20h/PDF-Assistant-RAG/discussions"
308
+ target="_blank"
309
+ rel="noopener noreferrer"
310
+ >
311
+ github.com/param20h/PDF-Assistant-RAG/discussions
312
+ </a>
313
+ </li>
314
+ <li>
315
+ <strong>LinkedIn:</strong>{" "}
316
+ <a
317
+ href="https://www.linkedin.com/in/param20h/"
318
+ target="_blank"
319
+ rel="noopener noreferrer"
320
+ >
321
+ linkedin.com/in/param20h
322
+ </a>
323
+ </li>
324
+ </ul>
325
+ </>
326
+ ),
327
+ },
328
+ ];
329
+
330
+ export default function TermsPage() {
331
+ return (
332
+ <div className="min-h-screen bg-background">
333
+ {/* ── Header ────────────────────────────────────── */}
334
+ <header className="sticky top-0 z-50 border-b border-border/50 bg-card/50 backdrop-blur-md">
335
+ <div className="mx-auto max-w-4xl flex items-center justify-between px-6 h-14">
336
+ <Link
337
+ href="/"
338
+ className="flex items-center gap-2 text-sm text-muted-foreground hover:text-foreground transition-colors"
339
+ >
340
+ <ArrowLeft className="w-4 h-4" />
341
+ Back to Home
342
+ </Link>
343
+ <div className="flex items-center gap-2">
344
+ <div className="w-7 h-7 rounded-lg bg-primary/15 flex items-center justify-center">
345
+ <Scale className="w-4 h-4 text-primary" />
346
+ </div>
347
+ <span className="font-semibold text-sm">Terms of Service</span>
348
+ </div>
349
+ </div>
350
+ </header>
351
+
352
+ {/* ── Hero ──────────────────────────────────────── */}
353
+ <section className="border-b border-border/50">
354
+ <div className="mx-auto max-w-4xl px-6 py-16 sm:py-20 text-center">
355
+ <div className="inline-flex items-center gap-2 px-4 py-1.5 rounded-full bg-primary/10 border border-primary/20 text-sm text-primary mb-6">
356
+ <Scale className="w-4 h-4" />
357
+ Know your rights
358
+ </div>
359
+ <h1 className="text-4xl sm:text-5xl font-bold tracking-tight mb-4">
360
+ Terms of Service
361
+ </h1>
362
+ <p className="text-lg text-muted-foreground max-w-2xl mx-auto">
363
+ The rules and guidelines for using PDF-Assistant-RAG, our open-source document
364
+ analysis platform.
365
+ </p>
366
+ <p className="mt-4 text-sm text-muted-foreground">
367
+ <em>Last updated: May 30, 2026</em>
368
+ </p>
369
+ </div>
370
+ </section>
371
+
372
+ {/* ── Content ───────────────────────────────────── */}
373
+ <div className="mx-auto max-w-4xl px-6 py-12 sm:py-16">
374
+ {/* Table of Contents */}
375
+ <nav
376
+ className="mb-12 p-6 rounded-xl border border-border/50 bg-card/30"
377
+ aria-label="Table of contents"
378
+ >
379
+ <h2 className="text-sm font-semibold uppercase tracking-wider text-muted-foreground mb-4">
380
+ On this page
381
+ </h2>
382
+ <ul className="space-y-2">
383
+ {sections.map((section) => (
384
+ <li key={section.id}>
385
+ <a
386
+ href={`#${section.id}`}
387
+ className="flex items-center gap-2 text-sm text-muted-foreground hover:text-foreground transition-colors"
388
+ >
389
+ <section.icon className="w-3.5 h-3.5 shrink-0 text-primary" />
390
+ {section.title}
391
+ </a>
392
+ </li>
393
+ ))}
394
+ </ul>
395
+ </nav>
396
+
397
+ {/* Sections */}
398
+ <div className="prose prose-sm sm:prose-base dark:prose-invert max-w-none prose-headings:font-semibold prose-headings:tracking-tight prose-h2:text-foreground prose-h3:text-foreground prose-p:text-muted-foreground prose-p:leading-relaxed prose-a:text-primary prose-a:no-underline hover:prose-a:underline prose-strong:text-foreground prose-li:text-muted-foreground prose-li:marker:text-primary/60">
399
+ {sections.map((section) => (
400
+ <section key={section.id} id={section.id} className="mb-12 scroll-mt-20">
401
+ <div className="flex items-center gap-3 mb-6">
402
+ <div className="w-8 h-8 rounded-lg bg-primary/10 flex items-center justify-center shrink-0">
403
+ <section.icon className="w-4 h-4 text-primary" />
404
+ </div>
405
+ <h2 className="text-xl sm:text-2xl !my-0">{section.title}</h2>
406
+ </div>
407
+ {section.content}
408
+ <hr className="mt-8 border-border/30" />
409
+ </section>
410
+ ))}
411
+ </div>
412
+
413
+ {/* Footer note */}
414
+ <div className="mt-8 text-center">
415
+ <p className="text-sm text-muted-foreground">
416
+ Have questions?{" "}
417
+ <a
418
+ href="https://github.com/param20h/PDF-Assistant-RAG/discussions"
419
+ target="_blank"
420
+ rel="noopener noreferrer"
421
+ className="text-primary hover:underline"
422
+ >
423
+ Start a discussion
424
+ </a>
425
+ </p>
426
+ </div>
427
+ </div>
428
+
429
+ {/* ── Footer ────────────────────────────────── */}
430
+ <footer className="text-center py-6 text-xs text-muted-foreground border-t border-border/50">
431
+ Built with FastAPI &bull; LangChain &bull; ChromaDB &bull; HuggingFace &bull; Next.js
432
+ </footer>
433
+ </div>
434
+ );
435
+ }
frontend/src/components/document/PDFViewer.tsx CHANGED
@@ -3,8 +3,16 @@
3
  import { useState } from "react";
4
  import { Button } from "@/components/ui/button";
5
  import { Input } from "@/components/ui/input";
6
- import { ChevronLeft, ChevronRight, ZoomIn, ZoomOut, Loader2 } from "lucide-react";
7
  import { API_BASE } from "@/lib/api";
 
 
 
 
 
 
 
 
8
 
9
  interface Props {
10
  documentId: string;
@@ -15,15 +23,24 @@ interface Props {
15
 
16
  export default function PDFViewer({ documentId, currentPage, onPageChange, totalPages }: Props) {
17
  const [scale, setScale] = useState(1.0);
18
- const [loading, setLoading] = useState(true);
19
- // Local editable value β€” initialized from currentPage prop.
20
- // The iframe key={documentId-currentPage} already forces remount on
21
- // external page changes, so no useEffect sync is needed.
22
  const [pageInput, setPageInput] = useState(String(currentPage));
 
 
 
 
 
 
 
 
23
  const pdfUrl = `${API_BASE}/api/v1/documents/${documentId}/pdf`;
 
24
 
25
- // Append page fragment for native viewer navigation
26
- const iframeSrc = `${pdfUrl}#page=${currentPage}`;
 
 
 
27
 
28
  const handlePageSubmit = (e: React.FormEvent) => {
29
  e.preventDefault();
@@ -31,12 +48,10 @@ export default function PDFViewer({ documentId, currentPage, onPageChange, total
31
  if (!isNaN(num) && num >= 1 && num <= totalPages) {
32
  onPageChange(num);
33
  } else {
34
- // Reset to the current valid page without needing a useEffect
35
  setPageInput(String(currentPage));
36
  }
37
  };
38
 
39
-
40
  return (
41
  <div className="h-full flex flex-col bg-background">
42
  {/* ── Toolbar ─────────────────────────────────── */}
@@ -46,7 +61,11 @@ export default function PDFViewer({ documentId, currentPage, onPageChange, total
46
  variant="ghost"
47
  size="icon"
48
  className="h-7 w-7"
49
- onClick={() => onPageChange(Math.max(1, currentPage - 1))}
 
 
 
 
50
  disabled={currentPage <= 1}
51
  >
52
  <ChevronLeft className="w-4 h-4" />
@@ -68,7 +87,11 @@ export default function PDFViewer({ documentId, currentPage, onPageChange, total
68
  variant="ghost"
69
  size="icon"
70
  className="h-7 w-7"
71
- onClick={() => onPageChange(Math.min(totalPages, currentPage + 1))}
 
 
 
 
72
  disabled={currentPage >= totalPages}
73
  >
74
  <ChevronRight className="w-4 h-4" />
@@ -99,20 +122,50 @@ export default function PDFViewer({ documentId, currentPage, onPageChange, total
99
  </div>
100
 
101
  {/* ── PDF Render ──────────────────────────────── */}
102
- <div className="flex-1 overflow-auto relative">
103
- {loading && (
104
- <div className="absolute inset-0 flex items-center justify-center bg-background/80 z-10">
105
- <Loader2 className="w-6 h-6 animate-spin text-primary" />
106
- </div>
107
- )}
108
- <iframe
109
- key={`${documentId}-${currentPage}`}
110
- src={iframeSrc}
111
- className="w-full h-full border-0"
112
- style={{ transform: `scale(${scale})`, transformOrigin: "top left", width: `${100/scale}%`, height: `${100/scale}%` }}
113
- onLoad={() => setLoading(false)}
114
- title="PDF Viewer"
115
- />
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  </div>
117
  </div>
118
  );
 
3
  import { useState } from "react";
4
  import { Button } from "@/components/ui/button";
5
  import { Input } from "@/components/ui/input";
6
+ import { ChevronLeft, ChevronRight, ZoomIn, ZoomOut, Loader2, AlertCircle } from "lucide-react";
7
  import { API_BASE } from "@/lib/api";
8
+ import { Document, Page, pdfjs } from "react-pdf";
9
+
10
+ // Import styles for react-pdf layers
11
+ import "react-pdf/dist/Page/AnnotationLayer.css";
12
+ import "react-pdf/dist/Page/TextLayer.css";
13
+
14
+ // Configure PDF.js worker using standard unpkg URL
15
+ pdfjs.GlobalWorkerOptions.workerSrc = `//unpkg.com/pdfjs-dist@${pdfjs.version}/build/pdf.worker.min.mjs`;
16
 
17
  interface Props {
18
  documentId: string;
 
23
 
24
  export default function PDFViewer({ documentId, currentPage, onPageChange, totalPages }: Props) {
25
  const [scale, setScale] = useState(1.0);
26
+ const [, setLoading] = useState(true);
 
 
 
27
  const [pageInput, setPageInput] = useState(String(currentPage));
28
+ const [prevCurrentPage, setPrevCurrentPage] = useState(currentPage);
29
+
30
+ // Sync page input state with current page prop updates during render phase
31
+ if (currentPage !== prevCurrentPage) {
32
+ setPrevCurrentPage(currentPage);
33
+ setPageInput(String(currentPage));
34
+ }
35
+
36
  const pdfUrl = `${API_BASE}/api/v1/documents/${documentId}/pdf`;
37
+ const token = typeof window !== "undefined" ? localStorage.getItem("token") : null;
38
 
39
+ // Configure file object with Authorization headers
40
+ const fileConfig = {
41
+ url: pdfUrl,
42
+ httpHeaders: token ? { Authorization: `Bearer ${token}` } : undefined,
43
+ };
44
 
45
  const handlePageSubmit = (e: React.FormEvent) => {
46
  e.preventDefault();
 
48
  if (!isNaN(num) && num >= 1 && num <= totalPages) {
49
  onPageChange(num);
50
  } else {
 
51
  setPageInput(String(currentPage));
52
  }
53
  };
54
 
 
55
  return (
56
  <div className="h-full flex flex-col bg-background">
57
  {/* ── Toolbar ─────────────────────────────────── */}
 
61
  variant="ghost"
62
  size="icon"
63
  className="h-7 w-7"
64
+ onClick={() => {
65
+ const newPage = Math.max(1, currentPage - 1);
66
+ onPageChange(newPage);
67
+ setPageInput(String(newPage));
68
+ }}
69
  disabled={currentPage <= 1}
70
  >
71
  <ChevronLeft className="w-4 h-4" />
 
87
  variant="ghost"
88
  size="icon"
89
  className="h-7 w-7"
90
+ onClick={() => {
91
+ const newPage = Math.min(totalPages, currentPage + 1);
92
+ onPageChange(newPage);
93
+ setPageInput(String(newPage));
94
+ }}
95
  disabled={currentPage >= totalPages}
96
  >
97
  <ChevronRight className="w-4 h-4" />
 
122
  </div>
123
 
124
  {/* ── PDF Render ──────────────────────────────── */}
125
+ <div className="flex-1 overflow-auto bg-muted/30 flex justify-center items-start p-4 relative w-full">
126
+ <Document
127
+ file={fileConfig}
128
+ onLoadSuccess={() => setLoading(false)}
129
+ onLoadError={(err) => {
130
+ console.error("PDF load error:", err);
131
+ setLoading(false);
132
+ }}
133
+ loading={
134
+ <div className="absolute inset-0 flex items-center justify-center bg-background/80 z-10">
135
+ <Loader2 className="w-6 h-6 animate-spin text-primary" />
136
+ </div>
137
+ }
138
+ error={
139
+ <div className="flex flex-col items-center justify-center p-8 text-center bg-card border border-destructive/20 rounded-lg max-w-md mx-auto my-12 shadow-sm gap-3">
140
+ <AlertCircle className="w-8 h-8 text-destructive animate-pulse" />
141
+ <div>
142
+ <p className="font-semibold text-sm text-foreground mb-1">Failed to load PDF</p>
143
+ <p className="text-xs text-muted-foreground leading-relaxed">
144
+ We encountered an error loading this PDF document. Please verify the document is ready or try refreshing the page.
145
+ </p>
146
+ </div>
147
+ </div>
148
+ }
149
+ noData={
150
+ <div className="flex flex-col items-center justify-center p-8 text-center bg-card border border-border rounded-lg max-w-md mx-auto my-12 shadow-sm gap-2">
151
+ <p className="font-semibold text-sm text-foreground">No PDF document selected</p>
152
+ <p className="text-xs text-muted-foreground">Select or upload a document to view it here.</p>
153
+ </div>
154
+ }
155
+ className="shadow-md border border-border bg-card max-w-full"
156
+ >
157
+ <Page
158
+ pageNumber={currentPage}
159
+ scale={scale}
160
+ renderAnnotationLayer={false}
161
+ renderTextLayer={true}
162
+ loading={
163
+ <div className="flex items-center justify-center p-8">
164
+ <Loader2 className="w-6 h-6 animate-spin text-primary" />
165
+ </div>
166
+ }
167
+ />
168
+ </Document>
169
  </div>
170
  </div>
171
  );
frontend/src/components/layout/Header.tsx CHANGED
@@ -1,5 +1,6 @@
1
  "use client";
2
 
 
3
  import { useAuth } from "@/lib/auth";
4
  import { useTranslation } from "react-i18next";
5
  import { useRouter } from "next/navigation";
@@ -22,28 +23,39 @@ import {
22
  Moon,
23
  Shield,
24
  Sun,
 
 
25
  } from "lucide-react";
26
- import { useSyncExternalStore } from "react";
27
  import { useTheme } from "next-themes";
28
 
 
29
 
30
  interface HeaderProps {
31
  sidebarOpen: boolean;
32
  onToggleSidebar: () => void;
33
  viewerOpen: boolean;
34
  onToggleViewer: () => void;
 
 
35
  }
36
 
37
  const subscribe = () => () => {};
38
  const getSnapshot = () => true;
39
  const getServerSnapshot = () => false;
40
 
41
- export default function Header({ sidebarOpen, onToggleSidebar, viewerOpen, onToggleViewer }: HeaderProps) {
 
 
 
 
 
 
42
  const { user, logout } = useAuth();
43
  const { t, i18n } = useTranslation();
44
  const router = useRouter();
45
  const { theme, setTheme } = useTheme();
46
- const mounted = useSyncExternalStore(subscribe, getSnapshot, getServerSnapshot); // ← replaces useState + useEffect
 
47
 
48
  const isDark = theme === "dark";
49
  const toggleTheme = () => setTheme(isDark ? "light" : "dark");
@@ -67,79 +79,147 @@ export default function Header({ sidebarOpen, onToggleSidebar, viewerOpen, onTog
67
  };
68
 
69
  return (
70
- <header className="h-14 flex items-center justify-between px-4 border-b border-border/50 bg-card/50 backdrop-blur-md flex-shrink-0 z-50">
71
- {/* Left */}
72
- <div className="flex items-center gap-3">
73
- <Button variant="ghost" size="icon" className="h-8 w-8" onClick={onToggleSidebar} title={sidebarOpen ? t("header.closeSidebar") : t("header.openSidebar")}>
74
- {sidebarOpen ? <PanelLeftClose className="w-4 h-4" /> : <PanelLeftOpen className="w-4 h-4" />}
75
- </Button>
 
 
 
 
 
 
 
 
76
 
77
- <div className="flex items-center gap-2">
78
- <div className="w-7 h-7 rounded-lg bg-primary/15 flex items-center justify-center">
79
- <Brain className="w-4 h-4 text-primary" />
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  </div>
81
- <span className="font-semibold text-sm hidden sm:inline">{t("common.appName")}</span>
82
  </div>
83
- </div>
84
-
85
- {/* Right */}
86
- <div className="flex items-center gap-2">
87
- <Button variant="ghost" size="icon" className="h-8 w-8" onClick={onToggleViewer} title={viewerOpen ? t("header.closeViewer") : t("header.openViewer")}>
88
- {viewerOpen ? <PanelRightClose className="w-4 h-4" /> : <PanelRightOpen className="w-4 h-4" />}
89
- </Button>
90
 
91
- {mounted && (
92
- <Button variant="ghost" size="icon" className="h-8 w-8" onClick={toggleTheme} title={isDark ? t("header.lightMode") : t("header.darkMode")}>
93
- {isDark ? <Sun className="w-4 h-4" /> : <Moon className="w-4 h-4" />}
 
 
 
 
 
 
 
 
 
 
 
94
  </Button>
95
- )}
96
-
97
- <select
98
- aria-label={t("common.language")}
99
- value={i18n.resolvedLanguage || "en"}
100
- onChange={(e) => void i18n.changeLanguage(e.target.value)}
101
- className="h-8 rounded-md border border-border bg-background px-2 text-xs text-foreground"
102
- >
103
- <option value="en">{languageLabel("en")}</option>
104
- <option value="hi">{languageLabel("hi")}</option>
105
- <option value="es">{languageLabel("es")}</option>
106
- <option value="fr">{languageLabel("fr")}</option>
107
- </select>
108
-
109
- <DropdownMenu>
110
- <DropdownMenuTrigger
111
- render={
112
- <button className="flex items-center h-8 gap-2 px-2 rounded-md hover:bg-accent transition-colors cursor-pointer">
113
- <Avatar className="w-6 h-6">
114
- <AvatarFallback className="text-[10px] bg-primary/20 text-primary">
115
- {user?.username?.slice(0, 2).toUpperCase() || "U"}
116
- </AvatarFallback>
117
- </Avatar>
118
- <span className="text-sm hidden sm:inline">{user?.username}</span>
119
- </button>
120
- }
121
- />
122
-
123
- <DropdownMenuContent align="end" className="w-56">
124
- <div className="px-3 py-2">
125
- <p className="text-sm font-medium">{user?.username}</p>
126
- <p className="text-xs text-muted-foreground truncate">{user?.email}</p>
127
- </div>
128
- <DropdownMenuSeparator />
129
- {user?.is_admin && (
130
- <DropdownMenuItem className="cursor-pointer" onClick={() => router.push("/admin")}>
131
- <Shield className="w-4 h-4 mr-2" />
132
- Admin metrics
133
  </DropdownMenuItem>
134
- )}
135
- {user?.is_admin && <DropdownMenuSeparator />}
136
- <DropdownMenuItem className="text-destructive cursor-pointer" onClick={handleLogout}>
137
- <LogOut className="w-4 h-4 mr-2" />
138
- {t("header.signOut")}
139
- </DropdownMenuItem>
140
- </DropdownMenuContent>
141
- </DropdownMenu>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  </div>
143
- </header>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  );
145
  }
 
1
  "use client";
2
 
3
+ import { useState } from "react";
4
  import { useAuth } from "@/lib/auth";
5
  import { useTranslation } from "react-i18next";
6
  import { useRouter } from "next/navigation";
 
23
  Moon,
24
  Shield,
25
  Sun,
26
+ Menu,
27
+ X,
28
  } from "lucide-react";
 
29
  import { useTheme } from "next-themes";
30
 
31
+ import { useSyncExternalStore } from "react";
32
 
33
  interface HeaderProps {
34
  sidebarOpen: boolean;
35
  onToggleSidebar: () => void;
36
  viewerOpen: boolean;
37
  onToggleViewer: () => void;
38
+ /** Pass DocumentSidebar JSX so the mobile sheet can render it */
39
+ mobileSheetContent?: React.ReactNode;
40
  }
41
 
42
  const subscribe = () => () => {};
43
  const getSnapshot = () => true;
44
  const getServerSnapshot = () => false;
45
 
46
+ export default function Header({
47
+ sidebarOpen,
48
+ onToggleSidebar,
49
+ viewerOpen,
50
+ onToggleViewer,
51
+ mobileSheetContent,
52
+ }: HeaderProps) {
53
  const { user, logout } = useAuth();
54
  const { t, i18n } = useTranslation();
55
  const router = useRouter();
56
  const { theme, setTheme } = useTheme();
57
+ const mounted = useSyncExternalStore(subscribe, getSnapshot, getServerSnapshot);
58
+ const [sheetOpen, setSheetOpen] = useState(false);
59
 
60
  const isDark = theme === "dark";
61
  const toggleTheme = () => setTheme(isDark ? "light" : "dark");
 
79
  };
80
 
81
  return (
82
+ <>
83
+ <header className="h-14 flex items-center justify-between px-4 border-b border-border/50 bg-card/50 backdrop-blur-md flex-shrink-0 z-50">
84
+ {/* Left */}
85
+ <div className="flex items-center gap-3">
86
+ {/* Hamburger — mobile only */}
87
+ <Button
88
+ variant="ghost"
89
+ size="icon"
90
+ className="h-8 w-8 md:hidden"
91
+ onClick={() => setSheetOpen(true)}
92
+ title="Open sidebar"
93
+ >
94
+ <Menu className="w-4 h-4" />
95
+ </Button>
96
 
97
+ {/* Desktop sidebar toggle — hidden on mobile */}
98
+ <Button
99
+ variant="ghost"
100
+ size="icon"
101
+ className="h-8 w-8 hidden md:inline-flex"
102
+ onClick={onToggleSidebar}
103
+ title={sidebarOpen ? "Close sidebar" : "Open sidebar"}
104
+ >
105
+ {sidebarOpen ? (
106
+ <PanelLeftClose className="w-4 h-4" />
107
+ ) : (
108
+ <PanelLeftOpen className="w-4 h-4" />
109
+ )}
110
+ </Button>
111
+
112
+ <div className="flex items-center gap-2">
113
+ <div className="w-7 h-7 rounded-lg bg-primary/15 flex items-center justify-center">
114
+ <Brain className="w-4 h-4 text-primary" />
115
+ </div>
116
+ <span className="font-semibold text-sm hidden sm:inline">
117
+ Document AI Analyst
118
+ </span>
119
  </div>
 
120
  </div>
 
 
 
 
 
 
 
121
 
122
+ {/* Right */}
123
+ <div className="flex items-center gap-2">
124
+ <Button
125
+ variant="ghost"
126
+ size="icon"
127
+ className="h-8 w-8"
128
+ onClick={onToggleViewer}
129
+ title={viewerOpen ? "Close viewer" : "Open viewer"}
130
+ >
131
+ {viewerOpen ? (
132
+ <PanelRightClose className="w-4 h-4" />
133
+ ) : (
134
+ <PanelRightOpen className="w-4 h-4" />
135
+ )}
136
  </Button>
137
+
138
+ {mounted && (
139
+ <Button
140
+ variant="ghost"
141
+ size="icon"
142
+ className="h-8 w-8"
143
+ onClick={toggleTheme}
144
+ title={isDark ? "Light mode" : "Dark mode"}
145
+ >
146
+ {isDark ? <Sun className="w-4 h-4" /> : <Moon className="w-4 h-4" />}
147
+ </Button>
148
+ )}
149
+
150
+ <DropdownMenu>
151
+ <DropdownMenuTrigger className="flex items-center h-8 gap-2 px-2 rounded-md hover:bg-accent transition-colors cursor-pointer">
152
+ <Avatar className="w-6 h-6">
153
+ <AvatarFallback className="text-[10px] bg-primary/20 text-primary">
154
+ {user?.username?.slice(0, 2).toUpperCase() || "U"}
155
+ </AvatarFallback>
156
+ </Avatar>
157
+ <span className="text-sm hidden sm:inline">{user?.username}</span>
158
+ </DropdownMenuTrigger>
159
+ <DropdownMenuContent align="end" className="w-48">
160
+ <div className="px-3 py-2">
161
+ <p className="text-sm font-medium">{user?.username}</p>
162
+ <p className="text-xs text-muted-foreground truncate">{user?.email}</p>
163
+ </div>
164
+ <DropdownMenuSeparator />
165
+ <DropdownMenuItem
166
+ className="text-destructive cursor-pointer"
167
+ onClick={handleLogout}
168
+ >
169
+ <LogOut className="w-4 h-4 mr-2" />
170
+ Sign out
 
 
 
 
171
  </DropdownMenuItem>
172
+ </DropdownMenuContent>
173
+ </DropdownMenu>
174
+ </div>
175
+ </header>
176
+
177
+ {/* ── Mobile Navigation Sheet ──────────────────────────────────── */}
178
+ {/* Backdrop */}
179
+ {sheetOpen && (
180
+ <div
181
+ className="fixed inset-0 z-40 bg-black/50 backdrop-blur-sm md:hidden"
182
+ onClick={() => setSheetOpen(false)}
183
+ aria-hidden="true"
184
+ />
185
+ )}
186
+
187
+ {/* Slide-in panel */}
188
+ <aside
189
+ className={[
190
+ "fixed inset-y-0 left-0 z-50 w-72 flex flex-col",
191
+ "bg-sidebar border-r border-sidebar-border",
192
+ "transform transition-transform duration-300 ease-in-out md:hidden",
193
+ sheetOpen ? "translate-x-0" : "-translate-x-full",
194
+ ].join(" ")}
195
+ aria-label="Mobile navigation"
196
+ aria-hidden={!sheetOpen}
197
+ inert={!sheetOpen ? true : undefined}
198
+ >
199
+ {/* Sheet header */}
200
+ <div className="h-14 flex items-center justify-between px-4 border-b border-sidebar-border flex-shrink-0">
201
+ <div className="flex items-center gap-2">
202
+ <div className="w-7 h-7 rounded-lg bg-primary/15 flex items-center justify-center">
203
+ <Brain className="w-4 h-4 text-primary" />
204
  </div>
205
+ <span className="font-semibold text-sm">Document AI Analyst</span>
206
+ </div>
207
+ <Button
208
+ variant="ghost"
209
+ size="icon"
210
+ className="h-8 w-8"
211
+ onClick={() => setSheetOpen(false)}
212
+ aria-label="Close navigation"
213
+ >
214
+ <X className="w-4 h-4" />
215
+ </Button>
216
+ </div>
217
+
218
+ {/* Sidebar content */}
219
+ <div className="flex-1 overflow-hidden">
220
+ {sheetOpen ? mobileSheetContent : null}
221
+ </div>
222
+ </aside>
223
+ </>
224
  );
225
  }