Spaces:
Running
Running
Merge remote-tracking branch 'upstream/dev' into feat/issue-114-hybrid-search
Browse files- .env.example +10 -0
- .gitignore +1 -0
- .pre-commit-config.yaml +55 -0
- CONTRIBUTING.md +30 -0
- Dockerfile +3 -2
- backend/app/config.py +16 -0
- backend/app/models.py +20 -13
- backend/app/rag/agent.py +33 -2
- backend/app/rag/chunker.py +120 -1
- backend/app/rag/graph_builder.py +185 -0
- backend/app/rag/graph_retriever.py +123 -0
- backend/app/rag/vectorstore.py +5 -0
- backend/app/routes/chat.py +183 -276
- backend/app/routes/documents.py +17 -0
- backend/app/schemas.py +16 -0
- backend/requirements.txt +4 -0
- backend/tests/test_chunker.py +49 -0
- backend/tests/test_documents.py +79 -0
- backend/tests/test_graph_builder.py +89 -0
- backend/tests/test_graph_retriever.py +97 -0
- backend/tests/test_graphrag_agent.py +92 -0
- frontend/e2e/snapshots.spec.ts +109 -0
- frontend/src/app/dashboard/page.tsx +4 -1
- frontend/src/components/auth/HuggingFaceTokenModal.tsx +231 -0
- frontend/src/components/chat/ChatPanel.tsx +10 -2
- frontend/src/components/chat/ChatSessionSidebar.tsx +184 -0
- frontend/src/components/chat/SourceCard.tsx +150 -82
- frontend/src/components/layout/ContributorsPanel.tsx +2 -1
- frontend/src/components/layout/Header.tsx +1 -0
- frontend/src/store/chat-store.ts +106 -3
- package-lock.json +6 -0
- requirements.txt +2 -1
.env.example
CHANGED
|
@@ -122,6 +122,16 @@ HF_TOKEN=your_huggingface_token_here
|
|
| 122 |
|
| 123 |
# ── RAG Config (Optional — defaults shown) ───────────
|
| 124 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
# ── ChromaDB (Vector Store) ─────────────────────────────────
|
| 126 |
|
| 127 |
# Directory where ChromaDB persists its vector index to disk.
|
|
|
|
| 122 |
|
| 123 |
# ── RAG Config (Optional — defaults shown) ───────────
|
| 124 |
|
| 125 |
+
# ── Knowledge Graph / GraphRAG (Optional — defaults shown) ─────────────────
|
| 126 |
+
|
| 127 |
+
# Directory where GraphRAG stores per-document knowledge graphs.
|
| 128 |
+
# Optional — defaults to "./data/graphs"
|
| 129 |
+
# GRAPH_PERSIST_DIR=./data/graphs
|
| 130 |
+
|
| 131 |
+
# Maximum number of graph relationships appended to the RAG prompt.
|
| 132 |
+
# Optional — defaults to 12
|
| 133 |
+
# GRAPH_MAX_RELATIONSHIPS=12
|
| 134 |
+
|
| 135 |
# ── ChromaDB (Vector Store) ─────────────────────────────────
|
| 136 |
|
| 137 |
# Directory where ChromaDB persists its vector index to disk.
|
.gitignore
CHANGED
|
@@ -29,3 +29,4 @@ Thumbs.db
|
|
| 29 |
# Misc
|
| 30 |
*.log
|
| 31 |
static/
|
|
|
|
|
|
| 29 |
# Misc
|
| 30 |
*.log
|
| 31 |
static/
|
| 32 |
+
.planning/
|
.pre-commit-config.yaml
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
repos:
|
| 2 |
+
# ── Python Formatting ─────────────────────────────────────
|
| 3 |
+
- repo: https://github.com/psf/black
|
| 4 |
+
rev: 24.10.0
|
| 5 |
+
hooks:
|
| 6 |
+
- id: black
|
| 7 |
+
language_version: python3
|
| 8 |
+
args: [--line-length=120]
|
| 9 |
+
files: ^backend/
|
| 10 |
+
|
| 11 |
+
# ── Python Linting ────────────────────────────────────────
|
| 12 |
+
- repo: https://github.com/PyCQA/flake8
|
| 13 |
+
rev: 7.1.1
|
| 14 |
+
hooks:
|
| 15 |
+
- id: flake8
|
| 16 |
+
args:
|
| 17 |
+
- --max-line-length=120
|
| 18 |
+
- --select=E9,F63,F7,F82,E501
|
| 19 |
+
- --count
|
| 20 |
+
files: ^backend/
|
| 21 |
+
|
| 22 |
+
# ── JavaScript / TypeScript / JSON / CSS / Markdown Formatting ──
|
| 23 |
+
- repo: https://github.com/pre-commit/mirrors-prettier
|
| 24 |
+
rev: v4.0.0-alpha.8
|
| 25 |
+
hooks:
|
| 26 |
+
- id: prettier
|
| 27 |
+
types_or: [javascript, jsx, ts, tsx, json, css, markdown]
|
| 28 |
+
files: ^frontend/
|
| 29 |
+
exclude: ^frontend/(node_modules|.next|dist|build)/
|
| 30 |
+
|
| 31 |
+
# ── General Hygiene ───────────────────────────────────────
|
| 32 |
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
| 33 |
+
rev: v5.0.0
|
| 34 |
+
hooks:
|
| 35 |
+
- id: trailing-whitespace
|
| 36 |
+
args: [--markdown-linebreak-ext=md]
|
| 37 |
+
- id: end-of-file-fixer
|
| 38 |
+
- id: check-yaml
|
| 39 |
+
args: [--allow-multiple-documents]
|
| 40 |
+
- id: check-json
|
| 41 |
+
exclude: ^frontend/(node_modules|.next)/
|
| 42 |
+
- id: check-merge-conflict
|
| 43 |
+
- id: check-added-large-files
|
| 44 |
+
args: [--maxkb=1024]
|
| 45 |
+
- id: mixed-line-ending
|
| 46 |
+
args: [--fix=lf]
|
| 47 |
+
exclude: \.(bat|cmd|ps1)$
|
| 48 |
+
|
| 49 |
+
# ── Security ─────────────────────────────────────────────
|
| 50 |
+
- repo: https://github.com/Yelp/detect-secrets
|
| 51 |
+
rev: v1.5.0
|
| 52 |
+
hooks:
|
| 53 |
+
- id: detect-secrets
|
| 54 |
+
args: [--baseline, .secrets.baseline]
|
| 55 |
+
exclude: \.env\.example$
|
CONTRIBUTING.md
CHANGED
|
@@ -61,6 +61,36 @@ cp ../.env.example .env # Fill in your own dev values
|
|
| 61 |
uvicorn app.main:app --reload --port 8000
|
| 62 |
```
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
### Frontend (Next.js)
|
| 65 |
|
| 66 |
```bash
|
|
|
|
| 61 |
uvicorn app.main:app --reload --port 8000
|
| 62 |
```
|
| 63 |
|
| 64 |
+
### Pre-commit Hooks (Required)
|
| 65 |
+
|
| 66 |
+
We use [`pre-commit`](https://pre-commit.com/) to enforce code style automatically before every commit. This prevents style-related CI failures.
|
| 67 |
+
|
| 68 |
+
```bash
|
| 69 |
+
# Install pre-commit (one-time setup)
|
| 70 |
+
pip install pre-commit
|
| 71 |
+
|
| 72 |
+
# Install the hooks into your local clone (one-time per checkout)
|
| 73 |
+
pre-commit install
|
| 74 |
+
|
| 75 |
+
# (Optional) Run against all files to verify setup
|
| 76 |
+
pre-commit run --all-files
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
**What the hooks check:**
|
| 80 |
+
|
| 81 |
+
| Hook | Tool | Scope |
|
| 82 |
+
|------|------|-------|
|
| 83 |
+
| Python formatting | `black` (line-length 120) | `backend/` |
|
| 84 |
+
| Python linting | `flake8` (errors only) | `backend/` |
|
| 85 |
+
| JS/TS/JSON/CSS/MD formatting | `prettier` | `frontend/` |
|
| 86 |
+
| Trailing whitespace | `pre-commit-hooks` | All files |
|
| 87 |
+
| YAML/JSON validity | `pre-commit-hooks` | All files |
|
| 88 |
+
| Merge-conflict markers | `pre-commit-hooks` | All files |
|
| 89 |
+
| Large file guard (>1 MB) | `pre-commit-hooks` | All files |
|
| 90 |
+
| Secret detection | `detect-secrets` | All files |
|
| 91 |
+
|
| 92 |
+
> ⚠️ If a hook modifies files, it will block your commit. Just `git add` the auto-fixed files and commit again.
|
| 93 |
+
|
| 94 |
### Frontend (Next.js)
|
| 95 |
|
| 96 |
```bash
|
Dockerfile
CHANGED
|
@@ -33,7 +33,8 @@ RUN python -m venv "$VIRTUAL_ENV"
|
|
| 33 |
|
| 34 |
COPY backend/requirements.txt ./requirements.txt
|
| 35 |
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
|
| 36 |
-
pip install --no-cache-dir -r requirements.txt
|
|
|
|
| 37 |
|
| 38 |
# --------------------------------------------------------
|
| 39 |
# Stage 3: Runtime image with only app code and artifacts
|
|
@@ -68,7 +69,7 @@ COPY backend/__init__.py ./backend/__init__.py
|
|
| 68 |
COPY --from=frontend-builder /app/frontend/out ./frontend/out
|
| 69 |
|
| 70 |
# Create data directories with proper permissions
|
| 71 |
-
RUN mkdir -p /app/data/uploads /app/data/chroma_db /app/data/huggingface && \
|
| 72 |
chown -R appuser:appuser /app
|
| 73 |
|
| 74 |
# Copy entrypoint
|
|
|
|
| 33 |
|
| 34 |
COPY backend/requirements.txt ./requirements.txt
|
| 35 |
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
|
| 36 |
+
pip install --no-cache-dir -r requirements.txt && \
|
| 37 |
+
python -m spacy download en_core_web_sm
|
| 38 |
|
| 39 |
# --------------------------------------------------------
|
| 40 |
# Stage 3: Runtime image with only app code and artifacts
|
|
|
|
| 69 |
COPY --from=frontend-builder /app/frontend/out ./frontend/out
|
| 70 |
|
| 71 |
# Create data directories with proper permissions
|
| 72 |
+
RUN mkdir -p /app/data/uploads /app/data/chroma_db /app/data/graphs /app/data/huggingface && \
|
| 73 |
chown -R appuser:appuser /app
|
| 74 |
|
| 75 |
# Copy entrypoint
|
backend/app/config.py
CHANGED
|
@@ -45,6 +45,22 @@ class Settings(BaseSettings):
|
|
| 45 |
TOP_K_RETRIEVAL: int = 10
|
| 46 |
TOP_K_RERANK: int = 5
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
# ── Embeddings (local HuggingFace model) ─────────────
|
| 49 |
EMBEDDING_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
|
| 50 |
EMBEDDING_DIMENSION: int = 384
|
|
|
|
| 45 |
TOP_K_RETRIEVAL: int = 10
|
| 46 |
TOP_K_RERANK: int = 5
|
| 47 |
|
| 48 |
+
# ── Knowledge Graph (GraphRAG) ───────────────────────
|
| 49 |
+
GRAPH_PERSIST_DIR: str = "./data/graphs"
|
| 50 |
+
GRAPH_ENTITY_LABELS: set = {
|
| 51 |
+
"PERSON",
|
| 52 |
+
"ORG",
|
| 53 |
+
"GPE",
|
| 54 |
+
"LOC",
|
| 55 |
+
"PRODUCT",
|
| 56 |
+
"EVENT",
|
| 57 |
+
"WORK_OF_ART",
|
| 58 |
+
"LAW",
|
| 59 |
+
"NORP",
|
| 60 |
+
"FAC",
|
| 61 |
+
}
|
| 62 |
+
GRAPH_MAX_RELATIONSHIPS: int = 12
|
| 63 |
+
|
| 64 |
# ── Embeddings (local HuggingFace model) ─────────────
|
| 65 |
EMBEDDING_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
|
| 66 |
EMBEDDING_DIMENSION: int = 384
|
backend/app/models.py
CHANGED
|
@@ -8,11 +8,9 @@ import hashlib
|
|
| 8 |
from datetime import datetime, timezone
|
| 9 |
|
| 10 |
from cryptography.fernet import Fernet
|
| 11 |
-
from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, Text, Boolean
|
| 12 |
from sqlalchemy.types import TypeDecorator, CHAR
|
| 13 |
from sqlalchemy.dialects.postgresql import UUID as PG_UUID
|
| 14 |
-
from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, Text, Boolean, Enum as SQLAlchemyEnum
|
| 15 |
-
from sqlalchemy.types import TypeDecorator
|
| 16 |
from sqlalchemy.orm import relationship
|
| 17 |
|
| 18 |
from app.database import Base
|
|
@@ -85,11 +83,6 @@ class EncryptedString(TypeDecorator):
|
|
| 85 |
return value
|
| 86 |
|
| 87 |
|
| 88 |
-
def generate_uuid():
|
| 89 |
-
"""Generates a standard unique string identifier for database records."""
|
| 90 |
-
return str(uuid.uuid4())
|
| 91 |
-
|
| 92 |
-
|
| 93 |
class UserRole(str, enum.Enum):
|
| 94 |
"""
|
| 95 |
Defines the available user roles for Role-Based Access Control (RBAC).
|
|
@@ -129,6 +122,7 @@ class User(Base):
|
|
| 129 |
documents = relationship("Document", back_populates="owner", cascade="all, delete-orphan")
|
| 130 |
messages = relationship("ChatMessage", back_populates="user", cascade="all, delete-orphan")
|
| 131 |
api_keys = relationship("ApiKey", back_populates="user", cascade="all, delete-orphan")
|
|
|
|
| 132 |
|
| 133 |
|
| 134 |
class ApiKey(Base):
|
|
@@ -148,6 +142,22 @@ class ApiKey(Base):
|
|
| 148 |
user = relationship("User", back_populates="api_keys")
|
| 149 |
|
| 150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
class Document(Base):
|
| 152 |
"""
|
| 153 |
Metadata and processing status for files uploaded by users.
|
|
@@ -159,11 +169,6 @@ class Document(Base):
|
|
| 159 |
filename = Column(String(255), nullable=False) # Stored filename (UUID-based)
|
| 160 |
original_name = Column(String(255), nullable=False) # User's original filename
|
| 161 |
file_size = Column(Integer, default=0) # Size in bytes
|
| 162 |
-
id = Column(String, primary_key=True, default=generate_uuid)
|
| 163 |
-
user_id = Column(String, ForeignKey("users.id"), nullable=False, index=True)
|
| 164 |
-
filename = Column(String(255), nullable=False) # Internal UUID-based filename
|
| 165 |
-
original_name = Column(String(255), nullable=False) # Original name for user display
|
| 166 |
-
file_size = Column(Integer, default=0) # Size in bytes
|
| 167 |
page_count = Column(Integer, default=0)
|
| 168 |
chunk_count = Column(Integer, default=0)
|
| 169 |
status = Column(String(20), default="pending") # pending | processing | ready | failed
|
|
@@ -185,6 +190,7 @@ class ChatMessage(Base):
|
|
| 185 |
id = Column(GUID, primary_key=True, default=uuid.uuid4)
|
| 186 |
user_id = Column(GUID, ForeignKey("users.id"), nullable=False, index=True)
|
| 187 |
document_id = Column(GUID, ForeignKey("documents.id"), nullable=True, index=True)
|
|
|
|
| 188 |
role = Column(String(20), nullable=False) # "user" | "assistant"
|
| 189 |
content = Column(Text, nullable=False)
|
| 190 |
sources_json = Column(Text, nullable=True) # JSON representation of retrieved sources
|
|
@@ -193,6 +199,7 @@ class ChatMessage(Base):
|
|
| 193 |
# Relationships
|
| 194 |
user = relationship("User", back_populates="messages")
|
| 195 |
document = relationship("Document", back_populates="messages")
|
|
|
|
| 196 |
shared_message = relationship("SharedMessage", back_populates="message", uselist=False, cascade="all, delete-orphan")
|
| 197 |
|
| 198 |
|
|
|
|
| 8 |
from datetime import datetime, timezone
|
| 9 |
|
| 10 |
from cryptography.fernet import Fernet
|
| 11 |
+
from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, Text, Boolean, Enum as SQLAlchemyEnum
|
| 12 |
from sqlalchemy.types import TypeDecorator, CHAR
|
| 13 |
from sqlalchemy.dialects.postgresql import UUID as PG_UUID
|
|
|
|
|
|
|
| 14 |
from sqlalchemy.orm import relationship
|
| 15 |
|
| 16 |
from app.database import Base
|
|
|
|
| 83 |
return value
|
| 84 |
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
class UserRole(str, enum.Enum):
|
| 87 |
"""
|
| 88 |
Defines the available user roles for Role-Based Access Control (RBAC).
|
|
|
|
| 122 |
documents = relationship("Document", back_populates="owner", cascade="all, delete-orphan")
|
| 123 |
messages = relationship("ChatMessage", back_populates="user", cascade="all, delete-orphan")
|
| 124 |
api_keys = relationship("ApiKey", back_populates="user", cascade="all, delete-orphan")
|
| 125 |
+
chat_sessions = relationship("ChatSession", back_populates="user", cascade="all, delete-orphan")
|
| 126 |
|
| 127 |
|
| 128 |
class ApiKey(Base):
|
|
|
|
| 142 |
user = relationship("User", back_populates="api_keys")
|
| 143 |
|
| 144 |
|
| 145 |
+
class ChatSession(Base):
|
| 146 |
+
"""
|
| 147 |
+
Groups chat messages into logical sessions/threads.
|
| 148 |
+
"""
|
| 149 |
+
__tablename__ = "chat_sessions"
|
| 150 |
+
|
| 151 |
+
id = Column(GUID, primary_key=True, default=uuid.uuid4)
|
| 152 |
+
user_id = Column(GUID, ForeignKey("users.id"), nullable=False, index=True)
|
| 153 |
+
title = Column(String(255), nullable=False)
|
| 154 |
+
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
|
| 155 |
+
|
| 156 |
+
# Relationships
|
| 157 |
+
user = relationship("User", back_populates="chat_sessions")
|
| 158 |
+
messages = relationship("ChatMessage", back_populates="session", cascade="all, delete-orphan")
|
| 159 |
+
|
| 160 |
+
|
| 161 |
class Document(Base):
|
| 162 |
"""
|
| 163 |
Metadata and processing status for files uploaded by users.
|
|
|
|
| 169 |
filename = Column(String(255), nullable=False) # Stored filename (UUID-based)
|
| 170 |
original_name = Column(String(255), nullable=False) # User's original filename
|
| 171 |
file_size = Column(Integer, default=0) # Size in bytes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
page_count = Column(Integer, default=0)
|
| 173 |
chunk_count = Column(Integer, default=0)
|
| 174 |
status = Column(String(20), default="pending") # pending | processing | ready | failed
|
|
|
|
| 190 |
id = Column(GUID, primary_key=True, default=uuid.uuid4)
|
| 191 |
user_id = Column(GUID, ForeignKey("users.id"), nullable=False, index=True)
|
| 192 |
document_id = Column(GUID, ForeignKey("documents.id"), nullable=True, index=True)
|
| 193 |
+
session_id = Column(GUID, ForeignKey("chat_sessions.id"), nullable=True, index=True)
|
| 194 |
role = Column(String(20), nullable=False) # "user" | "assistant"
|
| 195 |
content = Column(Text, nullable=False)
|
| 196 |
sources_json = Column(Text, nullable=True) # JSON representation of retrieved sources
|
|
|
|
| 199 |
# Relationships
|
| 200 |
user = relationship("User", back_populates="messages")
|
| 201 |
document = relationship("Document", back_populates="messages")
|
| 202 |
+
session = relationship("ChatSession", back_populates="messages")
|
| 203 |
shared_message = relationship("SharedMessage", back_populates="message", uselist=False, cascade="all, delete-orphan")
|
| 204 |
|
| 205 |
|
backend/app/rag/agent.py
CHANGED
|
@@ -9,6 +9,7 @@ from typing import List, Dict, Any, Optional, Generator
|
|
| 9 |
from huggingface_hub import InferenceClient
|
| 10 |
from app.config import get_settings
|
| 11 |
from app.rag.retriever import retrieve
|
|
|
|
| 12 |
from app.rag.prompts import SYSTEM_PROMPT, RAG_PROMPT_TEMPLATE, GREETING_PROMPT
|
| 13 |
from app.rag.tracing import trace_function
|
| 14 |
|
|
@@ -48,6 +49,26 @@ def build_context(chunks: List[Dict[str, Any]]) -> str:
|
|
| 48 |
return "\n\n---\n\n".join(context_parts)
|
| 49 |
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
def _chat_messages(system: str, user_content: str) -> list:
|
| 52 |
"""Build messages list for chat completion API."""
|
| 53 |
return [
|
|
@@ -108,7 +129,12 @@ def generate_answer(
|
|
| 108 |
|
| 109 |
# ── Build prompt ─────────────────────────────────
|
| 110 |
# Format retrieved chunks into a readable context block, then inject into the RAG prompt template
|
| 111 |
-
context =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
user_content = RAG_PROMPT_TEMPLATE.format(context=context, question=question)
|
| 113 |
messages = _chat_messages(SYSTEM_PROMPT, user_content)
|
| 114 |
|
|
@@ -222,7 +248,12 @@ def generate_answer_stream(
|
|
| 222 |
|
| 223 |
# ── Build prompt ─────────────────────────────────
|
| 224 |
# Format retrieved chunks into a readable context block, then inject into the RAG prompt template
|
| 225 |
-
context =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
user_content = RAG_PROMPT_TEMPLATE.format(context=context, question=question)
|
| 227 |
messages = _chat_messages(SYSTEM_PROMPT, user_content)
|
| 228 |
|
|
|
|
| 9 |
from huggingface_hub import InferenceClient
|
| 10 |
from app.config import get_settings
|
| 11 |
from app.rag.retriever import retrieve
|
| 12 |
+
from app.rag.graph_retriever import get_entity_context
|
| 13 |
from app.rag.prompts import SYSTEM_PROMPT, RAG_PROMPT_TEMPLATE, GREETING_PROMPT
|
| 14 |
from app.rag.tracing import trace_function
|
| 15 |
|
|
|
|
| 49 |
return "\n\n---\n\n".join(context_parts)
|
| 50 |
|
| 51 |
|
| 52 |
+
def build_augmented_context(
|
| 53 |
+
chunks: List[Dict[str, Any]],
|
| 54 |
+
question: str,
|
| 55 |
+
user_id: str,
|
| 56 |
+
document_id: Optional[str] = None,
|
| 57 |
+
) -> str:
|
| 58 |
+
"""Combine vector-retrieved excerpts with GraphRAG relationships."""
|
| 59 |
+
context = build_context(chunks)
|
| 60 |
+
graph_context = get_entity_context(
|
| 61 |
+
query=question,
|
| 62 |
+
user_id=user_id,
|
| 63 |
+
document_id=document_id,
|
| 64 |
+
)
|
| 65 |
+
|
| 66 |
+
if not graph_context:
|
| 67 |
+
return context
|
| 68 |
+
|
| 69 |
+
return f"{context}\n\n---\n\n{graph_context}"
|
| 70 |
+
|
| 71 |
+
|
| 72 |
def _chat_messages(system: str, user_content: str) -> list:
|
| 73 |
"""Build messages list for chat completion API."""
|
| 74 |
return [
|
|
|
|
| 129 |
|
| 130 |
# ── Build prompt ─────────────────────────────────
|
| 131 |
# Format retrieved chunks into a readable context block, then inject into the RAG prompt template
|
| 132 |
+
context = build_augmented_context(
|
| 133 |
+
chunks=chunks,
|
| 134 |
+
question=question,
|
| 135 |
+
user_id=user_id,
|
| 136 |
+
document_id=document_id,
|
| 137 |
+
)
|
| 138 |
user_content = RAG_PROMPT_TEMPLATE.format(context=context, question=question)
|
| 139 |
messages = _chat_messages(SYSTEM_PROMPT, user_content)
|
| 140 |
|
|
|
|
| 248 |
|
| 249 |
# ── Build prompt ─────────────────────────────────
|
| 250 |
# Format retrieved chunks into a readable context block, then inject into the RAG prompt template
|
| 251 |
+
context = build_augmented_context(
|
| 252 |
+
chunks=chunks,
|
| 253 |
+
question=question,
|
| 254 |
+
user_id=user_id,
|
| 255 |
+
document_id=document_id,
|
| 256 |
+
)
|
| 257 |
user_content = RAG_PROMPT_TEMPLATE.format(context=context, question=question)
|
| 258 |
messages = _chat_messages(SYSTEM_PROMPT, user_content)
|
| 259 |
|
backend/app/rag/chunker.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
| 2 |
Smart document chunking using LangChain's RecursiveCharacterTextSplitter.
|
| 3 |
Supports PDF, DOCX, TXT, and Markdown files with page-level metadata.
|
| 4 |
"""
|
|
|
|
| 5 |
import fitz # PyMuPDF
|
| 6 |
import docx
|
| 7 |
from typing import List, Dict, Any
|
|
@@ -11,8 +12,72 @@ from app.config import get_settings
|
|
| 11 |
settings = get_settings()
|
| 12 |
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
def extract_pdf(filepath: str) -> List[Dict[str, Any]]:
|
| 15 |
-
"""Extract text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
doc = fitz.open(filepath)
|
| 17 |
pages = []
|
| 18 |
|
|
@@ -22,12 +87,52 @@ def extract_pdf(filepath: str) -> List[Dict[str, Any]]:
|
|
| 22 |
pages.append({
|
| 23 |
"text": text,
|
| 24 |
"page": page_num + 1,
|
|
|
|
| 25 |
})
|
| 26 |
|
| 27 |
doc.close()
|
| 28 |
return pages
|
| 29 |
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
def extract_pdf_images(filepath: str) -> List[Dict[str, Any]]:
|
| 32 |
"""Extract images from a PDF and return list of dicts with image bytes and page number.
|
| 33 |
|
|
@@ -109,6 +214,19 @@ def chunk_document(filepath: str) -> List[Dict[str, Any]]:
|
|
| 109 |
for page_data in pages:
|
| 110 |
text = page_data["text"]
|
| 111 |
page_num = page_data["page"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
# Split this page's text
|
| 114 |
splits = splitter.split_text(text)
|
|
@@ -119,6 +237,7 @@ def chunk_document(filepath: str) -> List[Dict[str, Any]]:
|
|
| 119 |
"text": split_text.strip(),
|
| 120 |
"page": page_num,
|
| 121 |
"chunk_index": chunk_index,
|
|
|
|
| 122 |
})
|
| 123 |
chunk_index += 1
|
| 124 |
|
|
|
|
| 2 |
Smart document chunking using LangChain's RecursiveCharacterTextSplitter.
|
| 3 |
Supports PDF, DOCX, TXT, and Markdown files with page-level metadata.
|
| 4 |
"""
|
| 5 |
+
import json
|
| 6 |
import fitz # PyMuPDF
|
| 7 |
import docx
|
| 8 |
from typing import List, Dict, Any
|
|
|
|
| 12 |
settings = get_settings()
|
| 13 |
|
| 14 |
|
| 15 |
+
def _is_word_inside_bbox(word: Dict[str, Any], bbox: tuple) -> bool:
|
| 16 |
+
"""Return True when the word center falls inside a pdfplumber bbox."""
|
| 17 |
+
x0, top, x1, bottom = bbox
|
| 18 |
+
word_x = (float(word["x0"]) + float(word["x1"])) / 2
|
| 19 |
+
word_y = (float(word["top"]) + float(word["bottom"])) / 2
|
| 20 |
+
return x0 <= word_x <= x1 and top <= word_y <= bottom
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def _words_to_text(words: List[Dict[str, Any]], line_tolerance: float = 3.0) -> str:
|
| 24 |
+
"""Rebuild readable text from positioned pdfplumber words."""
|
| 25 |
+
if not words:
|
| 26 |
+
return ""
|
| 27 |
+
|
| 28 |
+
sorted_words = sorted(words, key=lambda item: (round(float(item["top"]) / line_tolerance), item["x0"]))
|
| 29 |
+
lines: List[List[Dict[str, Any]]] = []
|
| 30 |
+
|
| 31 |
+
for word in sorted_words:
|
| 32 |
+
if not lines:
|
| 33 |
+
lines.append([word])
|
| 34 |
+
continue
|
| 35 |
+
|
| 36 |
+
current_top = sum(float(item["top"]) for item in lines[-1]) / len(lines[-1])
|
| 37 |
+
if abs(float(word["top"]) - current_top) <= line_tolerance:
|
| 38 |
+
lines[-1].append(word)
|
| 39 |
+
else:
|
| 40 |
+
lines.append([word])
|
| 41 |
+
|
| 42 |
+
text_lines = [
|
| 43 |
+
" ".join(item["text"] for item in sorted(line, key=lambda item: item["x0"]))
|
| 44 |
+
for line in lines
|
| 45 |
+
]
|
| 46 |
+
return "\n".join(line for line in text_lines if line.strip())
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _table_to_markdown(rows: List[List[Any]]) -> str:
|
| 50 |
+
"""Serialize extracted table rows into Markdown for retrieval."""
|
| 51 |
+
cleaned_rows = [
|
| 52 |
+
["" if cell is None else str(cell).replace("\n", " ").strip() for cell in row]
|
| 53 |
+
for row in rows
|
| 54 |
+
if row and any(cell is not None and str(cell).strip() for cell in row)
|
| 55 |
+
]
|
| 56 |
+
if not cleaned_rows:
|
| 57 |
+
return ""
|
| 58 |
+
|
| 59 |
+
width = max(len(row) for row in cleaned_rows)
|
| 60 |
+
normalized = [row + [""] * (width - len(row)) for row in cleaned_rows]
|
| 61 |
+
|
| 62 |
+
def fmt(row: List[str]) -> str:
|
| 63 |
+
return "| " + " | ".join(cell.replace("|", "\\|") for cell in row) + " |"
|
| 64 |
+
|
| 65 |
+
header = normalized[0]
|
| 66 |
+
separator = ["---"] * width
|
| 67 |
+
body = normalized[1:]
|
| 68 |
+
return "\n".join([fmt(header), fmt(separator), *[fmt(row) for row in body]])
|
| 69 |
+
|
| 70 |
+
|
| 71 |
def extract_pdf(filepath: str) -> List[Dict[str, Any]]:
|
| 72 |
+
"""Extract PDF text while preserving tables as separate bbox-aware chunks."""
|
| 73 |
+
try:
|
| 74 |
+
return extract_pdf_with_tables(filepath)
|
| 75 |
+
except ImportError:
|
| 76 |
+
return extract_pdf_with_pymupdf(filepath)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def extract_pdf_with_pymupdf(filepath: str) -> List[Dict[str, Any]]:
|
| 80 |
+
"""Fallback PDF extraction with page numbers using PyMuPDF."""
|
| 81 |
doc = fitz.open(filepath)
|
| 82 |
pages = []
|
| 83 |
|
|
|
|
| 87 |
pages.append({
|
| 88 |
"text": text,
|
| 89 |
"page": page_num + 1,
|
| 90 |
+
"chunk_type": "text",
|
| 91 |
})
|
| 92 |
|
| 93 |
doc.close()
|
| 94 |
return pages
|
| 95 |
|
| 96 |
|
| 97 |
+
def extract_pdf_with_tables(filepath: str) -> List[Dict[str, Any]]:
|
| 98 |
+
"""Detect tables with pdfplumber, remove table text from paragraphs, and keep table bboxes."""
|
| 99 |
+
import pdfplumber
|
| 100 |
+
|
| 101 |
+
pages: List[Dict[str, Any]] = []
|
| 102 |
+
|
| 103 |
+
with pdfplumber.open(filepath) as pdf:
|
| 104 |
+
for page_num, page in enumerate(pdf.pages, start=1):
|
| 105 |
+
tables = page.find_tables()
|
| 106 |
+
table_bboxes = [table.bbox for table in tables]
|
| 107 |
+
|
| 108 |
+
words = page.extract_words() or []
|
| 109 |
+
paragraph_words = [
|
| 110 |
+
word for word in words
|
| 111 |
+
if not any(_is_word_inside_bbox(word, bbox) for bbox in table_bboxes)
|
| 112 |
+
]
|
| 113 |
+
paragraph_text = _words_to_text(paragraph_words)
|
| 114 |
+
|
| 115 |
+
if paragraph_text.strip():
|
| 116 |
+
pages.append({
|
| 117 |
+
"text": paragraph_text,
|
| 118 |
+
"page": page_num,
|
| 119 |
+
"chunk_type": "text",
|
| 120 |
+
})
|
| 121 |
+
|
| 122 |
+
for table_index, table in enumerate(tables):
|
| 123 |
+
table_text = _table_to_markdown(table.extract() or [])
|
| 124 |
+
if table_text.strip():
|
| 125 |
+
pages.append({
|
| 126 |
+
"text": table_text,
|
| 127 |
+
"page": page_num,
|
| 128 |
+
"chunk_type": "table",
|
| 129 |
+
"bbox": json.dumps([round(float(value), 2) for value in table.bbox]),
|
| 130 |
+
"table_index": table_index,
|
| 131 |
+
})
|
| 132 |
+
|
| 133 |
+
return pages
|
| 134 |
+
|
| 135 |
+
|
| 136 |
def extract_pdf_images(filepath: str) -> List[Dict[str, Any]]:
|
| 137 |
"""Extract images from a PDF and return list of dicts with image bytes and page number.
|
| 138 |
|
|
|
|
| 214 |
for page_data in pages:
|
| 215 |
text = page_data["text"]
|
| 216 |
page_num = page_data["page"]
|
| 217 |
+
chunk_type = page_data.get("chunk_type", "text")
|
| 218 |
+
|
| 219 |
+
if chunk_type == "table":
|
| 220 |
+
all_chunks.append({
|
| 221 |
+
"text": text.strip(),
|
| 222 |
+
"page": page_num,
|
| 223 |
+
"chunk_index": chunk_index,
|
| 224 |
+
"chunk_type": "table",
|
| 225 |
+
"bbox": page_data.get("bbox", ""),
|
| 226 |
+
"table_index": page_data.get("table_index", 0),
|
| 227 |
+
})
|
| 228 |
+
chunk_index += 1
|
| 229 |
+
continue
|
| 230 |
|
| 231 |
# Split this page's text
|
| 232 |
splits = splitter.split_text(text)
|
|
|
|
| 237 |
"text": split_text.strip(),
|
| 238 |
"page": page_num,
|
| 239 |
"chunk_index": chunk_index,
|
| 240 |
+
"chunk_type": chunk_type,
|
| 241 |
})
|
| 242 |
chunk_index += 1
|
| 243 |
|
backend/app/rag/graph_builder.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Knowledge graph construction and persistence for GraphRAG.
|
| 3 |
+
"""
|
| 4 |
+
import json
|
| 5 |
+
import logging
|
| 6 |
+
import re
|
| 7 |
+
from dataclasses import dataclass
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from typing import Any, Dict, Iterable, List, Optional
|
| 10 |
+
|
| 11 |
+
import networkx as nx
|
| 12 |
+
|
| 13 |
+
from app.config import get_settings
|
| 14 |
+
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
settings = get_settings()
|
| 17 |
+
|
| 18 |
+
_nlp = None
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@dataclass(frozen=True)
|
| 22 |
+
class Entity:
|
| 23 |
+
id: str
|
| 24 |
+
text: str
|
| 25 |
+
label: str
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def _safe_id(value: str) -> str:
|
| 29 |
+
safe = re.sub(r"[^A-Za-z0-9_.-]+", "_", value).strip("._")
|
| 30 |
+
return safe or "unknown"
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def get_graph_path(user_id: str, document_id: str) -> Path:
|
| 34 |
+
"""Return the on-disk graph path for one user/document pair."""
|
| 35 |
+
filename = f"{_safe_id(user_id)}_{_safe_id(document_id)}.json"
|
| 36 |
+
return Path(settings.GRAPH_PERSIST_DIR) / filename
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def iter_graph_paths(user_id: str) -> Iterable[Path]:
|
| 40 |
+
"""Yield every persisted graph path for a user."""
|
| 41 |
+
graph_dir = Path(settings.GRAPH_PERSIST_DIR)
|
| 42 |
+
if not graph_dir.exists():
|
| 43 |
+
return []
|
| 44 |
+
|
| 45 |
+
prefix = f"{_safe_id(user_id)}_"
|
| 46 |
+
return sorted(graph_dir.glob(f"{prefix}*.json"))
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _get_nlp():
    """Return the shared spaCy ``en_core_web_sm`` pipeline, loading it on first use.

    The loaded pipeline is cached in the module-level ``_nlp`` variable so
    later calls reuse it.

    Raises:
        RuntimeError: if spaCy raises ``OSError`` while loading the model
            (the message tells the operator how to install it).
    """
    global _nlp
    if _nlp is not None:
        return _nlp

    # Imported here rather than at module level, so spaCy is only needed
    # once entity extraction actually runs.
    import spacy

    try:
        _nlp = spacy.load("en_core_web_sm")
    except OSError as exc:
        raise RuntimeError(
            "spaCy model 'en_core_web_sm' is required for GraphRAG entity extraction. "
            "Install it with: python -m spacy download en_core_web_sm"
        ) from exc
    return _nlp
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def _entity_id(text: str, label: str) -> str:
|
| 66 |
+
normalized = " ".join(text.split()).casefold()
|
| 67 |
+
return f"{label}:{normalized}"
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def extract_entities(text: str) -> List[Entity]:
    """Run NER over ``text`` and return the distinct entities of configured labels.

    Only entities whose label is in ``settings.GRAPH_ENTITY_LABELS`` are kept.
    Entities are de-duplicated by ``_entity_id`` (label + case-folded text);
    the first occurrence wins and first-seen order is preserved.
    Empty or whitespace-only input yields an empty list without loading the
    model.
    """
    if not (text and text.strip()):
        return []

    found: Dict[str, Entity] = {}
    for span in _get_nlp()(text).ents:
        surface = " ".join(span.text.split()).strip()
        if surface and span.label_ in settings.GRAPH_ENTITY_LABELS:
            key = _entity_id(surface, span.label_)
            if key not in found:
                found[key] = Entity(id=key, text=surface, label=span.label_)

    return list(found.values())
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def build_graph(chunks: List[Dict[str, Any]]) -> nx.Graph:
    """Build an undirected entity co-occurrence graph from document chunks.

    Each chunk dict is read for ``"text"``, ``"page"`` and ``"chunk_index"``.
    Every entity extracted from a chunk becomes (or updates) a node, and every
    pair of entities found in the same chunk becomes (or strengthens) an edge.

    Node attributes: ``name``, ``label``, ``mentions``, ``pages``, ``chunks``.
    Edge attributes: ``weight``, ``pages``, ``chunks``.

    ``pages``/``chunks`` are collected as sets while building and converted
    to sorted lists by ``_convert_sets_for_json`` before the graph is returned.
    """
    graph = nx.Graph()

    for chunk in chunks:
        page = chunk.get("page")
        chunk_index = chunk.get("chunk_index")
        found = extract_entities(chunk.get("text", ""))

        # Nodes: one per entity id. extract_entities returns each id at most
        # once per call, so "mentions" goes up by one per chunk.
        for entity in found:
            if not graph.has_node(entity.id):
                graph.add_node(
                    entity.id,
                    name=entity.text,
                    label=entity.label,
                    mentions=1,
                    pages={page},
                    chunks={chunk_index},
                )
                continue
            node_attrs = graph.nodes[entity.id]
            node_attrs["mentions"] += 1
            node_attrs["pages"].add(page)
            node_attrs["chunks"].add(chunk_index)

        # Edges: every unordered pair of entities sharing this chunk.
        for position in range(len(found)):
            left = found[position]
            for right in found[position + 1:]:
                if not graph.has_edge(left.id, right.id):
                    graph.add_edge(
                        left.id,
                        right.id,
                        weight=1,
                        pages={page},
                        chunks={chunk_index},
                    )
                    continue
                edge_attrs = graph[left.id][right.id]
                edge_attrs["weight"] += 1
                edge_attrs["pages"].add(page)
                edge_attrs["chunks"].add(chunk_index)

    _convert_sets_for_json(graph)
    return graph
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def _convert_sets_for_json(graph: nx.Graph) -> None:
    """Replace the ``pages``/``chunks`` attributes with sorted lists, in place.

    Applied to every node and every edge. ``None`` entries are dropped, and a
    missing attribute becomes an empty list.
    """

    def _normalize(attrs) -> None:
        # Same treatment for both attributes of a node or an edge.
        for field in ("pages", "chunks"):
            attrs[field] = sorted(
                value for value in attrs.get(field, []) if value is not None
            )

    for _, node_attrs in graph.nodes(data=True):
        _normalize(node_attrs)

    for _, _, edge_attrs in graph.edges(data=True):
        _normalize(edge_attrs)
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def save_graph(graph: nx.Graph, user_id: str, document_id: str) -> Path:
    """Write ``graph`` to disk as node-link JSON and return the file path.

    The target comes from ``get_graph_path``; its parent directory is created
    if needed. A ``"metadata"`` object (user id, document id, node and edge
    counts) is added to the node-link payload before serialization.
    """
    target = get_graph_path(user_id, document_id)
    target.parent.mkdir(parents=True, exist_ok=True)

    payload = nx.node_link_data(graph)
    node_total = graph.number_of_nodes()
    edge_total = graph.number_of_edges()
    payload["metadata"] = {
        "user_id": user_id,
        "document_id": document_id,
        "node_count": node_total,
        "edge_count": edge_total,
    }

    serialized = json.dumps(payload, ensure_ascii=True, indent=2)
    target.write_text(serialized, encoding="utf-8")
    logger.info(
        "Saved knowledge graph for document %s with %s nodes and %s edges",
        document_id,
        node_total,
        edge_total,
    )
    return target
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def load_graph(user_id: str, document_id: str) -> Optional[nx.Graph]:
    """Load the persisted graph of one user/document pair.

    Resolves the file with ``get_graph_path`` and delegates to
    ``load_graph_path``, so the result is ``None`` when no file exists.
    """
    target = get_graph_path(user_id, document_id)
    return load_graph_path(target)
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
def load_graph_path(graph_path: Path) -> Optional[nx.Graph]:
    """Read a node-link JSON file and rebuild the graph it describes.

    Returns ``None`` when ``graph_path`` does not exist. JSON decoding errors
    are not caught here and propagate to the caller.
    """
    if graph_path.exists():
        payload = json.loads(graph_path.read_text(encoding="utf-8"))
        return nx.node_link_graph(payload)
    return None
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def delete_graph(user_id: str, document_id: str) -> None:
    """Remove the persisted graph file of one user/document pair.

    A missing file is not an error (``missing_ok=True``).
    """
    target = get_graph_path(user_id, document_id)
    target.unlink(missing_ok=True)
|
backend/app/rag/graph_retriever.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Knowledge graph retrieval for augmenting RAG context.
|
| 3 |
+
"""
|
| 4 |
+
import logging
|
| 5 |
+
from typing import Dict, Iterable, List, Optional, Set, Tuple
|
| 6 |
+
|
| 7 |
+
import networkx as nx
|
| 8 |
+
|
| 9 |
+
from app.config import get_settings
|
| 10 |
+
from app.rag.graph_builder import (
|
| 11 |
+
extract_entities,
|
| 12 |
+
iter_graph_paths,
|
| 13 |
+
load_graph,
|
| 14 |
+
load_graph_path,
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
settings = get_settings()
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _candidate_graphs(user_id: str, document_id: Optional[str]) -> Iterable[nx.Graph]:
    """Collect the graphs to search for a query.

    With a ``document_id`` only that document's graph is returned (an empty
    list if it cannot be loaded). Without one, every graph file listed by
    ``iter_graph_paths`` for the user is loaded; paths that load as ``None``
    are skipped.
    """
    if not document_id:
        loaded = (load_graph_path(path) for path in iter_graph_paths(user_id))
        return [graph for graph in loaded if graph is not None]

    single = load_graph(user_id, document_id)
    return [] if single is None else [single]
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def _node_name(graph: nx.Graph, node_id: str) -> str:
    """Return the display name of a node.

    Uses the node's ``name`` attribute when present; otherwise falls back to
    the part of ``node_id`` after the first ``":"`` (the whole id if there is
    no colon).
    """
    attrs = graph.nodes[node_id]
    fallback = node_id.split(":", 1)[-1]
    return attrs.get("name", fallback)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def _match_query_nodes(graph: nx.Graph, query: str) -> Set[str]:
    """Find the graph nodes that the query refers to.

    Strategy:
      1. Run entity extraction on the query and keep the entity ids that are
         nodes of ``graph``. If any are found, they are the result.
      2. Otherwise fall back to a text match: a node matches when its
         case-folded ``name`` occurs in the query as a whole word/phrase.

    The fallback deliberately requires word boundaries. A plain substring
    test would let short names match inside unrelated words (for example a
    node named "UN" matching the query word "fund"). The query is also
    whitespace-collapsed first, because node names are stored with collapsed
    whitespace by the graph builder.

    Returns:
        Set of matching node ids (possibly empty).
    """
    # Local import: this module does not import ``re`` at file level.
    import re

    matched = {
        entity.id
        for entity in extract_entities(query)
        if graph.has_node(entity.id)
    }
    if matched:
        return matched

    query_text = " ".join(query.split()).casefold()
    if not query_text:
        return matched

    for node_id, data in graph.nodes(data=True):
        name = data.get("name", "").casefold()
        if not name:
            continue
        # Lookarounds instead of \b so names that start or end with a
        # non-word character (e.g. "C++") are still bounded correctly.
        whole_word = rf"(?<!\w){re.escape(name)}(?!\w)"
        if re.search(whole_word, query_text):
            matched.add(node_id)

    return matched
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def _format_pages(pages: List[int]) -> str:
|
| 55 |
+
if not pages:
|
| 56 |
+
return "unknown pages"
|
| 57 |
+
if len(pages) == 1:
|
| 58 |
+
return f"page {pages[0]}"
|
| 59 |
+
return "pages " + ", ".join(str(page) for page in pages[:4])
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _relationship_key(left: str, right: str) -> Tuple[str, str]:
|
| 63 |
+
return tuple(sorted((left, right)))
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def get_entity_context(
    query: str,
    user_id: str,
    document_id: Optional[str] = None,
) -> str:
    """Return compact graph relationship context relevant to the query.

    For every candidate graph (one document's graph when ``document_id`` is
    given, otherwise all of the user's graphs) the nodes matching the query
    are looked up and each edge touching them is turned into a relationship
    line. Relationships are merged across graphs by the case-folded pair of
    node names, ranked by accumulated weight, and cut to
    ``settings.GRAPH_MAX_RELATIONSHIPS`` entries.

    Args:
        query: The user's question.
        user_id: Owner of the graphs to search.
        document_id: Optional document to restrict the search to.

    Returns:
        A Markdown block starting with ``## Knowledge Graph Context``, or an
        empty string when nothing matches or retrieval fails. Retrieval is
        best-effort: any exception is logged as a warning and swallowed.
    """
    relationships: Dict[Tuple[str, str], Dict[str, object]] = {}

    try:
        graphs = _candidate_graphs(user_id=user_id, document_id=document_id)
        for graph in graphs:
            matched_nodes = _match_query_nodes(graph, query)
            # Edges of THIS graph already folded into `relationships`. Without
            # it, an edge whose two endpoints both match the query would be
            # visited from each side and its weight counted twice.
            counted_edges: Set[frozenset] = set()

            # Sorted so the left/right orientation and tie order of the output
            # do not depend on set iteration order.
            for node_id in sorted(matched_nodes):
                neighbors = sorted(
                    graph.neighbors(node_id),
                    key=lambda neighbor: graph[node_id][neighbor].get("weight", 0),
                    reverse=True,
                )
                for neighbor_id in neighbors:
                    edge_key = frozenset((node_id, neighbor_id))
                    if edge_key in counted_edges:
                        continue
                    counted_edges.add(edge_key)

                    edge = graph[node_id][neighbor_id]
                    left = _node_name(graph, node_id)
                    right = _node_name(graph, neighbor_id)
                    key = _relationship_key(left.casefold(), right.casefold())
                    existing = relationships.setdefault(
                        key,
                        {
                            "left": left,
                            "right": right,
                            "weight": 0,
                            "pages": set(),
                        },
                    )
                    existing["weight"] = int(existing["weight"]) + int(edge.get("weight", 1))
                    existing["pages"].update(edge.get("pages", []))
    except Exception as exc:
        # Graph context is an optional augmentation; never fail the request.
        logger.warning("GraphRAG context retrieval failed: %s", exc)
        return ""

    if not relationships:
        return ""

    ranked = sorted(
        relationships.values(),
        key=lambda item: int(item["weight"]),
        reverse=True,
    )[: settings.GRAPH_MAX_RELATIONSHIPS]

    lines = ["## Knowledge Graph Context"]
    for item in ranked:
        pages = sorted(item["pages"])
        lines.append(
            f"- {item['left']} is related to {item['right']} "
            f"through document co-occurrence on {_format_pages(pages)} "
            f"(strength: {item['weight']})."
        )

    return "\n".join(lines)
|
backend/app/rag/vectorstore.py
CHANGED
|
@@ -91,6 +91,9 @@ def store_chunks(
|
|
| 91 |
"document_id": document_id,
|
| 92 |
"page": chunk["page"],
|
| 93 |
"chunk_index": chunk["chunk_index"],
|
|
|
|
|
|
|
|
|
|
| 94 |
# Indicate whether this chunk was originally an image and include a short caption
|
| 95 |
**({"is_image": True, "image_caption": chunk.get("image_caption", "")}
|
| 96 |
if chunk.get("is_image") else {}),
|
|
@@ -169,6 +172,8 @@ def query_chunks(
|
|
| 169 |
"filename": metadata.get("filename", ""),
|
| 170 |
"document_id": metadata.get("document_id", ""),
|
| 171 |
"page": metadata.get("page", 1),
|
|
|
|
|
|
|
| 172 |
"score": round(similarity, 4),
|
| 173 |
})
|
| 174 |
|
|
|
|
| 91 |
"document_id": document_id,
|
| 92 |
"page": chunk["page"],
|
| 93 |
"chunk_index": chunk["chunk_index"],
|
| 94 |
+
"chunk_type": chunk.get("chunk_type", "text"),
|
| 95 |
+
**({"bbox": chunk.get("bbox", "")} if chunk.get("bbox") else {}),
|
| 96 |
+
**({"table_index": chunk.get("table_index", 0)} if chunk.get("chunk_type") == "table" else {}),
|
| 97 |
# Indicate whether this chunk was originally an image and include a short caption
|
| 98 |
**({"is_image": True, "image_caption": chunk.get("image_caption", "")}
|
| 99 |
if chunk.get("is_image") else {}),
|
|
|
|
| 172 |
"filename": metadata.get("filename", ""),
|
| 173 |
"document_id": metadata.get("document_id", ""),
|
| 174 |
"page": metadata.get("page", 1),
|
| 175 |
+
"chunk_type": metadata.get("chunk_type", "text"),
|
| 176 |
+
"bbox": metadata.get("bbox", ""),
|
| 177 |
"score": round(similarity, 4),
|
| 178 |
})
|
| 179 |
|
backend/app/routes/chat.py
CHANGED
|
@@ -7,20 +7,16 @@ import time
|
|
| 7 |
from datetime import datetime
|
| 8 |
from io import BytesIO
|
| 9 |
import logging
|
| 10 |
-
from typing import Optional
|
| 11 |
|
| 12 |
from fastapi import APIRouter, Depends, HTTPException, Request
|
| 13 |
from fastapi.responses import Response, StreamingResponse
|
| 14 |
-
from reportlab.lib.pagesizes import letter
|
| 15 |
-
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
|
| 16 |
-
from reportlab.lib.units import inch
|
| 17 |
-
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer
|
| 18 |
from sqlalchemy.orm import Session
|
| 19 |
|
| 20 |
from app.auth import get_current_user
|
| 21 |
from app.database import get_db
|
| 22 |
from app.metrics import record_query_response_time
|
| 23 |
-
from app.models import User, ChatMessage, Document, SharedMessage
|
| 24 |
from app.rate_limit import limiter
|
| 25 |
from app.schemas import (
|
| 26 |
ChatRequest,
|
|
@@ -30,6 +26,8 @@ from app.schemas import (
|
|
| 30 |
ShareAnswerResponse,
|
| 31 |
ShareLinkResponse,
|
| 32 |
SourceChunk,
|
|
|
|
|
|
|
| 33 |
)
|
| 34 |
|
| 35 |
logger = logging.getLogger(__name__)
|
|
@@ -77,11 +75,139 @@ def create_share_link(
|
|
| 77 |
db.commit()
|
| 78 |
|
| 79 |
return ShareLinkResponse(
|
| 80 |
-
message_id=message.id,
|
| 81 |
share_url=f"/share?message_id={message.id}",
|
| 82 |
)
|
| 83 |
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
def generate_answer(question: str, user_id: str, document_id: Optional[str] = None, hf_token: Optional[str] = None):
|
| 86 |
from app.rag.agent import generate_answer as _generate_answer
|
| 87 |
|
|
@@ -102,33 +228,7 @@ def ask_question(
|
|
| 102 |
user: User = Depends(get_current_user),
|
| 103 |
db: Session = Depends(get_db),
|
| 104 |
):
|
| 105 |
-
"""Ask a question with RAG retrieval (non-streaming).
|
| 106 |
-
|
| 107 |
-
Processes a user's question by retrieving relevant document chunks,
|
| 108 |
-
generating an answer using an LLM, and saving the conversation to chat
|
| 109 |
-
history. If a `document_id` is provided, the retrieval is scoped to that
|
| 110 |
-
specific document; otherwise, it searches across all documents owned by
|
| 111 |
-
the user.
|
| 112 |
-
|
| 113 |
-
Args:
|
| 114 |
-
payload: ChatRequest containing the `question` text and optionally a
|
| 115 |
-
`document_id` to limit the retrieval scope.
|
| 116 |
-
user: The currently authenticated user, obtained from the dependency.
|
| 117 |
-
db: SQLAlchemy database session, obtained from the dependency.
|
| 118 |
-
|
| 119 |
-
Returns:
|
| 120 |
-
ChatResponse: An object containing:
|
| 121 |
-
- answer: The generated answer text.
|
| 122 |
-
- sources: A list of `SourceChunk` objects with metadata about
|
| 123 |
-
the retrieved chunks (e.g., filename, page number, text snippet).
|
| 124 |
-
- document_id: The document ID that was used (if any).
|
| 125 |
-
|
| 126 |
-
Raises:
|
| 127 |
-
HTTPException: 404 if the specified `document_id` does not exist or
|
| 128 |
-
does not belong to the authenticated user.
|
| 129 |
-
HTTPException: 400 if the document exists but its status is not
|
| 130 |
-
"ready" (e.g., still processing or failed).
|
| 131 |
-
"""
|
| 132 |
started_at = time.perf_counter()
|
| 133 |
try:
|
| 134 |
# Validate document exists if specified
|
|
@@ -147,6 +247,17 @@ def ask_question(
|
|
| 147 |
detail=f"Document is still {doc.status}. Please wait for processing to complete.",
|
| 148 |
)
|
| 149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
result = generate_answer(
|
| 151 |
question=payload.question,
|
| 152 |
user_id=user.id,
|
|
@@ -155,8 +266,8 @@ def ask_question(
|
|
| 155 |
)
|
| 156 |
|
| 157 |
# Save to chat history
|
| 158 |
-
_save_message(db, user.id, payload.document_id, "user", payload.question)
|
| 159 |
-
_save_message(db, user.id, payload.document_id, "assistant", result["answer"], result["sources"])
|
| 160 |
|
| 161 |
return ChatResponse(
|
| 162 |
answer=result["answer"],
|
|
@@ -175,41 +286,7 @@ def ask_question_stream(
|
|
| 175 |
user: User = Depends(get_current_user),
|
| 176 |
db: Session = Depends(get_db),
|
| 177 |
):
|
| 178 |
-
"""Ask a question with Server-Sent Events (SSE) streaming response.
|
| 179 |
-
|
| 180 |
-
Processes a user's question using RAG and streams the answer token by
|
| 181 |
-
token over SSE. The user's question is saved to chat history immediately.
|
| 182 |
-
The assistant's answer is accumulated on the server and saved to history
|
| 183 |
-
only after the stream completes. If a `document_id` is provided, retrieval
|
| 184 |
-
is scoped to that document.
|
| 185 |
-
|
| 186 |
-
Args:
|
| 187 |
-
payload: ChatRequest containing the `question` text and optionally a
|
| 188 |
-
`document_id` to limit the retrieval scope.
|
| 189 |
-
user: The currently authenticated user, obtained from the dependency.
|
| 190 |
-
db: SQLAlchemy database session, obtained from the dependency.
|
| 191 |
-
|
| 192 |
-
Returns:
|
| 193 |
-
StreamingResponse: A FastAPI `StreamingResponse` with:
|
| 194 |
-
- media_type: "text/event-stream"
|
| 195 |
-
- Headers: Cache-Control, Connection, and X-Accel-Buffering set
|
| 196 |
-
for proper SSE behavior.
|
| 197 |
-
- Body: A generator yielding SSE messages with `token` (partial
|
| 198 |
-
answer) and `sources` (final source metadata) events.
|
| 199 |
-
|
| 200 |
-
Raises:
|
| 201 |
-
HTTPException: 404 if the specified `document_id` does not exist or
|
| 202 |
-
does not belong to the authenticated user.
|
| 203 |
-
HTTPException: 400 if the document exists but its status is not
|
| 204 |
-
"ready" (e.g., still processing or failed).
|
| 205 |
-
|
| 206 |
-
Note:
|
| 207 |
-
The streaming response uses a generator `event_stream` that yields
|
| 208 |
-
raw SSE chunks. The assistant's full answer is reconstructed from
|
| 209 |
-
the stream to save the complete conversation history. A separate
|
| 210 |
-
database session is created inside the generator to avoid using the
|
| 211 |
-
closed request session.
|
| 212 |
-
"""
|
| 213 |
# Validate document
|
| 214 |
if payload.document_id:
|
| 215 |
doc = db.query(Document).filter(
|
|
@@ -228,8 +305,19 @@ def ask_question_stream(
|
|
| 228 |
|
| 229 |
started_at = time.perf_counter()
|
| 230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
# Save user message immediately
|
| 232 |
-
_save_message(db, user.id, payload.document_id, "user", payload.question)
|
| 233 |
|
| 234 |
# Stream response
|
| 235 |
def event_stream():
|
|
@@ -260,7 +348,7 @@ def ask_question_stream(
|
|
| 260 |
from app.database import SessionLocal
|
| 261 |
save_db = SessionLocal()
|
| 262 |
try:
|
| 263 |
-
_save_message(save_db, user.id, payload.document_id, "assistant", full_answer, sources)
|
| 264 |
finally:
|
| 265 |
save_db.close()
|
| 266 |
finally:
|
|
@@ -283,25 +371,7 @@ def get_chat_history(
|
|
| 283 |
user: User = Depends(get_current_user),
|
| 284 |
db: Session = Depends(get_db),
|
| 285 |
):
|
| 286 |
-
"""Retrieve the complete chat history for a specific document.
|
| 287 |
-
|
| 288 |
-
Fetches all messages (both user and assistant) associated with the given
|
| 289 |
-
document and the authenticated user, ordered chronologically from oldest
|
| 290 |
-
to newest. Assistant messages that contain source metadata will have the
|
| 291 |
-
`sources` field populated.
|
| 292 |
-
|
| 293 |
-
Args:
|
| 294 |
-
document_id: The unique identifier of the document whose chat history is requested.
|
| 295 |
-
user: The currently authenticated user, obtained from the dependency.
|
| 296 |
-
db: SQLAlchemy database session, obtained from the dependency.
|
| 297 |
-
|
| 298 |
-
Returns:
|
| 299 |
-
ChatHistoryResponse: An object containing:
|
| 300 |
-
- messages: A list of `ChatMessageResponse` objects, each with
|
| 301 |
-
`id`, `role` ("user" or "assistant"), `content`, `sources`
|
| 302 |
-
(list of `SourceChunk` for assistant messages), and `created_at`.
|
| 303 |
-
- document_id: The document ID that was queried.
|
| 304 |
-
"""
|
| 305 |
messages = (
|
| 306 |
db.query(ChatMessage)
|
| 307 |
.filter(
|
|
@@ -322,7 +392,7 @@ def get_chat_history(
|
|
| 322 |
pass
|
| 323 |
|
| 324 |
formatted.append(ChatMessageResponse(
|
| 325 |
-
id=msg.id,
|
| 326 |
role=msg.role,
|
| 327 |
content=msg.content,
|
| 328 |
sources=sources,
|
|
@@ -339,33 +409,7 @@ def export_chat_history(
|
|
| 339 |
token: Optional[str] = None,
|
| 340 |
db: Session = Depends(get_db),
|
| 341 |
):
|
| 342 |
-
"""Export the chat history for a document as a downloadable file.
|
| 343 |
-
|
| 344 |
-
Supports Markdown (.md), plain text (.txt), or PDF (.pdf) export. The function accepts
|
| 345 |
-
authentication via either the standard `Authorization: Bearer <token>`
|
| 346 |
-
header (handled by the dependency chain) or a `token` query parameter to
|
| 347 |
-
facilitate browser-initiated downloads that cannot set custom headers.
|
| 348 |
-
|
| 349 |
-
Args:
|
| 350 |
-
document_id: The unique identifier of the document whose chat history is to be exported.
|
| 351 |
-
format: Output format, either "md" (Markdown), "txt" (plain text), or "pdf". Defaults to "md".
|
| 352 |
-
token: Optional JWT token passed as a query parameter. Used for browser
|
| 353 |
-
downloads when the `Authorization` header is not available.
|
| 354 |
-
db: SQLAlchemy database session, obtained from the dependency.
|
| 355 |
-
|
| 356 |
-
Returns:
|
| 357 |
-
Response: A FastAPI `Response` object with:
|
| 358 |
-
- `content`: Formatted chat history as a string or PDF bytes.
|
| 359 |
-
- `media_type`: `text/markdown`, `text/plain`, or `application/pdf`.
|
| 360 |
-
- `headers`: `Content-Disposition` attachment header with a generated filename.
|
| 361 |
-
|
| 362 |
-
Raises:
|
| 363 |
-
HTTPException: 401 if neither the token query parameter nor a valid
|
| 364 |
-
bearer token provides an authenticated user.
|
| 365 |
-
HTTPException: 400 if the `format` parameter is not "md", "txt", or "pdf".
|
| 366 |
-
HTTPException: 404 if the document does not exist or does not belong
|
| 367 |
-
to the user, or if no chat messages are found for the document.
|
| 368 |
-
"""
|
| 369 |
from app.auth import decode_token as _decode
|
| 370 |
|
| 371 |
# Resolve user from query-param token (browser download links can't set headers)
|
|
@@ -412,6 +456,7 @@ def export_chat_history(
|
|
| 412 |
media_type = "text/plain"
|
| 413 |
extension = "txt"
|
| 414 |
else:
|
|
|
|
| 415 |
content = _format_pdf(doc, messages)
|
| 416 |
media_type = "application/pdf"
|
| 417 |
extension = "pdf"
|
|
@@ -434,20 +479,7 @@ def clear_chat_history(
|
|
| 434 |
user: User = Depends(get_current_user),
|
| 435 |
db: Session = Depends(get_db),
|
| 436 |
):
|
| 437 |
-
"""Delete all chat messages associated with a specific document.
|
| 438 |
-
|
| 439 |
-
Removes every chat message (both user and assistant) linked to the given
|
| 440 |
-
`document_id` and the authenticated user. The deletion is permanent and
|
| 441 |
-
cannot be undone.
|
| 442 |
-
|
| 443 |
-
Args:
|
| 444 |
-
document_id: The unique identifier of the document whose chat history should be cleared.
|
| 445 |
-
user: The currently authenticated user, obtained from the dependency.
|
| 446 |
-
db: SQLAlchemy database session, obtained from the dependency.
|
| 447 |
-
|
| 448 |
-
Returns:
|
| 449 |
-
dict: A simple JSON object with a `message` field confirming the deletion.
|
| 450 |
-
"""
|
| 451 |
db.query(ChatMessage).filter(
|
| 452 |
ChatMessage.user_id == user.id,
|
| 453 |
ChatMessage.document_id == document_id,
|
|
@@ -464,35 +496,22 @@ def _save_message(
|
|
| 464 |
role: str,
|
| 465 |
content: str,
|
| 466 |
sources: list = None,
|
|
|
|
| 467 |
):
|
| 468 |
-
"""Save a chat message to the database.
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
Can be `None` for global chat contexts.
|
| 479 |
-
db: SQLAlchemy database session (active, typically from a dependency).
|
| 480 |
-
role: The message sender role, e.g., "user" or "assistant".
|
| 481 |
-
content: The full text content of the message.
|
| 482 |
-
sources: Optional list of source dictionaries (usually from RAG
|
| 483 |
-
retrieval) to be stored as JSON. Defaults to `None`.
|
| 484 |
-
|
| 485 |
-
Returns:
|
| 486 |
-
None
|
| 487 |
-
|
| 488 |
-
Note:
|
| 489 |
-
The function commits the transaction. It does not close the session,
|
| 490 |
-
leaving that responsibility to the caller. If `sources` is provided,
|
| 491 |
-
it is serialized using `json.dumps()`.
|
| 492 |
-
"""
|
| 493 |
msg = ChatMessage(
|
| 494 |
user_id=user_id,
|
| 495 |
document_id=document_id,
|
|
|
|
| 496 |
role=role,
|
| 497 |
content=content,
|
| 498 |
sources_json=json.dumps(sources) if sources else None,
|
|
@@ -511,7 +530,7 @@ def _share_answer_response(message: ChatMessage) -> ShareAnswerResponse:
|
|
| 511 |
sources = []
|
| 512 |
|
| 513 |
return ShareAnswerResponse(
|
| 514 |
-
id=message.id,
|
| 515 |
content=message.content,
|
| 516 |
created_at=message.created_at,
|
| 517 |
sources=sources,
|
|
@@ -519,28 +538,12 @@ def _share_answer_response(message: ChatMessage) -> ShareAnswerResponse:
|
|
| 519 |
|
| 520 |
|
| 521 |
def _format_markdown(doc, messages) -> str:
|
| 522 |
-
"""Format chat history as a Markdown document.
|
| 523 |
-
|
| 524 |
-
Generates a Markdown string containing the document metadata and the
|
| 525 |
-
full conversation. User messages are labeled "You", assistant messages
|
| 526 |
-
are labeled "Assistant". For assistant responses, if source information
|
| 527 |
-
is available, it is rendered as a numbered list with filename, page,
|
| 528 |
-
confidence, and a text preview.
|
| 529 |
-
|
| 530 |
-
Args:
|
| 531 |
-
doc: The Document object (must have `original_name` attribute).
|
| 532 |
-
messages: List of ChatMessage objects, each with attributes:
|
| 533 |
-
`role` (str), `content` (str), `created_at` (datetime, optional),
|
| 534 |
-
and `sources_json` (str, JSON-encoded list of source dicts).
|
| 535 |
-
|
| 536 |
-
Returns:
|
| 537 |
-
str: A Markdown string ready for writing to a `.md` file.
|
| 538 |
-
"""
|
| 539 |
lines = [
|
| 540 |
f"# Chat History — {doc.original_name}",
|
| 541 |
"",
|
| 542 |
f"**Document:** {doc.original_name} ",
|
| 543 |
-
f"**Exported at:** {
|
| 544 |
f"**Total messages:** {len(messages)}",
|
| 545 |
"",
|
| 546 |
"---",
|
|
@@ -557,7 +560,6 @@ def _format_markdown(doc, messages) -> str:
|
|
| 557 |
lines.append(msg.content)
|
| 558 |
lines.append("")
|
| 559 |
|
| 560 |
-
# Include source citations for assistant messages
|
| 561 |
if msg.role == "assistant" and msg.sources_json:
|
| 562 |
try:
|
| 563 |
sources = json.loads(msg.sources_json)
|
|
@@ -583,26 +585,10 @@ def _format_markdown(doc, messages) -> str:
|
|
| 583 |
|
| 584 |
|
| 585 |
def _format_plaintext(doc, messages) -> str:
|
| 586 |
-
"""Format chat history as a plain text document.
|
| 587 |
-
|
| 588 |
-
Generates a plain text string containing the document metadata and the
|
| 589 |
-
full conversation. User messages are labeled "You", assistant messages
|
| 590 |
-
are labeled "Assistant". For assistant responses, if source information
|
| 591 |
-
is available, it is rendered as a numbered list with filename, page,
|
| 592 |
-
and confidence (text preview is omitted in plain text format).
|
| 593 |
-
|
| 594 |
-
Args:
|
| 595 |
-
doc: The Document object (must have `original_name` attribute).
|
| 596 |
-
messages: List of ChatMessage objects, each with attributes:
|
| 597 |
-
`role` (str), `content` (str), `created_at` (datetime, optional),
|
| 598 |
-
and `sources_json` (str, JSON‑encoded list of source dicts).
|
| 599 |
-
|
| 600 |
-
Returns:
|
| 601 |
-
str: A plain text string ready for writing to a `.txt` file.
|
| 602 |
-
"""
|
| 603 |
lines = [
|
| 604 |
f"Chat History — {doc.original_name}",
|
| 605 |
-
f"Exported at: {
|
| 606 |
f"Total messages: {len(messages)}",
|
| 607 |
"=" * 60,
|
| 608 |
"",
|
|
@@ -615,7 +601,6 @@ def _format_plaintext(doc, messages) -> str:
|
|
| 615 |
lines.append(f"[{role_label}] ({timestamp})")
|
| 616 |
lines.append(msg.content)
|
| 617 |
|
| 618 |
-
# Include source citations for assistant messages
|
| 619 |
if msg.role == "assistant" and msg.sources_json:
|
| 620 |
try:
|
| 621 |
sources = json.loads(msg.sources_json)
|
|
@@ -633,81 +618,3 @@ def _format_plaintext(doc, messages) -> str:
|
|
| 633 |
lines.append("")
|
| 634 |
|
| 635 |
return "\n".join(lines)
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
def _format_pdf(doc, messages) -> bytes:
|
| 639 |
-
"""Format chat history as a PDF document."""
|
| 640 |
-
buffer = BytesIO()
|
| 641 |
-
pdf = SimpleDocTemplate(
|
| 642 |
-
buffer,
|
| 643 |
-
pagesize=letter,
|
| 644 |
-
leftMargin=0.75 * inch,
|
| 645 |
-
rightMargin=0.75 * inch,
|
| 646 |
-
topMargin=0.75 * inch,
|
| 647 |
-
bottomMargin=0.75 * inch,
|
| 648 |
-
)
|
| 649 |
-
|
| 650 |
-
styles = getSampleStyleSheet()
|
| 651 |
-
metadata_style = styles["Normal"]
|
| 652 |
-
metadata_style.spaceAfter = 6
|
| 653 |
-
content_style = ParagraphStyle(
|
| 654 |
-
"ChatContent",
|
| 655 |
-
parent=styles["BodyText"],
|
| 656 |
-
leading=14,
|
| 657 |
-
spaceAfter=10,
|
| 658 |
-
)
|
| 659 |
-
source_style = ParagraphStyle(
|
| 660 |
-
"ChatSource",
|
| 661 |
-
parent=styles["BodyText"],
|
| 662 |
-
leftIndent=14,
|
| 663 |
-
leading=12,
|
| 664 |
-
spaceAfter=4,
|
| 665 |
-
)
|
| 666 |
-
|
| 667 |
-
story = [
|
| 668 |
-
Paragraph(f"Chat History - {html.escape(doc.original_name)}", styles["Title"]),
|
| 669 |
-
Spacer(1, 0.15 * inch),
|
| 670 |
-
Paragraph(f"Document: {html.escape(doc.original_name)}", metadata_style),
|
| 671 |
-
Paragraph(f"Exported at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", metadata_style),
|
| 672 |
-
Paragraph(f"Total messages: {len(messages)}", metadata_style),
|
| 673 |
-
Spacer(1, 0.2 * inch),
|
| 674 |
-
]
|
| 675 |
-
|
| 676 |
-
for msg in messages:
|
| 677 |
-
timestamp = msg.created_at.strftime("%Y-%m-%d %H:%M:%S") if msg.created_at else ""
|
| 678 |
-
role_label = "You" if msg.role == "user" else "Assistant"
|
| 679 |
-
|
| 680 |
-
story.append(Paragraph(f"<b>{html.escape(role_label)}</b>", styles["Heading3"]))
|
| 681 |
-
story.append(Paragraph(html.escape(timestamp), styles["Italic"]))
|
| 682 |
-
story.append(Paragraph(_pdf_text(msg.content), content_style))
|
| 683 |
-
|
| 684 |
-
if msg.role == "assistant" and msg.sources_json:
|
| 685 |
-
try:
|
| 686 |
-
sources = json.loads(msg.sources_json)
|
| 687 |
-
if sources:
|
| 688 |
-
story.append(Paragraph("<b>Sources:</b>", metadata_style))
|
| 689 |
-
for i, src in enumerate(sources, 1):
|
| 690 |
-
filename = html.escape(str(src.get("filename", "Unknown")))
|
| 691 |
-
page = html.escape(str(src.get("page", "?")))
|
| 692 |
-
confidence = html.escape(str(src.get("confidence", 0)))
|
| 693 |
-
story.append(
|
| 694 |
-
Paragraph(
|
| 695 |
-
f"[{i}] {filename}, Page {page} (Confidence: {confidence}%)",
|
| 696 |
-
source_style,
|
| 697 |
-
)
|
| 698 |
-
)
|
| 699 |
-
text_preview = str(src.get("text", "")).strip()
|
| 700 |
-
if text_preview:
|
| 701 |
-
story.append(Paragraph(_pdf_text(text_preview), source_style))
|
| 702 |
-
except Exception:
|
| 703 |
-
pass
|
| 704 |
-
|
| 705 |
-
story.append(Spacer(1, 0.15 * inch))
|
| 706 |
-
|
| 707 |
-
pdf.build(story)
|
| 708 |
-
return buffer.getvalue()
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
def _pdf_text(text: str) -> str:
|
| 712 |
-
"""Escape text for ReportLab paragraphs while preserving line breaks."""
|
| 713 |
-
return html.escape(text or "").replace("\n", "<br/>")
|
|
|
|
| 7 |
from datetime import datetime
|
| 8 |
from io import BytesIO
|
| 9 |
import logging
|
| 10 |
+
from typing import Optional, List
|
| 11 |
|
| 12 |
from fastapi import APIRouter, Depends, HTTPException, Request
|
| 13 |
from fastapi.responses import Response, StreamingResponse
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
from sqlalchemy.orm import Session
|
| 15 |
|
| 16 |
from app.auth import get_current_user
|
| 17 |
from app.database import get_db
|
| 18 |
from app.metrics import record_query_response_time
|
| 19 |
+
from app.models import User, ChatMessage, Document, SharedMessage, ChatSession
|
| 20 |
from app.rate_limit import limiter
|
| 21 |
from app.schemas import (
|
| 22 |
ChatRequest,
|
|
|
|
| 26 |
ShareAnswerResponse,
|
| 27 |
ShareLinkResponse,
|
| 28 |
SourceChunk,
|
| 29 |
+
ChatSessionCreate,
|
| 30 |
+
ChatSessionResponse,
|
| 31 |
)
|
| 32 |
|
| 33 |
logger = logging.getLogger(__name__)
|
|
|
|
| 75 |
db.commit()
|
| 76 |
|
| 77 |
return ShareLinkResponse(
|
| 78 |
+
message_id=str(message.id),
|
| 79 |
share_url=f"/share?message_id={message.id}",
|
| 80 |
)
|
| 81 |
|
| 82 |
|
| 83 |
+
@router.get("/sessions", response_model=List[ChatSessionResponse])
def get_chat_sessions(
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """List every chat session owned by the authenticated user, newest first."""
    owned_by_user = db.query(ChatSession).filter(ChatSession.user_id == user.id)
    newest_first = owned_by_user.order_by(ChatSession.created_at.desc())
    return newest_first.all()
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
@router.post("/sessions", response_model=ChatSessionResponse, status_code=201)
def create_chat_session(
    payload: ChatSessionCreate,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Create a chat session titled ``payload.title`` for the authenticated user.

    The row is committed and refreshed before being returned, so the response
    carries the values the database stored for it.
    """
    new_session = ChatSession(user_id=user.id, title=payload.title)
    db.add(new_session)
    db.commit()
    db.refresh(new_session)
    return new_session
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
@router.put("/sessions/{session_id}", response_model=ChatSessionResponse)
def rename_chat_session(
    session_id: str,
    payload: ChatSessionCreate,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Set a new title on one of the authenticated user's chat sessions.

    Raises:
        HTTPException: 404 when no session with ``session_id`` belongs to ``user``.
    """
    # Filtering on the owner as well as the id keeps other users' sessions out of reach.
    owned_match = db.query(ChatSession).filter(
        ChatSession.id == session_id,
        ChatSession.user_id == user.id,
    )
    target = owned_match.first()
    if not target:
        raise HTTPException(status_code=404, detail="Chat session not found")

    target.title = payload.title
    db.commit()
    db.refresh(target)
    return target
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
@router.delete("/sessions/{session_id}")
def delete_chat_session(
    session_id: str,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Delete one of the authenticated user's chat sessions; replies with an empty 204.

    NOTE(review): the session's messages are presumably removed through a
    cascade configured on the ChatSession model — confirm against the model.

    Raises:
        HTTPException: 404 when no session with ``session_id`` belongs to ``user``.
    """
    owned_match = db.query(ChatSession).filter(
        ChatSession.id == session_id,
        ChatSession.user_id == user.id,
    )
    doomed = owned_match.first()
    if not doomed:
        raise HTTPException(status_code=404, detail="Chat session not found")

    db.delete(doomed)
    db.commit()
    return Response(status_code=204)
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
@router.get("/history/session/{session_id}", response_model=ChatHistoryResponse)
def get_session_history(
    session_id: str,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Return the messages of one chat session, oldest first.

    The session must belong to the authenticated user. Each message's stored
    ``sources_json`` is decoded into ``SourceChunk`` objects; a message whose
    sources cannot be decoded is still returned, with an empty source list.

    Raises:
        HTTPException: 404 when no session with ``session_id`` belongs to ``user``.
    """
    session = (
        db.query(ChatSession)
        .filter(
            ChatSession.id == session_id,
            ChatSession.user_id == user.id,
        )
        .first()
    )
    if not session:
        raise HTTPException(status_code=404, detail="Chat session not found")

    messages = (
        db.query(ChatMessage)
        .filter(
            ChatMessage.session_id == session_id,
            ChatMessage.user_id == user.id,
        )
        .order_by(ChatMessage.created_at.asc())
        .all()
    )

    formatted = []
    for msg in messages:
        sources = []
        if msg.sources_json:
            try:
                sources = [SourceChunk(**s) for s in json.loads(msg.sources_json)]
            except Exception:
                # Best-effort: a corrupt sources payload must not hide the message
                # itself, but it is logged so the bad row can be found and repaired.
                logger.warning(
                    "Could not parse sources for chat message %s; returning it without sources",
                    msg.id,
                    exc_info=True,
                )

        formatted.append(
            ChatMessageResponse(
                id=str(msg.id),
                role=msg.role,
                content=msg.content,
                sources=sources,
                created_at=msg.created_at,
            )
        )

    # A session is not tied to a single document, so no document id is reported.
    return ChatHistoryResponse(messages=formatted, document_id=None)
|
| 209 |
+
|
| 210 |
+
|
| 211 |
def generate_answer(question: str, user_id: str, document_id: Optional[str] = None, hf_token: Optional[str] = None):
|
| 212 |
from app.rag.agent import generate_answer as _generate_answer
|
| 213 |
|
|
|
|
| 228 |
user: User = Depends(get_current_user),
|
| 229 |
db: Session = Depends(get_db),
|
| 230 |
):
|
| 231 |
+
"""Ask a question with RAG retrieval (non-streaming)."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
started_at = time.perf_counter()
|
| 233 |
try:
|
| 234 |
# Validate document exists if specified
|
|
|
|
| 247 |
detail=f"Document is still {doc.status}. Please wait for processing to complete.",
|
| 248 |
)
|
| 249 |
|
| 250 |
+
# Resolve or create session
|
| 251 |
+
session_id = payload.session_id
|
| 252 |
+
if not session_id:
|
| 253 |
+
session = db.query(ChatSession).filter(ChatSession.user_id == user.id).first()
|
| 254 |
+
if not session:
|
| 255 |
+
session = ChatSession(user_id=user.id, title="Default Chat")
|
| 256 |
+
db.add(session)
|
| 257 |
+
db.commit()
|
| 258 |
+
db.refresh(session)
|
| 259 |
+
session_id = session.id
|
| 260 |
+
|
| 261 |
result = generate_answer(
|
| 262 |
question=payload.question,
|
| 263 |
user_id=user.id,
|
|
|
|
| 266 |
)
|
| 267 |
|
| 268 |
# Save to chat history
|
| 269 |
+
_save_message(db, user.id, payload.document_id, "user", payload.question, session_id=session_id)
|
| 270 |
+
_save_message(db, user.id, payload.document_id, "assistant", result["answer"], result["sources"], session_id=session_id)
|
| 271 |
|
| 272 |
return ChatResponse(
|
| 273 |
answer=result["answer"],
|
|
|
|
| 286 |
user: User = Depends(get_current_user),
|
| 287 |
db: Session = Depends(get_db),
|
| 288 |
):
|
| 289 |
+
"""Ask a question with Server-Sent Events (SSE) streaming response."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
# Validate document
|
| 291 |
if payload.document_id:
|
| 292 |
doc = db.query(Document).filter(
|
|
|
|
| 305 |
|
| 306 |
started_at = time.perf_counter()
|
| 307 |
|
| 308 |
+
# Resolve or create session
|
| 309 |
+
session_id = payload.session_id
|
| 310 |
+
if not session_id:
|
| 311 |
+
session = db.query(ChatSession).filter(ChatSession.user_id == user.id).first()
|
| 312 |
+
if not session:
|
| 313 |
+
session = ChatSession(user_id=user.id, title="Default Chat")
|
| 314 |
+
db.add(session)
|
| 315 |
+
db.commit()
|
| 316 |
+
db.refresh(session)
|
| 317 |
+
session_id = session.id
|
| 318 |
+
|
| 319 |
# Save user message immediately
|
| 320 |
+
_save_message(db, user.id, payload.document_id, "user", payload.question, session_id=session_id)
|
| 321 |
|
| 322 |
# Stream response
|
| 323 |
def event_stream():
|
|
|
|
| 348 |
from app.database import SessionLocal
|
| 349 |
save_db = SessionLocal()
|
| 350 |
try:
|
| 351 |
+
_save_message(save_db, user.id, payload.document_id, "assistant", full_answer, sources, session_id=session_id)
|
| 352 |
finally:
|
| 353 |
save_db.close()
|
| 354 |
finally:
|
|
|
|
| 371 |
user: User = Depends(get_current_user),
|
| 372 |
db: Session = Depends(get_db),
|
| 373 |
):
|
| 374 |
+
"""Retrieve the complete chat history for a specific document."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 375 |
messages = (
|
| 376 |
db.query(ChatMessage)
|
| 377 |
.filter(
|
|
|
|
| 392 |
pass
|
| 393 |
|
| 394 |
formatted.append(ChatMessageResponse(
|
| 395 |
+
id=str(msg.id),
|
| 396 |
role=msg.role,
|
| 397 |
content=msg.content,
|
| 398 |
sources=sources,
|
|
|
|
| 409 |
token: Optional[str] = None,
|
| 410 |
db: Session = Depends(get_db),
|
| 411 |
):
|
| 412 |
+
"""Export the chat history for a document as a downloadable file."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 413 |
from app.auth import decode_token as _decode
|
| 414 |
|
| 415 |
# Resolve user from query-param token (browser download links can't set headers)
|
|
|
|
| 456 |
media_type = "text/plain"
|
| 457 |
extension = "txt"
|
| 458 |
else:
|
| 459 |
+
from app.routes.chat_export import format_pdf as _format_pdf
|
| 460 |
content = _format_pdf(doc, messages)
|
| 461 |
media_type = "application/pdf"
|
| 462 |
extension = "pdf"
|
|
|
|
| 479 |
user: User = Depends(get_current_user),
|
| 480 |
db: Session = Depends(get_db),
|
| 481 |
):
|
| 482 |
+
"""Delete all chat messages associated with a specific document."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 483 |
db.query(ChatMessage).filter(
|
| 484 |
ChatMessage.user_id == user.id,
|
| 485 |
ChatMessage.document_id == document_id,
|
|
|
|
| 496 |
role: str,
|
| 497 |
content: str,
|
| 498 |
sources: list = None,
|
| 499 |
+
session_id: Optional[str] = None,
|
| 500 |
):
|
| 501 |
+
"""Save a chat message to the database."""
|
| 502 |
+
if not session_id:
|
| 503 |
+
session = db.query(ChatSession).filter(ChatSession.user_id == user_id).first()
|
| 504 |
+
if not session:
|
| 505 |
+
session = ChatSession(user_id=user_id, title="Default Chat")
|
| 506 |
+
db.add(session)
|
| 507 |
+
db.commit()
|
| 508 |
+
db.refresh(session)
|
| 509 |
+
session_id = session.id
|
| 510 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 511 |
msg = ChatMessage(
|
| 512 |
user_id=user_id,
|
| 513 |
document_id=document_id,
|
| 514 |
+
session_id=session_id,
|
| 515 |
role=role,
|
| 516 |
content=content,
|
| 517 |
sources_json=json.dumps(sources) if sources else None,
|
|
|
|
| 530 |
sources = []
|
| 531 |
|
| 532 |
return ShareAnswerResponse(
|
| 533 |
+
id=str(message.id),
|
| 534 |
content=message.content,
|
| 535 |
created_at=message.created_at,
|
| 536 |
sources=sources,
|
|
|
|
| 538 |
|
| 539 |
|
| 540 |
def _format_markdown(doc, messages) -> str:
|
| 541 |
+
"""Format chat history as a Markdown document."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 542 |
lines = [
|
| 543 |
f"# Chat History — {doc.original_name}",
|
| 544 |
"",
|
| 545 |
f"**Document:** {doc.original_name} ",
|
| 546 |
+
f"**Exported at:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ",
|
| 547 |
f"**Total messages:** {len(messages)}",
|
| 548 |
"",
|
| 549 |
"---",
|
|
|
|
| 560 |
lines.append(msg.content)
|
| 561 |
lines.append("")
|
| 562 |
|
|
|
|
| 563 |
if msg.role == "assistant" and msg.sources_json:
|
| 564 |
try:
|
| 565 |
sources = json.loads(msg.sources_json)
|
|
|
|
| 585 |
|
| 586 |
|
| 587 |
def _format_plaintext(doc, messages) -> str:
|
| 588 |
+
"""Format chat history as a plain text document."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 589 |
lines = [
|
| 590 |
f"Chat History — {doc.original_name}",
|
| 591 |
+
f"Exported at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
|
| 592 |
f"Total messages: {len(messages)}",
|
| 593 |
"=" * 60,
|
| 594 |
"",
|
|
|
|
| 601 |
lines.append(f"[{role_label}] ({timestamp})")
|
| 602 |
lines.append(msg.content)
|
| 603 |
|
|
|
|
| 604 |
if msg.role == "assistant" and msg.sources_json:
|
| 605 |
try:
|
| 606 |
sources = json.loads(msg.sources_json)
|
|
|
|
| 618 |
lines.append("")
|
| 619 |
|
| 620 |
return "\n".join(lines)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/app/routes/documents.py
CHANGED
|
@@ -172,6 +172,15 @@ def _ingest_document(document_id: str, filepath: str, original_name: str, user_i
|
|
| 172 |
db.commit()
|
| 173 |
return
|
| 174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
# Store embeddings in ChromaDB
|
| 176 |
chunk_count = store_chunks(
|
| 177 |
chunks=chunks,
|
|
@@ -629,6 +638,14 @@ def delete_document(
|
|
| 629 |
except Exception as e:
|
| 630 |
logger.warning(f"Error deleting vectors: {e}")
|
| 631 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 632 |
# Delete from database (cascades to chat messages)
|
| 633 |
db.delete(doc)
|
| 634 |
db.commit()
|
|
|
|
| 172 |
db.commit()
|
| 173 |
return
|
| 174 |
|
| 175 |
+
# Build and persist a lightweight entity co-occurrence graph for GraphRAG.
|
| 176 |
+
try:
|
| 177 |
+
from app.rag.graph_builder import build_graph, save_graph
|
| 178 |
+
|
| 179 |
+
graph = build_graph(chunks)
|
| 180 |
+
save_graph(graph, user_id=user_id, document_id=document_id)
|
| 181 |
+
except Exception as e:
|
| 182 |
+
logger.warning(f"Could not build knowledge graph for document {document_id}: {e}")
|
| 183 |
+
|
| 184 |
# Store embeddings in ChromaDB
|
| 185 |
chunk_count = store_chunks(
|
| 186 |
chunks=chunks,
|
|
|
|
| 638 |
except Exception as e:
|
| 639 |
logger.warning(f"Error deleting vectors: {e}")
|
| 640 |
|
| 641 |
+
# Delete persisted knowledge graph
|
| 642 |
+
try:
|
| 643 |
+
from app.rag.graph_builder import delete_graph
|
| 644 |
+
|
| 645 |
+
delete_graph(user_id=user.id, document_id=document_id)
|
| 646 |
+
except Exception as e:
|
| 647 |
+
logger.warning(f"Error deleting knowledge graph: {e}")
|
| 648 |
+
|
| 649 |
# Delete from database (cascades to chat messages)
|
| 650 |
db.delete(doc)
|
| 651 |
db.commit()
|
backend/app/schemas.py
CHANGED
|
@@ -146,6 +146,7 @@ class AdminStatsResponse(BaseModel):
|
|
| 146 |
class ChatRequest(BaseModel):
    """Body of a chat question request."""

    # The question text: required, 1 to 2000 characters.
    question: str = Field(..., min_length=1, max_length=2000)
    # Optional id of the document the question refers to.
    document_id: Optional[str] = None
|
|
|
|
| 149 |
|
| 150 |
|
| 151 |
class SourceChunk(BaseModel):
|
|
@@ -192,5 +193,20 @@ class ShareLinkResponse(BaseModel):
|
|
| 192 |
share_url: str
|
| 193 |
|
| 194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
# Rebuild models for forward references
|
| 196 |
TokenResponse.model_rebuild()
|
|
|
|
| 146 |
class ChatRequest(BaseModel):
    """Body of a chat question request."""

    # The question text: required, 1 to 2000 characters.
    question: str = Field(..., min_length=1, max_length=2000)
    # Optional id of the document the question refers to.
    document_id: Optional[str] = None
    # Optional id of the chat session the exchange should be stored under.
    session_id: Optional[str] = None
|
| 150 |
|
| 151 |
|
| 152 |
class SourceChunk(BaseModel):
|
|
|
|
| 193 |
share_url: str
|
| 194 |
|
| 195 |
|
| 196 |
+
# ── Chat Session ──────────────────────────────────────
|
| 197 |
+
|
| 198 |
+
class ChatSessionCreate(BaseModel):
    """Payload carrying the title for a chat session."""

    # Required session title, 1 to 255 characters.
    title: str = Field(..., min_length=1, max_length=255)
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
class ChatSessionResponse(BaseModel):
    """Chat session as serialized in API responses."""

    # Pydantic v2 configuration. ``from_attributes`` lets the model be built from
    # objects exposing these fields as attributes rather than from dicts. The
    # class-based ``Config`` form is deprecated in Pydantic v2; a plain dict is
    # accepted for ``model_config`` and needs no extra import.
    model_config = {"from_attributes": True}

    id: str
    title: str
    created_at: datetime
|
| 209 |
+
|
| 210 |
+
|
| 211 |
# Rebuild models for forward references
|
| 212 |
TokenResponse.model_rebuild()
|
backend/requirements.txt
CHANGED
|
@@ -25,6 +25,7 @@ httpx
|
|
| 25 |
|
| 26 |
# Document Processing
|
| 27 |
PyMuPDF
|
|
|
|
| 28 |
python-docx
|
| 29 |
|
| 30 |
# LangChain & RAG
|
|
@@ -42,6 +43,9 @@ transformers
|
|
| 42 |
|
| 43 |
# Vector Database
|
| 44 |
chromadb
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
# LLM Inference
|
| 47 |
huggingface-hub
|
|
|
|
| 25 |
|
| 26 |
# Document Processing
|
| 27 |
PyMuPDF
|
| 28 |
+
pdfplumber
|
| 29 |
python-docx
|
| 30 |
|
| 31 |
# LangChain & RAG
|
|
|
|
| 43 |
|
| 44 |
# Vector Database
|
| 45 |
chromadb
|
| 46 |
+
networkx>=3.3
|
| 47 |
+
spacy>=3.7
|
| 48 |
+
neo4j>=5.0
|
| 49 |
|
| 50 |
# LLM Inference
|
| 51 |
huggingface-hub
|
backend/tests/test_chunker.py
CHANGED
|
@@ -1,7 +1,10 @@
|
|
| 1 |
from pathlib import Path
|
|
|
|
|
|
|
| 2 |
|
| 3 |
import pytest
|
| 4 |
|
|
|
|
| 5 |
from app.rag.chunker import chunk_document, get_page_count
|
| 6 |
|
| 7 |
|
|
@@ -36,3 +39,49 @@ def test_get_page_count_for_txt_returns_one(tmp_path):
|
|
| 36 |
file_path.write_text("hello", encoding="utf-8")
|
| 37 |
|
| 38 |
assert get_page_count(str(file_path)) == 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
+
import sys
|
| 3 |
+
import types
|
| 4 |
|
| 5 |
import pytest
|
| 6 |
|
| 7 |
+
from app.rag import chunker
|
| 8 |
from app.rag.chunker import chunk_document, get_page_count
|
| 9 |
|
| 10 |
|
|
|
|
| 39 |
file_path.write_text("hello", encoding="utf-8")
|
| 40 |
|
| 41 |
assert get_page_count(str(file_path)) == 1
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def test_pdf_table_detection_separates_table_from_paragraph(monkeypatch):
    """A table on a PDF page becomes its own chunk, separate from the prose above it.

    ``pdfplumber`` is replaced in ``sys.modules`` by an in-memory fake exposing a
    single page with one table and six words, and image extraction is stubbed
    out, so no real PDF is read.
    """

    def word(text, x0, x1, top):
        # Every fixture word is 10 units tall.
        return {"text": text, "x0": x0, "x1": x1, "top": top, "bottom": top + 10}

    class FakeTable:
        bbox = (40, 90, 300, 160)

        def extract(self):
            header = ["Name", "Amount"]
            row = ["Alpha", "$10"]
            return [header, row]

    class FakePage:
        def find_tables(self):
            return [FakeTable()]

        def extract_words(self):
            return [
                # Two words above the table's bounding box.
                word("Intro", 40, 70, 20),
                word("paragraph", 75, 140, 20),
                # Four words inside the table's bounding box.
                word("Name", 45, 80, 100),
                word("Amount", 160, 220, 100),
                word("Alpha", 45, 85, 125),
                word("$10", 160, 185, 125),
            ]

    class FakePdf:
        pages = [FakePage()]

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, traceback):
            return False

    monkeypatch.setitem(
        sys.modules,
        "pdfplumber",
        types.SimpleNamespace(open=lambda _filepath: FakePdf()),
    )
    monkeypatch.setattr(chunker, "extract_pdf_images", lambda _filepath: [])

    chunks = chunk_document("report.pdf")

    assert len(chunks) == 2
    text_chunk, table_chunk = chunks
    assert text_chunk["chunk_type"] == "text"
    assert text_chunk["text"] == "Intro paragraph"
    assert "Name" not in text_chunk["text"]
    assert table_chunk["chunk_type"] == "table"
    assert table_chunk["bbox"] == "[40.0, 90.0, 300.0, 160.0]"
    assert "| Name | Amount |" in table_chunk["text"]
    assert "| Alpha | $10 |" in table_chunk["text"]
|
backend/tests/test_documents.py
CHANGED
|
@@ -1,3 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
def test_api_health(client):
|
| 2 |
response = client.get("/api/health")
|
| 3 |
|
|
@@ -32,3 +38,76 @@ def test_upload_rejects_unsupported_extension_before_deep_validation(client, aut
|
|
| 32 |
|
| 33 |
assert response.status_code == 400
|
| 34 |
assert "not supported" in response.json()["detail"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import types
|
| 2 |
+
|
| 3 |
+
from app.models import Document
|
| 4 |
+
from app.routes.documents import _ingest_document
|
| 5 |
+
|
| 6 |
+
|
| 7 |
def test_api_health(client):
|
| 8 |
response = client.get("/api/health")
|
| 9 |
|
|
|
|
| 38 |
|
| 39 |
assert response.status_code == 400
|
| 40 |
assert "not supported" in response.json()["detail"]
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def test_ingest_document_builds_and_saves_graph(db_session, monkeypatch, tmp_path, user):
    """Ingesting a document builds a graph from its chunks and saves it for that user/document.

    Page counting, chunking, vector storage, summarization and the graph
    builder are all replaced with stubs; the test then checks what
    ``save_graph`` received and that the document ends up ``ready``.
    """
    import sys

    document = Document(
        user_id=user.id,
        filename="graph.txt",
        original_name="graph.txt",
        file_size=128,
        status="pending",
    )
    db_session.add(document)
    db_session.commit()
    db_session.refresh(document)

    # Plain values captured up front so the assertions do not depend on ORM state.
    user_id = user.id
    document_id = document.id
    chunks = [{"text": "OpenAI works with Microsoft.", "page": 1, "chunk_index": 0}]
    saved = {}

    def fake_build_graph(received_chunks):
        return {"chunks": received_chunks}

    def fake_save_graph(graph, user_id, document_id):
        saved.update({"graph": graph, "user_id": user_id, "document_id": document_id})

    monkeypatch.setattr("app.routes.documents.get_page_count", lambda filepath: 1)
    monkeypatch.setattr("app.routes.documents.chunk_document", lambda filepath: chunks)
    monkeypatch.setattr("app.routes.documents.store_chunks", lambda **kwargs: len(chunks))
    monkeypatch.setattr("app.database.SessionLocal", lambda: db_session)

    # Stand-in summarizer module so the real one is never imported.
    fake_summary = types.ModuleType("app.rag.summarizer")
    fake_summary.generate_document_summary = lambda filepath, max_sentences=2: "Summary"
    monkeypatch.setitem(sys.modules, "app.rag.summarizer", fake_summary)

    monkeypatch.setattr("app.rag.graph_builder.build_graph", fake_build_graph)
    monkeypatch.setattr("app.rag.graph_builder.save_graph", fake_save_graph)

    _ingest_document(
        document_id=document_id,
        filepath=str(tmp_path / "graph.txt"),
        original_name=document.original_name,
        user_id=user_id,
    )

    expected_call = {
        "graph": {"chunks": chunks},
        "user_id": user_id,
        "document_id": document_id,
    }
    assert saved == expected_call

    refreshed = db_session.get(Document, document_id)
    assert refreshed.status == "ready"
    assert refreshed.chunk_count == 1
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def test_delete_document_removes_knowledge_graph(client, auth_headers, ready_document, monkeypatch):
    """Deleting a document through the API also asks the graph builder to delete its graph."""
    deleted = {}

    def record_delete(user_id, document_id):
        deleted.update({"user_id": user_id, "document_id": document_id})

    # Vector deletion is stubbed out; only the graph deletion call is observed.
    monkeypatch.setattr("app.routes.documents.delete_document_chunks", lambda **kwargs: None)
    monkeypatch.setattr("app.rag.graph_builder.delete_graph", record_delete)

    url = f"/api/v1/documents/{ready_document.id}"
    response = client.delete(url, headers=auth_headers)

    assert response.status_code == 200
    assert deleted["document_id"] == ready_document.id
|
backend/tests/test_graph_builder.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
from app.rag import graph_builder
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class FakeEntity:
    """Test double for a recognised entity, exposing ``text`` and ``label_``."""

    def __init__(self, text, label):
        self.text, self.label_ = text, label
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class FakeDoc:
    """Test double for a processed document; its entities are exposed as ``ents``."""

    def __init__(self, entities):
        self.ents = entities
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class FakeNlp:
    """Callable test double that "recognises" a fixed vocabulary by substring match."""

    def __call__(self, text):
        vocabulary = (
            ("OpenAI", "ORG"),
            ("Microsoft", "ORG"),
            ("Azure", "PRODUCT"),
            ("Ignored Date", "DATE"),
        )
        # One entity per vocabulary term found in the text, in vocabulary order.
        found = [FakeEntity(term, tag) for term, tag in vocabulary if term in text]
        return FakeDoc(found)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def test_extract_entities_filters_configured_labels(monkeypatch):
    """Only the ORG entities survive extraction; the DATE entity is filtered out."""
    monkeypatch.setattr(graph_builder, "_nlp", FakeNlp())
    sentence = "OpenAI works with Microsoft on Ignored Date"

    entities = graph_builder.extract_entities(sentence)

    texts = {entity.text for entity in entities}
    labels = {entity.label for entity in entities}
    assert texts == {"OpenAI", "Microsoft"}
    assert labels == {"ORG"}
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def test_build_graph_tracks_entity_edges_and_weights(monkeypatch):
    """Entities co-occurring in two chunks get one edge of weight 2 spanning both pages."""
    monkeypatch.setattr(graph_builder, "_nlp", FakeNlp())
    first_chunk = {"text": "OpenAI works with Microsoft.", "page": 1, "chunk_index": 0}
    second_chunk = {"text": "OpenAI and Microsoft use Azure.", "page": 2, "chunk_index": 1}

    graph = graph_builder.build_graph([first_chunk, second_chunk])

    openai_id, microsoft_id, azure_id = "ORG:openai", "ORG:microsoft", "PRODUCT:azure"

    openai_node = graph.nodes[openai_id]
    assert openai_node["name"] == "OpenAI"
    assert openai_node["pages"] == [1, 2]

    shared_edge = graph[openai_id][microsoft_id]
    assert shared_edge["weight"] == 2
    assert shared_edge["pages"] == [1, 2]

    assert graph.has_edge(microsoft_id, azure_id)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def test_save_load_and_delete_graph_roundtrip(tmp_path, monkeypatch):
    """A saved graph can be read back from disk and loaded, and deleting it removes the file."""
    monkeypatch.setattr(graph_builder.settings, "GRAPH_PERSIST_DIR", str(tmp_path))
    # Patch the NLP pipeline like the sibling tests do, so building the (empty)
    # graph cannot depend on a real language model being installed.
    monkeypatch.setattr(graph_builder, "_nlp", FakeNlp())

    graph = graph_builder.build_graph([])
    graph.add_node("ORG:openai", name="OpenAI", label="ORG", mentions=1, pages=[1], chunks=[0])

    path = graph_builder.save_graph(graph, user_id="user-1", document_id="doc-1")
    payload = json.loads(path.read_text(encoding="utf-8"))
    loaded = graph_builder.load_graph(user_id="user-1", document_id="doc-1")

    assert payload["metadata"]["document_id"] == "doc-1"
    assert loaded.nodes["ORG:openai"]["name"] == "OpenAI"

    graph_builder.delete_graph(user_id="user-1", document_id="doc-1")
    assert not path.exists()
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def test_empty_chunks_produce_empty_graph(monkeypatch):
    """Building a graph from no chunks yields a graph with no nodes and no edges."""
    monkeypatch.setattr(graph_builder, "_nlp", FakeNlp())

    empty_graph = graph_builder.build_graph([])

    node_count = empty_graph.number_of_nodes()
    assert node_count == 0
    edge_count = empty_graph.number_of_edges()
    assert edge_count == 0
|
backend/tests/test_graph_retriever.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.rag import graph_builder, graph_retriever
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class FakeEntity:
    """Test double for a recognised entity, exposing ``text`` and ``label_``."""

    def __init__(self, text, label):
        self.text, self.label_ = text, label
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class FakeDoc:
    """Test double for a processed document; its entities are exposed as ``ents``."""

    def __init__(self, entities):
        self.ents = entities
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class FakeNlp:
    """Callable test double that "recognises" a fixed vocabulary by substring match."""

    def __call__(self, text):
        vocabulary = (
            ("OpenAI", "ORG"),
            ("Microsoft", "ORG"),
            ("Azure", "PRODUCT"),
        )
        # One entity per vocabulary term found in the text, in vocabulary order.
        found = [FakeEntity(term, tag) for term, tag in vocabulary if term in text]
        return FakeDoc(found)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def _save_sample_graph(tmp_path, monkeypatch, user_id="user-1", document_id="doc-1"):
    """Build a two-chunk sample graph and save it under ``tmp_path`` for the given user/document.

    Graph persistence is pointed at ``tmp_path`` and the NLP pipeline is
    replaced by ``FakeNlp`` before the graph is built.
    """
    monkeypatch.setattr(graph_builder.settings, "GRAPH_PERSIST_DIR", str(tmp_path))
    monkeypatch.setattr(graph_builder, "_nlp", FakeNlp())

    sample_chunks = [
        {"text": "OpenAI works with Microsoft.", "page": 1, "chunk_index": 0},
        {"text": "Microsoft deploys Azure.", "page": 2, "chunk_index": 1},
    ]
    sample_graph = graph_builder.build_graph(sample_chunks)
    graph_builder.save_graph(sample_graph, user_id=user_id, document_id=document_id)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def test_get_entity_context_returns_one_hop_relationships(tmp_path, monkeypatch):
    """A query naming stored entities yields a context section mentioning them and their page."""
    _save_sample_graph(tmp_path, monkeypatch)

    context = graph_retriever.get_entity_context(
        query="How is OpenAI related to Microsoft?",
        user_id="user-1",
        document_id="doc-1",
    )

    expected_fragments = (
        "## Knowledge Graph Context",
        "OpenAI",
        "Microsoft",
        "page 1",
    )
    for fragment in expected_fragments:
        assert fragment in context
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def test_get_entity_context_returns_empty_for_no_match(tmp_path, monkeypatch):
    """A query that names no stored entity produces an empty context string."""
    _save_sample_graph(tmp_path, monkeypatch)
    lookup = {
        "query": "What about Google?",
        "user_id": "user-1",
        "document_id": "doc-1",
    }

    context = graph_retriever.get_entity_context(**lookup)

    assert context == ""
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def test_get_entity_context_returns_empty_for_missing_graph(tmp_path, monkeypatch):
    """Asking about a document that has no saved graph produces an empty context string."""
    # Nothing is saved here: the persist directory stays empty on purpose.
    monkeypatch.setattr(graph_builder.settings, "GRAPH_PERSIST_DIR", str(tmp_path))
    monkeypatch.setattr(graph_builder, "_nlp", FakeNlp())
    lookup = {
        "query": "OpenAI",
        "user_id": "user-1",
        "document_id": "missing",
    }

    context = graph_retriever.get_entity_context(**lookup)

    assert context == ""
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def test_get_entity_context_isolates_users(tmp_path, monkeypatch):
    """A graph saved for one user is not returned to a different user with the same document id."""
    _save_sample_graph(tmp_path, monkeypatch, user_id="user-1", document_id="doc-1")
    query = "How is OpenAI related to Microsoft?"

    # Positive control: the owner must get context for this query. Without it the
    # isolation assertion below would also pass if retrieval returned "" for everyone.
    owner_context = graph_retriever.get_entity_context(
        query=query,
        user_id="user-1",
        document_id="doc-1",
    )
    assert "OpenAI" in owner_context

    # Same query and document id, different user: nothing may leak across.
    other_context = graph_retriever.get_entity_context(
        query=query,
        user_id="user-2",
        document_id="doc-1",
    )
    assert other_context == ""
|
backend/tests/test_graphrag_agent.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.rag import agent
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class FakeMessage:
    """Test double for a chat-completion message; carries only a fixed ``content``."""

    content = "Graph answer"
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class FakeChoice:
    """Test double for one completion choice; wraps a single shared ``FakeMessage``."""

    message = FakeMessage()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class FakeResponse:
    """Test double for a completion response; exposes exactly one ``FakeChoice``."""

    choices = [FakeChoice()]
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class FakeClient:
    """Test double for the LLM client that remembers the last prompt it was given."""

    def __init__(self):
        # Stays None until chat_completion is called.
        self.messages = None

    def chat_completion(self, messages, **_ignored):
        """Record ``messages`` for later inspection and return the canned response."""
        self.messages = messages
        return FakeResponse()
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def test_generate_answer_appends_graph_context_without_changing_sources(monkeypatch):
    """The prompt contains both vector and graph context; the returned sources are the vector chunk only."""
    graph_context = "## Knowledge Graph Context\n- OpenAI is related to Microsoft on page 1."
    vector_chunk = {
        "text": "Vector context",
        "filename": "doc.pdf",
        "page": 1,
        "score": 0.9,
        "confidence": 100.0,
    }
    chunks = [vector_chunk]
    # Independent copy taken before the call, so the comparison does not alias `chunks`.
    expected_sources = [dict(vector_chunk)]
    fake_llm = FakeClient()

    monkeypatch.setattr(agent, "get_llm_client", lambda: fake_llm)
    monkeypatch.setattr(agent, "retrieve", lambda **kwargs: chunks)
    monkeypatch.setattr(agent, "get_entity_context", lambda **kwargs: graph_context)

    result = agent.generate_answer("How are OpenAI and Microsoft related?", "user-1", "doc-1")

    # The second message sent to the client is the one inspected for context.
    prompt = fake_llm.messages[1]["content"]
    for fragment in ("Vector context", "Knowledge Graph Context"):
        assert fragment in prompt
    assert result["sources"] == expected_sources
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def test_generate_answer_stream_appends_graph_context(monkeypatch):
    """The streaming path also puts graph context into the prompt, and its first event starts with ``data:``."""
    seen = {}

    class StreamingClient:
        """Captures the prompt and streams back nothing."""

        def chat_completion(self, messages, **kwargs):
            seen["messages"] = messages
            return iter([])

    def fake_retrieve(**kwargs):
        # Fresh list on every call, like the lambda it replaces.
        return [
            {
                "text": "Vector stream context",
                "filename": "doc.pdf",
                "page": 1,
                "score": 0.9,
                "confidence": 100.0,
            }
        ]

    def fake_entity_context(**kwargs):
        return "## Knowledge Graph Context\n- OpenAI is related to Microsoft on page 1."

    monkeypatch.setattr(agent, "get_llm_client", lambda: StreamingClient())
    monkeypatch.setattr(agent, "retrieve", fake_retrieve)
    monkeypatch.setattr(agent, "get_entity_context", fake_entity_context)

    events = list(agent.generate_answer_stream("OpenAI Microsoft", "user-1", "doc-1"))

    first_event = events[0]
    assert first_event.startswith("data:")
    assert "Knowledge Graph Context" in seen["messages"][1]["content"]
|
frontend/e2e/snapshots.spec.ts
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { expect, test, type Page } from "@playwright/test";
|
| 2 |
+
|
| 3 |
+
/** Account payload returned by the mocked `/api/v1/auth/me` route. */
const user = {
  id: "user-1",
  username: "tester",
  email: "tester@example.com",
  is_admin: false,
  created_at: "2026-05-28T00:00:00Z",
};

/** A single already-processed document, used for the non-empty dashboard case. */
const uploadedDocument = {
  id: "doc-1",
  original_name: "notes.txt",
  file_size: 11,
  page_count: 1,
  chunk_count: 1,
  status: "ready",
  error_message: null,
  uploaded_at: "2026-05-28T00:00:00Z",
};
|
| 21 |
+
|
| 22 |
+
/**
 * Intercepts the current-user and document-list requests so the dashboard can
 * be rendered from fixed data.
 *
 * @param page - Playwright page whose network requests are intercepted.
 * @param documents - Documents to report in the list response (default: none).
 */
async function mockDashboardApis(page: Page, documents: typeof uploadedDocument[] = []) {
  // Built per request so the response always reflects the array's current contents.
  const buildDocumentList = () => ({
    items: documents,
    total: documents.length,
    page: 1,
    pages: documents.length === 0 ? 0 : 1,
  });

  await page.route("**/api/v1/auth/me", (route) => route.fulfill({ json: user }));
  await page.route("**/api/v1/documents/", (route) => route.fulfill({ json: buildDocumentList() }));
}
|
| 38 |
+
|
| 39 |
+
/** Pixel-comparison tolerances shared by every screenshot assertion in this suite. */
const SNAPSHOT_TOLERANCE = { maxDiffPixelRatio: 0.1, threshold: 0.2 };

/** Pre-populates localStorage with placeholder tokens before any page script runs. */
async function seedAuthTokens(page: Page) {
  await page.addInitScript(() => {
    localStorage.setItem("token", "access-token");
    localStorage.setItem("refresh_token", "refresh-token");
  });
}

/**
 * Waits for `readySelector`, then checks the page.
 *
 * When `process.env.CI` is set, only the visibility of the ready element is
 * asserted; otherwise the whole page is compared against the stored screenshot.
 * NOTE(review): presumably CI skips the pixel comparison because no baselines
 * are generated there — confirm.
 */
async function expectRenderedView(page: Page, readySelector: string, snapshotName: string) {
  await page.waitForSelector(readySelector);

  if (process.env.CI) {
    await expect(page.locator(readySelector)).toBeVisible();
    return;
  }

  await expect(page).toHaveScreenshot(snapshotName, SNAPSHOT_TOLERANCE);
}

test.describe("Frontend Snapshot Tests", () => {
  test("login page snapshot", async ({ page }) => {
    await page.goto("/login");
    await expectRenderedView(page, "#login-email", "login-page.png");
  });

  test("register page snapshot", async ({ page }) => {
    await page.goto("/register");
    await expectRenderedView(page, "#reg-username", "register-page.png");
  });

  test("dashboard empty page snapshot", async ({ page }) => {
    await seedAuthTokens(page);
    await mockDashboardApis(page, []);
    await page.goto("/dashboard");
    await expectRenderedView(page, "text=No documents yet", "dashboard-empty.png");
  });

  test("dashboard with document page snapshot", async ({ page }) => {
    await seedAuthTokens(page);
    await mockDashboardApis(page, [uploadedDocument]);
    await page.goto("/dashboard");
    await expectRenderedView(page, "text=notes.txt", "dashboard-with-doc.png");
  });
});
|
frontend/src/app/dashboard/page.tsx
CHANGED
|
@@ -7,8 +7,8 @@ import { useAuth } from "@/lib/auth";
|
|
| 7 |
import { api, CONNECTION_ERROR_BANNER_MESSAGE, CONNECTION_ERROR_MESSAGE } from "@/lib/api";
|
| 8 |
import Header from "@/components/layout/Header";
|
| 9 |
import DocumentSidebar from "@/components/document/DocumentSidebar";
|
|
|
|
| 10 |
import ChatPanel from "@/components/chat/ChatPanel";
|
| 11 |
-
|
| 12 |
function PDFViewerSkeleton() {
|
| 13 |
return (
|
| 14 |
<div
|
|
@@ -164,6 +164,9 @@ export default function DashboardPage() {
|
|
| 164 |
</div>
|
| 165 |
)}
|
| 166 |
|
|
|
|
|
|
|
|
|
|
| 167 |
{/* ── Center: Chat Panel ──────────────────────────────────── */}
|
| 168 |
<div className="flex-1 min-w-0 flex flex-col">
|
| 169 |
<ChatPanel
|
|
|
|
| 7 |
import { api, CONNECTION_ERROR_BANNER_MESSAGE, CONNECTION_ERROR_MESSAGE } from "@/lib/api";
|
| 8 |
import Header from "@/components/layout/Header";
|
| 9 |
import DocumentSidebar from "@/components/document/DocumentSidebar";
|
| 10 |
+
import ChatSessionSidebar from "@/components/chat/ChatSessionSidebar";
|
| 11 |
import ChatPanel from "@/components/chat/ChatPanel";
|
|
|
|
| 12 |
function PDFViewerSkeleton() {
|
| 13 |
return (
|
| 14 |
<div
|
|
|
|
| 164 |
</div>
|
| 165 |
)}
|
| 166 |
|
| 167 |
+
{/* ── Left-Center: Chat Sessions Sidebar ──── */}
|
| 168 |
+
<ChatSessionSidebar />
|
| 169 |
+
|
| 170 |
{/* ── Center: Chat Panel ──────────────────────────────────── */}
|
| 171 |
<div className="flex-1 min-w-0 flex flex-col">
|
| 172 |
<ChatPanel
|
frontend/src/components/auth/HuggingFaceTokenModal.tsx
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { useState, useRef, useEffect, isValidElement, type ReactNode } from "react";
|
| 4 |
+
import { Button } from "@/components/ui/button";
|
| 5 |
+
import { Input } from "@/components/ui/input";
|
| 6 |
+
import {
|
| 7 |
+
Dialog,
|
| 8 |
+
DialogContent,
|
| 9 |
+
DialogDescription,
|
| 10 |
+
DialogFooter,
|
| 11 |
+
DialogHeader,
|
| 12 |
+
DialogTitle,
|
| 13 |
+
DialogTrigger,
|
| 14 |
+
} from "@/components/ui/dialog";
|
| 15 |
+
import { useAuthStore } from "@/store/auth-store";
|
| 16 |
+
import { Eye, EyeOff, AlertCircle, CheckCircle2, Loader2, ExternalLink, Key } from "lucide-react";
|
| 17 |
+
|
| 18 |
+
interface HuggingFaceTokenModalProps {
  /** Optional — if provided, allows a button-triggered dialog pattern */
  children?: ReactNode;
}

/** Delay between a successful save and the dialog closing itself. */
const AUTO_CLOSE_DELAY_MS = 1500;

/**
 * Dialog for entering or replacing the signed-in user's HuggingFace API token.
 *
 * The current token is read from the auth store and saved through the store's
 * `setHfToken` action. After a successful save a confirmation banner is shown
 * and the dialog closes itself after `AUTO_CLOSE_DELAY_MS`, unless the user
 * starts editing again first.
 *
 * @param children - Optional custom trigger; when omitted a default
 *   "HuggingFace Token" button is rendered as the trigger.
 */
export default function HuggingFaceTokenModal({ children }: HuggingFaceTokenModalProps) {
  const user = useAuthStore((state) => state.user);
  const setHfToken = useAuthStore((state) => state.setHfToken);

  const existingToken = user?.hf_token ?? "";
  const hasExistingToken = existingToken.length > 0;

  const [open, setOpen] = useState(false);
  const [inputToken, setInputToken] = useState(existingToken);
  const [saving, setSaving] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [success, setSuccess] = useState(false);
  const [showToken, setShowToken] = useState(false);

  const mountedRef = useRef(true);
  const timeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  // Track mounted state and clean up the auto-close timeout on unmount.
  useEffect(() => {
    // Re-arm on every mount. React StrictMode runs mount → cleanup → mount in
    // development; without this the ref would stay false after the simulated
    // unmount and every post-await state update in handleSave would be skipped.
    mountedRef.current = true;
    return () => {
      mountedRef.current = false;
      if (timeoutRef.current) {
        clearTimeout(timeoutRef.current);
        timeoutRef.current = null;
      }
    };
  }, []);

  const clearAutoCloseTimeout = () => {
    if (timeoutRef.current) {
      clearTimeout(timeoutRef.current);
      timeoutRef.current = null;
    }
  };

  const handleOpenChange = (newOpen: boolean) => {
    clearAutoCloseTimeout();
    setOpen(newOpen);
    if (newOpen) {
      // Reset to current store value when opening (picks up changes from background saves)
      const currentToken = useAuthStore.getState().user?.hf_token ?? "";
      setInputToken(currentToken);
      setSaving(false);
      setError(null);
      setSuccess(false);
      setShowToken(false);
    }
  };

  const handleSave = async () => {
    if (saving) return;
    const token = inputToken.trim();
    if (!token) {
      setError("Please enter a valid token");
      return;
    }

    // A previous successful save may still have a close pending; it must not
    // close the dialog in the middle of this new save.
    clearAutoCloseTimeout();
    setSaving(true);
    setError(null);
    setSuccess(false);

    try {
      await setHfToken(token);
      if (!mountedRef.current) return;
      setSaving(false);
      setSuccess(true);
      // Auto-close after the confirmation has been visible for a moment.
      timeoutRef.current = setTimeout(() => {
        timeoutRef.current = null;
        setOpen(false);
      }, AUTO_CLOSE_DELAY_MS);
    } catch (err) {
      if (!mountedRef.current) return;
      setSaving(false);
      setError(err instanceof Error ? err.message : "Failed to save token");
    }
  };

  const isSaveDisabled = inputToken.trim() === "" || saving;

  return (
    <Dialog open={open} onOpenChange={handleOpenChange}>
      {children ? (
        <DialogTrigger render={isValidElement(children) ? children : <span>{children}</span>} />
      ) : (
        <DialogTrigger
          render={
            <button className="flex w-full cursor-pointer items-center rounded-sm px-2 py-1.5 text-sm outline-none transition-colors hover:bg-accent hover:text-accent-foreground">
              <Key className="mr-2 h-4 w-4" />
              <span>HuggingFace Token</span>
            </button>
          }
        />
      )}
      <DialogContent className="max-w-md sm:rounded-2xl border-border/40 p-6 md:p-8 bg-background/95 backdrop-blur-xl shadow-2xl" showCloseButton={false}>
        <DialogHeader className="gap-1">
          <DialogTitle className="text-2xl font-bold tracking-tight">
            🤗 HuggingFace Token
          </DialogTitle>
          <DialogDescription className="text-sm text-muted-foreground mt-1.5">
            Enter your HuggingFace API token to enable inference endpoints and model access.
          </DialogDescription>
        </DialogHeader>

        <form onSubmit={(e) => { e.preventDefault(); if (!isSaveDisabled) handleSave(); }}>
          <div className="space-y-4 mt-6">
            {/* Token label with configured indicator */}
            <div className="flex items-center gap-2">
              <label htmlFor="hf-token-input" className="text-sm font-medium text-foreground/80">
                Token
              </label>
              {hasExistingToken && (
                <span className="inline-flex items-center gap-1 text-xs text-primary">
                  <CheckCircle2 className="w-3 h-3" />
                  Token configured
                </span>
              )}
            </div>

            {/* Input wrapper with visibility toggle */}
            <div className="relative">
              <Input
                id="hf-token-input"
                type={showToken ? "text" : "password"}
                value={inputToken}
                onChange={(e) => {
                  setInputToken(e.target.value);
                  if (error) setError(null);
                  if (success) {
                    // The user is editing again: hide the confirmation and
                    // cancel the pending auto-close so the dialog stays open.
                    clearAutoCloseTimeout();
                    setSuccess(false);
                  }
                }}
                placeholder="hf_..."
                className="pr-10 font-mono"
                disabled={saving}
                autoFocus
                aria-label="HuggingFace API Token"
              />
              <Button
                variant="ghost"
                size="icon-xs"
                className="absolute right-2 top-1/2 -translate-y-1/2"
                onClick={() => setShowToken(!showToken)}
                type="button"
                aria-label={showToken ? "Hide token" : "Show token"}
                disabled={saving}
              >
                {showToken ? <EyeOff className="w-4 h-4" /> : <Eye className="w-4 h-4" />}
              </Button>
            </div>

            {/* External link */}
            <a
              href="https://huggingface.co/settings/tokens"
              target="_blank"
              rel="noopener noreferrer"
              className="text-xs text-muted-foreground hover:text-primary underline-offset-2 transition-colors inline-flex items-center gap-1"
            >
              <ExternalLink className="w-3 h-3" />
              Get your API token from HuggingFace Settings
            </a>
          </div>

          {/* Error banner */}
          {error && (
            <div
              className="p-4 border border-destructive/30 bg-destructive/5 rounded-xl text-sm text-destructive flex items-start gap-2 mt-4 animate-in fade-in slide-in-from-top-2 duration-200"
              role="alert"
              aria-live="polite"
            >
              <AlertCircle className="w-4 h-4 mt-0.5 shrink-0" />
              <span>{error}</span>
            </div>
          )}

          {/* Success banner */}
          {success && (
            <div
              className="p-4 border border-primary/20 bg-primary/5 rounded-xl text-sm text-primary flex items-start gap-2 mt-4 animate-in fade-in slide-in-from-top-2 duration-200"
              role="status"
              aria-live="polite"
            >
              <CheckCircle2 className="w-4 h-4 mt-0.5 shrink-0" />
              <span>Token saved successfully</span>
            </div>
          )}
        </form>

        {/* Footer */}
        <DialogFooter className="mt-4">
          {/* Goes through handleOpenChange so a pending auto-close is cancelled too. */}
          <Button type="button" variant="outline" onClick={() => handleOpenChange(false)}>
            Cancel
          </Button>
          <Button
            onClick={handleSave}
            disabled={isSaveDisabled}
            aria-busy={saving}
            title={hasExistingToken ? "Replace existing token with a new one" : undefined}
          >
            {saving ? (
              <>
                <Loader2 className="w-4 h-4 animate-spin mr-1.5" />
                Saving...
              </>
            ) : hasExistingToken ? (
              "Update Token"
            ) : (
              "Save Token"
            )}
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
}
|
frontend/src/components/chat/ChatPanel.tsx
CHANGED
|
@@ -22,11 +22,13 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
|
|
| 22 |
const input = useChatStore((state) => state.input);
|
| 23 |
const streaming = useChatStore((state) => state.streaming);
|
| 24 |
const isTyping = useChatStore((state) => state.isTyping);
|
|
|
|
| 25 |
const setMessages = useChatStore((state) => state.setMessages);
|
| 26 |
const setInput = useChatStore((state) => state.setInput);
|
| 27 |
const setStreaming = useChatStore((state) => state.setStreaming);
|
| 28 |
const setIsTyping = useChatStore((state) => state.setIsTyping);
|
| 29 |
const resetChat = useChatStore((state) => state.resetChat);
|
|
|
|
| 30 |
const [showExportMenu, setShowExportMenu] = useState(false);
|
| 31 |
const textareaRef = useRef<HTMLTextAreaElement>(null);
|
| 32 |
const bottomRef = useRef<HTMLDivElement>(null);
|
|
@@ -61,8 +63,13 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
|
|
| 61 |
};
|
| 62 |
}, [resetChat]);
|
| 63 |
|
| 64 |
-
// Load history on
|
| 65 |
useEffect(() => {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
if (!activeDoc) {
|
| 67 |
prevDocId.current = null;
|
| 68 |
setMessages([]);
|
|
@@ -100,7 +107,7 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
|
|
| 100 |
return () => {
|
| 101 |
cancelled = true;
|
| 102 |
};
|
| 103 |
-
}, [activeDoc,
|
| 104 |
|
| 105 |
const handleSend = async () => {
|
| 106 |
if (!input.trim() || streaming) return;
|
|
@@ -128,6 +135,7 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
|
|
| 128 |
const stream = api.streamPost("/api/v1/chat/ask/stream", {
|
| 129 |
question,
|
| 130 |
document_id: activeDoc?.id || null,
|
|
|
|
| 131 |
});
|
| 132 |
|
| 133 |
for await (const event of stream) {
|
|
|
|
| 22 |
const input = useChatStore((state) => state.input);
|
| 23 |
const streaming = useChatStore((state) => state.streaming);
|
| 24 |
const isTyping = useChatStore((state) => state.isTyping);
|
| 25 |
+
const activeSessionId = useChatStore((state) => state.activeSessionId);
|
| 26 |
const setMessages = useChatStore((state) => state.setMessages);
|
| 27 |
const setInput = useChatStore((state) => state.setInput);
|
| 28 |
const setStreaming = useChatStore((state) => state.setStreaming);
|
| 29 |
const setIsTyping = useChatStore((state) => state.setIsTyping);
|
| 30 |
const resetChat = useChatStore((state) => state.resetChat);
|
| 31 |
+
const fetchSessionHistory = useChatStore((state) => state.fetchSessionHistory);
|
| 32 |
const [showExportMenu, setShowExportMenu] = useState(false);
|
| 33 |
const textareaRef = useRef<HTMLTextAreaElement>(null);
|
| 34 |
const bottomRef = useRef<HTMLDivElement>(null);
|
|
|
|
| 63 |
};
|
| 64 |
}, [resetChat]);
|
| 65 |
|
| 66 |
+
// Load history on activeSessionId or fallback to activeDoc change
|
| 67 |
useEffect(() => {
|
| 68 |
+
if (activeSessionId) {
|
| 69 |
+
fetchSessionHistory(activeSessionId);
|
| 70 |
+
return;
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
if (!activeDoc) {
|
| 74 |
prevDocId.current = null;
|
| 75 |
setMessages([]);
|
|
|
|
| 107 |
return () => {
|
| 108 |
cancelled = true;
|
| 109 |
};
|
| 110 |
+
}, [activeSessionId, activeDoc, fetchSessionHistory, setMessages]);
|
| 111 |
|
| 112 |
const handleSend = async () => {
|
| 113 |
if (!input.trim() || streaming) return;
|
|
|
|
| 135 |
const stream = api.streamPost("/api/v1/chat/ask/stream", {
|
| 136 |
question,
|
| 137 |
document_id: activeDoc?.id || null,
|
| 138 |
+
session_id: activeSessionId,
|
| 139 |
});
|
| 140 |
|
| 141 |
for await (const event of stream) {
|
frontend/src/components/chat/ChatSessionSidebar.tsx
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { useState, useEffect } from "react";
|
| 4 |
+
import { Plus, Edit2, Trash2, MessageSquare, ChevronLeft } from "lucide-react";
|
| 5 |
+
import { useChatStore, type ChatSession } from "@/store/chat-store";
|
| 6 |
+
import { Button } from "@/components/ui/button";
|
| 7 |
+
import { Input } from "@/components/ui/input";
|
| 8 |
+
import { cn } from "@/lib/utils";
|
| 9 |
+
|
| 10 |
+
/**
 * Collapsible sidebar listing the user's chat sessions.
 *
 * Sessions are loaded from the chat store on mount. From here the user can
 * create a session (which immediately enters rename mode), rename or delete a
 * session, and select one, which sets it active in the store and loads its
 * history.
 */
export default function ChatSessionSidebar() {
  const sessions = useChatStore((state) => state.sessions);
  const activeSessionId = useChatStore((state) => state.activeSessionId);
  const fetchSessions = useChatStore((state) => state.fetchSessions);
  const createSession = useChatStore((state) => state.createSession);
  const renameSession = useChatStore((state) => state.renameSession);
  const deleteSession = useChatStore((state) => state.deleteSession);
  const setActiveSessionId = useChatStore((state) => state.setActiveSessionId);
  const fetchSessionHistory = useChatStore((state) => state.fetchSessionHistory);

  const [isOpen, setIsOpen] = useState(true);
  const [editingId, setEditingId] = useState<string | null>(null);
  const [editTitle, setEditTitle] = useState("");
  const [creating, setCreating] = useState(false);
  // Id of the session whose rename request is currently in flight, if any.
  const [renamingId, setRenamingId] = useState<string | null>(null);

  // Load sessions on mount
  useEffect(() => {
    fetchSessions();
  }, [fetchSessions]);

  const handleCreate = async () => {
    if (creating) return;
    setCreating(true);
    try {
      const defaultTitle = `Chat ${sessions.length + 1}`;
      const newId = await createSession(defaultTitle);
      // Drop straight into rename mode so the user can name the new chat.
      setEditingId(newId);
      setEditTitle(defaultTitle);
    } catch (err) {
      console.error(err);
    } finally {
      setCreating(false);
    }
  };

  const handleStartRename = (session: ChatSession, e: React.MouseEvent) => {
    e.stopPropagation();
    setEditingId(session.id);
    setEditTitle(session.title);
  };

  const handleSaveRename = async (id: string, e?: React.FormEvent) => {
    if (e) e.preventDefault();
    // Submit (Enter) and blur both end up here; ignore the second trigger while
    // the first request for the same session is still running.
    if (renamingId === id) return;

    const nextTitle = editTitle.trim();
    const currentTitle = sessions.find((session) => session.id === id)?.title;
    // Nothing to save for an empty or unchanged title — just leave edit mode.
    if (!nextTitle || nextTitle === currentTitle) {
      setEditingId(null);
      return;
    }

    setRenamingId(id);
    try {
      await renameSession(id, nextTitle);
    } catch (err) {
      console.error(err);
    } finally {
      setRenamingId(null);
      // Only close the editor if it still belongs to this session; the user may
      // have started editing another row while the request was in flight.
      setEditingId((current) => (current === id ? null : current));
    }
  };

  const handleDelete = async (id: string, e: React.MouseEvent) => {
    e.stopPropagation();
    if (confirm("Are you sure you want to delete this chat session?")) {
      try {
        await deleteSession(id);
      } catch (err) {
        console.error(err);
      }
    }
  };

  const handleSelectSession = async (id: string) => {
    setActiveSessionId(id);
    try {
      await fetchSessionHistory(id);
    } catch (err) {
      // Same handling as the other handlers; avoids an unhandled rejection from the click handler.
      console.error(err);
    }
  };

  return (
    <div className={cn("relative flex h-full border-r border-border/50 bg-card/20 select-none transition-all duration-300", isOpen ? "w-64" : "w-0")}>
      <div className={cn("flex flex-col h-full w-full overflow-hidden transition-opacity duration-200", isOpen ? "opacity-100" : "opacity-0 pointer-events-none")}>
        {/* Sidebar Header */}
        <div className="flex items-center justify-between p-3 border-b border-border/50 shrink-0 bg-card/45">
          <span className="text-xs font-semibold uppercase tracking-wider text-muted-foreground">Chat Sessions</span>
          <Button
            onClick={handleCreate}
            variant="outline"
            size="icon"
            className="h-7 w-7 bg-background/50 hover:bg-accent hover:text-accent-foreground"
            disabled={creating}
            aria-label="New chat session"
          >
            <Plus className="w-4 h-4" />
          </Button>
        </div>

        {/* Sessions List */}
        <div className="flex-1 overflow-y-auto p-2 space-y-1 scrollbar-thin">
          {sessions.length === 0 ? (
            <div className="text-center py-8 px-4">
              <p className="text-xs text-muted-foreground">No chat sessions. Click "+" to start a new chat.</p>
            </div>
          ) : (
            sessions.map((session) => {
              const isActive = session.id === activeSessionId;
              const isEditing = session.id === editingId;

              return (
                <div
                  key={session.id}
                  onClick={() => !isEditing && handleSelectSession(session.id)}
                  className={cn(
                    "group flex items-center justify-between rounded-lg px-3 py-2 text-sm transition-all duration-200 cursor-pointer border",
                    isActive
                      ? "bg-accent/80 border-accent text-accent-foreground shadow-sm"
                      : "border-transparent hover:bg-card/60 hover:text-foreground text-muted-foreground"
                  )}
                >
                  <div className="flex items-center gap-2 min-w-0 flex-1">
                    <MessageSquare className={cn("w-4 h-4 shrink-0", isActive ? "text-primary" : "text-muted-foreground")} />

                    {isEditing ? (
                      <form
                        onSubmit={(e) => handleSaveRename(session.id, e)}
                        className="flex items-center gap-1 w-full"
                        onClick={(e) => e.stopPropagation()}
                      >
                        <Input
                          value={editTitle}
                          onChange={(e) => setEditTitle(e.target.value)}
                          className="h-6 text-xs px-1 py-0 bg-background/50 border-input w-full"
                          autoFocus
                          onBlur={() => handleSaveRename(session.id)}
                          aria-label="Chat session title"
                        />
                      </form>
                    ) : (
                      <span className="truncate text-xs font-medium">{session.title}</span>
                    )}
                  </div>

                  {!isEditing && (
                    <div className="flex items-center gap-1 opacity-0 group-hover:opacity-100 transition-opacity duration-150 shrink-0 ml-1">
                      <Button
                        variant="ghost"
                        size="icon"
                        className="h-5 w-5 rounded-md hover:bg-background/80"
                        onClick={(e) => handleStartRename(session, e)}
                        aria-label={`Rename ${session.title}`}
                      >
                        <Edit2 className="w-3 h-3" />
                      </Button>
                      <Button
                        variant="ghost"
                        size="icon"
                        className="h-5 w-5 rounded-md hover:bg-destructive/10 hover:text-destructive"
                        onClick={(e) => handleDelete(session.id, e)}
                        aria-label={`Delete ${session.title}`}
                      >
                        <Trash2 className="w-3 h-3" />
                      </Button>
                    </div>
                  )}
                </div>
              );
            })
          )}
        </div>
      </div>

      {/* Collapse Toggle Button */}
      <Button
        onClick={() => setIsOpen(!isOpen)}
        variant="ghost"
        size="icon"
        className={cn(
          "absolute -right-3 top-1/2 -translate-y-1/2 z-40 h-6 w-6 rounded-full border border-border bg-background shadow-md hover:bg-accent hover:text-accent-foreground",
          !isOpen && "right-auto -left-3 rotate-180"
        )}
        aria-label={isOpen ? "Collapse chat sessions" : "Expand chat sessions"}
        aria-expanded={isOpen}
      >
        <ChevronLeft className="w-3.5 h-3.5" />
      </Button>
    </div>
  );
}
|
frontend/src/components/chat/SourceCard.tsx
CHANGED
|
@@ -13,6 +13,77 @@ import { ChevronDown, ChevronUp, FileText, Eye, TextQuote } from "lucide-react";
|
|
| 13 |
|
| 14 |
const EXCERPT_THRESHOLD = 200;
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
interface Props {
|
| 17 |
sources: SourceChunk[];
|
| 18 |
onPageClick: (page: number) => void;
|
|
@@ -36,34 +107,37 @@ export default function SourceCard({ sources = [], onPageClick }: Props) {
|
|
| 36 |
|
| 37 |
return (
|
| 38 |
<div className="rounded-lg border border-border/50 bg-card/50 overflow-hidden">
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
>
|
| 44 |
-
<
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
| 59 |
<Tooltip key={i}>
|
| 60 |
<TooltipTrigger className="inline-flex">
|
| 61 |
<Badge
|
| 62 |
-
variant="
|
| 63 |
-
className=
|
| 64 |
onClick={() => onPageClick(src.page + 1)}
|
| 65 |
>
|
| 66 |
-
p.{src.page + 1}
|
| 67 |
</Badge>
|
| 68 |
</TooltipTrigger>
|
| 69 |
<TooltipContent
|
|
@@ -71,74 +145,68 @@ export default function SourceCard({ sources = [], onPageClick }: Props) {
|
|
| 71 |
align="center"
|
| 72 |
className="max-w-xs p-2"
|
| 73 |
>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
<p className="text-[11px] leading-relaxed line-clamp-6">
|
| 75 |
{src.text}
|
| 76 |
</p>
|
| 77 |
</TooltipContent>
|
| 78 |
</Tooltip>
|
| 79 |
-
)
|
| 80 |
-
|
| 81 |
-
|
|
|
|
| 82 |
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
>
|
| 91 |
-
<div className="flex
|
| 92 |
-
<
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
variant="secondary"
|
| 101 |
-
className={`text-[9px] h-4 px-1.5 ${
|
| 102 |
-
src.confidence >= 80
|
| 103 |
-
? "text-emerald-400 bg-emerald-400/10"
|
| 104 |
-
: src.confidence >= 50
|
| 105 |
-
? "text-yellow-400 bg-yellow-400/10"
|
| 106 |
-
: "text-muted-foreground"
|
| 107 |
-
}`}
|
| 108 |
-
>
|
| 109 |
-
{src.confidence}% match
|
| 110 |
-
</Badge>
|
| 111 |
-
</div>
|
| 112 |
-
<Button
|
| 113 |
-
variant="ghost"
|
| 114 |
-
size="sm"
|
| 115 |
-
className="h-6 px-2 text-[10px]"
|
| 116 |
-
onClick={() => onPageClick(src.page + 1)}
|
| 117 |
-
>
|
| 118 |
-
<Eye className="w-3 h-3 mr-1" />
|
| 119 |
-
View
|
| 120 |
-
</Button>
|
| 121 |
</div>
|
| 122 |
-
<
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
|
|
|
| 126 |
>
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
<button
|
| 131 |
-
onClick={() => toggleExcerpt(i)}
|
| 132 |
-
className="mt-1.5 flex items-center gap-1 text-[10px] text-primary/70 hover:text-primary transition-colors"
|
| 133 |
-
>
|
| 134 |
-
<TextQuote className="w-3 h-3" />
|
| 135 |
-
{excerptOpen.has(i) ? "Hide excerpt" : "Show excerpt"}
|
| 136 |
-
</button>
|
| 137 |
-
)}
|
| 138 |
</div>
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
</div>
|
| 143 |
);
|
| 144 |
}
|
|
|
|
| 13 |
|
| 14 |
const EXCERPT_THRESHOLD = 200;
|
| 15 |
|
| 16 |
+
type ConfidenceLevel = "High" | "Medium" | "Low" | "Unknown";
|
| 17 |
+
|
| 18 |
+
interface ConfidenceBadgeMeta {
|
| 19 |
+
label: ConfidenceLevel;
|
| 20 |
+
className: string;
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
/**
 * Normalizes a score/confidence metric onto a fractional scale.
 *
 * Values greater than 1 are treated as percentages and divided by 100;
 * values at or below 1 are returned unchanged. Missing, non-numeric, NaN and
 * infinite inputs yield `undefined` so callers can render an "unknown" state.
 *
 * NOTE(review): a value of 1 or less is always read as a fraction, so a
 * metric reported on a 0-100 scale as 1 would be interpreted as 100% —
 * confirm which scale the backend emits.
 *
 * @param value Raw metric, either a fraction or a percentage.
 * @returns The metric as a fraction, or `undefined` when it is unusable.
 */
const normalizeMetricValue = (value?: number) => {
  // Number.isFinite rejects NaN as well as +/-Infinity; the latter would
  // otherwise flow through and be formatted as "Infinity%".
  if (typeof value !== "number" || !Number.isFinite(value)) return undefined;
  return value > 1 ? value / 100 : value;
};
|
| 27 |
+
|
| 28 |
+
/**
 * Renders a metric as a whole-number percentage string.
 *
 * @param value Raw metric passed through `normalizeMetricValue`.
 * @returns e.g. "85%", or "N/A" when the metric cannot be normalized.
 */
const formatMetricValue = (value?: number) => {
  const fraction = normalizeMetricValue(value);
  return fraction === undefined ? "N/A" : `${Math.round(fraction * 100)}%`;
};
|
| 33 |
+
|
| 34 |
+
/**
 * Maps a metric to the label and Tailwind classes used by its badge.
 *
 * Buckets (on the normalized fraction): >= 0.8 is "High", >= 0.5 is
 * "Medium", anything lower is "Low"; an unusable metric is "Unknown".
 *
 * @param value Raw metric passed through `normalizeMetricValue`.
 * @returns A fresh `{ label, className }` object for the matching bucket.
 */
const getConfidenceBadgeMeta = (value?: number): ConfidenceBadgeMeta => {
  const fraction = normalizeMetricValue(value);

  // Decide the bucket first, then look up its styling.
  let label: ConfidenceLevel;
  if (fraction === undefined) {
    label = "Unknown";
  } else if (fraction >= 0.8) {
    label = "High";
  } else if (fraction >= 0.5) {
    label = "Medium";
  } else {
    label = "Low";
  }

  const classNameByLevel: Record<ConfidenceLevel, string> = {
    Unknown: "border-muted bg-muted/40 text-muted-foreground",
    High: "border-emerald-500/30 bg-emerald-500/10 text-emerald-600",
    Medium: "border-amber-500/30 bg-amber-500/10 text-amber-600",
    Low: "border-red-500/30 bg-red-500/10 text-red-600",
  };

  return { label, className: classNameByLevel[label] };
};
|
| 63 |
+
|
| 64 |
+
/**
 * Picks the metric that drives a source's summary badge: `confidence` when
 * it is neither null nor undefined, otherwise `score`.
 */
const getPrimarySourceMetric = ({ confidence, score }: SourceChunk) =>
  confidence ?? score;
|
| 66 |
+
|
| 67 |
+
/**
 * Outline badge showing a metric's bucket (e.g. "Score: High").
 * The exact percentage is exposed through the badge's `title` attribute.
 */
const MetricBadge = (props: {
  label: "Score" | "Confidence";
  value?: number;
}) => {
  const { label, value } = props;
  const { label: level, className: toneClasses } = getConfidenceBadgeMeta(value);
  const tooltip = `${label}: ${formatMetricValue(value)}`;
  const classes = `h-5 px-1.5 text-[9px] font-medium ${toneClasses}`;

  return (
    <Badge variant="outline" className={classes} title={tooltip}>
      {label}: {level}
    </Badge>
  );
};
|
| 86 |
+
|
| 87 |
interface Props {
|
| 88 |
sources: SourceChunk[];
|
| 89 |
onPageClick: (page: number) => void;
|
|
|
|
| 107 |
|
| 108 |
return (
|
| 109 |
<div className="rounded-lg border border-border/50 bg-card/50 overflow-hidden">
|
| 110 |
+
<button
|
| 111 |
+
onClick={() => setExpanded(!expanded)}
|
| 112 |
+
className="w-full flex items-center justify-between px-3 py-2 text-xs hover:bg-accent/30 transition-colors"
|
| 113 |
+
>
|
| 114 |
+
<span className="flex items-center gap-1.5 text-muted-foreground">
|
| 115 |
+
<FileText className="w-3.5 h-3.5" />
|
| 116 |
+
{sources.length} source{sources.length > 1 ? "s" : ""} cited
|
| 117 |
+
</span>
|
| 118 |
+
{expanded ? (
|
| 119 |
+
<ChevronUp className="w-3.5 h-3.5 text-muted-foreground" />
|
| 120 |
+
) : (
|
| 121 |
+
<ChevronDown className="w-3.5 h-3.5 text-muted-foreground" />
|
| 122 |
+
)}
|
| 123 |
+
</button>
|
| 124 |
+
|
| 125 |
+
{!expanded && (
|
| 126 |
+
<div className="px-3 pb-2 flex flex-wrap gap-1">
|
| 127 |
+
{sources.map((src, i) => {
|
| 128 |
+
const badgeMeta = getConfidenceBadgeMeta(
|
| 129 |
+
getPrimarySourceMetric(src)
|
| 130 |
+
);
|
| 131 |
+
|
| 132 |
+
return (
|
| 133 |
<Tooltip key={i}>
|
| 134 |
<TooltipTrigger className="inline-flex">
|
| 135 |
<Badge
|
| 136 |
+
variant="outline"
|
| 137 |
+
className={`text-[10px] h-5 cursor-pointer hover:bg-primary/20 transition-colors ${badgeMeta.className}`}
|
| 138 |
onClick={() => onPageClick(src.page + 1)}
|
| 139 |
>
|
| 140 |
+
p.{src.page + 1} - {badgeMeta.label}
|
| 141 |
</Badge>
|
| 142 |
</TooltipTrigger>
|
| 143 |
<TooltipContent
|
|
|
|
| 145 |
align="center"
|
| 146 |
className="max-w-xs p-2"
|
| 147 |
>
|
| 148 |
+
<div className="mb-1 flex flex-wrap gap-1">
|
| 149 |
+
<MetricBadge label="Score" value={src.score} />
|
| 150 |
+
<MetricBadge label="Confidence" value={src.confidence} />
|
| 151 |
+
</div>
|
| 152 |
<p className="text-[11px] leading-relaxed line-clamp-6">
|
| 153 |
{src.text}
|
| 154 |
</p>
|
| 155 |
</TooltipContent>
|
| 156 |
</Tooltip>
|
| 157 |
+
);
|
| 158 |
+
})}
|
| 159 |
+
</div>
|
| 160 |
+
)}
|
| 161 |
|
| 162 |
+
{expanded && (
|
| 163 |
+
<div className="border-t border-border/30">
|
| 164 |
+
{sources.map((src, i) => (
|
| 165 |
+
<div
|
| 166 |
+
key={i}
|
| 167 |
+
className="px-3 py-2.5 border-b border-border/20 last:border-b-0 hover:bg-accent/20 transition-colors"
|
| 168 |
+
>
|
| 169 |
+
<div className="flex items-center justify-between gap-2 mb-1.5">
|
| 170 |
+
<div className="flex min-w-0 flex-wrap items-center gap-2">
|
| 171 |
+
<span className="truncate text-[10px] font-medium text-muted-foreground">
|
| 172 |
+
{src.filename}
|
| 173 |
+
</span>
|
| 174 |
+
<Badge variant="outline" className="h-5 px-1.5 text-[9px]">
|
| 175 |
+
Page {src.page + 1}
|
| 176 |
+
</Badge>
|
| 177 |
+
<MetricBadge label="Score" value={src.score} />
|
| 178 |
+
<MetricBadge label="Confidence" value={src.confidence} />
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
</div>
|
| 180 |
+
<Button
|
| 181 |
+
variant="ghost"
|
| 182 |
+
size="sm"
|
| 183 |
+
className="h-6 shrink-0 px-2 text-[10px]"
|
| 184 |
+
onClick={() => onPageClick(src.page + 1)}
|
| 185 |
>
|
| 186 |
+
<Eye className="w-3 h-3 mr-1" />
|
| 187 |
+
View
|
| 188 |
+
</Button>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
</div>
|
| 190 |
+
<p
|
| 191 |
+
className={`text-[11px] text-muted-foreground leading-relaxed ${
|
| 192 |
+
excerptOpen.has(i) ? "" : "line-clamp-3"
|
| 193 |
+
}`}
|
| 194 |
+
>
|
| 195 |
+
{src.text}
|
| 196 |
+
</p>
|
| 197 |
+
{src.text.length > EXCERPT_THRESHOLD && (
|
| 198 |
+
<button
|
| 199 |
+
onClick={() => toggleExcerpt(i)}
|
| 200 |
+
className="mt-1.5 flex items-center gap-1 text-[10px] text-primary/70 hover:text-primary transition-colors"
|
| 201 |
+
>
|
| 202 |
+
<TextQuote className="w-3 h-3" />
|
| 203 |
+
{excerptOpen.has(i) ? "Hide excerpt" : "Show excerpt"}
|
| 204 |
+
</button>
|
| 205 |
+
)}
|
| 206 |
+
</div>
|
| 207 |
+
))}
|
| 208 |
+
</div>
|
| 209 |
+
)}
|
| 210 |
</div>
|
| 211 |
);
|
| 212 |
}
|
frontend/src/components/layout/ContributorsPanel.tsx
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
"use client";
|
| 2 |
|
| 3 |
import { useState, useEffect } from "react";
|
|
|
|
| 4 |
import { GitBranch, Star, GitPullRequest, Users, X, Trophy, ExternalLink } from "lucide-react";
|
| 5 |
import { Button } from "@/components/ui/button";
|
| 6 |
import { api } from "@/lib/api";
|
|
@@ -124,7 +125,7 @@ export default function ContributorsPanel({ onClose }: { onClose: () => void })
|
|
| 124 |
{medals[i]}
|
| 125 |
</span>
|
| 126 |
)}
|
| 127 |
-
<
|
| 128 |
src={c.avatar_url}
|
| 129 |
alt={c.login}
|
| 130 |
width={56}
|
|
|
|
| 1 |
"use client";
|
| 2 |
|
| 3 |
import { useState, useEffect } from "react";
|
| 4 |
+
import Image from "next/image";
|
| 5 |
import { GitBranch, Star, GitPullRequest, Users, X, Trophy, ExternalLink } from "lucide-react";
|
| 6 |
import { Button } from "@/components/ui/button";
|
| 7 |
import { api } from "@/lib/api";
|
|
|
|
| 125 |
{medals[i]}
|
| 126 |
</span>
|
| 127 |
)}
|
| 128 |
+
<Image
|
| 129 |
src={c.avatar_url}
|
| 130 |
alt={c.login}
|
| 131 |
width={56}
|
frontend/src/components/layout/Header.tsx
CHANGED
|
@@ -27,6 +27,7 @@ import {
|
|
| 27 |
X,
|
| 28 |
} from "lucide-react";
|
| 29 |
import { useTheme } from "next-themes";
|
|
|
|
| 30 |
import { useSyncExternalStore } from "react";
|
| 31 |
|
| 32 |
interface HeaderProps {
|
|
|
|
| 27 |
X,
|
| 28 |
} from "lucide-react";
|
| 29 |
import { useTheme } from "next-themes";
|
| 30 |
+
|
| 31 |
import { useSyncExternalStore } from "react";
|
| 32 |
|
| 33 |
interface HeaderProps {
|
frontend/src/store/chat-store.ts
CHANGED
|
@@ -1,13 +1,14 @@
|
|
| 1 |
"use client";
|
| 2 |
|
| 3 |
import { create } from "zustand";
|
|
|
|
| 4 |
|
| 5 |
export interface SourceChunk {
|
| 6 |
text: string;
|
| 7 |
filename: string;
|
| 8 |
page: number;
|
| 9 |
-
score: number;
|
| 10 |
-
confidence: number;
|
| 11 |
}
|
| 12 |
|
| 13 |
export interface ChatMsg {
|
|
@@ -18,6 +19,12 @@ export interface ChatMsg {
|
|
| 18 |
isStreaming?: boolean;
|
| 19 |
}
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
type Setter<T> = T | ((prev: T) => T);
|
| 22 |
|
| 23 |
interface ChatStore {
|
|
@@ -25,21 +32,32 @@ interface ChatStore {
|
|
| 25 |
input: string;
|
| 26 |
streaming: boolean;
|
| 27 |
isTyping: boolean;
|
|
|
|
|
|
|
| 28 |
setMessages: (value: Setter<ChatMsg[]>) => void;
|
| 29 |
setInput: (value: Setter<string>) => void;
|
| 30 |
setStreaming: (value: Setter<boolean>) => void;
|
| 31 |
setIsTyping: (value: Setter<boolean>) => void;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
resetChat: () => void;
|
| 33 |
}
|
| 34 |
|
| 35 |
const resolveValue = <T,>(value: Setter<T>, current: T): T =>
|
| 36 |
typeof value === "function" ? (value as (prev: T) => T)(current) : value;
|
| 37 |
|
| 38 |
-
export const useChatStore = create<ChatStore>((set) => ({
|
| 39 |
messages: [],
|
| 40 |
input: "",
|
| 41 |
streaming: false,
|
| 42 |
isTyping: false,
|
|
|
|
|
|
|
| 43 |
|
| 44 |
setMessages(value) {
|
| 45 |
set((state) => ({ messages: resolveValue(value, state.messages) }));
|
|
@@ -57,12 +75,97 @@ export const useChatStore = create<ChatStore>((set) => ({
|
|
| 57 |
set((state) => ({ isTyping: resolveValue(value, state.isTyping) }));
|
| 58 |
},
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
resetChat() {
|
| 61 |
set({
|
| 62 |
messages: [],
|
| 63 |
input: "",
|
| 64 |
streaming: false,
|
| 65 |
isTyping: false,
|
|
|
|
|
|
|
| 66 |
});
|
| 67 |
},
|
| 68 |
}));
|
|
|
|
| 1 |
"use client";
|
| 2 |
|
| 3 |
import { create } from "zustand";
|
| 4 |
+
import { api } from "@/lib/api";
|
| 5 |
|
| 6 |
export interface SourceChunk {
|
| 7 |
text: string;
|
| 8 |
filename: string;
|
| 9 |
page: number;
|
| 10 |
+
score?: number;
|
| 11 |
+
confidence?: number;
|
| 12 |
}
|
| 13 |
|
| 14 |
export interface ChatMsg {
|
|
|
|
| 19 |
isStreaming?: boolean;
|
| 20 |
}
|
| 21 |
|
| 22 |
+
export interface ChatSession {
|
| 23 |
+
id: string;
|
| 24 |
+
title: string;
|
| 25 |
+
created_at: string;
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
type Setter<T> = T | ((prev: T) => T);
|
| 29 |
|
| 30 |
interface ChatStore {
|
|
|
|
| 32 |
input: string;
|
| 33 |
streaming: boolean;
|
| 34 |
isTyping: boolean;
|
| 35 |
+
sessions: ChatSession[];
|
| 36 |
+
activeSessionId: string | null;
|
| 37 |
setMessages: (value: Setter<ChatMsg[]>) => void;
|
| 38 |
setInput: (value: Setter<string>) => void;
|
| 39 |
setStreaming: (value: Setter<boolean>) => void;
|
| 40 |
setIsTyping: (value: Setter<boolean>) => void;
|
| 41 |
+
setSessions: (value: Setter<ChatSession[]>) => void;
|
| 42 |
+
setActiveSessionId: (value: Setter<string | null>) => void;
|
| 43 |
+
fetchSessions: () => Promise<void>;
|
| 44 |
+
createSession: (title: string) => Promise<string>;
|
| 45 |
+
renameSession: (id: string, title: string) => Promise<void>;
|
| 46 |
+
deleteSession: (id: string) => Promise<void>;
|
| 47 |
+
fetchSessionHistory: (id: string) => Promise<void>;
|
| 48 |
resetChat: () => void;
|
| 49 |
}
|
| 50 |
|
| 51 |
/**
 * Resolves a setter argument: a function is invoked with the current value
 * and its result returned; any other value is returned as-is.
 */
const resolveValue = <T,>(value: Setter<T>, current: T): T => {
  if (typeof value !== "function") return value;
  return (value as (prev: T) => T)(current);
};
|
| 53 |
|
| 54 |
+
export const useChatStore = create<ChatStore>((set, get) => ({
|
| 55 |
messages: [],
|
| 56 |
input: "",
|
| 57 |
streaming: false,
|
| 58 |
isTyping: false,
|
| 59 |
+
sessions: [],
|
| 60 |
+
activeSessionId: null,
|
| 61 |
|
| 62 |
setMessages(value) {
|
| 63 |
set((state) => ({ messages: resolveValue(value, state.messages) }));
|
|
|
|
| 75 |
set((state) => ({ isTyping: resolveValue(value, state.isTyping) }));
|
| 76 |
},
|
| 77 |
|
| 78 |
+
/** Replaces the session list; accepts a new list or an updater of the current one. */
setSessions(value) {
  set(({ sessions }) => ({ sessions: resolveValue(value, sessions) }));
},
|
| 81 |
+
|
| 82 |
+
/** Sets the active session id; accepts a new id (or null) or an updater of the current one. */
setActiveSessionId(value) {
  set(({ activeSessionId }) => ({
    activeSessionId: resolveValue(value, activeSessionId),
  }));
},
|
| 85 |
+
|
| 86 |
+
/**
 * Loads the session list from the API into the store. When the list is
 * non-empty and no session is active yet, the first returned session is
 * activated and its history is loaded.
 * Failures are logged to the console and not rethrown.
 */
async fetchSessions() {
  try {
    const sessions = await api.get<ChatSession[]>("/api/v1/chat/sessions");
    set({ sessions });

    // Nothing to auto-select, or the user already has a session open.
    if (sessions.length === 0 || get().activeSessionId) return;

    const firstId = sessions[0].id;
    set({ activeSessionId: firstId });
    await get().fetchSessionHistory(firstId);
  } catch (err) {
    console.error("Failed to fetch chat sessions:", err);
  }
},
|
| 98 |
+
|
| 99 |
+
/**
 * Creates a session via the API, prepends it to the session list, makes it
 * the active session and clears the message list.
 *
 * @param title Title sent in the request body.
 * @returns The id of the created session.
 * @throws Rethrows the underlying error after logging it.
 */
async createSession(title) {
  try {
    const created = await api.post<ChatSession>("/api/v1/chat/sessions", { title });
    set(({ sessions }) => ({
      sessions: [created, ...sessions],
      activeSessionId: created.id,
      messages: [],
    }));
    return created.id;
  } catch (err) {
    console.error("Failed to create chat session:", err);
    throw err;
  }
},
|
| 113 |
+
|
| 114 |
+
/**
 * Renames a session via the API and swaps the stored entry with that id for
 * the object returned by the API.
 *
 * @param id Session to rename.
 * @param title New title sent in the request body.
 * @throws Rethrows the underlying error after logging it.
 */
async renameSession(id, title) {
  try {
    const renamed = await api.put<ChatSession>(`/api/v1/chat/sessions/${id}`, { title });
    set(({ sessions }) => ({
      sessions: sessions.map((existing) => (existing.id !== id ? existing : renamed)),
    }));
  } catch (err) {
    console.error("Failed to rename chat session:", err);
    throw err;
  }
},
|
| 125 |
+
|
| 126 |
+
/**
 * Deletes a session via the API and removes it from the store.
 *
 * If the deleted session was the active one, the first remaining session
 * becomes active and its history is loaded; with no sessions left, the
 * active id is set to null and the message list is cleared.
 *
 * Deleting a session that is NOT active leaves `activeSessionId` and
 * `messages` untouched: the open conversation did not change, so re-fetching
 * it would be a redundant request that could overwrite locally held messages.
 *
 * @param id Session to delete.
 * @throws Rethrows the underlying error after logging it.
 */
async deleteSession(id) {
  try {
    await api.delete(`/api/v1/chat/sessions/${id}`);

    // Captured before the state update below rewrites activeSessionId.
    const wasActive = get().activeSessionId === id;

    set((state) => {
      const nextSessions = state.sessions.filter((s) => s.id !== id);
      let nextActiveId = state.activeSessionId;
      if (state.activeSessionId === id) {
        nextActiveId = nextSessions.length > 0 ? nextSessions[0].id : null;
      }
      return {
        sessions: nextSessions,
        activeSessionId: nextActiveId,
      };
    });

    // Only the open conversation's removal requires swapping the messages.
    if (!wasActive) return;

    const activeId = get().activeSessionId;
    if (activeId) {
      await get().fetchSessionHistory(activeId);
    } else {
      set({ messages: [] });
    }
  } catch (err) {
    console.error("Failed to delete chat session:", err);
    throw err;
  }
},
|
| 151 |
+
|
| 152 |
+
/**
 * Loads the message history of one session and stores it as `messages`.
 * Failures are logged to the console and not rethrown.
 *
 * NOTE(review): the response is applied without checking that `id` is still
 * the active session, so a slow response for a previously selected session
 * could presumably overwrite a newer one — confirm against callers.
 *
 * @param id Session whose history is requested.
 */
async fetchSessionHistory(id) {
  try {
    const { messages } = await api.get<{ messages: ChatMsg[] }>(
      `/api/v1/chat/history/session/${id}`
    );
    set({ messages });
  } catch (err) {
    console.error("Failed to fetch session history:", err);
  }
},
|
| 160 |
+
|
| 161 |
/** Restores every state field of the store to its initial value. */
resetChat() {
  const initialState = {
    messages: [],
    input: "",
    streaming: false,
    isTyping: false,
    sessions: [],
    activeSessionId: null,
  };
  set(initialState);
},
|
| 171 |
}));
|
package-lock.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "PDF-Assistant-RAG",
|
| 3 |
+
"lockfileVersion": 3,
|
| 4 |
+
"requires": true,
|
| 5 |
+
"packages": {}
|
| 6 |
+
}
|
requirements.txt
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
flask
|
| 2 |
python-dotenv
|
| 3 |
pymupdf
|
|
|
|
| 4 |
flask-login
|
| 5 |
pymongo
|
| 6 |
werkzeug
|
|
@@ -12,4 +13,4 @@ requests-oauthlib
|
|
| 12 |
google-genai
|
| 13 |
cryptography
|
| 14 |
gunicorn
|
| 15 |
-
pinecone
|
|
|
|
| 1 |
flask
|
| 2 |
python-dotenv
|
| 3 |
pymupdf
|
| 4 |
+
pdfplumber
|
| 5 |
flask-login
|
| 6 |
pymongo
|
| 7 |
werkzeug
|
|
|
|
| 13 |
google-genai
|
| 14 |
cryptography
|
| 15 |
gunicorn
|
| 16 |
+
pinecone
|