Spaces:
Running
Running
fix: resolve i18n merge conflicts
Browse files- .env.example +18 -0
- .github/workflows/ci.yml +9 -0
- CODE_OF_CONDUCT.md +85 -0
- SECURITY.md +26 -0
- backend/app/auth.py +28 -1
- backend/app/config.py +10 -0
- backend/app/database.py +31 -2
- backend/app/models.py +16 -0
- backend/app/rag/agent.py +17 -0
- backend/app/rag/chunker.py +41 -0
- backend/app/rag/embeddings.py +19 -2
- backend/app/rag/retriever.py +12 -0
- backend/app/rag/tracing.py +102 -0
- backend/app/rag/vectorstore.py +10 -0
- backend/app/rag/vision.py +99 -0
- backend/app/routes/auth.py +39 -1
- backend/app/schemas.py +23 -0
- backend/requirements.txt +4 -0
- backend/tests/conftest.py +183 -0
- backend/tests/test_auth.py +81 -0
- backend/tests/test_chat.py +50 -0
- backend/tests/test_chunker.py +38 -0
- backend/tests/test_documents.py +34 -0
- bots/discord/README.md +37 -0
- bots/discord/bot.py +68 -0
- bots/discord/requirements.txt +2 -0
- frontend/package-lock.json +11 -0
- frontend/package.json +1 -0
- frontend/src/app/dashboard/page.tsx +13 -1
- frontend/src/app/globals.css +29 -0
- frontend/src/app/layout.tsx +14 -8
- frontend/src/components/auth/ApiKeyManager.tsx +158 -0
- frontend/src/components/chat/SourceCard.tsx +114 -71
- frontend/src/components/layout/Header.tsx +34 -26
- frontend/src/components/layout/ThemeProvider.tsx +8 -0
- frontend/src/components/layout/ThemeToggle.tsx +31 -0
.env.example
CHANGED
|
@@ -91,6 +91,24 @@ HF_TOKEN=your_huggingface_token_here
|
|
| 91 |
# Optional — defaults to 1024
|
| 92 |
# LLM_MAX_NEW_TOKENS=1024
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
# ── Embeddings (Optional — defaults shown)──────────────────────────────────────────────
|
| 95 |
|
| 96 |
# SentenceTransformer model ID for generating document embeddings.
|
|
|
|
| 91 |
# Optional — defaults to 1024
|
| 92 |
# LLM_MAX_NEW_TOKENS=1024
|
| 93 |
|
| 94 |
+
# ── LangSmith Tracing (Optional) ────────────────────────
|
| 95 |
+
|
| 96 |
+
# Enable LangSmith tracing for the backend RAG pipeline.
|
| 97 |
+
# Optional — defaults to False
|
| 98 |
+
# LANGSMITH_TRACING=False
|
| 99 |
+
|
| 100 |
+
# LangSmith API key.
|
| 101 |
+
# Optional — only needed when LANGSMITH_TRACING=True
|
| 102 |
+
# LANGSMITH_API_KEY=
|
| 103 |
+
|
| 104 |
+
# LangSmith API endpoint.
|
| 105 |
+
# Optional — defaults to "https://api.smith.langchain.com"
|
| 106 |
+
# LANGSMITH_ENDPOINT=https://api.smith.langchain.com
|
| 107 |
+
|
| 108 |
+
# LangSmith project name used for traced runs.
|
| 109 |
+
# Optional — defaults to "pdf-assistant-rag"
|
| 110 |
+
# LANGSMITH_PROJECT=pdf-assistant-rag
|
| 111 |
+
|
| 112 |
# ── Embeddings (Optional — defaults shown)──────────────────────────────────────────────
|
| 113 |
|
| 114 |
# SentenceTransformer model ID for generating document embeddings.
|
.github/workflows/ci.yml
CHANGED
|
@@ -54,6 +54,15 @@ jobs:
|
|
| 54 |
run: |
|
| 55 |
python -c "import sys; sys.path.insert(0, 'backend'); from app.config import settings; print('✅ Config imports OK')" || true
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
# ── 2. Frontend Build Check ─────────────────────────────
|
| 58 |
frontend-check:
|
| 59 |
name: ⚛️ Frontend — TypeScript & Build
|
|
|
|
| 54 |
run: |
|
| 55 |
python -c "import sys; sys.path.insert(0, 'backend'); from app.config import settings; print('✅ Config imports OK')" || true
|
| 56 |
|
| 57 |
+
- name: Run backend pytest suite
|
| 58 |
+
env:
|
| 59 |
+
SECRET_KEY: ci-dummy-secret
|
| 60 |
+
DATABASE_URL: sqlite:///./ci_test.db
|
| 61 |
+
HF_TOKEN: ci-dummy-token
|
| 62 |
+
UPLOAD_DIR: /tmp/uploads
|
| 63 |
+
CHROMA_PERSIST_DIR: /tmp/chroma
|
| 64 |
+
run: pytest backend/tests -v
|
| 65 |
+
|
| 66 |
# ── 2. Frontend Build Check ─────────────────────────────
|
| 67 |
frontend-check:
|
| 68 |
name: ⚛️ Frontend — TypeScript & Build
|
CODE_OF_CONDUCT.md
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Contributor Covenant Code of Conduct
|
| 2 |
+
|
| 3 |
+
## Our Pledge
|
| 4 |
+
|
| 5 |
+
We as members, contributors, and leaders pledge to make participation in our
|
| 6 |
+
community a harassment-free experience for everyone, regardless of age, body
|
| 7 |
+
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
| 8 |
+
identity and expression, level of experience, education, socio-economic status,
|
| 9 |
+
nationality, personal appearance, race, religion, or sexual identity
|
| 10 |
+
and orientation.
|
| 11 |
+
|
| 12 |
+
We pledge to act and interact in ways that contribute to an open, welcoming,
|
| 13 |
+
diverse, inclusive, and healthy community.
|
| 14 |
+
|
| 15 |
+
## Our Standards
|
| 16 |
+
|
| 17 |
+
Examples of behavior that contributes to a positive environment for our
|
| 18 |
+
community include:
|
| 19 |
+
|
| 20 |
+
* Demonstrating empathy and kindness toward other people
|
| 21 |
+
* Being respectful of differing opinions, viewpoints, and experiences
|
| 22 |
+
* Giving and gracefully accepting constructive feedback
|
| 23 |
+
* Accepting responsibility and apologizing to those affected by our mistakes,
|
| 24 |
+
and learning from the experience
|
| 25 |
+
* Focusing on what is best not just for us as individuals, but for the
|
| 26 |
+
overall community
|
| 27 |
+
|
| 28 |
+
Examples of unacceptable behavior include:
|
| 29 |
+
|
| 30 |
+
* The use of sexualized language or imagery, and sexual attention or
|
| 31 |
+
advances of any kind
|
| 32 |
+
* Trolling, insulting or derogatory comments, and personal or political attacks
|
| 33 |
+
* Public or private harassment
|
| 34 |
+
* Publishing others' private information, such as a physical or email
|
| 35 |
+
address, without their explicit permission
|
| 36 |
+
* Other conduct which could reasonably be considered inappropriate in a
|
| 37 |
+
professional setting
|
| 38 |
+
|
| 39 |
+
## Enforcement Responsibilities
|
| 40 |
+
|
| 41 |
+
Community leaders are responsible for clarifying and enforcing our standards of
|
| 42 |
+
acceptable behavior and will take appropriate and fair corrective action in
|
| 43 |
+
response to any behavior that they deem inappropriate, threatening, offensive,
|
| 44 |
+
or harmful.
|
| 45 |
+
|
| 46 |
+
Community leaders have the right and responsibility to remove, edit, or reject
|
| 47 |
+
comments, commits, code, wiki edits, issues, and other contributions that are
|
| 48 |
+
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
| 49 |
+
decisions when appropriate.
|
| 50 |
+
|
| 51 |
+
## Scope
|
| 52 |
+
|
| 53 |
+
This Code of Conduct applies within all community spaces, and also applies when
|
| 54 |
+
an individual is officially representing the community in public spaces.
|
| 55 |
+
Examples of representing our community include using an official e-mail address,
|
| 56 |
+
posting via an official social media account, or acting as an appointed
|
| 57 |
+
representative at an online or offline event.
|
| 58 |
+
|
| 59 |
+
## Enforcement
|
| 60 |
+
|
| 61 |
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
| 62 |
+
reported to the community leaders responsible for enforcement.
|
| 63 |
+
All complaints will be reviewed and investigated promptly and fairly.
|
| 64 |
+
|
| 65 |
+
All community leaders are obligated to respect the privacy and security of the
|
| 66 |
+
reporter of any incident.
|
| 67 |
+
|
| 68 |
+
## Attribution
|
| 69 |
+
|
| 70 |
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
| 71 |
+
version 2.1, available at
|
| 72 |
+
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
|
| 73 |
+
|
| 74 |
+
Community Impact Guidelines were inspired by
|
| 75 |
+
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
|
| 76 |
+
|
| 77 |
+
For answers to common questions about this code of conduct, see the FAQ at
|
| 78 |
+
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available
|
| 79 |
+
at [https://www.contributor-covenant.org/translations][translations].
|
| 80 |
+
|
| 81 |
+
[homepage]: https://www.contributor-covenant.org
|
| 82 |
+
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
| 83 |
+
[Mozilla CoC]: https://github.com/mozilla/diversity
|
| 84 |
+
[FAQ]: https://www.contributor-covenant.org/faq
|
| 85 |
+
[translations]: https://www.contributor-covenant.org/translations
|
SECURITY.md
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Security Policy
|
| 2 |
+
|
| 3 |
+
## Supported Versions
|
| 4 |
+
|
| 5 |
+
Currently, the following branches and versions of PDF-Assistant-RAG are supported with security updates.
|
| 6 |
+
|
| 7 |
+
| Version | Supported |
|
| 8 |
+
| ------- | ------------------ |
|
| 9 |
+
| `dev` | :white_check_mark: |
|
| 10 |
+
| `main` | :white_check_mark: |
|
| 11 |
+
| < 1.0 | :x: |
|
| 12 |
+
|
| 13 |
+
## Reporting a Vulnerability
|
| 14 |
+
|
| 15 |
+
We take the security of our users and their data very seriously. If you discover a security vulnerability in this project, please **do not** report it by creating a public GitHub issue.
|
| 16 |
+
|
| 17 |
+
Instead, please privately report it by emailing the repository owner directly.
|
| 18 |
+
|
| 19 |
+
When reporting a vulnerability, please include:
|
| 20 |
+
* A detailed description of the vulnerability.
|
| 21 |
+
* The steps required to reproduce the vulnerability.
|
| 22 |
+
* Any potential impact or risk to users.
|
| 23 |
+
|
| 24 |
+
We will acknowledge your email within 48 hours and work with you to understand and resolve the issue. We aim to fix critical security issues as fast as possible and will credit you in the release notes if you wish.
|
| 25 |
+
|
| 26 |
+
Thank you for helping keep this project secure!
|
backend/app/auth.py
CHANGED
|
@@ -67,12 +67,39 @@ def decode_token(token: str, token_type: str = "access") -> Optional[str]:
|
|
| 67 |
|
| 68 |
# ── FastAPI Dependencies ─────────────────────────────
|
| 69 |
|
|
|
|
|
|
|
| 70 |
def get_current_user(
|
| 71 |
credentials: HTTPAuthorizationCredentials = Depends(security),
|
| 72 |
db: Session = Depends(get_db),
|
| 73 |
) -> User:
|
| 74 |
-
"""Dependency: extract and validate user from JWT bearer token."""
|
| 75 |
token = credentials.credentials
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
user_id = decode_token(token)
|
| 77 |
|
| 78 |
if not user_id:
|
|
|
|
| 67 |
|
| 68 |
# ── FastAPI Dependencies ─────────────────────────────
|
| 69 |
|
| 70 |
+
import hashlib
|
| 71 |
+
|
| 72 |
def get_current_user(
|
| 73 |
credentials: HTTPAuthorizationCredentials = Depends(security),
|
| 74 |
db: Session = Depends(get_db),
|
| 75 |
) -> User:
|
| 76 |
+
"""Dependency: extract and validate user from JWT bearer token or API key."""
|
| 77 |
token = credentials.credentials
|
| 78 |
+
|
| 79 |
+
# Check if token is an API key
|
| 80 |
+
if token.startswith("rag_"):
|
| 81 |
+
hashed = hashlib.sha256(token.encode("utf-8")).hexdigest()
|
| 82 |
+
from app.models import ApiKey
|
| 83 |
+
api_key = db.query(ApiKey).filter(ApiKey.hashed_key == hashed).first()
|
| 84 |
+
if not api_key:
|
| 85 |
+
raise HTTPException(
|
| 86 |
+
status_code=status.HTTP_401_UNAUTHORIZED,
|
| 87 |
+
detail="Invalid API key",
|
| 88 |
+
headers={"WWW-Authenticate": "Bearer"},
|
| 89 |
+
)
|
| 90 |
+
|
| 91 |
+
api_key.last_used = datetime.now(timezone.utc)
|
| 92 |
+
db.commit()
|
| 93 |
+
|
| 94 |
+
user = api_key.user
|
| 95 |
+
if not user:
|
| 96 |
+
raise HTTPException(
|
| 97 |
+
status_code=status.HTTP_401_UNAUTHORIZED,
|
| 98 |
+
detail="User not found for this API key",
|
| 99 |
+
)
|
| 100 |
+
return user
|
| 101 |
+
|
| 102 |
+
# Otherwise, process as JWT
|
| 103 |
user_id = decode_token(token)
|
| 104 |
|
| 105 |
if not user_id:
|
backend/app/config.py
CHANGED
|
@@ -56,8 +56,18 @@ class Settings(BaseSettings):
|
|
| 56 |
LLM_TEMPERATURE: float = 0.3
|
| 57 |
SUMMARY_MAX_TOKENS: int = 512
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
# ── Reranker ─────────────────────────────────────────
|
| 60 |
RERANKER_MODEL: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
|
| 63 |
@property
|
|
|
|
| 56 |
LLM_TEMPERATURE: float = 0.3
|
| 57 |
SUMMARY_MAX_TOKENS: int = 512
|
| 58 |
|
| 59 |
+
# ── LangSmith Tracing (optional) ─────────────────────
|
| 60 |
+
LANGSMITH_TRACING: bool = False
|
| 61 |
+
LANGSMITH_API_KEY: str = ""
|
| 62 |
+
LANGSMITH_ENDPOINT: str = "https://api.smith.langchain.com"
|
| 63 |
+
LANGSMITH_PROJECT: str = "pdf-assistant-rag"
|
| 64 |
+
|
| 65 |
# ── Reranker ─────────────────────────────────────────
|
| 66 |
RERANKER_MODEL: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
|
| 67 |
+
# ── Vision / Image captioning ─────────────────────
|
| 68 |
+
VISION_PROVIDER: str | None = None # e.g. 'openai'
|
| 69 |
+
VISION_MODEL: str | None = None
|
| 70 |
+
OPENAI_API_KEY: str = ""
|
| 71 |
|
| 72 |
|
| 73 |
@property
|
backend/app/database.py
CHANGED
|
@@ -3,11 +3,13 @@ SQLAlchemy database setup with SQLite.
|
|
| 3 |
Uses synchronous SQLAlchemy for simplicity and compatibility.
|
| 4 |
"""
|
| 5 |
import os
|
| 6 |
-
|
|
|
|
| 7 |
from sqlalchemy.orm import sessionmaker, declarative_base
|
| 8 |
from app.config import get_settings
|
| 9 |
|
| 10 |
settings = get_settings()
|
|
|
|
| 11 |
|
| 12 |
# ── Ensure data directory exists ─────────────────────
|
| 13 |
db_path = settings.DATABASE_URL.replace("sqlite:///", "")
|
|
@@ -34,7 +36,34 @@ def get_db():
|
|
| 34 |
db.close()
|
| 35 |
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
def init_db():
|
| 38 |
-
"""Create all tables on startup."""
|
| 39 |
from app import models # noqa: F401 — import to register models
|
| 40 |
Base.metadata.create_all(bind=engine)
|
|
|
|
|
|
| 3 |
Uses synchronous SQLAlchemy for simplicity and compatibility.
|
| 4 |
"""
|
| 5 |
import os
|
| 6 |
+
import logging
|
| 7 |
+
from sqlalchemy import create_engine, inspect, text
|
| 8 |
from sqlalchemy.orm import sessionmaker, declarative_base
|
| 9 |
from app.config import get_settings
|
| 10 |
|
| 11 |
settings = get_settings()
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
|
| 14 |
# ── Ensure data directory exists ─────────────────────
|
| 15 |
db_path = settings.DATABASE_URL.replace("sqlite:///", "")
|
|
|
|
| 36 |
db.close()
|
| 37 |
|
| 38 |
|
| 39 |
+
def _migrate_schema():
    """Apply schema migrations for existing databases (SQLite-compatible).

    SQLAlchemy's ``create_all`` only creates new tables and does **not**
    add missing columns to existing tables. This helper fills that gap
    for non-destructive changes such as new nullable columns.

    Each migration is a ``(table, column, ddl)`` tuple. The DDL runs only
    when ``column`` is absent from ``table``. A failing DDL statement is
    logged (with its traceback) and skipped rather than re-raised.
    """
    inspector = inspect(engine)

    migrations = [
        ("users", "hf_token", "ALTER TABLE users ADD COLUMN hf_token VARCHAR(255)"),
    ]

    # Column names are looked up per table (and cached) so that a migration
    # is always checked against its own table, not just against "users".
    columns_by_table = {}

    for table, column, ddl in migrations:
        if table not in columns_by_table:
            columns_by_table[table] = {
                c["name"] for c in inspector.get_columns(table)
            }

        if column in columns_by_table[table]:
            continue

        try:
            # engine.begin() commits on success and rolls back on error.
            with engine.begin() as conn:
                conn.execute(text(ddl))
            columns_by_table[table].add(column)
            logger.info("Migration: added column %s.%s", table, column)
        except Exception:
            # Best-effort: keep going, but record why the DDL failed.
            logger.warning(
                "Migration skipped (may already exist): %s.%s",
                table,
                column,
                exc_info=True,
            )
|
| 63 |
+
|
| 64 |
+
|
| 65 |
def init_db():
    """Create all tables on startup and apply schema migrations.

    Order matters: ``create_all`` runs first so that the tables exist
    before ``_migrate_schema`` inspects them for missing columns.
    """
    # Imported for its side effect: registers the ORM models on ``Base``.
    from app import models  # noqa: F401 — import to register models

    Base.metadata.create_all(bind=engine)
    _migrate_schema()
|
backend/app/models.py
CHANGED
|
@@ -22,10 +22,26 @@ class User(Base):
|
|
| 22 |
is_admin = Column(Boolean, default=False)
|
| 23 |
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
|
| 24 |
last_login = Column(DateTime, nullable=True, index=True)
|
|
|
|
| 25 |
|
| 26 |
# Relationships
|
| 27 |
documents = relationship("Document", back_populates="owner", cascade="all, delete-orphan")
|
| 28 |
messages = relationship("ChatMessage", back_populates="user", cascade="all, delete-orphan")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
|
| 31 |
class Document(Base):
|
|
|
|
| 22 |
is_admin = Column(Boolean, default=False)
|
| 23 |
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
|
| 24 |
last_login = Column(DateTime, nullable=True, index=True)
|
| 25 |
+
hf_token = Column(String(255), nullable=True)
|
| 26 |
|
| 27 |
# Relationships
|
| 28 |
documents = relationship("Document", back_populates="owner", cascade="all, delete-orphan")
|
| 29 |
messages = relationship("ChatMessage", back_populates="user", cascade="all, delete-orphan")
|
| 30 |
+
api_keys = relationship("ApiKey", back_populates="user", cascade="all, delete-orphan")
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class ApiKey(Base):
    """ORM model for a user-owned API key, stored in the ``api_keys`` table.

    Only a hash of the key (``hashed_key``) and a short ``key_prefix`` are
    persisted as columns here; ``hashed_key`` is unique and indexed.
    """

    __tablename__ = "api_keys"

    id = Column(String, primary_key=True, default=generate_uuid)
    # Owning user; indexed foreign key to ``users.id``.
    user_id = Column(String, ForeignKey("users.id"), nullable=False, index=True)
    # Short prefix of the key (at most 10 characters).
    key_prefix = Column(String(10), nullable=False)
    # Hash of the full key; unique + indexed.
    hashed_key = Column(String(255), nullable=False, unique=True, index=True)
    created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
    # Nullable: no default is set, so it stays NULL until something assigns it.
    last_used = Column(DateTime, nullable=True)

    # Relationships
    user = relationship("User", back_populates="api_keys")
|
| 45 |
|
| 46 |
|
| 47 |
class Document(Base):
|
backend/app/rag/agent.py
CHANGED
|
@@ -10,6 +10,7 @@ from huggingface_hub import InferenceClient
|
|
| 10 |
from app.config import get_settings
|
| 11 |
from app.rag.retriever import retrieve
|
| 12 |
from app.rag.prompts import SYSTEM_PROMPT, RAG_PROMPT_TEMPLATE, GREETING_PROMPT
|
|
|
|
| 13 |
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
settings = get_settings()
|
|
@@ -65,6 +66,14 @@ def _chat_messages(system: str, user_content: str) -> list:
|
|
| 65 |
]
|
| 66 |
|
| 67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
def generate_answer(
|
| 69 |
question: str,
|
| 70 |
user_id: str,
|
|
@@ -145,6 +154,14 @@ def generate_answer(
|
|
| 145 |
return {"answer": answer, "sources": sources}
|
| 146 |
|
| 147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
def generate_answer_stream(
|
| 149 |
question: str,
|
| 150 |
user_id: str,
|
|
|
|
| 10 |
from app.config import get_settings
|
| 11 |
from app.rag.retriever import retrieve
|
| 12 |
from app.rag.prompts import SYSTEM_PROMPT, RAG_PROMPT_TEMPLATE, GREETING_PROMPT
|
| 13 |
+
from app.rag.tracing import trace_function
|
| 14 |
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
settings = get_settings()
|
|
|
|
| 66 |
]
|
| 67 |
|
| 68 |
|
| 69 |
+
@trace_function(
|
| 70 |
+
"generate_answer",
|
| 71 |
+
metadata_factory=lambda question, user_id, document_id=None: {
|
| 72 |
+
"user_id": user_id,
|
| 73 |
+
"document_id": document_id,
|
| 74 |
+
"llm_model": settings.LLM_MODEL,
|
| 75 |
+
},
|
| 76 |
+
)
|
| 77 |
def generate_answer(
|
| 78 |
question: str,
|
| 79 |
user_id: str,
|
|
|
|
| 154 |
return {"answer": answer, "sources": sources}
|
| 155 |
|
| 156 |
|
| 157 |
+
@trace_function(
|
| 158 |
+
"generate_answer_stream",
|
| 159 |
+
metadata_factory=lambda question, user_id, document_id=None: {
|
| 160 |
+
"user_id": user_id,
|
| 161 |
+
"document_id": document_id,
|
| 162 |
+
"llm_model": settings.LLM_MODEL,
|
| 163 |
+
},
|
| 164 |
+
)
|
| 165 |
def generate_answer_stream(
|
| 166 |
question: str,
|
| 167 |
user_id: str,
|
backend/app/rag/chunker.py
CHANGED
|
@@ -28,6 +28,34 @@ def extract_pdf(filepath: str) -> List[Dict[str, Any]]:
|
|
| 28 |
return pages
|
| 29 |
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
def extract_docx(filepath: str) -> List[Dict[str, Any]]:
|
| 32 |
"""Extract text from DOCX files."""
|
| 33 |
doc = docx.Document(filepath)
|
|
@@ -50,10 +78,13 @@ def chunk_document(filepath: str) -> List[Dict[str, Any]]:
|
|
| 50 |
Returns list of dicts with 'text', 'page', and 'chunk_index'.
|
| 51 |
"""
|
| 52 |
ext = filepath.rsplit(".", 1)[-1].lower()
|
|
|
|
| 53 |
|
| 54 |
# ── Extract text by file type ────────────────────
|
| 55 |
if ext == "pdf":
|
| 56 |
pages = extract_pdf(filepath)
|
|
|
|
|
|
|
| 57 |
elif ext == "docx":
|
| 58 |
pages = extract_docx(filepath)
|
| 59 |
elif ext in ("txt", "md"):
|
|
@@ -91,6 +122,16 @@ def chunk_document(filepath: str) -> List[Dict[str, Any]]:
|
|
| 91 |
})
|
| 92 |
chunk_index += 1
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
return all_chunks
|
| 95 |
|
| 96 |
|
|
|
|
| 28 |
return pages
|
| 29 |
|
| 30 |
|
| 31 |
+
def extract_pdf_images(filepath: str) -> List[Dict[str, Any]]:
    """Extract images from a PDF and return list of dicts with image bytes and page number.

    Each entry: {"image_bytes": b"...", "page": int}

    ``page`` is 1-based. Images that fail to decode or convert are skipped.
    The document handle is always closed, even if iterating pages raises.
    """
    images = []
    doc = fitz.open(filepath)

    try:
        for page_num, page in enumerate(doc):
            # get_images returns a list of tuples where first item is xref
            for img in page.get_images(full=True):
                xref = img[0]
                try:
                    pix = fitz.Pixmap(doc, xref)
                    # Convert to RGB if it's CMYK or has alpha
                    if pix.n >= 4:
                        pix = fitz.Pixmap(fitz.csRGB, pix)

                    img_bytes = pix.tobytes("png")
                    images.append({"image_bytes": img_bytes, "page": page_num + 1})
                except Exception:
                    # Best-effort: ignore this image and move on to the next.
                    continue
    finally:
        # Release the file handle regardless of how the loop exits.
        doc.close()

    return images
|
| 57 |
+
|
| 58 |
+
|
| 59 |
def extract_docx(filepath: str) -> List[Dict[str, Any]]:
|
| 60 |
"""Extract text from DOCX files."""
|
| 61 |
doc = docx.Document(filepath)
|
|
|
|
| 78 |
Returns list of dicts with 'text', 'page', and 'chunk_index'.
|
| 79 |
"""
|
| 80 |
ext = filepath.rsplit(".", 1)[-1].lower()
|
| 81 |
+
images = []
|
| 82 |
|
| 83 |
# ── Extract text by file type ────────────────────
|
| 84 |
if ext == "pdf":
|
| 85 |
pages = extract_pdf(filepath)
|
| 86 |
+
# also extract images for later captioning/embedding
|
| 87 |
+
images = extract_pdf_images(filepath)
|
| 88 |
elif ext == "docx":
|
| 89 |
pages = extract_docx(filepath)
|
| 90 |
elif ext in ("txt", "md"):
|
|
|
|
| 122 |
})
|
| 123 |
chunk_index += 1
|
| 124 |
|
| 125 |
+
# Attach any images that belong to this page after text chunks for the page
|
| 126 |
+
for img in [i for i in images if i["page"] == page_num]:
|
| 127 |
+
all_chunks.append({
|
| 128 |
+
"text": "",
|
| 129 |
+
"page": page_num,
|
| 130 |
+
"chunk_index": chunk_index,
|
| 131 |
+
"image_bytes": img["image_bytes"],
|
| 132 |
+
})
|
| 133 |
+
chunk_index += 1
|
| 134 |
+
|
| 135 |
return all_chunks
|
| 136 |
|
| 137 |
|
backend/app/rag/embeddings.py
CHANGED
|
@@ -6,6 +6,7 @@ import logging
|
|
| 6 |
from typing import List
|
| 7 |
from langchain_huggingface import HuggingFaceEmbeddings
|
| 8 |
from app.config import get_settings
|
|
|
|
| 9 |
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
settings = get_settings()
|
|
@@ -36,10 +37,26 @@ def get_embedding_model() -> HuggingFaceEmbeddings:
|
|
| 36 |
def embed_texts(texts: List[str]) -> List[List[float]]:
|
| 37 |
"""Embed a batch of texts into vectors."""
|
| 38 |
model = get_embedding_model()
|
| 39 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
|
| 42 |
def embed_query(query: str) -> List[float]:
|
| 43 |
"""Embed a single query string."""
|
| 44 |
model = get_embedding_model()
|
| 45 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
from typing import List
|
| 7 |
from langchain_huggingface import HuggingFaceEmbeddings
|
| 8 |
from app.config import get_settings
|
| 9 |
+
from app.rag.tracing import trace_call
|
| 10 |
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
settings = get_settings()
|
|
|
|
| 37 |
def embed_texts(texts: List[str]) -> List[List[float]]:
    """Embed a batch of texts into vectors.

    Args:
        texts: Strings to embed.

    Returns:
        One embedding vector per input text, in order. An empty input
        returns an empty list without loading the embedding model.
    """
    if not texts:
        # Nothing to embed: skip model loading and the trace entirely.
        return []

    model = get_embedding_model()
    return trace_call(
        "embed_texts",
        lambda: model.embed_documents(texts),
        run_type="embedding",
        metadata={
            "embedding_model": settings.EMBEDDING_MODEL,
            "text_count": len(texts),
        },
    )
|
| 49 |
|
| 50 |
|
| 51 |
def embed_query(query: str) -> List[float]:
    """Embed a single query string.

    The embedding call is routed through ``trace_call`` with the model
    name and the query length (not the query text) as metadata.
    """
    model = get_embedding_model()
    return trace_call(
        "embed_query",
        lambda: model.embed_query(query),
        run_type="embedding",
        metadata={
            "embedding_model": settings.EMBEDDING_MODEL,
            "query_length": len(query),
        },
    )
|
backend/app/rag/retriever.py
CHANGED
|
@@ -5,6 +5,7 @@ import logging
|
|
| 5 |
from typing import List, Dict, Any, Optional
|
| 6 |
from app.config import get_settings
|
| 7 |
from app.rag.embeddings import embed_query
|
|
|
|
| 8 |
from app.rag.vectorstore import query_chunks
|
| 9 |
|
| 10 |
logger = logging.getLogger(__name__)
|
|
@@ -31,6 +32,17 @@ def get_reranker():
|
|
| 31 |
return _reranker if _reranker != "disabled" else None
|
| 32 |
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
def retrieve(
|
| 35 |
query: str,
|
| 36 |
user_id: str,
|
|
|
|
| 5 |
from typing import List, Dict, Any, Optional
|
| 6 |
from app.config import get_settings
|
| 7 |
from app.rag.embeddings import embed_query
|
| 8 |
+
from app.rag.tracing import trace_function
|
| 9 |
from app.rag.vectorstore import query_chunks
|
| 10 |
|
| 11 |
logger = logging.getLogger(__name__)
|
|
|
|
| 32 |
return _reranker if _reranker != "disabled" else None
|
| 33 |
|
| 34 |
|
| 35 |
+
@trace_function(
|
| 36 |
+
"retrieve",
|
| 37 |
+
metadata_factory=lambda query, user_id, document_id=None: {
|
| 38 |
+
"user_id": user_id,
|
| 39 |
+
"document_id": document_id,
|
| 40 |
+
"embedding_model": settings.EMBEDDING_MODEL,
|
| 41 |
+
"reranker_model": settings.RERANKER_MODEL,
|
| 42 |
+
"top_k_retrieval": settings.TOP_K_RETRIEVAL,
|
| 43 |
+
"top_k_rerank": settings.TOP_K_RERANK,
|
| 44 |
+
},
|
| 45 |
+
)
|
| 46 |
def retrieve(
|
| 47 |
query: str,
|
| 48 |
user_id: str,
|
backend/app/rag/tracing.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Optional LangSmith tracing helpers for the RAG pipeline.
|
| 3 |
+
Safe to import even when LangSmith is not installed or configured.
|
| 4 |
+
"""
|
| 5 |
+
import logging
|
| 6 |
+
import os
|
| 7 |
+
from functools import wraps
|
| 8 |
+
from typing import Any, Callable, Optional
|
| 9 |
+
|
| 10 |
+
from app.config import get_settings
|
| 11 |
+
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
settings = get_settings()
|
| 14 |
+
|
| 15 |
+
try:
|
| 16 |
+
from langsmith import traceable as _langsmith_traceable
|
| 17 |
+
except Exception: # pragma: no cover - optional dependency safety
|
| 18 |
+
_langsmith_traceable = None
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def configure_langsmith() -> bool:
    """Configure LangSmith environment variables when tracing is enabled.

    Returns:
        True only when tracing is enabled in settings, an API key is set,
        and the ``langsmith`` package was importable; False otherwise.
    """
    if not settings.LANGSMITH_TRACING:
        return False

    if not settings.LANGSMITH_API_KEY:
        logger.warning("LangSmith tracing enabled but LANGSMITH_API_KEY is not set; tracing disabled.")
        return False

    # Export the settings as environment variables for the LangSmith client.
    os.environ["LANGSMITH_TRACING"] = "true"
    os.environ["LANGSMITH_API_KEY"] = settings.LANGSMITH_API_KEY
    os.environ["LANGSMITH_ENDPOINT"] = settings.LANGSMITH_ENDPOINT
    os.environ["LANGSMITH_PROJECT"] = settings.LANGSMITH_PROJECT

    if _langsmith_traceable is None:
        # Tracing was requested but the optional dependency is missing;
        # say so instead of silently disabling it.
        logger.warning(
            "LangSmith tracing enabled but the 'langsmith' package could not be imported; tracing disabled."
        )
        return False

    return True
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
LANGSMITH_ENABLED = configure_langsmith()
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _sanitize_metadata(metadata: Optional[dict[str, Any]]) -> dict[str, Any]:
    """Return a copy of ``metadata`` without entries whose value is ``None``.

    ``None`` (or an empty mapping) yields an empty dict. Falsy values other
    than ``None`` (``0``, ``""``, ``False``) are kept.
    """
    return {key: value for key, value in (metadata or {}).items() if value is not None}
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def _build_traceable(name: str, run_type: str, metadata: Optional[dict[str, Any]] = None):
    """Build a LangSmith traceable decorator safely across versions.

    Returns:
        ``None`` when ``langsmith`` is not importable; otherwise the
        decorator returned by ``langsmith.traceable``. If passing
        ``metadata`` raises ``TypeError``, the decorator is rebuilt
        without it.
    """
    if _langsmith_traceable is None:
        return None

    sanitized = _sanitize_metadata(metadata)
    try:
        return _langsmith_traceable(
            name=name,
            run_type=run_type,
            # Pass None rather than an empty dict when nothing survived sanitizing.
            metadata=sanitized or None,
        )
    except TypeError:
        # Fallback for a ``traceable`` that rejects the ``metadata`` keyword.
        return _langsmith_traceable(name=name, run_type=run_type)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def trace_call(
    name: str,
    fn: Callable[..., Any],
    *args: Any,
    run_type: str = "chain",
    metadata: Optional[dict[str, Any]] = None,
    **kwargs: Any,
) -> Any:
    """Execute a callable with LangSmith tracing when available.

    Args:
        name: Name under which the run is traced.
        fn: Callable to execute.
        *args: Positional arguments forwarded to ``fn``.
        run_type: LangSmith run type passed to the traceable decorator.
        metadata: Optional metadata attached to the trace.
        **kwargs: Keyword arguments forwarded to ``fn``.

    Returns:
        Whatever ``fn(*args, **kwargs)`` returns. When tracing is disabled
        or no decorator can be built, ``fn`` is called directly.
    """
    if not LANGSMITH_ENABLED:
        return fn(*args, **kwargs)

    decorator = _build_traceable(name, run_type, metadata)
    if decorator is None:
        return fn(*args, **kwargs)

    traced_fn = decorator(fn)
    return traced_fn(*args, **kwargs)
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def trace_function(
    name: str,
    *,
    run_type: str = "chain",
    metadata_factory: Optional[Callable[..., dict[str, Any]]] = None,
) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """Decorator wrapper that becomes a no-op when LangSmith is disabled.

    Args:
        name: Name under which calls to the decorated function are traced.
        run_type: LangSmith run type passed through to ``trace_call``.
        metadata_factory: Optional callable invoked with the same arguments
            as the decorated function; its result is used as trace metadata.

    Returns:
        A decorator. With tracing disabled the wrapped function is called
        directly and ``metadata_factory`` is never invoked. With tracing
        enabled, a failing ``metadata_factory`` is logged and the call is
        traced without metadata instead of raising.
    """
    def decorator(fn: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(fn)
        def wrapped(*args: Any, **kwargs: Any) -> Any:
            if not LANGSMITH_ENABLED:
                # True no-op path: do not even build metadata, so a factory
                # whose signature lags behind ``fn`` cannot break the call.
                return fn(*args, **kwargs)

            metadata = None
            if metadata_factory:
                try:
                    metadata = metadata_factory(*args, **kwargs)
                except Exception:
                    # Tracing metadata is optional; never fail the real call for it.
                    logger.warning(
                        "Could not build trace metadata for %s; tracing without it.",
                        name,
                        exc_info=True,
                    )

            return trace_call(
                name,
                fn,
                *args,
                run_type=run_type,
                metadata=metadata,
                **kwargs,
            )

        return wrapped

    return decorator
|
backend/app/rag/vectorstore.py
CHANGED
|
@@ -8,6 +8,7 @@ import chromadb
|
|
| 8 |
from chromadb.config import Settings as ChromaSettings
|
| 9 |
from app.config import get_settings
|
| 10 |
from app.rag.embeddings import get_embedding_model
|
|
|
|
| 11 |
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
settings = get_settings()
|
|
@@ -55,6 +56,12 @@ def store_chunks(
|
|
| 55 |
if not chunks:
|
| 56 |
return 0
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
client = get_chroma_client()
|
| 59 |
embedding_model = get_embedding_model()
|
| 60 |
|
|
@@ -74,6 +81,9 @@ def store_chunks(
|
|
| 74 |
"document_id": document_id,
|
| 75 |
"page": chunk["page"],
|
| 76 |
"chunk_index": chunk["chunk_index"],
|
|
|
|
|
|
|
|
|
|
| 77 |
}
|
| 78 |
for chunk in chunks
|
| 79 |
]
|
|
|
|
| 8 |
from chromadb.config import Settings as ChromaSettings
|
| 9 |
from app.config import get_settings
|
| 10 |
from app.rag.embeddings import get_embedding_model
|
| 11 |
+
from app.rag.vision import generate_captions_for_chunks
|
| 12 |
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
settings = get_settings()
|
|
|
|
| 56 |
if not chunks:
|
| 57 |
return 0
|
| 58 |
|
| 59 |
+
# Generate captions for any extracted images before embedding
|
| 60 |
+
try:
|
| 61 |
+
generate_captions_for_chunks(chunks)
|
| 62 |
+
except Exception as e:
|
| 63 |
+
logger.warning(f"Could not generate image captions: {e}")
|
| 64 |
+
|
| 65 |
client = get_chroma_client()
|
| 66 |
embedding_model = get_embedding_model()
|
| 67 |
|
|
|
|
| 81 |
"document_id": document_id,
|
| 82 |
"page": chunk["page"],
|
| 83 |
"chunk_index": chunk["chunk_index"],
|
| 84 |
+
# Indicate whether this chunk was originally an image and include a short caption
|
| 85 |
+
**({"is_image": True, "image_caption": chunk.get("image_caption", "")}
|
| 86 |
+
if chunk.get("is_image") else {}),
|
| 87 |
}
|
| 88 |
for chunk in chunks
|
| 89 |
]
|
backend/app/rag/vision.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Image captioning / vision helpers for RAG pipeline.
|
| 2 |
+
|
| 3 |
+
Provides a simple, pluggable interface to generate textual descriptions
|
| 4 |
+
for images extracted from PDFs. By default it uses local OCR (pytesseract)
|
| 5 |
+
when available as a robust fallback. An external VLM provider (OpenAI)
|
| 6 |
+
can be integrated by setting `VISION_PROVIDER` and appropriate API keys
|
| 7 |
+
in settings; the provider hook is intentionally small and optional.
|
| 8 |
+
"""
|
| 9 |
+
import logging
|
| 10 |
+
from typing import List, Dict, Any
|
| 11 |
+
from io import BytesIO
|
| 12 |
+
|
| 13 |
+
from app.config import get_settings
|
| 14 |
+
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
settings = get_settings()
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def _ocr_caption(image_bytes: bytes) -> str:
|
| 20 |
+
"""Try to produce a caption using pytesseract OCR; returns empty string if not available."""
|
| 21 |
+
try:
|
| 22 |
+
from PIL import Image
|
| 23 |
+
import pytesseract
|
| 24 |
+
except Exception:
|
| 25 |
+
return ""
|
| 26 |
+
|
| 27 |
+
try:
|
| 28 |
+
img = Image.open(BytesIO(image_bytes)).convert("RGB")
|
| 29 |
+
text = pytesseract.image_to_string(img)
|
| 30 |
+
text = text.strip()
|
| 31 |
+
return text
|
| 32 |
+
except Exception as e:
|
| 33 |
+
logger.debug(f"OCR failed: {e}")
|
| 34 |
+
return ""
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def caption_image(image_bytes: bytes, page: int | None = None) -> str:
    """Generate a short text caption for a single image.

    Resolution order:
      1. Local OCR via ``_ocr_caption``. Recognised text is used as the
         caption, cut to 500 characters plus "..." when it is longer.
      2. A placeholder: "Image on page N." when ``page`` is truthy,
         otherwise "Image.".

    ``VISION_PROVIDER`` is read from settings, but no external provider is
    actually integrated yet; a configured provider only produces a debug log
    entry before falling back to OCR.

    Args:
        image_bytes: Raw encoded image data.
        page: Page number used in the placeholder caption, if known.

    Returns:
        A non-empty caption string.
    """
    provider = getattr(settings, "VISION_PROVIDER", None)
    if provider == "openai":
        # The earlier placeholder called openai.Image.create(), which is the
        # image *generation* endpoint: it never received the image, its result
        # was discarded, and it issued a billable request per image. Until a
        # real vision/captioning call is wired in, make no network request.
        logger.debug(
            "VISION_PROVIDER=openai is set but no captioning integration is "
            "implemented; falling back to OCR"
        )

    # Try OCR caption
    ocr = _ocr_caption(image_bytes)
    if ocr:
        # Keep it short if very long
        return (ocr[:500] + "...") if len(ocr) > 500 else ocr

    # Last-resort caption
    if page:
        return f"Image on page {page}."
    return "Image."
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def generate_captions_for_chunks(chunks: List[Dict[str, Any]]) -> None:
    """Caption image-only chunks in place so they carry text.

    A chunk is processed only when it has truthy ``image_bytes`` and no
    truthy ``text``. For each such chunk:
      * ``text`` and ``image_caption`` are set to the generated caption,
      * ``is_image`` is set to True,
      * ``image_bytes`` is removed so raw bytes are not serialized later.

    If captioning raises or yields nothing, a placeholder caption is used
    instead, so a processed chunk never ends up with empty text.

    Args:
        chunks: Chunk dictionaries; mutated in place.
    """
    for chunk in chunks:
        if not chunk.get("image_bytes") or chunk.get("text"):
            continue

        page = chunk.get("page")
        try:
            caption = caption_image(chunk["image_bytes"], page=page)
        except Exception as e:
            logger.debug(f"Failed to caption image chunk: {e}")
            caption = ""

        if not caption:
            # Assign explicitly: setdefault() would keep an existing empty or
            # None "text" value, and the page may be unknown.
            caption = f"Image on page {page}." if page else "Image."

        # Remove raw bytes to avoid accidentally serializing them later
        chunk.pop("image_bytes", None)
        chunk["is_image"] = True
        chunk["text"] = caption
        chunk["image_caption"] = caption
|
backend/app/routes/auth.py
CHANGED
|
@@ -11,7 +11,7 @@ from sqlalchemy.orm import Session
|
|
| 11 |
from sqlalchemy import select
|
| 12 |
from app.config import get_settings
|
| 13 |
from app.database import get_db
|
| 14 |
-
from app.models import User
|
| 15 |
from app.schemas import (
|
| 16 |
GoogleLoginRequest,
|
| 17 |
RefreshRequest,
|
|
@@ -23,6 +23,8 @@ from app.schemas import (
|
|
| 23 |
UserResponse,
|
| 24 |
UserUpdate,
|
| 25 |
UserUpdateResponse,
|
|
|
|
|
|
|
| 26 |
)
|
| 27 |
from app.auth import hash_password, verify_password, create_access_token, create_refresh_token, get_current_user, decode_token
|
| 28 |
|
|
@@ -383,6 +385,42 @@ def update_password(payload:UpdatePassword,
|
|
| 383 |
db.rollback()
|
| 384 |
raise HTTPException(status_code=400, detail="Database error")
|
| 385 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
@router.get("/config")
|
| 387 |
def get_auth_config():
|
| 388 |
"""Return public configuration for auth providers"""
|
|
|
|
| 11 |
from sqlalchemy import select
|
| 12 |
from app.config import get_settings
|
| 13 |
from app.database import get_db
|
| 14 |
+
from app.models import User, ApiKey
|
| 15 |
from app.schemas import (
|
| 16 |
GoogleLoginRequest,
|
| 17 |
RefreshRequest,
|
|
|
|
| 23 |
UserResponse,
|
| 24 |
UserUpdate,
|
| 25 |
UserUpdateResponse,
|
| 26 |
+
ApiKeyResponse,
|
| 27 |
+
ApiKeyCreateResponse,
|
| 28 |
)
|
| 29 |
from app.auth import hash_password, verify_password, create_access_token, create_refresh_token, get_current_user, decode_token
|
| 30 |
|
|
|
|
| 385 |
db.rollback()
|
| 386 |
raise HTTPException(status_code=400, detail="Database error")
|
| 387 |
|
| 388 |
+
from typing import List
|
| 389 |
+
import hashlib
|
| 390 |
+
|
| 391 |
+
@router.post("/api-keys", response_model=ApiKeyCreateResponse, status_code=status.HTTP_201_CREATED)
def create_api_key(user: User = Depends(get_current_user), db: Session = Depends(get_db)):
    """Issue a new API key for the authenticated user.

    The plaintext key ("rag_" followed by a URL-safe random token) is only
    available in this response. The database row stores its SHA-256 hex
    digest and the first 10 characters as a display prefix.
    """
    token = f"rag_{secrets.token_urlsafe(32)}"
    digest = hashlib.sha256(token.encode("utf-8")).hexdigest()

    record = ApiKey(user_id=user.id, key_prefix=token[:10], hashed_key=digest)
    db.add(record)
    db.commit()
    # Reload the row so database-generated columns are populated.
    db.refresh(record)

    return {"key": token, "api_key": record}
|
| 407 |
+
|
| 408 |
+
@router.get("/api-keys", response_model=List[ApiKeyResponse])
def list_api_keys(user: User = Depends(get_current_user), db: Session = Depends(get_db)):
    """Return every API key row owned by the authenticated user."""
    owned_keys = db.query(ApiKey).filter(ApiKey.user_id == user.id)
    return owned_keys.all()
|
| 412 |
+
|
| 413 |
+
@router.delete("/api-keys/{key_id}", status_code=status.HTTP_204_NO_CONTENT)
def delete_api_key(key_id: str, user: User = Depends(get_current_user), db: Session = Depends(get_db)):
    """Delete one of the authenticated user's API keys.

    The lookup is scoped to the caller's own keys, so a key id belonging to
    another user yields the same 404 as an unknown id.
    """
    record = (
        db.query(ApiKey)
        .filter(ApiKey.id == key_id, ApiKey.user_id == user.id)
        .first()
    )
    if not record:
        raise HTTPException(status_code=404, detail="API key not found")

    db.delete(record)
    db.commit()
    return None
|
| 423 |
+
|
| 424 |
@router.get("/config")
|
| 425 |
def get_auth_config():
|
| 426 |
"""Return public configuration for auth providers"""
|
backend/app/schemas.py
CHANGED
|
@@ -53,11 +53,17 @@ class RefreshRequest(BaseModel):
|
|
| 53 |
refresh_token: str
|
| 54 |
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
class UserResponse(BaseModel):
|
| 57 |
id: str
|
| 58 |
username: str
|
| 59 |
email: str
|
| 60 |
is_admin: bool
|
|
|
|
| 61 |
created_at: datetime
|
| 62 |
|
| 63 |
class Config:
|
|
@@ -136,5 +142,22 @@ class ChatHistoryResponse(BaseModel):
|
|
| 136 |
document_id: Optional[str] = None
|
| 137 |
|
| 138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
# Rebuild models for forward references
|
| 140 |
TokenResponse.model_rebuild()
|
|
|
|
| 53 |
refresh_token: str
|
| 54 |
|
| 55 |
|
| 56 |
+
class HFTokenUpdate(BaseModel):
|
| 57 |
+
"""Request schema for updating the user's HuggingFace token."""
|
| 58 |
+
hf_token: str
|
| 59 |
+
|
| 60 |
+
|
| 61 |
class UserResponse(BaseModel):
|
| 62 |
id: str
|
| 63 |
username: str
|
| 64 |
email: str
|
| 65 |
is_admin: bool
|
| 66 |
+
hf_token: Optional[str] = None
|
| 67 |
created_at: datetime
|
| 68 |
|
| 69 |
class Config:
|
|
|
|
| 142 |
document_id: Optional[str] = None
|
| 143 |
|
| 144 |
|
| 145 |
+
# ── ApiKeys ─────────────────────────────────────────────
|
| 146 |
+
|
| 147 |
+
class ApiKeyResponse(BaseModel):
    """Public view of a stored API key.

    Carries neither the plaintext key nor its hash; only the short prefix
    identifies the key.
    """

    id: str
    # Leading characters of the key, kept for display/identification.
    key_prefix: str
    created_at: datetime
    # None when no usage timestamp has been recorded.
    last_used: Optional[datetime] = None

    class Config:
        # Allow building this model from attribute-bearing objects (ORM rows).
        from_attributes = True
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
class ApiKeyCreateResponse(BaseModel):
    """Response returned when a new API key is created."""

    # The full plaintext key.
    key: str
    # Public details of the stored key record.
    api_key: ApiKeyResponse
|
| 160 |
+
|
| 161 |
+
|
| 162 |
# Rebuild models for forward references
|
| 163 |
TokenResponse.model_rebuild()
|
backend/requirements.txt
CHANGED
|
@@ -18,6 +18,9 @@ google-auth
|
|
| 18 |
# Config
|
| 19 |
pydantic-settings
|
| 20 |
pydantic[email]
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
# Document Processing
|
| 23 |
PyMuPDF
|
|
@@ -28,6 +31,7 @@ langchain
|
|
| 28 |
langchain-community
|
| 29 |
langchain-huggingface
|
| 30 |
langchain-text-splitters
|
|
|
|
| 31 |
|
| 32 |
# Embeddings & ML
|
| 33 |
sentence-transformers
|
|
|
|
| 18 |
# Config
|
| 19 |
pydantic-settings
|
| 20 |
pydantic[email]
|
| 21 |
+
pytest
|
| 22 |
+
pytest-cov
|
| 23 |
+
httpx
|
| 24 |
|
| 25 |
# Document Processing
|
| 26 |
PyMuPDF
|
|
|
|
| 31 |
langchain-community
|
| 32 |
langchain-huggingface
|
| 33 |
langchain-text-splitters
|
| 34 |
+
langsmith
|
| 35 |
|
| 36 |
# Embeddings & ML
|
| 37 |
sentence-transformers
|
backend/tests/conftest.py
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import types
|
| 4 |
+
from contextlib import asynccontextmanager
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
import pytest
|
| 8 |
+
from fastapi.testclient import TestClient
|
| 9 |
+
from sqlalchemy import create_engine
|
| 10 |
+
from sqlalchemy.orm import sessionmaker
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
ROOT = Path(__file__).resolve().parents[2]
|
| 14 |
+
BACKEND_DIR = ROOT / "backend"
|
| 15 |
+
|
| 16 |
+
if str(BACKEND_DIR) not in sys.path:
|
| 17 |
+
sys.path.insert(0, str(BACKEND_DIR))
|
| 18 |
+
|
| 19 |
+
os.environ.setdefault("SECRET_KEY", "test-secret-key")
|
| 20 |
+
os.environ.setdefault("DATABASE_URL", "sqlite:///./test_bootstrap.db")
|
| 21 |
+
os.environ.setdefault("HF_TOKEN", "test-hf-token")
|
| 22 |
+
os.environ.setdefault("UPLOAD_DIR", str(ROOT / "backend" / "test_uploads"))
|
| 23 |
+
os.environ.setdefault("CHROMA_PERSIST_DIR", str(ROOT / "backend" / "test_chroma"))
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
fake_embeddings = types.ModuleType("app.rag.embeddings")
|
| 27 |
+
fake_embeddings.get_embedding_model = lambda: object()
|
| 28 |
+
fake_embeddings.embed_query = lambda query: [0.0]
|
| 29 |
+
fake_embeddings.embed_texts = lambda texts: [[0.0] for _ in texts]
|
| 30 |
+
sys.modules.setdefault("app.rag.embeddings", fake_embeddings)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class _FakeChromaClient:
    """Minimal stand-in returned by the fake ``get_chroma_client`` in tests."""

    def heartbeat(self):
        # Always report a healthy client.
        return "ok"
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
fake_vectorstore = types.ModuleType("app.rag.vectorstore")
|
| 39 |
+
fake_vectorstore.get_chroma_client = lambda: _FakeChromaClient()
|
| 40 |
+
fake_vectorstore.store_chunks = lambda chunks, document_id, filename, user_id: len(chunks)
|
| 41 |
+
fake_vectorstore.delete_document_chunks = lambda document_id, user_id: None
|
| 42 |
+
fake_vectorstore.query_chunks = lambda query_embedding, user_id, document_id=None, top_k=10: []
|
| 43 |
+
sys.modules.setdefault("app.rag.vectorstore", fake_vectorstore)
|
| 44 |
+
|
| 45 |
+
slowapi_module = types.ModuleType("slowapi")
|
| 46 |
+
slowapi_errors = types.ModuleType("slowapi.errors")
|
| 47 |
+
slowapi_middleware = types.ModuleType("slowapi.middleware")
|
| 48 |
+
slowapi_util = types.ModuleType("slowapi.util")
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class RateLimitExceeded(Exception):
    """Stub exception exposed as ``slowapi.errors.RateLimitExceeded`` in tests."""

    pass
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
class SlowAPIMiddleware:
    """Pass-through stub exposed as ``slowapi.middleware.SlowAPIMiddleware`` in tests."""

    def __init__(self, app, *args, **kwargs):
        # Extra positional/keyword arguments are accepted and ignored.
        self.app = app

    async def __call__(self, scope, receive, send):
        # Forward the call to the wrapped app unchanged (no rate limiting).
        await self.app(scope, receive, send)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
class Limiter:
    """No-op stub exposed as ``slowapi.Limiter`` in tests."""

    def __init__(self, key_func=None, *args, **kwargs):
        # Only the key function is remembered; other arguments are ignored.
        self.key_func = key_func

    def limit(self, _value):
        """Return a decorator that hands the function back untouched."""
        return lambda fn: fn
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
slowapi_errors.RateLimitExceeded = RateLimitExceeded
|
| 74 |
+
slowapi_middleware.SlowAPIMiddleware = SlowAPIMiddleware
|
| 75 |
+
slowapi_util.get_remote_address = lambda request: "127.0.0.1"
|
| 76 |
+
slowapi_module.Limiter = Limiter
|
| 77 |
+
|
| 78 |
+
sys.modules.setdefault("slowapi", slowapi_module)
|
| 79 |
+
sys.modules.setdefault("slowapi.errors", slowapi_errors)
|
| 80 |
+
sys.modules.setdefault("slowapi.middleware", slowapi_middleware)
|
| 81 |
+
sys.modules.setdefault("slowapi.util", slowapi_util)
|
| 82 |
+
|
| 83 |
+
from app.auth import create_access_token, create_refresh_token, hash_password
|
| 84 |
+
from app.database import Base, get_db
|
| 85 |
+
from app.main import app
|
| 86 |
+
from app.models import Document, User
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
@pytest.fixture()
def db_session(tmp_path):
    """Yield a SQLAlchemy session bound to a fresh SQLite file under ``tmp_path``.

    All tables in ``Base.metadata`` are created before the test and dropped
    afterwards; the session is closed and the engine disposed on teardown.
    """
    engine = create_engine(
        f"sqlite:///{tmp_path / 'test.db'}",
        connect_args={"check_same_thread": False},
    )
    session_factory = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    Base.metadata.create_all(bind=engine)

    session = session_factory()
    try:
        yield session
    finally:
        session.close()
        Base.metadata.drop_all(bind=engine)
        engine.dispose()
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
@pytest.fixture()
def client(db_session, monkeypatch):
    """Yield a ``TestClient`` for the app wired to the per-test database session.

    * ``get_db`` and ``app.database.SessionLocal`` both hand out ``db_session``.
    * The app's lifespan is replaced by a no-op for the duration of the test.

    The lifespan replacement goes through ``monkeypatch`` so the shared ``app``
    object gets its real lifespan back on teardown, and dependency overrides
    are cleared even if shutting down the client raises.
    """
    def override_get_db():
        # The db_session fixture owns the session lifecycle; nothing to close here.
        yield db_session

    @asynccontextmanager
    async def no_lifespan(_app):
        yield

    monkeypatch.setattr("app.database.SessionLocal", lambda: db_session)
    monkeypatch.setattr(app.router, "lifespan_context", no_lifespan)
    app.dependency_overrides[get_db] = override_get_db

    try:
        with TestClient(app) as test_client:
            yield test_client
    finally:
        app.dependency_overrides.clear()
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
@pytest.fixture()
def user(db_session):
    """Persist and return a test user whose password is "password123"."""
    account = User(
        username="tester",
        email="tester@example.com",
        hashed_password=hash_password("password123"),
    )
    db_session.add(account)
    db_session.commit()
    # Reload so database-generated columns are available on the instance.
    db_session.refresh(account)
    return account
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
@pytest.fixture()
def auth_headers(user):
    """Return an ``Authorization`` header carrying a bearer access token for ``user``."""
    return {"Authorization": f"Bearer {create_access_token(user.id)}"}
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
@pytest.fixture()
def refresh_token(user):
    """Return a refresh token created for the test ``user``."""
    return create_refresh_token(user.id)
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
@pytest.fixture()
def ready_document(db_session, user):
    """Persist and return a document owned by ``user`` with status "ready"."""
    fields = dict(
        user_id=user.id,
        filename="ready.txt",
        original_name="ready.txt",
        file_size=128,
        page_count=1,
        chunk_count=2,
        status="ready",
    )
    document = Document(**fields)
    db_session.add(document)
    db_session.commit()
    db_session.refresh(document)
    return document
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
@pytest.fixture()
def pending_document(db_session, user):
    """Persist and return a document owned by ``user`` with status "pending"."""
    fields = dict(
        user_id=user.id,
        filename="pending.txt",
        original_name="pending.txt",
        file_size=64,
        status="pending",
    )
    document = Document(**fields)
    db_session.add(document)
    db_session.commit()
    db_session.refresh(document)
    return document
|
backend/tests/test_auth.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def test_register_success(client):
|
| 2 |
+
response = client.post(
|
| 3 |
+
"/api/v1/auth/register",
|
| 4 |
+
json={
|
| 5 |
+
"username": "newuser",
|
| 6 |
+
"email": "newuser@example.com",
|
| 7 |
+
"password": "password123",
|
| 8 |
+
},
|
| 9 |
+
)
|
| 10 |
+
|
| 11 |
+
assert response.status_code == 201
|
| 12 |
+
payload = response.json()
|
| 13 |
+
assert payload["access_token"]
|
| 14 |
+
assert payload["refresh_token"]
|
| 15 |
+
assert payload["user"]["email"] == "newuser@example.com"
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def test_register_duplicate_email_or_username_conflict(client):
|
| 19 |
+
payload = {
|
| 20 |
+
"username": "dupuser",
|
| 21 |
+
"email": "dup@example.com",
|
| 22 |
+
"password": "password123",
|
| 23 |
+
}
|
| 24 |
+
first = client.post("/api/v1/auth/register", json=payload)
|
| 25 |
+
assert first.status_code == 201
|
| 26 |
+
|
| 27 |
+
duplicate_email = client.post(
|
| 28 |
+
"/api/v1/auth/register",
|
| 29 |
+
json={**payload, "username": "anotheruser"},
|
| 30 |
+
)
|
| 31 |
+
assert duplicate_email.status_code == 409
|
| 32 |
+
assert duplicate_email.json()["detail"] == "Email already registered"
|
| 33 |
+
|
| 34 |
+
duplicate_username = client.post(
|
| 35 |
+
"/api/v1/auth/register",
|
| 36 |
+
json={**payload, "email": "another@example.com"},
|
| 37 |
+
)
|
| 38 |
+
assert duplicate_username.status_code == 409
|
| 39 |
+
assert duplicate_username.json()["detail"] == "Username already taken"
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def test_login_success(client, user):
|
| 43 |
+
response = client.post(
|
| 44 |
+
"/api/v1/auth/login",
|
| 45 |
+
json={"email": user.email, "password": "password123"},
|
| 46 |
+
)
|
| 47 |
+
|
| 48 |
+
assert response.status_code == 200
|
| 49 |
+
payload = response.json()
|
| 50 |
+
assert payload["access_token"]
|
| 51 |
+
assert payload["refresh_token"]
|
| 52 |
+
assert payload["user"]["username"] == user.username
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def test_login_invalid_password(client, user):
|
| 56 |
+
response = client.post(
|
| 57 |
+
"/api/v1/auth/login",
|
| 58 |
+
json={"email": user.email, "password": "wrong-password"},
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
assert response.status_code == 401
|
| 62 |
+
assert response.json()["detail"] == "Invalid email or password"
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def test_auth_me_requires_auth(client):
|
| 66 |
+
response = client.get("/api/v1/auth/me")
|
| 67 |
+
|
| 68 |
+
assert response.status_code in (401, 403)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def test_refresh_token_success(client, refresh_token):
|
| 72 |
+
response = client.post(
|
| 73 |
+
"/api/v1/auth/refresh",
|
| 74 |
+
json={"refresh_token": refresh_token},
|
| 75 |
+
)
|
| 76 |
+
|
| 77 |
+
assert response.status_code == 200
|
| 78 |
+
payload = response.json()
|
| 79 |
+
assert payload["access_token"]
|
| 80 |
+
assert payload["refresh_token"]
|
| 81 |
+
assert payload["token_type"] == "bearer"
|
backend/tests/test_chat.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def test_chat_ask_success(client, auth_headers, ready_document, monkeypatch):
|
| 2 |
+
monkeypatch.setattr(
|
| 3 |
+
"app.routes.chat.generate_answer",
|
| 4 |
+
lambda question, user_id, document_id=None: {
|
| 5 |
+
"answer": "Mocked answer",
|
| 6 |
+
"sources": [
|
| 7 |
+
{
|
| 8 |
+
"text": "Mock source",
|
| 9 |
+
"filename": "ready.txt",
|
| 10 |
+
"page": 1,
|
| 11 |
+
"score": 0.99,
|
| 12 |
+
"confidence": 99.0,
|
| 13 |
+
}
|
| 14 |
+
],
|
| 15 |
+
},
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
response = client.post(
|
| 19 |
+
"/api/v1/chat/ask",
|
| 20 |
+
headers=auth_headers,
|
| 21 |
+
json={"question": "What is in the doc?", "document_id": ready_document.id},
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
assert response.status_code == 200
|
| 25 |
+
payload = response.json()
|
| 26 |
+
assert payload["answer"] == "Mocked answer"
|
| 27 |
+
assert payload["document_id"] == ready_document.id
|
| 28 |
+
assert payload["sources"][0]["filename"] == "ready.txt"
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def test_chat_ask_document_not_found(client, auth_headers):
|
| 32 |
+
response = client.post(
|
| 33 |
+
"/api/v1/chat/ask",
|
| 34 |
+
headers=auth_headers,
|
| 35 |
+
json={"question": "Missing doc?", "document_id": "missing-doc-id"},
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
assert response.status_code == 404
|
| 39 |
+
assert response.json()["detail"] == "Document not found"
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def test_chat_ask_document_not_ready(client, auth_headers, pending_document):
|
| 43 |
+
response = client.post(
|
| 44 |
+
"/api/v1/chat/ask",
|
| 45 |
+
headers=auth_headers,
|
| 46 |
+
json={"question": "Pending doc?", "document_id": pending_document.id},
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
assert response.status_code == 400
|
| 50 |
+
assert "Document is still pending" in response.json()["detail"]
|
backend/tests/test_chunker.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
import pytest
|
| 4 |
+
|
| 5 |
+
from app.rag.chunker import chunk_document, get_page_count
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def test_txt_extraction_and_chunking(tmp_path):
|
| 9 |
+
file_path = tmp_path / "notes.txt"
|
| 10 |
+
file_path.write_text("This is a sample text file for chunking.", encoding="utf-8")
|
| 11 |
+
|
| 12 |
+
chunks = chunk_document(str(file_path))
|
| 13 |
+
|
| 14 |
+
assert len(chunks) >= 1
|
| 15 |
+
assert chunks[0]["page"] == 1
|
| 16 |
+
assert "sample text file" in chunks[0]["text"]
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def test_empty_txt_returns_no_chunks(tmp_path):
|
| 20 |
+
file_path = tmp_path / "empty.txt"
|
| 21 |
+
file_path.write_text(" \n", encoding="utf-8")
|
| 22 |
+
|
| 23 |
+
assert chunk_document(str(file_path)) == []
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def test_unsupported_extension_raises_value_error(tmp_path):
|
| 27 |
+
file_path = tmp_path / "data.csv"
|
| 28 |
+
file_path.write_text("a,b,c", encoding="utf-8")
|
| 29 |
+
|
| 30 |
+
with pytest.raises(ValueError, match="Unsupported file type"):
|
| 31 |
+
chunk_document(str(file_path))
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def test_get_page_count_for_txt_returns_one(tmp_path):
|
| 35 |
+
file_path = tmp_path / "single.txt"
|
| 36 |
+
file_path.write_text("hello", encoding="utf-8")
|
| 37 |
+
|
| 38 |
+
assert get_page_count(str(file_path)) == 1
|
backend/tests/test_documents.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def test_api_health(client):
|
| 2 |
+
response = client.get("/api/health")
|
| 3 |
+
|
| 4 |
+
assert response.status_code == 200
|
| 5 |
+
payload = response.json()
|
| 6 |
+
assert payload["status"] == "healthy"
|
| 7 |
+
assert payload["version"] == "2.0.0"
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def test_protected_documents_list_requires_auth(client):
|
| 11 |
+
response = client.get("/api/v1/documents/")
|
| 12 |
+
|
| 13 |
+
assert response.status_code in (401, 403)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def test_documents_list_authenticated(client, auth_headers, ready_document):
|
| 17 |
+
response = client.get("/api/v1/documents/", headers=auth_headers)
|
| 18 |
+
|
| 19 |
+
assert response.status_code == 200
|
| 20 |
+
payload = response.json()
|
| 21 |
+
assert payload["total"] == 1
|
| 22 |
+
assert payload["items"][0]["id"] == ready_document.id
|
| 23 |
+
assert payload["items"][0]["original_name"] == "ready.txt"
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def test_upload_rejects_unsupported_extension_before_deep_validation(client, auth_headers):
|
| 27 |
+
response = client.post(
|
| 28 |
+
"/api/v1/documents/upload",
|
| 29 |
+
headers=auth_headers,
|
| 30 |
+
files={"file": ("payload.exe", b"binary-data", "application/octet-stream")},
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
assert response.status_code == 400
|
| 34 |
+
assert "not supported" in response.json()["detail"]
|
bots/discord/README.md
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Discord RAG Bot
|
| 2 |
+
|
| 3 |
+
This bot connects to the PDF-Assistant-RAG backend to answer questions based on your uploaded documents, directly from Discord.
|
| 4 |
+
|
| 5 |
+
## Setup
|
| 6 |
+
|
| 7 |
+
1. Install dependencies:
|
| 8 |
+
```bash
|
| 9 |
+
pip install -r requirements.txt
|
| 10 |
+
```
|
| 11 |
+
|
| 12 |
+
2. Create a Discord Bot on the [Discord Developer Portal](https://discord.com/developers/applications):
|
| 13 |
+
- Go to "Bot" tab and enable **Message Content Intent**.
|
| 14 |
+
- Copy the bot token.
|
| 15 |
+
- Invite the bot to your server via the OAuth2 URL Generator (check `bot` scope and `Send Messages` permission).
|
| 16 |
+
|
| 17 |
+
3. Generate an API Key from your PDF-Assistant-RAG profile dashboard.
|
| 18 |
+
|
| 19 |
+
4. Set the environment variables and run:
|
| 20 |
+
```bash
|
| 21 |
+
export DISCORD_TOKEN="your-discord-bot-token"
|
| 22 |
+
export RAG_API_KEY="rag_your-api-key"
|
| 23 |
+
|
| 24 |
+
# Optional: set API_URL if backend is not running on localhost:8000
|
| 25 |
+
# export API_URL="http://localhost:8000/api/v1"
|
| 26 |
+
|
| 27 |
+
python bot.py
|
| 28 |
+
```
|
| 29 |
+
|
| 30 |
+
## Usage
|
| 31 |
+
In a Discord channel where the bot is present, simply use the `!ask` command:
|
| 32 |
+
|
| 33 |
+
```
|
| 34 |
+
!ask Summarize the latest uploaded report for me
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
The bot will query the backend API using your personal API key and reply with the generated answer.
|
bots/discord/bot.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import discord
|
| 3 |
+
import requests
|
| 4 |
+
from discord.ext import commands
|
| 5 |
+
|
| 6 |
+
DISCORD_TOKEN = os.getenv("DISCORD_TOKEN")
|
| 7 |
+
API_URL = os.getenv("API_URL", "http://localhost:8000/api/v1")
|
| 8 |
+
RAG_API_KEY = os.getenv("RAG_API_KEY")
|
| 9 |
+
|
| 10 |
+
if not DISCORD_TOKEN or not RAG_API_KEY:
|
| 11 |
+
print("Error: DISCORD_TOKEN and RAG_API_KEY must be set in environment variables.")
|
| 12 |
+
exit(1)
|
| 13 |
+
|
| 14 |
+
intents = discord.Intents.default()
|
| 15 |
+
intents.message_content = True
|
| 16 |
+
bot = commands.Bot(command_prefix="!", intents=intents)
|
| 17 |
+
|
| 18 |
+
@bot.event
|
| 19 |
+
async def on_ready():
|
| 20 |
+
print(f"Logged in as {bot.user.name} ({bot.user.id})")
|
| 21 |
+
print("Ready to answer questions via '!ask <question>'")
|
| 22 |
+
|
| 23 |
+
@bot.command(name="ask")
|
| 24 |
+
async def ask_rag(ctx, *, question: str):
|
| 25 |
+
"""Ask the RAG Assistant a question. Example: !ask What is in my documents?"""
|
| 26 |
+
loading_msg = await ctx.send("🤔 Thinking...")
|
| 27 |
+
|
| 28 |
+
try:
|
| 29 |
+
headers = {
|
| 30 |
+
"Authorization": f"Bearer {RAG_API_KEY}",
|
| 31 |
+
"Content-Type": "application/json"
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
# A document_id could be added to the payload later; for now only the question is sent (global ask).
|
| 35 |
+
payload = {"question": question}
|
| 36 |
+
|
| 37 |
+
response = requests.post(
|
| 38 |
+
f"{API_URL}/chat/ask",
|
| 39 |
+
json=payload,
|
| 40 |
+
headers=headers,
|
| 41 |
+
timeout=30 # Give the RAG backend some time to process
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
if response.status_code == 200:
|
| 45 |
+
data = response.json()
|
| 46 |
+
answer = data.get("answer", "No answer provided.")
|
| 47 |
+
|
| 48 |
+
if len(answer) > 2000:
|
| 49 |
+
# Discord has a 2000 character limit per message
|
| 50 |
+
chunks = [answer[i:i+2000] for i in range(0, len(answer), 2000)]
|
| 51 |
+
await loading_msg.edit(content=chunks[0])
|
| 52 |
+
for chunk in chunks[1:]:
|
| 53 |
+
await ctx.send(chunk)
|
| 54 |
+
else:
|
| 55 |
+
await loading_msg.edit(content=answer)
|
| 56 |
+
else:
|
| 57 |
+
await loading_msg.edit(content=f"⚠️ Error from RAG API: `{response.status_code}`")
|
| 58 |
+
print(f"API Error: {response.text}")
|
| 59 |
+
|
| 60 |
+
except requests.exceptions.RequestException as e:
|
| 61 |
+
await loading_msg.edit(content=f"❌ Failed to connect to backend API.")
|
| 62 |
+
print(f"Request Error: {e}")
|
| 63 |
+
except Exception as e:
|
| 64 |
+
await loading_msg.edit(content=f"❌ An unexpected error occurred.")
|
| 65 |
+
print(f"Error: {e}")
|
| 66 |
+
|
| 67 |
+
if __name__ == "__main__":
|
| 68 |
+
bot.run(DISCORD_TOKEN)
|
bots/discord/requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
discord.py==2.3.2
|
| 2 |
+
requests==2.31.0
|
frontend/package-lock.json
CHANGED
|
@@ -15,6 +15,7 @@
|
|
| 15 |
"i18next-browser-languagedetector": "^8.2.1",
|
| 16 |
"lucide-react": "^1.8.0",
|
| 17 |
"next": "16.2.4",
|
|
|
|
| 18 |
"pdfjs-dist": "^5.6.205",
|
| 19 |
"react": "19.2.4",
|
| 20 |
"react-dom": "19.2.4",
|
|
@@ -8755,6 +8756,16 @@
|
|
| 8755 |
}
|
| 8756 |
}
|
| 8757 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8758 |
"node_modules/next/node_modules/postcss": {
|
| 8759 |
"version": "8.4.31",
|
| 8760 |
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.31.tgz",
|
|
|
|
| 15 |
"i18next-browser-languagedetector": "^8.2.1",
|
| 16 |
"lucide-react": "^1.8.0",
|
| 17 |
"next": "16.2.4",
|
| 18 |
+
"next-themes": "^0.4.6",
|
| 19 |
"pdfjs-dist": "^5.6.205",
|
| 20 |
"react": "19.2.4",
|
| 21 |
"react-dom": "19.2.4",
|
|
|
|
| 8756 |
}
|
| 8757 |
}
|
| 8758 |
},
|
| 8759 |
+
"node_modules/next-themes": {
|
| 8760 |
+
"version": "0.4.6",
|
| 8761 |
+
"resolved": "https://registry.npmjs.org/next-themes/-/next-themes-0.4.6.tgz",
|
| 8762 |
+
"integrity": "sha512-pZvgD5L0IEvX5/9GWyHMf3m8BKiVQwsCMHfoFosXtXBMnaS0ZnIJ9ST4b4NqLVKDEm8QBxoNNGNaBv2JNF6XNA==",
|
| 8763 |
+
"license": "MIT",
|
| 8764 |
+
"peerDependencies": {
|
| 8765 |
+
"react": "^16.8 || ^17 || ^18 || ^19 || ^19.0.0-rc",
|
| 8766 |
+
"react-dom": "^16.8 || ^17 || ^18 || ^19 || ^19.0.0-rc"
|
| 8767 |
+
}
|
| 8768 |
+
},
|
| 8769 |
"node_modules/next/node_modules/postcss": {
|
| 8770 |
"version": "8.4.31",
|
| 8771 |
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.31.tgz",
|
frontend/package.json
CHANGED
|
@@ -18,6 +18,7 @@
|
|
| 18 |
"i18next-browser-languagedetector": "^8.2.1",
|
| 19 |
"lucide-react": "^1.8.0",
|
| 20 |
"next": "16.2.4",
|
|
|
|
| 21 |
"pdfjs-dist": "^5.6.205",
|
| 22 |
"react": "19.2.4",
|
| 23 |
"react-dom": "19.2.4",
|
|
|
|
| 18 |
"i18next-browser-languagedetector": "^8.2.1",
|
| 19 |
"lucide-react": "^1.8.0",
|
| 20 |
"next": "16.2.4",
|
| 21 |
+
"next-themes": "^0.4.6",
|
| 22 |
"pdfjs-dist": "^5.6.205",
|
| 23 |
"react": "19.2.4",
|
| 24 |
"react-dom": "19.2.4",
|
frontend/src/app/dashboard/page.tsx
CHANGED
|
@@ -57,11 +57,23 @@ export default function DashboardPage() {
|
|
| 57 |
const [connectionError, setConnectionError] = useState("");
|
| 58 |
const [documentsLoading, setDocumentsLoading] = useState(true);
|
| 59 |
|
| 60 |
-
|
| 61 |
useEffect(() => {
|
| 62 |
if (!loading && !user) router.replace("/login");
|
| 63 |
}, [user, loading, router]);
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
// Load documents
|
| 66 |
const loadDocuments = useCallback(async () => {
|
| 67 |
try {
|
|
|
|
| 57 |
const [connectionError, setConnectionError] = useState("");
|
| 58 |
const [documentsLoading, setDocumentsLoading] = useState(true);
|
| 59 |
|
| 60 |
+
// Auth guard
|
| 61 |
useEffect(() => {
|
| 62 |
if (!loading && !user) router.replace("/login");
|
| 63 |
}, [user, loading, router]);
|
| 64 |
|
| 65 |
+
// Log a console warning when no Hugging Face token ("hf_token") is stored in localStorage; the dashboard itself is not blocked
|
| 66 |
+
useEffect(() => {
|
| 67 |
+
if (user) {
|
| 68 |
+
const existingHfToken = localStorage.getItem("hf_token");
|
| 69 |
+
|
| 70 |
+
if (!existingHfToken) {
|
| 71 |
+
console.warn("Hugging Face API configuration key missing.");
|
| 72 |
+
}
|
| 73 |
+
}
|
| 74 |
+
}, [user]);
|
| 75 |
+
|
| 76 |
+
|
| 77 |
// Load documents
|
| 78 |
const loadDocuments = useCallback(async () => {
|
| 79 |
try {
|
frontend/src/app/globals.css
CHANGED
|
@@ -83,6 +83,35 @@
|
|
| 83 |
--sidebar-ring: oklch(0.65 0.2 265);
|
| 84 |
}
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
.light {
|
| 87 |
--background: oklch(0.985 0 0);
|
| 88 |
--foreground: oklch(0.145 0 0);
|
|
|
|
| 83 |
--sidebar-ring: oklch(0.65 0.2 265);
|
| 84 |
}
|
| 85 |
|
| 86 |
+
.dark {
|
| 87 |
+
--background: oklch(0.145 0 0);
|
| 88 |
+
--foreground: oklch(0.985 0 0);
|
| 89 |
+
--card: oklch(0.178 0 0);
|
| 90 |
+
--card-foreground: oklch(0.985 0 0);
|
| 91 |
+
--popover: oklch(0.178 0 0);
|
| 92 |
+
--popover-foreground: oklch(0.985 0 0);
|
| 93 |
+
--primary: oklch(0.65 0.2 265);
|
| 94 |
+
--primary-foreground: oklch(0.985 0 0);
|
| 95 |
+
--secondary: oklch(0.22 0 0);
|
| 96 |
+
--secondary-foreground: oklch(0.985 0 0);
|
| 97 |
+
--muted: oklch(0.22 0 0);
|
| 98 |
+
--muted-foreground: oklch(0.6 0 0);
|
| 99 |
+
--accent: oklch(0.55 0.18 265);
|
| 100 |
+
--accent-foreground: oklch(0.985 0 0);
|
| 101 |
+
--destructive: oklch(0.704 0.191 22.216);
|
| 102 |
+
--border: oklch(1 0 0 / 10%);
|
| 103 |
+
--input: oklch(1 0 0 / 12%);
|
| 104 |
+
--ring: oklch(0.65 0.2 265);
|
| 105 |
+
--sidebar: oklch(0.12 0 0);
|
| 106 |
+
--sidebar-foreground: oklch(0.985 0 0);
|
| 107 |
+
--sidebar-primary: oklch(0.65 0.2 265);
|
| 108 |
+
--sidebar-primary-foreground: oklch(0.985 0 0);
|
| 109 |
+
--sidebar-accent: oklch(0.22 0 0);
|
| 110 |
+
--sidebar-accent-foreground: oklch(0.985 0 0);
|
| 111 |
+
--sidebar-border: oklch(1 0 0 / 8%);
|
| 112 |
+
--sidebar-ring: oklch(0.65 0.2 265);
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
.light {
|
| 116 |
--background: oklch(0.985 0 0);
|
| 117 |
--foreground: oklch(0.145 0 0);
|
frontend/src/app/layout.tsx
CHANGED
|
@@ -4,6 +4,7 @@ import "./globals.css";
|
|
| 4 |
import { AuthProvider } from "@/lib/auth";
|
| 5 |
import { TooltipProvider } from "@/components/ui/tooltip";
|
| 6 |
import I18nProvider from "@/components/providers/I18nProvider";
|
|
|
|
| 7 |
|
| 8 |
const inter = Inter({
|
| 9 |
variable: "--font-sans",
|
|
@@ -24,15 +25,20 @@ export default function RootLayout({
|
|
| 24 |
children: React.ReactNode;
|
| 25 |
}>) {
|
| 26 |
return (
|
| 27 |
-
<html lang="en" className={`${inter.variable}
|
| 28 |
<body className="min-h-full flex flex-col bg-background text-foreground">
|
| 29 |
-
<
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
</body>
|
| 37 |
</html>
|
| 38 |
);
|
|
|
|
| 4 |
import { AuthProvider } from "@/lib/auth";
|
| 5 |
import { TooltipProvider } from "@/components/ui/tooltip";
|
| 6 |
import I18nProvider from "@/components/providers/I18nProvider";
|
| 7 |
+
import { ThemeProvider } from "@/components/layout/ThemeProvider";
|
| 8 |
|
| 9 |
const inter = Inter({
|
| 10 |
variable: "--font-sans",
|
|
|
|
| 25 |
children: React.ReactNode;
|
| 26 |
}>) {
|
| 27 |
return (
|
| 28 |
+
<html lang="en" className={`${inter.variable} h-full antialiased`} suppressHydrationWarning>
|
| 29 |
<body className="min-h-full flex flex-col bg-background text-foreground">
|
| 30 |
+
<ThemeProvider
|
| 31 |
+
attribute="class"
|
| 32 |
+
defaultTheme="dark"
|
| 33 |
+
enableSystem={false}
|
| 34 |
+
disableTransitionOnChange
|
| 35 |
+
>
|
| 36 |
+
<AuthProvider>
|
| 37 |
+
<I18nProvider>
|
| 38 |
+
<TooltipProvider>{children}</TooltipProvider>
|
| 39 |
+
</I18nProvider>
|
| 40 |
+
</AuthProvider>
|
| 41 |
+
</ThemeProvider>
|
| 42 |
</body>
|
| 43 |
</html>
|
| 44 |
);
|
frontend/src/components/auth/ApiKeyManager.tsx
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { useState, useEffect } from "react";
|
| 4 |
+
import { Button } from "@/components/ui/button";
|
| 5 |
+
import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogTrigger } from "@/components/ui/dialog";
|
| 6 |
+
import { api } from "@/lib/api";
|
| 7 |
+
import { Key, Plus, Trash2, Copy, Check } from "lucide-react";
|
| 8 |
+
|
| 9 |
+
interface ApiKey {
|
| 10 |
+
id: string;
|
| 11 |
+
key_prefix: string;
|
| 12 |
+
created_at: string;
|
| 13 |
+
last_used: string | null;
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
export default function ApiKeyManager() {
|
| 17 |
+
const [keys, setKeys] = useState<ApiKey[]>([]);
|
| 18 |
+
const [newKey, setNewKey] = useState<string | null>(null);
|
| 19 |
+
const [loading, setLoading] = useState(false);
|
| 20 |
+
const [copied, setCopied] = useState(false);
|
| 21 |
+
|
| 22 |
+
const fetchKeys = async () => {
|
| 23 |
+
try {
|
| 24 |
+
setLoading(true);
|
| 25 |
+
const data = await api.get<ApiKey[]>("/api/v1/auth/api-keys");
|
| 26 |
+
setKeys(data || []);
|
| 27 |
+
} catch (err) {
|
| 28 |
+
console.error("Failed to load API keys", err);
|
| 29 |
+
} finally {
|
| 30 |
+
setLoading(false);
|
| 31 |
+
}
|
| 32 |
+
};
|
| 33 |
+
|
| 34 |
+
useEffect(() => {
|
| 35 |
+
const timer = setTimeout(() => {
|
| 36 |
+
fetchKeys();
|
| 37 |
+
}, 0);
|
| 38 |
+
return () => clearTimeout(timer);
|
| 39 |
+
}, []);
|
| 40 |
+
|
| 41 |
+
const generateKey = async () => {
|
| 42 |
+
try {
|
| 43 |
+
setLoading(true);
|
| 44 |
+
const data = await api.post<{ key: string; api_key: ApiKey }>("/api/v1/auth/api-keys");
|
| 45 |
+
setNewKey(data.key);
|
| 46 |
+
setKeys((prev) => [...prev, data.api_key]);
|
| 47 |
+
} catch (err) {
|
| 48 |
+
console.error("Failed to generate API key", err);
|
| 49 |
+
} finally {
|
| 50 |
+
setLoading(false);
|
| 51 |
+
}
|
| 52 |
+
};
|
| 53 |
+
|
| 54 |
+
const revokeKey = async (id: string) => {
|
| 55 |
+
if (!confirm("Are you sure you want to revoke this key? Any integrations using it will immediately break.")) return;
|
| 56 |
+
|
| 57 |
+
try {
|
| 58 |
+
await api.delete(`/api/v1/auth/api-keys/${id}`);
|
| 59 |
+
setKeys((prev) => prev.filter((k) => k.id !== id));
|
| 60 |
+
} catch (err) {
|
| 61 |
+
console.error("Failed to revoke API key", err);
|
| 62 |
+
}
|
| 63 |
+
};
|
| 64 |
+
|
| 65 |
+
const copyToClipboard = () => {
|
| 66 |
+
if (newKey) {
|
| 67 |
+
navigator.clipboard.writeText(newKey);
|
| 68 |
+
setCopied(true);
|
| 69 |
+
setTimeout(() => setCopied(false), 2000);
|
| 70 |
+
}
|
| 71 |
+
};
|
| 72 |
+
|
| 73 |
+
return (
|
| 74 |
+
<Dialog onOpenChange={(open) => { if (!open) setNewKey(null); }}>
|
| 75 |
+
<DialogTrigger
|
| 76 |
+
render={
|
| 77 |
+
<button className="flex w-full cursor-pointer items-center rounded-sm px-2 py-1.5 text-sm outline-none transition-colors hover:bg-accent hover:text-accent-foreground">
|
| 78 |
+
<Key className="mr-2 h-4 w-4" />
|
| 79 |
+
<span>API Keys</span>
|
| 80 |
+
</button>
|
| 81 |
+
}
|
| 82 |
+
/>
|
| 83 |
+
<DialogContent className="max-w-2xl sm:rounded-2xl border-border/40 p-6 md:p-8 bg-background/95 backdrop-blur-xl shadow-2xl">
|
| 84 |
+
|
| 85 |
+
<DialogHeader>
|
| 86 |
+
<DialogTitle className="text-2xl font-bold tracking-tight">API Keys</DialogTitle>
|
| 87 |
+
<p className="text-sm text-muted-foreground mt-1.5">
|
| 88 |
+
Manage API keys to access the RAG engine programmatically from your own applications or scripts.
|
| 89 |
+
</p>
|
| 90 |
+
</DialogHeader>
|
| 91 |
+
|
| 92 |
+
{newKey && (
|
| 93 |
+
<div className="my-6 p-5 border border-primary/20 bg-primary/5 rounded-xl space-y-3 animate-in fade-in zoom-in-95 duration-300">
|
| 94 |
+
<h4 className="font-semibold text-primary flex items-center gap-2">
|
| 95 |
+
<Key className="w-4 h-4" /> Save your new API key
|
| 96 |
+
</h4>
|
| 97 |
+
<p className="text-sm text-muted-foreground">
|
| 98 |
+
Please copy this key and store it somewhere safe. For security reasons, you will <strong>never</strong> be able to view it again.
|
| 99 |
+
</p>
|
| 100 |
+
<div className="flex items-center gap-2 mt-2">
|
| 101 |
+
<code className="flex-1 bg-background/80 border border-border/50 px-4 py-2.5 rounded-lg text-sm font-mono break-all text-foreground shadow-inner">
|
| 102 |
+
{newKey}
|
| 103 |
+
</code>
|
| 104 |
+
<Button onClick={copyToClipboard} variant={copied ? "default" : "secondary"} className="shrink-0 shadow-sm">
|
| 105 |
+
{copied ? <Check className="w-4 h-4 mr-2" /> : <Copy className="w-4 h-4 mr-2" />}
|
| 106 |
+
{copied ? "Copied!" : "Copy"}
|
| 107 |
+
</Button>
|
| 108 |
+
</div>
|
| 109 |
+
</div>
|
| 110 |
+
)}
|
| 111 |
+
|
| 112 |
+
<div className="space-y-4 mt-6">
|
| 113 |
+
<div className="flex items-center justify-between">
|
| 114 |
+
<h3 className="text-sm font-medium text-foreground/80 uppercase tracking-wider">Active Keys</h3>
|
| 115 |
+
<Button onClick={generateKey} disabled={loading} size="sm" className="rounded-full shadow-sm hover:shadow-md transition-shadow">
|
| 116 |
+
<Plus className="w-4 h-4 mr-1.5" />
|
| 117 |
+
Generate New Key
|
| 118 |
+
</Button>
|
| 119 |
+
</div>
|
| 120 |
+
|
| 121 |
+
<div className="rounded-xl border border-border/50 bg-card overflow-hidden shadow-sm">
|
| 122 |
+
{keys.length === 0 ? (
|
| 123 |
+
<div className="p-8 text-center text-sm text-muted-foreground bg-muted/20">
|
| 124 |
+
<Key className="w-8 h-8 mx-auto mb-3 opacity-20" />
|
| 125 |
+
You don't have any API keys yet.
|
| 126 |
+
</div>
|
| 127 |
+
) : (
|
| 128 |
+
<div className="divide-y divide-border/50">
|
| 129 |
+
{keys.map((key) => (
|
| 130 |
+
<div key={key.id} className="flex items-center justify-between p-4 hover:bg-muted/30 transition-colors group">
|
| 131 |
+
<div className="space-y-1">
|
| 132 |
+
<div className="font-mono text-sm font-medium tracking-tight">
|
| 133 |
+
{key.key_prefix}••••••••••••••••••••••
|
| 134 |
+
</div>
|
| 135 |
+
<div className="text-xs text-muted-foreground flex gap-4">
|
| 136 |
+
<span>Created: {new Date(key.created_at).toLocaleDateString()}</span>
|
| 137 |
+
<span>Last used: {key.last_used ? new Date(key.last_used).toLocaleDateString() : "Never"}</span>
|
| 138 |
+
</div>
|
| 139 |
+
</div>
|
| 140 |
+
<Button
|
| 141 |
+
variant="ghost"
|
| 142 |
+
size="icon"
|
| 143 |
+
onClick={() => revokeKey(key.id)}
|
| 144 |
+
className="text-destructive/70 hover:text-destructive hover:bg-destructive/10 opacity-0 group-hover:opacity-100 transition-all"
|
| 145 |
+
title="Revoke key"
|
| 146 |
+
>
|
| 147 |
+
<Trash2 className="w-4 h-4" />
|
| 148 |
+
</Button>
|
| 149 |
+
</div>
|
| 150 |
+
))}
|
| 151 |
+
</div>
|
| 152 |
+
)}
|
| 153 |
+
</div>
|
| 154 |
+
</div>
|
| 155 |
+
</DialogContent>
|
| 156 |
+
</Dialog>
|
| 157 |
+
);
|
| 158 |
+
}
|
frontend/src/components/chat/SourceCard.tsx
CHANGED
|
@@ -4,7 +4,14 @@ import { useState } from "react";
|
|
| 4 |
import type { SourceChunk } from "@/store/chat-store";
|
| 5 |
import { Badge } from "@/components/ui/badge";
|
| 6 |
import { Button } from "@/components/ui/button";
|
| 7 |
-
import {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
interface Props {
|
| 10 |
sources: SourceChunk[];
|
|
@@ -13,89 +20,125 @@ interface Props {
|
|
| 13 |
|
| 14 |
export default function SourceCard({ sources = [], onPageClick }: Props) {
|
| 15 |
const [expanded, setExpanded] = useState(false);
|
|
|
|
| 16 |
|
| 17 |
if (sources.length === 0) return null;
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
return (
|
| 20 |
<div className="rounded-lg border border-border/50 bg-card/50 overflow-hidden">
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
variant="secondary"
|
| 44 |
-
className="text-[10px] h-5 cursor-pointer hover:bg-primary/20 transition-colors"
|
| 45 |
-
onClick={() => onPageClick(src.page + 1)}
|
| 46 |
-
>
|
| 47 |
-
p.{src.page + 1} • {src.confidence}%
|
| 48 |
-
</Badge>
|
| 49 |
-
))}
|
| 50 |
-
</div>
|
| 51 |
-
)}
|
| 52 |
-
|
| 53 |
-
{/* ── Expanded: Full source cards ─────────────── */}
|
| 54 |
-
{expanded && (
|
| 55 |
-
<div className="border-t border-border/30">
|
| 56 |
-
{sources.map((src, i) => (
|
| 57 |
-
<div
|
| 58 |
-
key={i}
|
| 59 |
-
className="px-3 py-2.5 border-b border-border/20 last:border-b-0 hover:bg-accent/20 transition-colors"
|
| 60 |
-
>
|
| 61 |
-
<div className="flex items-center justify-between mb-1.5">
|
| 62 |
-
<div className="flex items-center gap-2">
|
| 63 |
-
<span className="text-[10px] font-medium text-muted-foreground">
|
| 64 |
-
{src.filename}
|
| 65 |
-
</span>
|
| 66 |
-
<Badge variant="outline" className="text-[9px] h-4 px-1.5">
|
| 67 |
-
Page {src.page + 1}
|
| 68 |
-
</Badge>
|
| 69 |
<Badge
|
| 70 |
variant="secondary"
|
| 71 |
-
className=
|
| 72 |
-
|
| 73 |
-
? "text-emerald-400 bg-emerald-400/10"
|
| 74 |
-
: src.confidence >= 50
|
| 75 |
-
? "text-yellow-400 bg-yellow-400/10"
|
| 76 |
-
: "text-muted-foreground"
|
| 77 |
-
}`}
|
| 78 |
>
|
| 79 |
-
{src.confidence}%
|
| 80 |
</Badge>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
</div>
|
| 82 |
-
<
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
onClick={() => onPageClick(src.page + 1)}
|
| 87 |
>
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
</div>
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
</div>
|
| 96 |
-
))}
|
| 97 |
-
</div>
|
| 98 |
-
)}
|
| 99 |
</div>
|
| 100 |
);
|
| 101 |
}
|
|
|
|
| 4 |
import type { SourceChunk } from "@/store/chat-store";
|
| 5 |
import { Badge } from "@/components/ui/badge";
|
| 6 |
import { Button } from "@/components/ui/button";
|
| 7 |
+
import {
|
| 8 |
+
Tooltip,
|
| 9 |
+
TooltipContent,
|
| 10 |
+
TooltipTrigger,
|
| 11 |
+
} from "@/components/ui/tooltip";
|
| 12 |
+
import { ChevronDown, ChevronUp, FileText, Eye, TextQuote } from "lucide-react";
|
| 13 |
+
|
| 14 |
+
const EXCERPT_THRESHOLD = 200;
|
| 15 |
|
| 16 |
interface Props {
|
| 17 |
sources: SourceChunk[];
|
|
|
|
| 20 |
|
| 21 |
export default function SourceCard({ sources = [], onPageClick }: Props) {
|
| 22 |
const [expanded, setExpanded] = useState(false);
|
| 23 |
+
const [excerptOpen, setExcerptOpen] = useState<Set<number>>(new Set());
|
| 24 |
|
| 25 |
if (sources.length === 0) return null;
|
| 26 |
|
| 27 |
+
const toggleExcerpt = (i: number) => {
|
| 28 |
+
const next = new Set(excerptOpen);
|
| 29 |
+
if (next.has(i)) {
|
| 30 |
+
next.delete(i);
|
| 31 |
+
} else {
|
| 32 |
+
next.add(i);
|
| 33 |
+
}
|
| 34 |
+
setExcerptOpen(next);
|
| 35 |
+
};
|
| 36 |
+
|
| 37 |
return (
|
| 38 |
<div className="rounded-lg border border-border/50 bg-card/50 overflow-hidden">
|
| 39 |
+
{/* ── Header ──────────────────────────────────── */}
|
| 40 |
+
<button
|
| 41 |
+
onClick={() => setExpanded(!expanded)}
|
| 42 |
+
className="w-full flex items-center justify-between px-3 py-2 text-xs hover:bg-accent/30 transition-colors"
|
| 43 |
+
>
|
| 44 |
+
<span className="flex items-center gap-1.5 text-muted-foreground">
|
| 45 |
+
<FileText className="w-3.5 h-3.5" />
|
| 46 |
+
{sources.length} source{sources.length > 1 ? "s" : ""} cited
|
| 47 |
+
</span>
|
| 48 |
+
{expanded ? (
|
| 49 |
+
<ChevronUp className="w-3.5 h-3.5 text-muted-foreground" />
|
| 50 |
+
) : (
|
| 51 |
+
<ChevronDown className="w-3.5 h-3.5 text-muted-foreground" />
|
| 52 |
+
)}
|
| 53 |
+
</button>
|
| 54 |
|
| 55 |
+
{/* ── Collapsed: Mini badges with hover preview ── */}
|
| 56 |
+
{!expanded && (
|
| 57 |
+
<div className="px-3 pb-2 flex flex-wrap gap-1">
|
| 58 |
+
{sources.map((src, i) => (
|
| 59 |
+
<Tooltip key={i}>
|
| 60 |
+
<TooltipTrigger className="inline-flex">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
<Badge
|
| 62 |
variant="secondary"
|
| 63 |
+
className="text-[10px] h-5 cursor-pointer hover:bg-primary/20 transition-colors"
|
| 64 |
+
onClick={() => onPageClick(src.page + 1)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
>
|
| 66 |
+
p.{src.page + 1} • {src.confidence}%
|
| 67 |
</Badge>
|
| 68 |
+
</TooltipTrigger>
|
| 69 |
+
<TooltipContent
|
| 70 |
+
side="top"
|
| 71 |
+
align="center"
|
| 72 |
+
className="max-w-xs p-2"
|
| 73 |
+
>
|
| 74 |
+
<p className="text-[11px] leading-relaxed line-clamp-6">
|
| 75 |
+
{src.text}
|
| 76 |
+
</p>
|
| 77 |
+
</TooltipContent>
|
| 78 |
+
</Tooltip>
|
| 79 |
+
))}
|
| 80 |
+
</div>
|
| 81 |
+
)}
|
| 82 |
+
|
| 83 |
+
{/* ── Expanded: Full source cards ─────────────── */}
|
| 84 |
+
{expanded && (
|
| 85 |
+
<div className="border-t border-border/30">
|
| 86 |
+
{sources.map((src, i) => (
|
| 87 |
+
<div
|
| 88 |
+
key={i}
|
| 89 |
+
className="px-3 py-2.5 border-b border-border/20 last:border-b-0 hover:bg-accent/20 transition-colors"
|
| 90 |
+
>
|
| 91 |
+
<div className="flex items-center justify-between mb-1.5">
|
| 92 |
+
<div className="flex items-center gap-2">
|
| 93 |
+
<span className="text-[10px] font-medium text-muted-foreground">
|
| 94 |
+
{src.filename}
|
| 95 |
+
</span>
|
| 96 |
+
<Badge variant="outline" className="text-[9px] h-4 px-1.5">
|
| 97 |
+
Page {src.page + 1}
|
| 98 |
+
</Badge>
|
| 99 |
+
<Badge
|
| 100 |
+
variant="secondary"
|
| 101 |
+
className={`text-[9px] h-4 px-1.5 ${
|
| 102 |
+
src.confidence >= 80
|
| 103 |
+
? "text-emerald-400 bg-emerald-400/10"
|
| 104 |
+
: src.confidence >= 50
|
| 105 |
+
? "text-yellow-400 bg-yellow-400/10"
|
| 106 |
+
: "text-muted-foreground"
|
| 107 |
+
}`}
|
| 108 |
+
>
|
| 109 |
+
{src.confidence}% match
|
| 110 |
+
</Badge>
|
| 111 |
+
</div>
|
| 112 |
+
<Button
|
| 113 |
+
variant="ghost"
|
| 114 |
+
size="sm"
|
| 115 |
+
className="h-6 px-2 text-[10px]"
|
| 116 |
+
onClick={() => onPageClick(src.page + 1)}
|
| 117 |
+
>
|
| 118 |
+
<Eye className="w-3 h-3 mr-1" />
|
| 119 |
+
View
|
| 120 |
+
</Button>
|
| 121 |
</div>
|
| 122 |
+
<p
|
| 123 |
+
className={`text-[11px] text-muted-foreground leading-relaxed ${
|
| 124 |
+
excerptOpen.has(i) ? "" : "line-clamp-3"
|
| 125 |
+
}`}
|
|
|
|
| 126 |
>
|
| 127 |
+
{src.text}
|
| 128 |
+
</p>
|
| 129 |
+
{src.text.length > EXCERPT_THRESHOLD && (
|
| 130 |
+
<button
|
| 131 |
+
onClick={() => toggleExcerpt(i)}
|
| 132 |
+
className="mt-1.5 flex items-center gap-1 text-[10px] text-primary/70 hover:text-primary transition-colors"
|
| 133 |
+
>
|
| 134 |
+
<TextQuote className="w-3 h-3" />
|
| 135 |
+
{excerptOpen.has(i) ? "Hide excerpt" : "Show excerpt"}
|
| 136 |
+
</button>
|
| 137 |
+
)}
|
| 138 |
</div>
|
| 139 |
+
))}
|
| 140 |
+
</div>
|
| 141 |
+
)}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
</div>
|
| 143 |
);
|
| 144 |
}
|
frontend/src/components/layout/Header.tsx
CHANGED
|
@@ -22,7 +22,10 @@ import {
|
|
| 22 |
Moon,
|
| 23 |
Sun,
|
| 24 |
} from "lucide-react";
|
| 25 |
-
import {
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
interface HeaderProps {
|
| 28 |
sidebarOpen: boolean;
|
|
@@ -31,23 +34,19 @@ interface HeaderProps {
|
|
| 31 |
onToggleViewer: () => void;
|
| 32 |
}
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
export default function Header({ sidebarOpen, onToggleSidebar, viewerOpen, onToggleViewer }: HeaderProps) {
|
| 35 |
const { user, logout } = useAuth();
|
| 36 |
const { t, i18n } = useTranslation();
|
| 37 |
const router = useRouter();
|
| 38 |
-
const
|
|
|
|
| 39 |
|
| 40 |
-
const
|
| 41 |
-
|
| 42 |
-
if (isDark) {
|
| 43 |
-
html.classList.remove("dark");
|
| 44 |
-
html.classList.add("light");
|
| 45 |
-
} else {
|
| 46 |
-
html.classList.remove("light");
|
| 47 |
-
html.classList.add("dark");
|
| 48 |
-
}
|
| 49 |
-
setIsDark(!isDark);
|
| 50 |
-
};
|
| 51 |
|
| 52 |
const handleLogout = () => {
|
| 53 |
logout();
|
|
@@ -89,9 +88,11 @@ export default function Header({ sidebarOpen, onToggleSidebar, viewerOpen, onTog
|
|
| 89 |
{viewerOpen ? <PanelRightClose className="w-4 h-4" /> : <PanelRightOpen className="w-4 h-4" />}
|
| 90 |
</Button>
|
| 91 |
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
| 95 |
|
| 96 |
<select
|
| 97 |
aria-label={t("common.language")}
|
|
@@ -106,20 +107,27 @@ export default function Header({ sidebarOpen, onToggleSidebar, viewerOpen, onTog
|
|
| 106 |
</select>
|
| 107 |
|
| 108 |
<DropdownMenu>
|
| 109 |
-
<DropdownMenuTrigger
|
| 110 |
-
|
| 111 |
-
<
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
<div className="px-3 py-2">
|
| 119 |
<p className="text-sm font-medium">{user?.username}</p>
|
| 120 |
<p className="text-xs text-muted-foreground truncate">{user?.email}</p>
|
| 121 |
</div>
|
| 122 |
<DropdownMenuSeparator />
|
|
|
|
|
|
|
| 123 |
<DropdownMenuItem className="text-destructive cursor-pointer" onClick={handleLogout}>
|
| 124 |
<LogOut className="w-4 h-4 mr-2" />
|
| 125 |
{t("header.signOut")}
|
|
@@ -129,4 +137,4 @@ export default function Header({ sidebarOpen, onToggleSidebar, viewerOpen, onTog
|
|
| 129 |
</div>
|
| 130 |
</header>
|
| 131 |
);
|
| 132 |
-
}
|
|
|
|
| 22 |
Moon,
|
| 23 |
Sun,
|
| 24 |
} from "lucide-react";
|
| 25 |
+
import { useSyncExternalStore } from "react";
|
| 26 |
+
import { useTheme } from "next-themes";
|
| 27 |
+
import ApiKeyManager from "@/components/auth/ApiKeyManager";
|
| 28 |
+
|
| 29 |
|
| 30 |
interface HeaderProps {
|
| 31 |
sidebarOpen: boolean;
|
|
|
|
| 34 |
onToggleViewer: () => void;
|
| 35 |
}
|
| 36 |
|
| 37 |
+
const subscribe = () => () => {};
|
| 38 |
+
const getSnapshot = () => true;
|
| 39 |
+
const getServerSnapshot = () => false;
|
| 40 |
+
|
| 41 |
export default function Header({ sidebarOpen, onToggleSidebar, viewerOpen, onToggleViewer }: HeaderProps) {
|
| 42 |
const { user, logout } = useAuth();
|
| 43 |
const { t, i18n } = useTranslation();
|
| 44 |
const router = useRouter();
|
| 45 |
+
const { theme, setTheme } = useTheme();
|
| 46 |
+
const mounted = useSyncExternalStore(subscribe, getSnapshot, getServerSnapshot); // ← replaces useState + useEffect
|
| 47 |
|
| 48 |
+
const isDark = theme === "dark";
|
| 49 |
+
const toggleTheme = () => setTheme(isDark ? "light" : "dark");
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
const handleLogout = () => {
|
| 52 |
logout();
|
|
|
|
| 88 |
{viewerOpen ? <PanelRightClose className="w-4 h-4" /> : <PanelRightOpen className="w-4 h-4" />}
|
| 89 |
</Button>
|
| 90 |
|
| 91 |
+
{mounted && (
|
| 92 |
+
<Button variant="ghost" size="icon" className="h-8 w-8" onClick={toggleTheme} title={isDark ? t("header.lightMode") : t("header.darkMode")}>
|
| 93 |
+
{isDark ? <Sun className="w-4 h-4" /> : <Moon className="w-4 h-4" />}
|
| 94 |
+
</Button>
|
| 95 |
+
)}
|
| 96 |
|
| 97 |
<select
|
| 98 |
aria-label={t("common.language")}
|
|
|
|
| 107 |
</select>
|
| 108 |
|
| 109 |
<DropdownMenu>
|
| 110 |
+
<DropdownMenuTrigger
|
| 111 |
+
render={
|
| 112 |
+
<button className="flex items-center h-8 gap-2 px-2 rounded-md hover:bg-accent transition-colors cursor-pointer">
|
| 113 |
+
<Avatar className="w-6 h-6">
|
| 114 |
+
<AvatarFallback className="text-[10px] bg-primary/20 text-primary">
|
| 115 |
+
{user?.username?.slice(0, 2).toUpperCase() || "U"}
|
| 116 |
+
</AvatarFallback>
|
| 117 |
+
</Avatar>
|
| 118 |
+
<span className="text-sm hidden sm:inline">{user?.username}</span>
|
| 119 |
+
</button>
|
| 120 |
+
}
|
| 121 |
+
/>
|
| 122 |
+
|
| 123 |
+
<DropdownMenuContent align="end" className="w-56">
|
| 124 |
<div className="px-3 py-2">
|
| 125 |
<p className="text-sm font-medium">{user?.username}</p>
|
| 126 |
<p className="text-xs text-muted-foreground truncate">{user?.email}</p>
|
| 127 |
</div>
|
| 128 |
<DropdownMenuSeparator />
|
| 129 |
+
<ApiKeyManager />
|
| 130 |
+
<DropdownMenuSeparator />
|
| 131 |
<DropdownMenuItem className="text-destructive cursor-pointer" onClick={handleLogout}>
|
| 132 |
<LogOut className="w-4 h-4 mr-2" />
|
| 133 |
{t("header.signOut")}
|
|
|
|
| 137 |
</div>
|
| 138 |
</header>
|
| 139 |
);
|
| 140 |
+
}
|
frontend/src/components/layout/ThemeProvider.tsx
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { ThemeProvider as NextThemesProvider } from "next-themes";
|
| 4 |
+
import { type ThemeProviderProps } from "next-themes";
|
| 5 |
+
|
| 6 |
+
export function ThemeProvider({ children, ...props }: ThemeProviderProps) {
|
| 7 |
+
return <NextThemesProvider {...props}>{children}</NextThemesProvider>;
|
| 8 |
+
}
|
frontend/src/components/layout/ThemeToggle.tsx
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { useTheme } from "next-themes";
|
| 4 |
+
import { useSyncExternalStore } from "react";
|
| 5 |
+
import { Sun, Moon } from "lucide-react";
|
| 6 |
+
|
| 7 |
+
// useSyncExternalStore as a "mounted" flag: the server snapshot is false and the client snapshot is true, so the toggle renders only on the client and avoids a hydration mismatch
|
| 8 |
+
const subscribe = () => () => {};
|
| 9 |
+
const getSnapshot = () => true;
|
| 10 |
+
const getServerSnapshot = () => false;
|
| 11 |
+
|
| 12 |
+
export function ThemeToggle() {
|
| 13 |
+
const { theme, setTheme } = useTheme();
|
| 14 |
+
const mounted = useSyncExternalStore(subscribe, getSnapshot, getServerSnapshot);
|
| 15 |
+
|
| 16 |
+
if (!mounted) return null;
|
| 17 |
+
|
| 18 |
+
return (
|
| 19 |
+
<button
|
| 20 |
+
onClick={() => setTheme(theme === "dark" ? "light" : "dark")}
|
| 21 |
+
aria-label="Toggle theme"
|
| 22 |
+
className="rounded-md p-2 transition-colors hover:bg-gray-100 dark:hover:bg-gray-800"
|
| 23 |
+
>
|
| 24 |
+
{theme === "dark" ? (
|
| 25 |
+
<Sun className="h-5 w-5 text-yellow-400" />
|
| 26 |
+
) : (
|
| 27 |
+
<Moon className="h-5 w-5 text-gray-700" />
|
| 28 |
+
)}
|
| 29 |
+
</button>
|
| 30 |
+
);
|
| 31 |
+
}
|