Jiya3177 committed on
Commit
d2bcbec
·
2 Parent(s): 832377f 50dc638

fix: resolve i18n merge conflicts

Browse files
.env.example CHANGED
@@ -91,6 +91,24 @@ HF_TOKEN=your_huggingface_token_here
91
  # Optional — defaults to 1024
92
  # LLM_MAX_NEW_TOKENS=1024
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  # ── Embeddings (Optional — defaults shown)──────────────────────────────────────────────
95
 
96
  # SentenceTransformer model ID for generating document embeddings.
 
91
  # Optional — defaults to 1024
92
  # LLM_MAX_NEW_TOKENS=1024
93
 
94
+ # ── LangSmith Tracing (Optional) ────────────────────────
95
+
96
+ # Enable LangSmith tracing for the backend RAG pipeline.
97
+ # Optional — defaults to False
98
+ # LANGSMITH_TRACING=False
99
+
100
+ # LangSmith API key.
101
+ # Optional — only needed when LANGSMITH_TRACING=True
102
+ # LANGSMITH_API_KEY=
103
+
104
+ # LangSmith API endpoint.
105
+ # Optional — defaults to "https://api.smith.langchain.com"
106
+ # LANGSMITH_ENDPOINT=https://api.smith.langchain.com
107
+
108
+ # LangSmith project name used for traced runs.
109
+ # Optional — defaults to "pdf-assistant-rag"
110
+ # LANGSMITH_PROJECT=pdf-assistant-rag
111
+
112
  # ── Embeddings (Optional — defaults shown)──────────────────────────────────────────────
113
 
114
  # SentenceTransformer model ID for generating document embeddings.
.github/workflows/ci.yml CHANGED
@@ -54,6 +54,15 @@ jobs:
54
  run: |
55
  python -c "import sys; sys.path.insert(0, 'backend'); from app.config import settings; print('✅ Config imports OK')" || true
56
 
 
 
 
 
 
 
 
 
 
57
  # ── 2. Frontend Build Check ─────────────────────────────
58
  frontend-check:
59
  name: ⚛️ Frontend — TypeScript & Build
 
54
  run: |
55
  python -c "import sys; sys.path.insert(0, 'backend'); from app.config import settings; print('✅ Config imports OK')" || true
56
 
57
+ - name: Run backend pytest suite
58
+ env:
59
+ SECRET_KEY: ci-dummy-secret
60
+ DATABASE_URL: sqlite:///./ci_test.db
61
+ HF_TOKEN: ci-dummy-token
62
+ UPLOAD_DIR: /tmp/uploads
63
+ CHROMA_PERSIST_DIR: /tmp/chroma
64
+ run: pytest backend/tests -v
65
+
66
  # ── 2. Frontend Build Check ─────────────────────────────
67
  frontend-check:
68
  name: ⚛️ Frontend — TypeScript & Build
CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We as members, contributors, and leaders pledge to make participation in our
6
+ community a harassment-free experience for everyone, regardless of age, body
7
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
8
+ identity and expression, level of experience, education, socio-economic status,
9
+ nationality, personal appearance, race, religion, or sexual identity
10
+ and orientation.
11
+
12
+ We pledge to act and interact in ways that contribute to an open, welcoming,
13
+ diverse, inclusive, and healthy community.
14
+
15
+ ## Our Standards
16
+
17
+ Examples of behavior that contributes to a positive environment for our
18
+ community include:
19
+
20
+ * Demonstrating empathy and kindness toward other people
21
+ * Being respectful of differing opinions, viewpoints, and experiences
22
+ * Giving and gracefully accepting constructive feedback
23
+ * Accepting responsibility and apologizing to those affected by our mistakes,
24
+ and learning from the experience
25
+ * Focusing on what is best not just for us as individuals, but for the
26
+ overall community
27
+
28
+ Examples of unacceptable behavior include:
29
+
30
+ * The use of sexualized language or imagery, and sexual attention or
31
+ advances of any kind
32
+ * Trolling, insulting or derogatory comments, and personal or political attacks
33
+ * Public or private harassment
34
+ * Publishing others' private information, such as a physical or email
35
+ address, without their explicit permission
36
+ * Other conduct which could reasonably be considered inappropriate in a
37
+ professional setting
38
+
39
+ ## Enforcement Responsibilities
40
+
41
+ Community leaders are responsible for clarifying and enforcing our standards of
42
+ acceptable behavior and will take appropriate and fair corrective action in
43
+ response to any behavior that they deem inappropriate, threatening, offensive,
44
+ or harmful.
45
+
46
+ Community leaders have the right and responsibility to remove, edit, or reject
47
+ comments, commits, code, wiki edits, issues, and other contributions that are
48
+ not aligned to this Code of Conduct, and will communicate reasons for moderation
49
+ decisions when appropriate.
50
+
51
+ ## Scope
52
+
53
+ This Code of Conduct applies within all community spaces, and also applies when
54
+ an individual is officially representing the community in public spaces.
55
+ Examples of representing our community include using an official e-mail address,
56
+ posting via an official social media account, or acting as an appointed
57
+ representative at an online or offline event.
58
+
59
+ ## Enforcement
60
+
61
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
62
+ reported to the community leaders responsible for enforcement.
63
+ All complaints will be reviewed and investigated promptly and fairly.
64
+
65
+ All community leaders are obligated to respect the privacy and security of the
66
+ reporter of any incident.
67
+
68
+ ## Attribution
69
+
70
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
71
+ version 2.1, available at
72
+ [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
73
+
74
+ Community Impact Guidelines were inspired by
75
+ [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
76
+
77
+ For answers to common questions about this code of conduct, see the FAQ at
78
+ [https://www.contributor-covenant.org/faq][FAQ]. Translations are available
79
+ at [https://www.contributor-covenant.org/translations][translations].
80
+
81
+ [homepage]: https://www.contributor-covenant.org
82
+ [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
83
+ [Mozilla CoC]: https://github.com/mozilla/diversity
84
+ [FAQ]: https://www.contributor-covenant.org/faq
85
+ [translations]: https://www.contributor-covenant.org/translations
SECURITY.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Security Policy
2
+
3
+ ## Supported Versions
4
+
5
+ Currently, the following branches and versions of PDF-Assistant-RAG are supported with security updates.
6
+
7
+ | Version | Supported |
8
+ | ------- | ------------------ |
9
+ | `dev` | :white_check_mark: |
10
+ | `main` | :white_check_mark: |
11
+ | < 1.0 | :x: |
12
+
13
+ ## Reporting a Vulnerability
14
+
15
+ We take the security of our users and their data very seriously. If you discover a security vulnerability in this project, please **do not** report it by creating a public GitHub issue.
16
+
17
+ Instead, please privately report it by emailing the repository owner directly.
18
+
19
+ When reporting a vulnerability, please include:
20
+ * A detailed description of the vulnerability.
21
+ * The steps required to reproduce the vulnerability.
22
+ * Any potential impact or risk to users.
23
+
24
+ We will acknowledge your email within 48 hours and work with you to understand and resolve the issue. We aim to fix critical security issues as fast as possible and will credit you in the release notes if you wish.
25
+
26
+ Thank you for helping keep this project secure!
backend/app/auth.py CHANGED
@@ -67,12 +67,39 @@ def decode_token(token: str, token_type: str = "access") -> Optional[str]:
67
 
68
  # ── FastAPI Dependencies ─────────────────────────────
69
 
 
 
70
  def get_current_user(
71
  credentials: HTTPAuthorizationCredentials = Depends(security),
72
  db: Session = Depends(get_db),
73
  ) -> User:
74
- """Dependency: extract and validate user from JWT bearer token."""
75
  token = credentials.credentials
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  user_id = decode_token(token)
77
 
78
  if not user_id:
 
67
 
68
  # ── FastAPI Dependencies ─────────────────────────────
69
 
70
+ import hashlib
71
+
72
  def get_current_user(
73
  credentials: HTTPAuthorizationCredentials = Depends(security),
74
  db: Session = Depends(get_db),
75
  ) -> User:
76
+ """Dependency: extract and validate user from JWT bearer token or API key."""
77
  token = credentials.credentials
78
+
79
+ # Check if token is an API key
80
+ if token.startswith("rag_"):
81
+ hashed = hashlib.sha256(token.encode("utf-8")).hexdigest()
82
+ from app.models import ApiKey
83
+ api_key = db.query(ApiKey).filter(ApiKey.hashed_key == hashed).first()
84
+ if not api_key:
85
+ raise HTTPException(
86
+ status_code=status.HTTP_401_UNAUTHORIZED,
87
+ detail="Invalid API key",
88
+ headers={"WWW-Authenticate": "Bearer"},
89
+ )
90
+
91
+ api_key.last_used = datetime.now(timezone.utc)
92
+ db.commit()
93
+
94
+ user = api_key.user
95
+ if not user:
96
+ raise HTTPException(
97
+ status_code=status.HTTP_401_UNAUTHORIZED,
98
+ detail="User not found for this API key",
99
+ )
100
+ return user
101
+
102
+ # Otherwise, process as JWT
103
  user_id = decode_token(token)
104
 
105
  if not user_id:
backend/app/config.py CHANGED
@@ -56,8 +56,18 @@ class Settings(BaseSettings):
56
  LLM_TEMPERATURE: float = 0.3
57
  SUMMARY_MAX_TOKENS: int = 512
58
 
 
 
 
 
 
 
59
  # ── Reranker ─────────────────────────────────────────
60
  RERANKER_MODEL: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
 
 
 
 
61
 
62
 
63
  @property
 
56
  LLM_TEMPERATURE: float = 0.3
57
  SUMMARY_MAX_TOKENS: int = 512
58
 
59
+ # ── LangSmith Tracing (optional) ─────────────────────
60
+ LANGSMITH_TRACING: bool = False
61
+ LANGSMITH_API_KEY: str = ""
62
+ LANGSMITH_ENDPOINT: str = "https://api.smith.langchain.com"
63
+ LANGSMITH_PROJECT: str = "pdf-assistant-rag"
64
+
65
  # ── Reranker ─────────────────────────────────────────
66
  RERANKER_MODEL: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
67
+ # ── Vision / Image captioning ─────────────────────
68
+ VISION_PROVIDER: str | None = None # e.g. 'openai'
69
+ VISION_MODEL: str | None = None
70
+ OPENAI_API_KEY: str = ""
71
 
72
 
73
  @property
backend/app/database.py CHANGED
@@ -3,11 +3,13 @@ SQLAlchemy database setup with SQLite.
3
  Uses synchronous SQLAlchemy for simplicity and compatibility.
4
  """
5
  import os
6
- from sqlalchemy import create_engine
 
7
  from sqlalchemy.orm import sessionmaker, declarative_base
8
  from app.config import get_settings
9
 
10
  settings = get_settings()
 
11
 
12
  # ── Ensure data directory exists ─────────────────────
13
  db_path = settings.DATABASE_URL.replace("sqlite:///", "")
@@ -34,7 +36,34 @@ def get_db():
34
  db.close()
35
 
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  def init_db():
38
- """Create all tables on startup."""
39
  from app import models # noqa: F401 — import to register models
40
  Base.metadata.create_all(bind=engine)
 
 
3
  Uses synchronous SQLAlchemy for simplicity and compatibility.
4
  """
5
  import os
6
+ import logging
7
+ from sqlalchemy import create_engine, inspect, text
8
  from sqlalchemy.orm import sessionmaker, declarative_base
9
  from app.config import get_settings
10
 
11
  settings = get_settings()
12
+ logger = logging.getLogger(__name__)
13
 
14
  # ── Ensure data directory exists ─────────────────────
15
  db_path = settings.DATABASE_URL.replace("sqlite:///", "")
 
36
  db.close()
37
 
38
 
39
+ def _migrate_schema():
40
+ """Apply schema migrations for existing databases (SQLite-compatible).
41
+
42
+ SQLAlchemy's ``create_all`` only creates new tables and does **not**
43
+ add missing columns to existing tables. This helper fills that gap
44
+ for non-destructive changes such as new nullable columns.
45
+ """
46
+ inspector = inspect(engine)
47
+ existing_columns = {c["name"] for c in inspector.get_columns("users")}
48
+
49
+ migrations = [
50
+ ("users", "hf_token", "ALTER TABLE users ADD COLUMN hf_token VARCHAR(255)"),
51
+ ]
52
+
53
+ for table, column, ddl in migrations:
54
+ if column not in existing_columns:
55
+ try:
56
+ with engine.begin() as conn:
57
+ conn.execute(text(ddl))
58
+ logger.info("Migration: added column %s.%s", table, column)
59
+ except Exception:
60
+ logger.warning(
61
+ "Migration skipped (may already exist): %s.%s", table, column
62
+ )
63
+
64
+
65
def init_db():
    """Bootstrap the database at startup.

    Creates every table registered on ``Base.metadata`` and then runs
    ``_migrate_schema()`` to apply schema migrations.
    """
    # The import is needed only for its side effect of registering the models.
    from app import models  # noqa: F401 — import to register models

    Base.metadata.create_all(bind=engine)
    _migrate_schema()
backend/app/models.py CHANGED
@@ -22,10 +22,26 @@ class User(Base):
22
  is_admin = Column(Boolean, default=False)
23
  created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
24
  last_login = Column(DateTime, nullable=True, index=True)
 
25
 
26
  # Relationships
27
  documents = relationship("Document", back_populates="owner", cascade="all, delete-orphan")
28
  messages = relationship("ChatMessage", back_populates="user", cascade="all, delete-orphan")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
 
31
  class Document(Base):
 
22
  is_admin = Column(Boolean, default=False)
23
  created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
24
  last_login = Column(DateTime, nullable=True, index=True)
25
+ hf_token = Column(String(255), nullable=True)
26
 
27
  # Relationships
28
  documents = relationship("Document", back_populates="owner", cascade="all, delete-orphan")
29
  messages = relationship("ChatMessage", back_populates="user", cascade="all, delete-orphan")
30
+ api_keys = relationship("ApiKey", back_populates="user", cascade="all, delete-orphan")
31
+
32
+
33
class ApiKey(Base):
    """Database row for one API key belonging to a user (table ``api_keys``)."""

    __tablename__ = "api_keys"

    # Identity and ownership
    id = Column(String, primary_key=True, default=generate_uuid)
    user_id = Column(String, ForeignKey("users.id"), nullable=False, index=True)

    # Key material: a short prefix (max 10 chars) plus a unique, indexed hash column
    key_prefix = Column(String(10), nullable=False)
    hashed_key = Column(String(255), nullable=False, unique=True, index=True)

    # Timestamps; ``created_at`` defaults to the current UTC time
    created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
    last_used = Column(DateTime, nullable=True)

    # Relationships
    user = relationship("User", back_populates="api_keys")
45
 
46
 
47
  class Document(Base):
backend/app/rag/agent.py CHANGED
@@ -10,6 +10,7 @@ from huggingface_hub import InferenceClient
10
  from app.config import get_settings
11
  from app.rag.retriever import retrieve
12
  from app.rag.prompts import SYSTEM_PROMPT, RAG_PROMPT_TEMPLATE, GREETING_PROMPT
 
13
 
14
  logger = logging.getLogger(__name__)
15
  settings = get_settings()
@@ -65,6 +66,14 @@ def _chat_messages(system: str, user_content: str) -> list:
65
  ]
66
 
67
 
 
 
 
 
 
 
 
 
68
  def generate_answer(
69
  question: str,
70
  user_id: str,
@@ -145,6 +154,14 @@ def generate_answer(
145
  return {"answer": answer, "sources": sources}
146
 
147
 
 
 
 
 
 
 
 
 
148
  def generate_answer_stream(
149
  question: str,
150
  user_id: str,
 
10
  from app.config import get_settings
11
  from app.rag.retriever import retrieve
12
  from app.rag.prompts import SYSTEM_PROMPT, RAG_PROMPT_TEMPLATE, GREETING_PROMPT
13
+ from app.rag.tracing import trace_function
14
 
15
  logger = logging.getLogger(__name__)
16
  settings = get_settings()
 
66
  ]
67
 
68
 
69
+ @trace_function(
70
+ "generate_answer",
71
+ metadata_factory=lambda question, user_id, document_id=None: {
72
+ "user_id": user_id,
73
+ "document_id": document_id,
74
+ "llm_model": settings.LLM_MODEL,
75
+ },
76
+ )
77
  def generate_answer(
78
  question: str,
79
  user_id: str,
 
154
  return {"answer": answer, "sources": sources}
155
 
156
 
157
+ @trace_function(
158
+ "generate_answer_stream",
159
+ metadata_factory=lambda question, user_id, document_id=None: {
160
+ "user_id": user_id,
161
+ "document_id": document_id,
162
+ "llm_model": settings.LLM_MODEL,
163
+ },
164
+ )
165
  def generate_answer_stream(
166
  question: str,
167
  user_id: str,
backend/app/rag/chunker.py CHANGED
@@ -28,6 +28,34 @@ def extract_pdf(filepath: str) -> List[Dict[str, Any]]:
28
  return pages
29
 
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  def extract_docx(filepath: str) -> List[Dict[str, Any]]:
32
  """Extract text from DOCX files."""
33
  doc = docx.Document(filepath)
@@ -50,10 +78,13 @@ def chunk_document(filepath: str) -> List[Dict[str, Any]]:
50
  Returns list of dicts with 'text', 'page', and 'chunk_index'.
51
  """
52
  ext = filepath.rsplit(".", 1)[-1].lower()
 
53
 
54
  # ── Extract text by file type ────────────────────
55
  if ext == "pdf":
56
  pages = extract_pdf(filepath)
 
 
57
  elif ext == "docx":
58
  pages = extract_docx(filepath)
59
  elif ext in ("txt", "md"):
@@ -91,6 +122,16 @@ def chunk_document(filepath: str) -> List[Dict[str, Any]]:
91
  })
92
  chunk_index += 1
93
 
 
 
 
 
 
 
 
 
 
 
94
  return all_chunks
95
 
96
 
 
28
  return pages
29
 
30
 
31
def extract_pdf_images(filepath: str) -> List[Dict[str, Any]]:
    """Extract the images embedded in a PDF as PNG bytes.

    Args:
        filepath: Path of the PDF to open.

    Returns:
        One dict per image that could be extracted:
        ``{"image_bytes": b"...", "page": int}`` with a 1-based page number.
        Images whose extraction fails are skipped (best effort).

    The document is always closed, even if iterating pages or listing a
    page's images raises; such errors propagate to the caller.
    """
    images: List[Dict[str, Any]] = []
    doc = fitz.open(filepath)

    try:
        for page_num, page in enumerate(doc, start=1):
            # get_images returns a list of tuples where first item is xref
            for img in page.get_images(full=True):
                xref = img[0]
                try:
                    pix = fitz.Pixmap(doc, xref)
                    # Convert to RGB if it's CMYK or has alpha
                    if pix.n >= 4:
                        pix = fitz.Pixmap(fitz.csRGB, pix)

                    images.append({"image_bytes": pix.tobytes("png"), "page": page_num})
                except Exception:
                    # A single unreadable image must not abort the whole document.
                    continue
    finally:
        # Release the file handle on every exit path.
        doc.close()

    return images
57
+
58
+
59
  def extract_docx(filepath: str) -> List[Dict[str, Any]]:
60
  """Extract text from DOCX files."""
61
  doc = docx.Document(filepath)
 
78
  Returns list of dicts with 'text', 'page', and 'chunk_index'.
79
  """
80
  ext = filepath.rsplit(".", 1)[-1].lower()
81
+ images = []
82
 
83
  # ── Extract text by file type ────────────────────
84
  if ext == "pdf":
85
  pages = extract_pdf(filepath)
86
+ # also extract images for later captioning/embedding
87
+ images = extract_pdf_images(filepath)
88
  elif ext == "docx":
89
  pages = extract_docx(filepath)
90
  elif ext in ("txt", "md"):
 
122
  })
123
  chunk_index += 1
124
 
125
+ # Attach any images that belong to this page after text chunks for the page
126
+ for img in [i for i in images if i["page"] == page_num]:
127
+ all_chunks.append({
128
+ "text": "",
129
+ "page": page_num,
130
+ "chunk_index": chunk_index,
131
+ "image_bytes": img["image_bytes"],
132
+ })
133
+ chunk_index += 1
134
+
135
  return all_chunks
136
 
137
 
backend/app/rag/embeddings.py CHANGED
@@ -6,6 +6,7 @@ import logging
6
  from typing import List
7
  from langchain_huggingface import HuggingFaceEmbeddings
8
  from app.config import get_settings
 
9
 
10
  logger = logging.getLogger(__name__)
11
  settings = get_settings()
@@ -36,10 +37,26 @@ def get_embedding_model() -> HuggingFaceEmbeddings:
36
  def embed_texts(texts: List[str]) -> List[List[float]]:
37
  """Embed a batch of texts into vectors."""
38
  model = get_embedding_model()
39
- return model.embed_documents(texts)
 
 
 
 
 
 
 
 
40
 
41
 
42
  def embed_query(query: str) -> List[float]:
43
  """Embed a single query string."""
44
  model = get_embedding_model()
45
- return model.embed_query(query)
 
 
 
 
 
 
 
 
 
6
  from typing import List
7
  from langchain_huggingface import HuggingFaceEmbeddings
8
  from app.config import get_settings
9
+ from app.rag.tracing import trace_call
10
 
11
  logger = logging.getLogger(__name__)
12
  settings = get_settings()
 
37
def embed_texts(texts: List[str]) -> List[List[float]]:
    """Embed a batch of texts into vectors.

    The embedding call is routed through ``trace_call`` with run type
    ``"embedding"`` and metadata naming the embedding model and batch size.
    """
    embedder = get_embedding_model()

    def _embed_batch():
        return embedder.embed_documents(texts)

    trace_metadata = {
        "embedding_model": settings.EMBEDDING_MODEL,
        "text_count": len(texts),
    }
    return trace_call(
        "embed_texts",
        _embed_batch,
        run_type="embedding",
        metadata=trace_metadata,
    )
49
 
50
 
51
def embed_query(query: str) -> List[float]:
    """Embed a single query string.

    The embedding call is routed through ``trace_call`` with run type
    ``"embedding"`` and metadata naming the embedding model and query length.
    """
    embedder = get_embedding_model()

    def _embed_one():
        return embedder.embed_query(query)

    trace_metadata = {
        "embedding_model": settings.EMBEDDING_MODEL,
        "query_length": len(query),
    }
    return trace_call(
        "embed_query",
        _embed_one,
        run_type="embedding",
        metadata=trace_metadata,
    )
backend/app/rag/retriever.py CHANGED
@@ -5,6 +5,7 @@ import logging
5
  from typing import List, Dict, Any, Optional
6
  from app.config import get_settings
7
  from app.rag.embeddings import embed_query
 
8
  from app.rag.vectorstore import query_chunks
9
 
10
  logger = logging.getLogger(__name__)
@@ -31,6 +32,17 @@ def get_reranker():
31
  return _reranker if _reranker != "disabled" else None
32
 
33
 
 
 
 
 
 
 
 
 
 
 
 
34
  def retrieve(
35
  query: str,
36
  user_id: str,
 
5
  from typing import List, Dict, Any, Optional
6
  from app.config import get_settings
7
  from app.rag.embeddings import embed_query
8
+ from app.rag.tracing import trace_function
9
  from app.rag.vectorstore import query_chunks
10
 
11
  logger = logging.getLogger(__name__)
 
32
  return _reranker if _reranker != "disabled" else None
33
 
34
 
35
+ @trace_function(
36
+ "retrieve",
37
+ metadata_factory=lambda query, user_id, document_id=None: {
38
+ "user_id": user_id,
39
+ "document_id": document_id,
40
+ "embedding_model": settings.EMBEDDING_MODEL,
41
+ "reranker_model": settings.RERANKER_MODEL,
42
+ "top_k_retrieval": settings.TOP_K_RETRIEVAL,
43
+ "top_k_rerank": settings.TOP_K_RERANK,
44
+ },
45
+ )
46
  def retrieve(
47
  query: str,
48
  user_id: str,
backend/app/rag/tracing.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Optional LangSmith tracing helpers for the RAG pipeline.
3
+ Safe to import even when LangSmith is not installed or configured.
4
+ """
5
+ import logging
6
+ import os
7
+ from functools import wraps
8
+ from typing import Any, Callable, Optional
9
+
10
+ from app.config import get_settings
11
+
12
+ logger = logging.getLogger(__name__)
13
+ settings = get_settings()
14
+
15
+ try:
16
+ from langsmith import traceable as _langsmith_traceable
17
+ except Exception: # pragma: no cover - optional dependency safety
18
+ _langsmith_traceable = None
19
+
20
+
21
def configure_langsmith() -> bool:
    """Configure LangSmith environment variables when tracing is enabled.

    Returns:
        ``True`` only when tracing is switched on in settings, an API key is
        set, and the ``langsmith`` ``traceable`` helper was imported.
        Otherwise ``False``; each misconfiguration is logged as a warning so
        a disabled tracer is never silent.
    """
    if not settings.LANGSMITH_TRACING:
        return False

    if not settings.LANGSMITH_API_KEY:
        logger.warning("LangSmith tracing enabled but LANGSMITH_API_KEY is not set; tracing disabled.")
        return False

    # Export the settings through the process environment.
    os.environ["LANGSMITH_TRACING"] = "true"
    os.environ["LANGSMITH_API_KEY"] = settings.LANGSMITH_API_KEY
    os.environ["LANGSMITH_ENDPOINT"] = settings.LANGSMITH_ENDPOINT
    os.environ["LANGSMITH_PROJECT"] = settings.LANGSMITH_PROJECT

    if _langsmith_traceable is None:
        # Tracing was requested but the optional dependency failed to import.
        logger.warning(
            "LangSmith tracing enabled but the langsmith package could not be imported; tracing disabled."
        )
        return False

    return True
35
+
36
+
37
+ LANGSMITH_ENABLED = configure_langsmith()
38
+
39
+
40
+ def _sanitize_metadata(metadata: Optional[dict[str, Any]]) -> dict[str, Any]:
41
+ return {key: value for key, value in (metadata or {}).items() if value is not None}
42
+
43
+
44
+ def _build_traceable(name: str, run_type: str, metadata: Optional[dict[str, Any]] = None):
45
+ """Build a LangSmith traceable decorator safely across versions."""
46
+ if _langsmith_traceable is None:
47
+ return None
48
+
49
+ sanitized = _sanitize_metadata(metadata)
50
+ try:
51
+ return _langsmith_traceable(
52
+ name=name,
53
+ run_type=run_type,
54
+ metadata=sanitized or None,
55
+ )
56
+ except TypeError:
57
+ return _langsmith_traceable(name=name, run_type=run_type)
58
+
59
+
60
def trace_call(
    name: str,
    fn: Callable[..., Any],
    *args: Any,
    run_type: str = "chain",
    metadata: Optional[dict[str, Any]] = None,
    **kwargs: Any,
) -> Any:
    """Execute ``fn(*args, **kwargs)``, traced by LangSmith when available.

    When tracing is disabled, or no traceable decorator can be built, ``fn``
    is called directly. Otherwise ``fn`` is wrapped with the decorator built
    from ``name``, ``run_type`` and ``metadata`` and the wrapped callable is
    invoked. The return value of the call is passed through unchanged.
    """
    tracer = _build_traceable(name, run_type, metadata) if LANGSMITH_ENABLED else None
    target = fn if tracer is None else tracer(fn)
    return target(*args, **kwargs)
78
+
79
+
80
def trace_function(
    name: str,
    *,
    run_type: str = "chain",
    metadata_factory: Optional[Callable[..., dict[str, Any]]] = None,
) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """Decorator wrapper that becomes a no-op when LangSmith is disabled.

    Args:
        name: Run name passed to ``trace_call``.
        run_type: Run type passed to ``trace_call`` (default ``"chain"``).
        metadata_factory: Optional callable invoked with the wrapped
            function's own ``*args``/``**kwargs`` to build per-call metadata.

    Returns:
        A decorator. The decorated function forwards every call to
        ``trace_call`` and returns whatever the wrapped function returns.

    Tracing must never break the traced function, so the metadata factory
    is only evaluated while tracing is enabled, and any exception it raises
    (for example because its signature does not accept an argument the
    wrapped function was called with) is logged and the call proceeds
    without metadata.
    """
    def decorator(fn: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(fn)
        def wrapped(*args: Any, **kwargs: Any) -> Any:
            metadata = None
            # Metadata is only consumed by the tracer; skip the work otherwise.
            if metadata_factory is not None and LANGSMITH_ENABLED:
                try:
                    metadata = metadata_factory(*args, **kwargs)
                except Exception:
                    logger.debug(
                        "Metadata factory for traced function %r failed; tracing without metadata.",
                        name,
                        exc_info=True,
                    )
            return trace_call(
                name,
                fn,
                *args,
                run_type=run_type,
                metadata=metadata,
                **kwargs,
            )

        return wrapped

    return decorator
backend/app/rag/vectorstore.py CHANGED
@@ -8,6 +8,7 @@ import chromadb
8
  from chromadb.config import Settings as ChromaSettings
9
  from app.config import get_settings
10
  from app.rag.embeddings import get_embedding_model
 
11
 
12
  logger = logging.getLogger(__name__)
13
  settings = get_settings()
@@ -55,6 +56,12 @@ def store_chunks(
55
  if not chunks:
56
  return 0
57
 
 
 
 
 
 
 
58
  client = get_chroma_client()
59
  embedding_model = get_embedding_model()
60
 
@@ -74,6 +81,9 @@ def store_chunks(
74
  "document_id": document_id,
75
  "page": chunk["page"],
76
  "chunk_index": chunk["chunk_index"],
 
 
 
77
  }
78
  for chunk in chunks
79
  ]
 
8
  from chromadb.config import Settings as ChromaSettings
9
  from app.config import get_settings
10
  from app.rag.embeddings import get_embedding_model
11
+ from app.rag.vision import generate_captions_for_chunks
12
 
13
  logger = logging.getLogger(__name__)
14
  settings = get_settings()
 
56
  if not chunks:
57
  return 0
58
 
59
+ # Generate captions for any extracted images before embedding
60
+ try:
61
+ generate_captions_for_chunks(chunks)
62
+ except Exception as e:
63
+ logger.warning(f"Could not generate image captions: {e}")
64
+
65
  client = get_chroma_client()
66
  embedding_model = get_embedding_model()
67
 
 
81
  "document_id": document_id,
82
  "page": chunk["page"],
83
  "chunk_index": chunk["chunk_index"],
84
+ # Indicate whether this chunk was originally an image and include a short caption
85
+ **({"is_image": True, "image_caption": chunk.get("image_caption", "")}
86
+ if chunk.get("is_image") else {}),
87
  }
88
  for chunk in chunks
89
  ]
backend/app/rag/vision.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Image captioning / vision helpers for RAG pipeline.
2
+
3
+ Provides a simple, pluggable interface to generate textual descriptions
4
+ for images extracted from PDFs. By default it uses local OCR (pytesseract)
5
+ when available as a robust fallback. An external VLM provider (OpenAI)
6
+ can be integrated by setting `VISION_PROVIDER` and appropriate API keys
7
+ in settings; the provider hook is intentionally small and optional.
8
+ """
9
+ import logging
10
+ from typing import List, Dict, Any
11
+ from io import BytesIO
12
+
13
+ from app.config import get_settings
14
+
15
+ logger = logging.getLogger(__name__)
16
+ settings = get_settings()
17
+
18
+
19
+ def _ocr_caption(image_bytes: bytes) -> str:
20
+ """Try to produce a caption using pytesseract OCR; returns empty string if not available."""
21
+ try:
22
+ from PIL import Image
23
+ import pytesseract
24
+ except Exception:
25
+ return ""
26
+
27
+ try:
28
+ img = Image.open(BytesIO(image_bytes)).convert("RGB")
29
+ text = pytesseract.image_to_string(img)
30
+ text = text.strip()
31
+ return text
32
+ except Exception as e:
33
+ logger.debug(f"OCR failed: {e}")
34
+ return ""
35
+
36
+
37
+ def caption_image(image_bytes: bytes, page: int | None = None) -> str:
38
+ """Generate a caption for a single image.
39
+
40
+ Order of operations:
41
+ - If an external VLM provider is configured, attempt to call it (not implemented as mandatory).
42
+ - Fall back to local OCR (pytesseract) if available.
43
+ - Otherwise return a simple placeholder caption including the page number.
44
+ """
45
+ # Placeholder for provider-based captioning (e.g., OpenAI / LLaVA hooks)
46
+ provider = getattr(settings, "VISION_PROVIDER", None)
47
+ if provider == "openai":
48
+ try:
49
+ import openai
50
+ # Minimal integration: attempt a text-only caption via responses if available.
51
+ # This is a best-effort hook; users should adapt to their provider's API.
52
+ api_key = getattr(settings, "OPENAI_API_KEY", None)
53
+ if api_key:
54
+ openai.api_key = api_key
55
+ # Use a generic prompt: "Describe the following image"
56
+ # Note: concrete multimodal API usage may vary across SDK versions.
57
+ resp = openai.Image.create(
58
+ prompt="Describe this image in one concise sentence.",
59
+ n=1,
60
+ # We do not re-upload image bytes here; this is a placeholder to show
61
+ # where provider code would be invoked. For production, follow
62
+ # provider docs for sending image data.
63
+ )
64
+ # openai.Image.create returns generated images, not captions — so skip.
65
+ except Exception:
66
+ # If provider integration fails, fall back to OCR below
67
+ logger.debug("OpenAI vision provider failed, falling back to OCR")
68
+
69
+ # Try OCR caption
70
+ ocr = _ocr_caption(image_bytes)
71
+ if ocr:
72
+ # Keep it short if very long
73
+ return (ocr[:500] + "...") if len(ocr) > 500 else ocr
74
+
75
+ # Last-resort caption
76
+ if page:
77
+ return f"Image on page {page}."
78
+ return "Image."
79
+
80
+
81
def generate_captions_for_chunks(chunks: List[Dict[str, Any]]) -> None:
    """Mutate chunks in-place: caption every image chunk that has no text yet.

    For each chunk containing ``image_bytes`` but empty ``text``:
    - ``image_bytes`` is removed so raw bytes are not serialized later;
    - ``text`` and ``image_caption`` are set to the generated caption;
    - ``is_image`` is set to ``True``.

    If captioning raises or yields nothing, a placeholder naming the page is
    used instead, so the chunk never ends up with empty text.
    """
    for chunk in chunks:
        if not chunk.get("image_bytes") or chunk.get("text"):
            continue

        # Remove raw bytes to avoid accidentally serializing them later
        image_bytes = chunk.pop("image_bytes")

        try:
            caption = caption_image(image_bytes, page=chunk.get("page"))
        except Exception as e:
            logger.debug(f"Failed to caption image chunk: {e}")
            caption = ""

        if not caption:
            # Assign explicitly: these chunks already carry an empty "text"
            # key, so dict.setdefault would leave the text blank.
            caption = f"Image on page {chunk.get('page')}"

        chunk["text"] = caption
        chunk["is_image"] = True
        chunk["image_caption"] = caption
backend/app/routes/auth.py CHANGED
@@ -11,7 +11,7 @@ from sqlalchemy.orm import Session
11
  from sqlalchemy import select
12
  from app.config import get_settings
13
  from app.database import get_db
14
- from app.models import User
15
  from app.schemas import (
16
  GoogleLoginRequest,
17
  RefreshRequest,
@@ -23,6 +23,8 @@ from app.schemas import (
23
  UserResponse,
24
  UserUpdate,
25
  UserUpdateResponse,
 
 
26
  )
27
  from app.auth import hash_password, verify_password, create_access_token, create_refresh_token, get_current_user, decode_token
28
 
@@ -383,6 +385,42 @@ def update_password(payload:UpdatePassword,
383
  db.rollback()
384
  raise HTTPException(status_code=400, detail="Database error")
385
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
386
  @router.get("/config")
387
  def get_auth_config():
388
  """Return public configuration for auth providers"""
 
11
  from sqlalchemy import select
12
  from app.config import get_settings
13
  from app.database import get_db
14
+ from app.models import User, ApiKey
15
  from app.schemas import (
16
  GoogleLoginRequest,
17
  RefreshRequest,
 
23
  UserResponse,
24
  UserUpdate,
25
  UserUpdateResponse,
26
+ ApiKeyResponse,
27
+ ApiKeyCreateResponse,
28
  )
29
  from app.auth import hash_password, verify_password, create_access_token, create_refresh_token, get_current_user, decode_token
30
 
 
385
  db.rollback()
386
  raise HTTPException(status_code=400, detail="Database error")
387
 
388
+ from typing import List
389
+ import hashlib
390
+
391
+ @router.post("/api-keys", response_model=ApiKeyCreateResponse, status_code=status.HTTP_201_CREATED)
392
+ def create_api_key(user: User = Depends(get_current_user), db: Session = Depends(get_db)):
393
+ """Create a new API key for the authenticated user."""
394
+ raw_key = "rag_" + secrets.token_urlsafe(32)
395
+ hashed_key = hashlib.sha256(raw_key.encode("utf-8")).hexdigest()
396
+
397
+ api_key = ApiKey(
398
+ user_id=user.id,
399
+ key_prefix=raw_key[:10],
400
+ hashed_key=hashed_key,
401
+ )
402
+ db.add(api_key)
403
+ db.commit()
404
+ db.refresh(api_key)
405
+
406
+ return {"key": raw_key, "api_key": api_key}
407
+
408
+ @router.get("/api-keys", response_model=List[ApiKeyResponse])
409
+ def list_api_keys(user: User = Depends(get_current_user), db: Session = Depends(get_db)):
410
+ """List all API keys for the authenticated user."""
411
+ return db.query(ApiKey).filter(ApiKey.user_id == user.id).all()
412
+
413
+ @router.delete("/api-keys/{key_id}", status_code=status.HTTP_204_NO_CONTENT)
414
+ def delete_api_key(key_id: str, user: User = Depends(get_current_user), db: Session = Depends(get_db)):
415
+ """Revoke an API key."""
416
+ api_key = db.query(ApiKey).filter(ApiKey.id == key_id, ApiKey.user_id == user.id).first()
417
+ if not api_key:
418
+ raise HTTPException(status_code=404, detail="API key not found")
419
+
420
+ db.delete(api_key)
421
+ db.commit()
422
+ return None
423
+
424
  @router.get("/config")
425
  def get_auth_config():
426
  """Return public configuration for auth providers"""
backend/app/schemas.py CHANGED
@@ -53,11 +53,17 @@ class RefreshRequest(BaseModel):
53
  refresh_token: str
54
 
55
 
 
 
 
 
 
56
  class UserResponse(BaseModel):
57
  id: str
58
  username: str
59
  email: str
60
  is_admin: bool
 
61
  created_at: datetime
62
 
63
  class Config:
@@ -136,5 +142,22 @@ class ChatHistoryResponse(BaseModel):
136
  document_id: Optional[str] = None
137
 
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  # Rebuild models for forward references
140
  TokenResponse.model_rebuild()
 
53
  refresh_token: str
54
 
55
 
56
+ class HFTokenUpdate(BaseModel):
57
+ """Request schema for updating the user's HuggingFace token."""
58
+ hf_token: str
59
+
60
+
61
  class UserResponse(BaseModel):
62
  id: str
63
  username: str
64
  email: str
65
  is_admin: bool
66
+ hf_token: Optional[str] = None
67
  created_at: datetime
68
 
69
  class Config:
 
142
  document_id: Optional[str] = None
143
 
144
 
145
+ # ── ApiKeys ─────────────────────────────────────────────
146
+
147
+ class ApiKeyResponse(BaseModel):
148
+ id: str
149
+ key_prefix: str
150
+ created_at: datetime
151
+ last_used: Optional[datetime] = None
152
+
153
+ class Config:
154
+ from_attributes = True
155
+
156
+
157
+ class ApiKeyCreateResponse(BaseModel):
158
+ key: str
159
+ api_key: ApiKeyResponse
160
+
161
+
162
  # Rebuild models for forward references
163
  TokenResponse.model_rebuild()
backend/requirements.txt CHANGED
@@ -18,6 +18,9 @@ google-auth
18
  # Config
19
  pydantic-settings
20
  pydantic[email]
 
 
 
21
 
22
  # Document Processing
23
  PyMuPDF
@@ -28,6 +31,7 @@ langchain
28
  langchain-community
29
  langchain-huggingface
30
  langchain-text-splitters
 
31
 
32
  # Embeddings & ML
33
  sentence-transformers
 
18
  # Config
19
  pydantic-settings
20
  pydantic[email]
21
+ pytest
22
+ pytest-cov
23
+ httpx
24
 
25
  # Document Processing
26
  PyMuPDF
 
31
  langchain-community
32
  langchain-huggingface
33
  langchain-text-splitters
34
+ langsmith
35
 
36
  # Embeddings & ML
37
  sentence-transformers
backend/tests/conftest.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import types
4
+ from contextlib import asynccontextmanager
5
+ from pathlib import Path
6
+
7
+ import pytest
8
+ from fastapi.testclient import TestClient
9
+ from sqlalchemy import create_engine
10
+ from sqlalchemy.orm import sessionmaker
11
+
12
+
13
+ ROOT = Path(__file__).resolve().parents[2]
14
+ BACKEND_DIR = ROOT / "backend"
15
+
16
+ if str(BACKEND_DIR) not in sys.path:
17
+ sys.path.insert(0, str(BACKEND_DIR))
18
+
19
+ os.environ.setdefault("SECRET_KEY", "test-secret-key")
20
+ os.environ.setdefault("DATABASE_URL", "sqlite:///./test_bootstrap.db")
21
+ os.environ.setdefault("HF_TOKEN", "test-hf-token")
22
+ os.environ.setdefault("UPLOAD_DIR", str(ROOT / "backend" / "test_uploads"))
23
+ os.environ.setdefault("CHROMA_PERSIST_DIR", str(ROOT / "backend" / "test_chroma"))
24
+
25
+
26
+ fake_embeddings = types.ModuleType("app.rag.embeddings")
27
+ fake_embeddings.get_embedding_model = lambda: object()
28
+ fake_embeddings.embed_query = lambda query: [0.0]
29
+ fake_embeddings.embed_texts = lambda texts: [[0.0] for _ in texts]
30
+ sys.modules.setdefault("app.rag.embeddings", fake_embeddings)
31
+
32
+
33
+ class _FakeChromaClient:
34
+ def heartbeat(self):
35
+ return "ok"
36
+
37
+
38
+ fake_vectorstore = types.ModuleType("app.rag.vectorstore")
39
+ fake_vectorstore.get_chroma_client = lambda: _FakeChromaClient()
40
+ fake_vectorstore.store_chunks = lambda chunks, document_id, filename, user_id: len(chunks)
41
+ fake_vectorstore.delete_document_chunks = lambda document_id, user_id: None
42
+ fake_vectorstore.query_chunks = lambda query_embedding, user_id, document_id=None, top_k=10: []
43
+ sys.modules.setdefault("app.rag.vectorstore", fake_vectorstore)
44
+
45
+ slowapi_module = types.ModuleType("slowapi")
46
+ slowapi_errors = types.ModuleType("slowapi.errors")
47
+ slowapi_middleware = types.ModuleType("slowapi.middleware")
48
+ slowapi_util = types.ModuleType("slowapi.util")
49
+
50
+
51
+ class RateLimitExceeded(Exception):
52
+ pass
53
+
54
+
55
+ class SlowAPIMiddleware:
56
+ def __init__(self, app, *args, **kwargs):
57
+ self.app = app
58
+
59
+ async def __call__(self, scope, receive, send):
60
+ await self.app(scope, receive, send)
61
+
62
+
63
+ class Limiter:
64
+ def __init__(self, key_func=None, *args, **kwargs):
65
+ self.key_func = key_func
66
+
67
+ def limit(self, _value):
68
+ def decorator(fn):
69
+ return fn
70
+ return decorator
71
+
72
+
73
+ slowapi_errors.RateLimitExceeded = RateLimitExceeded
74
+ slowapi_middleware.SlowAPIMiddleware = SlowAPIMiddleware
75
+ slowapi_util.get_remote_address = lambda request: "127.0.0.1"
76
+ slowapi_module.Limiter = Limiter
77
+
78
+ sys.modules.setdefault("slowapi", slowapi_module)
79
+ sys.modules.setdefault("slowapi.errors", slowapi_errors)
80
+ sys.modules.setdefault("slowapi.middleware", slowapi_middleware)
81
+ sys.modules.setdefault("slowapi.util", slowapi_util)
82
+
83
+ from app.auth import create_access_token, create_refresh_token, hash_password
84
+ from app.database import Base, get_db
85
+ from app.main import app
86
+ from app.models import Document, User
87
+
88
+
89
+ @pytest.fixture()
90
+ def db_session(tmp_path):
91
+ db_file = tmp_path / "test.db"
92
+ engine = create_engine(
93
+ f"sqlite:///{db_file}",
94
+ connect_args={"check_same_thread": False},
95
+ )
96
+ TestingSessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
97
+ Base.metadata.create_all(bind=engine)
98
+
99
+ session = TestingSessionLocal()
100
+ try:
101
+ yield session
102
+ finally:
103
+ session.close()
104
+ Base.metadata.drop_all(bind=engine)
105
+ engine.dispose()
106
+
107
+
108
+ @pytest.fixture()
109
+ def client(db_session, monkeypatch):
110
+ def override_get_db():
111
+ try:
112
+ yield db_session
113
+ finally:
114
+ pass
115
+
116
+ @asynccontextmanager
117
+ async def no_lifespan(_app):
118
+ yield
119
+
120
+ monkeypatch.setattr("app.database.SessionLocal", lambda: db_session)
121
+ app.dependency_overrides[get_db] = override_get_db
122
+ app.router.lifespan_context = no_lifespan
123
+
124
+ with TestClient(app) as test_client:
125
+ yield test_client
126
+
127
+ app.dependency_overrides.clear()
128
+
129
+
130
+ @pytest.fixture()
131
+ def user(db_session):
132
+ instance = User(
133
+ username="tester",
134
+ email="tester@example.com",
135
+ hashed_password=hash_password("password123"),
136
+ )
137
+ db_session.add(instance)
138
+ db_session.commit()
139
+ db_session.refresh(instance)
140
+ return instance
141
+
142
+
143
+ @pytest.fixture()
144
+ def auth_headers(user):
145
+ token = create_access_token(user.id)
146
+ return {"Authorization": f"Bearer {token}"}
147
+
148
+
149
+ @pytest.fixture()
150
+ def refresh_token(user):
151
+ return create_refresh_token(user.id)
152
+
153
+
154
+ @pytest.fixture()
155
+ def ready_document(db_session, user):
156
+ instance = Document(
157
+ user_id=user.id,
158
+ filename="ready.txt",
159
+ original_name="ready.txt",
160
+ file_size=128,
161
+ page_count=1,
162
+ chunk_count=2,
163
+ status="ready",
164
+ )
165
+ db_session.add(instance)
166
+ db_session.commit()
167
+ db_session.refresh(instance)
168
+ return instance
169
+
170
+
171
+ @pytest.fixture()
172
+ def pending_document(db_session, user):
173
+ instance = Document(
174
+ user_id=user.id,
175
+ filename="pending.txt",
176
+ original_name="pending.txt",
177
+ file_size=64,
178
+ status="pending",
179
+ )
180
+ db_session.add(instance)
181
+ db_session.commit()
182
+ db_session.refresh(instance)
183
+ return instance
backend/tests/test_auth.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ def test_register_success(client):
2
+ response = client.post(
3
+ "/api/v1/auth/register",
4
+ json={
5
+ "username": "newuser",
6
+ "email": "newuser@example.com",
7
+ "password": "password123",
8
+ },
9
+ )
10
+
11
+ assert response.status_code == 201
12
+ payload = response.json()
13
+ assert payload["access_token"]
14
+ assert payload["refresh_token"]
15
+ assert payload["user"]["email"] == "newuser@example.com"
16
+
17
+
18
+ def test_register_duplicate_email_or_username_conflict(client):
19
+ payload = {
20
+ "username": "dupuser",
21
+ "email": "dup@example.com",
22
+ "password": "password123",
23
+ }
24
+ first = client.post("/api/v1/auth/register", json=payload)
25
+ assert first.status_code == 201
26
+
27
+ duplicate_email = client.post(
28
+ "/api/v1/auth/register",
29
+ json={**payload, "username": "anotheruser"},
30
+ )
31
+ assert duplicate_email.status_code == 409
32
+ assert duplicate_email.json()["detail"] == "Email already registered"
33
+
34
+ duplicate_username = client.post(
35
+ "/api/v1/auth/register",
36
+ json={**payload, "email": "another@example.com"},
37
+ )
38
+ assert duplicate_username.status_code == 409
39
+ assert duplicate_username.json()["detail"] == "Username already taken"
40
+
41
+
42
+ def test_login_success(client, user):
43
+ response = client.post(
44
+ "/api/v1/auth/login",
45
+ json={"email": user.email, "password": "password123"},
46
+ )
47
+
48
+ assert response.status_code == 200
49
+ payload = response.json()
50
+ assert payload["access_token"]
51
+ assert payload["refresh_token"]
52
+ assert payload["user"]["username"] == user.username
53
+
54
+
55
+ def test_login_invalid_password(client, user):
56
+ response = client.post(
57
+ "/api/v1/auth/login",
58
+ json={"email": user.email, "password": "wrong-password"},
59
+ )
60
+
61
+ assert response.status_code == 401
62
+ assert response.json()["detail"] == "Invalid email or password"
63
+
64
+
65
+ def test_auth_me_requires_auth(client):
66
+ response = client.get("/api/v1/auth/me")
67
+
68
+ assert response.status_code in (401, 403)
69
+
70
+
71
+ def test_refresh_token_success(client, refresh_token):
72
+ response = client.post(
73
+ "/api/v1/auth/refresh",
74
+ json={"refresh_token": refresh_token},
75
+ )
76
+
77
+ assert response.status_code == 200
78
+ payload = response.json()
79
+ assert payload["access_token"]
80
+ assert payload["refresh_token"]
81
+ assert payload["token_type"] == "bearer"
backend/tests/test_chat.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ def test_chat_ask_success(client, auth_headers, ready_document, monkeypatch):
2
+ monkeypatch.setattr(
3
+ "app.routes.chat.generate_answer",
4
+ lambda question, user_id, document_id=None: {
5
+ "answer": "Mocked answer",
6
+ "sources": [
7
+ {
8
+ "text": "Mock source",
9
+ "filename": "ready.txt",
10
+ "page": 1,
11
+ "score": 0.99,
12
+ "confidence": 99.0,
13
+ }
14
+ ],
15
+ },
16
+ )
17
+
18
+ response = client.post(
19
+ "/api/v1/chat/ask",
20
+ headers=auth_headers,
21
+ json={"question": "What is in the doc?", "document_id": ready_document.id},
22
+ )
23
+
24
+ assert response.status_code == 200
25
+ payload = response.json()
26
+ assert payload["answer"] == "Mocked answer"
27
+ assert payload["document_id"] == ready_document.id
28
+ assert payload["sources"][0]["filename"] == "ready.txt"
29
+
30
+
31
+ def test_chat_ask_document_not_found(client, auth_headers):
32
+ response = client.post(
33
+ "/api/v1/chat/ask",
34
+ headers=auth_headers,
35
+ json={"question": "Missing doc?", "document_id": "missing-doc-id"},
36
+ )
37
+
38
+ assert response.status_code == 404
39
+ assert response.json()["detail"] == "Document not found"
40
+
41
+
42
+ def test_chat_ask_document_not_ready(client, auth_headers, pending_document):
43
+ response = client.post(
44
+ "/api/v1/chat/ask",
45
+ headers=auth_headers,
46
+ json={"question": "Pending doc?", "document_id": pending_document.id},
47
+ )
48
+
49
+ assert response.status_code == 400
50
+ assert "Document is still pending" in response.json()["detail"]
backend/tests/test_chunker.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ import pytest
4
+
5
+ from app.rag.chunker import chunk_document, get_page_count
6
+
7
+
8
+ def test_txt_extraction_and_chunking(tmp_path):
9
+ file_path = tmp_path / "notes.txt"
10
+ file_path.write_text("This is a sample text file for chunking.", encoding="utf-8")
11
+
12
+ chunks = chunk_document(str(file_path))
13
+
14
+ assert len(chunks) >= 1
15
+ assert chunks[0]["page"] == 1
16
+ assert "sample text file" in chunks[0]["text"]
17
+
18
+
19
+ def test_empty_txt_returns_no_chunks(tmp_path):
20
+ file_path = tmp_path / "empty.txt"
21
+ file_path.write_text(" \n", encoding="utf-8")
22
+
23
+ assert chunk_document(str(file_path)) == []
24
+
25
+
26
+ def test_unsupported_extension_raises_value_error(tmp_path):
27
+ file_path = tmp_path / "data.csv"
28
+ file_path.write_text("a,b,c", encoding="utf-8")
29
+
30
+ with pytest.raises(ValueError, match="Unsupported file type"):
31
+ chunk_document(str(file_path))
32
+
33
+
34
+ def test_get_page_count_for_txt_returns_one(tmp_path):
35
+ file_path = tmp_path / "single.txt"
36
+ file_path.write_text("hello", encoding="utf-8")
37
+
38
+ assert get_page_count(str(file_path)) == 1
backend/tests/test_documents.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ def test_api_health(client):
2
+ response = client.get("/api/health")
3
+
4
+ assert response.status_code == 200
5
+ payload = response.json()
6
+ assert payload["status"] == "healthy"
7
+ assert payload["version"] == "2.0.0"
8
+
9
+
10
+ def test_protected_documents_list_requires_auth(client):
11
+ response = client.get("/api/v1/documents/")
12
+
13
+ assert response.status_code in (401, 403)
14
+
15
+
16
+ def test_documents_list_authenticated(client, auth_headers, ready_document):
17
+ response = client.get("/api/v1/documents/", headers=auth_headers)
18
+
19
+ assert response.status_code == 200
20
+ payload = response.json()
21
+ assert payload["total"] == 1
22
+ assert payload["items"][0]["id"] == ready_document.id
23
+ assert payload["items"][0]["original_name"] == "ready.txt"
24
+
25
+
26
+ def test_upload_rejects_unsupported_extension_before_deep_validation(client, auth_headers):
27
+ response = client.post(
28
+ "/api/v1/documents/upload",
29
+ headers=auth_headers,
30
+ files={"file": ("payload.exe", b"binary-data", "application/octet-stream")},
31
+ )
32
+
33
+ assert response.status_code == 400
34
+ assert "not supported" in response.json()["detail"]
bots/discord/README.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Discord RAG Bot
2
+
3
+ This bot connects to the PDF-Assistant-RAG backend to answer questions based on your uploaded documents, directly from Discord.
4
+
5
+ ## Setup
6
+
7
+ 1. Install dependencies:
8
+ ```bash
9
+ pip install -r requirements.txt
10
+ ```
11
+
12
+ 2. Create a Discord Bot on the [Discord Developer Portal](https://discord.com/developers/applications):
13
+ - Go to the "Bot" tab and enable **Message Content Intent**.
14
+ - Copy the bot token.
15
+ - Invite the bot to your server via the OAuth2 URL Generator (check the `bot` scope and the `Send Messages` permission).
16
+
17
+ 3. Generate an API Key from your PDF-Assistant-RAG profile dashboard.
18
+
19
+ 4. Set the environment variables and run:
20
+ ```bash
21
+ export DISCORD_TOKEN="your-discord-bot-token"
22
+ export RAG_API_KEY="rag_your-api-key"
23
+
24
+ # Optional: set API_URL if the backend is not running on localhost:8000
25
+ # export API_URL="http://localhost:8000/api/v1"
26
+
27
+ python bot.py
28
+ ```
29
+
30
+ ## Usage
31
+ In a Discord channel where the bot is present, simply use the `!ask` command:
32
+
33
+ ```
34
+ !ask Summarize the latest uploaded report for me
35
+ ```
36
+
37
+ The bot will query the backend API using your personal API key and reply with the generated answer.
bots/discord/bot.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import discord
3
+ import requests
4
+ from discord.ext import commands
5
+
6
+ DISCORD_TOKEN = os.getenv("DISCORD_TOKEN")
7
+ API_URL = os.getenv("API_URL", "http://localhost:8000/api/v1")
8
+ RAG_API_KEY = os.getenv("RAG_API_KEY")
9
+
10
+ if not DISCORD_TOKEN or not RAG_API_KEY:
11
+ print("Error: DISCORD_TOKEN and RAG_API_KEY must be set in environment variables.")
12
+ exit(1)
13
+
14
+ intents = discord.Intents.default()
15
+ intents.message_content = True
16
+ bot = commands.Bot(command_prefix="!", intents=intents)
17
+
18
+ @bot.event
19
+ async def on_ready():
20
+ print(f"Logged in as {bot.user.name} ({bot.user.id})")
21
+ print("Ready to answer questions via '!ask <question>'")
22
+
23
+ @bot.command(name="ask")
24
+ async def ask_rag(ctx, *, question: str):
25
+ """Ask the RAG Assistant a question. Example: !ask What is in my documents?"""
26
+ loading_msg = await ctx.send("🤔 Thinking...")
27
+
28
+ try:
29
+ headers = {
30
+ "Authorization": f"Bearer {RAG_API_KEY}",
31
+ "Content-Type": "application/json"
32
+ }
33
+
34
+ # We can also support document_id if we want, but for now we do global ask.
35
+ payload = {"question": question}
36
+
37
+ response = requests.post(
38
+ f"{API_URL}/chat/ask",
39
+ json=payload,
40
+ headers=headers,
41
+ timeout=30 # Give the RAG backend some time to process
42
+ )
43
+
44
+ if response.status_code == 200:
45
+ data = response.json()
46
+ answer = data.get("answer", "No answer provided.")
47
+
48
+ if len(answer) > 2000:
49
+ # Discord has a 2000 character limit per message
50
+ chunks = [answer[i:i+2000] for i in range(0, len(answer), 2000)]
51
+ await loading_msg.edit(content=chunks[0])
52
+ for chunk in chunks[1:]:
53
+ await ctx.send(chunk)
54
+ else:
55
+ await loading_msg.edit(content=answer)
56
+ else:
57
+ await loading_msg.edit(content=f"⚠️ Error from RAG API: `{response.status_code}`")
58
+ print(f"API Error: {response.text}")
59
+
60
+ except requests.exceptions.RequestException as e:
61
+ await loading_msg.edit(content=f"❌ Failed to connect to backend API.")
62
+ print(f"Request Error: {e}")
63
+ except Exception as e:
64
+ await loading_msg.edit(content=f"❌ An unexpected error occurred.")
65
+ print(f"Error: {e}")
66
+
67
+ if __name__ == "__main__":
68
+ bot.run(DISCORD_TOKEN)
bots/discord/requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ discord.py==2.3.2
2
+ requests==2.31.0
frontend/package-lock.json CHANGED
@@ -15,6 +15,7 @@
15
  "i18next-browser-languagedetector": "^8.2.1",
16
  "lucide-react": "^1.8.0",
17
  "next": "16.2.4",
 
18
  "pdfjs-dist": "^5.6.205",
19
  "react": "19.2.4",
20
  "react-dom": "19.2.4",
@@ -8755,6 +8756,16 @@
8755
  }
8756
  }
8757
  },
 
 
 
 
 
 
 
 
 
 
8758
  "node_modules/next/node_modules/postcss": {
8759
  "version": "8.4.31",
8760
  "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.31.tgz",
 
15
  "i18next-browser-languagedetector": "^8.2.1",
16
  "lucide-react": "^1.8.0",
17
  "next": "16.2.4",
18
+ "next-themes": "^0.4.6",
19
  "pdfjs-dist": "^5.6.205",
20
  "react": "19.2.4",
21
  "react-dom": "19.2.4",
 
8756
  }
8757
  }
8758
  },
8759
+ "node_modules/next-themes": {
8760
+ "version": "0.4.6",
8761
+ "resolved": "https://registry.npmjs.org/next-themes/-/next-themes-0.4.6.tgz",
8762
+ "integrity": "sha512-pZvgD5L0IEvX5/9GWyHMf3m8BKiVQwsCMHfoFosXtXBMnaS0ZnIJ9ST4b4NqLVKDEm8QBxoNNGNaBv2JNF6XNA==",
8763
+ "license": "MIT",
8764
+ "peerDependencies": {
8765
+ "react": "^16.8 || ^17 || ^18 || ^19 || ^19.0.0-rc",
8766
+ "react-dom": "^16.8 || ^17 || ^18 || ^19 || ^19.0.0-rc"
8767
+ }
8768
+ },
8769
  "node_modules/next/node_modules/postcss": {
8770
  "version": "8.4.31",
8771
  "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.31.tgz",
frontend/package.json CHANGED
@@ -18,6 +18,7 @@
18
  "i18next-browser-languagedetector": "^8.2.1",
19
  "lucide-react": "^1.8.0",
20
  "next": "16.2.4",
 
21
  "pdfjs-dist": "^5.6.205",
22
  "react": "19.2.4",
23
  "react-dom": "19.2.4",
 
18
  "i18next-browser-languagedetector": "^8.2.1",
19
  "lucide-react": "^1.8.0",
20
  "next": "16.2.4",
21
+ "next-themes": "^0.4.6",
22
  "pdfjs-dist": "^5.6.205",
23
  "react": "19.2.4",
24
  "react-dom": "19.2.4",
frontend/src/app/dashboard/page.tsx CHANGED
@@ -57,11 +57,23 @@ export default function DashboardPage() {
57
  const [connectionError, setConnectionError] = useState("");
58
  const [documentsLoading, setDocumentsLoading] = useState(true);
59
 
60
- // Auth guard
61
  useEffect(() => {
62
  if (!loading && !user) router.replace("/login");
63
  }, [user, loading, router]);
64
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  // Load documents
66
  const loadDocuments = useCallback(async () => {
67
  try {
 
57
  const [connectionError, setConnectionError] = useState("");
58
  const [documentsLoading, setDocumentsLoading] = useState(true);
59
 
60
+ // Auth guard
61
  useEffect(() => {
62
  if (!loading && !user) router.replace("/login");
63
  }, [user, loading, router]);
64
 
65
+ // Log a console warning when no Hugging Face token is stored in localStorage (no redirect or blocking happens here)
66
+ useEffect(() => {
67
+ if (user) {
68
+ const existingHfToken = localStorage.getItem("hf_token");
69
+
70
+ if (!existingHfToken) {
71
+ console.warn("Hugging Face API configuration key missing.");
72
+ }
73
+ }
74
+ }, [user]);
75
+
76
+
77
  // Load documents
78
  const loadDocuments = useCallback(async () => {
79
  try {
frontend/src/app/globals.css CHANGED
@@ -83,6 +83,35 @@
83
  --sidebar-ring: oklch(0.65 0.2 265);
84
  }
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  .light {
87
  --background: oklch(0.985 0 0);
88
  --foreground: oklch(0.145 0 0);
 
83
  --sidebar-ring: oklch(0.65 0.2 265);
84
  }
85
 
86
+ .dark {
87
+ --background: oklch(0.145 0 0);
88
+ --foreground: oklch(0.985 0 0);
89
+ --card: oklch(0.178 0 0);
90
+ --card-foreground: oklch(0.985 0 0);
91
+ --popover: oklch(0.178 0 0);
92
+ --popover-foreground: oklch(0.985 0 0);
93
+ --primary: oklch(0.65 0.2 265);
94
+ --primary-foreground: oklch(0.985 0 0);
95
+ --secondary: oklch(0.22 0 0);
96
+ --secondary-foreground: oklch(0.985 0 0);
97
+ --muted: oklch(0.22 0 0);
98
+ --muted-foreground: oklch(0.6 0 0);
99
+ --accent: oklch(0.55 0.18 265);
100
+ --accent-foreground: oklch(0.985 0 0);
101
+ --destructive: oklch(0.704 0.191 22.216);
102
+ --border: oklch(1 0 0 / 10%);
103
+ --input: oklch(1 0 0 / 12%);
104
+ --ring: oklch(0.65 0.2 265);
105
+ --sidebar: oklch(0.12 0 0);
106
+ --sidebar-foreground: oklch(0.985 0 0);
107
+ --sidebar-primary: oklch(0.65 0.2 265);
108
+ --sidebar-primary-foreground: oklch(0.985 0 0);
109
+ --sidebar-accent: oklch(0.22 0 0);
110
+ --sidebar-accent-foreground: oklch(0.985 0 0);
111
+ --sidebar-border: oklch(1 0 0 / 8%);
112
+ --sidebar-ring: oklch(0.65 0.2 265);
113
+ }
114
+
115
  .light {
116
  --background: oklch(0.985 0 0);
117
  --foreground: oklch(0.145 0 0);
frontend/src/app/layout.tsx CHANGED
@@ -4,6 +4,7 @@ import "./globals.css";
4
  import { AuthProvider } from "@/lib/auth";
5
  import { TooltipProvider } from "@/components/ui/tooltip";
6
  import I18nProvider from "@/components/providers/I18nProvider";
 
7
 
8
  const inter = Inter({
9
  variable: "--font-sans",
@@ -24,15 +25,20 @@ export default function RootLayout({
24
  children: React.ReactNode;
25
  }>) {
26
  return (
27
- <html lang="en" className={`${inter.variable} dark h-full antialiased`}>
28
  <body className="min-h-full flex flex-col bg-background text-foreground">
29
- <AuthProvider>
30
- <I18nProvider>
31
- <TooltipProvider>
32
- {children}
33
- </TooltipProvider>
34
- </I18nProvider>
35
- </AuthProvider>
 
 
 
 
 
36
  </body>
37
  </html>
38
  );
 
4
  import { AuthProvider } from "@/lib/auth";
5
  import { TooltipProvider } from "@/components/ui/tooltip";
6
  import I18nProvider from "@/components/providers/I18nProvider";
7
+ import { ThemeProvider } from "@/components/layout/ThemeProvider";
8
 
9
  const inter = Inter({
10
  variable: "--font-sans",
 
25
  children: React.ReactNode;
26
  }>) {
27
  return (
28
+ <html lang="en" className={`${inter.variable} h-full antialiased`} suppressHydrationWarning>
29
  <body className="min-h-full flex flex-col bg-background text-foreground">
30
+ <ThemeProvider
31
+ attribute="class"
32
+ defaultTheme="dark"
33
+ enableSystem={false}
34
+ disableTransitionOnChange
35
+ >
36
+ <AuthProvider>
37
+ <I18nProvider>
38
+ <TooltipProvider>{children}</TooltipProvider>
39
+ </I18nProvider>
40
+ </AuthProvider>
41
+ </ThemeProvider>
42
  </body>
43
  </html>
44
  );
frontend/src/components/auth/ApiKeyManager.tsx ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client";
2
+
3
+ import { useState, useEffect } from "react";
4
+ import { Button } from "@/components/ui/button";
5
+ import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogTrigger } from "@/components/ui/dialog";
6
+ import { api } from "@/lib/api";
7
+ import { Key, Plus, Trash2, Copy, Check } from "lucide-react";
8
+
9
+ interface ApiKey {
10
+ id: string;
11
+ key_prefix: string;
12
+ created_at: string;
13
+ last_used: string | null;
14
+ }
15
+
16
+ export default function ApiKeyManager() {
17
+ const [keys, setKeys] = useState<ApiKey[]>([]);
18
+ const [newKey, setNewKey] = useState<string | null>(null);
19
+ const [loading, setLoading] = useState(false);
20
+ const [copied, setCopied] = useState(false);
21
+
22
+ const fetchKeys = async () => {
23
+ try {
24
+ setLoading(true);
25
+ const data = await api.get<ApiKey[]>("/api/v1/auth/api-keys");
26
+ setKeys(data || []);
27
+ } catch (err) {
28
+ console.error("Failed to load API keys", err);
29
+ } finally {
30
+ setLoading(false);
31
+ }
32
+ };
33
+
34
+ useEffect(() => {
35
+ const timer = setTimeout(() => {
36
+ fetchKeys();
37
+ }, 0);
38
+ return () => clearTimeout(timer);
39
+ }, []);
40
+
41
+ const generateKey = async () => {
42
+ try {
43
+ setLoading(true);
44
+ const data = await api.post<{ key: string; api_key: ApiKey }>("/api/v1/auth/api-keys");
45
+ setNewKey(data.key);
46
+ setKeys((prev) => [...prev, data.api_key]);
47
+ } catch (err) {
48
+ console.error("Failed to generate API key", err);
49
+ } finally {
50
+ setLoading(false);
51
+ }
52
+ };
53
+
54
+ const revokeKey = async (id: string) => {
55
+ if (!confirm("Are you sure you want to revoke this key? Any integrations using it will immediately break.")) return;
56
+
57
+ try {
58
+ await api.delete(`/api/v1/auth/api-keys/${id}`);
59
+ setKeys((prev) => prev.filter((k) => k.id !== id));
60
+ } catch (err) {
61
+ console.error("Failed to revoke API key", err);
62
+ }
63
+ };
64
+
65
+ const copyToClipboard = () => {
66
+ if (newKey) {
67
+ navigator.clipboard.writeText(newKey);
68
+ setCopied(true);
69
+ setTimeout(() => setCopied(false), 2000);
70
+ }
71
+ };
72
+
73
+ return (
74
+ <Dialog onOpenChange={(open) => { if (!open) setNewKey(null); }}>
75
+ <DialogTrigger
76
+ render={
77
+ <button className="flex w-full cursor-pointer items-center rounded-sm px-2 py-1.5 text-sm outline-none transition-colors hover:bg-accent hover:text-accent-foreground">
78
+ <Key className="mr-2 h-4 w-4" />
79
+ <span>API Keys</span>
80
+ </button>
81
+ }
82
+ />
83
+ <DialogContent className="max-w-2xl sm:rounded-2xl border-border/40 p-6 md:p-8 bg-background/95 backdrop-blur-xl shadow-2xl">
84
+
85
+ <DialogHeader>
86
+ <DialogTitle className="text-2xl font-bold tracking-tight">API Keys</DialogTitle>
87
+ <p className="text-sm text-muted-foreground mt-1.5">
88
+ Manage API keys to access the RAG engine programmatically from your own applications or scripts.
89
+ </p>
90
+ </DialogHeader>
91
+
92
+ {newKey && (
93
+ <div className="my-6 p-5 border border-primary/20 bg-primary/5 rounded-xl space-y-3 animate-in fade-in zoom-in-95 duration-300">
94
+ <h4 className="font-semibold text-primary flex items-center gap-2">
95
+ <Key className="w-4 h-4" /> Save your new API key
96
+ </h4>
97
+ <p className="text-sm text-muted-foreground">
98
+ Please copy this key and store it somewhere safe. For security reasons, you will <strong>never</strong> be able to view it again.
99
+ </p>
100
+ <div className="flex items-center gap-2 mt-2">
101
+ <code className="flex-1 bg-background/80 border border-border/50 px-4 py-2.5 rounded-lg text-sm font-mono break-all text-foreground shadow-inner">
102
+ {newKey}
103
+ </code>
104
+ <Button onClick={copyToClipboard} variant={copied ? "default" : "secondary"} className="shrink-0 shadow-sm">
105
+ {copied ? <Check className="w-4 h-4 mr-2" /> : <Copy className="w-4 h-4 mr-2" />}
106
+ {copied ? "Copied!" : "Copy"}
107
+ </Button>
108
+ </div>
109
+ </div>
110
+ )}
111
+
112
+ <div className="space-y-4 mt-6">
113
+ <div className="flex items-center justify-between">
114
+ <h3 className="text-sm font-medium text-foreground/80 uppercase tracking-wider">Active Keys</h3>
115
+ <Button onClick={generateKey} disabled={loading} size="sm" className="rounded-full shadow-sm hover:shadow-md transition-shadow">
116
+ <Plus className="w-4 h-4 mr-1.5" />
117
+ Generate New Key
118
+ </Button>
119
+ </div>
120
+
121
+ <div className="rounded-xl border border-border/50 bg-card overflow-hidden shadow-sm">
122
+ {keys.length === 0 ? (
123
+ <div className="p-8 text-center text-sm text-muted-foreground bg-muted/20">
124
+ <Key className="w-8 h-8 mx-auto mb-3 opacity-20" />
125
+ You don&apos;t have any API keys yet.
126
+ </div>
127
+ ) : (
128
+ <div className="divide-y divide-border/50">
129
+ {keys.map((key) => (
130
+ <div key={key.id} className="flex items-center justify-between p-4 hover:bg-muted/30 transition-colors group">
131
+ <div className="space-y-1">
132
+ <div className="font-mono text-sm font-medium tracking-tight">
133
+ {key.key_prefix}••••••••••••••••••••••
134
+ </div>
135
+ <div className="text-xs text-muted-foreground flex gap-4">
136
+ <span>Created: {new Date(key.created_at).toLocaleDateString()}</span>
137
+ <span>Last used: {key.last_used ? new Date(key.last_used).toLocaleDateString() : "Never"}</span>
138
+ </div>
139
+ </div>
140
+ <Button
141
+ variant="ghost"
142
+ size="icon"
143
+ onClick={() => revokeKey(key.id)}
144
+ className="text-destructive/70 hover:text-destructive hover:bg-destructive/10 opacity-0 group-hover:opacity-100 transition-all"
145
+ title="Revoke key"
146
+ >
147
+ <Trash2 className="w-4 h-4" />
148
+ </Button>
149
+ </div>
150
+ ))}
151
+ </div>
152
+ )}
153
+ </div>
154
+ </div>
155
+ </DialogContent>
156
+ </Dialog>
157
+ );
158
+ }
frontend/src/components/chat/SourceCard.tsx CHANGED
@@ -4,7 +4,14 @@ import { useState } from "react";
4
  import type { SourceChunk } from "@/store/chat-store";
5
  import { Badge } from "@/components/ui/badge";
6
  import { Button } from "@/components/ui/button";
7
- import { ChevronDown, ChevronUp, FileText, Eye } from "lucide-react";
 
 
 
 
 
 
 
8
 
9
  interface Props {
10
  sources: SourceChunk[];
@@ -13,89 +20,125 @@ interface Props {
13
 
14
  export default function SourceCard({ sources = [], onPageClick }: Props) {
15
  const [expanded, setExpanded] = useState(false);
 
16
 
17
  if (sources.length === 0) return null;
18
 
 
 
 
 
 
 
 
 
 
 
19
  return (
20
  <div className="rounded-lg border border-border/50 bg-card/50 overflow-hidden">
21
- {/* ── Header ──────────────────────────────────── */}
22
- <button
23
- onClick={() => setExpanded(!expanded)}
24
- className="w-full flex items-center justify-between px-3 py-2 text-xs hover:bg-accent/30 transition-colors"
25
- >
26
- <span className="flex items-center gap-1.5 text-muted-foreground">
27
- <FileText className="w-3.5 h-3.5" />
28
- {sources.length} source{sources.length > 1 ? "s" : ""} cited
29
- </span>
30
- {expanded ? (
31
- <ChevronUp className="w-3.5 h-3.5 text-muted-foreground" />
32
- ) : (
33
- <ChevronDown className="w-3.5 h-3.5 text-muted-foreground" />
34
- )}
35
- </button>
36
 
37
- {/* ── Collapsed: Mini badges ──────────────────── */}
38
- {!expanded && (
39
- <div className="px-3 pb-2 flex flex-wrap gap-1">
40
- {sources.map((src, i) => (
41
- <Badge
42
- key={i}
43
- variant="secondary"
44
- className="text-[10px] h-5 cursor-pointer hover:bg-primary/20 transition-colors"
45
- onClick={() => onPageClick(src.page + 1)}
46
- >
47
- p.{src.page + 1} • {src.confidence}%
48
- </Badge>
49
- ))}
50
- </div>
51
- )}
52
-
53
- {/* ── Expanded: Full source cards ─────────────── */}
54
- {expanded && (
55
- <div className="border-t border-border/30">
56
- {sources.map((src, i) => (
57
- <div
58
- key={i}
59
- className="px-3 py-2.5 border-b border-border/20 last:border-b-0 hover:bg-accent/20 transition-colors"
60
- >
61
- <div className="flex items-center justify-between mb-1.5">
62
- <div className="flex items-center gap-2">
63
- <span className="text-[10px] font-medium text-muted-foreground">
64
- {src.filename}
65
- </span>
66
- <Badge variant="outline" className="text-[9px] h-4 px-1.5">
67
- Page {src.page + 1}
68
- </Badge>
69
  <Badge
70
  variant="secondary"
71
- className={`text-[9px] h-4 px-1.5 ${
72
- src.confidence >= 80
73
- ? "text-emerald-400 bg-emerald-400/10"
74
- : src.confidence >= 50
75
- ? "text-yellow-400 bg-yellow-400/10"
76
- : "text-muted-foreground"
77
- }`}
78
  >
79
- {src.confidence}% match
80
  </Badge>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  </div>
82
- <Button
83
- variant="ghost"
84
- size="sm"
85
- className="h-6 px-2 text-[10px]"
86
- onClick={() => onPageClick(src.page + 1)}
87
  >
88
- <Eye className="w-3 h-3 mr-1" />
89
- View
90
- </Button>
 
 
 
 
 
 
 
 
91
  </div>
92
- <p className="text-[11px] text-muted-foreground leading-relaxed line-clamp-3">
93
- {src.text}
94
- </p>
95
- </div>
96
- ))}
97
- </div>
98
- )}
99
  </div>
100
  );
101
  }
 
4
  import type { SourceChunk } from "@/store/chat-store";
5
  import { Badge } from "@/components/ui/badge";
6
  import { Button } from "@/components/ui/button";
7
+ import {
8
+ Tooltip,
9
+ TooltipContent,
10
+ TooltipTrigger,
11
+ } from "@/components/ui/tooltip";
12
+ import { ChevronDown, ChevronUp, FileText, Eye, TextQuote } from "lucide-react";
13
+
14
+ const EXCERPT_THRESHOLD = 200; // source texts longer than this many characters get the "Show excerpt" / "Hide excerpt" toggle
15
 
16
  interface Props {
17
  sources: SourceChunk[];
 
20
 
21
  export default function SourceCard({ sources = [], onPageClick }: Props) {
22
  const [expanded, setExpanded] = useState(false);
23
+ const [excerptOpen, setExcerptOpen] = useState<Set<number>>(new Set());
24
 
25
  if (sources.length === 0) return null;
26
 
27
+ // Flip source i between its clamped (line-clamp-3) and fully shown excerpt.
28
+ const toggleExcerpt = (i: number) => {
29
+   // Updater form: derive the next set from the latest state, not the render-time closure.
30
+   setExcerptOpen((prev) => {
31
+     const next = new Set(prev);
32
+     if (!next.delete(i)) next.add(i); // delete() returns false when i was not open yet
33
+     return next;
34
+   });
35
+ };
36
+
37
  return (
38
  <div className="rounded-lg border border-border/50 bg-card/50 overflow-hidden">
39
+ {/* ── Header ──────────────────────────────────── */}
40
+ <button
41
+ onClick={() => setExpanded(!expanded)}
42
+ className="w-full flex items-center justify-between px-3 py-2 text-xs hover:bg-accent/30 transition-colors"
43
+ >
44
+ <span className="flex items-center gap-1.5 text-muted-foreground">
45
+ <FileText className="w-3.5 h-3.5" />
46
+ {sources.length} source{sources.length > 1 ? "s" : ""} cited
47
+ </span>
48
+ {expanded ? (
49
+ <ChevronUp className="w-3.5 h-3.5 text-muted-foreground" />
50
+ ) : (
51
+ <ChevronDown className="w-3.5 h-3.5 text-muted-foreground" />
52
+ )}
53
+ </button>
54
 
55
+ {/* ── Collapsed: Mini badges with hover preview ── */}
56
+ {!expanded && (
57
+ <div className="px-3 pb-2 flex flex-wrap gap-1">
58
+ {sources.map((src, i) => (
59
+ <Tooltip key={i}>
60
+ <TooltipTrigger className="inline-flex">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  <Badge
62
  variant="secondary"
63
+ className="text-[10px] h-5 cursor-pointer hover:bg-primary/20 transition-colors"
64
+ onClick={() => onPageClick(src.page + 1)}
 
 
 
 
 
65
  >
66
+ p.{src.page + 1} • {src.confidence}%
67
  </Badge>
68
+ </TooltipTrigger>
69
+ <TooltipContent
70
+ side="top"
71
+ align="center"
72
+ className="max-w-xs p-2"
73
+ >
74
+ <p className="text-[11px] leading-relaxed line-clamp-6">
75
+ {src.text}
76
+ </p>
77
+ </TooltipContent>
78
+ </Tooltip>
79
+ ))}
80
+ </div>
81
+ )}
82
+
83
+ {/* ── Expanded: Full source cards ─────────────── */}
84
+ {expanded && (
85
+ <div className="border-t border-border/30">
86
+ {sources.map((src, i) => (
87
+ <div
88
+ key={i}
89
+ className="px-3 py-2.5 border-b border-border/20 last:border-b-0 hover:bg-accent/20 transition-colors"
90
+ >
91
+ <div className="flex items-center justify-between mb-1.5">
92
+ <div className="flex items-center gap-2">
93
+ <span className="text-[10px] font-medium text-muted-foreground">
94
+ {src.filename}
95
+ </span>
96
+ <Badge variant="outline" className="text-[9px] h-4 px-1.5">
97
+ Page {src.page + 1}
98
+ </Badge>
99
+ <Badge
100
+ variant="secondary"
101
+ className={`text-[9px] h-4 px-1.5 ${
102
+ src.confidence >= 80
103
+ ? "text-emerald-400 bg-emerald-400/10"
104
+ : src.confidence >= 50
105
+ ? "text-yellow-400 bg-yellow-400/10"
106
+ : "text-muted-foreground"
107
+ }`}
108
+ >
109
+ {src.confidence}% match
110
+ </Badge>
111
+ </div>
112
+ <Button
113
+ variant="ghost"
114
+ size="sm"
115
+ className="h-6 px-2 text-[10px]"
116
+ onClick={() => onPageClick(src.page + 1)}
117
+ >
118
+ <Eye className="w-3 h-3 mr-1" />
119
+ View
120
+ </Button>
121
  </div>
122
+ <p
123
+ className={`text-[11px] text-muted-foreground leading-relaxed ${
124
+ excerptOpen.has(i) ? "" : "line-clamp-3"
125
+ }`}
 
126
  >
127
+ {src.text}
128
+ </p>
129
+ {src.text.length > EXCERPT_THRESHOLD && (
130
+ <button
131
+ onClick={() => toggleExcerpt(i)}
132
+ className="mt-1.5 flex items-center gap-1 text-[10px] text-primary/70 hover:text-primary transition-colors"
133
+ >
134
+ <TextQuote className="w-3 h-3" />
135
+ {excerptOpen.has(i) ? "Hide excerpt" : "Show excerpt"}
136
+ </button>
137
+ )}
138
  </div>
139
+ ))}
140
+ </div>
141
+ )}
 
 
 
 
142
  </div>
143
  );
144
  }
frontend/src/components/layout/Header.tsx CHANGED
@@ -22,7 +22,10 @@ import {
22
  Moon,
23
  Sun,
24
  } from "lucide-react";
25
- import { useState } from "react";
 
 
 
26
 
27
  interface HeaderProps {
28
  sidebarOpen: boolean;
@@ -31,23 +34,19 @@ interface HeaderProps {
31
  onToggleViewer: () => void;
32
  }
33
 
 
 
 
 
34
  export default function Header({ sidebarOpen, onToggleSidebar, viewerOpen, onToggleViewer }: HeaderProps) {
35
  const { user, logout } = useAuth();
36
  const { t, i18n } = useTranslation();
37
  const router = useRouter();
38
- const [isDark, setIsDark] = useState(true);
 
39
 
40
- const toggleTheme = () => {
41
- const html = document.documentElement;
42
- if (isDark) {
43
- html.classList.remove("dark");
44
- html.classList.add("light");
45
- } else {
46
- html.classList.remove("light");
47
- html.classList.add("dark");
48
- }
49
- setIsDark(!isDark);
50
- };
51
 
52
  const handleLogout = () => {
53
  logout();
@@ -89,9 +88,11 @@ export default function Header({ sidebarOpen, onToggleSidebar, viewerOpen, onTog
89
  {viewerOpen ? <PanelRightClose className="w-4 h-4" /> : <PanelRightOpen className="w-4 h-4" />}
90
  </Button>
91
 
92
- <Button variant="ghost" size="icon" className="h-8 w-8" onClick={toggleTheme} title={isDark ? t("header.lightMode") : t("header.darkMode")}>
93
- {isDark ? <Sun className="w-4 h-4" /> : <Moon className="w-4 h-4" />}
94
- </Button>
 
 
95
 
96
  <select
97
  aria-label={t("common.language")}
@@ -106,20 +107,27 @@ export default function Header({ sidebarOpen, onToggleSidebar, viewerOpen, onTog
106
  </select>
107
 
108
  <DropdownMenu>
109
- <DropdownMenuTrigger className="flex items-center h-8 gap-2 px-2 rounded-md hover:bg-accent transition-colors cursor-pointer">
110
- <Avatar className="w-6 h-6">
111
- <AvatarFallback className="text-[10px] bg-primary/20 text-primary">
112
- {user?.username?.slice(0, 2).toUpperCase() || "U"}
113
- </AvatarFallback>
114
- </Avatar>
115
- <span className="text-sm hidden sm:inline">{user?.username}</span>
116
- </DropdownMenuTrigger>
117
- <DropdownMenuContent align="end" className="w-48">
 
 
 
 
 
118
  <div className="px-3 py-2">
119
  <p className="text-sm font-medium">{user?.username}</p>
120
  <p className="text-xs text-muted-foreground truncate">{user?.email}</p>
121
  </div>
122
  <DropdownMenuSeparator />
 
 
123
  <DropdownMenuItem className="text-destructive cursor-pointer" onClick={handleLogout}>
124
  <LogOut className="w-4 h-4 mr-2" />
125
  {t("header.signOut")}
@@ -129,4 +137,4 @@ export default function Header({ sidebarOpen, onToggleSidebar, viewerOpen, onTog
129
  </div>
130
  </header>
131
  );
132
- }
 
22
  Moon,
23
  Sun,
24
  } from "lucide-react";
25
+ import { useSyncExternalStore } from "react";
26
+ import { useTheme } from "next-themes";
27
+ import ApiKeyManager from "@/components/auth/ApiKeyManager";
28
+
29
 
30
  interface HeaderProps {
31
  sidebarOpen: boolean;
 
34
  onToggleViewer: () => void;
35
  }
36
 
37
+ const subscribe = () => () => {}; // no-op subscription: registers nothing and returns an empty unsubscribe
38
+ const getSnapshot = () => true; // value useSyncExternalStore reads on the client
39
+ const getServerSnapshot = () => false; // value useSyncExternalStore reads for server rendering and hydration
40
+
41
  export default function Header({ sidebarOpen, onToggleSidebar, viewerOpen, onToggleViewer }: HeaderProps) {
42
  const { user, logout } = useAuth();
43
  const { t, i18n } = useTranslation();
44
  const router = useRouter();
45
+ const { theme, setTheme } = useTheme();
46
+ const mounted = useSyncExternalStore(subscribe, getSnapshot, getServerSnapshot); // ← replaces useState + useEffect
47
 
48
+ const isDark = theme === "dark";
49
+ const toggleTheme = () => setTheme(isDark ? "light" : "dark");
 
 
 
 
 
 
 
 
 
50
 
51
  const handleLogout = () => {
52
  logout();
 
88
  {viewerOpen ? <PanelRightClose className="w-4 h-4" /> : <PanelRightOpen className="w-4 h-4" />}
89
  </Button>
90
 
91
+ {mounted && (
92
+ <Button variant="ghost" size="icon" className="h-8 w-8" onClick={toggleTheme} title={isDark ? t("header.lightMode") : t("header.darkMode")}>
93
+ {isDark ? <Sun className="w-4 h-4" /> : <Moon className="w-4 h-4" />}
94
+ </Button>
95
+ )}
96
 
97
  <select
98
  aria-label={t("common.language")}
 
107
  </select>
108
 
109
  <DropdownMenu>
110
+ <DropdownMenuTrigger
111
+ render={
112
+ <button className="flex items-center h-8 gap-2 px-2 rounded-md hover:bg-accent transition-colors cursor-pointer">
113
+ <Avatar className="w-6 h-6">
114
+ <AvatarFallback className="text-[10px] bg-primary/20 text-primary">
115
+ {user?.username?.slice(0, 2).toUpperCase() || "U"}
116
+ </AvatarFallback>
117
+ </Avatar>
118
+ <span className="text-sm hidden sm:inline">{user?.username}</span>
119
+ </button>
120
+ }
121
+ />
122
+
123
+ <DropdownMenuContent align="end" className="w-56">
124
  <div className="px-3 py-2">
125
  <p className="text-sm font-medium">{user?.username}</p>
126
  <p className="text-xs text-muted-foreground truncate">{user?.email}</p>
127
  </div>
128
  <DropdownMenuSeparator />
129
+ <ApiKeyManager />
130
+ <DropdownMenuSeparator />
131
  <DropdownMenuItem className="text-destructive cursor-pointer" onClick={handleLogout}>
132
  <LogOut className="w-4 h-4 mr-2" />
133
  {t("header.signOut")}
 
137
  </div>
138
  </header>
139
  );
140
+ }
frontend/src/components/layout/ThemeProvider.tsx ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ "use client";
2
+
3
+ import { ThemeProvider as NextThemesProvider } from "next-themes";
4
+ import { type ThemeProviderProps } from "next-themes";
5
+ /** Client-side wrapper that forwards all props and children to the next-themes ThemeProvider. */
6
+ export function ThemeProvider({ children, ...props }: ThemeProviderProps) {
7
+ return <NextThemesProvider {...props}>{children}</NextThemesProvider>;
8
+ }
frontend/src/components/layout/ThemeToggle.tsx ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client";
2
+
3
+ import { useTheme } from "next-themes";
4
+ import { useSyncExternalStore } from "react";
5
+ import { Sun, Moon } from "lucide-react";
6
+
7
+ // useSyncExternalStore as a "mounted" flag: the server snapshot (false) is used for SSR and hydration, the client snapshot (true) afterwards, so the first client render matches the server HTML.
8
+ const subscribe = () => () => {};
9
+ const getSnapshot = () => true;
10
+ const getServerSnapshot = () => false;
11
+
12
+ /** Light/dark toggle button; renders nothing until the component is mounted on the client. */
13
+ export function ThemeToggle() {
14
+   // resolvedTheme is the concrete "light"/"dark" value even when the stored theme is "system".
15
+   const { resolvedTheme, setTheme } = useTheme();
16
+   const mounted = useSyncExternalStore(subscribe, getSnapshot, getServerSnapshot);
17
+   const isDark = resolvedTheme === "dark";
18
+
19
+   // mounted is false for the server/hydration render, so no theme-dependent markup is emitted there.
20
+   if (!mounted) return null;
21
+
22
+   return (
23
+     <button
24
+       onClick={() => setTheme(isDark ? "light" : "dark")}
25
+       aria-label="Toggle theme"
26
+       className="rounded-md p-2 transition-colors hover:bg-gray-100 dark:hover:bg-gray-800"
27
+     >
28
+       {isDark ? <Sun className="h-5 w-5 text-yellow-400" /> : <Moon className="h-5 w-5 text-gray-700" />}
29
+     </button>
30
+   );
31
+ }