Paramjit Singh committed on
Commit
2b2b242
·
unverified ·
2 Parent(s): 3a10c69 6a0e5f9

Merge pull request #174 from Jiya3177/test/backend-rag-pipeline-121

Browse files
.github/workflows/ci.yml CHANGED
@@ -54,6 +54,15 @@ jobs:
54
  run: |
55
  python -c "import sys; sys.path.insert(0, 'backend'); from app.config import settings; print('✅ Config imports OK')" || true
56
 
 
 
 
 
 
 
 
 
 
57
  # ── 2. Frontend Build Check ─────────────────────────────
58
  frontend-check:
59
  name: ⚛️ Frontend — TypeScript & Build
 
54
  run: |
55
  python -c "import sys; sys.path.insert(0, 'backend'); from app.config import settings; print('✅ Config imports OK')" || true
56
 
57
+ - name: Run backend pytest suite
58
+ env:
59
+ SECRET_KEY: ci-dummy-secret
60
+ DATABASE_URL: sqlite:///./ci_test.db
61
+ HF_TOKEN: ci-dummy-token
62
+ UPLOAD_DIR: /tmp/uploads
63
+ CHROMA_PERSIST_DIR: /tmp/chroma
64
+ run: pytest backend/tests -v
65
+
66
  # ── 2. Frontend Build Check ─────────────────────────────
67
  frontend-check:
68
  name: ⚛️ Frontend — TypeScript & Build
backend/requirements.txt CHANGED
@@ -18,6 +18,9 @@ google-auth
18
  # Config
19
  pydantic-settings
20
  pydantic[email]
 
 
 
21
 
22
  # Document Processing
23
  PyMuPDF
 
18
  # Config
19
  pydantic-settings
20
  pydantic[email]
21
+ pytest
22
+ pytest-cov
23
+ httpx
24
 
25
  # Document Processing
26
  PyMuPDF
backend/tests/conftest.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import types
4
+ from contextlib import asynccontextmanager
5
+ from pathlib import Path
6
+
7
+ import pytest
8
+ from fastapi.testclient import TestClient
9
+ from sqlalchemy import create_engine
10
+ from sqlalchemy.orm import sessionmaker
11
+
12
+
13
# This file lives at backend/tests/conftest.py, so two levels up is the
# repository root.
ROOT = Path(__file__).resolve().parents[2]
BACKEND_DIR = ROOT / "backend"

# Put the backend directory on sys.path so the ``app`` package imports below
# resolve no matter which directory pytest was started from.
_backend_path = str(BACKEND_DIR)
if _backend_path not in sys.path:
    sys.path.insert(0, _backend_path)

# Fallback settings for local runs. ``setdefault`` leaves any value that is
# already present in the environment (for example, one set by CI) untouched.
for _name, _fallback in (
    ("SECRET_KEY", "test-secret-key"),
    ("DATABASE_URL", "sqlite:///./test_bootstrap.db"),
    ("HF_TOKEN", "test-hf-token"),
    ("UPLOAD_DIR", str(BACKEND_DIR / "test_uploads")),
    ("CHROMA_PERSIST_DIR", str(BACKEND_DIR / "test_chroma")),
):
    os.environ.setdefault(_name, _fallback)
24
+
25
+
26
# Stand-in for ``app.rag.embeddings``. It is registered in sys.modules before
# the application is imported, so imports of that module name resolve to this
# stub instead.
fake_embeddings = types.ModuleType("app.rag.embeddings")


def _stub_get_embedding_model():
    """Return an opaque placeholder object in place of an embedding model."""
    return object()


def _stub_embed_query(query):
    """Return a fixed one-dimensional embedding for any query."""
    return [0.0]


def _stub_embed_texts(texts):
    """Return one fixed one-dimensional embedding per input text."""
    return [[0.0] for _ in texts]


fake_embeddings.get_embedding_model = _stub_get_embedding_model
fake_embeddings.embed_query = _stub_embed_query
fake_embeddings.embed_texts = _stub_embed_texts
sys.modules.setdefault("app.rag.embeddings", fake_embeddings)
31
+
32
+
33
class _FakeChromaClient:
    """Minimal client stand-in that only answers ``heartbeat``."""

    def heartbeat(self):
        """Report a healthy status string."""
        return "ok"


# Stand-in for ``app.rag.vectorstore``, registered before the application is
# imported so the app binds to these stubs.
fake_vectorstore = types.ModuleType("app.rag.vectorstore")


def _stub_get_chroma_client():
    """Return a fresh fake client."""
    return _FakeChromaClient()


def _stub_store_chunks(chunks, document_id, filename, user_id):
    """Pretend to store every chunk and report how many were given."""
    return len(chunks)


def _stub_delete_document_chunks(document_id, user_id):
    """Accept a delete request and do nothing."""
    return None


def _stub_query_chunks(query_embedding, user_id, document_id=None, top_k=10):
    """Return no matches for any query."""
    return []


fake_vectorstore.get_chroma_client = _stub_get_chroma_client
fake_vectorstore.store_chunks = _stub_store_chunks
fake_vectorstore.delete_document_chunks = _stub_delete_document_chunks
fake_vectorstore.query_chunks = _stub_query_chunks
sys.modules.setdefault("app.rag.vectorstore", fake_vectorstore)
44
+
45
# Empty placeholder modules for the ``slowapi`` package and the three
# submodules stubbed here; they are populated and registered in sys.modules
# further down this file.
slowapi_module, slowapi_errors, slowapi_middleware, slowapi_util = (
    types.ModuleType(_module_name)
    for _module_name in (
        "slowapi",
        "slowapi.errors",
        "slowapi.middleware",
        "slowapi.util",
    )
)
49
+
50
+
51
class RateLimitExceeded(Exception):
    """Placeholder for slowapi's rate-limit exception used by the test stubs."""
53
+
54
+
55
class SlowAPIMiddleware:
    """Pass-through ASGI middleware standing in for slowapi's middleware.

    Any extra constructor arguments are accepted and ignored; every call is
    forwarded unchanged to the wrapped application.
    """

    def __init__(self, app, *args, **kwargs):
        # Only the wrapped ASGI app is kept; other options are irrelevant here.
        self.app = app

    async def __call__(self, scope, receive, send):
        downstream = self.app
        await downstream(scope, receive, send)
61
+
62
+
63
class Limiter:
    """No-op stand-in for ``slowapi.Limiter`` so tests never get rate limited.

    Args:
        key_func: Callable used to derive the rate-limit key. It is stored
            on the instance but never invoked by this stub.
        *args: Ignored; accepted for call-compatibility.
        **kwargs: Ignored; accepted for call-compatibility.
    """

    def __init__(self, key_func=None, *args, **kwargs):
        self.key_func = key_func

    def limit(self, _value, *args, **kwargs):
        """Return a decorator that leaves the decorated function untouched.

        Extra positional and keyword arguments are accepted and ignored so
        that application code passing slowapi's optional ``limit`` arguments
        (for example ``key_func=`` or ``per_method=``) does not raise
        ``TypeError`` when it is imported under test.
        """
        def decorator(fn):
            return fn

        return decorator
71
+
72
+
73
def _fake_remote_address(request):
    """Return a constant client address for every request."""
    return "127.0.0.1"


# Attach each stub under the attribute name that importers look up.
slowapi_errors.RateLimitExceeded = RateLimitExceeded
slowapi_middleware.SlowAPIMiddleware = SlowAPIMiddleware
slowapi_util.get_remote_address = _fake_remote_address
slowapi_module.Limiter = Limiter

# Register the package first, then its submodules. ``setdefault`` keeps any
# module already present in sys.modules under the same name.
for _stub in (slowapi_module, slowapi_errors, slowapi_middleware, slowapi_util):
    sys.modules.setdefault(_stub.__name__, _stub)
82
+
83
+ from app.auth import create_access_token, create_refresh_token, hash_password
84
+ from app.database import Base, get_db
85
+ from app.main import app
86
+ from app.models import Document, User
87
+
88
+
89
@pytest.fixture()
def db_session(tmp_path):
    """Yield a SQLAlchemy session backed by a per-test SQLite file.

    All tables known to ``Base.metadata`` are created before the session is
    handed out. On teardown the session is closed, the tables are dropped
    and the engine is disposed.
    """
    sqlite_url = f"sqlite:///{tmp_path / 'test.db'}"
    engine = create_engine(
        sqlite_url,
        connect_args={"check_same_thread": False},
    )
    session_factory = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    Base.metadata.create_all(bind=engine)

    db = session_factory()
    try:
        yield db
    finally:
        db.close()
        Base.metadata.drop_all(bind=engine)
        engine.dispose()
106
+
107
+
108
@pytest.fixture()
def client(db_session, monkeypatch):
    """Yield a ``TestClient`` for ``app`` wired to the per-test database.

    While the fixture is active:

    * the ``get_db`` dependency yields ``db_session``;
    * ``app.database.SessionLocal`` returns ``db_session``;
    * the application's lifespan is replaced by a no-op, so whatever
      startup/shutdown work the real lifespan does is skipped.

    The lifespan replacement goes through ``monkeypatch`` so the original is
    restored after the test instead of leaking into the shared ``app``
    object. Dependency overrides are cleared in a ``finally`` block so they
    are removed even if the client context raises.
    """
    def override_get_db():
        # The session's lifecycle belongs to the ``db_session`` fixture, so
        # nothing is closed here.
        yield db_session

    @asynccontextmanager
    async def no_lifespan(_app):
        yield

    monkeypatch.setattr("app.database.SessionLocal", lambda: db_session)
    monkeypatch.setattr(app.router, "lifespan_context", no_lifespan)
    app.dependency_overrides[get_db] = override_get_db

    try:
        with TestClient(app) as test_client:
            yield test_client
    finally:
        app.dependency_overrides.clear()
128
+
129
+
130
@pytest.fixture()
def user(db_session):
    """Persist and return a ``User`` whose plaintext password is ``password123``."""
    fields = {
        "username": "tester",
        "email": "tester@example.com",
        "hashed_password": hash_password("password123"),
    }
    account = User(**fields)
    db_session.add(account)
    db_session.commit()
    # Reload from the database after the commit before handing the row out.
    db_session.refresh(account)
    return account
141
+
142
+
143
@pytest.fixture()
def auth_headers(user):
    """Return an ``Authorization`` header carrying an access token for ``user``."""
    access_token = create_access_token(user.id)
    headers = {"Authorization": f"Bearer {access_token}"}
    return headers
147
+
148
+
149
@pytest.fixture()
def refresh_token(user):
    """Return a refresh token issued for the ``user`` fixture's id."""
    issued = create_refresh_token(user.id)
    return issued
152
+
153
+
154
@pytest.fixture()
def ready_document(db_session, user):
    """Persist and return a ``Document`` owned by ``user`` with status ``ready``."""
    fields = {
        "user_id": user.id,
        "filename": "ready.txt",
        "original_name": "ready.txt",
        "file_size": 128,
        "page_count": 1,
        "chunk_count": 2,
        "status": "ready",
    }
    document = Document(**fields)
    db_session.add(document)
    db_session.commit()
    # Reload from the database after the commit before handing the row out.
    db_session.refresh(document)
    return document
169
+
170
+
171
@pytest.fixture()
def pending_document(db_session, user):
    """Persist and return a ``Document`` owned by ``user`` with status ``pending``."""
    fields = {
        "user_id": user.id,
        "filename": "pending.txt",
        "original_name": "pending.txt",
        "file_size": 64,
        "status": "pending",
    }
    document = Document(**fields)
    db_session.add(document)
    db_session.commit()
    # Reload from the database after the commit before handing the row out.
    db_session.refresh(document)
    return document
backend/tests/test_auth.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def test_register_success(client):
    """Registering a new account returns 201 with both tokens and the user."""
    new_account = {
        "username": "newuser",
        "email": "newuser@example.com",
        "password": "password123",
    }
    resp = client.post("/api/v1/auth/register", json=new_account)

    assert resp.status_code == 201
    body = resp.json()
    assert body["access_token"]
    assert body["refresh_token"]
    assert body["user"]["email"] == "newuser@example.com"
16
+
17
+
18
def test_register_duplicate_email_or_username_conflict(client):
    """Reusing an email or a username on registration is rejected with 409."""
    register_url = "/api/v1/auth/register"
    original = {
        "username": "dupuser",
        "email": "dup@example.com",
        "password": "password123",
    }
    created = client.post(register_url, json=original)
    assert created.status_code == 201

    # Same email, different username.
    same_email = client.post(
        register_url,
        json=dict(original, username="anotheruser"),
    )
    assert same_email.status_code == 409
    assert same_email.json()["detail"] == "Email already registered"

    # Same username, different email.
    same_username = client.post(
        register_url,
        json=dict(original, email="another@example.com"),
    )
    assert same_username.status_code == 409
    assert same_username.json()["detail"] == "Username already taken"
40
+
41
+
42
def test_login_success(client, user):
    """Logging in with the correct password returns 200, tokens and the user."""
    credentials = {"email": user.email, "password": "password123"}
    resp = client.post("/api/v1/auth/login", json=credentials)

    assert resp.status_code == 200
    body = resp.json()
    assert body["access_token"]
    assert body["refresh_token"]
    assert body["user"]["username"] == user.username
53
+
54
+
55
def test_login_invalid_password(client, user):
    """Logging in with a wrong password returns 401 and a generic message."""
    bad_credentials = {"email": user.email, "password": "wrong-password"}
    resp = client.post("/api/v1/auth/login", json=bad_credentials)

    assert resp.status_code == 401
    assert resp.json()["detail"] == "Invalid email or password"
63
+
64
+
65
def test_auth_me_requires_auth(client):
    """Requesting the current user without credentials is refused (401 or 403)."""
    status = client.get("/api/v1/auth/me").status_code

    assert status in (401, 403)
69
+
70
+
71
def test_refresh_token_success(client, refresh_token):
    """Posting a refresh token returns 200 with a new bearer token pair."""
    request_body = {"refresh_token": refresh_token}
    resp = client.post("/api/v1/auth/refresh", json=request_body)

    assert resp.status_code == 200
    body = resp.json()
    assert body["access_token"]
    assert body["refresh_token"]
    assert body["token_type"] == "bearer"
backend/tests/test_chat.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def test_chat_ask_success(client, auth_headers, ready_document, monkeypatch):
    """Asking about a ready document returns the patched answer and sources."""
    def fake_generate_answer(question, user_id, document_id=None):
        # Canned reply used in place of the real answer generation.
        return {
            "answer": "Mocked answer",
            "sources": [
                {
                    "text": "Mock source",
                    "filename": "ready.txt",
                    "page": 1,
                    "score": 0.99,
                    "confidence": 99.0,
                }
            ],
        }

    monkeypatch.setattr("app.routes.chat.generate_answer", fake_generate_answer)

    request_body = {
        "question": "What is in the doc?",
        "document_id": ready_document.id,
    }
    resp = client.post("/api/v1/chat/ask", headers=auth_headers, json=request_body)

    assert resp.status_code == 200
    body = resp.json()
    assert body["answer"] == "Mocked answer"
    assert body["document_id"] == ready_document.id
    assert body["sources"][0]["filename"] == "ready.txt"
29
+
30
+
31
def test_chat_ask_document_not_found(client, auth_headers):
    """Asking about an unknown document id returns 404."""
    request_body = {"question": "Missing doc?", "document_id": "missing-doc-id"}
    resp = client.post("/api/v1/chat/ask", headers=auth_headers, json=request_body)

    assert resp.status_code == 404
    assert resp.json()["detail"] == "Document not found"
40
+
41
+
42
def test_chat_ask_document_not_ready(client, auth_headers, pending_document):
    """Asking about a document that is still pending returns 400."""
    request_body = {
        "question": "Pending doc?",
        "document_id": pending_document.id,
    }
    resp = client.post("/api/v1/chat/ask", headers=auth_headers, json=request_body)

    assert resp.status_code == 400
    assert "Document is still pending" in resp.json()["detail"]
backend/tests/test_chunker.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ import pytest
4
+
5
+ from app.rag.chunker import chunk_document, get_page_count
6
+
7
+
8
def test_txt_extraction_and_chunking(tmp_path):
    """A small .txt file yields at least one chunk on page 1 with its text."""
    source = tmp_path / "notes.txt"
    source.write_text("This is a sample text file for chunking.", encoding="utf-8")

    produced = chunk_document(str(source))

    assert len(produced) >= 1
    first_chunk = produced[0]
    assert first_chunk["page"] == 1
    assert "sample text file" in first_chunk["text"]
17
+
18
+
19
def test_empty_txt_returns_no_chunks(tmp_path):
    """A whitespace-only .txt file produces an empty chunk list."""
    source = tmp_path / "empty.txt"
    source.write_text(" \n", encoding="utf-8")

    produced = chunk_document(str(source))

    assert produced == []
24
+
25
+
26
def test_unsupported_extension_raises_value_error(tmp_path):
    """Chunking a .csv file raises ``ValueError`` naming the unsupported type."""
    source = tmp_path / "data.csv"
    source.write_text("a,b,c", encoding="utf-8")
    source_path = str(source)

    with pytest.raises(ValueError, match="Unsupported file type"):
        chunk_document(source_path)
32
+
33
+
34
def test_get_page_count_for_txt_returns_one(tmp_path):
    """A plain text file is reported as having exactly one page."""
    source = tmp_path / "single.txt"
    source.write_text("hello", encoding="utf-8")

    pages = get_page_count(str(source))

    assert pages == 1
backend/tests/test_documents.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def test_api_health(client):
    """The health endpoint returns 200 with a healthy status and version 2.0.0."""
    resp = client.get("/api/health")

    assert resp.status_code == 200
    body = resp.json()
    assert body["status"] == "healthy"
    assert body["version"] == "2.0.0"
8
+
9
+
10
def test_protected_documents_list_requires_auth(client):
    """Listing documents without credentials is refused (401 or 403)."""
    status = client.get("/api/v1/documents/").status_code

    assert status in (401, 403)
14
+
15
+
16
def test_documents_list_authenticated(client, auth_headers, ready_document):
    """An authenticated listing returns exactly the user's one ready document."""
    resp = client.get("/api/v1/documents/", headers=auth_headers)

    assert resp.status_code == 200
    listing = resp.json()
    assert listing["total"] == 1
    first_item = listing["items"][0]
    assert first_item["id"] == ready_document.id
    assert first_item["original_name"] == "ready.txt"
24
+
25
+
26
def test_upload_rejects_unsupported_extension_before_deep_validation(client, auth_headers):
    """Uploading a .exe file is rejected with 400 and a "not supported" detail."""
    rejected_upload = ("payload.exe", b"binary-data", "application/octet-stream")
    resp = client.post(
        "/api/v1/documents/upload",
        headers=auth_headers,
        files={"file": rejected_upload},
    )

    assert resp.status_code == 400
    assert "not supported" in resp.json()["detail"]