PDF-Assit_RAG / backend /tests /test_documents.py
Param20h's picture
deploy: pure backend API with keywords fix
7c46845 unverified
Raw
History Blame Contribute Delete
9.55 kB
import types
from app.models import Document
from app.services.document_ingestion import ingest_document
def test_api_health(client):
response = client.get("/api/health")
assert response.status_code == 200
payload = response.json()
assert payload["status"] == "healthy"
assert payload["version"] == "2.0.0"
def test_protected_documents_list_requires_auth(client):
response = client.get("/api/v1/documents/")
assert response.status_code in (401, 403)
def test_documents_list_authenticated(client, auth_headers, ready_document):
response = client.get("/api/v1/documents/", headers=auth_headers)
assert response.status_code == 200
payload = response.json()
assert payload["total"] == 1
assert payload["items"][0]["id"] == ready_document.id
assert payload["items"][0]["original_name"] == "ready.txt"
def test_upload_rejects_unsupported_extension_before_deep_validation(client, auth_headers):
response = client.post(
"/api/v1/documents/upload",
headers=auth_headers,
files={"file": ("payload.exe", b"binary-data", "application/octet-stream")},
)
assert response.status_code == 400
assert "not supported" in response.json()["error"]["message"]
def test_rename_document_updates_original_name(client, auth_headers, ready_document, db_session):
response = client.patch(
f"/api/v1/documents/{ready_document.id}",
headers=auth_headers,
json={"name": " renamed-report.pdf "},
)
assert response.status_code == 200
payload = response.json()
assert payload["id"] == ready_document.id
assert payload["original_name"] == "renamed-report.pdf"
db_session.refresh(ready_document)
assert ready_document.original_name == "renamed-report.pdf"
assert ready_document.filename == "ready.txt"
def test_rename_document_rejects_empty_name(client, auth_headers, ready_document):
response = client.patch(
f"/api/v1/documents/{ready_document.id}",
headers=auth_headers,
json={"name": " "},
)
assert response.status_code == 422
def test_rename_document_returns_404_for_missing_document(client, auth_headers):
response = client.patch(
"/api/v1/documents/00000000-0000-0000-0000-000000000000",
headers=auth_headers,
json={"name": "missing.pdf"},
)
assert response.status_code == 404
def test_update_document_preserves_existing_rename_behavior(client, auth_headers, ready_document, db_session):
"""The new DocumentUpdate schema should still accept 'name' the same way
the old DocumentRename schema did — backward compatibility."""
response = client.patch(
f"/api/v1/documents/{ready_document.id}",
headers=auth_headers,
json={"name": " renamed-report.pdf "},
)
assert response.status_code == 200
payload = response.json()
assert payload["id"] == ready_document.id
assert payload["original_name"] == "renamed-report.pdf"
db_session.refresh(ready_document)
assert ready_document.original_name == "renamed-report.pdf"
assert ready_document.filename == "ready.txt"
def test_update_document_sets_summary(client, auth_headers, ready_document, db_session):
response = client.patch(
f"/api/v1/documents/{ready_document.id}",
headers=auth_headers,
json={"summary": "This is a test document for RAG evaluation."},
)
assert response.status_code == 200
payload = response.json()
assert payload["id"] == ready_document.id
assert payload["summary"] == "This is a test document for RAG evaluation."
db_session.refresh(ready_document)
assert ready_document.summary == "This is a test document for RAG evaluation."
def test_update_document_sets_both_name_and_summary(client, auth_headers, ready_document, db_session):
response = client.patch(
f"/api/v1/documents/{ready_document.id}",
headers=auth_headers,
json={"name": "full-update.txt", "summary": "Both fields updated."},
)
assert response.status_code == 200
payload = response.json()
assert payload["original_name"] == "full-update.txt"
assert payload["summary"] == "Both fields updated."
db_session.refresh(ready_document)
assert ready_document.original_name == "full-update.txt"
assert ready_document.summary == "Both fields updated."
def test_update_document_clears_summary_with_empty_string(client, auth_headers, ready_document, db_session):
# First set a summary
ready_document.summary = "Existing summary"
db_session.commit()
# Then clear it with empty string
response = client.patch(
f"/api/v1/documents/{ready_document.id}",
headers=auth_headers,
json={"summary": ""},
)
assert response.status_code == 200
payload = response.json()
assert payload["summary"] is None
db_session.refresh(ready_document)
assert ready_document.summary is None
def test_update_document_clears_summary_with_whitespace(client, auth_headers, ready_document, db_session):
ready_document.summary = "Existing summary"
db_session.commit()
response = client.patch(
f"/api/v1/documents/{ready_document.id}",
headers=auth_headers,
json={"summary": " \t "},
)
assert response.status_code == 200
payload = response.json()
assert payload["summary"] is None
db_session.refresh(ready_document)
assert ready_document.summary is None
def test_update_document_no_fields_does_nothing(client, auth_headers, ready_document, db_session):
"""Sending an empty body (or body with no known fields) is a no-op."""
response = client.patch(
f"/api/v1/documents/{ready_document.id}",
headers=auth_headers,
json={},
)
assert response.status_code == 200
db_session.refresh(ready_document)
assert ready_document.original_name == "ready.txt"
assert ready_document.summary is None
def test_rename_document_returns_403_for_other_users_document(client, auth_headers, db_session, other_user):
other_document = Document(
user_id=other_user.id,
filename="other.txt",
original_name="other.txt",
file_size=64,
status="ready",
)
db_session.add(other_document)
db_session.commit()
db_session.refresh(other_document)
response = client.patch(
f"/api/v1/documents/{other_document.id}",
headers=auth_headers,
json={"name": "renamed.txt"},
)
assert response.status_code == 403
db_session.refresh(other_document)
assert other_document.original_name == "other.txt"
def test_ingest_document_builds_and_saves_graph(db_session, monkeypatch, tmp_path, user):
document = Document(
user_id=user.id,
filename="graph.txt",
original_name="graph.txt",
file_size=128,
status="pending",
)
db_session.add(document)
db_session.commit()
db_session.refresh(document)
user_id = user.id
document_id = document.id
chunks = [{"text": "OpenAI works with Microsoft.", "page": 1, "chunk_index": 0}]
saved = {}
monkeypatch.setattr("app.services.document_ingestion.get_page_count", lambda filepath: 1)
monkeypatch.setattr("app.services.document_ingestion.chunk_document", lambda filepath: chunks)
monkeypatch.setattr("app.services.document_ingestion.store_chunks", lambda **kwargs: len(chunks))
monkeypatch.setattr("app.database.SessionLocal", lambda: db_session)
fake_summary = types.ModuleType("app.rag.summarizer")
fake_summary.generate_document_summary = lambda filepath, max_sentences=2: "Summary"
monkeypatch.setitem(__import__("sys").modules, "app.rag.summarizer", fake_summary)
monkeypatch.setattr(
"app.rag.graph_builder.build_graph",
lambda received_chunks: {"chunks": received_chunks},
)
monkeypatch.setattr(
"app.rag.graph_builder.save_graph",
lambda graph, user_id, document_id: saved.update(
{"graph": graph, "user_id": user_id, "document_id": document_id}
),
)
ingest_document(
document_id=document_id,
filepath=str(tmp_path / "graph.txt"),
original_name=document.original_name,
user_id=user_id,
)
assert saved == {
"graph": {"chunks": chunks},
"user_id": user_id,
"document_id": document_id,
}
refreshed = db_session.get(Document, document_id)
assert refreshed.status == "ready"
assert refreshed.chunk_count == 1
def test_delete_document_soft_deletes_and_hides_document(client, auth_headers, ready_document, db_session, monkeypatch):
deletion_calls = []
doc_id = ready_document.id
monkeypatch.setattr(
"app.rag.graph_builder.delete_graph",
lambda user_id, document_id: deletion_calls.append(
{"user_id": user_id, "document_id": document_id}
),
)
response = client.delete(
f"/api/v1/documents/{doc_id}",
headers=auth_headers,
)
assert response.status_code == 200
assert deletion_calls == []
db_session.refresh(ready_document)
assert ready_document.is_deleted is True
assert ready_document.deleted_at is not None
list_response = client.get("/api/v1/documents/", headers=auth_headers)
assert list_response.status_code == 200
assert list_response.json()["total"] == 0
get_response = client.get(f"/api/v1/documents/{doc_id}", headers=auth_headers)
assert get_response.status_code == 404