import types

from app.models import Document
from app.services.document_ingestion import ingest_document


def test_api_health(client):
    """The health endpoint answers 200 with the expected status and version."""
    resp = client.get("/api/health")
    assert resp.status_code == 200

    body = resp.json()
    assert body["status"] == "healthy"
    assert body["version"] == "2.0.0"


def test_protected_documents_list_requires_auth(client):
    """Listing documents without credentials is rejected with 401 or 403."""
    resp = client.get("/api/v1/documents/")
    assert resp.status_code in {401, 403}


def test_documents_list_authenticated(client, auth_headers, ready_document):
    """An authenticated listing returns exactly the ready_document fixture."""
    resp = client.get("/api/v1/documents/", headers=auth_headers)
    assert resp.status_code == 200

    body = resp.json()
    assert body["total"] == 1
    # Index into the items only after the total has been checked.
    first_item = body["items"][0]
    assert first_item["id"] == ready_document.id
    assert first_item["original_name"] == "ready.txt"


def test_upload_rejects_unsupported_extension_before_deep_validation(client, auth_headers):
    """Uploading a .exe payload is refused with a 400 and a 'not supported' message."""
    upload = {"file": ("payload.exe", b"binary-data", "application/octet-stream")}
    resp = client.post("/api/v1/documents/upload", headers=auth_headers, files=upload)

    assert resp.status_code == 400
    assert "not supported" in resp.json()["error"]["message"]


def test_rename_document_updates_original_name(client, auth_headers, ready_document, db_session):
    """A padded name is stored trimmed; the stored filename is left alone."""
    document_url = f"/api/v1/documents/{ready_document.id}"
    new_name = {"name": " renamed-report.pdf "}
    resp = client.patch(document_url, headers=auth_headers, json=new_name)
    assert resp.status_code == 200

    body = resp.json()
    assert body["id"] == ready_document.id
    assert body["original_name"] == "renamed-report.pdf"

    # Reload the row so the assertions see what was persisted.
    db_session.refresh(ready_document)
    assert ready_document.original_name == "renamed-report.pdf"
    assert ready_document.filename == "ready.txt"


def test_rename_document_rejects_empty_name(client, auth_headers, ready_document):
    """A whitespace-only name is rejected with a 422."""
    document_url = f"/api/v1/documents/{ready_document.id}"
    resp = client.patch(document_url, headers=auth_headers, json={"name": " "})
    assert resp.status_code == 422


def test_rename_document_returns_404_for_missing_document(client, auth_headers):
    """Renaming the all-zero document id yields a 404."""
    missing_url = "/api/v1/documents/00000000-0000-0000-0000-000000000000"
    resp = client.patch(missing_url, headers=auth_headers, json={"name": "missing.pdf"})
    assert resp.status_code == 404


def test_update_document_preserves_existing_rename_behavior(
    client, auth_headers, ready_document, db_session
):
    """Backward compatibility: the DocumentUpdate schema must keep accepting
    'name' exactly as the former DocumentRename schema did.
    """
    document_url = f"/api/v1/documents/{ready_document.id}"
    new_name = {"name": " renamed-report.pdf "}
    resp = client.patch(document_url, headers=auth_headers, json=new_name)
    assert resp.status_code == 200

    body = resp.json()
    assert body["id"] == ready_document.id
    assert body["original_name"] == "renamed-report.pdf"

    db_session.refresh(ready_document)
    assert ready_document.original_name == "renamed-report.pdf"
    assert ready_document.filename == "ready.txt"


def test_update_document_sets_summary(client, auth_headers, ready_document, db_session):
    """A body carrying only 'summary' is echoed in the response and persisted."""
    summary_text = "This is a test document for RAG evaluation."
    document_url = f"/api/v1/documents/{ready_document.id}"
    resp = client.patch(document_url, headers=auth_headers, json={"summary": summary_text})
    assert resp.status_code == 200

    body = resp.json()
    assert body["id"] == ready_document.id
    assert body["summary"] == summary_text

    db_session.refresh(ready_document)
    assert ready_document.summary == summary_text


def test_update_document_sets_both_name_and_summary(
    client, auth_headers, ready_document, db_session
):
    """Name and summary sent together are both returned and both persisted."""
    changes = {"name": "full-update.txt", "summary": "Both fields updated."}
    document_url = f"/api/v1/documents/{ready_document.id}"
    resp = client.patch(document_url, headers=auth_headers, json=changes)
    assert resp.status_code == 200

    body = resp.json()
    assert body["original_name"] == "full-update.txt"
    assert body["summary"] == "Both fields updated."

    db_session.refresh(ready_document)
    assert ready_document.original_name == "full-update.txt"
    assert ready_document.summary == "Both fields updated."


def test_update_document_clears_summary_with_empty_string(
    client, auth_headers, ready_document, db_session
):
    """Sending an empty-string summary resets a previously stored summary to None."""
    # First set a summary
    ready_document.summary = "Existing summary"
    db_session.commit()

    # Then clear it with empty string
    response = client.patch(
        f"/api/v1/documents/{ready_document.id}",
        headers=auth_headers,
        json={"summary": ""},
    )
    assert response.status_code == 200
    payload = response.json()
    assert payload["summary"] is None

    db_session.refresh(ready_document)
    assert ready_document.summary is None


def test_update_document_clears_summary_with_whitespace(
    client, auth_headers, ready_document, db_session
):
    """A whitespace-only summary is treated like an empty one and stored as None."""
    ready_document.summary = "Existing summary"
    db_session.commit()

    response = client.patch(
        f"/api/v1/documents/{ready_document.id}",
        headers=auth_headers,
        json={"summary": " \t "},
    )
    assert response.status_code == 200
    payload = response.json()
    assert payload["summary"] is None

    db_session.refresh(ready_document)
    assert ready_document.summary is None


def test_update_document_no_fields_does_nothing(client, auth_headers, ready_document, db_session):
    """An empty JSON body is accepted and leaves the name and summary unchanged."""
    response = client.patch(
        f"/api/v1/documents/{ready_document.id}",
        headers=auth_headers,
        json={},
    )
    assert response.status_code == 200

    db_session.refresh(ready_document)
    assert ready_document.original_name == "ready.txt"
    assert ready_document.summary is None


def test_rename_document_returns_403_for_other_users_document(
    client, auth_headers, db_session, other_user
):
    """Renaming a document owned by another user is refused and changes nothing."""
    other_document = Document(
        user_id=other_user.id,
        filename="other.txt",
        original_name="other.txt",
        file_size=64,
        status="ready",
    )
    db_session.add(other_document)
    db_session.commit()
    db_session.refresh(other_document)

    response = client.patch(
        f"/api/v1/documents/{other_document.id}",
        headers=auth_headers,
        json={"name": "renamed.txt"},
    )
    assert response.status_code == 403

    # The rejected request must not have touched the stored name.
    db_session.refresh(other_document)
    assert other_document.original_name == "other.txt"


def test_ingest_document_builds_and_saves_graph(db_session, monkeypatch, tmp_path, user):
    """ingest_document builds a graph from the chunks, saves it, and marks the document ready.

    Page counting, chunking, chunk storage, summarisation, and the graph
    builder are all replaced with stubs, so the file at ``filepath`` is never
    created or read by this test.
    """
    # Function-scope import: sys is needed only to register the stub module below.
    import sys

    document = Document(
        user_id=user.id,
        filename="graph.txt",
        original_name="graph.txt",
        file_size=128,
        status="pending",
    )
    db_session.add(document)
    db_session.commit()
    db_session.refresh(document)

    # Keep plain copies of the ids for the call and the assertions that follow.
    user_id = user.id
    document_id = document.id
    chunks = [{"text": "OpenAI works with Microsoft.", "page": 1, "chunk_index": 0}]
    saved = {}

    monkeypatch.setattr("app.services.document_ingestion.get_page_count", lambda filepath: 1)
    monkeypatch.setattr("app.services.document_ingestion.chunk_document", lambda filepath: chunks)
    monkeypatch.setattr(
        "app.services.document_ingestion.store_chunks", lambda **kwargs: len(chunks)
    )
    # Hand the test session to any code that calls app.database.SessionLocal().
    monkeypatch.setattr("app.database.SessionLocal", lambda: db_session)

    # Register a stand-in summarizer module so importing app.rag.summarizer
    # resolves to this stub instead of the real implementation.
    fake_summary = types.ModuleType("app.rag.summarizer")
    fake_summary.generate_document_summary = lambda filepath, max_sentences=2: "Summary"
    monkeypatch.setitem(sys.modules, "app.rag.summarizer", fake_summary)

    monkeypatch.setattr(
        "app.rag.graph_builder.build_graph",
        lambda received_chunks: {"chunks": received_chunks},
    )
    # Record what save_graph receives so it can be asserted on afterwards.
    monkeypatch.setattr(
        "app.rag.graph_builder.save_graph",
        lambda graph, user_id, document_id: saved.update(
            {"graph": graph, "user_id": user_id, "document_id": document_id}
        ),
    )

    ingest_document(
        document_id=document_id,
        filepath=str(tmp_path / "graph.txt"),
        original_name=document.original_name,
        user_id=user_id,
    )

    assert saved == {
        "graph": {"chunks": chunks},
        "user_id": user_id,
        "document_id": document_id,
    }
    refreshed = db_session.get(Document, document_id)
    assert refreshed.status == "ready"
    assert refreshed.chunk_count == 1


def test_delete_document_soft_deletes_and_hides_document(
    client, auth_headers, ready_document, db_session, monkeypatch
):
    """DELETE flags the row as deleted, skips delete_graph, and hides the document."""
    deletion_calls = []
    doc_id = ready_document.id

    # Spy on delete_graph: the assertion below expects it not to be called.
    monkeypatch.setattr(
        "app.rag.graph_builder.delete_graph",
        lambda user_id, document_id: deletion_calls.append(
            {"user_id": user_id, "document_id": document_id}
        ),
    )

    response = client.delete(
        f"/api/v1/documents/{doc_id}",
        headers=auth_headers,
    )
    assert response.status_code == 200
    assert deletion_calls == []

    db_session.refresh(ready_document)
    assert ready_document.is_deleted is True
    assert ready_document.deleted_at is not None

    # The deleted document no longer appears in the listing or by direct lookup.
    list_response = client.get("/api/v1/documents/", headers=auth_headers)
    assert list_response.status_code == 200
    assert list_response.json()["total"] == 0

    get_response = client.get(f"/api/v1/documents/{doc_id}", headers=auth_headers)
    assert get_response.status_code == 404