|
|
import pytest |
|
|
import pandas as pd |
|
|
from langchain.schema import Document |
|
|
from app.processor import summarize |
|
|
|
|
|
@pytest.fixture
def dummy_pages():
    """Provide two single-sentence ``Document`` pages for the tests.

    Every page carries ``sample_paper.pdf`` as its ``source`` metadata and a
    ``page`` number that counts up from 1 in list order.
    """
    page_texts = (
        "LangChain is a framework for developing LLM-based applications.",
        "It provides utilities for prompt management, chains, and agents.",
    )
    return [
        Document(
            page_content=text,
            metadata={"source": "sample_paper.pdf", "page": number},
        )
        for number, text in enumerate(page_texts, start=1)
    ]
|
|
|
|
|
def test_summarize_documents_returns_dataframe(dummy_pages):
    """Check that summarizing the fixture pages yields the expected DataFrame.

    The result must be a ``pandas.DataFrame`` that exposes the ``file_name``,
    ``page_number``, ``chunks`` and ``concise_summary`` columns, holds one row
    per input page, and reports page number 1 in its first row.
    """
    # This module imports ``summarize`` from ``app.processor``; the earlier
    # call went through ``summarize_documents``, a name that is never imported
    # here, so the test died with NameError before reaching any assertion.
    # NOTE(review): confirm ``summarize`` is the intended entry point -- the
    # test name hints that the function may be called ``summarize_documents``.
    summary_df = summarize(dummy_pages)

    assert isinstance(summary_df, pd.DataFrame)

    # Check each required column separately so a failure names the missing one.
    for column in ("file_name", "page_number", "chunks", "concise_summary"):
        assert column in summary_df.columns, f"missing column: {column}"

    # One output row per input page, in the same order as the fixture.
    assert len(summary_df) == len(dummy_pages)
    assert summary_df["page_number"].iloc[0] == 1
|
|
|