SynopSync / src /tests /test_summarization.py
Nirmal
file upload
cf71c95
import pytest
import pandas as pd
from langchain.schema import Document
from app.processor import summarize # Adjust this import path as needed
@pytest.fixture
def dummy_pages():
return [
Document(
page_content="LangChain is a framework for developing LLM-based applications.",
metadata={"source": "sample_paper.pdf", "page": 1},
),
Document(
page_content="It provides utilities for prompt management, chains, and agents.",
metadata={"source": "sample_paper.pdf", "page": 2},
),
]
def test_summarize_documents_returns_dataframe(dummy_pages):
summary_df = summarize_documents(dummy_pages)
assert isinstance(summary_df, pd.DataFrame)
assert "file_name" in summary_df.columns
assert "page_number" in summary_df.columns
assert "chunks" in summary_df.columns
assert "concise_summary" in summary_df.columns
assert len(summary_df) == len(dummy_pages)
assert summary_df["page_number"].iloc[0] == 1