Spaces:
Sleeping
Sleeping
| from unittest.mock import patch | |
| from ingestion.embedder import chunk_text, embed_and_store_filing, embed_and_store_transcript | |
def test_chunk_long_text_produces_multiple_chunks():
    """A 600-word input chunked with chunk_size=100, overlap=10 yields >1 chunk."""
    long_text = "word " * 600
    pieces = chunk_text(long_text, chunk_size=100, overlap=10)
    assert len(pieces) > 1
def test_chunk_short_text_returns_single_chunk():
    """With default chunking arguments, a short input comes back as one chunk."""
    short_text = "Short text."
    result = chunk_text(short_text)
    # The single chunk must equal the input verbatim.
    assert result == [short_text]
def test_chunk_overlap_repeats_words():
    """The last word of the first chunk must reappear in the second chunk.

    Input is the numbers 0..199 joined by single spaces, chunked with
    chunk_size=100 and overlap=20.
    """
    numbered_text = " ".join(map(str, range(200)))
    pieces = chunk_text(numbered_text, chunk_size=100, overlap=20)
    tail_word = pieces[0].split()[-1]
    # NOTE(review): assuming chunks are strings, this is a substring check,
    # not whole-word membership -- confirm that is the intended strictness.
    assert tail_word in pieces[1]
def test_embed_filing_calls_add_chunks_twice(mock_vs):
    """With both MD&A and risk text non-empty, add_chunks is called twice.

    Args:
        mock_vs: Mock whose ``add_chunks`` call count is inspected.
    """
    filing = {
        "ticker": "AAPL",
        "company_name": "Apple Inc.",
        "mda_text": "Revenue grew 5%.",
        "risk_text": "Competition remains a risk.",
        "filing_date": "2024-11-01",
        "period": "Q42024",
        "form_type": "10-Q",
    }
    embed_and_store_filing(**filing)
    assert mock_vs.add_chunks.call_count == 2
def test_embed_filing_skips_empty_sections(mock_vs):
    """An empty risk_text leaves add_chunks called once only.

    Args:
        mock_vs: Mock whose ``add_chunks`` call count is inspected.
    """
    filing = {
        "ticker": "AAPL",
        "company_name": "Apple Inc.",
        "mda_text": "Some MD&A text.",
        "risk_text": "",  # the empty section under test
        "filing_date": "2024-11-01",
        "period": "Q42024",
        "form_type": "10-Q",
    }
    embed_and_store_filing(**filing)
    assert mock_vs.add_chunks.call_count == 1
def test_embed_filing_chunk_context_in_metadata(mock_vs):
    """The first stored chunk's metadata carries a descriptive chunk_context.

    The third positional argument of the first ``add_chunks`` call is read as
    the metadata sequence; its first entry must contain a ``chunk_context``
    value mentioning both the company name and the period.

    Args:
        mock_vs: Mock whose recorded ``add_chunks`` calls are inspected.
    """
    embed_and_store_filing(
        ticker="AAPL",
        company_name="Apple Inc.",
        mda_text="Revenue grew 5%.",
        risk_text="",
        filing_date="2024-11-01",
        period="Q42024",
        form_type="10-Q",
    )
    # A call record indexes as (args, kwargs); [0] selects the positional args.
    first_call = mock_vs.add_chunks.call_args_list[0]
    metadatas = first_call[0][2]
    assert "chunk_context" in metadatas[0]
    assert "Apple Inc." in metadatas[0]["chunk_context"]
    assert "Q42024" in metadatas[0]["chunk_context"]
def test_embed_transcript(mock_vs):
    """A transcript is stored via a single add_chunks call to "transcripts".

    Checks that the first positional argument is ``"transcripts"`` and that the
    first entry of the third positional argument has a ``chunk_context``
    containing the period.

    Args:
        mock_vs: Mock whose recorded ``add_chunks`` call is inspected.
    """
    transcript = {
        "ticker": "AAPL",
        "company_name": "Apple Inc.",
        "transcript_text": "Tim Cook: Record quarter.",
        "transcript_date": "2024-11-01",
        "period": "Q42024",
    }
    embed_and_store_transcript(**transcript)

    add_chunks = mock_vs.add_chunks
    add_chunks.assert_called_once()
    # call_args indexes as (args, kwargs); [0] selects the positional args.
    positional = add_chunks.call_args[0]
    assert positional[0] == "transcripts"
    first_meta = positional[2][0]
    assert "chunk_context" in first_meta
    assert "Q42024" in first_meta["chunk_context"]