"""Smoke test for the document preparation functions in ``document_processor``."""

import logging
import os
import sys
from pprint import pprint

# Extend the import search path with this file's parent directory before the
# project import below runs (presumably where ``document_processor`` lives --
# confirm against the repository layout). The order of these statements
# matters: the append must happen first.
sys.path.append(
    os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..')
    )
)

from document_processor import (  # noqa: E402  (must follow the sys.path tweak)
    prepare_product_documents,
    prepare_review_documents,
)

# Emit INFO-and-above records as "LEVEL: message" so progress lines are
# visible when the script is run directly.
logging.basicConfig(
    format='%(levelname)s: %(message)s',
    level=logging.INFO,
)
def test_chunking_implementation():
    """Smoke-test the product and review document builders.

    Calls ``prepare_product_documents`` and ``prepare_review_documents`` on
    ``products.json`` and ``product_reviews.json`` located in the parent
    directory of this file, asserts that each call returns a non-empty list,
    and checks that the first document of each kind contains the expected
    top-level keys and metadata keys. One sample document of each kind is
    pretty-printed to stdout.

    Raises:
        AssertionError: If any of the checks above fails.
    """

    def _require_document_list(docs, producer, kind):
        # The builder must hand back a non-empty list.
        assert docs is not None, f"{producer} returned None"
        assert isinstance(docs, list), f"Expected a list of {kind} documents"
        assert len(docs) > 0, f"No {kind} documents were created"

    def _require_keys(mapping, keys, label):
        # Every name in `keys` must be present in `mapping`.
        assert all(name in mapping for name in keys), \
            f"{label} is missing one of the expected keys: {keys}"

    logging.info("--- Starting Test: Chunking and Document Preparation ---")

    # Input files are resolved relative to this file's parent directory.
    here = os.path.dirname(__file__)
    root_dir = os.path.abspath(os.path.join(here, '..'))
    products_path = os.path.join(root_dir, 'products.json')
    reviews_path = os.path.join(root_dir, 'product_reviews.json')

    logging.info(f"Using products file: {products_path}")
    logging.info(f"Using reviews file: {reviews_path}")

    # Keys checked on both document kinds; each kind adds one extra
    # metadata key of its own (price for products, rating for reviews).
    document_keys = ["id", "text_for_embedding", "metadata"]
    shared_meta_keys = ["chunk_type", "product_name", "model_number", "category", "brand"]

    # [1] Product documents.
    logging.info("\n[1] Testing Product Document Preparation...")
    product_docs = prepare_product_documents(products_path)
    _require_document_list(product_docs, "prepare_product_documents", "product")
    logging.info(f"SUCCESS: Created {len(product_docs)} product documents.")

    sample_product = product_docs[0]
    _require_keys(sample_product, document_keys, "Product document")
    _require_keys(
        sample_product["metadata"],
        shared_meta_keys + ["price"],
        "Product metadata",
    )
    logging.info("SUCCESS: First product document has the correct structure.")

    print("\n--- Sample Product Document ---")
    pprint(sample_product)
    print("-----------------------------\n")

    # [2] Review documents (the builder also takes the products file).
    logging.info("[2] Testing Review Document Preparation...")
    review_docs = prepare_review_documents(reviews_path, products_path)
    _require_document_list(review_docs, "prepare_review_documents", "review")
    logging.info(f"SUCCESS: Created {len(review_docs)} review documents.")

    sample_review = review_docs[0]
    _require_keys(sample_review, document_keys, "Review document")
    _require_keys(
        sample_review["metadata"],
        shared_meta_keys + ["rating"],
        "Review metadata",
    )
    logging.info("SUCCESS: First review document has the correct structure.")

    print("\n--- Sample Review Document ---")
    pprint(sample_review)
    print("----------------------------\n")

    logging.info("--- Test Finished Successfully ---")
# Allow the check to be run directly as a script, without a test runner.
if __name__ == "__main__":
    test_chunking_implementation()