File size: 3,991 Bytes
625e9e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import logging
import os
import sys
from pprint import pprint

# Append the parent of this file's directory to sys.path so that the
# `document_processor` import below can be resolved. This must run before
# that import, so the two statements must stay in this order.
# NOTE(review): `document_processor` is presumably located in that parent
# directory -- confirm against the repository layout.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from document_processor import prepare_product_documents, prepare_review_documents

# Configure logging at import time: INFO level, with each record rendered
# as "<LEVEL>: <message>".
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

def _missing_keys(container, required_keys):
    """Return the entries of ``required_keys`` that are not in ``container``.

    The order of ``required_keys`` is preserved so failure messages are stable.
    """
    return [key for key in required_keys if key not in container]


def _assert_non_empty_list(documents, kind, producer_name):
    """Assert that ``documents`` is a non-empty ``list``.

    Args:
        documents: The value returned by the document preparation function.
        kind: Lower-case document kind used in failure messages
            (``"product"`` or ``"review"``).
        producer_name: Name of the function that produced ``documents``,
            used in the "returned None" failure message.

    Raises:
        AssertionError: If ``documents`` is ``None``, not a list, or empty.
    """
    assert documents is not None, f"{producer_name} returned None"
    assert isinstance(documents, list), f"Expected a list of {kind} documents"
    assert len(documents) > 0, f"No {kind} documents were created"


def _assert_document_structure(document, label, document_keys, metadata_keys):
    """Assert that ``document`` and its ``"metadata"`` entry hold the given keys.

    Args:
        document: A single prepared document (indexed by key).
        label: Capitalised document kind used in failure messages
            (``"Product"`` or ``"Review"``).
        document_keys: Keys that must be present on the document itself.
        metadata_keys: Keys that must be present in ``document["metadata"]``.

    Raises:
        AssertionError: If any required key is absent. The message lists the
            expected keys and, in addition, exactly which ones are missing.
    """
    absent = _missing_keys(document, document_keys)
    assert not absent, (
        f"{label} document is missing one of the expected keys: "
        f"{document_keys} (missing: {absent})"
    )

    # Only reached when "metadata" is present, so the lookup cannot KeyError.
    absent = _missing_keys(document["metadata"], metadata_keys)
    assert not absent, (
        f"{label} metadata is missing one of the expected keys: "
        f"{metadata_keys} (missing: {absent})"
    )


def test_chunking_implementation():
    """
    Tests the document preparation functions for products and reviews,
    verifying their output structure and content as per the plan.

    For each of ``prepare_product_documents`` and ``prepare_review_documents``
    the test checks that a non-empty list is returned and that the first
    document carries the expected top-level and metadata keys. The first
    document of each kind is pretty-printed for visual inspection.

    Raises:
        AssertionError: If either function returns ``None``, a non-list, an
            empty list, or a first document lacking an expected key.
    """
    logging.info("--- Starting Test: Chunking and Document Preparation ---")

    # The data files are resolved relative to this file's location (its
    # parent directory), not relative to the current working directory.
    base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    products_file = os.path.join(base_dir, 'products.json')
    reviews_file = os.path.join(base_dir, 'product_reviews.json')

    logging.info("Using products file: %s", products_file)
    logging.info("Using reviews file: %s", reviews_file)

    # Top-level keys shared by product and review documents.
    expected_keys = ["id", "text_for_embedding", "metadata"]

    # --- Test 1: Product Document Preparation ---
    logging.info("\n[1] Testing Product Document Preparation...")
    product_documents = prepare_product_documents(products_file)

    _assert_non_empty_list(
        product_documents, "product", "prepare_product_documents"
    )
    logging.info("SUCCESS: Created %d product documents.", len(product_documents))

    # Only the first document is inspected, as a representative sample.
    first_product = product_documents[0]
    product_metadata_keys = [
        "chunk_type", "product_name", "model_number", "category", "brand", "price",
    ]
    _assert_document_structure(
        first_product, "Product", expected_keys, product_metadata_keys
    )
    logging.info("SUCCESS: First product document has the correct structure.")

    # Print the first product for visual inspection
    print("\n--- Sample Product Document ---")
    pprint(first_product)
    print("-----------------------------\n")

    # --- Test 2: Review Document Preparation ---
    logging.info("[2] Testing Review Document Preparation...")
    review_documents = prepare_review_documents(reviews_file, products_file)

    _assert_non_empty_list(
        review_documents, "review", "prepare_review_documents"
    )
    logging.info("SUCCESS: Created %d review documents.", len(review_documents))

    # Only the first document is inspected, as a representative sample.
    first_review = review_documents[0]
    review_metadata_keys = [
        "chunk_type", "product_name", "model_number", "category", "brand", "rating",
    ]
    _assert_document_structure(
        first_review, "Review", expected_keys, review_metadata_keys
    )
    logging.info("SUCCESS: First review document has the correct structure.")

    # Print the first review for visual inspection
    print("\n--- Sample Review Document ---")
    pprint(first_review)
    print("----------------------------\n")

    logging.info("--- Test Finished Successfully ---")

# Run the test when this file is executed directly as a script; a failed
# assertion then surfaces as an uncaught AssertionError.
if __name__ == "__main__":
    test_chunking_implementation()