Spaces:
Paused
Paused
| import { SourceTracker } from "../source-tracker"; | |
| import { transformArrayToObject } from "../transform-array-to-obj"; | |
/**
 * Test suite for SourceTracker.
 *
 * Covers three methods as they are exercised here:
 *  - transformResults: merging per-URL extraction results into one object,
 *    checked both against literal expectations and against the output of
 *    transformArrayToObject for the same inputs.
 *  - mapSourcesToFinalItems: reporting, per final item, the URLs it came from.
 *  - trackPreDeduplicationSources: recording sources before the final
 *    (deduplicated) items are mapped back to URLs.
 */
describe("SourceTracker", () => {
  let sourceTracker: SourceTracker;

  // Build a fresh instance before every test so tests never share a tracker.
  beforeEach(() => {
    sourceTracker = new SourceTracker();
  });

  describe("transformResults", () => {
    // Schema shared by the "simple array" and "empty results" parity tests.
    const itemsSchema = {
      type: "object",
      properties: {
        items: {
          type: "array",
          items: {
            type: "object",
            properties: {
              id: { type: "number" }
            }
          }
        }
      }
    };

    // NOTE: this test asserts only the merged data returned by
    // transformResults; it does not inspect any stored source information.
    it("should transform and merge results while preserving sources", () => {
      const extractionResults = [
        {
          extract: { products: [{ name: "Product 1", price: 10 }] },
          url: "http://example1.com"
        },
        {
          extract: { products: [{ name: "Product 2", price: 20 }] },
          url: "http://example2.com"
        }
      ];
      const schema = {
        type: "object",
        properties: {
          products: {
            type: "array",
            items: {
              type: "object",
              properties: {
                name: { type: "string" },
                price: { type: "number" }
              }
            }
          }
        }
      };

      const result = sourceTracker.transformResults(extractionResults, schema);

      // Both single-element `products` arrays end up in one array, in input order.
      expect(result).toEqual({
        products: [
          { name: "Product 1", price: 10 },
          { name: "Product 2", price: 20 }
        ]
      });
    });

    // The four parity tests below each feed the same data to
    // transformArrayToObject (extracts only) and to transformResults
    // (extracts + urls) and require equal output. They are separate tests so
    // that one failing scenario neither hides the others nor obscures which
    // scenario broke.

    it("should match original transformArrayToObject behavior: simple array transformation", () => {
      const extractionResults = [
        { extract: { items: [{ id: 1 }] }, url: "url1" },
        { extract: { items: [{ id: 2 }] }, url: "url2" }
      ];

      const originalResult = transformArrayToObject(
        itemsSchema,
        extractionResults.map(r => r.extract)
      );
      const newResult = sourceTracker.transformResults(extractionResults, itemsSchema);

      expect(newResult).toEqual(originalResult);
    });

    it("should match original transformArrayToObject behavior: nested objects with arrays", () => {
      const schema = {
        type: "object",
        properties: {
          data: {
            type: "object",
            properties: {
              products: {
                type: "array",
                items: {
                  type: "object",
                  properties: {
                    id: { type: "number" },
                    variants: {
                      type: "array",
                      items: { type: "string" }
                    }
                  }
                }
              }
            }
          }
        }
      };
      const extractionResults = [
        {
          extract: {
            data: {
              products: [
                { id: 1, variants: ["a", "b"] }
              ]
            }
          },
          url: "url1"
        },
        {
          extract: {
            data: {
              products: [
                { id: 2, variants: ["c", "d"] }
              ]
            }
          },
          url: "url2"
        }
      ];

      const originalResult = transformArrayToObject(
        schema,
        extractionResults.map(r => r.extract)
      );
      const newResult = sourceTracker.transformResults(extractionResults, schema);

      expect(newResult).toEqual(originalResult);
    });

    it("should match original transformArrayToObject behavior: empty arrays", () => {
      // The empty array literals are passed inline so each is typed from the
      // callee's parameter instead of relying on an un-annotated `[]` local.
      const originalResult = transformArrayToObject(itemsSchema, []);
      const newResult = sourceTracker.transformResults([], itemsSchema);

      expect(newResult).toEqual(originalResult);
    });

    it("should match original transformArrayToObject behavior: non-array properties", () => {
      const schema = {
        type: "object",
        properties: {
          name: { type: "string" },
          count: { type: "number" }
        }
      };
      const extractionResults = [
        { extract: { name: "test1", count: 1 }, url: "url1" },
        { extract: { name: "test2", count: 2 }, url: "url2" }
      ];

      const originalResult = transformArrayToObject(
        schema,
        extractionResults.map(r => r.extract)
      );
      const newResult = sourceTracker.transformResults(extractionResults, schema);

      expect(newResult).toEqual(originalResult);
    });
  });

  describe("mapSourcesToFinalItems", () => {
    it("should correctly map sources after deduplication and merging", () => {
      // Setup initial data with mergeable items (same name, complementary fields)
      const extractionResults = [
        {
          extract: { products: [{ name: "Product 1", price: 10, description: null }] },
          url: "http://example1.com"
        },
        {
          extract: { products: [{ name: "Product 1", price: null, description: "Great product" }] },
          url: "http://example2.com"
        }
      ];
      const schema = {
        type: "object",
        properties: {
          products: {
            type: "array",
            items: {
              type: "object",
              properties: {
                name: { type: "string" },
                price: { type: "number" },
                description: { type: "string" }
              }
            }
          }
        }
      };

      // Transform results first, then record their sources on the tracker.
      const multiEntityResult = sourceTracker.transformResults(extractionResults, schema);
      sourceTracker.trackPreDeduplicationSources(multiEntityResult);

      // Test source mapping with a merged item that matches both sources
      const sources = sourceTracker.mapSourcesToFinalItems(
        {
          products: [
            { name: "Product 1", price: 10, description: "Great product" }
          ]
        },
        ["products"]
      );

      // Keys have the form "<property>[<index>]"; the merged item lists both URLs.
      expect(sources).toEqual({
        "products[0]": ["http://example1.com", "http://example2.com"]
      });
    });

    it("should handle empty results", () => {
      const sources = sourceTracker.mapSourcesToFinalItems({}, []);

      expect(sources).toEqual({});
    });

    it("should handle non-array properties", () => {
      // The `as any` cast sidesteps compile-time checking of the argument so
      // a non-array value can be supplied for the listed key.
      const sources = sourceTracker.mapSourcesToFinalItems(
        { nonArray: "value" } as any,
        ["nonArray"]
      );

      expect(sources).toEqual({});
    });
  });

  describe("trackPreDeduplicationSources", () => {
    it("should track sources before deduplication", () => {
      // The same item is extracted from two different URLs.
      const extractionResults = [
        {
          extract: { products: [{ id: 1, name: "Product 1" }] },
          url: "http://example1.com"
        },
        {
          extract: { products: [{ id: 1, name: "Product 1" }] },
          url: "http://example2.com"
        }
      ];
      const schema = {
        type: "object",
        properties: {
          products: {
            type: "array",
            items: {
              type: "object",
              properties: {
                id: { type: "number" },
                name: { type: "string" }
              }
            }
          }
        }
      };

      const multiEntityResult = sourceTracker.transformResults(extractionResults, schema);
      sourceTracker.trackPreDeduplicationSources(multiEntityResult);

      // Test source mapping after deduplication: one surviving item is
      // expected to be attributed to both URLs.
      const sources = sourceTracker.mapSourcesToFinalItems(
        {
          products: [{ id: 1, name: "Product 1" }]
        },
        ["products"]
      );

      expect(sources).toEqual({
        "products[0]": ["http://example1.com", "http://example2.com"]
      });
    });
  });
});