| import { parseISO } from "date-fns"; |
| import type { z } from "zod/v4"; |
| import type { invoiceSchema, receiptSchema } from "../schema"; |
| import { |
| calculateQualityScore, |
| calculateReceiptQualityScore, |
| } from "./validation"; |
|
|
/** Invoice payload shape, inferred from the zod `invoiceSchema`. */
type InvoiceData = z.infer<typeof invoiceSchema>;
/** Receipt payload shape, inferred from the zod `receiptSchema`. */
type ReceiptData = z.infer<typeof receiptSchema>;
|
|
| |
| |
| |
| |
/**
 * Derives a confidence value for an extracted invoice.
 *
 * The value starts at `qualityScore.score / 100` and is then adjusted, in
 * this order:
 *   1. +0.10 when no critical fields are missing,
 *   2. -0.05 for every missing critical field,
 *   3. +0.05 when `vendor_name` is longer than five characters,
 *   4. +0.05 when `invoice_number` is present.
 * Every bonus is capped at 1.0 as it is applied, and the final value is
 * clamped to the range [0, 1].
 *
 * @param result - Extracted invoice; only `vendor_name` and `invoice_number` are read.
 * @param qualityScore - Quality score (divided by 100 here) plus the list of missing critical fields.
 * @returns The adjusted confidence, clamped to [0, 1].
 */
export function calculateExtractionConfidence(
  result: InvoiceData,
  qualityScore: { score: number; missingCriticalFields: string[] },
): number {
  // Adds a bonus without letting the running value exceed 1.0.
  const addCapped = (value: number, bonus: number): number =>
    Math.min(1.0, value + bonus);

  const missingCount = qualityScore.missingCriticalFields.length;
  const hasLongVendorName = (result.vendor_name?.length ?? 0) > 5;
  const hasInvoiceNumber = Boolean(result.invoice_number);

  let score = qualityScore.score / 100;

  // Completeness bonus and per-field penalty are mutually exclusive in effect:
  // with zero missing fields the penalty below is 0.
  if (missingCount === 0) score = addCapped(score, 0.1);
  score = score - missingCount * 0.05;

  if (hasLongVendorName) score = addCapped(score, 0.05);
  if (hasInvoiceNumber) score = addCapped(score, 0.05);

  return Math.max(0, Math.min(1, score));
}
|
|
| |
| |
| |
/**
 * Derives a confidence value for an extracted receipt.
 *
 * Starting from `qualityScore.score / 100`, the value gains 0.10 (capped at
 * 1.0) when no critical fields are missing, loses 0.05 per missing critical
 * field, and gains a further 0.05 (capped at 1.0) when `store_name` is longer
 * than three characters. The result is clamped to the range [0, 1].
 *
 * @param result - Extracted receipt; only `store_name` is read.
 * @param qualityScore - Quality score (divided by 100 here) plus the list of missing critical fields.
 * @returns The adjusted confidence, clamped to [0, 1].
 */
export function calculateReceiptExtractionConfidence(
  result: ReceiptData,
  qualityScore: { score: number; missingCriticalFields: string[] },
): number {
  const missingCount = qualityScore.missingCriticalFields.length;
  const storeName = result.store_name;

  const base = qualityScore.score / 100;

  // Reward a complete extraction...
  const withCompleteness =
    missingCount === 0 ? Math.min(1.0, base + 0.1) : base;

  // ...and penalise each missing critical field.
  const withPenalty = withCompleteness - missingCount * 0.05;

  // A store name of more than three characters earns a small bonus.
  const withStoreName =
    storeName && storeName.length > 3
      ? Math.min(1.0, withPenalty + 0.05)
      : withPenalty;

  return Math.max(0, Math.min(1, withStoreName));
}
|
|
| |
| |
| |
/**
 * Field-specific tie-breakers for invoice values.
 *
 * Each rule receives the primary and secondary value of one field and returns
 * the value to keep. `mergeInvoiceResults` consults these rules when both
 * extractions supply a value and their confidences are within 0.1 of each
 * other.
 */
const invoiceMergeRules: Record<string, (primary: any, secondary: any) => any> =
  {
    // Keep the longer vendor name; ties go to primary.
    vendor_name: (primary, secondary) => {
      if (!primary && !secondary) return null;
      if (!primary) return secondary;
      if (!secondary) return primary;

      return primary.length >= secondary.length ? primary : secondary;
    },

    // Keep the longer invoice number; ties go to primary.
    invoice_number: (primary, secondary) => {
      if (!primary && !secondary) return null;
      if (!primary) return secondary;
      if (!secondary) return primary;

      return primary.length >= secondary.length ? primary : secondary;
    },

    // Primary wins whenever it is set (0 counts as set); falls back to
    // secondary, then to 0.
    total_amount: (primary, secondary) => {
      if (primary !== null && primary !== undefined) return primary;
      return secondary !== null && secondary !== undefined ? secondary : 0;
    },

    // Prefer any currency other than "USD" (primary first); otherwise use
    // whichever value exists, defaulting to "USD".
    currency: (primary, secondary) => {
      if (primary && primary !== "USD") return primary;
      if (secondary && secondary !== "USD") return secondary;
      return primary || secondary || "USD";
    },

    // Keep the later of the two dates; ties go to primary.
    invoice_date: (primary, secondary) => {
      if (!primary && !secondary) return null;
      if (!primary) return secondary;
      if (!secondary) return primary;

      const primaryTime = parseISO(primary).getTime();
      const secondaryTime = parseISO(secondary).getTime();

      // An unparseable string yields an Invalid Date whose timestamp is NaN.
      // Every comparison with NaN is false, so without these guards an
      // unparseable secondary would replace a valid primary.
      if (Number.isNaN(secondaryTime)) return primary;
      if (Number.isNaN(primaryTime)) return secondary;

      return primaryTime >= secondaryTime ? primary : secondary;
    },

    // Prefer the value that contains "@" when only one of them does;
    // otherwise keep primary.
    email: (primary, secondary) => {
      if (!primary && !secondary) return null;
      if (!primary) return secondary;
      if (!secondary) return primary;

      if (primary.includes("@") && !secondary.includes("@")) return primary;
      if (secondary.includes("@") && !primary.includes("@")) return secondary;
      return primary;
    },

    // Compare lengths with a leading "www." stripped and keep the shorter
    // value; ties go to primary. The returned value is the unstripped original.
    website: (primary, secondary) => {
      if (!primary && !secondary) return null;
      if (!primary) return secondary;
      if (!secondary) return primary;

      const primaryClean = primary.replace(/^www\./, "");
      const secondaryClean = secondary.replace(/^www\./, "");
      return primaryClean.length <= secondaryClean.length ? primary : secondary;
    },
  };
|
|
| |
| |
| |
/**
 * Field-specific tie-breakers for receipt values.
 *
 * Each rule receives the primary and secondary value of one field and returns
 * the value to keep. `mergeReceiptResults` consults these rules when both
 * extractions supply a value and their confidences are within 0.1 of each
 * other.
 */
const receiptMergeRules: Record<string, (primary: any, secondary: any) => any> =
  {
    // Keep the longer store name; ties go to primary.
    store_name: (primary, secondary) => {
      if (!primary && !secondary) return null;
      if (!primary) return secondary;
      if (!secondary) return primary;
      return primary.length >= secondary.length ? primary : secondary;
    },

    // Primary wins whenever it is set (0 counts as set); falls back to
    // secondary, then to 0.
    total_amount: (primary, secondary) => {
      if (primary !== null && primary !== undefined) return primary;
      return secondary !== null && secondary !== undefined ? secondary : 0;
    },

    // Prefer any currency other than "USD" (primary first); otherwise use
    // whichever value exists, defaulting to "USD".
    currency: (primary, secondary) => {
      if (primary && primary !== "USD") return primary;
      if (secondary && secondary !== "USD") return secondary;
      return primary || secondary || "USD";
    },

    // Keep the later of the two dates; ties go to primary.
    date: (primary, secondary) => {
      if (!primary && !secondary) return null;
      if (!primary) return secondary;
      if (!secondary) return primary;

      const primaryTime = parseISO(primary).getTime();
      const secondaryTime = parseISO(secondary).getTime();

      // An unparseable string yields an Invalid Date whose timestamp is NaN.
      // Every comparison with NaN is false, so without these guards an
      // unparseable secondary would replace a valid primary.
      if (Number.isNaN(secondaryTime)) return primary;
      if (Number.isNaN(primaryTime)) return secondary;

      return primaryTime >= secondaryTime ? primary : secondary;
    },
  };
|
|
| |
| |
| |
/**
 * Merges two invoice extractions into a single record.
 *
 * The result starts as a shallow copy of `primary`. For every field of
 * `secondary` that is neither `null` nor `undefined`:
 *   - if `primary` has no value for the field, the secondary value fills it;
 *   - if both have a value and the confidences differ by 0.1 or more, the
 *     value from the higher-confidence extraction is kept (primary on a tie);
 *   - if both have a value and the confidences are within 0.1, the field's
 *     entry in `invoiceMergeRules` decides, falling back to the primary value
 *     when no rule exists.
 *
 * A `null`/`undefined` `secondary` yields a copy of `primary` rather than
 * throwing.
 *
 * @param primary - Preferred extraction; never mutated.
 * @param secondary - Alternative extraction, possibly partial.
 * @param primaryConfidence - Confidence of `primary`; when omitted it is
 *   computed with `calculateExtractionConfidence` and `calculateQualityScore`.
 * @param secondaryConfidence - Confidence of `secondary`; computed the same
 *   way when omitted.
 * @returns A new object containing the merged fields.
 */
export function mergeInvoiceResults(
  primary: InvoiceData,
  secondary: Partial<InvoiceData>,
  primaryConfidence?: number,
  secondaryConfidence?: number,
): InvoiceData {
  const merged: Record<string, unknown> = { ...primary };

  // Nothing to merge. Without this guard Object.entries() below would throw.
  if (secondary === null || secondary === undefined) {
    return merged as InvoiceData;
  }

  const primaryConf =
    primaryConfidence ??
    calculateExtractionConfidence(primary, calculateQualityScore(primary));
  const secondaryConf =
    secondaryConfidence ??
    calculateExtractionConfidence(
      secondary as InvoiceData,
      calculateQualityScore(secondary as InvoiceData),
    );

  // Loop-invariant: decides between rule-based and confidence-based resolution.
  const confidencesAreClose = Math.abs(primaryConf - secondaryConf) < 0.1;
  const primaryFields = primary as Record<string, unknown>;

  for (const [field, secondaryValue] of Object.entries(secondary)) {
    // Empty secondary values never overwrite anything.
    if (secondaryValue === null || secondaryValue === undefined) {
      continue;
    }

    const primaryValue = primaryFields[field];

    // Only the secondary extraction has a value: use it to fill the gap.
    if (primaryValue === null || primaryValue === undefined) {
      merged[field] = secondaryValue;
      continue;
    }

    // Clear confidence gap: trust the more confident extraction.
    if (!confidencesAreClose) {
      merged[field] =
        primaryConf >= secondaryConf ? primaryValue : secondaryValue;
      continue;
    }

    // Own-property check so a field named e.g. "constructor" or "toString"
    // cannot resolve to an inherited Object.prototype member.
    const rule = Object.prototype.hasOwnProperty.call(invoiceMergeRules, field)
      ? invoiceMergeRules[field]
      : undefined;
    merged[field] = rule ? rule(primaryValue, secondaryValue) : primaryValue;
  }

  return merged as InvoiceData;
}
|
|
| |
| |
| |
/**
 * Merges two receipt extractions into a single record.
 *
 * The result starts as a shallow copy of `primary`. For every field of
 * `secondary` that is neither `null` nor `undefined`:
 *   - if `primary` has no value for the field, the secondary value fills it;
 *   - if both have a value and the confidences differ by 0.1 or more, the
 *     value from the higher-confidence extraction is kept (primary on a tie);
 *   - if both have a value and the confidences are within 0.1, the field's
 *     entry in `receiptMergeRules` decides, falling back to the primary value
 *     when no rule exists.
 *
 * A `null`/`undefined` `secondary` yields a copy of `primary` rather than
 * throwing.
 *
 * @param primary - Preferred extraction; never mutated.
 * @param secondary - Alternative extraction, possibly partial.
 * @param primaryConfidence - Confidence of `primary`; when omitted it is
 *   computed with `calculateReceiptExtractionConfidence` and
 *   `calculateReceiptQualityScore`.
 * @param secondaryConfidence - Confidence of `secondary`; computed the same
 *   way when omitted.
 * @returns A new object containing the merged fields.
 */
export function mergeReceiptResults(
  primary: ReceiptData,
  secondary: Partial<ReceiptData>,
  primaryConfidence?: number,
  secondaryConfidence?: number,
): ReceiptData {
  const merged: Record<string, unknown> = { ...primary };

  // Nothing to merge. Without this guard Object.entries() below would throw.
  if (secondary === null || secondary === undefined) {
    return merged as ReceiptData;
  }

  const primaryConf =
    primaryConfidence ??
    calculateReceiptExtractionConfidence(
      primary,
      calculateReceiptQualityScore(primary),
    );
  const secondaryConf =
    secondaryConfidence ??
    calculateReceiptExtractionConfidence(
      secondary as ReceiptData,
      calculateReceiptQualityScore(secondary as ReceiptData),
    );

  // Loop-invariant: decides between rule-based and confidence-based resolution.
  const confidencesAreClose = Math.abs(primaryConf - secondaryConf) < 0.1;
  const primaryFields = primary as Record<string, unknown>;

  for (const [field, secondaryValue] of Object.entries(secondary)) {
    // Empty secondary values never overwrite anything.
    if (secondaryValue === null || secondaryValue === undefined) {
      continue;
    }

    const primaryValue = primaryFields[field];

    // Only the secondary extraction has a value: use it to fill the gap.
    if (primaryValue === null || primaryValue === undefined) {
      merged[field] = secondaryValue;
      continue;
    }

    // Clear confidence gap: trust the more confident extraction.
    if (!confidencesAreClose) {
      merged[field] =
        primaryConf >= secondaryConf ? primaryValue : secondaryValue;
      continue;
    }

    // Own-property check so a field named e.g. "constructor" or "toString"
    // cannot resolve to an inherited Object.prototype member.
    const rule = Object.prototype.hasOwnProperty.call(receiptMergeRules, field)
      ? receiptMergeRules[field]
      : undefined;
    merged[field] = rule ? rule(primaryValue, secondaryValue) : primaryValue;
  }

  return merged as ReceiptData;
}
|
|