| import { parseISO } from "date-fns"; |
| import type { z } from "zod/v4"; |
| import type { invoiceSchema, receiptSchema } from "../schema"; |
|
|
/** Static type of an invoice extraction result, inferred from `invoiceSchema`. */
type InvoiceData = z.infer<typeof invoiceSchema>;

/** Static type of a receipt extraction result, inferred from `receiptSchema`. */
type ReceiptData = z.infer<typeof receiptSchema>;
|
|
| |
| |
| |
/**
 * Checks that a currency value has the three-uppercase-letter shape used by
 * codes such as "USD" or "EUR".
 *
 * Only the shape is tested; the value is not looked up in any list of real
 * currencies.
 *
 * @param currency - Candidate currency code; `null`, `undefined` and the
 *   empty string are all rejected.
 * @returns `true` when `currency` consists of exactly three letters `A`–`Z`.
 */
export function isValidCurrencyCode(
  currency: string | null | undefined,
): boolean {
  const threeUppercaseLetters = /^[A-Z]{3}$/;

  return currency ? threeUppercaseLetters.test(currency) : false;
}
|
|
| |
| |
| |
/**
 * Checks that a value is a `YYYY-MM-DD` string that also parses to a real date.
 *
 * Validation happens in two steps: the string must match the
 * four-digit/two-digit/two-digit shape, and `parseISO` must then produce a
 * `Date` whose timestamp is not `NaN`.
 *
 * @param date - Candidate date string; `null`, `undefined` and the empty
 *   string are rejected.
 * @returns `true` when both the shape check and the parse succeed.
 */
export function isValidDateFormat(date: string | null | undefined): boolean {
  if (!date || !/^\d{4}-\d{2}-\d{2}$/.test(date)) {
    return false;
  }

  // A string can have the right shape and still not parse to a usable date;
  // in that case the parsed Date carries a NaN timestamp.
  const timestamp = parseISO(date).getTime();
  return !Number.isNaN(timestamp);
}
|
|
| |
| |
| |
/**
 * Checks that a date falls inside a plausibility window around today.
 *
 * The window runs from the same calendar day ten years ago to the same
 * calendar day two years ahead, both ends inclusive. The bounds are built
 * from the local year/month/day of the current moment.
 *
 * @param date - Candidate `YYYY-MM-DD` string; anything rejected by
 *   `isValidDateFormat` is reported as out of range.
 * @returns `true` when the parsed date lies within the window.
 */
export function isDateInReasonableRange(
  date: string | null | undefined,
): boolean {
  if (!date || !isValidDateFormat(date)) {
    return false;
  }

  const candidate = parseISO(date).getTime();

  const today = new Date();
  const year = today.getFullYear();
  const month = today.getMonth();
  const day = today.getDate();

  const earliest = new Date(year - 10, month, day).getTime();
  const latest = new Date(year + 2, month, day).getTime();

  return candidate >= earliest && candidate <= latest;
}
|
|
| |
| |
| |
/**
 * Checks that a monetary amount is a usable, non-negative number.
 *
 * @param amount - Candidate amount; `null`, `undefined` and `NaN` are rejected.
 * @returns `true` when `amount` lies between 0 and 1,000,000,000,000
 *   inclusive. Infinite values fall outside that range and are rejected.
 */
export function isValidAmount(amount: number | null | undefined): boolean {
  // Upper sanity bound: anything larger is treated as an extraction error.
  const maxAmount = 1_000_000_000_000;

  if (amount == null || Number.isNaN(amount)) {
    return false;
  }

  return amount >= 0 && amount <= maxAmount;
}
|
|
| |
| |
| |
/**
 * Checks that a tax rate is a number in the closed range 0–100.
 *
 * @param rate - Candidate rate; `null`, `undefined` and `NaN` are rejected.
 * @returns `true` when `0 <= rate <= 100`.
 */
export function isValidTaxRate(rate: number | null | undefined): boolean {
  if (rate == null || Number.isNaN(rate)) {
    return false;
  }

  const outOfRange = rate < 0 || rate > 100;
  return !outOfRange;
}
|
|
| |
| |
| |
| |
/**
 * Cross-checks a document total against its tax amount and line items.
 *
 * The check is lenient: it reports `true` whenever there is not enough usable
 * data to demonstrate an inconsistency.
 *
 * - Total is absent, zero or rejected by `isValidAmount`: `true`.
 * - Tax is absent or rejected by `isValidAmount`: `true`.
 * - Line items sum to more than zero: `true` only when `subtotal + tax` is
 *   within one cent of the total.
 * - Otherwise: `true` only when the tax does not exceed the total.
 *
 * @param totalAmount - Stated document total.
 * @param taxAmount - Stated tax amount.
 * @param lineItems - Line items; entries whose `total_price` is `null`, zero
 *   or rejected by `isValidAmount` contribute nothing to the subtotal.
 * @returns `false` only when the available figures contradict each other.
 */
export function validateAmountConsistency(
  totalAmount: number | null | undefined,
  taxAmount: number | null | undefined,
  lineItems: Array<{ total_price: number | null }> | null | undefined,
): boolean {
  // Largest accepted gap between the stated and the recomputed total.
  const tolerance = 0.01;
  // Slack for binary floating-point noise: `100.01 - 100` evaluates to
  // 0.010000000000005116, which a bare `<= 0.01` comparison would reject even
  // though the real gap is exactly one cent.
  const floatSlack = 1e-9;

  if (!totalAmount || !isValidAmount(totalAmount)) return true;

  // Both remaining checks need a usable tax amount.
  if (
    taxAmount === null ||
    taxAmount === undefined ||
    !isValidAmount(taxAmount)
  ) {
    return true;
  }

  let subtotal = 0;
  for (const item of lineItems ?? []) {
    if (item.total_price && isValidAmount(item.total_price)) {
      subtotal += item.total_price;
    }
  }

  if (subtotal > 0) {
    const difference = Math.abs(totalAmount - (subtotal + taxAmount));
    return difference <= tolerance + floatSlack;
  }

  // No usable line items: the tax can at least not be larger than the total.
  return taxAmount <= totalAmount;
}
|
|
| |
| |
| |
/** Outcome of scoring an extracted invoice with `calculateQualityScore`. */
export interface QualityScore {
  /** Remaining score after penalties; starts at 100 and is floored at 0. */
  score: number;
  /** Human-readable notes about problems found during scoring. */
  issues: string[];
  /** Names of critical fields that were reported as missing. */
  missingCriticalFields: string[];
  /** Names of fields that held a value which failed validation. */
  invalidFields: string[];
}
|
|
/**
 * Scores how complete and plausible an extracted invoice is.
 *
 * The score starts at 100, loses a fixed penalty per problem and is floored
 * at 0:
 *
 * - `total_amount` falsy or rejected by `isValidAmount`: -30, missing critical.
 * - `currency` rejected by `isValidCurrencyCode`: -25, missing critical.
 * - `vendor_name` falsy: -20, missing critical.
 * - neither `invoice_date` nor `due_date` present: -15, missing critical
 *   (recorded as `"invoice_date/due_date"`).
 * - each present date with a bad format or outside the reasonable range:
 *   -5, invalid field (the range case also adds an issue).
 * - `tax_amount` present but rejected by `isValidAmount`: -5, invalid field.
 * - `tax_rate` present but rejected by `isValidTaxRate`: -5, invalid field.
 * - amounts fail `validateAmountConsistency`: -10, issue.
 * - `invoice_number` falsy: -5, issue.
 *
 * @param result - Extracted invoice data.
 * @returns The score together with the issues and field names collected.
 */
export function calculateQualityScore(result: InvoiceData): QualityScore {
  const issues: string[] = [];
  const missingCriticalFields: string[] = [];
  const invalidFields: string[] = [];
  let score = 100;

  // NOTE(review): a present-but-invalid total or currency is reported as a
  // missing critical field with the full penalty. The former `else if`
  // branches that would have filed such values under `invalidFields` with a
  // smaller penalty repeated the predicate of the `if` above them and could
  // never run, so they were removed without changing any result.
  if (!result.total_amount || !isValidAmount(result.total_amount)) {
    missingCriticalFields.push("total_amount");
    score -= 30;
  }

  // `isValidCurrencyCode` already rejects null, undefined and "".
  if (!isValidCurrencyCode(result.currency)) {
    missingCriticalFields.push("currency");
    score -= 25;
  }

  if (!result.vendor_name) {
    missingCriticalFields.push("vendor_name");
    score -= 20;
  }

  if (!result.invoice_date && !result.due_date) {
    // At least one of the two dates is required.
    missingCriticalFields.push("invoice_date/due_date");
    score -= 15;
  } else {
    const dateFields = ["invoice_date", "due_date"] as const;
    for (const field of dateFields) {
      const value = result[field];
      if (!value) continue;

      if (!isValidDateFormat(value)) {
        invalidFields.push(field);
        score -= 5;
      } else if (!isDateInReasonableRange(value)) {
        invalidFields.push(field);
        issues.push(`${field} out of reasonable range`);
        score -= 5;
      }
    }
  }

  // Tax fields are optional; they are only penalised when present and invalid.
  if (
    result.tax_amount !== null &&
    result.tax_amount !== undefined &&
    !isValidAmount(result.tax_amount)
  ) {
    invalidFields.push("tax_amount");
    score -= 5;
  }

  if (
    result.tax_rate !== null &&
    result.tax_rate !== undefined &&
    !isValidTaxRate(result.tax_rate)
  ) {
    invalidFields.push("tax_rate");
    score -= 5;
  }

  if (
    result.total_amount &&
    !validateAmountConsistency(
      result.total_amount,
      result.tax_amount,
      result.line_items,
    )
  ) {
    issues.push("Amount consistency check failed (tax + subtotal ≠ total)");
    score -= 10;
  }

  if (!result.invoice_number) {
    issues.push("invoice_number missing (optional but important)");
    score -= 5;
  }

  // Penalties can add up to more than 100, so clamp at zero.
  return {
    score: Math.max(0, score),
    issues,
    missingCriticalFields,
    invalidFields,
  };
}
|
|
| |
| |
| |
| |
| |
/**
 * Decides whether an extracted invoice is too poor to be used as is.
 *
 * @param result - Extracted invoice data.
 * @param threshold - Minimum acceptable score; defaults to 70.
 * @returns `true` when any critical field is missing or when the score from
 *   `calculateQualityScore` is below `threshold`.
 */
export function isDataQualityPoor(
  result: InvoiceData,
  threshold = 70,
): boolean {
  const { score, missingCriticalFields } = calculateQualityScore(result);

  // A single missing critical field is disqualifying regardless of the score.
  if (missingCriticalFields.length > 0) {
    return true;
  }

  return score < threshold;
}
|
|
| |
| |
| |
/**
 * Lists the invoice fields that should be extracted again.
 *
 * @param result - Extracted invoice data.
 * @returns Missing critical fields followed by invalid fields, as reported by
 *   `calculateQualityScore`, with duplicates removed (first occurrence wins).
 */
export function getFieldsNeedingReExtraction(result: InvoiceData): string[] {
  const { missingCriticalFields, invalidFields } =
    calculateQualityScore(result);

  const uniqueFields = new Set<string>(
    missingCriticalFields.concat(invalidFields),
  );
  return Array.from(uniqueFields);
}
|
|
| |
| |
| |
| |
/**
 * Combines two invoice extraction passes, preferring the primary one.
 *
 * For each field the primary value is used when present, otherwise the
 * secondary value, otherwise a default (`null`, `[]` for `line_items`,
 * `"invoice"` for `document_type`).
 *
 * "Present" depends on the kind of field:
 * - Amount and rate fields (`total_amount`, `tax_amount`, `tax_rate`) fall
 *   back only on `null`/`undefined`, so a primary value of `0` is kept.
 * - All other scalar fields fall back on any falsy value, so an empty string
 *   in the primary result is replaced by the secondary value.
 * - `line_items` falls back when the primary list is absent or empty.
 *
 * @param primary - Preferred extraction result.
 * @param secondary - Fallback values; may cover only some fields.
 * @returns A new object with every field populated as described.
 */
export function mergeExtractionResults(
  primary: InvoiceData,
  secondary: Partial<InvoiceData>,
): InvoiceData {
  return {
    document_type:
      primary.document_type || secondary.document_type || "invoice",
    invoice_number: primary.invoice_number || secondary.invoice_number || null,
    invoice_date: primary.invoice_date || secondary.invoice_date || null,
    due_date: primary.due_date || secondary.due_date || null,
    currency: primary.currency || secondary.currency || null,
    // `??` rather than `||`: zero is a legitimate amount or rate and must not
    // be treated as "missing".
    total_amount: primary.total_amount ?? secondary.total_amount ?? null,
    tax_amount: primary.tax_amount ?? secondary.tax_amount ?? null,
    tax_rate: primary.tax_rate ?? secondary.tax_rate ?? null,
    tax_type: primary.tax_type || secondary.tax_type || null,
    vendor_name: primary.vendor_name || secondary.vendor_name || null,
    vendor_address: primary.vendor_address || secondary.vendor_address || null,
    customer_name: primary.customer_name || secondary.customer_name || null,
    customer_address:
      primary.customer_address || secondary.customer_address || null,
    website: primary.website || secondary.website || null,
    email: primary.email || secondary.email || null,
    line_items:
      primary.line_items && primary.line_items.length > 0
        ? primary.line_items
        : secondary.line_items && secondary.line_items.length > 0
          ? secondary.line_items
          : [],
    payment_instructions:
      primary.payment_instructions || secondary.payment_instructions || null,
    notes: primary.notes || secondary.notes || null,
    language: primary.language || secondary.language || null,
  };
}
|
|
| |
| |
| |
/** Outcome of scoring an extracted receipt with `calculateReceiptQualityScore`. */
export interface ReceiptQualityScore {
  /** Remaining score after penalties; starts at 100 and is floored at 0. */
  score: number;
  /** Human-readable notes about problems found during scoring. */
  issues: string[];
  /** Names of critical fields that were reported as missing. */
  missingCriticalFields: string[];
  /** Names of fields that held a value which failed validation. */
  invalidFields: string[];
}
|
|
/**
 * Scores how complete and plausible an extracted receipt is.
 *
 * The score starts at 100, loses a fixed penalty per problem and is floored
 * at 0:
 *
 * - `total_amount` falsy or rejected by `isValidAmount`: -30, missing critical.
 * - `currency` rejected by `isValidCurrencyCode`: -25, missing critical.
 * - `store_name` falsy: -20, missing critical.
 * - `date` falsy: -15, missing critical; present with a bad format or outside
 *   the reasonable range: -5, invalid field (the range case also adds an
 *   issue).
 * - `tax_amount` absent: -10, missing critical; present but rejected by
 *   `isValidAmount`: -5, invalid field.
 * - `tax_rate` present but rejected by `isValidTaxRate`: -5, invalid field.
 * - amounts fail `validateAmountConsistency`: -10, issue.
 *
 * @param result - Extracted receipt data.
 * @returns The score together with the issues and field names collected.
 */
export function calculateReceiptQualityScore(
  result: ReceiptData,
): ReceiptQualityScore {
  const issues: string[] = [];
  const missingCriticalFields: string[] = [];
  const invalidFields: string[] = [];
  let score = 100;

  // NOTE(review): a present-but-invalid total or currency is reported as a
  // missing critical field with the full penalty. The former `else if`
  // branches that would have filed such values under `invalidFields` with a
  // smaller penalty repeated the predicate of the `if` above them and could
  // never run, so they were removed without changing any result.
  if (!result.total_amount || !isValidAmount(result.total_amount)) {
    missingCriticalFields.push("total_amount");
    score -= 30;
  }

  // `isValidCurrencyCode` already rejects null, undefined and "".
  if (!isValidCurrencyCode(result.currency)) {
    missingCriticalFields.push("currency");
    score -= 25;
  }

  if (!result.store_name) {
    missingCriticalFields.push("store_name");
    score -= 20;
  }

  if (!result.date) {
    missingCriticalFields.push("date");
    score -= 15;
  } else if (!isValidDateFormat(result.date)) {
    invalidFields.push("date");
    score -= 5;
  } else if (!isDateInReasonableRange(result.date)) {
    invalidFields.push("date");
    issues.push("date out of reasonable range");
    score -= 5;
  }

  // Unlike invoices, a receipt without any tax amount counts as missing a
  // critical field.
  if (result.tax_amount === null || result.tax_amount === undefined) {
    missingCriticalFields.push("tax_amount");
    score -= 10;
  } else if (!isValidAmount(result.tax_amount)) {
    invalidFields.push("tax_amount");
    score -= 5;
  }

  if (
    result.tax_rate !== null &&
    result.tax_rate !== undefined &&
    !isValidTaxRate(result.tax_rate)
  ) {
    invalidFields.push("tax_rate");
    score -= 5;
  }

  // NOTE(review): `subtotal_amount` only gates this check. The subtotal that
  // is actually compared is the sum of the item prices, computed inside
  // `validateAmountConsistency` — confirm that this is intended.
  if (
    result.total_amount &&
    result.subtotal_amount &&
    result.tax_amount &&
    !validateAmountConsistency(
      result.total_amount,
      result.tax_amount,
      result.items?.map((item) => ({ total_price: item.total_price })) ?? [],
    )
  ) {
    issues.push("Amount consistency check failed (tax + subtotal ≠ total)");
    score -= 10;
  }

  // Penalties can add up to more than 100, so clamp at zero.
  return {
    score: Math.max(0, score),
    issues,
    missingCriticalFields,
    invalidFields,
  };
}
|
|
| |
| |
| |
| |
| |
/**
 * Decides whether an extracted receipt is too poor to be used as is.
 *
 * @param result - Extracted receipt data.
 * @param threshold - Minimum acceptable score; defaults to 70.
 * @returns `true` when any critical field is missing or when the score from
 *   `calculateReceiptQualityScore` is below `threshold`.
 */
export function isReceiptDataQualityPoor(
  result: ReceiptData,
  threshold = 70,
): boolean {
  const { score, missingCriticalFields } = calculateReceiptQualityScore(result);

  // A single missing critical field is disqualifying regardless of the score.
  if (missingCriticalFields.length > 0) {
    return true;
  }

  return score < threshold;
}
|
|
| |
| |
| |
/**
 * Lists the receipt fields that should be extracted again.
 *
 * @param result - Extracted receipt data.
 * @returns Missing critical fields followed by invalid fields, as reported by
 *   `calculateReceiptQualityScore`, with duplicates removed (first occurrence
 *   wins).
 */
export function getReceiptFieldsNeedingReExtraction(
  result: ReceiptData,
): string[] {
  const { missingCriticalFields, invalidFields } =
    calculateReceiptQualityScore(result);

  const uniqueFields = new Set<string>(
    missingCriticalFields.concat(invalidFields),
  );
  return Array.from(uniqueFields);
}
|
|
| |
| |
| |
/**
 * Combines two receipt extraction passes, preferring the primary one.
 *
 * For each field the primary value is used when present, otherwise the
 * secondary value, otherwise a default (`null`, `[]` for `items`,
 * `"receipt"` for `document_type`).
 *
 * "Present" depends on the kind of field:
 * - Amount and rate fields (`total_amount`, `subtotal_amount`, `tax_amount`,
 *   `tax_rate`) fall back only on `null`/`undefined`, so a primary value of
 *   `0` is kept.
 * - All other scalar fields fall back on any falsy value, so an empty string
 *   in the primary result is replaced by the secondary value.
 * - `items` falls back when the primary list is absent or empty.
 *
 * @param primary - Preferred extraction result.
 * @param secondary - Fallback values; may cover only some fields.
 * @returns A new object with every field populated as described.
 */
export function mergeReceiptExtractionResults(
  primary: ReceiptData,
  secondary: Partial<ReceiptData>,
): ReceiptData {
  return {
    document_type:
      primary.document_type || secondary.document_type || "receipt",
    date: primary.date || secondary.date || null,
    currency: primary.currency || secondary.currency || null,
    // `??` rather than `||`: zero is a legitimate amount or rate and must not
    // be treated as "missing".
    total_amount: primary.total_amount ?? secondary.total_amount ?? null,
    subtotal_amount:
      primary.subtotal_amount ?? secondary.subtotal_amount ?? null,
    tax_amount: primary.tax_amount ?? secondary.tax_amount ?? null,
    // No `null` default here, matching the previous behaviour — presumably
    // the schema declares `tax_rate` as optional rather than nullable;
    // confirm against `receiptSchema` before adding one.
    tax_rate: primary.tax_rate ?? secondary.tax_rate,
    tax_type: primary.tax_type || secondary.tax_type || null,
    store_name: primary.store_name || secondary.store_name || null,
    website: primary.website || secondary.website || null,
    payment_method: primary.payment_method || secondary.payment_method || null,
    items:
      primary.items && primary.items.length > 0
        ? primary.items
        : secondary.items && secondary.items.length > 0
          ? secondary.items
          : [],
    cashier_name: primary.cashier_name || secondary.cashier_name || null,
    email: primary.email || secondary.email || null,
    register_number:
      primary.register_number || secondary.register_number || null,
    language: primary.language || secondary.language || null,
  };
}
|
|