| import { createGoogleGenerativeAI } from "@ai-sdk/google"; |
| import { createMistral } from "@ai-sdk/mistral"; |
| import { createLoggerWithContext } from "@midday/logger"; |
| import { generateObject } from "ai"; |
| import type { z } from "zod/v4"; |
| import type { |
| ExtractionConfig, |
| ModelConfig, |
| } from "../config/extraction-config"; |
| import type { PromptComponents } from "../prompts/factory"; |
| import { createFieldSpecificPrompt } from "../prompts/field-specific"; |
| import type { DocumentFormat } from "../utils/format-detection"; |
| import { extractTextFromPdf } from "../utils/pdf-text-extract"; |
| import { retryCall } from "../utils/retry"; |
|
|
/**
 * Google Generative AI provider used by the extraction engine.
 * The API key is read from the GOOGLE_GENERATIVE_AI_API_KEY environment
 * variable when this module is loaded.
 */
const google = createGoogleGenerativeAI({ apiKey: process.env["GOOGLE_GENERATIVE_AI_API_KEY"]! });
|
|
/**
 * Mistral provider used by the extraction engine.
 * The API key is read from the MISTRAL_API_KEY environment variable when
 * this module is loaded.
 */
const mistral = createMistral({ apiKey: process.env["MISTRAL_API_KEY"]! });
|
|
| |
| |
| |
/**
 * Heuristically decides whether a thrown value represents a provider
 * rate-limit / quota failure.
 *
 * A value is treated as a rate-limit error when either:
 * - it is an object carrying a numeric `statusCode` or `status` of 429, or
 * - its message (an `Error`'s `message`, or the value itself when a plain
 *   string was thrown) contains one of the known rate-limit phrases.
 *
 * @param error - Whatever was caught; may be any value.
 * @returns `true` when the value looks like a rate-limit failure.
 */
function isRateLimitError(error: unknown): boolean {
  // Structured HTTP status, when the thrown object exposes one.
  if (typeof error === "object" && error !== null) {
    const { statusCode, status } = error as {
      statusCode?: unknown;
      status?: unknown;
    };
    if (statusCode === 429 || status === 429) {
      return true;
    }
  }

  let rawMessage: string;
  if (error instanceof Error) {
    rawMessage = error.message;
  } else if (typeof error === "string") {
    rawMessage = error;
  } else {
    return false;
  }

  const message = rawMessage.toLowerCase();
  const phrases = [
    "rate limit",
    "rate_limit",
    "too many requests",
    "quota",
    "resource_exhausted",
  ];
  if (phrases.some((phrase) => message.includes(phrase))) {
    return true;
  }

  // Match 429 only when it is not part of a longer number, so unrelated
  // figures such as "14290" do not count as a rate-limit status.
  return /(?<!\d)429(?!\d)/.test(message);
}
|
|
/**
 * Outcome of an extraction run: the extracted payload together with the
 * quality assessment computed for it.
 *
 * @typeParam T - Shape of the extracted data.
 */
export interface ExtractionResult<T> {
  /** The extracted data. */
  data: T;
  /** Quality assessment accompanying `data`. */
  qualityScore: {
    /** Numeric quality score. */
    score: number;
    /** Descriptions of detected quality issues. */
    issues: Array<string>;
    /** Names of critical fields that were not extracted. */
    missingCriticalFields: Array<string>;
    /** Names of fields considered invalid. */
    invalidFields: Array<string>;
  };
}
|
|
/**
 * Optional per-call settings for an extraction run.
 */
export interface ExtractionOptions {
  /** Company name forwarded to the prompt factory; may be omitted or null. */
  companyName?: null | string;
  /** Logger to use for this call instead of the engine's own logger. */
  logger?: ReturnType<typeof createLoggerWithContext>;
}
|
|
| |
| |
| |
| |
/**
 * Abstract, multi-pass document extraction engine built on `generateObject`.
 *
 * `extract` runs up to four passes:
 *  1. Extraction with the primary model, cascading to the secondary and
 *     tertiary models on failure.
 *  2. If the result quality is poor, a second extraction with the fallback
 *     models and a chain-of-thought prompt, merged into the first result.
 *  3. Targeted re-extraction of the fields the subclass reports as needing it.
 *  4. Cross-field consistency validation and application of suggested fixes.
 *
 * Subclasses supply the document-specific pieces: format detection, quality
 * scoring, confidence, merging and consistency rules.
 *
 * @typeParam T - Zod schema describing the extracted object.
 */
export abstract class BaseExtractionEngine<T extends z.ZodSchema> {
  /** Fields whose configured priority is at least this value form the critical batch. */
  private static readonly CRITICAL_PRIORITY_THRESHOLD = 8;
  /** Per-request timeout (ms) when re-extracting a critical field. */
  private static readonly CRITICAL_FIELD_TIMEOUT_MS = 90000;
  /** Per-request timeout (ms) when re-extracting a non-critical field. */
  private static readonly OTHER_FIELD_TIMEOUT_MS = 30000;

  protected config: ExtractionConfig<T>;
  protected logger: ReturnType<typeof createLoggerWithContext>;

  /**
   * @param config - Extraction configuration (models, schema, prompts, thresholds).
   * @param logger - Optional logger; when omitted one is created with a
   *   context derived from `getDocumentType()`.
   */
  constructor(
    config: ExtractionConfig<T>,
    logger?: ReturnType<typeof createLoggerWithContext>,
  ) {
    this.config = config;
    this.logger =
      logger ??
      createLoggerWithContext(`BaseExtractionEngine:${this.getDocumentType()}`);
  }

  /**
   * Document type label used for the logger context and field-specific
   * prompts. Returns "unknown" unless overridden.
   */
  protected getDocumentType(): string {
    return "unknown";
  }

  /** Returns the language model instance for the given provider/model pair. */
  private resolveLanguageModel(modelConfig: ModelConfig) {
    return modelConfig.provider === "mistral"
      ? mistral(modelConfig.model)
      : google(modelConfig.model);
  }

  /** Builds the user-message content part (file or image) pointing at the document. */
  private buildDocumentPart(documentUrl: string) {
    if (this.config.contentType === "file") {
      return {
        type: "file" as const,
        data: documentUrl,
        mediaType: this.config.mediaType,
      };
    }
    return {
      type: "image" as const,
      image: documentUrl,
    };
  }

  /** Returns the message of an `Error`, or "Unknown error" for any other value. */
  private describeError(error: unknown): string {
    return error instanceof Error ? error.message : "Unknown error";
  }

  /**
   * Runs one structured extraction against a specific model.
   *
   * @param documentUrl - Location of the document, sent as a file or image
   *   part depending on `config.contentType`.
   * @param prompt - System prompt.
   * @param modelConfig - Provider and model to use.
   * @returns The object produced by `generateObject`.
   * @throws Whatever `retryCall` / `generateObject` throw once retries are exhausted.
   */
  protected async extractWithProvider(
    documentUrl: string,
    prompt: string,
    modelConfig: ModelConfig,
  ): Promise<z.infer<T>> {
    const documentPart = this.buildDocumentPart(documentUrl);
    const model = this.resolveLanguageModel(modelConfig);

    // Provider-specific options are only sent for Mistral.
    const providerOptions =
      modelConfig.provider === "mistral"
        ? { mistral: { documentPageLimit: 10 } }
        : undefined;

    const result = await retryCall(
      () =>
        generateObject({
          model,
          schema: this.config.schema,
          temperature: 0.1,
          // A fresh timeout signal is created for every attempt.
          abortSignal: AbortSignal.timeout(this.config.timeout),
          messages: [
            { role: "system", content: prompt },
            { role: "user", content: [documentPart] },
          ],
          ...(providerOptions && { providerOptions }),
        }),
      this.config.retries,
      2000,
    );

    return result.object as z.infer<T>;
  }

  /**
   * Tries the primary, secondary and tertiary models in order and returns the
   * first successful result together with the model that produced it.
   *
   * @throws The last model's error when every model fails.
   */
  protected async extractWithCascadingFallback(
    documentUrl: string,
    prompt: string,
  ): Promise<{ result: z.infer<T>; usedModel: ModelConfig }> {
    const { primary, secondary, tertiary } = this.config.models;
    const candidates = [
      { config: primary, name: "primary" },
      { config: secondary, name: "secondary" },
      { config: tertiary, name: "tertiary" },
    ];

    let lastError: Error | null = null;

    for (const { config: modelConfig, name } of candidates) {
      try {
        this.logger.info(`Attempting extraction with ${name} model`, {
          provider: modelConfig.provider,
          model: modelConfig.model,
        });

        const result = await this.extractWithProvider(
          documentUrl,
          prompt,
          modelConfig,
        );

        this.logger.info(`Extraction succeeded with ${name} model`, {
          provider: modelConfig.provider,
          model: modelConfig.model,
        });

        return { result, usedModel: modelConfig };
      } catch (error) {
        lastError = error instanceof Error ? error : new Error(String(error));

        // Log and fall through to the next model in the cascade.
        this.logger.warn(`${name} model extraction failed`, {
          provider: modelConfig.provider,
          model: modelConfig.model,
          isRateLimit: isRateLimitError(error),
          error: lastError.message,
        });
      }
    }

    throw lastError ?? new Error("All extraction models failed");
  }

  /**
   * Pass 1 entry point: delegates to `extractWithCascadingFallback` and
   * returns only the extracted object.
   */
  protected async extractWithPrimaryModel(
    documentUrl: string,
    prompt: string,
  ): Promise<z.infer<T>> {
    const { result } = await this.extractWithCascadingFallback(
      documentUrl,
      prompt,
    );
    return result;
  }

  /**
   * Extracts with the secondary model, falling back to the tertiary model if
   * the secondary one throws.
   *
   * @throws The tertiary model's error when both fail.
   */
  protected async extractWithFallbackModel(
    documentUrl: string,
    prompt: string,
  ): Promise<z.infer<T>> {
    try {
      return await this.extractWithProvider(
        documentUrl,
        prompt,
        this.config.models.secondary,
      );
    } catch (error) {
      this.logger.warn("Secondary model failed, trying tertiary", {
        error: this.describeError(error),
      });
      return await this.extractWithProvider(
        documentUrl,
        prompt,
        this.config.models.tertiary,
      );
    }
  }

  /**
   * Extracts from the PDF's text content instead of the PDF itself: the text
   * is pulled out with `extractTextFromPdf` and sent to the model as a plain
   * text part, with a note appended to the prompt.
   *
   * @throws Error when no usable text could be extracted from the PDF.
   */
  protected async extractWithTextFallback(
    documentUrl: string,
    prompt: string,
    modelConfig: ModelConfig,
  ): Promise<z.infer<T>> {
    const extractedText = await extractTextFromPdf(documentUrl);

    // Whitespace-only text carries nothing to extract from, so it is treated
    // like a missing result instead of being sent to the model.
    if (!extractedText || extractedText.trim().length === 0) {
      throw new Error(
        "Failed to extract text from PDF - PDF may be image-based or corrupted",
      );
    }

    const modifiedPrompt = `${prompt}\n\nNOTE: The document content below was extracted as text from a PDF. Some formatting, layout, or visual elements may be missing. Please extract the requested information from the text content.`;

    const model = this.resolveLanguageModel(modelConfig);

    const result = await retryCall(
      () =>
        generateObject({
          model,
          schema: this.config.schema,
          temperature: 0.1,
          abortSignal: AbortSignal.timeout(this.config.timeout),
          messages: [
            { role: "system", content: modifiedPrompt },
            {
              role: "user",
              content: [{ type: "text" as const, text: extractedText }],
            },
          ],
        }),
      this.config.retries,
      2000,
    );

    return result.object as z.infer<T>;
  }

  /**
   * Classifies a poor extraction result and picks a retry strategy.
   *
   * - "comprehensive": critical fields missing, some of them numeric
   *   (name contains "amount" or "rate"), and a format was detected.
   * - "mathematical": numeric critical fields are missing.
   * - "format_aware": a format was detected.
   * - "field_specific": none of the above.
   *
   * @param result - The extraction result to analyse.
   * @param qualityScore - Quality score previously computed for `result`.
   */
  protected analyzeFailurePattern(
    result: z.infer<T>,
    qualityScore: {
      score: number;
      issues: string[];
      missingCriticalFields: string[];
      invalidFields: string[];
    },
  ): {
    strategy:
      | "field_specific"
      | "mathematical"
      | "format_aware"
      | "comprehensive";
    criticalFieldsMissing: boolean;
    consistencyIssues: boolean;
    formatIssues: boolean;
  } {
    const { missingCriticalFields, invalidFields } = qualityScore;
    const criticalFieldsMissing = missingCriticalFields.length > 0;
    const hasNumericFields = missingCriticalFields.some(
      (name) => name.includes("amount") || name.includes("rate"),
    );
    const detectedFormat = this.detectFormat(result);

    let strategy:
      | "field_specific"
      | "mathematical"
      | "format_aware"
      | "comprehensive";
    if (criticalFieldsMissing && hasNumericFields && detectedFormat) {
      strategy = "comprehensive";
    } else if (hasNumericFields) {
      strategy = "mathematical";
    } else if (detectedFormat) {
      strategy = "format_aware";
    } else {
      strategy = "field_specific";
    }

    return {
      strategy,
      criticalFieldsMissing,
      consistencyIssues: invalidFields.length > 0,
      formatIssues: detectedFormat !== undefined,
    };
  }

  /**
   * Re-extracts a single field with the secondary model using a
   * field-specific prompt. Never throws: failures are logged and reported as
   * `null`, as is a null/undefined value for the field.
   */
  private async reExtractSingleField(
    documentUrl: string,
    field: string,
    timeoutMs: number,
    companyName?: string | null,
    format?: DocumentFormat,
  ): Promise<{ field: string; value: unknown } | null> {
    try {
      let fieldPrompt = createFieldSpecificPrompt(
        field,
        this.getDocumentType() as "invoice" | "receipt",
        companyName,
      );

      if (format) {
        const formatHints = this.getFormatHintsForField(field, format);
        if (formatHints) {
          fieldPrompt = `${fieldPrompt}\n\n${formatHints}`;
        }
      }

      const model = this.resolveLanguageModel(this.config.models.secondary);

      const result = await retryCall(
        () =>
          generateObject({
            model,
            schema: this.config.schema,
            temperature: 0.1,
            abortSignal: AbortSignal.timeout(timeoutMs),
            messages: [
              { role: "system", content: fieldPrompt },
              { role: "user", content: [this.buildDocumentPart(documentUrl)] },
            ],
          }),
        1,
        1000,
      );

      const extracted: unknown = result.object;
      const value = (extracted as Record<string, unknown>)[field];
      return value !== null && value !== undefined ? { field, value } : null;
    } catch (error) {
      this.logger.warn(`Failed to re-extract field ${field}`, {
        field,
        error: this.describeError(error),
      });
      return null;
    }
  }

  /**
   * Re-extracts individual fields with the secondary model.
   *
   * Fields are ordered by `config.fieldPriority` (highest first) and split
   * into a critical batch (priority >= 8, 90 s timeout per request) and a
   * remaining batch (30 s timeout). Each batch runs its fields in parallel;
   * the critical batch completes before the remaining batch starts.
   *
   * @param documentUrl - Location of the document.
   * @param fields - Names of the fields to re-extract.
   * @param companyName - Forwarded to the field-specific prompt.
   * @param format - When given, format hints for the field are appended to its prompt.
   * @returns Only the fields for which a non-null value was obtained.
   */
  protected async reExtractFields(
    documentUrl: string,
    fields: string[],
    companyName?: string | null,
    format?: DocumentFormat | undefined,
  ): Promise<Partial<z.infer<T>>> {
    if (fields.length === 0) {
      return {};
    }

    // `||` rather than `??` so a missing or NaN priority both count as 0.
    const priorityOf = (field: string): number =>
      this.config.fieldPriority[field] || 0;
    const threshold = BaseExtractionEngine.CRITICAL_PRIORITY_THRESHOLD;

    const byPriority = [...fields].sort(
      (a, b) => priorityOf(b) - priorityOf(a),
    );

    const batches = [
      {
        label: "critical",
        fields: byPriority.filter((f) => priorityOf(f) >= threshold),
        timeoutMs: BaseExtractionEngine.CRITICAL_FIELD_TIMEOUT_MS,
      },
      {
        label: "other",
        fields: byPriority.filter((f) => priorityOf(f) < threshold),
        timeoutMs: BaseExtractionEngine.OTHER_FIELD_TIMEOUT_MS,
      },
    ];

    let reExtractedFields: Partial<z.infer<T>> = {};

    for (const batch of batches) {
      if (batch.fields.length === 0) {
        continue;
      }

      this.logger.info(`Re-extracting ${batch.label} fields in parallel`, {
        fields: batch.fields,
        count: batch.fields.length,
      });

      const outcomes = await Promise.allSettled(
        batch.fields.map((field) =>
          this.reExtractSingleField(
            documentUrl,
            field,
            batch.timeoutMs,
            companyName,
            format,
          ),
        ),
      );

      for (const outcome of outcomes) {
        if (outcome.status === "fulfilled" && outcome.value) {
          reExtractedFields = {
            ...reExtractedFields,
            [outcome.value.field]: outcome.value.value,
          };
        }
      }
    }

    return reExtractedFields;
  }

  /**
   * Decides whether a Pass 1 failure was a timeout.
   *
   * Recognises a `DOMException` with legacy code 23 (TIMEOUT_ERR), an error
   * named "TimeoutError", or an error whose message mentions "timeout" /
   * "timed out" in any letter case.
   */
  private isTimeoutFailure(error: unknown): boolean {
    if (
      typeof DOMException !== "undefined" &&
      error instanceof DOMException &&
      error.code === 23
    ) {
      return true;
    }
    if (!(error instanceof Error)) {
      return false;
    }
    if (error.name === "TimeoutError") {
      return true;
    }
    const message = error.message.toLowerCase();
    return message.includes("timeout") || message.includes("timed out");
  }

  /**
   * Recovery path taken when Pass 1 throws.
   *
   * For a PDF that timed out, first tries the text-extraction fallback with
   * the secondary model. If that is not applicable or fails, runs the
   * fallback models with a chain-of-thought prompt. Either way the result is
   * returned directly, without the later passes.
   *
   * @throws The fallback models' error when they fail as well.
   */
  private async recoverFromPassOneFailure(
    error: unknown,
    documentUrl: string,
    companyName: string | null | undefined,
    logger: ReturnType<typeof createLoggerWithContext>,
  ): Promise<ExtractionResult<z.infer<T>>> {
    const promptFactory = this.config.promptFactory;
    const isTimeoutError = this.isTimeoutFailure(error);
    const isPdfFile =
      this.config.contentType === "file" &&
      this.config.mediaType === "application/pdf";

    if (isTimeoutError && isPdfFile) {
      logger.warn(
        "PDF extraction timed out, attempting text extraction fallback",
        { error: this.describeError(error) },
      );

      try {
        const prompt = this.composePrompt(promptFactory(companyName), false);
        const textResult = await this.extractWithTextFallback(
          documentUrl,
          prompt,
          this.config.models.secondary,
        );

        logger.info("Text extraction fallback succeeded", {
          pass: 1,
          fallback: "text-extraction",
        });

        return {
          data: textResult,
          qualityScore: this.calculateQualityScore(textResult),
        };
      } catch (textFallbackError) {
        // Not fatal: continue with the model fallback below.
        logger.error("Text extraction fallback also failed", {
          error: this.describeError(textFallbackError),
        });
      }
    }

    logger.warn("Pass 1 failed, trying fallback model immediately", {
      error: this.describeError(error),
    });

    const fallbackPrompt = this.composePrompt(
      promptFactory(companyName, undefined),
      true,
    );
    const fallbackResult = await this.extractWithFallbackModel(
      documentUrl,
      fallbackPrompt,
    );
    return {
      data: fallbackResult,
      qualityScore: this.calculateQualityScore(fallbackResult),
    };
  }

  /**
   * Pass 4: validates cross-field consistency, applies any suggested fixes
   * and logs remaining issues.
   *
   * @returns The result with fixes applied, or the input unchanged when
   *   there is nothing to fix.
   */
  private runConsistencyPass(
    result: z.infer<T>,
    logger: ReturnType<typeof createLoggerWithContext>,
  ): z.infer<T> {
    const { issues, suggestedFixes } = this.validateConsistency(result);
    if (issues.length === 0 && suggestedFixes.length === 0) {
      return result;
    }

    logger.info("Pass 4: Cross-field consistency validation", {
      pass: 4,
      issues: issues.length,
      suggestedFixes: suggestedFixes.length,
    });

    let checked = result;
    if (suggestedFixes.length > 0) {
      checked = this.applyConsistencyFixes(result, suggestedFixes);
      logger.info("Applied consistency fixes", {
        fixesApplied: suggestedFixes.map((fix) => fix.field),
      });
    }

    if (issues.length > 0) {
      logger.warn("Cross-field consistency issues found", {
        issues: issues.map(({ field, issue, severity }) => ({
          field,
          issue,
          severity,
        })),
      });
    }

    return checked;
  }

  /**
   * Extracts structured data from a document using the multi-pass strategy
   * described on the class.
   *
   * @param documentUrl - Location of the document; must be non-empty.
   * @param options - Optional company name and per-call logger.
   * @returns The extracted data and the quality score of the final result.
   * @throws Error when `documentUrl` is empty, or when Pass 1 and its
   *   fallbacks all fail.
   */
  async extract(
    documentUrl: string,
    options: ExtractionOptions = {},
  ): Promise<ExtractionResult<z.infer<T>>> {
    const { companyName } = options;
    const logger = options.logger ?? this.logger;

    if (!documentUrl) {
      throw new Error("Document URL is required");
    }

    const promptFactory = this.config.promptFactory;
    let result: z.infer<T>;

    // Pass 1: standard prompt, primary model with cascading fallback.
    try {
      const prompt = this.composePrompt(promptFactory(companyName), false);

      logger.info("Pass 1: Extracting with cascading fallback", {
        pass: 1,
        primaryModel: `${this.config.models.primary.provider}:${this.config.models.primary.model}`,
      });

      result = await this.extractWithPrimaryModel(documentUrl, prompt);
    } catch (error) {
      return await this.recoverFromPassOneFailure(
        error,
        documentUrl,
        companyName,
        logger,
      );
    }

    const passOneQuality = this.calculateQualityScore(result);
    logger.info("Pass 1 quality score", {
      pass: 1,
      score: passOneQuality.score,
      issues: passOneQuality.issues,
      missingCriticalFields: passOneQuality.missingCriticalFields,
    });

    if (!this.isDataQualityPoor(result)) {
      return { data: result, qualityScore: passOneQuality };
    }

    // Pass 2: fallback models with a chain-of-thought, format-aware prompt.
    logger.info(
      "Pass 1 quality poor, running Pass 2 with fallback model and chain-of-thought",
      {
        pass: 2,
        model: `${this.config.models.secondary.provider}:${this.config.models.secondary.model}`,
      },
    );
    try {
      const detectedFormat = this.detectFormat(result);
      const chainOfThoughtPrompt = this.composePrompt(
        promptFactory(companyName, detectedFormat),
        true,
      );

      const fallbackResult = await this.extractWithFallbackModel(
        documentUrl,
        chainOfThoughtPrompt,
      );

      // `result` is unchanged since Pass 1, so its quality score is reused.
      const fallbackQuality = this.calculateQualityScore(fallbackResult);
      const primaryConfidence = this.calculateConfidence(
        result,
        passOneQuality,
      );
      const fallbackConfidence = this.calculateConfidence(
        fallbackResult,
        fallbackQuality,
      );

      logger.info("Confidence scores for Pass 2 merge", {
        primaryConfidence: primaryConfidence.toFixed(2),
        fallbackConfidence: fallbackConfidence.toFixed(2),
      });

      result = this.mergeResultsWithConfidence(
        result,
        fallbackResult,
        primaryConfidence,
        fallbackConfidence,
      );

      const mergedQualityScore = this.calculateQualityScore(result);
      logger.info("Pass 2 merged quality score", {
        pass: 2,
        score: mergedQualityScore.score,
        issues: mergedQualityScore.issues,
      });

      if (!this.isDataQualityPoor(result)) {
        return { data: result, qualityScore: mergedQualityScore };
      }
    } catch (fallbackError) {
      // Not fatal: continue to Pass 3 with whatever result is available.
      logger.warn("Pass 2 fallback extraction failed", {
        error: this.describeError(fallbackError),
      });
    }

    // Pass 3: targeted re-extraction of individual fields.
    const fieldsToReExtract = this.getFieldsNeedingReExtraction(result);
    if (fieldsToReExtract.length > 0) {
      logger.info("Pass 3: Re-extracting specific fields", {
        pass: 3,
        fields: fieldsToReExtract,
        count: fieldsToReExtract.length,
      });
      try {
        // Forward the detected format so per-field format hints are applied.
        const documentFormat = this.detectFormat(result);
        const reExtractedFields = await this.reExtractFields(
          documentUrl,
          fieldsToReExtract,
          companyName,
          documentFormat,
        );

        result = this.mergeResults(result, reExtractedFields);

        const finalQualityScore = this.calculateQualityScore(result);
        logger.info("Pass 3 final quality score", {
          pass: 3,
          score: finalQualityScore.score,
          issues: finalQualityScore.issues,
        });
      } catch (reExtractError) {
        // Not fatal: keep the result from the earlier passes.
        logger.warn("Pass 3 field re-extraction failed", {
          error: this.describeError(reExtractError),
        });
      }
    }

    // Pass 4: cross-field consistency.
    result = this.runConsistencyPass(result, logger);

    return {
      data: result,
      qualityScore: this.calculateQualityScore(result),
    };
  }

  /**
   * Builds prompt hints for one field from the detected document format.
   *
   * Hints are chosen by substring of the field name: "amount"/"rate" for the
   * number format, "date" for the date format, "tax" for the tax term.
   *
   * @returns The hints joined by newlines, or `null` when none apply.
   */
  protected getFormatHintsForField(
    field: string,
    format: DocumentFormat,
  ): string | null {
    const hints: string[] = [];

    const isNumeric = field.includes("amount") || field.includes("rate");
    if (isNumeric && format.numberFormat === "european") {
      hints.push(
        "NUMBER FORMAT: Use European format (1.234,56) - comma as decimal separator.",
      );
    }

    if (field.includes("date") && format.dateFormat === "european") {
      hints.push("DATE FORMAT: Convert from DD/MM/YYYY to YYYY-MM-DD format.");
    }

    if (field.includes("tax")) {
      if (format.taxTerm === "vat") {
        hints.push("Look for VAT, MwSt, TVA, or IVA labels.");
      } else if (format.taxTerm === "gst") {
        hints.push("Look for GST labels.");
      }
    }

    return hints.length > 0 ? hints.join("\n") : null;
  }

  /**
   * Assembles the system prompt from its components, separating sections
   * with blank lines.
   *
   * @param components - Prompt sections produced by the prompt factory.
   * @param useChainOfThought - Include the chain-of-thought section when it is present.
   */
  protected composePrompt(
    components: PromptComponents,
    useChainOfThought: boolean,
  ): string {
    const sections: string[] = [
      components.base,
      "Extract structured data with maximum accuracy. Follow these instructions precisely:",
      "",
      components.examples,
    ];

    if (useChainOfThought && components.chainOfThought) {
      sections.push("", components.chainOfThought);
    }

    if (components.context) {
      sections.push("", components.context);
    }

    sections.push(
      "",
      components.requirements,
      "",
      components.fieldRules,
      "",
      components.accuracyGuidelines,
      "",
      components.commonErrors,
      "",
      components.validation,
    );

    return sections.join("\n");
  }

  /**
   * A result is poor when its score is below `config.qualityThreshold` or
   * any critical field is missing.
   */
  protected isDataQualityPoor(result: z.infer<T>): boolean {
    const { score, missingCriticalFields } = this.calculateQualityScore(result);
    return (
      score < this.config.qualityThreshold || missingCriticalFields.length > 0
    );
  }

  /** Detects the document's format from a result; `undefined` when none is detected. */
  protected abstract detectFormat(
    result: z.infer<T>,
  ): DocumentFormat | undefined;

  /** Checks cross-field consistency, reporting issues and suggested fixes. */
  protected abstract validateConsistency(result: z.infer<T>): {
    isValid: boolean;
    issues: Array<{
      field: string;
      issue: string;
      severity: "error" | "warning";
    }>;
    suggestedFixes: Array<{
      field: string;
      value: any;
      reason: string;
    }>;
  };

  /** Applies fixes suggested by `validateConsistency` and returns the fixed result. */
  protected abstract applyConsistencyFixes(
    result: z.infer<T>,
    fixes: Array<{ field: string; value: any; reason: string }>,
  ): z.infer<T>;

  /** Computes a numeric confidence for a result given its quality score. */
  protected abstract calculateConfidence(
    result: z.infer<T>,
    qualityScore: {
      score: number;
      missingCriticalFields: string[];
    },
  ): number;

  /**
   * Merges two results for Pass 2. The confidence arguments are currently
   * unused; this delegates to `mergeResults`. Subclasses may override to
   * weigh the results by confidence.
   */
  protected mergeResultsWithConfidence(
    primary: z.infer<T>,
    secondary: Partial<z.infer<T>>,
    _primaryConfidence: number,
    _secondaryConfidence: number,
  ): z.infer<T> {
    return this.mergeResults(primary, secondary);
  }

  /** Scores a result and lists its issues, missing critical fields and invalid fields. */
  protected abstract calculateQualityScore(result: z.infer<T>): {
    score: number;
    issues: string[];
    missingCriticalFields: string[];
    invalidFields: string[];
  };

  /** Names the fields of a result that should be re-extracted in Pass 3. */
  protected abstract getFieldsNeedingReExtraction(result: z.infer<T>): string[];

  /** Merges a (possibly partial) secondary result into the primary one. */
  protected abstract mergeResults(
    primary: z.infer<T>,
    secondary: Partial<z.infer<T>>,
  ): z.infer<T>;
}
|
|