File size: 4,116 Bytes
c09f67c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import { createLoggerWithContext } from "@midday/logger";
import type { z } from "zod/v4";
import { receiptConfig } from "../../config/extraction-config";
import type { GetDocumentRequest } from "../../types";
import { extractWebsite } from "../../utils";
import {
  applyReceiptFixes,
  validateReceiptConsistency,
} from "../../utils/cross-field-validation";
import type { DocumentFormat } from "../../utils/format-detection";
import { detectReceiptFormat } from "../../utils/format-detection";
import {
  calculateReceiptExtractionConfidence,
  mergeReceiptResults,
} from "../../utils/merging";
import {
  calculateReceiptQualityScore,
  getReceiptFieldsNeedingReExtraction,
} from "../../utils/validation";
import { BaseExtractionEngine } from "../base-extraction-engine";

/** Static type inferred (via `z.infer`) from the receipt extraction schema, `receiptConfig.schema`. */
type ReceiptData = z.infer<typeof receiptConfig.schema>;

/** Result shape returned by the receipt quality-score calculation. */
type ReceiptQualityReport = {
  score: number;
  issues: string[];
  missingCriticalFields: string[];
  invalidFields: string[];
};

/** A single problem reported by the cross-field consistency check. */
type ReceiptConsistencyIssue = {
  field: string;
  issue: string;
  severity: "error" | "warning";
};

/** A replacement value proposed for one field by the consistency check. */
type ReceiptConsistencyFix = {
  field: string;
  value: any;
  reason: string;
};

/** Combined outcome of the cross-field consistency check. */
type ReceiptConsistencyReport = {
  isValid: boolean;
  issues: Array<ReceiptConsistencyIssue>;
  suggestedFixes: Array<ReceiptConsistencyFix>;
};

/**
 * Extraction engine specialised for receipts.
 *
 * Every protected hook simply forwards to the matching receipt helper
 * (quality scoring, re-extraction selection, merging, consistency checks,
 * format detection, confidence). `getReceipt` is the public entry point.
 */
export class ReceiptProcessor extends BaseExtractionEngine<
  typeof receiptConfig.schema
> {
  /** Wires the receipt config and a logger tagged "ReceiptProcessor" into the base engine. */
  constructor() {
    super(receiptConfig, createLoggerWithContext("ReceiptProcessor"));
  }

  /** @returns the document-type label for this processor: `"receipt"`. */
  protected getDocumentType(): string {
    return "receipt";
  }

  /**
   * Scores an extraction result by delegating to `calculateReceiptQualityScore`.
   *
   * @param result - Extracted receipt data.
   * @returns the helper's score together with its issue and field lists.
   */
  protected calculateQualityScore(result: ReceiptData): ReceiptQualityReport {
    return calculateReceiptQualityScore(result);
  }

  /**
   * Lists fields to extract again, as decided by
   * `getReceiptFieldsNeedingReExtraction`.
   *
   * @param result - Extracted receipt data.
   */
  protected getFieldsNeedingReExtraction(result: ReceiptData): string[] {
    return getReceiptFieldsNeedingReExtraction(result);
  }

  /**
   * Merges two extraction passes via `mergeReceiptResults`, without
   * supplying confidence values.
   *
   * @param primary - Full result of the first pass.
   * @param secondary - Possibly partial result of another pass.
   */
  protected mergeResults(
    primary: ReceiptData,
    secondary: Partial<ReceiptData>,
  ): ReceiptData {
    return mergeReceiptResults(primary, secondary);
  }

  /**
   * Runs the cross-field check by delegating to `validateReceiptConsistency`.
   *
   * @param result - Extracted receipt data.
   * @returns validity flag, reported issues and suggested fixes.
   */
  protected validateConsistency(result: ReceiptData): ReceiptConsistencyReport {
    return validateReceiptConsistency(result);
  }

  /**
   * Applies suggested fixes to a result by delegating to `applyReceiptFixes`.
   *
   * @param result - Extracted receipt data.
   * @param fixes - Fixes to apply, in the shape produced by `validateConsistency`.
   */
  protected applyConsistencyFixes(
    result: ReceiptData,
    fixes: Array<ReceiptConsistencyFix>,
  ): ReceiptData {
    return applyReceiptFixes(result, fixes);
  }

  /**
   * Detects the document format by delegating to `detectReceiptFormat`.
   *
   * @param result - Extracted receipt data.
   * @returns the detected format, or `undefined` when the helper yields none.
   */
  protected detectFormat(result: ReceiptData): DocumentFormat | undefined {
    return detectReceiptFormat(result);
  }

  /**
   * Computes an extraction confidence by delegating to
   * `calculateReceiptExtractionConfidence`.
   *
   * @param result - Extracted receipt data.
   * @param qualityScore - Score and missing critical fields for `result`.
   */
  protected calculateConfidence(
    result: ReceiptData,
    qualityScore: Pick<ReceiptQualityReport, "score" | "missingCriticalFields">,
  ): number {
    return calculateReceiptExtractionConfidence(result, qualityScore);
  }

  /**
   * Merges two extraction passes via `mergeReceiptResults`, forwarding the
   * confidence of each pass.
   *
   * @param primary - Full result of the first pass.
   * @param secondary - Possibly partial result of another pass.
   * @param primaryConfidence - Confidence associated with `primary`.
   * @param secondaryConfidence - Confidence associated with `secondary`.
   */
  protected mergeResultsWithConfidence(
    primary: ReceiptData,
    secondary: Partial<ReceiptData>,
    primaryConfidence: number,
    secondaryConfidence: number,
  ): ReceiptData {
    return mergeReceiptResults(
      primary,
      secondary,
      primaryConfidence,
      secondaryConfidence,
    );
  }

  /**
   * Forwards the website, email and store-name values to `extractWebsite`
   * together with this processor's logger.
   *
   * @param hints - Values taken from the extracted receipt.
   */
  async #getWebsite(hints: {
    website: string | null;
    email: string | null;
    storeName: string | null;
  }) {
    return extractWebsite(
      hints.website,
      hints.email,
      hints.storeName,
      this.logger,
    );
  }

  /**
   * Extracts a receipt from a document URL and reshapes it into an expense
   * record.
   *
   * The returned object contains every extracted field, with `website`
   * replaced by the value from `#getWebsite`, `type` fixed to `"expense"`,
   * `amount` taken from `total_amount`, `name` taken from `store_name`, and
   * a `metadata` object whose missing entries default to `null`.
   *
   * @param params - Request carrying `documentUrl` and `companyName`.
   * @throws Error when `params.documentUrl` is falsy.
   */
  public async getReceipt(params: GetDocumentRequest) {
    const { documentUrl } = params;
    if (!documentUrl) {
      throw new Error("Document URL is required");
    }

    const { data } = await this.extract(documentUrl, {
      companyName: params.companyName,
      logger: this.logger,
    });

    const website = await this.#getWebsite({
      website: data.website,
      email: data.email,
      storeName: data.store_name,
    });

    const metadata = {
      register_number: data.register_number ?? null,
      cashier_name: data.cashier_name ?? null,
      email: data.email ?? null,
    };

    return {
      ...data,
      website,
      type: "expense",
      document_type: data.document_type,
      date: data.date,
      amount: data.total_amount,
      currency: data.currency,
      name: data.store_name,
      tax_amount: data.tax_amount,
      tax_rate: data.tax_rate,
      tax_type: data.tax_type,
      language: data.language,
      metadata,
    };
  }
}