Spaces:
Sleeping
Sleeping
File size: 1,110 Bytes
faa3050 f74e17e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
# src/utils.py
import hashlib
from typing import Dict, Any
from decimal import Decimal
from datetime import date
def generate_semantic_hash(invoice_data: Dict[str, Any]) -> str:
    """Return a SHA-256 fingerprint built from an invoice's identifying fields.

    The fingerprint is a composite key made of four fields, in this order:
    vendor, date, total_amount, receipt_number. Each field is normalized to
    text, the pieces are joined with a pipe character, and the UTF-8 bytes
    of the joined string are hashed.

    Normalization per field:
      * ``None`` becomes the empty string.
      * ``date``, ``Decimal``, ``int`` and ``float`` values use ``str(value)``.
      * Any other value is stringified, lower-cased and stripped of
        surrounding whitespace.

    A pipe or backslash that appears inside a value is backslash-escaped
    before joining, so text inside one field can never be mistaken for a
    field boundary. Values containing neither character are not altered by
    the escaping step.

    Args:
        invoice_data: Mapping holding the four composite-key fields. It is
            only read, never modified.

    Returns:
        The 64-character hexadecimal SHA-256 digest of the composite string.

    Raises:
        KeyError: If any of the four composite-key fields is absent.
    """
    # The specific fields that determine uniqueness; order is significant.
    keys_to_hash = ('vendor', 'date', 'total_amount', 'receipt_number')
    separator = "|"
    escape = "\\"

    normalized_values = []
    for key in keys_to_hash:
        value = invoice_data[key]

        # Normalize into a new string without modifying the original object.
        if value is None:
            norm_val = ""
        elif isinstance(value, (date, Decimal, int, float)):
            norm_val = str(value)
        else:
            # String normalization: case- and edge-whitespace-insensitive.
            norm_val = str(value).lower().strip()

        # Escape the escape character first, then the separator, so the
        # joined string maps back to exactly one list of field values.
        norm_val = norm_val.replace(escape, escape + escape)
        norm_val = norm_val.replace(separator, escape + separator)
        normalized_values.append(norm_val)

    # Create the fingerprint string.
    composite_string = separator.join(normalized_values)

    # Return the SHA-256 hash of the string (explicit UTF-8 encoding).
    return hashlib.sha256(composite_string.encode("utf-8")).hexdigest()
|