Spaces:
Sleeping
Sleeping
feat: Update Dockerfile and requirements for PDF processing, add new dependencies, and refactor API structure
faa3050
| # src/utils.py | |
| import hashlib | |
| from typing import Dict, Any | |
| from decimal import Decimal | |
| from datetime import date | |
def generate_semantic_hash(invoice_data: Dict[str, Any]) -> str:
    """Return a SHA-256 fingerprint identifying an invoice.

    The fingerprint is built from a composite key made of four fields,
    taken in this fixed order: vendor, date, total amount and receipt
    number. Each field is reduced to a canonical string, the strings are
    joined with ``|``, and the hex digest of the result is returned.

    Canonicalisation rules:
      * ``None`` becomes the empty string.
      * ``date``, ``Decimal``, ``int`` and ``float`` values use their
        plain ``str()`` form. Numerically equal values with different
        textual forms (e.g. ``10.5`` vs ``Decimal("10.50")``) therefore
        produce different fingerprints.
      * Anything else is stringified, lower-cased and stripped of
        leading/trailing whitespace.

    The input mapping is only read, never modified.

    Raises:
        KeyError: if any of the four composite-key fields is absent
            from ``invoice_data``.
    """

    def _canonical(field_value: Any) -> str:
        # Missing-but-present values contribute an empty slot so the
        # number of "|"-separated positions stays constant.
        if field_value is None:
            return ""
        # Dates and numbers are hashed exactly as str() renders them.
        if isinstance(field_value, (date, Decimal, int, float)):
            return str(field_value)
        # Free text: ignore case and surrounding whitespace.
        return str(field_value).lower().strip()

    # Field order is part of the fingerprint; do not reorder.
    fingerprint_source = "|".join(
        _canonical(invoice_data[field_name])
        for field_name in ('vendor', 'date', 'total_amount', 'receipt_number')
    )

    return hashlib.sha256(fingerprint_source.encode()).hexdigest()