ChintanSatva committed on
Commit
e8f4cc9
·
verified ·
1 Parent(s): 78317c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -28
app.py CHANGED
@@ -1,6 +1,6 @@
1
  from fastapi import FastAPI, File, UploadFile, HTTPException
2
  # Import Decimal and the custom JSONResponse
3
- from decimal import Decimal
4
  from fastapi.encoders import jsonable_encoder
5
  from starlette.responses import JSONResponse
6
  import pytesseract
@@ -47,35 +47,71 @@ def convert_scientific_decimals(obj):
47
  elif isinstance(obj, list):
48
  return [convert_scientific_decimals(item) for item in obj]
49
  elif isinstance(obj, Decimal):
50
- # Check if the decimal is in scientific notation by converting to string
51
- decimal_str = str(obj)
52
- if 'E' in decimal_str or 'e' in decimal_str:
53
- # Convert to float first to get the actual numeric value, then back to Decimal
54
- # This forces the decimal representation
55
  float_val = float(obj)
56
- # Use high precision string formatting to avoid float precision issues
57
- if abs(float_val) < 1e-10: # Very small numbers
58
- formatted = f"{float_val:.15f}"
59
- elif abs(float_val) < 1e-6: # Small numbers
60
- formatted = f"{float_val:.12f}"
61
- elif abs(float_val) < 1: # Numbers less than 1
62
- formatted = f"{float_val:.10f}"
63
- else: # Larger numbers
64
- formatted = f"{float_val:.8f}"
65
 
66
- # Remove trailing zeros and decimal point if not needed
67
- formatted = formatted.rstrip('0').rstrip('.')
68
- if '.' not in formatted and float_val != int(float_val):
69
- # If we removed all decimal places but it's not actually an integer
70
- # This handles edge cases
71
- formatted = f"{float_val:.1f}".rstrip('0').rstrip('.')
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
 
 
 
 
73
  return Decimal(formatted)
74
- else:
 
75
  return obj
76
  else:
77
  return obj
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  # --- END OF MODIFICATIONS ---
80
 
81
 
@@ -176,12 +212,13 @@ async def process_with_gemini(filename: str, raw_text: str):
176
  - If a field is missing or not found, return an empty value (`""` or `0`) and set `accuracy` to `0.0`.
177
  - Convert any date found in format: YYYY-MM-DD
178
 
179
- CRITICAL: ALL numeric values must be in full decimal notation. NEVER use scientific notation:
180
  - CORRECT: 0.0000009, 0.00000015, 0.0000002, 1500000, 0.00123
181
- - INCORRECT: 9e-7, 9E-7, 1.5e-7, 1.5E-7, 2e-7, 2E-7, 1.5e+6, 1.23e-3
182
- - For very small numbers like 0.0000009, write out all the zeros
183
- - For large numbers like 1500000, write out all the digits
184
- - This applies to unit_price, total_price, tax_amount, discount, net_amount, sub_total, tax_total, discount_total, total_amount
 
185
 
186
  Raw text:
187
  {raw_text}
@@ -261,8 +298,9 @@ Output JSON:
261
  # object inside Python, instead of a standard float.
262
  structured_data = json.loads(json_str, parse_float=Decimal)
263
 
264
- # CRITICAL: Convert any scientific notation Decimals to decimal format
265
  structured_data = convert_scientific_decimals(structured_data)
 
266
  # --- END OF MODIFICATIONS ---
267
 
268
  structured_data_cache[text_hash] = structured_data
@@ -396,6 +434,9 @@ async def extract_and_structure(files: List[UploadFile] = File(...)):
396
  # 3. Use the custom encoder when returning the final JSON response.
397
  # This ensures the Decimal objects are converted to strings correctly.
398
  # The default JSONResponse will not handle Decimal types properly.
 
 
 
399
  encoded_data = custom_encoder(output_data)
400
  return JSONResponse(content=encoded_data)
401
  # --- END OF MODIFICATIONS ---
 
1
  from fastapi import FastAPI, File, UploadFile, HTTPException
2
  # Import Decimal and the custom JSONResponse
3
+ from decimal import Decimal, InvalidOperation
4
  from fastapi.encoders import jsonable_encoder
5
  from starlette.responses import JSONResponse
6
  import pytesseract
 
47
  elif isinstance(obj, list):
48
  return [convert_scientific_decimals(item) for item in obj]
49
  elif isinstance(obj, Decimal):
50
+ # Always convert to proper decimal format, regardless of current format
51
+ try:
52
+ # Convert to float to get actual numeric value
 
 
53
  float_val = float(obj)
 
 
 
 
 
 
 
 
 
54
 
55
+ # Handle special cases
56
+ if float_val == 0:
57
+ return Decimal('0')
58
+
59
+ # For very small positive numbers, use high precision
60
+ if 0 < abs(float_val) < 1e-10:
61
+ formatted = f"{float_val:.20f}".rstrip('0').rstrip('.')
62
+ elif 0 < abs(float_val) < 1e-6:
63
+ formatted = f"{float_val:.15f}".rstrip('0').rstrip('.')
64
+ elif 0 < abs(float_val) < 1:
65
+ formatted = f"{float_val:.10f}".rstrip('0').rstrip('.')
66
+ elif abs(float_val) < 1000000:
67
+ formatted = f"{float_val:.8f}".rstrip('0').rstrip('.')
68
+ else:
69
+ # For large numbers, don't use decimal places if they're all zeros
70
+ if float_val == int(float_val):
71
+ formatted = str(int(float_val))
72
+ else:
73
+ formatted = f"{float_val:.2f}".rstrip('0').rstrip('.')
74
 
75
+ # Ensure we have at least some decimal representation for very small numbers
76
+ if formatted == '0' and float_val != 0:
77
+ formatted = f"{float_val:.20f}".rstrip('0').rstrip('.')
78
+
79
  return Decimal(formatted)
80
+ except (ValueError, OverflowError, InvalidOperation):
81
+ # If conversion fails, return the original
82
  return obj
83
  else:
84
  return obj
85
 
86
# Field names whose {'value': ...} payload is treated as a monetary/numeric amount.
_DECIMAL_FIELD_NAMES = frozenset({
    'unit_price', 'total_price', 'tax_amount', 'discount', 'net_amount',
    'sub_total', 'tax_total', 'discount_total', 'total_amount', 'tax_rate',
})


def _to_plain_decimal(number):
    """Return *number* as a Decimal without a positive exponent, or None to leave it as is.

    None is returned for non-numeric values, booleans, zero, NaN and infinities,
    so the caller keeps the original object in those cases.
    """
    # bool is a subclass of int; a True/False flag must not become Decimal('1')/('0').
    if isinstance(number, bool) or not isinstance(number, (Decimal, float, int)):
        return None
    try:
        if isinstance(number, float):
            # Fixed-point formatting rounds away binary float noise
            # (15 places for magnitudes below 1e-6, 10 places otherwise).
            places = 15 if abs(number) < 1e-6 else 10
            exact = Decimal(f"{number:.{places}f}")
            if exact == 0 and number != 0:
                # The magnitude is below the formatting window; use the shortest
                # round-trip repr instead of silently collapsing the value to zero.
                exact = Decimal(repr(number))
        else:
            # Decimal and int convert exactly; no float round-trip, no lost digits.
            exact = Decimal(number)
        if not exact.is_finite() or exact == 0:
            return None
        # 'f' formatting expands any exponent into plain digits without rounding.
        text = format(exact, 'f')
        if '.' in text:
            text = text.rstrip('0').rstrip('.')
        return Decimal(text)
    except (ValueError, ArithmeticError):
        # decimal.InvalidOperation derives from ArithmeticError; keep the original value.
        return None


def force_decimal_format(data):
    """
    Additional layer to ensure numeric field values are in plain decimal format.

    Recursively walks dicts and lists. For every key listed in
    ``_DECIMAL_FIELD_NAMES`` whose value is a dict containing a ``'value'``
    entry, a non-zero Decimal/float/int is replaced by an equivalent Decimal
    with trailing zeros stripped and no positive exponent (``1.5E+6`` becomes
    ``1500000``).

    NOTE: ``str()`` of a Decimal still uses exponent notation for very small
    magnitudes (adjusted exponent below -6), so whatever serialises the result
    must format it with ``format(value, 'f')`` to get fixed-point text.

    Args:
        data: Any JSON-like structure (dict, list or scalar).

    Returns:
        A new structure of the same shape. Dicts and lists are rebuilt; the
        input is never modified. Scalars are returned unchanged.
    """
    if isinstance(data, dict):
        result = {}
        for key, value in data.items():
            if key in _DECIMAL_FIELD_NAMES and isinstance(value, dict) and 'value' in value:
                plain = _to_plain_decimal(value['value'])
                if plain is not None:
                    # Work on a shallow copy so the caller's dict is left untouched.
                    value = {**value, 'value': plain}
            result[key] = force_decimal_format(value)
        return result
    if isinstance(data, list):
        return [force_decimal_format(item) for item in data]
    return data
114
+
115
  # --- END OF MODIFICATIONS ---
116
 
117
 
 
212
  - If a field is missing or not found, return an empty value (`""` or `0`) and set `accuracy` to `0.0`.
213
  - Convert any date found in format: YYYY-MM-DD
214
 
215
+ CRITICAL: ALL numeric values must be in full decimal notation. NEVER EVER use scientific notation or exponential format:
216
  - CORRECT: 0.0000009, 0.00000015, 0.0000002, 1500000, 0.00123
217
+ - ABSOLUTELY FORBIDDEN: 9e-7, 9E-7, 1.5e-7, 1.5E-7, 2e-7, 2E-7, 1.5e+6, 1.23e-3, any number with 'e' or 'E'
218
+ - For very small numbers like 0.0000009, you MUST write out all the zeros: 0.0000009
219
+ - For large numbers like 1500000, you MUST write out all the digits: 1500000
220
+ - This is MANDATORY for: unit_price, total_price, tax_amount, discount, net_amount, sub_total, tax_total, discount_total, total_amount
221
+ - Example: if unit price is 9 * 10^-7, write it as 0.0000009, NOT 9e-7 or 9E-7
222
 
223
  Raw text:
224
  {raw_text}
 
298
  # object inside Python, instead of a standard float.
299
  structured_data = json.loads(json_str, parse_float=Decimal)
300
 
301
+ # CRITICAL: Multiple layers of conversion to ensure proper decimal format
302
  structured_data = convert_scientific_decimals(structured_data)
303
+ structured_data = force_decimal_format(structured_data)
304
  # --- END OF MODIFICATIONS ---
305
 
306
  structured_data_cache[text_hash] = structured_data
 
434
  # 3. Use the custom encoder when returning the final JSON response.
435
  # This ensures the Decimal objects are converted to strings correctly.
436
  # The default JSONResponse will not handle Decimal types properly.
437
+
438
+ # Apply final decimal format enforcement before encoding
439
+ output_data = force_decimal_format(output_data)
440
  encoded_data = custom_encoder(output_data)
441
  return JSONResponse(content=encoded_data)
442
  # --- END OF MODIFICATIONS ---