Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
from fastapi import FastAPI, File, UploadFile, HTTPException
|
| 2 |
# Import Decimal and the custom JSONResponse
|
| 3 |
-
from decimal import Decimal
|
| 4 |
from fastapi.encoders import jsonable_encoder
|
| 5 |
from starlette.responses import JSONResponse
|
| 6 |
import pytesseract
|
|
@@ -47,35 +47,71 @@ def convert_scientific_decimals(obj):
|
|
| 47 |
elif isinstance(obj, list):
|
| 48 |
return [convert_scientific_decimals(item) for item in obj]
|
| 49 |
elif isinstance(obj, Decimal):
|
| 50 |
-
#
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
# Convert to float first to get the actual numeric value, then back to Decimal
|
| 54 |
-
# This forces the decimal representation
|
| 55 |
float_val = float(obj)
|
| 56 |
-
# Use high precision string formatting to avoid float precision issues
|
| 57 |
-
if abs(float_val) < 1e-10: # Very small numbers
|
| 58 |
-
formatted = f"{float_val:.15f}"
|
| 59 |
-
elif abs(float_val) < 1e-6: # Small numbers
|
| 60 |
-
formatted = f"{float_val:.12f}"
|
| 61 |
-
elif abs(float_val) < 1: # Numbers less than 1
|
| 62 |
-
formatted = f"{float_val:.10f}"
|
| 63 |
-
else: # Larger numbers
|
| 64 |
-
formatted = f"{float_val:.8f}"
|
| 65 |
|
| 66 |
-
#
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
return Decimal(formatted)
|
| 74 |
-
|
|
|
|
| 75 |
return obj
|
| 76 |
else:
|
| 77 |
return obj
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
# --- END OF MODIFICATIONS ---
|
| 80 |
|
| 81 |
|
|
@@ -176,12 +212,13 @@ async def process_with_gemini(filename: str, raw_text: str):
|
|
| 176 |
- If a field is missing or not found, return an empty value (`""` or `0`) and set `accuracy` to `0.0`.
|
| 177 |
- Convert any date found in format: YYYY-MM-DD
|
| 178 |
|
| 179 |
-
CRITICAL: ALL numeric values must be in full decimal notation. NEVER use scientific notation:
|
| 180 |
- CORRECT: 0.0000009, 0.00000015, 0.0000002, 1500000, 0.00123
|
| 181 |
-
-
|
| 182 |
-
- For very small numbers like 0.0000009, write out all the zeros
|
| 183 |
-
- For large numbers like 1500000, write out all the digits
|
| 184 |
-
- This
|
|
|
|
| 185 |
|
| 186 |
Raw text:
|
| 187 |
{raw_text}
|
|
@@ -261,8 +298,9 @@ Output JSON:
|
|
| 261 |
# object inside Python, instead of a standard float.
|
| 262 |
structured_data = json.loads(json_str, parse_float=Decimal)
|
| 263 |
|
| 264 |
-
# CRITICAL:
|
| 265 |
structured_data = convert_scientific_decimals(structured_data)
|
|
|
|
| 266 |
# --- END OF MODIFICATIONS ---
|
| 267 |
|
| 268 |
structured_data_cache[text_hash] = structured_data
|
|
@@ -396,6 +434,9 @@ async def extract_and_structure(files: List[UploadFile] = File(...)):
|
|
| 396 |
# 3. Use the custom encoder when returning the final JSON response.
|
| 397 |
# This ensures the Decimal objects are converted to strings correctly.
|
| 398 |
# The default JSONResponse will not handle Decimal types properly.
|
|
|
|
|
|
|
|
|
|
| 399 |
encoded_data = custom_encoder(output_data)
|
| 400 |
return JSONResponse(content=encoded_data)
|
| 401 |
# --- END OF MODIFICATIONS ---
|
|
|
|
| 1 |
from fastapi import FastAPI, File, UploadFile, HTTPException
|
| 2 |
# Import Decimal and the custom JSONResponse
|
| 3 |
+
from decimal import Decimal, InvalidOperation
|
| 4 |
from fastapi.encoders import jsonable_encoder
|
| 5 |
from starlette.responses import JSONResponse
|
| 6 |
import pytesseract
|
|
|
|
| 47 |
elif isinstance(obj, list):
|
| 48 |
return [convert_scientific_decimals(item) for item in obj]
|
| 49 |
elif isinstance(obj, Decimal):
|
| 50 |
+
# Always convert to proper decimal format, regardless of current format
|
| 51 |
+
try:
|
| 52 |
+
# Convert to float to get actual numeric value
|
|
|
|
|
|
|
| 53 |
float_val = float(obj)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
+
# Handle special cases
|
| 56 |
+
if float_val == 0:
|
| 57 |
+
return Decimal('0')
|
| 58 |
+
|
| 59 |
+
# For very small positive numbers, use high precision
|
| 60 |
+
if 0 < abs(float_val) < 1e-10:
|
| 61 |
+
formatted = f"{float_val:.20f}".rstrip('0').rstrip('.')
|
| 62 |
+
elif 0 < abs(float_val) < 1e-6:
|
| 63 |
+
formatted = f"{float_val:.15f}".rstrip('0').rstrip('.')
|
| 64 |
+
elif 0 < abs(float_val) < 1:
|
| 65 |
+
formatted = f"{float_val:.10f}".rstrip('0').rstrip('.')
|
| 66 |
+
elif abs(float_val) < 1000000:
|
| 67 |
+
formatted = f"{float_val:.8f}".rstrip('0').rstrip('.')
|
| 68 |
+
else:
|
| 69 |
+
# For large numbers, don't use decimal places if they're all zeros
|
| 70 |
+
if float_val == int(float_val):
|
| 71 |
+
formatted = str(int(float_val))
|
| 72 |
+
else:
|
| 73 |
+
formatted = f"{float_val:.2f}".rstrip('0').rstrip('.')
|
| 74 |
|
| 75 |
+
# Ensure we have at least some decimal representation for very small numbers
|
| 76 |
+
if formatted == '0' and float_val != 0:
|
| 77 |
+
formatted = f"{float_val:.20f}".rstrip('0').rstrip('.')
|
| 78 |
+
|
| 79 |
return Decimal(formatted)
|
| 80 |
+
except (ValueError, OverflowError, InvalidOperation):
|
| 81 |
+
# If conversion fails, return the original
|
| 82 |
return obj
|
| 83 |
else:
|
| 84 |
return obj
|
| 85 |
|
| 86 |
+
# Field names whose ``{"value": ...}`` payload is rewritten in plain notation.
_DECIMAL_FIELDS = frozenset({
    'unit_price', 'total_price', 'tax_amount', 'discount', 'net_amount',
    'sub_total', 'tax_total', 'discount_total', 'total_amount', 'tax_rate',
})

# Largest decimal exponent magnitude that is written out digit by digit.
# Beyond it the plain-notation string grows with the exponent, so such
# values are left untouched instead of being expanded.
_MAX_PLAIN_EXPONENT = 1000


def _to_plain_decimal(number):
    """Return *number* as an exponent-free Decimal, or unchanged if not applicable.

    Booleans, non-numeric values, zeros, NaN/Infinity and values whose
    exponent magnitude exceeds ``_MAX_PLAIN_EXPONENT`` are returned as-is.
    """
    # bool is a subclass of int, but True/False are never amounts.
    if isinstance(number, bool) or not isinstance(number, (Decimal, float, int)):
        return number

    if isinstance(number, Decimal):
        exact = number
    elif isinstance(number, float):
        # repr() is the shortest string that round-trips the float, so no
        # binary noise digits are introduced (0.1 stays 0.1).
        exact = Decimal(repr(float(number)))
    else:
        exact = Decimal(number)

    # is_finite() is checked first: comparing a signaling NaN would raise.
    if not exact.is_finite() or exact == 0:
        return number
    if abs(exact.adjusted()) > _MAX_PLAIN_EXPONENT:
        return number

    # 'f' without a precision writes every digit and never uses an exponent.
    plain = format(exact, 'f')
    if '.' in plain:
        # Only strip zeros after the decimal point; "1500000" must stay intact.
        plain = plain.rstrip('0').rstrip('.')
    return Decimal(plain)


def force_decimal_format(data):
    """
    Additional layer to ensure all numeric values are in proper decimal format.
    This is applied right before JSON encoding.

    Walks ``data`` recursively through dicts and lists. For every dict entry
    whose key is one of ``_DECIMAL_FIELDS`` and whose value is a dict that
    contains a ``'value'`` key, a non-zero Decimal/float/int stored under
    ``'value'`` is replaced by an equal Decimal built from plain (non
    exponential) notation with trailing fractional zeros removed.

    The conversion is exact: the number is not routed through ``float``, so
    amounts keep all their digits and very small values are not rounded to 0.

    Args:
        data: Any value; only dicts and lists are traversed.

    Returns:
        A new structure of the same shape. Dicts and lists are copied, the
        input is never modified; every other value is returned as-is.

    NOTE(review): ``str()`` of a Decimal smaller than 1e-6 in magnitude still
    uses an exponent (``str(Decimal('0.0000009')) == '9E-7'``), so the encoder
    that serialises these values should use ``format(value, 'f')`` - confirm
    against ``custom_encoder``, which is not visible from this block.
    """
    if isinstance(data, dict):
        result = {}
        for key, value in data.items():
            converted = force_decimal_format(value)
            if key in _DECIMAL_FIELDS and isinstance(value, dict) and 'value' in value:
                # ``converted`` is a fresh dict, so the caller's data stays intact.
                converted['value'] = _to_plain_decimal(converted['value'])
            result[key] = converted
        return result
    if isinstance(data, list):
        return [force_decimal_format(item) for item in data]
    return data
|
| 114 |
+
|
| 115 |
# --- END OF MODIFICATIONS ---
|
| 116 |
|
| 117 |
|
|
|
|
| 212 |
- If a field is missing or not found, return an empty value (`""` or `0`) and set `accuracy` to `0.0`.
|
| 213 |
- Convert any date found in format: YYYY-MM-DD
|
| 214 |
|
| 215 |
+
CRITICAL: ALL numeric values must be in full decimal notation. NEVER EVER use scientific notation or exponential format:
|
| 216 |
- CORRECT: 0.0000009, 0.00000015, 0.0000002, 1500000, 0.00123
|
| 217 |
+
- ABSOLUTELY FORBIDDEN: 9e-7, 9E-7, 1.5e-7, 1.5E-7, 2e-7, 2E-7, 1.5e+6, 1.23e-3, any number with 'e' or 'E'
|
| 218 |
+
- For very small numbers like 0.0000009, you MUST write out all the zeros: 0.0000009
|
| 219 |
+
- For large numbers like 1500000, you MUST write out all the digits: 1500000
|
| 220 |
+
- This is MANDATORY for: unit_price, total_price, tax_amount, discount, net_amount, sub_total, tax_total, discount_total, total_amount
|
| 221 |
+
- Example: if unit price is 9 * 10^-7, write it as 0.0000009, NOT 9e-7 or 9E-7
|
| 222 |
|
| 223 |
Raw text:
|
| 224 |
{raw_text}
|
|
|
|
| 298 |
# object inside Python, instead of a standard float.
|
| 299 |
structured_data = json.loads(json_str, parse_float=Decimal)
|
| 300 |
|
| 301 |
+
# CRITICAL: Multiple layers of conversion to ensure proper decimal format
|
| 302 |
structured_data = convert_scientific_decimals(structured_data)
|
| 303 |
+
structured_data = force_decimal_format(structured_data)
|
| 304 |
# --- END OF MODIFICATIONS ---
|
| 305 |
|
| 306 |
structured_data_cache[text_hash] = structured_data
|
|
|
|
| 434 |
# 3. Use the custom encoder when returning the final JSON response.
|
| 435 |
# This ensures the Decimal objects are converted to strings correctly.
|
| 436 |
# The default JSONResponse will not handle Decimal types properly.
|
| 437 |
+
|
| 438 |
+
# Apply final decimal format enforcement before encoding
|
| 439 |
+
output_data = force_decimal_format(output_data)
|
| 440 |
encoded_data = custom_encoder(output_data)
|
| 441 |
return JSONResponse(content=encoded_data)
|
| 442 |
# --- END OF MODIFICATIONS ---
|