Spaces:
Sleeping
Sleeping
Commit
·
0afb73d
1
Parent(s):
10fa36b
Corrección: el código dentro de process_invoice todavía está tratando de concatenar con datos existentes
Browse files
- commercial_invoice.py +41 -27
- test.py +1 -1
commercial_invoice.py
CHANGED
|
@@ -245,26 +245,23 @@ def process_invoice(image_path, coordinates_json, margin=5):
|
|
| 245 |
products_data.append(product_data)
|
| 246 |
logger.debug(f"Línea de producto procesada: {product_data}")
|
| 247 |
|
| 248 |
-
#
|
| 249 |
if products_data:
|
| 250 |
-
|
| 251 |
csv_path = os.path.join("data", "productos_por_factura.csv")
|
| 252 |
|
| 253 |
try:
|
| 254 |
-
#
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
except Exception as e:
|
| 262 |
-
logger.error(f"Error al
|
| 263 |
-
# Si hay error
|
| 264 |
-
|
| 265 |
-
# Guardar DataFrame actualizado
|
| 266 |
-
products_df.to_csv(csv_path, index=False)
|
| 267 |
-
logger.info(f"Productos agregados al archivo: {csv_path}")
|
| 268 |
|
| 269 |
return extracted_fields
|
| 270 |
|
|
@@ -272,22 +269,39 @@ def process_invoice(image_path, coordinates_json, margin=5):
|
|
| 272 |
logger.error(f"Error procesando factura {image_path}: {str(e)}")
|
| 273 |
return {'filename': os.path.basename(image_path)}
|
| 274 |
|
| 275 |
-
def process_invoice_batch(image_paths, coordinates_json
|
| 276 |
-
"""Procesa un lote de facturas"""
|
| 277 |
logger.info(f"Iniciando procesamiento de {len(image_paths)} facturas")
|
| 278 |
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
|
| 286 |
-
#
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
|
|
|
|
|
|
|
|
|
| 290 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
return df
|
| 292 |
|
| 293 |
def main(invoice_dir="./invoices", data_dir="./data", coordinates_json="./coordinates_CI.json"):
|
|
|
|
| 245 |
products_data.append(product_data)
|
| 246 |
logger.debug(f"Línea de producto procesada: {product_data}")
|
| 247 |
|
| 248 |
+
# Guardar productos si hay datos nuevos
|
| 249 |
if products_data:
|
| 250 |
+
new_products_df = pd.DataFrame(products_data)
|
| 251 |
csv_path = os.path.join("data", "productos_por_factura.csv")
|
| 252 |
|
| 253 |
try:
|
| 254 |
+
# Leer archivo existente
|
| 255 |
+
existing_df = pd.read_csv(csv_path)
|
| 256 |
+
# Agregar nuevos datos
|
| 257 |
+
updated_df = pd.concat([existing_df, new_products_df], ignore_index=True)
|
| 258 |
+
# Guardar todo en el archivo
|
| 259 |
+
updated_df.to_csv(csv_path, index=False)
|
| 260 |
+
logger.info(f"Productos agregados al archivo: {csv_path}")
|
| 261 |
except Exception as e:
|
| 262 |
+
logger.error(f"Error al actualizar archivo de productos: {str(e)}")
|
| 263 |
+
# Si hay error, intentar guardar solo los nuevos datos
|
| 264 |
+
new_products_df.to_csv(csv_path, mode='a', header=False, index=False)
|
|
|
|
|
|
|
|
|
|
| 265 |
|
| 266 |
return extracted_fields
|
| 267 |
|
|
|
|
| 269 |
logger.error(f"Error procesando factura {image_path}: {str(e)}")
|
| 270 |
return {'filename': os.path.basename(image_path)}
|
| 271 |
|
| 272 |
+
def process_invoice_batch(image_paths, coordinates_json):
|
| 273 |
+
"""Procesa un lote de imágenes de facturas"""
|
| 274 |
logger.info(f"Iniciando procesamiento de {len(image_paths)} facturas")
|
| 275 |
|
| 276 |
+
# Inicializar DataFrames vacíos al inicio del proceso por lotes
|
| 277 |
+
products_df = pd.DataFrame(columns=[
|
| 278 |
+
"invoice_number", "Boxes", "Pieces", "Product_desc",
|
| 279 |
+
"Tariff_number", "Stems", "Unit_price", "Extended_price"
|
| 280 |
+
])
|
| 281 |
+
facturas_df = pd.DataFrame()
|
| 282 |
|
| 283 |
+
# Crear archivos vacíos al inicio del proceso
|
| 284 |
+
output_dir = "data"
|
| 285 |
+
os.makedirs(output_dir, exist_ok=True)
|
| 286 |
+
products_csv_path = os.path.join(output_dir, "productos_por_factura.csv")
|
| 287 |
+
facturas_csv_path = os.path.join(output_dir, "facturas_procesadas.csv")
|
| 288 |
+
products_df.to_csv(products_csv_path, index=False)
|
| 289 |
+
facturas_df.to_csv(facturas_csv_path, index=False)
|
| 290 |
|
| 291 |
+
results = []
|
| 292 |
+
for image_path in image_paths:
|
| 293 |
+
try:
|
| 294 |
+
result = process_invoice(image_path, coordinates_json)
|
| 295 |
+
if result:
|
| 296 |
+
results.append(result)
|
| 297 |
+
except Exception as e:
|
| 298 |
+
logger.error(f"Error procesando {image_path}: {str(e)}")
|
| 299 |
+
results.append({'filename': os.path.basename(image_path)})
|
| 300 |
+
|
| 301 |
+
df = pd.DataFrame(results)
|
| 302 |
+
if not df.empty:
|
| 303 |
+
df.to_csv(facturas_csv_path, index=False)
|
| 304 |
+
logger.info(f"Resultados guardados en: {facturas_csv_path}")
|
| 305 |
return df
|
| 306 |
|
| 307 |
def main(invoice_dir="./invoices", data_dir="./data", coordinates_json="./coordinates_CI.json"):
|
test.py
CHANGED
|
@@ -2,7 +2,7 @@ import os
|
|
| 2 |
from commercial_invoice import process_invoice
|
| 3 |
|
| 4 |
def test_single_invoice():
|
| 5 |
-
invoice_path = os.path.join("invoices", "
|
| 6 |
coordinates_json = "coordinates_CI.json"
|
| 7 |
|
| 8 |
print("Iniciando prueba con factura individual...")
|
|
|
|
| 2 |
from commercial_invoice import process_invoice
|
| 3 |
|
| 4 |
def test_single_invoice():
|
| 5 |
+
invoice_path = os.path.join("invoices", "pagina_9.jpg")
|
| 6 |
coordinates_json = "coordinates_CI.json"
|
| 7 |
|
| 8 |
print("Iniciando prueba con factura individual...")
|