Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,12 +6,18 @@ import pandas as pd
|
|
| 6 |
import re
|
| 7 |
from datetime import datetime
|
| 8 |
from huggingface_hub import InferenceClient
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
from
|
| 13 |
-
from
|
| 14 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# ============= EXTRAER TEXTO DEL PDF =============
|
| 17 |
def extraer_texto_pdf(pdf_file):
|
|
@@ -28,7 +34,7 @@ def extraer_texto_pdf(pdf_file):
|
|
| 28 |
def analizar_y_convertir_json(texto):
|
| 29 |
"""El LLM lee la factura, decide cómo estructurarla y devuelve JSON"""
|
| 30 |
|
| 31 |
-
token = os.getenv("
|
| 32 |
if not token:
|
| 33 |
return None, None, "Error: Falta configurar HF_TOKEN en Settings → Secrets"
|
| 34 |
|
|
@@ -234,10 +240,78 @@ def json_a_csv(datos_json):
|
|
| 234 |
|
| 235 |
return pd.DataFrame(filas)
|
| 236 |
|
| 237 |
-
# =============
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
def subir_a_drive(archivo_csv):
|
| 239 |
"""Sube el archivo CSV a Google Drive"""
|
| 240 |
|
|
|
|
|
|
|
|
|
|
| 241 |
SCOPES = ['https://www.googleapis.com/auth/drive.file']
|
| 242 |
creds = None
|
| 243 |
|
|
@@ -254,7 +328,7 @@ def subir_a_drive(archivo_csv):
|
|
| 254 |
else:
|
| 255 |
# Verificar si existe credentials.json
|
| 256 |
if not os.path.exists('credentials.json'):
|
| 257 |
-
return "Error: Falta el archivo credentials.json
|
| 258 |
|
| 259 |
flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
|
| 260 |
creds = flow.run_local_server(port=0)
|
|
@@ -382,15 +456,44 @@ with gr.Blocks(title="Extractor IA de Facturas") as demo:
|
|
| 382 |
type="filepath"
|
| 383 |
)
|
| 384 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
gr.Markdown("")
|
| 386 |
|
| 387 |
# Checkbox para Google Drive
|
| 388 |
drive_checkbox = gr.Checkbox(
|
| 389 |
-
label="Guardar
|
| 390 |
value=False,
|
| 391 |
-
info="
|
| 392 |
)
|
| 393 |
|
|
|
|
|
|
|
| 394 |
gr.Markdown("")
|
| 395 |
|
| 396 |
btn = gr.Button(
|
|
@@ -407,9 +510,9 @@ with gr.Blocks(title="Extractor IA de Facturas") as demo:
|
|
| 407 |
|
| 408 |
gr.Markdown("")
|
| 409 |
|
| 410 |
-
# Estado de
|
| 411 |
-
|
| 412 |
-
label="Estado de
|
| 413 |
interactive=False,
|
| 414 |
lines=3
|
| 415 |
)
|
|
@@ -463,14 +566,33 @@ with gr.Blocks(title="Extractor IA de Facturas") as demo:
|
|
| 463 |
- *HF_TOKEN en Settings → Secrets (obligatorio)*
|
| 464 |
- *credentials.json de Google Cloud para usar Drive (opcional)*
|
| 465 |
|
| 466 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
""")
|
| 468 |
|
| 469 |
-
# Conectar
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
btn.click(
|
| 471 |
fn=procesar_factura,
|
| 472 |
inputs=[pdf_input, drive_checkbox],
|
| 473 |
-
outputs=[texto_extraido, tabla_preview, csv_output, resumen_tecnico, info_util,
|
| 474 |
)
|
| 475 |
|
| 476 |
if __name__ == "__main__":
|
|
|
|
| 6 |
import re
|
| 7 |
from datetime import datetime
|
| 8 |
from huggingface_hub import InferenceClient
|
| 9 |
+
|
| 10 |
+
# Importar Google Drive solo si está disponible
|
| 11 |
+
try:
|
| 12 |
+
from google.oauth2.credentials import Credentials
|
| 13 |
+
from google_auth_oauthlib.flow import InstalledAppFlow
|
| 14 |
+
from google.auth.transport.requests import Request
|
| 15 |
+
from googleapiclient.discovery import build
|
| 16 |
+
from googleapiclient.http import MediaFileUpload
|
| 17 |
+
import pickle
|
| 18 |
+
DRIVE_DISPONIBLE = True
|
| 19 |
+
except ImportError:
|
| 20 |
+
DRIVE_DISPONIBLE = False
|
| 21 |
|
| 22 |
# ============= EXTRAER TEXTO DEL PDF =============
|
| 23 |
def extraer_texto_pdf(pdf_file):
|
|
|
|
| 34 |
def analizar_y_convertir_json(texto):
|
| 35 |
"""El LLM lee la factura, decide cómo estructurarla y devuelve JSON"""
|
| 36 |
|
| 37 |
+
token = os.getenv("HF_TOKEN")
|
| 38 |
if not token:
|
| 39 |
return None, None, "Error: Falta configurar HF_TOKEN en Settings → Secrets"
|
| 40 |
|
|
|
|
| 240 |
|
| 241 |
return pd.DataFrame(filas)
|
| 242 |
|
| 243 |
+
# ============= AUTENTICAR GOOGLE DRIVE =============
|
| 244 |
+
def autenticar_drive():
    """Run the Google Drive OAuth flow and cache the resulting credentials.

    Returns:
        A ``(message, connected)`` tuple. ``message`` is a user-facing status
        string; ``connected`` is ``True`` when valid cached credentials were
        found or a new sign-in completed, and ``False`` on any error.
    """
    # The Google client libraries are optional; bail out early without them.
    if not DRIVE_DISPONIBLE:
        return (
            "Error: Librerías de Google Drive no instaladas.\n\nAgrega al requirements.txt:\ngoogle-auth-oauthlib\ngoogle-auth-httplib2\ngoogle-api-python-client",
            False,
        )

    alcances = ['https://www.googleapis.com/auth/drive.file']
    ruta_secretos = 'credentials.json'
    ruta_token = 'token.pickle'

    try:
        # The OAuth client secrets file must be present to start the flow.
        if not os.path.exists(ruta_secretos):
            return (
                "Error: Falta el archivo credentials.json.\n\nPasos:\n1. Ve a https://console.cloud.google.com/\n2. Crea un proyecto\n3. Activa Google Drive API\n4. Crea credenciales OAuth 2.0\n5. Descarga credentials.json\n6. Súbelo a tu aplicación",
                False,
            )

        # Reuse previously stored credentials when they are still valid.
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # this is only safe while token.pickle is written by this app alone.
        if os.path.exists(ruta_token):
            with open(ruta_token, 'rb') as fh:
                guardadas = pickle.load(fh)
            if guardadas and guardadas.valid:
                return "Ya estás conectado a Google Drive", True

        # No usable session: start the interactive sign-in on local port 8080.
        flujo = InstalledAppFlow.from_client_secrets_file(ruta_secretos, alcances)
        nuevas = flujo.run_local_server(port=8080)

        # Persist the fresh credentials for later calls.
        with open(ruta_token, 'wb') as fh:
            pickle.dump(nuevas, fh)
    except Exception as exc:
        return f"Error en la autenticación: {exc!s}", False

    return "Autenticación exitosa! Ahora puedes guardar archivos en Google Drive", True
|
| 276 |
+
|
| 277 |
+
# ============= VERIFICAR ESTADO DE DRIVE =============
|
| 278 |
+
def verificar_sesion_drive():
    """Report whether a valid Google Drive session is cached on disk.

    Returns:
        A ``(message, connected)`` tuple. ``connected`` is ``True`` only when
        ``token.pickle`` exists, can be loaded, and the stored credentials
        report themselves as valid.
    """
    # The Google client libraries are optional; nothing to check without them.
    if not DRIVE_DISPONIBLE:
        return "Librerías no instaladas", False

    if not os.path.exists('token.pickle'):
        return "No conectado", False

    try:
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # this is only safe while token.pickle is written by this app alone.
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
        if creds and creds.valid:
            return "Conectado a Google Drive", True
        else:
            return "Sesión expirada", False
    except Exception:
        # Catch Exception rather than using a bare ``except:`` so that
        # KeyboardInterrupt and SystemExit still propagate; unreadable or
        # corrupt token files are reported as a verification error.
        return "Error al verificar sesión", False
|
| 296 |
+
|
| 297 |
+
# ============= CERRAR SESIÓN DE DRIVE =============
|
| 298 |
+
def cerrar_sesion_drive():
    """Close the Google Drive session by deleting the cached token file.

    Returns:
        A ``(message, connected)`` tuple. ``connected`` is always ``False``;
        ``message`` says whether a session was removed, none existed, or the
        removal failed.
    """
    ruta_token = 'token.pickle'
    try:
        # Nothing to remove means there was no session to close.
        if not os.path.exists(ruta_token):
            return "No había sesión activa", False
        os.remove(ruta_token)
    except Exception as exc:
        return f"Error al cerrar sesión: {exc!s}", False
    return "Sesión cerrada correctamente", False
|
| 309 |
def subir_a_drive(archivo_csv):
|
| 310 |
"""Sube el archivo CSV a Google Drive"""
|
| 311 |
|
| 312 |
+
if not DRIVE_DISPONIBLE:
|
| 313 |
+
return "Error: Librerías de Google Drive no instaladas.\n\nAgrega al requirements.txt:\ngoogle-auth-oauthlib\ngoogle-auth-httplib2\ngoogle-api-python-client"
|
| 314 |
+
|
| 315 |
SCOPES = ['https://www.googleapis.com/auth/drive.file']
|
| 316 |
creds = None
|
| 317 |
|
|
|
|
| 328 |
else:
|
| 329 |
# Verificar si existe credentials.json
|
| 330 |
if not os.path.exists('credentials.json'):
|
| 331 |
+
return "Error: Falta el archivo credentials.json.\n\nDescárgalo desde Google Cloud Console:\nhttps://console.cloud.google.com/"
|
| 332 |
|
| 333 |
flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
|
| 334 |
creds = flow.run_local_server(port=0)
|
|
|
|
| 456 |
type="filepath"
|
| 457 |
)
|
| 458 |
|
| 459 |
+
gr.Markdown("")
|
| 460 |
+
gr.Markdown("---")
|
| 461 |
+
gr.Markdown("")
|
| 462 |
+
|
| 463 |
+
# Sección de Google Drive
|
| 464 |
+
gr.Markdown("### Google Drive")
|
| 465 |
+
|
| 466 |
+
with gr.Row():
|
| 467 |
+
btn_conectar_drive = gr.Button(
|
| 468 |
+
"Conectar con Google Drive",
|
| 469 |
+
size="sm",
|
| 470 |
+
variant="secondary"
|
| 471 |
+
)
|
| 472 |
+
btn_cerrar_sesion = gr.Button(
|
| 473 |
+
"Cerrar Sesión",
|
| 474 |
+
size="sm"
|
| 475 |
+
)
|
| 476 |
+
|
| 477 |
+
gr.Markdown("")
|
| 478 |
+
|
| 479 |
+
drive_status_auth = gr.Textbox(
|
| 480 |
+
label="Estado de conexión",
|
| 481 |
+
value="No conectado",
|
| 482 |
+
interactive=False,
|
| 483 |
+
lines=2
|
| 484 |
+
)
|
| 485 |
+
|
| 486 |
gr.Markdown("")
|
| 487 |
|
| 488 |
# Checkbox para Google Drive
|
| 489 |
drive_checkbox = gr.Checkbox(
|
| 490 |
+
label="Guardar en Google Drive",
|
| 491 |
value=False,
|
| 492 |
+
info="Primero debes conectar tu cuenta"
|
| 493 |
)
|
| 494 |
|
| 495 |
+
gr.Markdown("")
|
| 496 |
+
gr.Markdown("---")
|
| 497 |
gr.Markdown("")
|
| 498 |
|
| 499 |
btn = gr.Button(
|
|
|
|
| 510 |
|
| 511 |
gr.Markdown("")
|
| 512 |
|
| 513 |
+
# Estado de subida a Drive
|
| 514 |
+
drive_upload_status = gr.Textbox(
|
| 515 |
+
label="Estado de subida a Drive",
|
| 516 |
interactive=False,
|
| 517 |
lines=3
|
| 518 |
)
|
|
|
|
| 566 |
- *HF_TOKEN en Settings → Secrets (obligatorio)*
|
| 567 |
- *credentials.json de Google Cloud para usar Drive (opcional)*
|
| 568 |
|
| 569 |
+
**Pasos para configurar Google Drive:**
|
| 570 |
+
1. Ve a [Google Cloud Console](https://console.cloud.google.com/)
|
| 571 |
+
2. Crea un proyecto y activa Google Drive API
|
| 572 |
+
3. Crea credenciales OAuth 2.0 (Tipo: Aplicación de escritorio)
|
| 573 |
+
4. Descarga el archivo credentials.json
|
| 574 |
+
5. Súbelo al directorio raíz de tu aplicación
|
| 575 |
+
6. Haz clic en "Conectar con Google Drive"
|
| 576 |
""")
|
| 577 |
|
| 578 |
+
# Conectar botones
|
| 579 |
+
btn_conectar_drive.click(
|
| 580 |
+
fn=autenticar_drive,
|
| 581 |
+
inputs=[],
|
| 582 |
+
outputs=[drive_status_auth, drive_checkbox]
|
| 583 |
+
)
|
| 584 |
+
|
| 585 |
+
btn_cerrar_sesion.click(
|
| 586 |
+
fn=cerrar_sesion_drive,
|
| 587 |
+
inputs=[],
|
| 588 |
+
outputs=[drive_status_auth, drive_checkbox]
|
| 589 |
+
)
|
| 590 |
+
|
| 591 |
+
# Conectar botón principal
|
| 592 |
btn.click(
|
| 593 |
fn=procesar_factura,
|
| 594 |
inputs=[pdf_input, drive_checkbox],
|
| 595 |
+
outputs=[texto_extraido, tabla_preview, csv_output, resumen_tecnico, info_util, drive_upload_status]
|
| 596 |
)
|
| 597 |
|
| 598 |
if __name__ == "__main__":
|