"""Streamlit dashboard that extracts invoice data from PDFs via a Groq model.

Flow of the script (re-executed top to bottom on every Streamlit rerun):

1. Render the sidebar (model picker, PDF uploader, action buttons).
2. Load the bundled sample data when nothing has been processed yet or the
   user explicitly asked for it.
3. When requested, run every uploaded PDF through ``extract_invoice_data``
   and store the resulting DataFrame in ``st.session_state["invoices_df"]``.
4. Render KPI cards, charts, the detail table and CSV/Excel exports from
   that DataFrame.
"""
import io
import json
import os
import tempfile
from pathlib import Path

import pandas as pd
import plotly.express as px
import streamlit as st
from dotenv import load_dotenv

from extractor import Invoice, extract_invoice_data

load_dotenv()

GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
SAMPLE_DATA = Path(__file__).parent / "sample_data.json"

# Models offered in the sidebar; GROQ_MODEL is added below if it is not listed.
MODEL_OPTIONS = ["llama-3.1-8b-instant", "llama-3.3-70b-versatile"]


def _extract_uploaded_invoice(uploaded_file, client, model: str) -> Invoice:
    """Persist one uploaded PDF to a private temp dir and extract its data.

    Args:
        uploaded_file: Object returned by ``st.file_uploader``; only its
            ``name`` attribute and ``getvalue()`` method are used.
        client: Groq client forwarded unchanged to ``extract_invoice_data``.
        model: Name of the Groq model to use for the extraction.

    Returns:
        The ``Invoice`` produced by ``extract_invoice_data``.

    The temporary directory (and the PDF inside it) is removed even when the
    extraction raises.
    """
    # Keep only the basename: the name is client-supplied and must not be able
    # to steer the write outside the temporary directory.
    safe_name = Path(uploaded_file.name).name or "invoice.pdf"
    # A fresh directory per file avoids collisions between sessions that
    # upload files with the same name.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = Path(tmp_dir) / safe_name
        # getvalue() returns the full content regardless of the read position.
        tmp_path.write_bytes(uploaded_file.getvalue())
        return extract_invoice_data(str(tmp_path), client, model)


st.set_page_config(page_title="Procesador de Facturas IA", page_icon="🧾", layout="wide")
st.title("🧾 Procesador de Facturas con IA")
st.caption("Sube tus facturas en PDF y obtén un dashboard completo en segundos.")

# ── Sidebar ──────────────────────────────────────────────────────────────────
with st.sidebar:
    st.header("⚙️ Configuración")

    # Default to the model configured through the environment so behaviour is
    # unchanged until the user picks a different one.
    model_options = list(MODEL_OPTIONS)
    default_model = GROQ_MODEL or model_options[0]
    if default_model not in model_options:
        model_options.insert(0, default_model)
    model = st.selectbox(
        "Modelo Groq",
        model_options,
        index=model_options.index(default_model),
    )

    st.divider()
    uploaded_files = st.file_uploader(
        "📂 Sube tus facturas (PDF)",
        type=["pdf"],
        accept_multiple_files=True,
    )
    process_btn = st.button(
        "🚀 Procesar Facturas",
        use_container_width=True,
        type="primary",
        disabled=not uploaded_files,
    )

    st.divider()
    if st.button("🔄 Cargar datos de ejemplo", use_container_width=True):
        # Drop the current data and flag the next run to reload the sample.
        st.session_state.pop("invoices_df", None)
        st.session_state["use_sample"] = True
        st.rerun()

# ── Load sample data (first visit or explicit request) ───────────────────────
# The flag is consumed here so the sample is reloaded even while files are
# still present in the uploader.
sample_requested = st.session_state.pop("use_sample", False)
if "invoices_df" not in st.session_state and (sample_requested or not uploaded_files):
    if SAMPLE_DATA.exists():
        records = json.loads(SAMPLE_DATA.read_text(encoding="utf-8"))
        st.session_state["invoices_df"] = pd.DataFrame(records)
        st.session_state["is_sample"] = True

# ── Process uploaded files ───────────────────────────────────────────────────
if process_btn and uploaded_files:
    if not GROQ_API_KEY:
        st.error("No se encontró GROQ_API_KEY. Añádela al fichero .env", icon="🔑")
        st.stop()

    # Imported lazily so the sample dashboard works without the groq package.
    from groq import Groq

    client = Groq(api_key=GROQ_API_KEY)
    invoices: list[Invoice] = []
    progress = st.progress(0, text="Procesando facturas...")

    for i, uploaded_file in enumerate(uploaded_files):
        progress.progress(
            (i + 1) / len(uploaded_files),
            text=f"Procesando {uploaded_file.name}...",
        )
        # Use the model chosen in the sidebar, not the env constant.
        invoices.append(_extract_uploaded_invoice(uploaded_file, client, model))

    progress.empty()

    df = pd.DataFrame([inv.model_dump() for inv in invoices])
    df.insert(0, "archivo", [f.name for f in uploaded_files])
    st.session_state["invoices_df"] = df
    st.session_state["is_sample"] = False
    st.success(f"✅ {len(invoices)} facturas procesadas correctamente.")

# ── Dashboard ────────────────────────────────────────────────────────────────
if "invoices_df" not in st.session_state:
    st.stop()

df: pd.DataFrame = st.session_state["invoices_df"]

if st.session_state.get("is_sample"):
    st.info(
        "Mostrando datos de ejemplo. Sube tus propias facturas PDF para analizarlas.",
        icon="📊",
    )

# ── KPI Cards ──
col1, col2, col3, col4 = st.columns(4)
col1.metric("💶 Gasto Total", f"{df['total'].sum():,.2f} €")
col2.metric("🏦 IVA Total", f"{df['iva_importe'].sum():,.2f} €")
col3.metric("📄 Facturas", len(df))
col4.metric("🏢 Proveedores", df["proveedor"].nunique())

st.divider()

# ── Charts ──
col_left, col_right = st.columns(2)

with col_left:
    st.subheader("Gasto por Categoría")
    cat_df = df.groupby("categoria")["total"].sum().reset_index()
    fig_cat = px.pie(
        cat_df,
        values="total",
        names="categoria",
        hole=0.4,
        color_discrete_sequence=px.colors.qualitative.Set3,
    )
    fig_cat.update_traces(textposition="inside", textinfo="percent+label")
    st.plotly_chart(fig_cat, use_container_width=True)

with col_right:
    st.subheader("Gasto por Proveedor")
    prov_df = (
        df.groupby("proveedor")["total"]
        .sum()
        .reset_index()
        .sort_values("total", ascending=True)
    )
    fig_prov = px.bar(
        prov_df,
        x="total",
        y="proveedor",
        orientation="h",
        color="total",
        color_continuous_scale="Blues",
        labels={"total": "Importe (€)", "proveedor": ""},
    )
    fig_prov.update_layout(coloraxis_showscale=False)
    st.plotly_chart(fig_prov, use_container_width=True)

# Timeline chart: coerce the dates first so rows with unparseable dates are
# discarded before deciding whether there is anything to plot.
df_dates = df.copy()
df_dates["fecha"] = pd.to_datetime(df_dates["fecha"], errors="coerce")
df_dates = df_dates.dropna(subset=["fecha"]).sort_values("fecha")
if not df_dates.empty:
    st.subheader("Evolución de Gastos por Fecha")
    fig_time = px.bar(
        df_dates,
        x="fecha",
        y="total",
        color="categoria",
        labels={"total": "Importe (€)", "fecha": "Fecha"},
        color_discrete_sequence=px.colors.qualitative.Set2,
    )
    st.plotly_chart(fig_time, use_container_width=True)

# ── Data Table ──
st.subheader("📋 Detalle de Facturas")
display_cols = [
    "archivo",
    "proveedor",
    "numero_factura",
    "fecha",
    "concepto",
    "categoria",
    "subtotal",
    "iva_porcentaje",
    "iva_importe",
    "total",
    "moneda",
]
column_formats = {
    "subtotal": "{:.2f}",
    "iva_importe": "{:.2f}",
    "total": "{:.2f}",
    "iva_porcentaje": "{:.1f}%",
}
# Show only the columns actually present: "archivo", for instance, is added
# for uploaded files and may be missing from other data.
visible_cols = [col for col in display_cols if col in df.columns]
visible_formats = {col: fmt for col, fmt in column_formats.items() if col in visible_cols}
st.dataframe(
    df[visible_cols].style.format(visible_formats, na_rep="—"),
    use_container_width=True,
    hide_index=True,
)

# ── Export ──
st.subheader("⬇️ Exportar")
col_e1, col_e2 = st.columns(2)

with col_e1:
    csv = df.to_csv(index=False).encode("utf-8")
    st.download_button(
        "📥 Descargar CSV",
        csv,
        "facturas.csv",
        "text/csv",
        use_container_width=True,
    )

with col_e2:
    excel_buf = io.BytesIO()
    # The workbook is finalised when the writer context exits, so the buffer
    # is read only afterwards.
    with pd.ExcelWriter(excel_buf, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="Facturas")
        cat_df.to_excel(writer, index=False, sheet_name="Por Categoría")
        prov_df.to_excel(writer, index=False, sheet_name="Por Proveedor")
    st.download_button(
        "📊 Descargar Excel",
        excel_buf.getvalue(),
        "facturas.xlsx",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        use_container_width=True,
    )