Spaces:
Sleeping
Sleeping
| import io | |
| import json | |
| import os | |
| import tempfile | |
| from pathlib import Path | |
| import pandas as pd | |
| import plotly.express as px | |
| import streamlit as st | |
| from dotenv import load_dotenv | |
| from extractor import Invoice, extract_invoice_data | |
# Load variables from a local .env file (if present) before reading the settings below.
load_dotenv()

# Groq credentials and default model name, both read from the environment.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
GROQ_MODEL = os.environ.get("GROQ_MODEL", "llama-3.1-8b-instant")

# Example invoices shipped alongside this script.
SAMPLE_DATA = Path(__file__).with_name("sample_data.json")

# Page-level settings, followed by the title and tagline.
st.set_page_config(
    page_title="Procesador de Facturas IA",
    page_icon="🧾",
    layout="wide",
)
st.title("🧾 Procesador de Facturas con IA")
st.caption("Sube tus facturas en PDF y obtén un dashboard completo en segundos.")
# ── Sidebar ──────────────────────────────────────────────────────────────────
# Model picker, PDF uploader and the two action buttons. The names `model`,
# `uploaded_files` and `process_btn` are read by the sections further down.
groq_model_choices = ["llama-3.1-8b-instant", "llama-3.3-70b-versatile"]

with st.sidebar:
    st.header("⚙️ Configuración")
    model = st.selectbox("Modelo Groq", groq_model_choices, index=0)
    st.divider()

    uploaded_files = st.file_uploader(
        "📂 Sube tus facturas (PDF)",
        type=["pdf"],
        accept_multiple_files=True,
    )

    # The process button stays disabled until at least one PDF has been uploaded.
    has_uploads = bool(uploaded_files)
    process_btn = st.button(
        "🚀 Procesar Facturas",
        use_container_width=True,
        type="primary",
        disabled=not has_uploads,
    )
    st.divider()

    reload_sample_clicked = st.button("🔄 Cargar datos de ejemplo", use_container_width=True)
    if reload_sample_clicked:
        # Drop whatever is currently stored, flag the request and restart the script run.
        st.session_state.pop("invoices_df", None)
        st.session_state["use_sample"] = True
        st.rerun()
# ── Load sample data ─────────────────────────────────────────────────────────
# The bundled sample is loaded in two situations:
#   * first visit: nothing is stored in the session and no PDF has been uploaded;
#   * explicit request: the sidebar button set the "use_sample" flag before rerunning.
# The flag is consumed (popped) so it only triggers a single reload. Honouring it
# matters when PDFs are still present in the uploader: the button has already
# removed "invoices_df", and without the flag the dashboard would have no data.
sample_requested = st.session_state.pop("use_sample", False)
nothing_loaded = "invoices_df" not in st.session_state and not uploaded_files

if sample_requested or nothing_loaded:
    if SAMPLE_DATA.exists():
        records = json.loads(SAMPLE_DATA.read_text(encoding="utf-8"))
        st.session_state["invoices_df"] = pd.DataFrame(records)
        st.session_state["is_sample"] = True
# ── Process uploaded files ────────────────────────────────────────────────────
# Runs only on the script run in which the "Procesar Facturas" button was clicked.
if process_btn and uploaded_files:
    if not GROQ_API_KEY:
        st.error("No se encontró GROQ_API_KEY. Añádela al fichero .env", icon="🔑")
        st.stop()

    # Imported here, so the Groq SDK is only needed when processing is requested.
    from groq import Groq

    client = Groq(api_key=GROQ_API_KEY)
    # Use the model picked in the sidebar; fall back to the configured default
    # only if the selection is empty.
    selected_model = model or GROQ_MODEL

    invoices: list[Invoice] = []
    processed_names: list[str] = []
    failures: list[tuple[str, Exception]] = []

    total_files = len(uploaded_files)
    progress = st.progress(0, text="Procesando facturas...")
    for i, uploaded_file in enumerate(uploaded_files):
        progress.progress((i + 1) / total_files, text=f"Procesando {uploaded_file.name}...")

        # Write to a uniquely named temp file instead of reusing the client-supplied
        # name: uploaded names can collide with each other (or with other sessions)
        # and may contain path components. getvalue() returns the full content
        # regardless of the current read position.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp.write(uploaded_file.getvalue())
            tmp_path = Path(tmp.name)

        try:
            invoice = extract_invoice_data(str(tmp_path), client, selected_model)
        except Exception as exc:  # UI boundary: report the file and keep going
            failures.append((uploaded_file.name, exc))
        else:
            invoices.append(invoice)
            processed_names.append(uploaded_file.name)
        finally:
            # Always remove the temp file, even when extraction failed.
            tmp_path.unlink(missing_ok=True)
    progress.empty()

    for failed_name, exc in failures:
        st.error(f"❌ No se pudo procesar {failed_name}: {exc}")

    # Only replace the stored data when at least one invoice was extracted;
    # an empty frame would lack the columns the dashboard reads.
    if invoices:
        df = pd.DataFrame([inv.model_dump() for inv in invoices])
        # File names are taken from the successfully processed uploads so the
        # column length always matches the number of rows.
        df.insert(0, "archivo", processed_names)
        st.session_state["invoices_df"] = df
        st.session_state["is_sample"] = False
        st.success(f"✅ {len(invoices)} facturas procesadas correctamente.")
# ── Dashboard ─────────────────────────────────────────────────────────────────
# Nothing to render unless an invoice table has been stored in the session.
if "invoices_df" not in st.session_state:
    st.stop()

df: pd.DataFrame = st.session_state["invoices_df"]

showing_sample = st.session_state.get("is_sample")
if showing_sample:
    st.info("Mostrando datos de ejemplo. Sube tus propias facturas PDF para analizarlas.", icon="📊")

# ── KPI Cards ──
# Four headline figures side by side: total spend, total VAT, invoice count
# and number of distinct suppliers.
kpi_columns = st.columns(4)
kpi_values = [
    ("💶 Gasto Total", f"{df['total'].sum():,.2f} €"),
    ("🏦 IVA Total", f"{df['iva_importe'].sum():,.2f} €"),
    ("📄 Facturas", len(df)),
    ("🏢 Proveedores", df["proveedor"].nunique()),
]
for kpi_column, (kpi_label, kpi_value) in zip(kpi_columns, kpi_values):
    kpi_column.metric(kpi_label, kpi_value)

st.divider()
# ── Charts ──
# Two aggregates of the "total" column, shown side by side. `cat_df` and
# `prov_df` are also written to the Excel export further down.
col_left, col_right = st.columns(2)

with col_left:
    st.subheader("Gasto por Categoría")
    total_by_category = df.groupby("categoria")["total"].sum()
    cat_df = total_by_category.reset_index()
    # Donut chart: one slice per category, labelled inside with share and name.
    fig_cat = px.pie(
        cat_df,
        values="total",
        names="categoria",
        hole=0.4,
        color_discrete_sequence=px.colors.qualitative.Set3,
    )
    fig_cat.update_traces(textposition="inside", textinfo="percent+label")
    st.plotly_chart(fig_cat, use_container_width=True)

with col_right:
    st.subheader("Gasto por Proveedor")
    total_by_supplier = df.groupby("proveedor")["total"].sum()
    prov_df = total_by_supplier.reset_index().sort_values("total", ascending=True)
    # Horizontal bars, one per supplier, coloured by amount.
    fig_prov = px.bar(
        prov_df,
        x="total",
        y="proveedor",
        orientation="h",
        color="total",
        color_continuous_scale="Blues",
        labels={"total": "Importe (€)", "proveedor": ""},
    )
    fig_prov.update_layout(coloraxis_showscale=False)
    st.plotly_chart(fig_prov, use_container_width=True)
# Timeline chart
# Parse the dates first and only then decide whether there is anything to plot:
# values that cannot be parsed become NaT and are dropped together with missing
# ones, so a table with no usable date no longer renders a heading over an
# empty chart.
df_dates = df.copy()
df_dates["fecha"] = pd.to_datetime(df_dates["fecha"], errors="coerce")
df_dates = df_dates.dropna(subset=["fecha"]).sort_values("fecha")

if not df_dates.empty:
    st.subheader("Evolución de Gastos por Fecha")
    # One bar per invoice amount on its date, coloured by category.
    fig_time = px.bar(
        df_dates,
        x="fecha",
        y="total",
        color="categoria",
        labels={"total": "Importe (€)", "fecha": "Fecha"},
        color_discrete_sequence=px.colors.qualitative.Set2,
    )
    st.plotly_chart(fig_time, use_container_width=True)
# ── Data Table ──
st.subheader("📋 Detalle de Facturas")

# Preferred column order for the detail table.
display_cols = ["archivo", "proveedor", "numero_factura", "fecha", "concepto",
                "categoria", "subtotal", "iva_porcentaje", "iva_importe", "total", "moneda"]

# Show only the columns the current table actually has. "archivo" is added when
# PDFs are processed, so data loaded from another source may not include it;
# indexing with a missing column would raise KeyError.
visible_cols = [col for col in display_cols if col in df.columns]

# Number formats per column, restricted to the visible columns.
number_formats = {
    "subtotal": "{:.2f}",
    "iva_importe": "{:.2f}",
    "total": "{:.2f}",
    "iva_porcentaje": "{:.1f}%",
}
visible_formats = {col: fmt for col, fmt in number_formats.items() if col in visible_cols}

st.dataframe(
    df[visible_cols].style.format(visible_formats, na_rep="—"),
    use_container_width=True,
    hide_index=True,
)
# ── Export ──
# Two download buttons side by side: the full table as CSV, and an Excel
# workbook with the full table plus the per-category and per-supplier totals.
st.subheader("⬇️ Exportar")
col_e1, col_e2 = st.columns(2)

with col_e1:
    csv_bytes = df.to_csv(index=False).encode("utf-8")
    st.download_button(
        label="📥 Descargar CSV",
        data=csv_bytes,
        file_name="facturas.csv",
        mime="text/csv",
        use_container_width=True,
    )

with col_e2:
    # The workbook is built in memory; sheets are written in this order.
    workbook_sheets = {
        "Facturas": df,
        "Por Categoría": cat_df,
        "Por Proveedor": prov_df,
    }
    workbook_buffer = io.BytesIO()
    with pd.ExcelWriter(workbook_buffer, engine="openpyxl") as writer:
        for sheet_title, sheet_frame in workbook_sheets.items():
            sheet_frame.to_excel(writer, index=False, sheet_name=sheet_title)

    st.download_button(
        label="📊 Descargar Excel",
        data=workbook_buffer.getvalue(),
        file_name="facturas.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        use_container_width=True,
    )