JoseAndresLopez's picture
Upload folder using huggingface_hub
6cc3191 verified
Raw
History Blame Contribute Delete
6.8 kB
import io
import json
import os
import tempfile
from pathlib import Path
import pandas as pd
import plotly.express as px
import streamlit as st
from dotenv import load_dotenv
from extractor import Invoice, extract_invoice_data
# Pull variables from a local .env file into the process environment
# before any of them are read below.
load_dotenv()

# Groq credentials and default model; both can be overridden via environment.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
GROQ_MODEL = os.environ.get("GROQ_MODEL", "llama-3.1-8b-instant")

# Bundled demo dataset that lives next to this script.
SAMPLE_DATA = Path(__file__).with_name("sample_data.json")

# Page chrome: browser tab title/icon, wide layout, then the visible heading.
st.set_page_config(
    page_title="Procesador de Facturas IA",
    page_icon="🧾",
    layout="wide",
)
st.title("🧾 Procesador de Facturas con IA")
st.caption("Sube tus facturas en PDF y obtén un dashboard completo en segundos.")
# ── Sidebar ──────────────────────────────────────────────────────────────────
# Holds every input control: model picker, PDF uploader, the "process" action
# and a button that asks for the bundled sample data to be reloaded.
with st.sidebar:
    st.header("⚙️ Configuración")

    groq_models = ["llama-3.1-8b-instant", "llama-3.3-70b-versatile"]
    model = st.selectbox("Modelo Groq", groq_models, index=0)

    st.divider()

    uploaded_files = st.file_uploader(
        "📂 Sube tus facturas (PDF)",
        type=["pdf"],
        accept_multiple_files=True,
    )

    # The action button stays greyed out until at least one PDF is present.
    nothing_uploaded = not uploaded_files
    process_btn = st.button(
        "🚀 Procesar Facturas",
        type="primary",
        use_container_width=True,
        disabled=nothing_uploaded,
    )

    st.divider()

    reload_sample = st.button("🔄 Cargar datos de ejemplo", use_container_width=True)
    if reload_sample:
        # Drop the current table, flag the request, and restart the script.
        st.session_state.pop("invoices_df", None)
        st.session_state["use_sample"] = True
        st.rerun()
# ── Load sample data on first visit or on explicit request ───────────────────
# The sidebar's "Cargar datos de ejemplo" button sets the "use_sample" flag and
# removes "invoices_df" before rerunning. The flag is consumed here (one-shot)
# so the sample is loaded even when PDFs are still sitting in the uploader;
# without honoring it, that button would leave the page with no data at all.
sample_requested = st.session_state.pop("use_sample", False)
first_visit = "invoices_df" not in st.session_state and not uploaded_files
if (sample_requested or first_visit) and SAMPLE_DATA.exists():
    records = json.loads(SAMPLE_DATA.read_text(encoding="utf-8"))
    st.session_state["invoices_df"] = pd.DataFrame(records)
    st.session_state["is_sample"] = True
# ── Process uploaded files ────────────────────────────────────────────────────
# Runs only on the rerun triggered by the "Procesar Facturas" button. Each PDF
# is written to disk, handed to extract_invoice_data, and the results are
# stored in session state as the DataFrame the dashboard renders.
if process_btn and uploaded_files:
    if not GROQ_API_KEY:
        st.error("No se encontró GROQ_API_KEY. Añádela al fichero .env", icon="🔑")
        st.stop()

    # Deferred import: the Groq SDK is only needed when files are processed.
    from groq import Groq

    client = Groq(api_key=GROQ_API_KEY)
    invoices: list[Invoice] = []
    total_files = len(uploaded_files)
    progress = st.progress(0, text="Procesando facturas...")

    # A private scratch directory (instead of the shared temp dir) prevents
    # name clashes with other sessions and is removed automatically, even when
    # extraction raises part-way through the batch.
    with tempfile.TemporaryDirectory() as scratch_dir:
        for i, uploaded_file in enumerate(uploaded_files):
            progress.progress((i + 1) / total_files, text=f"Procesando {uploaded_file.name}...")
            # Keep only the final path component of the client-supplied name so
            # it cannot point outside the scratch directory.
            tmp_path = Path(scratch_dir) / Path(uploaded_file.name).name
            # getvalue() returns the full content regardless of the stream position.
            tmp_path.write_bytes(uploaded_file.getvalue())
            # Use the model chosen in the sidebar rather than the env default.
            invoice = extract_invoice_data(str(tmp_path), client, model)
            invoices.append(invoice)

    progress.empty()

    df = pd.DataFrame([inv.model_dump() for inv in invoices])
    # Prepend the source filename so each row can be traced back to its PDF.
    df.insert(0, "archivo", [f.name for f in uploaded_files])
    st.session_state["invoices_df"] = df
    st.session_state["is_sample"] = False
    st.success(f"✅ {len(invoices)} facturas procesadas correctamente.")
# ── Dashboard ─────────────────────────────────────────────────────────────────
# Everything below needs a table in session state; halt the script otherwise.
if "invoices_df" in st.session_state:
    df: pd.DataFrame = st.session_state["invoices_df"]
else:
    st.stop()

# Tell the user when the figures come from the bundled sample dataset.
showing_sample = st.session_state.get("is_sample")
if showing_sample:
    st.info("Mostrando datos de ejemplo. Sube tus propias facturas PDF para analizarlas.", icon="📊")
# ── KPI Cards ──
# Four headline figures laid out side by side: total spend, total VAT,
# number of invoices and number of distinct suppliers.
kpi_cols = st.columns(4)

total_spend = df["total"].sum()
kpi_cols[0].metric("💶 Gasto Total", f"{total_spend:,.2f} €")

total_vat = df["iva_importe"].sum()
kpi_cols[1].metric("🏦 IVA Total", f"{total_vat:,.2f} €")

invoice_count = len(df)
kpi_cols[2].metric("📄 Facturas", invoice_count)

supplier_count = df["proveedor"].nunique()
kpi_cols[3].metric("🏢 Proveedores", supplier_count)

st.divider()
# ── Charts ──
# Left: donut of spend per category. Right: horizontal bars of spend per
# supplier. cat_df and prov_df are reused later by the Excel export.
col_left, col_right = st.columns(2)

with col_left:
    st.subheader("Gasto por Categoría")
    cat_df = df.groupby("categoria", as_index=False)["total"].sum()
    fig_cat = px.pie(
        cat_df,
        names="categoria",
        values="total",
        hole=0.4,
        color_discrete_sequence=px.colors.qualitative.Set3,
    )
    fig_cat.update_traces(textposition="inside", textinfo="percent+label")
    st.plotly_chart(fig_cat, use_container_width=True)

with col_right:
    st.subheader("Gasto por Proveedor")
    supplier_totals = df.groupby("proveedor", as_index=False)["total"].sum()
    # Ascending order puts the largest supplier at the top of a horizontal bar chart.
    prov_df = supplier_totals.sort_values("total", ascending=True)
    axis_labels = {"total": "Importe (€)", "proveedor": ""}
    fig_prov = px.bar(
        prov_df,
        x="total",
        y="proveedor",
        orientation="h",
        color="total",
        color_continuous_scale="Blues",
        labels=axis_labels,
    )
    # The bar length already encodes the amount, so the colour legend is hidden.
    fig_prov.update_layout(coloraxis_showscale=False)
    st.plotly_chart(fig_prov, use_container_width=True)
# Timeline chart
# Stacked bars of spend per invoice date, coloured by category. Dates are
# parsed first and rows with missing or unparseable dates are discarded; the
# emptiness check happens AFTER parsing so a heading with an empty chart is
# never shown when no row has a usable date.
df_dates = df.copy()
df_dates["fecha"] = pd.to_datetime(df_dates["fecha"], errors="coerce")
df_dates = df_dates.dropna(subset=["fecha"]).sort_values("fecha")
if not df_dates.empty:
    st.subheader("Evolución de Gastos por Fecha")
    fig_time = px.bar(
        df_dates,
        x="fecha",
        y="total",
        color="categoria",
        labels={"total": "Importe (€)", "fecha": "Fecha"},
        color_discrete_sequence=px.colors.qualitative.Set2,
    )
    st.plotly_chart(fig_time, use_container_width=True)
# ── Data Table ──
# Full per-invoice listing with money columns at two decimals, the VAT rate
# as a percentage, and a dash in place of missing values.
st.subheader("📋 Detalle de Facturas")

display_cols = [
    "archivo",
    "proveedor",
    "numero_factura",
    "fecha",
    "concepto",
    "categoria",
    "subtotal",
    "iva_porcentaje",
    "iva_importe",
    "total",
    "moneda",
]
column_formats = {
    "subtotal": "{:.2f}",
    "iva_importe": "{:.2f}",
    "total": "{:.2f}",
    "iva_porcentaje": "{:.1f}%",
}
styled_table = df[display_cols].style.format(column_formats, na_rep="—")
st.dataframe(styled_table, use_container_width=True, hide_index=True)
# ── Export ──
# Two download buttons: the raw table as CSV, and an Excel workbook holding
# the raw table plus the per-category and per-supplier summaries.
st.subheader("⬇️ Exportar")
col_e1, col_e2 = st.columns(2)

with col_e1:
    csv_bytes = df.to_csv(index=False).encode("utf-8")
    st.download_button(
        "📥 Descargar CSV",
        csv_bytes,
        "facturas.csv",
        "text/csv",
        use_container_width=True,
    )

with col_e2:
    # Sheet name -> frame, written in this order into an in-memory workbook.
    workbook_sheets = {
        "Facturas": df,
        "Por Categoría": cat_df,
        "Por Proveedor": prov_df,
    }
    excel_buf = io.BytesIO()
    with pd.ExcelWriter(excel_buf, engine="openpyxl") as writer:
        for sheet_title, frame in workbook_sheets.items():
            frame.to_excel(writer, index=False, sheet_name=sheet_title)
    st.download_button(
        "📊 Descargar Excel",
        excel_buf.getvalue(),
        "facturas.xlsx",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        use_container_width=True,
    )