Spaces:

vecervantes89
/

asistente_auditoria_ia

Sleeping

App Files Files Community

vecervantes89 committed on Oct 22, 2025

Commit

c66f085

verified ·

1 Parent(s): 7416d40

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -59

app.py CHANGED Viewed

@@ -2,139 +2,163 @@ import os
 import re
 import pdfplumber
 import gradio as gr
-from openai import OpenAI
-from huggingface_hub import hf_hub_download, list_repo_files
 from dotenv import load_dotenv
 # ------------------------------------------------------------
-# CONFIGURACIÓN DEL CLIENTE OPENAI
 # ------------------------------------------------------------
 load_dotenv()
-client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 # ------------------------------------------------------------
-# CONFIGURACIÓN DEL ASISTENTE
 # ------------------------------------------------------------
 system_prompt = """
-Eres un Asistente de Inteligencia Artificial especializado en Auditoría Interna,
-formado bajo las Normas Internacionales para la Práctica Profesional de la Auditoría Interna
-emitidas por el IIA (Institute of Internal Auditors).
-Tu función es apoyar a auditores internos en análisis, planeación, ejecución,
-evaluación y documentación de auditorías, así como en la preparación para el
-examen CIA (Certified Internal Auditor). Tus respuestas deben reflejar:
-- Objetividad, integridad y confidencialidad.
-- Los valores de Gentera: Responsabilidad, Empatía, Innovación y Transparencia.
-- Lenguaje claro, profesional y humano.
-Si la pregunta se relaciona con auditoría, control interno, riesgos o ética profesional,
-responde con rigor técnico y ejemplos prácticos. Si se pide un resumen de un PDF,
-integra el contenido del documento correspondiente.
 """
 # ------------------------------------------------------------
-# CARGA DE PDFs DESDE HUGGING FACE (DATASET)
 # ------------------------------------------------------------
 REPO_ID = "vecervantes89/auditoria_interna_pdfs"
 REPO_TYPE = "dataset"
 def extract_pdf_text(local_path: str) -> str:
-    text_parts = []
     with pdfplumber.open(local_path) as pdf:
-        for page in pdf.pages:
-            text_parts.append(page.extract_text() or "")
-    return "\n".join(text_parts)
 def load_hf_pdfs_text(repo_id: str, repo_type: str = "dataset"):
     try:
         files = [f for f in list_repo_files(repo_id=repo_id, repo_type=repo_type) if f.lower().endswith(".pdf")]
     except Exception as e:
-        print(f"[ERROR] No se pudo listar los archivos del repo '{repo_id}': {e}")
-        return {"files": [], "all_text": "", "by_name": {}}
     entries = []
     for f in files:
         try:
-            local_path = hf_hub_download(repo_id=repo_id, filename=f, repo_type=repo_type)
-            text = extract_pdf_text(local_path)
             entries.append({"name": f, "text": text})
             print(f"[OK] Cargado {f}")
         except Exception as e:
             print(f"[ERROR] Falló la carga de {f}: {e}")
-    all_text = "\n\n".join(e["text"] for e in entries)
     by_name = {e["name"]: e["text"] for e in entries}
-    print(f"[INFO] Se cargaron {len(entries)} PDFs correctamente desde {repo_id}.")
-    return {"files": entries, "all_text": all_text, "by_name": by_name}
 HF_DOCS = load_hf_pdfs_text(REPO_ID, REPO_TYPE)
 # ------------------------------------------------------------
-# LÓGICA DEL CHAT
 # ------------------------------------------------------------
 def buscar_mejor_fragmento(pregunta: str, docs: dict, max_chars: int = 3000):
-    q = pregunta.lower()
-    # 1) Coincidencia por nombre de archivo mencionado en la pregunta
     for name, text in docs.get("by_name", {}).items():
         if name.lower() in q:
             return name, (text or "")[:max_chars]
-    # 2) Coincidencia simple por frecuencia de términos
     tokens = [t for t in re.findall(r"[a-záéíóúüñ0-9]+", q) if len(t) > 2]
     best_name, best_score, best_text = "", 0, ""
     for e in docs.get("files", []):
-        text_low = (e.get("text") or "").lower()
-        score = sum(text_low.count(t) for t in tokens)
-        if score > best_score:
-            best_score, best_name, best_text = score, e.get("name", ""), e.get("text", "")
     return (best_name, (best_text or "")[:max_chars]) if best_score > 0 else ("", "")
 def responder(user_text: str, history: list | None):
-    """
-    Con Chatbot(type="messages"), history es una lista de dicts:
-    [{"role":"user","content":"..."}, {"role":"assistant","content":"..."}]
-    """
     try:
-        history = history or []  # asegurar lista
-        # Añadimos el mensaje del usuario al historial
         history.append({"role": "user", "content": user_text})
-        # Buscar contexto en PDFs
         nombre_pdf, fragmento = buscar_mejor_fragmento(user_text, HF_DOCS)
         if fragmento:
             contenido_usuario = (
                 f"El siguiente texto proviene del documento '{nombre_pdf}'. "
-                "Úsalo para responder de manera clara, breve y profesional:\n\n"
-                f"{fragmento}\n\nPregunta del usuario:\n{user_text}"
             )
         else:
             contenido_usuario = user_text
         mensajes = [{"role": "system", "content": system_prompt}] + history[:-1] + [
             {"role": "user", "content": contenido_usuario}
         ]
         resp = client.chat.completions.create(
             model="gpt-4o-mini",
             messages=mensajes,
             temperature=0.3,
         )
-        bot_text = resp.choices[0].message.content
-        history.append({"role": "assistant", "content": bot_text})
-        # Limpiar textbox (""), devolver historial en formato messages
         return "", history
     except Exception as e:
-        history.append({"role": "assistant", "content": f"⚠️ Error: {e}"})
         return "", history
 def limpiar_chat():
-    return []  # devolver lista vacía para Chatbot(type="messages")
 # ------------------------------------------------------------
-# INTERFAZ VISUAL GRADIO
 # ------------------------------------------------------------
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
     gr.HTML("""
@@ -144,9 +168,11 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
         <p style="font-size:15px;">Basado en GPT-4o y los valores del IIA y Gentera</p>
     </div>
     """)
-    chat = gr.Chatbot(label="Chat Asistente Auditoría", type="messages",
-                      value=[{"role": "assistant", "content": "¡Hola! Soy tu Asistente IA de Auditoría Interna. ¿En qué te ayudo hoy?"}])
     msg = gr.Textbox(placeholder="Escribe tu consulta aquí...", label="Tu mensaje")
     clear = gr.Button("🧹 Limpiar chat")

 import re
 import pdfplumber
 import gradio as gr
 from dotenv import load_dotenv
+from huggingface_hub import hf_hub_download, list_repo_files
+from openai import OpenAI
+# --- Excepciones (compatibles con distintas versiones del SDK) ---
# Import the OpenAI exception types under private aliases. Some SDK releases
# may not export every name, so a failed import must not stop the app.
try:
    from openai import (
        APIConnectionError as _APIConnectionError,
        APIStatusError as _APIStatusError,
        RateLimitError as _RateLimitError,
        AuthenticationError as _AuthenticationError,
        APITimeoutError as _APITimeoutError,
    )
except ImportError:
    # Do NOT alias the names to ``Exception``: an ``except`` clause that uses
    # such an alias would match every error, so whichever handler comes first
    # would report all failures with its own (wrong) message. Distinct
    # placeholder classes that nothing raises keep each handler inert and let
    # unknown errors reach the generic ``except Exception`` handler instead.
    class _APIConnectionError(Exception):
        """Placeholder used when the SDK does not export APIConnectionError."""

    class _APIStatusError(Exception):
        """Placeholder used when the SDK does not export APIStatusError."""

    class _RateLimitError(Exception):
        """Placeholder used when the SDK does not export RateLimitError."""

    class _AuthenticationError(Exception):
        """Placeholder used when the SDK does not export AuthenticationError."""

    class _APITimeoutError(Exception):
        """Placeholder used when the SDK does not export APITimeoutError."""
 # ------------------------------------------------------------
+# CONFIG: OpenAI
 # ------------------------------------------------------------
 load_dotenv()
+client = OpenAI(
+    api_key=os.getenv("OPENAI_API_KEY"),
+    timeout=30,       # evita cuelgues largos
+    max_retries=1,    # sin reintentos largos
+)
 # ------------------------------------------------------------
+# SYSTEM PROMPT
 # ------------------------------------------------------------
 system_prompt = """
+Eres un Asistente de IA especializado en Auditoría Interna,
+conforme a las Normas del IIA. Apoyas en análisis, planeación,
+ejecución y documentación de auditorías y en la preparación para el CIA.
+Responde con rigor técnico, ejemplos claros y lenguaje profesional.
+Si la consulta menciona un PDF, integra fragmentos pertinentes del documento.
 """
 # ------------------------------------------------------------
+# CARGA DE PDFs (dataset en Hugging Face)
 # ------------------------------------------------------------
 REPO_ID = "vecervantes89/auditoria_interna_pdfs"
 REPO_TYPE = "dataset"
 def extract_pdf_text(local_path: str) -> str:
     """Return the text of every page of the PDF at *local_path*.

     The file is opened with ``pdfplumber``; the text of each page is joined
     with a newline. A page whose ``extract_text()`` result is falsy
     contributes an empty string, so the number of joined segments always
     equals the number of pages.
     """
     with pdfplumber.open(local_path) as document:
         return "\n".join(page.extract_text() or "" for page in document.pages)
 def load_hf_pdfs_text(repo_id: str, repo_type: str = "dataset"):
     """Download every PDF listed in a Hugging Face repo and extract its text.

     Args:
         repo_id: Repository identifier passed to ``list_repo_files`` and
             ``hf_hub_download``.
         repo_type: Repository type passed to the same calls.

     Returns:
         A dict with three keys: ``"files"`` (list of ``{"name", "text"}``
         dicts, one per PDF that loaded), ``"by_name"`` (file name -> text)
         and ``"all_text"`` (all texts joined by a blank line). If the repo
         cannot be listed, the same structure is returned with empty values.

     Errors are printed rather than raised: a PDF that fails to download or
     parse is skipped so the remaining files still load.
     """
     try:
         repo_files = list_repo_files(repo_id=repo_id, repo_type=repo_type)
         pdf_names = [name for name in repo_files if name.lower().endswith(".pdf")]
     except Exception as e:
         print(f"[ERROR] No se pudo listar '{repo_id}': {e}")
         return {"files": [], "by_name": {}, "all_text": ""}

     loaded = []
     for pdf_name in pdf_names:
         try:
             cached_path = hf_hub_download(
                 repo_id=repo_id, filename=pdf_name, repo_type=repo_type
             )
             pdf_text = extract_pdf_text(cached_path)
             loaded.append({"name": pdf_name, "text": pdf_text})
             print(f"[OK] Cargado {pdf_name}")
         except Exception as e:
             print(f"[ERROR] Falló la carga de {pdf_name}: {e}")

     texts_by_name = {doc["name"]: doc["text"] for doc in loaded}
     joined_text = "\n\n".join(doc["text"] for doc in loaded)
     print(f"[INFO] Se cargaron {len(loaded)} PDFs desde {repo_id}.")
     return {"files": loaded, "by_name": texts_by_name, "all_text": joined_text}
 HF_DOCS = load_hf_pdfs_text(REPO_ID, REPO_TYPE)
 # ------------------------------------------------------------
+# BÚSQUEDA SIMPLE DE CONTEXTO
 # ------------------------------------------------------------
 def _seleccionar_ventana(text: str, tokens: list[str], max_chars: int) -> str:
     """Return the slice of *text*, at most ``max_chars`` long, with most hits.

     Candidate windows start every ``max_chars // 2`` characters. Each one is
     scored by the total number of occurrences of *tokens* in its lowercased
     content. Ties keep the earliest window, so the head of the document is
     returned when no later window is strictly better.
     """
     if max_chars <= 0 or len(text) <= max_chars:
         return text[:max_chars]
     step = max(1, max_chars // 2)
     best_start, best_hits = 0, -1
     for start in range(0, len(text), step):
         # Lowercase the slice, not the whole text, so the offsets always
         # refer to the original string.
         window = text[start:start + max_chars].lower()
         hits = sum(window.count(tok) for tok in tokens)
         if hits > best_hits:
             best_start, best_hits = start, hits
     return text[best_start:best_start + max_chars]


 def buscar_mejor_fragmento(pregunta: str, docs: dict, max_chars: int = 3000):
     """Pick the document that best matches *pregunta* and return a fragment.

     Args:
         pregunta: The user's question; ``None`` is treated as empty.
         docs: Mapping with ``"by_name"`` (file name -> text) and ``"files"``
             (list of ``{"name", "text"}`` dicts); missing keys are treated
             as empty.
         max_chars: Maximum length of the returned fragment.

     Returns:
         ``(name, fragment)``, or ``("", "")`` when nothing matches.

     Matching runs in two steps:
       1. If a file name appears literally in the question, the head of that
          document is returned.
       2. Otherwise every document is scored by how often the question's
          terms (longer than two characters) occur in it. From the winner,
          the window containing the most term occurrences is returned, so
          the fragment holds the matched terms rather than just the first
          ``max_chars`` characters of the document.
     """
     q = (pregunta or "").lower()

     # 1) Explicit mention of a file name.
     for name, text in docs.get("by_name", {}).items():
         if name.lower() in q:
             return name, (text or "")[:max_chars]

     # 2) Term-frequency match.
     tokens = [t for t in re.findall(r"[a-záéíóúüñ0-9]+", q) if len(t) > 2]
     if not tokens:
         return "", ""

     best_name, best_score, best_text = "", 0, ""
     for entry in docs.get("files", []):
         lowered = (entry.get("text") or "").lower()
         score = sum(lowered.count(tok) for tok in tokens)
         if score > best_score:
             best_score = score
             best_name = entry.get("name", "")
             best_text = entry.get("text", "")

     if best_score <= 0:
         return "", ""
     return best_name, _seleccionar_ventana(best_text or "", tokens, max_chars)
+# ------------------------------------------------------------
+# HANDLER DEL CHAT (type="messages")
+# ------------------------------------------------------------
 def responder(user_text: str, history: list | None):
     """Answer one user message and return the updated chat history.

     Args:
         user_text: The text typed by the user.
         history: List of ``{"role", "content"}`` message dicts, or ``None``
             for an empty conversation. The list is extended in place.

     Returns:
         ``("", history)``. The empty string is the first output value, and
         ``history`` ends with the user message followed by either the
         model's reply or a warning that describes the failure.

     Errors never propagate: every failure becomes an assistant message.
     """
     history = history or []
     history.append({"role": "user", "content": user_text})

     try:
         # Context taken from the loaded PDFs.
         nombre_pdf, fragmento = buscar_mejor_fragmento(user_text, HF_DOCS)
         if fragmento:
             contenido_usuario = (
                 f"El siguiente texto proviene del documento '{nombre_pdf}'. "
                 "Úsalo como contexto y responde de forma clara, breve y profesional:\n\n"
                 f"{fragmento}\n\n"
                 f"Pregunta del usuario:\n{user_text}"
             )
         else:
             contenido_usuario = user_text

         # system + earlier history + the context-enriched user message.
         # ``history[:-1]`` leaves out the raw user message appended above so
         # it is not sent twice.
         mensajes = [{"role": "system", "content": system_prompt}] + history[:-1] + [
             {"role": "user", "content": contenido_usuario}
         ]

         resp = client.chat.completions.create(
             model="gpt-4o-mini",
             messages=mensajes,
             temperature=0.3,
         )
         respuesta = resp.choices[0].message.content
     # Subclasses must come before their base classes. In the OpenAI SDK
     # APITimeoutError derives from APIConnectionError, and AuthenticationError
     # and RateLimitError derive from APIStatusError. With the timeout clause
     # after the connection clause, timeouts were reported as a networking
     # problem and the timeout handler could never run.
     except _AuthenticationError:
         respuesta = (
             "⚠️ Error de autenticación con OpenAI.\nRevisa **OPENAI_API_KEY** en Settings → Variables."
         )
     except _RateLimitError:
         respuesta = "⚠️ Límite/ cuota de OpenAI alcanzado. Intenta más tarde o cambia de modelo."
     except _APITimeoutError:
         respuesta = "⚠️ La solicitud a OpenAI excedió el tiempo de espera. Intenta de nuevo."
     except _APIConnectionError:
         respuesta = (
             "⚠️ Error de conexión saliente.\nActiva **Allow internet access** en Settings → Runtime/Networking."
         )
     except _APIStatusError as e:
         respuesta = f"⚠️ Error de API: {e}"
     except Exception as e:
         # Last-resort boundary so the handler never raises; the error is
         # shown in the chat instead.
         respuesta = f"⚠️ Error inesperado: {e}"

     history.append({"role": "assistant", "content": respuesta})
     return "", history
 def limpiar_chat():
     """Return a new, empty message list.

     The original inline comment notes that ``Chatbot(type="messages")``
     expects a list of dicts, hence a list rather than ``None``.
     """
     historial_vacio = []
     return historial_vacio
 # ------------------------------------------------------------
+# UI GRADIO
 # ------------------------------------------------------------
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
     gr.HTML("""
         <p style="font-size:15px;">Basado en GPT-4o y los valores del IIA y Gentera</p>
     </div>
     """)
+    chat = gr.Chatbot(
+        label="Chat Asistente Auditoría",
+        type="messages",
+        value=[{"role": "assistant", "content": "¡Hola! Soy tu Asistente IA de Auditoría Interna. ¿En qué te ayudo hoy?"}]
+    )
     msg = gr.Textbox(placeholder="Escribe tu consulta aquí...", label="Tu mensaje")
     clear = gr.Button("🧹 Limpiar chat")