| | import os |
| | import re |
| | import pdfplumber |
| | import gradio as gr |
| | from dotenv import load_dotenv |
| | from huggingface_hub import hf_hub_download, list_repo_files |
| | from openai import OpenAI |
| |
|
| | |
# Import the OpenAI exception classes that `responder` translates into
# user-facing messages, under private aliases.
try:
    from openai import (
        APIConnectionError as _APIConnectionError,
        APIStatusError as _APIStatusError,
        RateLimitError as _RateLimitError,
        AuthenticationError as _AuthenticationError,
        APITimeoutError as _APITimeoutError,
    )
except ImportError:
    # The installed `openai` package does not export these names.
    # Binding the aliases to `Exception` itself would make the first `except`
    # clause that uses one of them catch *every* error and report it under the
    # wrong label. A private class that nothing raises keeps the aliases valid
    # in `except` clauses while letting real errors fall through to the generic
    # `except Exception` handler.
    class _UnavailableOpenAIError(Exception):
        """Placeholder bound to OpenAI error names that could not be imported."""

    _APIConnectionError = _APIStatusError = _RateLimitError = _UnavailableOpenAIError
    _AuthenticationError = _APITimeoutError = _UnavailableOpenAIError
| |
|
| | |
| | |
| | |
# --- Runtime configuration ---------------------------------------------------
# Load variables from a .env file (when present) into the process environment
# before the API key is read below.
load_dotenv()

# Single OpenAI client shared by every chat turn.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"), timeout=30, max_retries=1)

# --- Assistant persona -------------------------------------------------------
# System message placed first in every chat completion request.
system_prompt = """
Eres un Asistente de IA especializado en Auditoría Interna,
conforme a las Normas del IIA. Apoyas en análisis, planeación,
ejecución y documentación de auditorías y en la preparación para el CIA.
Responde con rigor técnico, ejemplos claros y lenguaje profesional.
Si la consulta menciona un PDF, integra fragmentos pertinentes del documento.
"""

# --- PDF corpus location -----------------------------------------------------
# Hugging Face Hub repository holding the reference PDFs, and its repo type.
REPO_ID = "vecervantes89/auditoria_interna_pdfs"
REPO_TYPE = "dataset"
| |
|
def extract_pdf_text(local_path: str) -> str:
    """Return the text of every page of the PDF at *local_path*, newline-joined.

    A page for which ``extract_text()`` returns a falsy value contributes an
    empty string instead.
    """
    with pdfplumber.open(local_path) as document:
        page_texts = [page.extract_text() or "" for page in document.pages]
    return "\n".join(page_texts)
| |
|
def load_hf_pdfs_text(repo_id: str, repo_type: str = "dataset"):
    """Download every PDF in a Hugging Face Hub repo and extract its text.

    Args:
        repo_id: Hub repository identifier.
        repo_type: Hub repository type passed through to the Hub helpers.

    Returns:
        A dict with three keys:
        ``"files"`` - list of ``{"name", "text"}`` dicts, one per PDF loaded;
        ``"by_name"`` - mapping of file name to extracted text;
        ``"all_text"`` - all extracted texts joined by a blank line.
        If the repo cannot be listed, all three are empty.

    Failures are reported with ``print`` and never raised: a listing failure
    yields the empty result, and a failure on one PDF only skips that PDF.
    """
    try:
        repo_files = list_repo_files(repo_id=repo_id, repo_type=repo_type)
        pdf_names = [name for name in repo_files if name.lower().endswith(".pdf")]
    except Exception as exc:
        print(f"[ERROR] No se pudo listar '{repo_id}': {exc}")
        return {"files": [], "by_name": {}, "all_text": ""}

    loaded = []
    for pdf_name in pdf_names:
        try:
            local_copy = hf_hub_download(
                repo_id=repo_id, filename=pdf_name, repo_type=repo_type
            )
            loaded.append({"name": pdf_name, "text": extract_pdf_text(local_copy)})
            print(f"[OK] Cargado {pdf_name}")
        except Exception as exc:
            # Best effort: one unreadable PDF must not abort the whole load.
            print(f"[ERROR] Falló la carga de {pdf_name}: {exc}")

    text_by_name = {doc["name"]: doc["text"] for doc in loaded}
    combined_text = "\n\n".join(doc["text"] for doc in loaded)
    print(f"[INFO] Se cargaron {len(loaded)} PDFs desde {repo_id}.")
    return {"files": loaded, "by_name": text_by_name, "all_text": combined_text}
| |
|
# Download and parse the PDF corpus once, at import time; `responder` reads
# this module-level result on every chat turn.
HF_DOCS = load_hf_pdfs_text(repo_id=REPO_ID, repo_type=REPO_TYPE)
| |
|
| | |
| | |
| | |
def buscar_mejor_fragmento(pregunta: str, docs: dict, max_chars: int = 3000):
    """Pick the document most relevant to *pregunta* and return its opening text.

    Selection happens in two passes:

    1. If the lower-cased question contains a document's file name (also
       lower-cased), that document wins immediately.
    2. Otherwise each document is scored by how many times the question's
       words (longer than two characters) occur in its lower-cased text; the
       highest score wins and the earliest document wins ties.

    Args:
        pregunta: User question; ``None`` is treated as an empty string.
        docs: Dict with optional ``"by_name"`` (name -> text) and ``"files"``
            (list of ``{"name", "text"}`` dicts) entries.
        max_chars: Maximum number of characters of document text to return.

    Returns:
        ``(name, text[:max_chars])`` for the chosen document, or ``("", "")``
        when no document scores above zero.
    """
    query = (pregunta or "").lower()

    # Pass 1: the question names a file explicitly.
    for doc_name, doc_text in docs.get("by_name", {}).items():
        if doc_name.lower() in query:
            return doc_name, (doc_text or "")[:max_chars]

    # Pass 2: substring-frequency scoring over the question's words.
    words = [w for w in re.findall(r"[a-záéíóúüñ0-9]+", query) if len(w) > 2]
    winner = None
    top_score = 0
    for entry in docs.get("files", []):
        haystack = (entry.get("text") or "").lower()
        score = 0
        for word in words:
            score += haystack.count(word)
        # Strict comparison keeps the first document on ties.
        if score > top_score:
            top_score = score
            winner = entry

    if winner is None:
        return "", ""
    return winner.get("name", ""), (winner.get("text", "") or "")[:max_chars]
| |
|
| | |
| | |
| | |
| | def responder(user_text: str, history: list | None): |
| | try: |
| | history = history or [] |
| | history.append({"role": "user", "content": user_text}) |
| |
|
| | |
| | nombre_pdf, fragmento = buscar_mejor_fragmento(user_text, HF_DOCS) |
| | if fragmento: |
| | contenido_usuario = ( |
| | f"El siguiente texto proviene del documento '{nombre_pdf}'. " |
| | "Úsalo como contexto y responde de forma clara, breve y profesional:\n\n" |
| | f"{fragmento}\n\n" |
| | f"Pregunta del usuario:\n{user_text}" |
| | ) |
| | else: |
| | contenido_usuario = user_text |
| |
|
| | |
| | mensajes = [{"role": "system", "content": system_prompt}] + history[:-1] + [ |
| | {"role": "user", "content": contenido_usuario} |
| | ] |
| |
|
| | |
| | resp = client.chat.completions.create( |
| | model="gpt-4o-mini", |
| | messages=mensajes, |
| | temperature=0.3, |
| | ) |
| | bot = resp.choices[0].message.content |
| | history.append({"role": "assistant", "content": bot}) |
| | return "", history |
| |
|
| | except _AuthenticationError: |
| | history.append({"role": "assistant", "content": |
| | "⚠️ Error de autenticación con OpenAI.\nRevisa **OPENAI_API_KEY** en Settings → Variables."}) |
| | return "", history |
| | except _APIConnectionError: |
| | history.append({"role": "assistant", "content": |
| | "⚠️ Error de conexión saliente.\nActiva **Allow internet access** en Settings → Runtime/Networking."}) |
| | return "", history |
| | except _RateLimitError: |
| | history.append({"role": "assistant", "content": |
| | "⚠️ Límite/ cuota de OpenAI alcanzado. Intenta más tarde o cambia de modelo."}) |
| | return "", history |
| | except _APITimeoutError: |
| | history.append({"role": "assistant", "content": |
| | "⚠️ La solicitud a OpenAI excedió el tiempo de espera. Intenta de nuevo."}) |
| | return "", history |
| | except _APIStatusError as e: |
| | history.append({"role": "assistant", "content": f"⚠️ Error de API: {e}"} ) |
| | return "", history |
| | except Exception as e: |
| | history.append({"role": "assistant", "content": f"⚠️ Error inesperado: {e}"} ) |
| | return "", history |
| |
|
def limpiar_chat():
    """Return a new empty message list, used to reset the chat component."""
    empty_history: list = []
    return empty_history
| |
|
| | |
| | |
| | |
# --- Gradio UI -----------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    # Page header: logo, title and tagline.
    gr.HTML("""
    <div style="text-align:center; margin-bottom:20px;">
    <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" width="70" alt="Logo">
    <h1 style="color:#003366;">Asistente IA de Auditoría Interna</h1>
    <p style="font-size:15px;">Basado en GPT-4o y los valores del IIA y Gentera</p>
    </div>
    """)

    # Conversation pane, pre-seeded with an assistant greeting.
    greeting = {
        "role": "assistant",
        "content": "¡Hola! Soy tu Asistente IA de Auditoría Interna. ¿En qué te ayudo hoy?",
    }
    chat = gr.Chatbot(
        label="Chat Asistente Auditoría",
        type="messages",
        value=[greeting],
    )
    msg = gr.Textbox(placeholder="Escribe tu consulta aquí...", label="Tu mensaje")
    clear = gr.Button("🧹 Limpiar chat")

    # Submitting the textbox runs one chat turn; its two return values clear
    # the textbox and refresh the conversation.
    msg.submit(responder, inputs=[msg, chat], outputs=[msg, chat])
    # The button replaces the conversation with an empty list.
    clear.click(fn=limpiar_chat, inputs=None, outputs=chat)

    footer_html = "<p style='text-align:center; color:gray; font-size:12px;'>© 2025 Gentera AI · Desarrollado por Verónica Cervantes</p>"
    gr.HTML(footer_html)

# Start the web server, listening on all network interfaces.
demo.launch(server_name="0.0.0.0", ssr_mode=False)