Spaces:

lukasnet
/

conversor-ideas

Running

File size: 6,978 Bytes

import gradio as gr
import subprocess
import os
import sys
from pathlib import Path

# Pipeline locations, written relative to the Space root.
# On HF Spaces the Paper-KG-Pipeline folder is uploaded next to this app, so
# these relative paths resolve against the app's working directory.
# PIPELINE_SCRIPT is the entry script launched as a subprocess;
# OUTPUT_RESULT is the JSON file read back after a successful run.
PIPELINE_SCRIPT = Path("Paper-KG-Pipeline/scripts/idea2story_pipeline.py")
OUTPUT_RESULT = Path("Paper-KG-Pipeline/output/pipeline_result.json")

def run_pipeline(idea, progress=gr.Progress()):
    """Run the Idea2Story pipeline script as a subprocess and stream its output.

    This is a generator intended to be used as a Gradio event handler. Every
    yielded value is a ``(log_text, result)`` tuple: ``log_text`` is all log
    lines collected so far joined with newlines, and ``result`` is ``None``
    until the final yield of a successful run, where it is the parsed content
    of ``OUTPUT_RESULT``.

    Args:
        idea: Research idea text. It is passed to the pipeline script as its
            single command-line argument. ``None`` or blank input is rejected
            with a warning message instead of starting the pipeline.
        progress: Gradio progress tracker. Kept for interface compatibility;
            it is not used by the body.

    Yields:
        tuple[str, object | None]: The accumulated log text and, on the last
        yield of a successful run, the decoded JSON result.
    """
    logs = []
    process = None
    try:
        # Guard against None as well as empty/whitespace-only input.
        if not idea or not idea.strip():
            logs.append("⚠️ Por favor ingresa una idea.")
            yield "\n".join(logs), None
            return

        # In HF Spaces we reuse the interpreter that runs this app.
        python_exec = Path(sys.executable)

        script_path = PIPELINE_SCRIPT.absolute()
        if not script_path.exists():
            logs.append(f"❌ No se encontró el script en: {script_path}")
            yield "\n".join(logs), None
            return

        logs.append("🚀 Iniciando pipeline...")
        logs.append(f"📂 Carpeta actual: {os.getcwd()}")
        logs.append(f"📜 Script: {script_path}")
        yield "\n".join(logs), None

        # Argument list (no shell), so the idea text is never shell-interpreted.
        command = [str(python_exec), str(script_path), idea]

        env = _build_pipeline_env(logs)
        _patch_common_py(logs)
        # Surface the setup messages even if the child prints nothing for a while.
        yield "\n".join(logs), None

        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # interleave stderr with stdout in the log
            text=True,
            encoding="utf-8",
            errors="replace",
            env=env,
            cwd=os.getcwd(),
        )

        # Stream the child's output line by line; iteration ends at EOF.
        for line in process.stdout:
            logs.append(line.rstrip())
            yield "\n".join(logs), None

        process.wait()

        if process.returncode == 0:
            logs.append("\n✅ Pipeline completado con éxito!")
            # NOTE(review): a result file left over from an earlier run would
            # be returned here if this run did not rewrite it.
            yield "\n".join(logs), _load_result(logs)
        else:
            logs.append(f"\n❌ Pipeline falló con código {process.returncode}")
            yield "\n".join(logs), None

    except Exception as e:
        # Top-level boundary: report the failure in the UI instead of crashing.
        import traceback
        logs.append(f"\n❌ Error GUI: {str(e)}")
        logs.append(traceback.format_exc())
        yield "\n".join(logs), None
    finally:
        # Runs on normal completion, on errors, and when the generator is
        # closed early (GeneratorExit): never leave the child process or its
        # pipe behind.
        if process is not None:
            if process.poll() is None:
                process.kill()
                process.wait()
            if process.stdout is not None:
                process.stdout.close()


def _build_pipeline_env(logs):
    """Return the environment for the pipeline subprocess, logging key status to *logs*."""
    env = os.environ.copy()
    # Force UTF-8 on the child's stdio to limit encoding errors.
    env["PYTHONIOENCODING"] = "utf-8"

    # Make the pipeline sources importable; keep any PYTHONPATH already set
    # rather than overwriting it.
    src_dir = os.path.join(os.getcwd(), "Paper-KG-Pipeline", "src")
    inherited = env.get("PYTHONPATH")
    env["PYTHONPATH"] = os.pathsep.join([src_dir, inherited]) if inherited else src_dir

    # 1. Inject the Gemini configuration (OpenAI-compatible endpoint).
    env["LLM_PROVIDER"] = "openai_compatible_chat"
    env["LLM_BASE_URL"] = "https://generativelanguage.googleapis.com/v1beta/openai/"
    env["LLM_MODEL"] = "gemini-2.0-flash"
    env["EMBEDDING_API_URL"] = "https://generativelanguage.googleapis.com/v1beta/openai/embeddings"
    env["EMBEDDING_MODEL"] = "gemini-embedding-001"

    # 2. Raise the preflight retry counts so the run does not fail fast.
    env["I2P_PREFLIGHT_LLM_RETRIES"] = "10"
    env["I2P_PREFLIGHT_EMB_RETRIES"] = "10"

    # 3. Map the API key onto the variable names set for the pipeline.
    if "GEMINI_API_KEY" in env:
        env["LLM_API_KEY"] = env["GEMINI_API_KEY"]
        env["EMBEDDING_API_KEY"] = env["GEMINI_API_KEY"]
        logs.append("✅ GEMINI_API_KEY encontrada e inyectada.")
    elif "LLM_API_KEY" not in env:
        logs.append("⚠️ ADVERTENCIA: No se encontró GEMINI_API_KEY.")

    logs.append(f"🔍 DEBUG: LLM_MODEL={env.get('LLM_MODEL')}")
    return env


def _patch_common_py(logs):
    """Best-effort hotfix: rewrite retry settings in the pipeline's common.py, logging to *logs*."""
    import re

    # Patching in place avoids having to re-upload whole folders.
    try:
        common_py = Path("Paper-KG-Pipeline/src/idea2paper/infra/llm_providers/common.py")
        if not common_py.exists():
            return

        with open(common_py, "r", encoding="utf-8") as f:
            content = f.read()

        # Skip when the file already carries one of the accepted retry totals.
        if "total=8" in content or "total=15" in content:
            return

        logs.append("🔧 Parcheando common.py para mejorar resistencia a Rate Limits...")
        # NOTE: these substitutions rewrite every match in the file.
        content = re.sub(r"total=\d+", "total=15", content)
        content = re.sub(r"backoff_factor=\d+", "backoff_factor=4", content)
        content = re.sub(r"status_forcelist=\[.*?\]", "status_forcelist=[429, 500, 502, 503, 504]", content)

        with open(common_py, "w", encoding="utf-8") as f:
            f.write(content)
        logs.append("✅ common.py parcheado con éxito.")
    except Exception as e:
        # Deliberately broad: a failed hotfix must not prevent the run.
        logs.append(f"⚠️ No se pudo parchear common.py: {e}")


def _load_result(logs):
    """Return the parsed OUTPUT_RESULT JSON, or None (with a warning in *logs*) if unavailable."""
    import json

    if not OUTPUT_RESULT.exists():
        logs.append("\n⚠️ Archivo de resultado no encontrado.")
        return None
    try:
        with open(OUTPUT_RESULT, "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both invalid JSON and undecodable bytes.
        logs.append(f"\n⚠️ Error leyendo resultado: {e}")
        return None

# GUI layout: the idea input and run button on top; below, the execution log
# and the JSON result side by side.
with gr.Blocks(title="Conversor de ideas en papers") as demo:
    gr.Markdown("# 🚀 Conversor de ideas en papers")
    gr.Markdown("Transforme su idea de investigación en una historia/documento estructurado utilizando gráficos de conocimiento y LLM.")

    # Top row: a single column holding the input box and the trigger button.
    with gr.Row(), gr.Column(scale=1):
        idea_input = gr.Textbox(
            lines=3,
            label="Tu idea a investigar",
            placeholder="ej: Razonamiento automatizado en grandes modelos de lenguaje...",
        )
        run_btn = gr.Button("Generar Historia", variant="primary")

    # Bottom row: read-only streaming log (left) and parsed result (right).
    with gr.Row():
        with gr.Column(scale=1):
            logs_output = gr.Textbox(
                lines=20,
                label="Registros de Ejecución",
                interactive=False,
                autoscroll=True,
            )
        with gr.Column(scale=1):
            result_output = gr.JSON(label="Resultado Generado")

    # Each value yielded by run_pipeline updates (logs_output, result_output).
    run_btn.click(
        fn=run_pipeline,
        inputs=[idea_input],
        outputs=[logs_output, result_output],
    )

# Start the app only when executed as a script.
if __name__ == "__main__":
    demo.launch()