Spaces:

caarleexx
/

ToM

Sleeping

App Files Files Community

caarleexx committed on Dec 6, 2025

Commit

22d6f06

verified ·

1 Parent(s): 50a59c3

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -410

app.py DELETED Viewed

@@ -1,410 +0,0 @@
-# ╔════════════════════════════════════════════════════════════════════════════╗
-# ║  PIPELINE V44: FRAG + VISÃO PAGINADA + PARALELISMO + CACHE + AUDITORIA     ║
-# ╚════════════════════════════════════════════════════════════════════════════╝
-import os
-import json
-import time
-import hashlib
-from datetime import datetime
-from concurrent.futures import ThreadPoolExecutor, as_completed
-import gradio as gr
-import google.generativeai as genai
-import pypdf  # pip install pypdf
-# ==================== 1. CONFIGURATION ====================
-# Read the Gemini API key from the environment; the fallback string is only a
-# placeholder meaning "no key configured".
-api_key = os.getenv("GOOGLE_API_KEY", "SUA_API_KEY_AQUI")
-# Configure the SDK only when a real (non-placeholder) key is present.
-if api_key and api_key != "SUA_API_KEY_AQUI":
-    genai.configure(api_key=api_key)
-# Model handles are created unconditionally, even when no key was configured.
-model_flash = genai.GenerativeModel("gemini-flash-latest")
-model_pro   = genai.GenerativeModel("gemini-pro-latest")
-# File the agent protocol (a JSON document) is loaded from and saved to.
-ARQUIVO_CONFIG = "protocolo_fragmentacao_visao-3.json"
-# Directory holding one JSON cache file per processed upload.
-PASTA_CACHE = "cache_processamento"
-MAX_WORKERS = 5  # thread-pool size used for the parallel fragment read
-os.makedirs(PASTA_CACHE, exist_ok=True)
-# ==================== 2. UTILIDADES ====================
-def log_point(msg, logs):
-    """Return ``logs`` with one extra line ``[HH:MM:SS] msg`` appended.
-
-    The log is an immutable string threaded through the callers, so the
-    updated text is returned rather than mutated in place.
-    """
-    carimbo = "{:%H:%M:%S}".format(datetime.now())
-    linha = "[{}] {}\n".format(carimbo, msg)
-    return logs + linha
-def carregar_protocolo():
-    """Return the agent protocol as JSON text.
-
-    Reads ``ARQUIVO_CONFIG`` when it exists. If the file is missing or cannot
-    be read or decoded, a built-in single-agent protocol (the page reader) is
-    serialized and returned instead.
-
-    Returns:
-        str: JSON text describing a list of agent configurations.
-    """
-    try:
-        with open(ARQUIVO_CONFIG, "r", encoding="utf-8") as f:
-            return f.read()
-    except (OSError, UnicodeDecodeError):
-        # Only I/O and decoding problems trigger the fallback; anything else
-        # is a programming error and should surface.
-        proto = [
-            {
-                "nome": "PAGINADOR_VISUAL",
-                "missao": (
-                    "Você recebe o texto bruto de um conjunto de páginas de um PDF. "
-                    "Separe por página e devolva uma lista JSON com objetos "
-                    # Trailing space added: the two sentences used to be
-                    # glued together ("...}.Retorne").
-                    "{'pagina','transcricao_fiel','descricao_visual'}. "
-                    "Retorne APENAS essa lista JSON, sem texto extra."
-                ),
-                "tipo_saida": "json",
-                "modelo": "flash",
-            }
-        ]
-        return json.dumps(proto, ensure_ascii=False, indent=2)
-def salvar_protocolo(conteudo):
-    """Validate ``conteudo`` as JSON and write it to ``ARQUIVO_CONFIG``.
-
-    Args:
-        conteudo: JSON text typed in the Config tab.
-
-    Returns:
-        str: a short status label for the UI. Invalid JSON and disk-write
-        failures are reported with different messages, so a write problem is
-        no longer shown as a JSON error.
-    """
-    try:
-        json.loads(conteudo)
-    except (ValueError, TypeError):
-        # ValueError covers JSONDecodeError; TypeError covers None/non-text.
-        return "❌ Erro JSON"
-    try:
-        with open(ARQUIVO_CONFIG, "w", encoding="utf-8") as f:
-            f.write(conteudo)
-    except OSError as e:
-        return f"❌ Erro ao salvar: {e}"
-    return "✅ Salvo"
-def gerar_hash_arquivo(nome_arquivo):
-    """Return the MD5 hex digest of the string ``nome_arquivo``.
-
-    Despite the name, this hashes the text passed in (UTF-8 encoded), not
-    the contents of any file. It is used as a cache key, not for security.
-    """
-    digest = hashlib.md5()
-    digest.update(nome_arquivo.encode())
-    return digest.hexdigest()
-def salvar_cache(hash_id, dados):
-    """Write ``dados`` as indented UTF-8 JSON to ``PASTA_CACHE/<hash_id>.json``."""
-    destino = os.path.join(PASTA_CACHE, "{}.json".format(hash_id))
-    opcoes_json = {"ensure_ascii": False, "indent": 2}
-    with open(destino, "w", encoding="utf-8") as saida:
-        json.dump(dados, saida, **opcoes_json)
-def carregar_cache(hash_id):
-    """Load the cached JSON stored under ``PASTA_CACHE/<hash_id>.json``.
-
-    Returns:
-        The decoded JSON value, or ``None`` when there is no usable cache
-        entry (file missing, unreadable, or not valid JSON).
-    """
-    caminho = os.path.join(PASTA_CACHE, f"{hash_id}.json")
-    try:
-        with open(caminho, "r", encoding="utf-8") as f:
-            return json.load(f)
-    except (OSError, ValueError):
-        # OSError: missing or unreadable file. ValueError: truncated or corrupt
-        # JSON, or bad encoding. A broken entry is treated as a cache miss so
-        # the caller simply reprocesses the upload.
-        return None
-# --------- DIVISÃO PDF ---------
-def ler_anexo_e_fragmentar(arquivo, paginas_por_fragmento=5, logs=""):
-    """Read an upload and split it into text fragments.
-
-    A ``.pdf`` file is read with pypdf and grouped into fragments of
-    ``paginas_por_fragmento`` pages, each page preceded by a
-    ``=== PAGINA n/total ===`` marker. Any other file is read as UTF-8 text
-    and returned as a single fragment.
-
-    Args:
-        arquivo: a path string or an object exposing the path as ``.name``;
-            ``None`` means no attachment.
-        paginas_por_fragmento: pages per fragment; values below 1 are
-            treated as 1.
-        logs: accumulated log text to extend.
-
-    Returns:
-        tuple: ``(fragments, anexo_info, logs)``. ``fragments`` is a list of
-        strings (empty on failure), ``anexo_info`` is a short tag describing
-        the file or the error, and ``logs`` is the updated log text.
-    """
-    logs = log_point("ler_anexo_e_fragmentar() chamado", logs)
-    if arquivo is None:
-        return [], "", logs
-    filename = getattr(arquivo, "name", arquivo)
-    if not os.path.exists(filename):
-        logs = log_point(f"Arquivo não encontrado: {filename}", logs)
-        return [], "[ERRO: Arquivo não encontrado]", logs
-    nome_base = os.path.basename(filename)
-    if not filename.lower().endswith(".pdf"):
-        logs = log_point("Arquivo texto simples detectado", logs)
-        try:
-            with open(filename, "r", encoding="utf-8") as f:
-                texto = f.read()
-        except (OSError, UnicodeDecodeError) as e:
-            logs = log_point(f"ERRO LEITURA TXT: {e}", logs)
-            return [], "[ERRO LEITURA TXT]", logs
-        # The whole text file becomes one fragment.
-        return [texto], f"[TXT: {nome_base}]", logs
-    # A step of 0 would make range() raise and a negative step would yield
-    # no fragments at all, so clamp to at least one page per fragment.
-    paginas_por_fragmento = max(1, paginas_por_fragmento)
-    try:
-        reader = pypdf.PdfReader(filename)
-        total_pages = len(reader.pages)
-        logs = log_point(f"PDF carregado: {total_pages} páginas", logs)
-        fragments = []
-        for i in range(0, total_pages, paginas_por_fragmento):
-            start = i + 1
-            end = min(i + paginas_por_fragmento, total_pages)
-            paginas = []
-            for p in range(i, end):
-                try:
-                    t = reader.pages[p].extract_text() or ""
-                except Exception as e:
-                    # Best effort: one unreadable page must not discard the
-                    # rest of the document, so the error is embedded in place.
-                    t = f"\n[ERRO_EXTRACT_PAG_{p+1}: {e}]\n"
-                paginas.append(f"\n=== PAGINA {p+1}/{total_pages} ===\n{t}\n")
-            bloco_texto = "".join(paginas)
-            fragment = (
-                f"=== FRAG {i//paginas_por_fragmento + 1} "
-                f"(PÁGS {start}-{end}/{total_pages}) ===\n"
-                f"{bloco_texto.strip()}"
-            )
-            fragments.append(fragment)
-        logs = log_point(f"Total de fragmentos criados: {len(fragments)}", logs)
-        return fragments, f"[PDF: {nome_base}]", logs
-    except Exception as e:
-        # Boundary around the third-party PDF parser: any parsing failure is
-        # reported to the caller instead of crashing the pipeline.
-        logs = log_point(f"ERRO PDF: {e}", logs)
-        return [], f"[ERRO PDF: {str(e)}]", logs
-# ==================== 3. ENGINE DE EXECUÇÃO ====================
-def _extrair_json_possivel(out_raw: str) -> str:
-    """Extract the most likely JSON payload from a raw model reply.
-
-    The reply may wrap the JSON in Markdown fences or surround it with prose.
-    The text between the first opening bracket (``[`` or ``{``) and the last
-    closing bracket (``]`` or ``}``) is returned, which drops both a leading
-    preamble and any trailing fence or commentary while keeping backticks
-    that appear inside JSON strings. When no closing bracket follows the
-    opening one, the text from the opening bracket onward is returned with
-    fence markers removed. When there is no bracket at all, the stripped
-    text is returned with fence markers removed.
-
-    The result is a candidate only; the caller still has to ``json.loads`` it.
-    """
-    cleaned = out_raw.strip()
-    inicios = [i for i in (cleaned.find("["), cleaned.find("{")) if i != -1]
-    if inicios:
-        start = min(inicios)
-        end = max(cleaned.rfind("]"), cleaned.rfind("}"))
-        if end > start:
-            return cleaned[start:end + 1]
-        # Truncated reply with no closing bracket: keep the tail as before.
-        cleaned = cleaned[start:]
-    return cleaned.replace("```json", "").replace("```", "")
-def executar_no(timeline, config, fragmento_input=None):
-    """Run one agent of the protocol against the model and return its message.
-
-    Used both sequentially (analysis agents, fed the whole timeline) and from
-    worker threads (page reader, fed a single fragment).
-
-    Args:
-        timeline: list of prior messages; serialized as the prompt input when
-            ``fragmento_input`` is ``None``.
-        config: agent configuration with ``nome`` and ``missao`` and optional
-            ``modelo`` (``"pro"`` selects ``model_pro``, anything else
-            ``model_flash``) and ``tipo_saida`` (``"json"`` parses the reply).
-        fragmento_input: text to use as the prompt input instead of the
-            timeline.
-
-    Returns:
-        tuple: ``(message, error)``. On success ``message`` is an assistant
-        message dict and ``error`` is ``None``. On failure ``message`` is a
-        system message carrying the error text and ``error`` is that text.
-
-    Raises:
-        KeyError: if ``config`` lacks ``nome`` or ``missao`` (the prompt is
-            built before the error boundary).
-    """
-    modelo = model_pro if config.get("modelo") == "pro" else model_flash
-    if fragmento_input is not None:
-        input_para_prompt = fragmento_input
-    else:
-        input_para_prompt = json.dumps(timeline, ensure_ascii=False, indent=2)
-    prompt = (
-        "--- INPUT PARA O AGENTE ---\n"
-        f"{input_para_prompt}\n"
-        "----------------\n"
-        f"AGENTE: {config['nome']}\n"
-        f"MISSÃO: {config['missao']}"
-    )
-    max_tentativas = 3
-    try:
-        # Simple retry with linear back-off, only for rate limiting (HTTP 429).
-        for tentativa in range(max_tentativas):
-            try:
-                resp = modelo.generate_content(prompt)
-                out = resp.text or ""
-                break
-            except Exception as e:
-                ultima = tentativa == max_tentativas - 1
-                if "429" not in str(e) or ultima:
-                    # Re-raise the real error. Previously, exhausting the
-                    # retries left ``out`` unbound and surfaced a misleading
-                    # NameError instead of the rate-limit message.
-                    raise
-                time.sleep(2 * (tentativa + 1))
-        content = out
-        if config.get("tipo_saida") == "json":
-            cleaned = _extrair_json_possivel(out)
-            try:
-                content = json.loads(cleaned)
-            except (ValueError, TypeError):
-                content = []  # fallback when the reply is not parseable JSON
-        return {"role": "assistant", "agent": config["nome"], "content": content}, None
-    except Exception as e:
-        # Boundary: API failures are reported to the orchestrator as data.
-        return {"role": "system", "error": str(e)}, str(e)
-# ==================== 4. ORQUESTRADOR ====================
-def _assinatura_conteudo(arquivo):
-    """Return the MD5 hex digest of the upload's bytes, or "" if unreadable."""
-    caminho = getattr(arquivo, "name", arquivo)
-    digest = hashlib.md5()
-    try:
-        with open(caminho, "rb") as f:
-            for bloco in iter(lambda: f.read(1024 * 1024), b""):
-                digest.update(bloco)
-    except (OSError, TypeError):
-        return ""
-    return digest.hexdigest()
-def orquestrador(texto, arquivo, history, json_config, confext_state):
-    """Drive the whole pipeline as a generator of UI updates.
-
-    Steps: look for a cached reading of the upload; otherwise fragment the
-    file, run the first protocol agent over all fragments in parallel, cache
-    the extracted pages, then run the remaining agents sequentially over the
-    growing timeline.
-
-    Args:
-        texto: user message (``None`` is treated as empty).
-        arquivo: uploaded file (path string or object with ``.name``) or
-            ``None``.
-        history: chatbot history as a list of ``[user, bot]`` pairs; mutated
-            in place and yielded back.
-        json_config: protocol as JSON text; must decode to a list of agent
-            configs.
-        confext_state: previously extracted content, yielded unchanged until
-            new content is available.
-
-    Yields:
-        tuple: ``(history, timeline, logs, confext)`` after each visible step.
-    """
-    logs = f"🚀 START: {datetime.now().strftime('%H:%M:%S')}\n"
-    logs = log_point("Orquestrador V44 iniciado", logs)
-    # 1. Preparation
-    if history is None:
-        history = []
-    texto = texto or ""
-    json_config = json_config or ""
-    # Uploads may arrive as plain path strings; fall back to the value itself
-    # (as the other helpers do) instead of naming every such file
-    # "sem_arquivo", which made unrelated uploads share one cache entry.
-    caminho_arquivo = getattr(arquivo, "name", arquivo) if arquivo else None
-    nome_arquivo = os.path.basename(caminho_arquivo) if caminho_arquivo else "sem_arquivo"
-    # Cache key = file name + file content + protocol, so a different file
-    # with the same name no longer returns a stale reading.
-    assinatura = _assinatura_conteudo(arquivo) if arquivo else ""
-    hash_op = gerar_hash_arquivo(nome_arquivo + assinatura + json_config)
-    # 2. Cache lookup
-    cache_existente = carregar_cache(hash_op) if arquivo else None
-    if isinstance(cache_existente, dict) and "confext_upload" in cache_existente:
-        logs = log_point(f"♻️ Cache encontrado para {nome_arquivo}", logs)
-        confext_upload = cache_existente["confext_upload"]
-        timeline = cache_existente.get("timeline", [])
-        history.append([texto, "✅ Arquivo carregado do cache! Análise pronta."])
-        yield history, timeline, logs, confext_upload
-        # With no new user text there is nothing else to do.
-        # NOTE(review): when text is present, no analysis agent runs on this
-        # path; only the final yield below happens. Confirm this is intended.
-        if not texto:
-            return
-    else:
-        # 3. Normal processing
-        fragmentos, anexo_info, logs = ler_anexo_e_fragmentar(
-            arquivo, paginas_por_fragmento=5, logs=logs
-        )
-        history.append([texto + (" 📎" if arquivo else ""), None])
-        yield history, {}, logs, confext_state
-        try:
-            protocolo = json.loads(json_config)
-            if not isinstance(protocolo, list):
-                # The code below indexes and slices the protocol as a list.
-                raise ValueError("o protocolo deve ser uma lista de agentes")
-        except (ValueError, TypeError) as e:
-            logs = log_point(f"Config inválida: {e}", logs)
-            history[-1][1] = "❌ Erro no JSON de Configuração."
-            yield history, {}, logs, confext_state
-            return
-        timeline = [{"role": "user", "content": texto}]
-        confext_upload = {
-            "arquivo": nome_arquivo,
-            "meta": anexo_info,
-            "paginas": []
-        }
-        # 4. Page reader (parallel)
-        if protocolo and fragmentos:
-            cfg_visao = protocolo[0]  # the first agent is assumed to be the reader
-            logs = log_point(f"Iniciando Leitura Paralela ({MAX_WORKERS} workers) com {cfg_visao['nome']}", logs)
-            history[-1][1] = f"⏳ Fragmentando e lendo {len(fragmentos)} partes em paralelo..."
-            yield history, timeline, logs, confext_upload
-            # Results are stored by fragment index so page order survives
-            # out-of-order completion.
-            resultados_ordenados = [None] * len(fragmentos)
-            with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
-                futures_map = {executor.submit(executar_no, [], cfg_visao, frag): i for i, frag in enumerate(fragmentos)}
-                concluidos = 0
-                for future in as_completed(futures_map):
-                    idx = futures_map[future]
-                    res, erro = future.result()
-                    if erro:
-                        logs = log_point(f"Erro no frag {idx}: {erro}", logs)
-                    else:
-                        resultados_ordenados[idx] = res["content"]
-                    concluidos += 1
-                    history[-1][1] = f"⏳ Leitura: {concluidos}/{len(fragmentos)} partes processadas..."
-                    yield history, timeline, logs, confext_upload
-            # Merge the ordered results; failed or empty fragments are skipped.
-            for pags in resultados_ordenados:
-                if pags:
-                    if isinstance(pags, list):
-                        confext_upload["paginas"].extend(pags)
-                    elif isinstance(pags, dict):
-                        confext_upload["paginas"].append(pags)
-            logs = log_point(f"Leitura concluída. Total páginas extraídas: {len(confext_upload['paginas'])}", logs)
-            # Persist right after the expensive read.
-            if arquivo:
-                salvar_cache(hash_op, {"confext_upload": confext_upload, "timeline": timeline})
-                logs = log_point("Estado salvo em Cache", logs)
-        # Inject the extracted content into the timeline.
-        timeline.append({
-            "role": "system",
-            "agent": "CONFEXT_UPLOAD",
-            "content": confext_upload
-        })
-        # 5. Analysis agents (sequential)
-        restante = protocolo[1:] if protocolo else []
-        for cfg in restante:
-            history[-1][1] = f"⚙️ {cfg['nome']} analisando..."
-            logs = log_point(f"Iniciando agente: {cfg['nome']}", logs)
-            yield history, timeline, logs, confext_upload
-            # Each agent sees everything produced so far.
-            res, erro = executar_no(timeline, cfg, fragmento_input=None)
-            if erro:
-                logs = log_point(f"Erro agente {cfg['nome']}: {erro}", logs)
-            else:
-                timeline.append(res)
-                if cfg.get("tipo_saida") == "texto":
-                    history[-1][1] = res["content"]
-            yield history, timeline, logs, confext_upload
-    if not texto and arquivo:
-        history[-1][1] = "✅ Documento processado e indexado. Pode fazer perguntas."
-    logs = log_point("Processo Finalizado", logs)
-    yield history, timeline, logs, confext_upload
-# ==================== 5. UI ====================
-def ui_clean():
-    """Build and return the Gradio Blocks app (chat, audit/debug and config tabs).
-
-    Returns:
-        gr.Blocks: the assembled, not yet launched, application.
-    """
-    css = """
-    footer {display: none !important;}
-    .contain {border: none !important;}
-    """
-    config_init = carregar_protocolo()
-    with gr.Blocks(title="AI Forensics Auto V44", css=css, theme=gr.themes.Soft()) as app:
-        # Last extracted document content, kept between events.
-        confext_state = gr.State(value=None)
-        with gr.Tabs():
-            with gr.Tab("💬 Investigador"):
-                chatbot = gr.Chatbot(
-                    label="",
-                    show_label=False,
-                    height=600,
-                    show_copy_button=True,
-                    render_markdown=True,
-                )
-                with gr.Row():
-                    with gr.Column(scale=10):
-                        txt_in = gr.Textbox(
-                            show_label=False,
-                            placeholder="Descreva o caso ou faça perguntas...",
-                            lines=1,
-                            max_lines=5,
-                            container=False,
-                        )
-                    with gr.Column(scale=1, min_width=50):
-                        file_in = gr.UploadButton(
-                            "📎",
-                            file_types=[".txt", ".md", ".json", ".pdf"],
-                            size="sm",
-                        )
-                    with gr.Column(scale=1, min_width=80):
-                        btn_send = gr.Button("Enviar", variant="primary", size="sm")
-                file_status = gr.Markdown("", visible=True)
-                def _on_upload(x):
-                    # Show the bare file name of the pending attachment.
-                    nome = os.path.basename(getattr(x, "name", x))
-                    return f"📎 Anexo pronto para análise: {nome}"
-                file_in.upload(_on_upload, inputs=file_in, outputs=file_status)
-            # --- Audit tab: timeline, logs and extracted content ---
-            with gr.Tab("🕵️ Auditoria & Debug"):
-                gr.Markdown("### 🧠 Processo Interno de Pensamento")
-                with gr.Row():
-                    out_dna = gr.JSON(label="Timeline da IA (Contexto)")
-                    out_logs = gr.Textbox(label="Logs do Sistema", lines=20)
-                gr.Markdown("### 📂 Dados Estruturados (Confext)")
-                confext_view = gr.JSON(label="Conteúdo Extraído")
-            with gr.Tab("⚙️ Config"):
-                with gr.Row():
-                    btn_save = gr.Button("Salvar Config")
-                    lbl_save = gr.Label(show_label=False)
-                code_json = gr.Code(value=config_init, language="json", label=ARQUIVO_CONFIG)
-                btn_save.click(salvar_protocolo, code_json, lbl_save)
-        def _orq_wrapper(texto, arquivo, history, json_cfg, confext_old):
-            # Thin generator adapter so Gradio streams every intermediate step.
-            yield from orquestrador(texto, arquivo, history, json_cfg, confext_old)
-        triggers = [btn_send.click, txt_in.submit]
-        for trig in triggers:
-            trig(
-                _orq_wrapper,
-                inputs=[txt_in, file_in, chatbot, code_json, confext_state],
-                outputs=[chatbot, out_dna, out_logs, confext_state],  # feeds the debug tab
-            ).then(
-                # Clear the text box, the upload and the status line, and pass
-                # the state through: one value per output. The previous
-                # "[1:]" slice returned three values for four outputs.
-                lambda c: (None, None, "", c),
-                inputs=confext_state,
-                outputs=[txt_in, file_in, file_status, confext_state],
-            ).then(
-                lambda c: c,
-                inputs=confext_state,
-                outputs=confext_view,  # refreshes the extracted-content viewer
-            )
-    return app
-if __name__ == "__main__":
-    # Script entry point: build the Blocks app, then start the Gradio server.
-    interface = ui_clean()
-    interface.launch()