Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 2 |
-
# β PIPELINE
|
| 3 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 4 |
|
| 5 |
import os
|
|
@@ -33,17 +33,8 @@ def carregar_protocolo():
|
|
| 33 |
with open(ARQUIVO_CONFIG, "r", encoding="utf-8") as f:
|
| 34 |
return f.read()
|
| 35 |
except:
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
"nome": "PAGINADOR_VISUAL",
|
| 39 |
-
"missao": (
|
| 40 |
-
"VocΓͺ recebe o texto bruto de um conjunto de pΓ‘ginas de um PDF. "
|
| 41 |
-
"Separe o conteΓΊdo por PΓGINA, na ordem original. "
|
| 42 |
-
"Para cada pΓ‘gina, produza um objeto com: 'pagina', "
|
| 43 |
-
"'transcricao_fiel' (texto integral, sem resumo) e "
|
| 44 |
-
"'descricao_visual' (imagens, tabelas, diagramas, layout, sem julgamentos). "
|
| 45 |
-
"Se o fragmento tiver 5 pΓ‘ginas, devolva uma lista JSON com EXATAMENTE 5 objetos, "
|
| 46 |
-
"um por pΓ‘gina."
|
| 47 |
),
|
| 48 |
"tipo_saida": "json",
|
| 49 |
"modelo": "flash"
|
|
@@ -119,6 +110,23 @@ def ler_anexo_e_fragmentar(arquivo, paginas_por_fragmento=5, logs=""):
|
|
| 119 |
|
| 120 |
# ==================== 3. ENGINE DE EXECUÇÃO ====================
|
| 121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
def executar_no(timeline, config, fragmento_input=None, logs=""):
|
| 123 |
logs = log_point(f"executar_no({config['nome']}) chamado", logs)
|
| 124 |
modo = "input_fragmento" if fragmento_input is not None else "timeline"
|
|
@@ -149,7 +157,8 @@ def executar_no(timeline, config, fragmento_input=None, logs=""):
|
|
| 149 |
logs = log_point(f"SaΓda bruta (120 chars): {out[:120]!r}", logs)
|
| 150 |
|
| 151 |
if config["tipo_saida"] == "json":
|
| 152 |
-
cleaned = out
|
|
|
|
| 153 |
try:
|
| 154 |
content = json.loads(cleaned)
|
| 155 |
except Exception as e:
|
|
@@ -187,7 +196,7 @@ def orquestrador(texto, arquivo, history, json_config, confext_state):
|
|
| 187 |
protocolo = json.loads(json_config)
|
| 188 |
logs = log_point("Protocolo JSON carregado", logs)
|
| 189 |
except Exception as e:
|
| 190 |
-
history[-1]
|
| 191 |
logs = log_point(f"ERRO carregando protocolo: {e}", logs)
|
| 192 |
yield history, {}, logs, confext_state
|
| 193 |
return
|
|
@@ -205,17 +214,17 @@ def orquestrador(texto, arquivo, history, json_config, confext_state):
|
|
| 205 |
)
|
| 206 |
|
| 207 |
if fragmentos:
|
| 208 |
-
history[-1]
|
| 209 |
logs = log_point("Fragmentos disponΓveis; iniciando visΓ£o paginada", logs)
|
| 210 |
yield history, timeline, logs, confext_upload
|
| 211 |
|
| 212 |
# PASSO PAGINADOR_VISUAL (primeiro agente, se existir)
|
| 213 |
if protocolo and fragmentos:
|
| 214 |
-
cfg_visao = protocolo
|
| 215 |
logs = log_point(f"Agente de visΓ£o selecionado: {cfg_visao['nome']}", logs)
|
| 216 |
|
| 217 |
for i, fragmento in enumerate(fragmentos):
|
| 218 |
-
history[-1]
|
| 219 |
logs = log_point(f"Enviando frag {i+1}", logs)
|
| 220 |
yield history, timeline, logs, confext_upload
|
| 221 |
|
|
@@ -258,7 +267,7 @@ def orquestrador(texto, arquivo, history, json_config, confext_state):
|
|
| 258 |
final_response = ""
|
| 259 |
|
| 260 |
for cfg in restante:
|
| 261 |
-
history[-1]
|
| 262 |
logs = log_point(f"Iniciando passo adicional: {cfg['nome']}", logs)
|
| 263 |
yield history, timeline, logs, confext_upload
|
| 264 |
|
|
@@ -267,14 +276,14 @@ def orquestrador(texto, arquivo, history, json_config, confext_state):
|
|
| 267 |
|
| 268 |
if cfg["tipo_saida"] == "texto":
|
| 269 |
final_response = res["content"]
|
| 270 |
-
history[-1]
|
| 271 |
logs = log_point(f"Passo {cfg['nome']} produziu texto final", logs)
|
| 272 |
|
| 273 |
yield history, timeline, logs, confext_upload
|
| 274 |
|
| 275 |
if not restante and not texto:
|
| 276 |
-
history[-1]
|
| 277 |
-
final_response = history[-1][
|
| 278 |
logs = log_point("Nenhum passo adicional; apenas prΓ©-processamento", logs)
|
| 279 |
|
| 280 |
logs = log_point("FIM orquestrador()", logs)
|
|
@@ -294,7 +303,6 @@ def ui_clean():
|
|
| 294 |
confext_state = gr.State(value=None)
|
| 295 |
|
| 296 |
with gr.Tabs():
|
| 297 |
-
# --- ABA 1 ---
|
| 298 |
with gr.Tab("π¬ Investigador"):
|
| 299 |
chatbot = gr.Chatbot(
|
| 300 |
label="",
|
|
@@ -335,14 +343,12 @@ def ui_clean():
|
|
| 335 |
outputs=file_status,
|
| 336 |
)
|
| 337 |
|
| 338 |
-
# --- ABA 2 ---
|
| 339 |
with gr.Tab("π΅οΈ DepuraΓ§Γ£o"):
|
| 340 |
with gr.Row():
|
| 341 |
out_dna = gr.JSON(label="DNA (Timeline)")
|
| 342 |
out_logs = gr.Textbox(label="Logs do Sistema", lines=20)
|
| 343 |
confext_view = gr.JSON(label="confext_upload")
|
| 344 |
|
| 345 |
-
# --- ABA 3 ---
|
| 346 |
with gr.Tab("βοΈ Config"):
|
| 347 |
with gr.Row():
|
| 348 |
btn_save = gr.Button("Salvar Config")
|
|
|
|
| 1 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 2 |
+
# β PIPELINE v43: FRAG + VISΓO PAGINADA + CONFEXT_UPLOAD + PARSE ROBUSTO β
|
| 3 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 4 |
|
| 5 |
import os
|
|
|
|
| 33 |
with open(ARQUIVO_CONFIG, "r", encoding="utf-8") as f:
|
| 34 |
return f.read()
|
| 35 |
except:
|
| 36 |
+
# fallback com instrução já reforçada para JSON limpo
|
| 37 |
+
return json.dumps(json```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
),
|
| 39 |
"tipo_saida": "json",
|
| 40 |
"modelo": "flash"
|
|
|
|
| 110 |
|
| 111 |
# ==================== 3. ENGINE DE EXECUÇÃO ====================
|
| 112 |
|
| 113 |
+
def _extrair_json_possivel(out_raw: str) -> str:
|
| 114 |
+
"""
|
| 115 |
+
Tenta isolar só o bloco JSON de uma resposta que pode ter texto extra.
|
| 116 |
+
Procura o primeiro 'json.
|
| 117 |
+
"""
|
| 118 |
+
cleaned = out_raw.strip()
|
| 119 |
+
idx_abre_col = cleaned.find("
|
| 120 |
+
|
| 121 |
+
# menor índice válido
|
| 122 |
+
candidatos = [i for i in [idx_abre_col, idx_abre_obj] if i != -1]
|
| 123 |
+
if candidatos:
|
| 124 |
+
start = min(candidatos)
|
| 125 |
+
cleaned = cleaned[start:]
|
| 126 |
+
|
| 127 |
+
cleaned = cleaned.replace("```json", "").replace("```
|
| 128 |
+
return cleaned
|
| 129 |
+
|
| 130 |
def executar_no(timeline, config, fragmento_input=None, logs=""):
|
| 131 |
logs = log_point(f"executar_no({config['nome']}) chamado", logs)
|
| 132 |
modo = "input_fragmento" if fragmento_input is not None else "timeline"
|
|
|
|
| 157 |
logs = log_point(f"SaΓda bruta (120 chars): {out[:120]!r}", logs)
|
| 158 |
|
| 159 |
if config["tipo_saida"] == "json":
|
| 160 |
+
cleaned = _extrair_json_possivel(out)
|
| 161 |
+
logs = log_point(f"Trecho candidato a JSON (120): {cleaned[:120]!r}", logs)
|
| 162 |
try:
|
| 163 |
content = json.loads(cleaned)
|
| 164 |
except Exception as e:
|
|
|
|
| 196 |
protocolo = json.loads(json_config)
|
| 197 |
logs = log_point("Protocolo JSON carregado", logs)
|
| 198 |
except Exception as e:
|
| 199 |
+
history[-1] = "β Erro no JSON de ConfiguraΓ§Γ£o."[3]
|
| 200 |
logs = log_point(f"ERRO carregando protocolo: {e}", logs)
|
| 201 |
yield history, {}, logs, confext_state
|
| 202 |
return
|
|
|
|
| 214 |
)
|
| 215 |
|
| 216 |
if fragmentos:
|
| 217 |
+
history[-1] = "β³ Fragmentando + visΓ£o paginada..."[3]
|
| 218 |
logs = log_point("Fragmentos disponΓveis; iniciando visΓ£o paginada", logs)
|
| 219 |
yield history, timeline, logs, confext_upload
|
| 220 |
|
| 221 |
# PASSO PAGINADOR_VISUAL (primeiro agente, se existir)
|
| 222 |
if protocolo and fragmentos:
|
| 223 |
+
cfg_visao = protocolo
|
| 224 |
logs = log_point(f"Agente de visΓ£o selecionado: {cfg_visao['nome']}", logs)
|
| 225 |
|
| 226 |
for i, fragmento in enumerate(fragmentos):
|
| 227 |
+
history[-1] = f"ποΈ {cfg_visao['nome']} frag {i+1}/{len(fragmentos)}..."[3]
|
| 228 |
logs = log_point(f"Enviando frag {i+1}", logs)
|
| 229 |
yield history, timeline, logs, confext_upload
|
| 230 |
|
|
|
|
| 267 |
final_response = ""
|
| 268 |
|
| 269 |
for cfg in restante:
|
| 270 |
+
history[-1] = f"βοΈ {cfg['nome']}..."[3]
|
| 271 |
logs = log_point(f"Iniciando passo adicional: {cfg['nome']}", logs)
|
| 272 |
yield history, timeline, logs, confext_upload
|
| 273 |
|
|
|
|
| 276 |
|
| 277 |
if cfg["tipo_saida"] == "texto":
|
| 278 |
final_response = res["content"]
|
| 279 |
+
history[-1] = final_response[3]
|
| 280 |
logs = log_point(f"Passo {cfg['nome']} produziu texto final", logs)
|
| 281 |
|
| 282 |
yield history, timeline, logs, confext_upload
|
| 283 |
|
| 284 |
if not restante and not texto:
|
| 285 |
+
history[-1] = "β
PDF processado. Pronto para perguntas usando confext_upload."[3]
|
| 286 |
+
final_response = history[-1][3]
|
| 287 |
logs = log_point("Nenhum passo adicional; apenas prΓ©-processamento", logs)
|
| 288 |
|
| 289 |
logs = log_point("FIM orquestrador()", logs)
|
|
|
|
| 303 |
confext_state = gr.State(value=None)
|
| 304 |
|
| 305 |
with gr.Tabs():
|
|
|
|
| 306 |
with gr.Tab("π¬ Investigador"):
|
| 307 |
chatbot = gr.Chatbot(
|
| 308 |
label="",
|
|
|
|
| 343 |
outputs=file_status,
|
| 344 |
)
|
| 345 |
|
|
|
|
| 346 |
with gr.Tab("π΅οΈ DepuraΓ§Γ£o"):
|
| 347 |
with gr.Row():
|
| 348 |
out_dna = gr.JSON(label="DNA (Timeline)")
|
| 349 |
out_logs = gr.Textbox(label="Logs do Sistema", lines=20)
|
| 350 |
confext_view = gr.JSON(label="confext_upload")
|
| 351 |
|
|
|
|
| 352 |
with gr.Tab("βοΈ Config"):
|
| 353 |
with gr.Row():
|
| 354 |
btn_save = gr.Button("Salvar Config")
|