caarleexx committed on
Commit
9ec04c6
·
verified ·
1 Parent(s): 66274bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -43
app.py CHANGED
@@ -1,10 +1,9 @@
1
  # ╔════════════════════════════════════════════════════════════════════════════╗
2
- # ║ PIPELINE v41: FRAGMENTAÇÃO + VISÃO PAGINADA + CONFEXT_UPLOAD ║
3
  # ╚════════════════════════════════════════════════════════════════════════════╝
4
 
5
  import os
6
  import json
7
- import re
8
  import time
9
  from datetime import datetime
10
 
@@ -25,6 +24,10 @@ ARQUIVO_CONFIG = "protocolo_fragmentacao_visao-3.json"
25
 
26
  # ==================== 2. UTILIDADES ====================
27
 
 
 
 
 
28
  def carregar_protocolo():
29
  try:
30
  with open(ARQUIVO_CONFIG, "r", encoding="utf-8") as f:
@@ -56,36 +59,35 @@ def salvar_protocolo(conteudo):
56
  except:
57
  return "❌ Erro JSON"
58
 
59
- def log_point(msg, logs):
60
- ts = datetime.now().strftime("%H:%M:%S")
61
- return logs + f"[{ts}] {msg}\n"
62
 
63
- # --------- DIVISÃO BURRA COM TEXTO REAL ---------
 
64
 
65
- def ler_anexo_e_fragmentar(arquivo, paginas_por_fragmento=5):
66
- """
67
- Se PDF: lê texto das páginas e cria fragmentos com blocos de até N páginas.
68
- Cada fragmento contém o texto bruto dessas páginas.
69
- Se não for PDF: devolve um marcador simples.
70
- """
71
  if arquivo is None:
72
- return [], ""
 
73
 
74
  filename = getattr(arquivo, "name", arquivo)
 
75
 
76
  if not os.path.exists(filename):
77
- return [], f"[ERRO: Arquivo não encontrado: {filename}]"
 
 
78
 
79
  anexo_info = f"[PDF: {os.path.basename(filename)}]"
80
 
81
  if not filename.lower().endswith(".pdf"):
82
- return [f"[ARQUIVO_TEXTO: {os.path.basename(filename)}]"], anexo_info
 
83
 
84
  try:
85
  reader = pypdf.PdfReader(filename)
86
  total_pages = len(reader.pages)
87
- fragments = []
88
 
 
89
  for i in range(0, total_pages, paginas_por_fragmento):
90
  start = i + 1
91
  end = min(i + paginas_por_fragmento, total_pages)
@@ -104,14 +106,24 @@ def ler_anexo_e_fragmentar(arquivo, paginas_por_fragmento=5):
104
  f"{bloco_texto.strip()}"
105
  )
106
  fragments.append(fragment)
 
 
 
 
107
 
108
- return fragments, anexo_info
 
109
  except Exception as e:
110
- return [f"[ERRO PDF: {str(e)}]"], anexo_info
 
111
 
112
  # ==================== 3. ENGINE DE EXECUÇÃO ====================
113
 
114
- def executar_no(timeline, config, fragmento_input=None):
 
 
 
 
115
  modelo = model_pro if config.get("modelo") == "pro" else model_flash
116
 
117
  if fragmento_input is not None:
@@ -127,12 +139,14 @@ def executar_no(timeline, config, fragmento_input=None):
127
  f"MISSÃO: {config['missao']}"
128
  )
129
 
130
- log = f"\n🔸 {config['nome']}..."
131
  try:
132
  inicio = time.time()
 
133
  resp = modelo.generate_content(prompt)
134
  out = resp.text or ""
135
  tempo = time.time() - inicio
 
 
136
 
137
  if config["tipo_saida"] == "json":
138
  cleaned = out.strip().replace("``````", "")
@@ -140,61 +154,74 @@ def executar_no(timeline, config, fragmento_input=None):
140
  content = json.loads(cleaned)
141
  except Exception as e:
142
  content = []
143
- log += f" [ERRO JSON: {e}]"
144
  else:
145
  content = out
146
 
147
- log += f" (OK - {tempo:.2f}s)"
148
- return {"role": "assistant", "agent": config["nome"], "content": content}, log, out
149
  except Exception as e:
150
- return {"role": "system", "error": str(e)}, f" (ERRO: {e})", str(e)
 
151
 
152
  # ==================== 4. ORQUESTRADOR ====================
153
 
154
  def orquestrador(texto, arquivo, history, json_config, confext_state):
155
- fragmentos, anexo_info = ler_anexo_e_fragmentar(arquivo)
 
 
 
 
 
 
 
156
 
157
  if not texto and not fragmentos:
158
- yield history, {}, "⚠️ Sem input ou erro ao ler arquivo.", confext_state
 
159
  return
160
 
161
  history = history + [[texto + (" 📎" if arquivo else ""), None]]
162
 
163
  try:
164
  protocolo = json.loads(json_config)
165
- except:
 
166
  history[-1][1] = "❌ Erro no JSON de Configuração."
167
- yield history, {}, "Erro JSON", confext_state
 
168
  return
169
 
170
  timeline = [{"role": "user", "content": texto}]
171
- logs = f"🚀 START: {datetime.now().strftime('%H:%M:%S')}\n"
172
-
173
  confext_upload = {
174
  "arquivo": os.path.basename(getattr(arquivo, "name", "sem_arquivo"))
175
  if arquivo else None,
176
  "meta": anexo_info,
177
  "paginas": []
178
  }
 
 
 
 
179
 
180
  if fragmentos:
181
- logs = log_point(f"PDF dividido em {len(fragmentos)} fragmentos.", logs)
182
  history[-1][1] = "⏳ Fragmentando + visão paginada..."
 
183
  yield history, timeline, logs, confext_upload
184
 
185
- # PASSO PAGINADOR_VISUAL (se existir)
186
  if protocolo and fragmentos:
187
  cfg_visao = protocolo[0]
188
- logs = log_point(f"Usando agente de visão: {cfg_visao['nome']}", logs)
189
 
190
  for i, fragmento in enumerate(fragmentos):
191
  history[-1][1] = f"👁️ {cfg_visao['nome']} frag {i+1}/{len(fragmentos)}..."
192
  logs = log_point(f"Enviando frag {i+1}", logs)
193
  yield history, timeline, logs, confext_upload
194
 
195
- res, log_add, raw = executar_no(timeline, cfg_visao, fragmento_input=fragmento)
196
- logs += log_add + "\n"
197
- logs = log_point(f"Resposta bruta frag {i+1}: {raw[:160]!r}", logs)
198
 
199
  if "error" in res:
200
  logs = log_point(f"Erro no frag {i+1}: {res['error']}", logs)
@@ -204,41 +231,53 @@ def orquestrador(texto, arquivo, history, json_config, confext_state):
204
  paginas_res = res["content"]
205
  if isinstance(paginas_res, dict):
206
  paginas_res = [paginas_res]
 
207
  for p in paginas_res:
208
  confext_upload["paginas"].append(p)
 
 
 
 
 
209
  except Exception as e:
210
  logs = log_point(f"Falha ao anexar páginas do frag {i+1}: {e}", logs)
211
 
212
- logs = log_point("Visão paginada concluída.", logs)
 
 
 
213
 
214
  timeline.append({
215
  "role": "system",
216
  "agent": "CONFEXT_UPLOAD",
217
  "content": confext_upload
218
  })
 
219
 
220
  restante = protocolo[1:] if protocolo else []
221
  final_response = ""
222
 
223
  for cfg in restante:
224
  history[-1][1] = f"⚙️ {cfg['nome']}..."
 
225
  yield history, timeline, logs, confext_upload
226
 
227
- res, log_add, raw = executar_no(timeline, cfg)
228
  timeline.append(res)
229
- logs += log_add + "\n"
230
 
231
  if cfg["tipo_saida"] == "texto":
232
  final_response = res["content"]
233
  history[-1][1] = final_response
 
234
 
235
  yield history, timeline, logs, confext_upload
236
 
237
  if not restante and not texto:
238
  history[-1][1] = "✅ PDF processado. Pronto para perguntas usando confext_upload."
239
  final_response = history[-1][1]
 
240
 
241
- logs = log_point("FIM.", logs)
242
  yield history, timeline, logs, confext_upload
243
 
244
  # ==================== 5. UI ====================
@@ -255,6 +294,7 @@ def ui_clean():
255
  confext_state = gr.State(value=None)
256
 
257
  with gr.Tabs():
 
258
  with gr.Tab("💬 Investigador"):
259
  chatbot = gr.Chatbot(
260
  label="",
@@ -283,18 +323,26 @@ def ui_clean():
283
  btn_send = gr.Button("Enviar", variant="primary", size="sm")
284
 
285
  file_status = gr.Markdown("", visible=True)
 
 
 
 
 
 
286
  file_in.upload(
287
- lambda x: f"📎 Anexo recebido: {os.path.basename(getattr(x, 'name', x))}",
288
  inputs=file_in,
289
  outputs=file_status,
290
  )
291
 
 
292
  with gr.Tab("🕵️ Depuração"):
293
  with gr.Row():
294
  out_dna = gr.JSON(label="DNA (Timeline)")
295
  out_logs = gr.Textbox(label="Logs do Sistema", lines=20)
296
  confext_view = gr.JSON(label="confext_upload")
297
 
 
298
  with gr.Tab("⚙️ Config"):
299
  with gr.Row():
300
  btn_save = gr.Button("Salvar Config")
@@ -303,7 +351,14 @@ def ui_clean():
303
  btn_save.click(salvar_protocolo, code_json, lbl_save)
304
 
305
  def _orq_wrapper(texto, arquivo, history, json_cfg, confext_old):
306
- for h, dna, logs, confext_new in orquestrador(texto, arquivo, history, json_cfg, confext_old):
 
 
 
 
 
 
 
307
  yield h, dna, logs, confext_new
308
 
309
  triggers = [btn_send.click, txt_in.submit]
@@ -314,7 +369,16 @@ def ui_clean():
314
  inputs=[txt_in, file_in, chatbot, code_json, confext_state],
315
  outputs=[chatbot, out_dna, out_logs, confext_state],
316
  ).then(
317
- lambda c: (None, None, "", c),
 
 
 
 
 
 
 
 
 
318
  inputs=confext_state,
319
  outputs=[txt_in, file_in, file_status, confext_state],
320
  ).then(
 
1
  # ╔════════════════════════════════════════════════════════════════════════════╗
2
+ # ║ PIPELINE v42: FRAG + VISÃO PAGINADA + CONFEXT_UPLOAD + LOGS DETALHADOS ║
3
  # ╚════════════════════════════════════════════════════════════════════════════╝
4
 
5
  import os
6
  import json
 
7
  import time
8
  from datetime import datetime
9
 
 
24
 
25
  # ==================== 2. UTILIDADES ====================
26
 
27
+ def log_point(msg, logs):
28
+ ts = datetime.now().strftime("%H:%M:%S")
29
+ return logs + f"[{ts}] {msg}\n"
30
+
31
  def carregar_protocolo():
32
  try:
33
  with open(ARQUIVO_CONFIG, "r", encoding="utf-8") as f:
 
59
  except:
60
  return "❌ Erro JSON"
61
 
62
+ # --------- DIVISÃO BURRA COM TEXTO REAL + LOGS ---------
 
 
63
 
64
+ def ler_anexo_e_fragmentar(arquivo, paginas_por_fragmento=5, logs=""):
65
+ logs = log_point("ler_anexo_e_fragmentar() chamado", logs)
66
 
 
 
 
 
 
 
67
  if arquivo is None:
68
+ logs = log_point("Nenhum arquivo recebido", logs)
69
+ return [], "", logs
70
 
71
  filename = getattr(arquivo, "name", arquivo)
72
+ logs = log_point(f"Arquivo recebido: {filename}", logs)
73
 
74
  if not os.path.exists(filename):
75
+ msg = f"Arquivo não encontrado: {filename}"
76
+ logs = log_point(msg, logs)
77
+ return [], f"[ERRO: {msg}]", logs
78
 
79
  anexo_info = f"[PDF: {os.path.basename(filename)}]"
80
 
81
  if not filename.lower().endswith(".pdf"):
82
+ logs = log_point("Arquivo não é PDF; tratado como texto simples", logs)
83
+ return [f"[ARQUIVO_TEXTO: {os.path.basename(filename)}]"], anexo_info, logs
84
 
85
  try:
86
  reader = pypdf.PdfReader(filename)
87
  total_pages = len(reader.pages)
88
+ logs = log_point(f"PDF com {total_pages} páginas", logs)
89
 
90
+ fragments = []
91
  for i in range(0, total_pages, paginas_por_fragmento):
92
  start = i + 1
93
  end = min(i + paginas_por_fragmento, total_pages)
 
106
  f"{bloco_texto.strip()}"
107
  )
108
  fragments.append(fragment)
109
+ logs = log_point(
110
+ f"Fragmento {i//paginas_por_fragmento + 1} criado (pags {start}-{end})",
111
+ logs,
112
+ )
113
 
114
+ logs = log_point(f"Total de fragmentos: {len(fragments)}", logs)
115
+ return fragments, anexo_info, logs
116
  except Exception as e:
117
+ logs = log_point(f"ERRO PDF: {e}", logs)
118
+ return [f"[ERRO PDF: {str(e)}]"], anexo_info, logs
119
 
120
  # ==================== 3. ENGINE DE EXECUÇÃO ====================
121
 
122
+ def executar_no(timeline, config, fragmento_input=None, logs=""):
123
+ logs = log_point(f"executar_no({config['nome']}) chamado", logs)
124
+ modo = "input_fragmento" if fragmento_input is not None else "timeline"
125
+ logs = log_point(f"Modo de entrada: {modo}", logs)
126
+
127
  modelo = model_pro if config.get("modelo") == "pro" else model_flash
128
 
129
  if fragmento_input is not None:
 
139
  f"MISSÃO: {config['missao']}"
140
  )
141
 
 
142
  try:
143
  inicio = time.time()
144
+ logs = log_point("Chamando modelo.generate_content()", logs)
145
  resp = modelo.generate_content(prompt)
146
  out = resp.text or ""
147
  tempo = time.time() - inicio
148
+ logs = log_point(f"Tempo de geração: {tempo:.2f}s", logs)
149
+ logs = log_point(f"Saída bruta (120 chars): {out[:120]!r}", logs)
150
 
151
  if config["tipo_saida"] == "json":
152
  cleaned = out.strip().replace("``````", "")
 
154
  content = json.loads(cleaned)
155
  except Exception as e:
156
  content = []
157
+ logs = log_point(f"ERRO JSON parse: {e}", logs)
158
  else:
159
  content = out
160
 
161
+ logs = log_point("executar_no() concluído com sucesso", logs)
162
+ return {"role": "assistant", "agent": config["nome"], "content": content}, logs, out
163
  except Exception as e:
164
+ logs = log_point(f"ERRO em executar_no: {e}", logs)
165
+ return {"role": "system", "error": str(e)}, logs, str(e)
166
 
167
  # ==================== 4. ORQUESTRADOR ====================
168
 
169
  def orquestrador(texto, arquivo, history, json_config, confext_state):
170
+ logs = f"🚀 START: {datetime.now().strftime('%H:%M:%S')}\n"
171
+ logs = log_point("orquestrador() iniciado", logs)
172
+ logs = log_point(f"Texto len={len(texto or '')}", logs)
173
+
174
+ fragmentos, anexo_info, logs = ler_anexo_e_fragmentar(
175
+ arquivo, paginas_por_fragmento=5, logs=logs
176
+ )
177
+ logs = log_point(f"Qtd fragmentos após leitura: {len(fragmentos)}", logs)
178
 
179
  if not texto and not fragmentos:
180
+ logs = log_point("Sem texto e sem fragmentos; encerrando", logs)
181
+ yield history, {}, logs, confext_state
182
  return
183
 
184
  history = history + [[texto + (" 📎" if arquivo else ""), None]]
185
 
186
  try:
187
  protocolo = json.loads(json_config)
188
+ logs = log_point("Protocolo JSON carregado", logs)
189
+ except Exception as e:
190
  history[-1][1] = "❌ Erro no JSON de Configuração."
191
+ logs = log_point(f"ERRO carregando protocolo: {e}", logs)
192
+ yield history, {}, logs, confext_state
193
  return
194
 
195
  timeline = [{"role": "user", "content": texto}]
 
 
196
  confext_upload = {
197
  "arquivo": os.path.basename(getattr(arquivo, "name", "sem_arquivo"))
198
  if arquivo else None,
199
  "meta": anexo_info,
200
  "paginas": []
201
  }
202
+ logs = log_point(
203
+ f"confext_upload inicializado para arquivo={confext_upload['arquivo']}",
204
+ logs,
205
+ )
206
 
207
  if fragmentos:
 
208
  history[-1][1] = "⏳ Fragmentando + visão paginada..."
209
+ logs = log_point("Fragmentos disponíveis; iniciando visão paginada", logs)
210
  yield history, timeline, logs, confext_upload
211
 
212
+ # PASSO PAGINADOR_VISUAL (primeiro agente, se existir)
213
  if protocolo and fragmentos:
214
  cfg_visao = protocolo[0]
215
+ logs = log_point(f"Agente de visão selecionado: {cfg_visao['nome']}", logs)
216
 
217
  for i, fragmento in enumerate(fragmentos):
218
  history[-1][1] = f"👁️ {cfg_visao['nome']} frag {i+1}/{len(fragmentos)}..."
219
  logs = log_point(f"Enviando frag {i+1}", logs)
220
  yield history, timeline, logs, confext_upload
221
 
222
+ res, logs, raw = executar_no(
223
+ timeline, cfg_visao, fragmento_input=fragmento, logs=logs
224
+ )
225
 
226
  if "error" in res:
227
  logs = log_point(f"Erro no frag {i+1}: {res['error']}", logs)
 
231
  paginas_res = res["content"]
232
  if isinstance(paginas_res, dict):
233
  paginas_res = [paginas_res]
234
+ antes = len(confext_upload["paginas"])
235
  for p in paginas_res:
236
  confext_upload["paginas"].append(p)
237
+ depois = len(confext_upload["paginas"])
238
+ logs = log_point(
239
+ f"Frag {i+1} adicionou {depois-antes} páginas; total={depois}",
240
+ logs,
241
+ )
242
  except Exception as e:
243
  logs = log_point(f"Falha ao anexar páginas do frag {i+1}: {e}", logs)
244
 
245
+ logs = log_point(
246
+ f"Visão paginada concluída; paginas={len(confext_upload['paginas'])}",
247
+ logs,
248
+ )
249
 
250
  timeline.append({
251
  "role": "system",
252
  "agent": "CONFEXT_UPLOAD",
253
  "content": confext_upload
254
  })
255
+ logs = log_point("CONFEXT_UPLOAD injetado na timeline", logs)
256
 
257
  restante = protocolo[1:] if protocolo else []
258
  final_response = ""
259
 
260
  for cfg in restante:
261
  history[-1][1] = f"⚙️ {cfg['nome']}..."
262
+ logs = log_point(f"Iniciando passo adicional: {cfg['nome']}", logs)
263
  yield history, timeline, logs, confext_upload
264
 
265
+ res, logs, raw = executar_no(timeline, cfg, fragmento_input=None, logs=logs)
266
  timeline.append(res)
 
267
 
268
  if cfg["tipo_saida"] == "texto":
269
  final_response = res["content"]
270
  history[-1][1] = final_response
271
+ logs = log_point(f"Passo {cfg['nome']} produziu texto final", logs)
272
 
273
  yield history, timeline, logs, confext_upload
274
 
275
  if not restante and not texto:
276
  history[-1][1] = "✅ PDF processado. Pronto para perguntas usando confext_upload."
277
  final_response = history[-1][1]
278
+ logs = log_point("Nenhum passo adicional; apenas pré-processamento", logs)
279
 
280
+ logs = log_point("FIM orquestrador()", logs)
281
  yield history, timeline, logs, confext_upload
282
 
283
  # ==================== 5. UI ====================
 
294
  confext_state = gr.State(value=None)
295
 
296
  with gr.Tabs():
297
+ # --- ABA 1 ---
298
  with gr.Tab("💬 Investigador"):
299
  chatbot = gr.Chatbot(
300
  label="",
 
323
  btn_send = gr.Button("Enviar", variant="primary", size="sm")
324
 
325
  file_status = gr.Markdown("", visible=True)
326
+
327
+ def _on_upload(x):
328
+ nome = os.path.basename(getattr(x, "name", x))
329
+ print("[DEBUG] upload arquivo:", nome)
330
+ return f"📎 Anexo recebido: {nome}"
331
+
332
  file_in.upload(
333
+ _on_upload,
334
  inputs=file_in,
335
  outputs=file_status,
336
  )
337
 
338
+ # --- ABA 2 ---
339
  with gr.Tab("🕵️ Depuração"):
340
  with gr.Row():
341
  out_dna = gr.JSON(label="DNA (Timeline)")
342
  out_logs = gr.Textbox(label="Logs do Sistema", lines=20)
343
  confext_view = gr.JSON(label="confext_upload")
344
 
345
+ # --- ABA 3 ---
346
  with gr.Tab("⚙️ Config"):
347
  with gr.Row():
348
  btn_save = gr.Button("Salvar Config")
 
351
  btn_save.click(salvar_protocolo, code_json, lbl_save)
352
 
353
  def _orq_wrapper(texto, arquivo, history, json_cfg, confext_old):
354
+ print(
355
+ "[DEBUG] _orq_wrapper disparado",
356
+ "len_texto=", len(texto or ""),
357
+ "arquivo=", getattr(arquivo, "name", None),
358
+ )
359
+ for h, dna, logs, confext_new in orquestrador(
360
+ texto, arquivo, history, json_cfg, confext_old
361
+ ):
362
  yield h, dna, logs, confext_new
363
 
364
  triggers = [btn_send.click, txt_in.submit]
 
369
  inputs=[txt_in, file_in, chatbot, code_json, confext_state],
370
  outputs=[chatbot, out_dna, out_logs, confext_state],
371
  ).then(
372
+ lambda c: (
373
+ print(
374
+ "[DEBUG] pós-envio; paginas_confext=",
375
+ 0 if not c else len(c.get("paginas", [])),
376
+ ),
377
+ None,
378
+ None,
379
+ "",
380
+ c,
381
+ )[1:],
382
  inputs=confext_state,
383
  outputs=[txt_in, file_in, file_status, confext_state],
384
  ).then(