caarleexx commited on
Commit
cd040e7
·
verified ·
1 Parent(s): 6bff912

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +843 -396
app.py CHANGED
@@ -7,23 +7,20 @@ import logging
7
  import requests
8
  import urllib3
9
  from flask import Flask, request, jsonify, render_template_string
10
- from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
11
  import traceback
12
 
13
  # Suprimir warnings de SSL
14
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
15
 
16
- # Configuração detalhada de logging
17
- logging.basicConfig(
18
- level=logging.DEBUG, # Mudar para DEBUG para mais detalhes
19
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
20
- )
21
  logger = logging.getLogger(__name__)
22
 
23
  app = Flask(__name__)
24
 
25
  # ============================================
26
- # PAYLOAD COMPLETO com TODOS os campos
27
  # ============================================
28
  PAYLOAD_COMPLETO = {
29
  "query": {
@@ -37,8 +34,38 @@ PAYLOAD_COMPLETO = {
37
  {"query_string": {
38
  "default_operator": "AND",
39
  "fields": [
 
 
 
 
 
 
 
 
 
 
 
40
  "ementa_texto.plural^3",
41
- "acordao_ata.plural^3"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  ],
43
  "query": "*",
44
  "type": "cross_fields",
@@ -50,489 +77,909 @@ PAYLOAD_COMPLETO = {
50
  }
51
  },
52
  "_source": [
53
- "id", "titulo", "ementa_texto", "acordao_ata", "decisao_texto",
54
- "processo_codigo_completo", "processo_numero", "julgamento_data",
55
- "publicacao_data", "orgao_julgador", "relator_processo_nome",
56
- "ministro_facet", "partes_lista_texto", "inteiro_teor_url",
57
- "documental_legislacao_citada_texto", "documental_jurisprudencia_citada_texto",
58
- "is_repercussao_geral", "documental_tese_texto"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  ],
60
- "size": 10,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  "from": 0,
62
  "sort": [{"julgamento_data": {"order": "desc"}}],
63
  "track_total_hits": True
64
  }
65
 
 
 
 
 
 
 
 
 
66
  # Constantes da API
67
  URL_API = "https://jurisprudencia.stf.jus.br/api/search/search"
68
  HEADERS = {
69
  "Accept": "application/json, text/plain, */*",
70
  "Content-Type": "application/json",
71
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
72
  "Referer": "https://jurisprudencia.stf.jus.br/pages/search",
73
  "Origin": "https://jurisprudencia.stf.jus.br"
74
  }
75
 
76
- # Cache simples para tokens
77
- token_cache = {
78
- "token": None,
79
- "expires_at": 0
80
- }
81
 
82
- # Template HTML simplificado para teste
 
 
83
  HTML_TEMPLATE = """
84
  <!DOCTYPE html>
85
  <html>
86
  <head>
87
- <title>STF Jurisprudência - Teste</title>
88
  <meta charset="utf-8">
 
89
  <style>
90
- body { font-family: Arial, sans-serif; max-width: 1200px; margin: 0 auto; padding: 20px; }
91
- .container { background: white; border-radius: 10px; padding: 20px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }
92
- h1 { color: #333; border-bottom: 2px solid #4CAF50; padding-bottom: 10px; }
93
- button { background: #4CAF50; color: white; border: none; padding: 10px 20px; font-size: 16px; border-radius: 5px; cursor: pointer; }
94
- pre { background: #f5f5f5; padding: 15px; border-radius: 5px; overflow: auto; max-height: 500px; }
95
- .error { color: red; background: #ffeeee; padding: 10px; border-radius: 5px; }
96
- .success { color: green; background: #eeffee; padding: 10px; border-radius: 5px; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  </style>
98
  </head>
99
  <body>
100
  <div class="container">
101
- <h1>⚖️ STF Jurisprudência - Teste</h1>
102
- <button onclick="test()">Testar Conexão</button>
103
- <button onclick="search()">Buscar Documentos</button>
104
- <div id="result"></div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  </div>
 
106
  <script>
107
- async function test() {
108
- document.getElementById('result').innerHTML = 'Testando...';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  try {
110
- const response = await fetch('/api/health');
 
 
 
 
 
 
 
 
 
111
  const data = await response.json();
112
- document.getElementById('result').innerHTML = '<pre>' + JSON.stringify(data, null, 2) + '</pre>';
113
- } catch(e) {
114
- document.getElementById('result').innerHTML = '<div class="error">Erro: ' + e.message + '</div>';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  }
116
  }
117
-
118
- async function search() {
119
- document.getElementById('result').innerHTML = 'Buscando...';
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  try {
121
- const response = await fetch('/api/test-bypass', { method: 'POST' });
122
  const data = await response.json();
123
- document.getElementById('result').innerHTML = '<pre>' + JSON.stringify(data, null, 2) + '</pre>';
124
- } catch(e) {
125
- document.getElementById('result').innerHTML = '<div class="error">Erro: ' + e.message + '</div>';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  }
127
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  </script>
129
  </body>
130
  </html>
131
  """
132
 
133
  # ============================================
134
- # Funções auxiliares com tratamento de erro
135
  # ============================================
136
-
137
  def get_fresh_token():
138
- """Obtém um token novo via Playwright com tratamento de erro"""
139
  global token_cache
140
-
141
- logger.debug("get_fresh_token: Iniciando")
142
-
143
- # Verificar cache
144
  if token_cache["token"] and time.time() < token_cache["expires_at"]:
145
  logger.info("Usando token em cache")
146
  return token_cache["token"]
147
 
148
  logger.info("Obtendo novo token via Playwright")
149
-
150
- playwright = None
151
- browser = None
152
-
153
  try:
154
- from playwright.sync_api import sync_playwright
155
-
156
- playwright = sync_playwright().start()
157
- logger.debug("Playwright iniciado")
158
-
159
- browser = playwright.chromium.launch(
160
- headless=True,
161
- args=['--no-sandbox', '--disable-dev-shm-usage']
162
- )
163
- logger.debug("Navegador iniciado")
164
-
165
- context = browser.new_context(
166
- viewport={'width': 1920, 'height': 1080},
167
- user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
168
- )
169
-
170
- page = context.new_page()
171
- logger.debug("Página criada")
172
-
173
- # Navegar para a página
174
- response = page.goto(
175
- "https://jurisprudencia.stf.jus.br/pages/search",
176
- wait_until='domcontentloaded',
177
- timeout=30000
178
- )
179
-
180
- if not response:
181
- raise Exception("Sem resposta da página")
182
-
183
- logger.debug(f"Página carregada: status {response.status}")
184
-
185
- # Aguardar um pouco
186
- page.wait_for_timeout(3000)
187
-
188
- # Obter cookies
189
- cookies = context.cookies()
190
- token = None
191
-
192
- for cookie in cookies:
193
- if cookie.get('name') == 'aws-waf-token':
194
- token = cookie.get('value')
195
- logger.debug(f"Token encontrado: {token[:30]}...")
196
- break
197
-
198
- if not token:
199
- logger.warning("Token não encontrado nos cookies")
200
- # Tentar extrair de localStorage
201
- token = page.evaluate("""
202
- () => {
203
- for(let i=0; i<localStorage.length; i++) {
204
- let key = localStorage.key(i);
205
- if(key && key.includes('waf')) {
206
- return localStorage.getItem(key);
207
- }
208
- }
209
- return null;
210
- }
211
- """)
212
  if token:
213
- logger.debug(f"Token encontrado no localStorage: {token[:30]}...")
214
-
215
- return token
216
-
 
 
 
217
  except Exception as e:
218
  logger.error(f"Erro ao obter token: {str(e)}")
219
- logger.error(traceback.format_exc())
220
  return None
221
-
222
- finally:
223
- if browser:
224
- try:
225
- browser.close()
226
- logger.debug("Navegador fechado")
227
- except:
228
- pass
229
- if playwright:
230
- try:
231
- playwright.stop()
232
- logger.debug("Playwright parado")
233
- except:
234
- pass
235
 
236
  def search_with_token(token, custom_payload=None):
237
- """Faz busca usando token com tratamento de erro"""
238
- logger.debug("search_with_token: Iniciando")
239
-
240
  if not token:
241
  return {"success": False, "error": "Token não fornecido"}
242
-
243
  payload = custom_payload or PAYLOAD_COMPLETO
244
-
245
  headers = HEADERS.copy()
246
  headers['Cookie'] = f'aws-waf-token={token}'
247
-
248
  try:
249
- logger.debug(f"Fazendo requisição para {URL_API}")
250
-
251
- response = requests.post(
252
- URL_API,
253
- headers=headers,
254
- json=payload,
255
- verify=False, # Ignorar SSL para teste
256
- timeout=30
257
- )
258
-
259
- logger.debug(f"Resposta: status {response.status_code}")
260
-
261
  if response.status_code == 200:
262
- try:
263
- data = response.json()
264
- return {"success": True, "data": data}
265
- except Exception as e:
266
- return {"success": False, "error": f"Erro ao decodificar JSON: {str(e)}"}
267
  else:
268
- return {
269
- "success": False,
270
- "error": f"HTTP {response.status_code}",
271
- "text": response.text[:500]
272
- }
273
-
274
- except requests.exceptions.Timeout:
275
- return {"success": False, "error": "Timeout na requisição"}
276
- except requests.exceptions.ConnectionError as e:
277
- return {"success": False, "error": f"Erro de conexão: {str(e)}"}
278
  except Exception as e:
279
- return {"success": False, "error": str(e), "traceback": traceback.format_exc()}
280
 
281
- def test_playwright_installation():
282
- """Testa se o Playwright está instalado corretamente"""
283
- logger.info("Testando instalação do Playwright...")
284
-
 
 
 
285
  try:
286
- from playwright.sync_api import sync_playwright
287
-
288
  with sync_playwright() as p:
289
- # Tenta listar navegadores disponíveis
290
- browsers = p.chromium, p.firefox, p.webkit
291
- logger.info(f"Navegadores disponíveis: Chromium, Firefox, WebKit")
292
-
293
- # Tenta iniciar Chromium
294
- browser = p.chromium.launch(headless=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  browser.close()
296
-
297
- logger.info("✅ Playwright instalado corretamente")
298
- return True
299
-
300
- except ImportError as e:
301
- logger.error(f"❌ Playwright não importado: {e}")
302
- return False
303
  except Exception as e:
304
- logger.error(f"Erro no Playwright: {e}")
305
- logger.error(traceback.format_exc())
306
- return False
307
 
308
  # ============================================
309
- # Rotas da aplicação
310
  # ============================================
311
-
312
  @app.route('/')
313
  def index():
314
- """Página principal"""
315
  try:
316
  return render_template_string(HTML_TEMPLATE)
317
  except Exception as e:
318
- logger.error(f"Erro ao renderizar template: {e}")
319
- return f"Erro: {str(e)}", 500
320
 
321
- @app.route('/api/health', methods=['GET'])
322
- def health():
323
- """Health check detalhado"""
324
- logger.debug("Health check requisitado")
 
 
325
 
326
- result = {
327
- "status": "checking",
328
- "timestamp": time.time(),
329
- "python_version": sys.version,
330
- "environment": {}
331
- }
332
 
333
- # Verificar variáveis de ambiente importantes
334
- result["environment"]["PORT"] = os.environ.get('PORT', 'não definido')
335
- result["environment"]["HOME"] = os.environ.get('HOME', 'não definido')
336
- result["environment"]["PATH"] = os.environ.get('PATH', 'não definido')[:100] + "..."
337
 
338
- # Verificar Playwright
339
- try:
340
- from playwright.sync_api import sync_playwright
341
- result["playwright_import"] = "ok"
342
-
343
- with sync_playwright() as p:
344
- browsers = []
345
- if hasattr(p, 'chromium'):
346
- browsers.append('chromium')
347
- if hasattr(p, 'firefox'):
348
- browsers.append('firefox')
349
- if hasattr(p, 'webkit'):
350
- browsers.append('webkit')
351
- result["playwright_browsers"] = browsers
352
- result["playwright_status"] = "ok"
353
- except Exception as e:
354
- result["playwright_status"] = f"erro: {str(e)}"
355
-
356
- # Verificar requests
357
- try:
358
- test_response = requests.get("https://httpbin.org/get", timeout=5)
359
- result["requests_status"] = f"ok (status {test_response.status_code})"
360
- except Exception as e:
361
- result["requests_status"] = f"erro: {str(e)}"
362
-
363
- # Tentar obter token
364
  token = get_fresh_token()
365
- result["token_obtained"] = bool(token)
366
- if token:
367
- result["token_preview"] = token[:30] + "..."
368
-
369
- # Tentar busca simples
370
- if token:
371
- try:
372
- test_search = search_with_token(token, {"size": 1, "track_total_hits": True})
373
- result["test_search"] = "ok" if test_search.get("success") else f"falha: {test_search.get('error')}"
374
- if test_search.get("success") and test_search["data"].get("hits", {}).get("total"):
375
- result["total_docs"] = test_search["data"]["hits"]["total"]["value"]
376
- except Exception as e:
377
- result["test_search"] = f"erro: {str(e)}"
378
 
379
- result["status"] = "healthy"
380
- return jsonify(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
 
382
  @app.route('/api/test-bypass', methods=['POST'])
383
  def test_bypass():
384
- """Endpoint de teste com fallback"""
385
- logger.info("Requisição de teste recebida")
386
-
387
- result = {
388
- "success": False,
389
- "timestamp": time.time(),
390
- "attempts": []
391
- }
392
-
393
- # Tentar com token primeiro
394
- logger.info("Tentando obter token...")
395
  token = get_fresh_token()
396
-
397
  if token:
398
- logger.info("Token obtido, tentando busca...")
399
- search_result = search_with_token(token)
400
- result["attempts"].append({
401
- "method": "token",
402
- "success": search_result.get("success", False),
403
- "token_used": bool(token)
404
- })
405
-
406
- if search_result.get("success"):
407
- result["success"] = True
408
- result["data"] = search_result["data"]
409
- result["token"] = token
410
- return jsonify(result)
411
- else:
412
- result["attempts"].append({
413
- "method": "token",
414
- "success": False,
415
- "error": "Não foi possível obter token"
416
- })
417
-
418
- # Se falhou, tentar Playwright direto
419
- logger.info("Tentando acesso direto com Playwright...")
420
  try:
421
- from playwright.sync_api import sync_playwright
422
-
423
  with sync_playwright() as p:
424
- browser = p.chromium.launch(headless=True, args=['--no-sandbox'])
425
- context = browser.new_context()
426
- page = context.new_page()
427
-
428
- # Navegar e executar busca
429
- page.goto("https://jurisprudencia.stf.jus.br/pages/search",
430
- wait_until='domcontentloaded',
431
- timeout=30000)
432
-
433
- # Executar busca via JavaScript
434
- api_result = page.evaluate("""
435
- async () => {
436
- try {
437
- const response = await fetch('https://jurisprudencia.stf.jus.br/api/search/search', {
438
- method: 'POST',
439
- headers: {'Content-Type': 'application/json'},
440
- body: JSON.stringify({
441
- query: {match_all: {}},
442
- size: 5
443
- })
444
- });
445
- return await response.json();
446
- } catch(e) {
447
- return {error: e.toString()};
448
- }
449
- }
450
- """)
451
-
452
- browser.close()
453
-
454
- if api_result and not api_result.get('error'):
455
- result["success"] = True
456
- result["method"] = "playwright_direct"
457
- result["data"] = api_result
458
- result["attempts"].append({
459
- "method": "playwright_direct",
460
- "success": True
461
- })
462
- return jsonify(result)
463
- else:
464
- result["attempts"].append({
465
- "method": "playwright_direct",
466
- "success": False,
467
- "error": api_result.get('error', 'Falha desconhecida')
468
- })
469
-
470
- except Exception as e:
471
- logger.error(f"Erro no Playwright direto: {str(e)}")
472
- result["attempts"].append({
473
- "method": "playwright_direct",
474
- "success": False,
475
- "error": str(e),
476
- "traceback": traceback.format_exc()
477
- })
478
-
479
- # Se todas as tentativas falharam
480
- result["error"] = "Todas as tentativas falharam"
481
- return jsonify(result), 500
482
-
483
- @app.route('/api/debug', methods=['GET'])
484
- def debug():
485
- """Endpoint de debug para verificar ambiente"""
486
- debug_info = {
487
- "cwd": os.getcwd(),
488
- "files": os.listdir('.'),
489
- "env": dict(os.environ),
490
- "python_path": sys.path,
491
- "modules": list(sys.modules.keys())[:20] # Primeiros 20 módulos
492
- }
493
- return jsonify(debug_info)
494
-
495
- @app.errorhandler(Exception)
496
- def handle_error(error):
497
- """Handler global de erros"""
498
- logger.error(f"Erro não tratado: {str(error)}")
499
- logger.error(traceback.format_exc())
500
  return jsonify({
501
- "error": str(error),
502
- "type": type(error).__name__,
503
- "traceback": traceback.format_exc()
504
- }), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
505
 
506
  if __name__ == '__main__':
507
- print("="*60)
508
- print("🚀 Iniciando aplicação STF Jurisprudência")
509
- print("="*60)
510
-
511
- # Informações do sistema
512
- print(f"Python version: {sys.version}")
513
- print(f"Current directory: {os.getcwd()}")
514
- print(f"Files in directory: {os.listdir('.')}")
515
-
516
- # Testar importações
517
  try:
518
- import flask
519
- print(f"✅ Flask version: {flask.__version__}")
520
- except ImportError as e:
521
- print(f"❌ Flask: {e}")
522
-
 
 
 
 
 
523
  try:
524
- import requests
525
- print(f"✅ Requests version: {requests.__version__}")
526
- except ImportError as e:
527
- print(f"❌ Requests: {e}")
528
-
529
- # Testar Playwright
530
- playwright_ok = test_playwright_installation()
531
-
532
- # Porta
533
  port = int(os.environ.get('PORT', 7860))
534
- print(f"📡 Porta: {port}")
535
- print("="*60)
536
-
537
- # Iniciar app
538
- app.run(host='0.0.0.0', port=port, debug=True) # debug=True para mais detalhes
 
7
  import requests
8
  import urllib3
9
  from flask import Flask, request, jsonify, render_template_string
10
+ from playwright.sync_api import sync_playwright
11
  import traceback
12
 
13
  # Suprimir warnings de SSL
14
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
15
 
16
+ # Configuração de logging
17
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 
 
18
  logger = logging.getLogger(__name__)
19
 
20
  app = Flask(__name__)
21
 
22
  # ============================================
23
+ # PAYLOAD COMPLETO (com True/False maiúsculos)
24
  # ============================================
25
  PAYLOAD_COMPLETO = {
26
  "query": {
 
34
  {"query_string": {
35
  "default_operator": "AND",
36
  "fields": [
37
+ "processo_codigo_completo.plural",
38
+ "acordao_ata.plural^3",
39
+ "documental_acordao_mesmo_sentido_lista_texto.plural",
40
+ "documental_doutrina_texto.plural",
41
+ "documental_indexacao_texto.plural",
42
+ "documental_jurisprudencia_citada_texto.plural",
43
+ "documental_legislacao_citada_texto.plural",
44
+ "documental_observacao_texto.plural",
45
+ "documental_publicacao_lista_texto.plural",
46
+ "documental_tese_tema_texto.plural^3",
47
+ "documental_tese_texto.plural^3",
48
  "ementa_texto.plural^3",
49
+ "ministro_facet.plural",
50
+ "revisor_processo_nome.plural",
51
+ "orgao_julgador.plural",
52
+ "partes_lista_texto.plural",
53
+ "procedencia_geografica_completo.plural",
54
+ "processo_classe_processual_unificada_extenso.plural",
55
+ "titulo.plural^6",
56
+ "colac_numero.plural",
57
+ "colac_pagina.plural",
58
+ "decisao_texto.plural^2",
59
+ "documental_decisao_mesmo_sentido_lista_texto.plural",
60
+ "processo_precedente_texto.plural",
61
+ "sumula_texto.plural^3",
62
+ "ramo_direito.plural^1",
63
+ "situacao_sumula.plural^1",
64
+ "materia_noticia.plural^1",
65
+ "titulo_noticia.plural^3",
66
+ "resumo_noticia.plural^3",
67
+ "conteudo_noticia.plural^1",
68
+ "ramo_noticia.plural^1"
69
  ],
70
  "query": "*",
71
  "type": "cross_fields",
 
77
  }
78
  },
79
  "_source": [
80
+ "base", "_id", "id", "dg_unique",
81
+ "titulo", "ministro_facet", "orgao_julgador",
82
+ "procedencia_geografica_completo",
83
+ "procedencia_geografica_pais_sigla",
84
+ "procedencia_geografica_uf_sigla",
85
+ "procedencia_geografica_uf_extenso",
86
+ "processo_codigo_completo",
87
+ "processo_classe_processual_unificada_extenso",
88
+ "processo_classe_processual_unificada_classe_sigla",
89
+ "processo_classe_processual_unificada_incidente_sigla",
90
+ "processo_classe_processual_unificada_sigla",
91
+ "processo_numero",
92
+ "processo_lista_texto",
93
+ "julgamento_data",
94
+ "publicacao_data",
95
+ "republicacao_data",
96
+ "periodo_inicio_data",
97
+ "periodo_fim_data",
98
+ "dg_atualizado_em",
99
+ "is_decisao_presidencia",
100
+ "relator_processo_nome",
101
+ "relator_decisao_nome",
102
+ "relator_acordao_nome",
103
+ "presidente_nome",
104
+ "revisor_processo_nome",
105
+ "ementa_texto",
106
+ "acordao_ata",
107
+ "decisao_texto",
108
+ "inteiro_teor_url",
109
+ "sumula_texto",
110
+ "partes_lista_texto",
111
+ "acompanhamento_processual_url",
112
+ "dje_url",
113
+ "informativo_url",
114
+ "pesquisa_url",
115
+ "audio_url",
116
+ "video_url",
117
+ "numero_noticias_url",
118
+ "aprovacao_url",
119
+ "documental_publicacao_lista_texto",
120
+ "documental_decisao_mesmo_sentido_lista_texto",
121
+ "documental_decisao_mesmo_sentido_lista_html",
122
+ "documental_decisao_mesmo_sentido_is_secundario",
123
+ "documental_legislacao_citada_texto",
124
+ "documental_jurisprudencia_citada_texto",
125
+ "documental_indexacao_texto",
126
+ "documental_observacao_texto",
127
+ "documental_observacao_html",
128
+ "documental_doutrina_texto",
129
+ "documental_acordao_mesmo_sentido_lista_texto",
130
+ "documental_acordao_mesmo_sentido_lista_html",
131
+ "documental_acordao_mesmo_sentido_is_secundario",
132
+ "documental_assunto_texto",
133
+ "documental_tese_tipo",
134
+ "documental_tese_texto",
135
+ "documental_tese_tema_texto",
136
+ "externo_seq_objeto_incidente",
137
+ "volume_informativo",
138
+ "ramo_noticia",
139
+ "materia_noticia",
140
+ "titulo_noticia",
141
+ "resumo_noticia",
142
+ "conteudo_noticia",
143
+ "numero_noticias_processo",
144
+ "is_covid",
145
+ "tipo_julgamento",
146
+ "julgamento_is_sessao_virtual",
147
+ "sumula_numero",
148
+ "is_vinculante",
149
+ "situacao_sumula",
150
+ "ramo_direito",
151
+ "processo_precedente_texto",
152
+ "processo_precedente_html",
153
+ "is_questao_ordem",
154
+ "is_repercussao_geral_admissibilidade",
155
+ "is_repercussao_geral_merito",
156
+ "is_repercussao_geral_recurso_interno",
157
+ "is_repercussao_geral",
158
+ "is_processo_antigo",
159
+ "is_colac",
160
+ "colac_numero",
161
+ "colac_pagina",
162
+ "old_seq_colac",
163
+ "old_seq_repercussao_geral",
164
+ "old_seq_sjur",
165
+ "ods_onu"
166
  ],
167
+ "aggs": {
168
+ "base_agg": {
169
+ "filters": {
170
+ "filters": {
171
+ "acordaos": {"match": {"base": "acordaos"}},
172
+ "sumulas": {"match": {"base": "sumulas"}},
173
+ "decisoes": {"match": {"base": "decisoes"}},
174
+ "informativos": {"match": {"base": "novo_informativo"}}
175
+ }
176
+ }
177
+ },
178
+ "orgao_julgador_agg": {
179
+ "aggs": {
180
+ "orgao_julgador_agg": {
181
+ "terms": {"field": "orgao_julgador.keyword", "size": 10}
182
+ }
183
+ }
184
+ },
185
+ "ministro_facet_agg": {
186
+ "aggs": {
187
+ "ministro_facet_agg": {
188
+ "terms": {"field": "ministro_facet.keyword", "size": 10}
189
+ }
190
+ }
191
+ },
192
+ "procedencia_geografica_uf_sigla_agg": {
193
+ "aggs": {
194
+ "procedencia_geografica_uf_sigla_agg": {
195
+ "terms": {"field": "procedencia_geografica_uf_sigla", "size": 10}
196
+ }
197
+ }
198
+ }
199
+ },
200
+ "highlight": {
201
+ "fields": {
202
+ "ementa_texto": {"matched_fields": ["ementa_texto.plural"], "type": "fvh", "fragment_size": 24000},
203
+ "sumula_texto": {"matched_fields": ["sumula_texto.plural"], "type": "fvh", "number_of_fragments": 0},
204
+ "materia_noticia": {"matched_fields": ["materia_noticia.plural"], "type": "fvh"},
205
+ "titulo_noticia": {"matched_fields": ["titulo_noticia.plural"], "type": "fvh"},
206
+ "resumo_noticia": {"matched_fields": ["resumo_noticia.plural"], "type": "fvh", "fragment_size": 5000},
207
+ "conteudo_noticia": {"matched_fields": ["conteudo_noticia.plural"], "type": "fvh", "fragment_size": 50000},
208
+ "acordao_ata": {"matched_fields": ["acordao_ata.plural"], "type": "fvh", "fragment_size": 600},
209
+ "decisao_texto": {"matched_fields": ["decisao_texto.plural"], "type": "fvh", "fragment_size": 1200},
210
+ "documental_tese_texto": {"matched_fields": ["documental_tese_texto.plural"], "type": "fvh", "fragment_size": 2000},
211
+ "documental_tese_tema_texto": {"matched_fields": ["documental_tese_tema_texto.plural"], "type": "fvh", "fragment_size": 2000},
212
+ "documental_observacao_texto": {"matched_fields": ["documental_observacao_texto.plural"], "type": "fvh"},
213
+ "documental_indexacao_texto": {"matched_fields": ["documental_indexacao_texto.plural"], "type": "fvh"},
214
+ "documental_legislacao_citada_texto": {"matched_fields": ["documental_legislacao_citada_texto.plural"], "type": "fvh"},
215
+ "documental_jurisprudencia_citada_texto": {"matched_fields": ["documental_jurisprudencia_citada_texto.plural"], "type": "fvh"},
216
+ "documental_doutrina_texto": {"matched_fields": ["documental_doutrina_texto.plural"], "type": "fvh"},
217
+ "partes_lista_texto": {"matched_fields": ["partes_lista_texto.plural"], "type": "fvh"},
218
+ "documental_publicacao_lista_texto": {"matched_fields": ["documental_publicacao_lista_texto.plural"], "type": "fvh"},
219
+ "documental_acordao_mesmo_sentido_lista_texto": {"matched_fields": ["documental_acordao_mesmo_sentido_lista_texto.plural"], "type": "fvh"},
220
+ "documental_decisao_mesmo_sentido_lista_texto": {"matched_fields": ["documental_decisao_mesmo_sentido_lista_texto.plural"], "type": "fvh"},
221
+ "processo_precedente_texto": {"matched_fields": ["processo_precedente_texto.plural"], "type": "fvh"},
222
+ "procedencia_geografica_completo": {"matched_fields": ["procedencia_geografica_completo.plural"], "type": "fvh"}
223
+ },
224
+ "pre_tags": ["<em>"],
225
+ "post_tags": ["</em>"],
226
+ "fragment_size": 300,
227
+ "number_of_fragments": 64,
228
+ "order": "score"
229
+ },
230
+ "size": 100,
231
  "from": 0,
232
  "sort": [{"julgamento_data": {"order": "desc"}}],
233
  "track_total_hits": True
234
  }
235
 
236
+ # Payload para busca por ID
237
+ PAYLOAD_POR_ID = {
238
+ "query": {"ids": {"values": []}},
239
+ "_source": PAYLOAD_COMPLETO["_source"],
240
+ "highlight": PAYLOAD_COMPLETO["highlight"],
241
+ "size": 1
242
+ }
243
+
244
  # Constantes da API
245
  URL_API = "https://jurisprudencia.stf.jus.br/api/search/search"
246
  HEADERS = {
247
  "Accept": "application/json, text/plain, */*",
248
  "Content-Type": "application/json",
249
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
250
  "Referer": "https://jurisprudencia.stf.jus.br/pages/search",
251
  "Origin": "https://jurisprudencia.stf.jus.br"
252
  }
253
 
254
+ # Cache de token
255
+ token_cache = {"token": None, "expires_at": 0}
 
 
 
256
 
257
+ # ============================================
258
+ # HTML TEMPLATE COMPLETO (com abas, botões, etc.)
259
+ # ============================================
260
  HTML_TEMPLATE = """
261
  <!DOCTYPE html>
262
  <html>
263
  <head>
264
+ <title>⚖️ STF Jurisprudência - Visualizador Completo</title>
265
  <meta charset="utf-8">
266
+ <meta name="viewport" content="width=device-width, initial-scale=1">
267
  <style>
268
+ body { font-family: 'Segoe UI', Roboto, system-ui, sans-serif; max-width: 1600px; margin: 0 auto; padding: 20px; background: #f0f2f5; }
269
+ .container { background: white; border-radius: 12px; padding: 30px; box-shadow: 0 8px 20px rgba(0,0,0,0.1); }
270
+ h1 { color: #1a1a2e; border-bottom: 3px solid #4CAF50; padding-bottom: 15px; display: flex; align-items: center; gap: 10px; }
271
+ h1:before { content: "⚖️"; font-size: 1.2em; }
272
+ h2 { color: #2c3e50; margin-top: 25px; margin-bottom: 15px; font-size: 1.5em; border-left: 5px solid #4CAF50; padding-left: 15px; }
273
+ h3 { color: #34495e; margin-top: 20px; margin-bottom: 10px; font-size: 1.2em; border-bottom: 1px solid #e0e0e0; padding-bottom: 8px; }
274
+ .info-box { background: #e8f0fe; border-left: 5px solid #2196F3; padding: 15px 20px; margin: 20px 0; border-radius: 8px; }
275
+ .stats-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; margin: 25px 0; }
276
+ .stat-card { background: white; border: 1px solid #e0e0e0; border-radius: 10px; padding: 20px; text-align: center; box-shadow: 0 2px 8px rgba(0,0,0,0.05); }
277
+ .stat-value { font-size: 32px; font-weight: bold; color: #1a73e8; }
278
+ .stat-label { color: #5f6368; font-size: 14px; margin-top: 8px; text-transform: uppercase; letter-spacing: 0.5px; }
279
+ .button-group { display: flex; gap: 15px; flex-wrap: wrap; margin: 25px 0; }
280
+ button { background: #1a73e8; color: white; border: none; padding: 14px 28px; font-size: 16px; font-weight: 500; border-radius: 8px; cursor: pointer; transition: all 0.3s; display: inline-flex; align-items: center; gap: 10px; box-shadow: 0 2px 8px rgba(26,115,232,0.3); }
281
+ button:hover { background: #1557b0; transform: translateY(-2px); box-shadow: 0 4px 12px rgba(26,115,232,0.4); }
282
+ button:disabled { background: #a0a0a0; cursor: not-allowed; transform: none; box-shadow: none; }
283
+ button.secondary { background: #34a853; }
284
+ button.secondary:hover { background: #2d8745; }
285
+ button.download { background: #f9ab00; color: #1a1a2e; }
286
+ button.download:hover { background: #e69c00; }
287
+ .loading { display: inline-block; width: 20px; height: 20px; border: 3px solid rgba(255,255,255,0.3); border-top: 3px solid white; border-radius: 50%; animation: spin 1s linear infinite; margin-right: 10px; vertical-align: middle; }
288
+ @keyframes spin { 0% { transform: rotate(0deg); } 100% { transform: rotate(360deg); } }
289
+ pre { background: #f8f9fa; border: 1px solid #e0e0e0; border-radius: 10px; padding: 20px; overflow: auto; max-height: 800px; font-size: 13px; font-family: 'Consolas', 'Monaco', monospace; box-shadow: inset 0 2px 4px rgba(0,0,0,0.05); }
290
+ .success { color: #0f9d58; background: #e6f4ea; border-left: 5px solid #34a853; padding: 15px 20px; margin: 15px 0; border-radius: 8px; font-weight: 500; }
291
+ .error { color: #d93025; background: #fce8e6; border-left: 5px solid #ea4335; padding: 15px 20px; margin: 15px 0; border-radius: 8px; font-weight: 500; }
292
+ .warning { color: #e37400; background: #fef7e0; border-left: 5px solid #f9ab00; padding: 15px 20px; margin: 15px 0; border-radius: 8px; font-weight: 500; }
293
+ .token-box { background: #1a1a2e; color: #e0e0e0; padding: 15px; border-radius: 8px; font-family: 'Consolas', monospace; word-break: break-all; margin: 15px 0; border: 1px solid #2a2a3e; }
294
+ .token-label { color: #f9ab00; font-weight: bold; margin-bottom: 8px; display: block; }
295
+ .filters { background: #f8f9fa; border-radius: 8px; padding: 20px; margin: 20px 0; border: 1px solid #e0e0e0; }
296
+ .filter-group { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; align-items: end; }
297
+ .filter-item { min-width: 200px; }
298
+ .filter-item label { display: block; margin-bottom: 8px; color: #5f6368; font-size: 14px; font-weight: 500; }
299
+ .filter-item input, .filter-item select { width: 100%; padding: 10px; border: 1px solid #ddd; border-radius: 6px; font-size: 14px; }
300
+ .filter-item input:focus, .filter-item select:focus { outline: none; border-color: #1a73e8; box-shadow: 0 0 0 2px rgba(26,115,232,0.2); }
301
+ .badge { display: inline-block; background: #e8f0fe; color: #1a73e8; padding: 4px 12px; border-radius: 20px; font-size: 13px; font-weight: 500; margin: 2px; }
302
+ .campo-lista { background: #f8f9fa; border: 1px solid #e9ecef; border-radius: 8px; padding: 15px; margin: 10px 0; }
303
+ .campo-nome { font-weight: bold; color: #2c3e50; font-size: 14px; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 5px; }
304
+ .campo-valor { color: #1a1a2e; font-size: 14px; word-break: break-word; font-family: 'Consolas', monospace; background: white; padding: 8px; border-radius: 4px; border: 1px solid #e0e0e0; }
305
+ .campo-vazio { color: #999; font-style: italic; }
306
+ .texto-completo { max-height: 400px; overflow-y: auto; background: #f1f8fe; padding: 15px; border-radius: 8px; border: 1px solid #b8daf5; margin: 10px 0; white-space: pre-wrap; font-family: inherit; line-height: 1.5; }
307
+ .nav-tabs { display: flex; gap: 5px; margin: 20px 0; flex-wrap: wrap; border-bottom: 2px solid #e0e0e0; padding-bottom: 10px; }
308
+ .nav-tab { padding: 10px 20px; cursor: pointer; background: white; border: 1px solid #e0e0e0; border-radius: 8px 8px 0 0; margin-bottom: -2px; font-weight: 500; transition: all 0.2s; }
309
+ .nav-tab:hover { background: #f0f0f0; }
310
+ .nav-tab.active { background: #1a73e8; color: white; border-color: #1a73e8; }
311
+ .tab-content { display: none; padding: 20px; background: white; border: 1px solid #e0e0e0; border-top: none; border-radius: 0 0 8px 8px; }
312
+ .tab-content.active { display: block; }
313
+ .campo-card { background: #f8f9fa; border: 1px solid #e9ecef; border-radius: 8px; padding: 15px; margin: 10px 0; transition: all 0.2s; }
314
+ .campo-card:hover { box-shadow: 0 4px 12px rgba(0,0,0,0.1); border-color: #1a73e8; }
315
+ .flag-true { color: #0f9d58; font-weight: bold; }
316
+ .flag-false { color: #d93025; }
317
+ .url-link { color: #1a73e8; text-decoration: none; word-break: break-all; }
318
+ .url-link:hover { text-decoration: underline; }
319
+ .highlight { background-color: #f9ab00; color: #1a1a2e; font-weight: bold; padding: 2px 4px; border-radius: 4px; }
320
+ .highlight-box { background: #fef7e0; border: 1px solid #f9ab00; padding: 10px; border-radius: 8px; margin: 10px 0; }
321
+ .highlight-title { font-weight: bold; color: #e37400; margin-bottom: 5px; }
322
  </style>
323
  </head>
324
  <body>
325
  <div class="container">
326
+ <h1>STF Jurisprudência - Visualizador Completo</h1>
327
+
328
+ <div class="info-box">
329
+ <strong>📌 API de Jurisprudência do STF - Todos os Campos com Highlight</strong><br>
330
+ • Total de campos disponíveis: <span id="totalCampos">102</span><br>
331
+ • Campos com highlight: 22 campos configurados<br>
332
+ • Documentos disponíveis: <span id="totalDocs">carregando...</span>
333
+ </div>
334
+
335
+ <div class="stats-grid">
336
+ <div class="stat-card"><div class="stat-value" id="requestsCount">0</div><div class="stat-label">Requisições</div></div>
337
+ <div class="stat-card"><div class="stat-value" id="successCount">0</div><div class="stat-label">Sucessos</div></div>
338
+ <div class="stat-card"><div class="stat-value" id="failCount">0</div><div class="stat-label">Falhas</div></div>
339
+ <div class="stat-card"><div class="stat-value" id="docsCount">0</div><div class="stat-label">Docs Obtidos</div></div>
340
+ </div>
341
+
342
+ <div class="filters">
343
+ <h3 style="margin-top: 0;">🔍 Filtros de Busca</h3>
344
+ <div class="filter-group">
345
+ <div class="filter-item">
346
+ <label>Quantidade</label>
347
+ <select id="pageSize">
348
+ <option value="5">5 resultados</option>
349
+ <option value="10">10 resultados</option>
350
+ <option value="25">25 resultados</option>
351
+ <option value="50">50 resultados</option>
352
+ <option value="100" selected>100 resultados</option>
353
+ </select>
354
+ </div>
355
+ <div class="filter-item">
356
+ <label>Ordenar por</label>
357
+ <select id="sortOrder">
358
+ <option value="desc">Mais recentes</option>
359
+ <option value="asc">Mais antigos</option>
360
+ </select>
361
+ </div>
362
+ <div class="filter-item">
363
+ <label>Base</label>
364
+ <select id="base">
365
+ <option value="acordaos">Acórdãos</option>
366
+ <option value="decisoes">Decisões</option>
367
+ <option value="sumulas">Súmulas</option>
368
+ <option value="informativos">Informativos</option>
369
+ <option value="">Todas</option>
370
+ </select>
371
+ </div>
372
+ <div class="filter-item">
373
+ <label>Busca por ID</label>
374
+ <input type="text" id="docId" placeholder="Ex: sjur505215">
375
+ </div>
376
+ </div>
377
+ </div>
378
+
379
+ <div class="button-group">
380
+ <button id="testBtn" onclick="runSearch()">
381
+ <span class="loading" id="loading" style="display: none;"></span>
382
+ <span>🔍 Buscar Documentos</span>
383
+ </button>
384
+ <button id="getByIdBtn" class="secondary" onclick="getDocumentById()">
385
+ <span>📄 Buscar por ID</span>
386
+ </button>
387
+ <button id="downloadBtn" class="download" onclick="downloadJSON()" disabled>
388
+ 📥 Download JSON
389
+ </button>
390
+ <button id="copyBtn" class="secondary" onclick="copyToken()">
391
+ 🔑 Copiar Token
392
+ </button>
393
+ </div>
394
+
395
+ <div id="tokenDisplay" style="display: none;" class="token-box">
396
+ <span class="token-label">🔐 Token AWS WAF</span>
397
+ <span id="tokenValue"></span>
398
+ </div>
399
+
400
+ <div id="result" style="margin-top: 25px;"></div>
401
  </div>
402
+
403
  <script>
404
+ let lastResult = null;
405
+ let lastToken = null;
406
+ let requestsCount = 0, successCount = 0, failCount = 0, docsCount = 0;
407
+
408
+ function renderCampo(nome, valor, tipo = 'normal') {
409
+ if (valor === null || valor === undefined || valor === '') {
410
+ return `<div class="campo-card"><div class="campo-nome">${nome}</div><div class="campo-vazio">(vazio)</div></div>`;
411
+ }
412
+ if (tipo === 'url' && valor) {
413
+ return `<div class="campo-card"><div class="campo-nome">${nome}</div><div class="campo-valor"><a href="${valor}" target="_blank" class="url-link">${valor}</a></div></div>`;
414
+ }
415
+ if (tipo === 'flag') {
416
+ const flagClass = valor ? 'flag-true' : 'flag-false';
417
+ return `<div class="campo-card"><div class="campo-nome">${nome}</div><div class="campo-valor ${flagClass}">${valor ? '✓ Sim' : '✗ Não'}</div></div>`;
418
+ }
419
+ if (tipo === 'texto' && valor && valor.length > 200) {
420
+ return `<div class="campo-card"><div class="campo-nome">${nome}</div><div class="texto-completo">${valor.replace(/\\n/g, '<br>').replace(/<em>/g, '<span class="highlight">').replace(/<\\/em>/g, '</span>')}</div></div>`;
421
+ }
422
+ if (Array.isArray(valor)) {
423
+ return `<div class="campo-card"><div class="campo-nome">${nome}</div><div class="campo-valor">${valor.join('<br>')}</div></div>`;
424
+ }
425
+ return `<div class="campo-card"><div class="campo-nome">${nome}</div><div class="campo-valor">${String(valor).replace(/\\n/g, '<br>').replace(/<em>/g, '<span class="highlight">').replace(/<\\/em>/g, '</span>')}</div></div>`;
426
+ }
427
+
428
+ function renderHighlight(highlight) {
429
+ if (!highlight) return '';
430
+ let html = '<div class="highlight-box"><div class="highlight-title">🔆 Termos destacados (highlight):</div>';
431
+ for (const [campo, valores] of Object.entries(highlight)) {
432
+ if (valores && valores.length > 0) {
433
+ html += `<div><strong>${campo}:</strong> `;
434
+ valores.forEach(valor => {
435
+ html += `<div style="margin-left: 20px; margin-top: 5px;">${valor.replace(/<em>/g, '<span class="highlight">').replace(/<\\/em>/g, '</span>')}</div>`;
436
+ });
437
+ html += '</div>';
438
+ }
439
+ }
440
+ html += '</div>';
441
+ return html;
442
+ }
443
+
444
+ function displayResult(data) {
445
+ if (!data || !data.result || !data.result.hits || !data.result.hits.hits || data.result.hits.hits.length === 0) {
446
+ document.getElementById('result').innerHTML = '<div class="warning">⚠️ Nenhum documento encontrado</div>';
447
+ return;
448
+ }
449
+
450
+ const hits = data.result.hits.hits;
451
+ const total = data.result.hits.total?.value || 0;
452
+
453
+ let html = `<div class="success">✅ Encontrados ${total.toLocaleString()} documentos. Exibindo ${hits.length} resultados.</div>`;
454
+
455
+ hits.forEach((hit, index) => {
456
+ const source = hit._source || {};
457
+ const highlight = hit.highlight || {};
458
+ const docId = source.id || hit._id || `doc_${index}`;
459
+
460
+ html += `<div class="campo-lista" style="margin-top: 30px;">`;
461
+ html += `<h3 style="display: flex; justify-content: space-between; align-items: center;">
462
+ <span>📄 Documento ${index + 1}: ${source.titulo || source.processo_codigo_completo || docId}</span>
463
+ <span class="badge">ID: ${docId}</span>
464
+ </h3>`;
465
+
466
+ // Abas internas para cada documento
467
+ html += `<div style="display: flex; gap: 5px; margin: 15px 0; flex-wrap: wrap;">`;
468
+ html += `<button class="badge" style="cursor: pointer;" onclick="showDocTab('geral-${index}')">📋 Geral</button>`;
469
+ html += `<button class="badge" style="cursor: pointer;" onclick="showDocTab('ementa-${index}')">📝 Ementa</button>`;
470
+ html += `<button class="badge" style="cursor: pointer;" onclick="showDocTab('acordao-${index}')">⚖️ Acórdão</button>`;
471
+ html += `<button class="badge" style="cursor: pointer;" onclick="showDocTab('legislacao-${index}')">📚 Legislação</button>`;
472
+ html += `<button class="badge" style="cursor: pointer;" onclick="showDocTab('highlight-${index}')">🔆 Highlight</button>`;
473
+ html += `<button class="badge" style="cursor: pointer;" onclick="showDocTab('completo-${index}')">📋 Todos Campos</button>`;
474
+ html += `</div>`;
475
+
476
+ // Aba: Geral
477
+ html += `<div id="doc-tab-geral-${index}" class="doc-tab" style="display: block;">`;
478
+ html += renderCampo('Processo', source.processo_codigo_completo);
479
+ html += renderCampo('Classe', source.processo_classe_processual_unificada_extenso);
480
+ html += renderCampo('Órgão Julgador', source.orgao_julgador);
481
+ html += renderCampo('Relator', source.relator_processo_nome);
482
+ html += renderCampo('Relator Acórdão', source.relator_acordao_nome);
483
+ html += renderCampo('Ministros', source.ministro_facet);
484
+ html += renderCampo('Data Julgamento', source.julgamento_data);
485
+ html += renderCampo('Data Publicação', source.publicacao_data);
486
+ html += renderCampo('Procedência', source.procedencia_geografica_completo);
487
+ html += renderCampo('UF', source.procedencia_geografica_uf_sigla);
488
+ html += `</div>`;
489
+
490
+ // Aba: Ementa
491
+ html += `<div id="doc-tab-ementa-${index}" class="doc-tab" style="display: none;">`;
492
+ html += renderCampo('Ementa', source.ementa_texto, 'texto');
493
+ html += renderCampo('Tese', source.documental_tese_texto, 'texto');
494
+ html += renderCampo('Tema', source.documental_tese_tema_texto);
495
+ html += `</div>`;
496
+
497
+ // Aba: Acórdão
498
+ html += `<div id="doc-tab-acordao-${index}" class="doc-tab" style="display: none;">`;
499
+ html += renderCampo('Acórdão/Ata', source.acordao_ata, 'texto');
500
+ html += renderCampo('Decisão', source.decisao_texto, 'texto');
501
+ html += renderCampo('Súmula', source.sumula_texto, 'texto');
502
+ html += renderCampo('Indexação', source.documental_indexacao_texto, 'texto');
503
+ html += renderCampo('Observações', source.documental_observacao_texto, 'texto');
504
+ html += `</div>`;
505
+
506
+ // Aba: Legislação
507
+ html += `<div id="doc-tab-legislacao-${index}" class="doc-tab" style="display: none;">`;
508
+ html += renderCampo('Legislação Citada', source.documental_legislacao_citada_texto, 'texto');
509
+ html += renderCampo('Jurisprudência Citada', source.documental_jurisprudencia_citada_texto, 'texto');
510
+ html += renderCampo('Doutrina', source.documental_doutrina_texto, 'texto');
511
+ html += renderCampo('Precedentes', source.processo_precedente_texto, 'texto');
512
+ html += `</div>`;
513
+
514
+ // Aba: Highlight
515
+ html += `<div id="doc-tab-highlight-${index}" class="doc-tab" style="display: none;">`;
516
+ if (Object.keys(highlight).length > 0) {
517
+ for (const [campo, valores] of Object.entries(highlight)) {
518
+ if (valores && valores.length > 0) {
519
+ html += `<div class="campo-card">`;
520
+ html += `<div class="campo-nome">${campo}</div>`;
521
+ valores.forEach(valor => {
522
+ html += `<div class="highlight-box" style="margin-top: 5px;">${valor.replace(/<em>/g, '<span class="highlight">').replace(/<\\/em>/g, '</span>')}</div>`;
523
+ });
524
+ html += `</div>`;
525
+ }
526
+ }
527
+ } else {
528
+ html += `<div class="campo-vazio">Nenhum termo destacado</div>`;
529
+ }
530
+ html += `</div>`;
531
+
532
+ // Aba: Todos Campos
533
+ html += `<div id="doc-tab-completo-${index}" class="doc-tab" style="display: none;">`;
534
+ html += `<pre>${JSON.stringify(source, null, 2).replace(/<em>/g, '<span class="highlight">').replace(/<\\/em>/g, '</span>')}</pre>`;
535
+ html += `</div>`;
536
+
537
+ // URLs importantes
538
+ if (source.inteiro_teor_url) {
539
+ html += `<div style="margin-top: 15px; padding: 10px; background: #e8f0fe; border-radius: 8px;">`;
540
+ html += `<strong>🔗 Links:</strong><br>`;
541
+ html += `<a href="${source.inteiro_teor_url}" target="_blank" class="url-link">📄 Inteiro Teor</a><br>`;
542
+ if (source.acompanhamento_processual_url) {
543
+ html += `<a href="${source.acompanhamento_processual_url}" target="_blank" class="url-link">📊 Acompanhamento Processual</a><br>`;
544
+ }
545
+ if (source.dje_url) {
546
+ html += `<a href="${source.dje_url}" target="_blank" class="url-link">📰 DJE</a>`;
547
+ }
548
+ html += `</div>`;
549
+ }
550
+
551
+ html += `</div>`; // Fecha campo-lista
552
+ });
553
+
554
+ document.getElementById('result').innerHTML = html;
555
+ }
556
+
557
+ function showDocTab(tabId) {
558
+ const docIndex = tabId.split('-')[2];
559
+ document.querySelectorAll(`[id^="doc-tab-"]`).forEach(el => {
560
+ if (el.id.includes(`-${docIndex}`)) {
561
+ el.style.display = 'none';
562
+ }
563
+ });
564
+ const selectedTab = document.getElementById(`doc-tab-${tabId}`);
565
+ if (selectedTab) {
566
+ selectedTab.style.display = 'block';
567
+ }
568
+ }
569
+
570
+ async function runSearch() {
571
+ const btn = document.getElementById('testBtn');
572
+ const loading = document.getElementById('loading');
573
+ const resultDiv = document.getElementById('result');
574
+ const pageSize = document.getElementById('pageSize').value;
575
+ const sortOrder = document.getElementById('sortOrder').value;
576
+ const base = document.getElementById('base').value;
577
+
578
+ btn.disabled = true;
579
+ loading.style.display = 'inline-block';
580
+ resultDiv.innerHTML = '<div class="info-box">⏳ Executando busca no STF...</div>';
581
+
582
  try {
583
+ const response = await fetch('/api/search-advanced', {
584
+ method: 'POST',
585
+ headers: { 'Content-Type': 'application/json' },
586
+ body: JSON.stringify({
587
+ pageSize: parseInt(pageSize),
588
+ sortOrder,
589
+ base: base || undefined
590
+ })
591
+ });
592
+
593
  const data = await response.json();
594
+ requestsCount++;
595
+ document.getElementById('requestsCount').textContent = requestsCount;
596
+
597
+ if (data.success) {
598
+ successCount++;
599
+ document.getElementById('successCount').textContent = successCount;
600
+
601
+ if (data.data && data.data.result && data.data.result.hits) {
602
+ docsCount = data.data.result.hits.hits.length;
603
+ document.getElementById('docsCount').textContent = docsCount;
604
+
605
+ if (data.data.result.hits.total && data.data.result.hits.total.value) {
606
+ document.getElementById('totalDocs').textContent = data.data.result.hits.total.value.toLocaleString();
607
+ }
608
+ }
609
+
610
+ lastResult = data.data;
611
+ lastToken = data.token;
612
+
613
+ if (lastToken) {
614
+ document.getElementById('tokenValue').textContent = lastToken;
615
+ document.getElementById('tokenDisplay').style.display = 'block';
616
+ document.getElementById('downloadBtn').disabled = false;
617
+ }
618
+
619
+ displayResult(data.data);
620
+
621
+ } else {
622
+ failCount++;
623
+ document.getElementById('failCount').textContent = failCount;
624
+
625
+ let errorHtml = '<div class="error">❌ Falha na busca</div>';
626
+ if (data.attempts) {
627
+ data.attempts.forEach(attempt => {
628
+ errorHtml += `<div class="warning"><strong>${attempt.method}:</strong> ${attempt.error || 'Erro'}</div>`;
629
+ });
630
+ }
631
+ errorHtml += '<pre>' + JSON.stringify(data, null, 2) + '</pre>';
632
+ resultDiv.innerHTML = errorHtml;
633
+ }
634
+ } catch (error) {
635
+ failCount++;
636
+ document.getElementById('failCount').textContent = failCount;
637
+ resultDiv.innerHTML = '<div class="error">❌ Erro: ' + error.message + '</div>';
638
+ } finally {
639
+ btn.disabled = false;
640
+ loading.style.display = 'none';
641
  }
642
  }
643
+
644
+ async function getDocumentById() {
645
+ const docId = document.getElementById('docId').value.trim();
646
+ if (!docId) {
647
+ alert('Digite um ID');
648
+ return;
649
+ }
650
+
651
+ const btn = document.getElementById('getByIdBtn');
652
+ const loading = document.getElementById('loading');
653
+ const resultDiv = document.getElementById('result');
654
+
655
+ btn.disabled = true;
656
+ loading.style.display = 'inline-block';
657
+ resultDiv.innerHTML = '<div class="info-box">⏳ Buscando documento...</div>';
658
+
659
  try {
660
+ const response = await fetch(`/api/document/${docId}`);
661
  const data = await response.json();
662
+
663
+ requestsCount++;
664
+ document.getElementById('requestsCount').textContent = requestsCount;
665
+
666
+ if (data.success && data.document) {
667
+ successCount++;
668
+ document.getElementById('successCount').textContent = successCount;
669
+ docsCount++;
670
+ document.getElementById('docsCount').textContent = docsCount;
671
+
672
+ lastResult = { result: { hits: { hits: [data.document] } } };
673
+
674
+ displayResult(lastResult);
675
+ } else {
676
+ failCount++;
677
+ document.getElementById('failCount').textContent = failCount;
678
+ resultDiv.innerHTML = '<div class="error">❌ Documento não encontrado</div>';
679
+ }
680
+ } catch (error) {
681
+ failCount++;
682
+ document.getElementById('failCount').textContent = failCount;
683
+ resultDiv.innerHTML = '<div class="error">❌ Erro: ' + error.message + '</div>';
684
+ } finally {
685
+ btn.disabled = false;
686
+ loading.style.display = 'none';
687
  }
688
  }
689
+
690
+ function downloadJSON() {
691
+ if (!lastResult) {
692
+ alert('Nenhum resultado para download');
693
+ return;
694
+ }
695
+
696
+ const dataStr = JSON.stringify(lastResult, null, 2);
697
+ const blob = new Blob([dataStr], { type: 'application/json' });
698
+ const url = URL.createObjectURL(blob);
699
+ const a = document.createElement('a');
700
+ a.href = url;
701
+ a.download = `stf_jurisprudencia_${new Date().toISOString().slice(0,10)}.json`;
702
+ document.body.appendChild(a);
703
+ a.click();
704
+ document.body.removeChild(a);
705
+ URL.revokeObjectURL(url);
706
+ }
707
+
708
+ function copyToken() {
709
+ if (!lastToken) {
710
+ alert('Nenhum token disponível. Execute uma busca primeiro.');
711
+ return;
712
+ }
713
+
714
+ navigator.clipboard.writeText(lastToken).then(() => {
715
+ alert('✅ Token copiado para área de transferência!');
716
+ }).catch(() => {
717
+ alert('❌ Erro ao copiar token');
718
+ });
719
+ }
720
+
721
+ window.onload = async () => {
722
+ try {
723
+ const response = await fetch('/api/health');
724
+ const data = await response.json();
725
+ if (data.total_docs) {
726
+ document.getElementById('totalDocs').textContent = data.total_docs.toLocaleString();
727
+ }
728
+ document.getElementById('totalCampos').textContent = "102";
729
+ } catch (e) {
730
+ document.getElementById('totalDocs').textContent = 'indisponível';
731
+ }
732
+ };
733
  </script>
734
  </body>
735
  </html>
736
  """
737
 
738
  # ============================================
739
+ # Funções auxiliares
740
  # ============================================
 
741
  def get_fresh_token():
 
742
  global token_cache
 
 
 
 
743
  if token_cache["token"] and time.time() < token_cache["expires_at"]:
744
  logger.info("Usando token em cache")
745
  return token_cache["token"]
746
 
747
  logger.info("Obtendo novo token via Playwright")
 
 
 
 
748
  try:
749
+ with sync_playwright() as p:
750
+ browser = p.chromium.launch(headless=True, args=['--no-sandbox'])
751
+ context = browser.new_context(
752
+ viewport={'width': 1920, 'height': 1080},
753
+ user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
754
+ )
755
+ page = context.new_page()
756
+ page.goto("https://jurisprudencia.stf.jus.br/pages/search", wait_until='domcontentloaded', timeout=30000)
757
+ page.wait_for_timeout(3000)
758
+ cookies = context.cookies()
759
+ token = None
760
+ for cookie in cookies:
761
+ if cookie.get('name') == 'aws-waf-token':
762
+ token = cookie.get('value')
763
+ break
764
+ browser.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
765
  if token:
766
+ token_cache["token"] = token
767
+ token_cache["expires_at"] = time.time() + 3300
768
+ logger.info(f"Token obtido: {token[:30]}...")
769
+ return token
770
+ else:
771
+ logger.warning("Token não encontrado nos cookies")
772
+ return None
773
  except Exception as e:
774
  logger.error(f"Erro ao obter token: {str(e)}")
 
775
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
776
 
777
  def search_with_token(token, custom_payload=None):
 
 
 
778
  if not token:
779
  return {"success": False, "error": "Token não fornecido"}
 
780
  payload = custom_payload or PAYLOAD_COMPLETO
 
781
  headers = HEADERS.copy()
782
  headers['Cookie'] = f'aws-waf-token={token}'
 
783
  try:
784
+ response = requests.post(URL_API, headers=headers, json=payload, verify=False, timeout=30)
 
 
 
 
 
 
 
 
 
 
 
785
  if response.status_code == 200:
786
+ return {"success": True, "data": response.json()}
787
+ elif response.status_code == 403:
788
+ token_cache["token"] = None
789
+ return {"success": False, "error": "Token expirado", "status": 403}
 
790
  else:
791
+ return {"success": False, "error": f"HTTP {response.status_code}", "text": response.text[:500]}
 
 
 
 
 
 
 
 
 
792
  except Exception as e:
793
+ return {"success": False, "error": str(e)}
794
 
795
def get_document_by_id(token, doc_id):
    """Search for a single document by its id.

    Args:
        token: WAF token forwarded to search_with_token.
        doc_id: Document identifier placed in the `ids` query of the payload.

    Returns:
        The result dict produced by search_with_token.
    """
    # Local import: only part of the file's import block is visible here.
    import copy

    # Deep copy is required: a shallow dict.copy() shares the nested "query"
    # dict, so writing the id below would mutate the module-level
    # PAYLOAD_POR_ID template for every later (or concurrent) request.
    payload = copy.deepcopy(PAYLOAD_POR_ID)
    payload["query"]["ids"]["values"] = [doc_id]
    return search_with_token(token, payload)
799
+
800
def test_with_playwright_full(payload):
    """Run the search from inside a headless Chromium page via an in-page fetch.

    The STF search page is opened, the `aws-waf-token` cookie is read from the
    browser context, and the payload is POSTed to the search endpoint with
    `fetch` executed in the page itself.

    Args:
        payload: JSON-serialisable request body passed to the in-page fetch.

    Returns:
        On success: {"success": True, "data": <response JSON>, "token": <cookie or None>};
        a token found on success is also stored in the module cache for 3300 s.
        On failure: {"success": False, "error": <message>, "token": ...}, plus
        "status" when the fetch completed with a non-OK HTTP status. If
        Playwright itself raises, only "success" and "error" are returned.
    """
    logger.info("Tentando acesso com Playwright...")
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True, args=['--no-sandbox'])
            context = browser.new_context(
                viewport={'width': 1920, 'height': 1080},
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            )
            page = context.new_page()
            page.goto("https://jurisprudencia.stf.jus.br/pages/search", wait_until='domcontentloaded', timeout=30000)
            page.wait_for_timeout(3000)
            token = next(
                (c.get('value') for c in context.cookies() if c.get('name') == 'aws-waf-token'),
                None,
            )
            api_result = page.evaluate("""
                async (payload) => {
                    try {
                        const response = await fetch('https://jurisprudencia.stf.jus.br/api/search/search', {
                            method: 'POST',
                            headers: {'Content-Type': 'application/json', 'Accept': 'application/json'},
                            body: JSON.stringify(payload)
                        });
                        if (response.ok) {
                            return { success: true, data: await response.json() };
                        } else {
                            return { success: false, status: response.status };
                        }
                    } catch (error) {
                        return { success: false, error: error.toString() };
                    }
                }
            """, payload) or {}
            browser.close()

        if api_result.get('success'):
            if token:
                token_cache["token"] = token
                token_cache["expires_at"] = time.time() + 3300
            return {"success": True, "data": api_result.get('data'), "token": token}

        # The in-page script reports either an exception text ("error") or a
        # non-OK HTTP status ("status"). Surface the status instead of
        # collapsing it into a generic message.
        status = api_result.get('status')
        error = api_result.get('error')
        if not error:
            error = f"HTTP {status}" if status is not None else 'Falha desconhecida'
        failure = {"success": False, "error": error, "token": token}
        if status is not None:
            failure["status"] = status
        return failure
    except Exception as e:
        logger.error(f"Erro no Playwright: {str(e)}")
        return {"success": False, "error": str(e)}
 
847
 
848
  # ============================================
849
+ # Rotas
850
  # ============================================
 
851
  @app.route('/')
852
  def index():
 
853
  try:
854
  return render_template_string(HTML_TEMPLATE)
855
  except Exception as e:
856
+ return f"Erro no template: {str(e)}<br><pre>{traceback.format_exc()}</pre>", 500
 
857
 
858
@app.route('/api/search-advanced', methods=['POST'])
def search_advanced():
    """Run a search with caller-selected page size, sort order and base filter.

    Request JSON (all keys optional):
        pageSize: number of hits; coerced to int and clamped to 0..250
            (default 100, also used when the value is not numeric).
        sortOrder: 'asc' or 'desc' on `julgamento_data` (default 'desc';
            any other value falls back to 'desc').
        base: when truthy, applied as a `term` post-filter on `base`.

    Strategy: use a token with search_with_token; on a 403 refresh the token
    and retry once; if that still fails (or no token could be obtained) fall
    back to test_with_playwright_full.

    Returns:
        JSON {"success": True, "token", "data", "timestamp"} on success, or
        {"success": False, "error", ...} with status 500 when every path fails.
    """
    # Local import: only part of the file's import block is visible here.
    import copy

    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    try:
        page_size = int(data.get('pageSize', 100))
    except (TypeError, ValueError, OverflowError):
        page_size = 100
    page_size = max(0, min(page_size, 250))

    sort_order = data.get('sortOrder', 'desc')
    if sort_order not in ('asc', 'desc'):
        sort_order = 'desc'
    base = data.get('base')

    # Deep copy: the nested post_filter is written below, and a shallow
    # dict.copy() would make that write land in the shared PAYLOAD_COMPLETO,
    # leaking one request's base filter into later requests.
    payload = copy.deepcopy(PAYLOAD_COMPLETO)
    payload["size"] = page_size
    payload["sort"] = [{"julgamento_data": {"order": sort_order}}]

    if base:
        if "post_filter" not in payload:
            payload["post_filter"] = {"bool": {"must": []}}
        payload["post_filter"]["bool"]["must"] = [{"term": {"base": base}}]

    def _ok(tok, found):
        # Shared success envelope for the token and Playwright paths.
        return jsonify({"success": True, "token": tok, "data": found, "timestamp": time.time()})

    result = None
    token = get_fresh_token()
    if token:
        result = search_with_token(token, payload)
        if result.get("success"):
            return _ok(token, result["data"])
        if result.get("status") == 403:
            # Token rejected: drop it, fetch a new one and retry once.
            token_cache["token"] = None
            token = get_fresh_token()
            if token:
                result = search_with_token(token, payload)
                if result.get("success"):
                    return _ok(token, result["data"])

    # Fallback: perform the request from inside the browser page.
    res = test_with_playwright_full(payload)
    if res.get('success'):
        return _ok(res.get('token'), res['data'])

    # Report the token-path error when there was one (as before); expose the
    # fallback's own error separately so neither failure is hidden.
    primary_error = result.get('error') if result is not None else res.get('error')
    return jsonify({
        "success": False,
        "error": primary_error,
        "fallback_error": res.get('error'),
    }), 500
899
+
900
@app.route('/api/document/<doc_id>', methods=['GET'])
def get_document(doc_id):
    """Return the first hit for the given document id.

    Args:
        doc_id: Identifier taken from the URL path.

    Returns:
        JSON {"success": True, "document": <hit>} when a hit exists;
        {"success": False, "error": ...} with 404 when the search fails or is
        empty, or with 500 when no token could be obtained.
    """
    token = get_fresh_token()
    if not token:
        return jsonify({"success": False, "error": "Não foi possível obter token"}), 500

    result = get_document_by_id(token, doc_id)
    hits = []
    if result.get("success"):
        data = result.get("data")
        if isinstance(data, dict):
            # NOTE(review): the frontend in this file reads search hits under
            # data.result.hits.hits, so the API body appears to wrap them in a
            # "result" object. Accept both that shape and a bare top-level
            # "hits" - confirm against a live response.
            envelope = data.get("result")
            if not isinstance(envelope, dict):
                envelope = data
            hits = (envelope.get("hits") or {}).get("hits") or []

    if hits:
        return jsonify({"success": True, "document": hits[0]})
    return jsonify({"success": False, "error": result.get("error", "Documento não encontrado")}), 404
911
 
912
  @app.route('/api/test-bypass', methods=['POST'])
913
  def test_bypass():
 
 
 
 
 
 
 
 
 
 
 
914
  token = get_fresh_token()
 
915
  if token:
916
+ res = search_with_token(token)
917
+ if res.get('success'):
918
+ return jsonify({"success": True, "method": "token", "token": token, "data": res['data']})
919
+ res = test_with_playwright_full(PAYLOAD_COMPLETO)
920
+ if res.get('success'):
921
+ return jsonify({"success": True, "method": "playwright", "token": res.get('token'), "data": res['data']})
922
+ return jsonify({"success": False, "error": res.get('error', 'Falha')}), 500
923
+
924
@app.route('/api/health', methods=['GET'])
def health():
    """Report service status: Playwright availability, token cache, document count.

    Launches and closes a headless Chromium to check Playwright, then, if a
    token is available, issues a size-0 search to read the total hit count.
    Both probes are best-effort; failures are logged and reflected in the
    response rather than raised.

    Returns:
        JSON with "status", "timestamp", "playwright_ready", "token_cached",
        "token_expires_in", "total_docs" (None when unknown) and
        "python_version".
    """
    playwright_status = False
    try:
        with sync_playwright() as p:
            p.chromium.launch(headless=True).close()
        playwright_status = True
    except Exception as e:
        # Was a bare `except: pass`, which also swallowed KeyboardInterrupt
        # and SystemExit and left no trace of why the probe failed.
        logger.warning(f"Playwright indisponível: {e}")

    total_docs = None
    token = get_fresh_token()
    if token:
        try:
            res = search_with_token(token, {"size": 0, "track_total_hits": True})
            data = res.get("data") if res.get("success") else None
            if isinstance(data, dict):
                # NOTE(review): the frontend in this file reads hits under a
                # "result" wrapper; accept that shape and a bare top-level
                # "hits" - confirm against a live response.
                envelope = data.get("result")
                if not isinstance(envelope, dict):
                    envelope = data
                total = (envelope.get("hits") or {}).get("total")
                if isinstance(total, dict):
                    total_docs = total.get("value")
                elif isinstance(total, int):
                    total_docs = total
        except Exception as e:
            logger.warning(f"Não foi possível obter total de documentos: {e}")

    return jsonify({
        "status": "healthy",
        "timestamp": time.time(),
        "playwright_ready": playwright_status,
        "token_cached": bool(token_cache["token"]),
        "token_expires_in": max(0, token_cache["expires_at"] - time.time()) if token_cache["expires_at"] else 0,
        "total_docs": total_docs,
        "python_version": sys.version
    })
951
+
952
@app.route('/api/token', methods=['GET'])
def get_token():
    """Return a usable token together with its remaining cache lifetime.

    Returns:
        JSON {"success": True, "token", "expires_in"} where "expires_in" is the
        non-negative number of seconds until the cached expiry, or
        {"success": False, "error": ...} with status 500 when no token is
        available.
    """
    token = get_fresh_token()
    if not token:
        return jsonify({"success": False, "error": "Não foi possível obter token"}), 500

    remaining = max(0, token_cache["expires_at"] - time.time())
    return jsonify({"success": True, "token": token, "expires_in": remaining})
959
+
960
@app.route('/api/clear-cache', methods=['POST'])
def clear_cache():
    """Reset the module-level token cache to its empty state.

    Returns:
        JSON {"success": True, "message": "Cache limpo"}.
    """
    global token_cache
    # Rebind the global to a fresh dict rather than mutating it in place.
    token_cache = dict(token=None, expires_at=0)
    return jsonify(success=True, message="Cache limpo")
965
 
966
  if __name__ == '__main__':
 
 
 
 
 
 
 
 
 
 
967
  try:
968
+ import certifi
969
+ os.environ['SSL_CERT_FILE'] = certifi.where()
970
+ os.environ['REQUESTS_CA_BUNDLE'] = certifi.where()
971
+ except:
972
+ pass
973
+ logger.info("="*50)
974
+ logger.info("🚀 Iniciando aplicação STF Jurisprudência")
975
+ logger.info(f"📋 Campos _source: {len(PAYLOAD_COMPLETO['_source'])}")
976
+ logger.info(f"🔆 Campos highlight: {len(PAYLOAD_COMPLETO['highlight']['fields'])}")
977
+ logger.info("="*50)
978
  try:
979
+ with sync_playwright() as p:
980
+ p.chromium.launch(headless=True).close()
981
+ logger.info("✅ Playwright pronto para uso")
982
+ except Exception as e:
983
+ logger.warning(f"⚠️ Playwright pode não estar configurado: {e}")
 
 
 
 
984
  port = int(os.environ.get('PORT', 7860))
985
+ app.run(host='0.0.0.0', port=port, debug=False)