caarleexx commited on
Commit
71039a1
·
verified ·
1 Parent(s): 6ec9cf3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +851 -600
app.py CHANGED
@@ -1,726 +1,977 @@
 
1
  import os
2
  import sys
3
  import json
4
  import time
5
- import copy
6
  import logging
7
  import requests
8
  import urllib3
9
- from flask import Flask, request, jsonify
10
  from playwright.sync_api import sync_playwright
 
11
 
 
12
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
 
13
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
14
  logger = logging.getLogger(__name__)
15
- app = Flask(__name__)
16
 
17
- # ============================================
18
- # CONSTANTES
19
- # ============================================
20
- URL_API = "https://jurisprudencia.stf.jus.br/api/search/search"
21
- HEADERS_BASE = {
22
- "Accept": "application/json, text/plain, */*",
23
- "Content-Type": "application/json",
24
- "User-Agent": (
25
- "Mozilla/5.0 (Linux; Android 10; Pixel 4a Build/QD4A.200805.003; wv) "
26
- "AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/129.0.6668.71 "
27
- "Mobile Safari/537.36"
28
- ),
29
- "Referer": "https://jurisprudencia.stf.jus.br/pages/search",
30
- "Origin": "https://jurisprudencia.stf.jus.br",
31
- }
32
- token_cache = {"token": None, "expires_at": 0}
33
 
34
  # ============================================
35
- # CAMPOS REUTILIZÁVEIS
36
  # ============================================
37
- _FIELDS_FULL = [
38
- "processo_codigo_completo.plural",
39
- "acordao_ata.plural^3",
40
- "documental_acordao_mesmo_sentido_lista_texto.plural",
41
- "documental_doutrina_texto.plural",
42
- "documental_indexacao_texto.plural",
43
- "documental_jurisprudencia_citada_texto.plural",
44
- "documental_legislacao_citada_texto.plural",
45
- "documental_observacao_texto.plural",
46
- "documental_publicacao_lista_texto.plural",
47
- "documental_tese_tema_texto.plural^3",
48
- "documental_tese_texto.plural^3",
49
- "ementa_texto.plural^3",
50
- "ministro_facet.plural",
51
- "revisor_processo_nome.plural",
52
- "orgao_julgador.plural",
53
- "partes_lista_texto.plural",
54
- "procedencia_geografica_completo.plural",
55
- "processo_classe_processual_unificada_extenso.plural",
56
- "titulo.plural^6",
57
- "colac_numero.plural",
58
- "colac_pagina.plural",
59
- "decisao_texto.plural^2",
60
- "documental_decisao_mesmo_sentido_lista_texto.plural",
61
- "processo_precedente_texto.plural",
62
- "sumula_texto.plural^3",
63
- "ramo_direito.plural^1",
64
- "situacao_sumula.plural^1",
65
- "materia_noticia.plural^1",
66
- "titulo_noticia.plural^3",
67
- "resumo_noticia.plural^3",
68
- "conteudo_noticia.plural^1",
69
- "ramo_noticia.plural^1",
70
- ]
71
-
72
- _FIELDS_SHOULD = [
73
- "acordao_ata.plural^3",
74
- "documental_doutrina_texto.plural",
75
- "documental_indexacao_texto.plural",
76
- "documental_jurisprudencia_citada_texto.plural",
77
- "documental_observacao_texto.plural",
78
- "documental_tese_tema_texto.plural^3",
79
- "documental_tese_texto.plural^3",
80
- "ementa_texto.plural^3",
81
- "titulo.plural^6",
82
- "decisao_texto.plural^2",
83
- "sumula_texto.plural^3",
84
- "ramo_direito.plural^1",
85
- "situacao_sumula.plural^1",
86
- "materia_noticia.plural^1",
87
- "titulo_noticia.plural^3",
88
- "resumo_noticia.plural^3",
89
- "conteudo_noticia.plural^1",
90
- "ramo_noticia.plural^1",
91
- ]
92
-
93
- _SOURCE_FIELDS = [
94
- "base", "_id", "id", "dg_unique", "titulo", "ministro_facet",
95
- "procedencia_geografica_completo", "procedencia_geografica_uf_sigla",
96
- "processo_codigo_completo", "processo_classe_processual_unificada_extenso",
97
- "processo_classe_processual_unificada_classe_sigla", "processo_numero",
98
- "julgamento_data", "publicacao_data",
99
- "relator_processo_nome", "relator_acordao_nome", "relator_decisao_nome",
100
- "acordao_ata", "decisao_texto", "ementa_texto",
101
- "partes_lista_texto", "orgao_julgador",
102
- "inteiro_teor_url", "acompanhamento_processual_url", "dje_url",
103
- "documental_tese_texto", "documental_tese_tema_texto",
104
- "documental_indexacao_texto", "documental_observacao_texto",
105
- "documental_legislacao_citada_texto", "documental_publicacao_lista_texto",
106
- "documental_acordao_mesmo_sentido_lista_texto",
107
- "documental_decisao_mesmo_sentido_lista_texto",
108
- "documental_jurisprudencia_citada_texto",
109
- "is_repercussao_geral", "is_repercussao_geral_merito",
110
- "is_repercussao_geral_admissibilidade", "is_questao_ordem",
111
- "ramo_direito", "sumula_texto", "sumula_numero", "situacao_sumula",
112
- "is_vinculante", "is_colac", "colac_numero", "colac_pagina",
113
- "julgamento_is_sessao_virtual", "tipo_julgamento",
114
- "volume_informativo", "titulo_noticia", "resumo_noticia",
115
- "conteudo_noticia", "materia_noticia", "ramo_noticia",
116
- "pesquisa_url", "informativo_url", "audio_url", "video_url",
117
- "dg_atualizado_em",
118
- ]
119
-
120
-
121
- def _build_query(term: str) -> dict:
122
- """Monta o bloco function_score com os 3 boosters do portal STF."""
123
- qs_base = {
124
- "default_operator": "AND",
125
- "fields": _FIELDS_FULL,
126
- "query": term,
127
- "type": "cross_fields",
128
- "fuzziness": "AUTO:4,7",
129
- "analyzer": "legal_search_analyzer",
130
- "quote_analyzer": "legal_index_analyzer",
131
- }
132
- qs_should_1 = {
133
- "default_operator": "AND",
134
- "fields": _FIELDS_SHOULD,
135
- "query": term,
136
- "tie_breaker": 1,
137
- "fuzziness": "AUTO:4,7",
138
- "analyzer": "legal_search_analyzer",
139
- "quote_analyzer": "legal_index_analyzer",
140
- }
141
- qs_should_2 = {
142
- "default_operator": "and",
143
- "type": "phrase",
144
- "tie_breaker": 1,
145
- "phrase_slop": 20,
146
- "fields": [
147
- "acordao_ata.plural^3", "documental_tese_tema_texto.plural^3",
148
- "documental_tese_texto.plural^3", "ementa_texto.plural^3",
149
- "decisao_texto.plural^2", "titulo_noticia.plural^3",
150
- "resumo_noticia.plural^3", "conteudo_noticia.plural^1",
151
- ],
152
- "query": term,
153
- "fuzziness": "AUTO:4,7",
154
- "analyzer": "legal_search_analyzer",
155
- "quote_analyzer": "legal_index_analyzer",
156
- }
157
-
158
- return {
159
  "function_score": {
160
  "functions": [
161
- {
162
- "exp": {
163
- "julgamento_data": {
164
- "origin": "now",
165
- "scale": "47450d",
166
- "offset": "1095d",
167
- "decay": 0.1,
168
- }
169
- }
170
- },
171
- {
172
- "filter": {"term": {"orgao_julgador.keyword": "Tribunal Pleno"}},
173
- "weight": 1.15,
174
- },
175
- {
176
- "filter": {"term": {"is_repercussao_geral": True}},
177
- "weight": 1.1,
178
- },
179
  ],
180
  "query": {
181
  "bool": {
182
- "filter": [{"query_string": qs_base}],
183
- "must": [],
184
- "should": [
185
- {"query_string": qs_should_1},
186
- {"query_string": qs_should_2},
187
- ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  }
189
- },
190
  }
191
- }
192
-
193
-
194
- def _build_aggs() -> dict:
195
- """Aggregations para facets (contagens por base, órgão, ministro, etc.)."""
196
- return {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  "base_agg": {
198
  "filters": {
199
  "filters": {
200
  "acordaos": {"match": {"base": "acordaos"}},
201
  "sumulas": {"match": {"base": "sumulas"}},
202
  "decisoes": {"match": {"base": "decisoes"}},
203
- "informativos": {"match": {"base": "novo_informativo"}},
204
- }
205
- }
206
- },
207
- "is_repercussao_geral_agg": {
208
- "filters": {
209
- "filters": {
210
- "true": {"bool": {"must": [
211
- {"match": {"is_repercussao_geral": True}},
212
- {"term": {"base": "acordaos"}},
213
- ]}},
214
- "false": {"bool": {"must": [
215
- {"match": {"is_repercussao_geral": False}},
216
- {"term": {"base": "acordaos"}},
217
- ]}},
218
  }
219
  }
220
  },
221
  "orgao_julgador_agg": {
222
  "aggs": {
223
  "orgao_julgador_agg": {
224
- "terms": {
225
- "field": "orgao_julgador.keyword",
226
- "size": 200,
227
- "execution_hint": "map",
228
- }
229
  }
230
- },
231
- "filter": {"bool": {"must": [{"term": {"base": "acordaos"}}]}},
232
  },
233
  "ministro_facet_agg": {
234
  "aggs": {
235
  "ministro_facet_agg": {
236
- "terms": {
237
- "field": "ministro_facet.keyword",
238
- "size": 200,
239
- "execution_hint": "map",
240
- }
241
  }
242
- },
243
- "filter": {"bool": {"must": [{"term": {"base": "acordaos"}}]}},
244
  },
245
- }
246
-
247
-
248
- def _build_highlight(term: str) -> dict:
249
- """Bloco de highlight com highlight_query espelhando a query principal."""
250
- return {
251
- "highlight_query": _build_query(term),
252
- "number_of_fragments": 64,
253
- "fragment_size": 300,
254
- "order": "score",
255
- "pre_tags": ["<em>"],
256
- "post_tags": ["</em>"],
257
  "fields": {
258
- "ementa_texto": {
259
- "fragment_size": 24000,
260
- "matched_fields": ["ementa_texto.plural"],
261
- "type": "fvh",
262
- },
263
- "acordao_ata": {
264
- "fragment_size": 600,
265
- "matched_fields": ["acordao_ata.plural"],
266
- "type": "fvh",
267
- },
268
- "decisao_texto": {
269
- "fragment_size": 1200,
270
- "matched_fields": ["decisao_texto.plural"],
271
- "type": "fvh",
272
- },
273
- "documental_tese_texto": {
274
- "fragment_size": 2000,
275
- "matched_fields": ["documental_tese_texto.plural"],
276
- "type": "fvh",
277
- },
278
- "documental_tese_tema_texto": {
279
- "fragment_size": 2000,
280
- "matched_fields": ["documental_tese_tema_texto.plural"],
281
- "type": "fvh",
282
- },
283
- "resumo_noticia": {
284
- "fragment_size": 5000,
285
- "matched_fields": ["resumo_noticia.plural"],
286
- "type": "fvh",
287
- },
288
  },
289
- }
290
-
291
-
292
- def build_payload(
293
- term: str,
294
- size: int = 20,
295
- from_: int = 0,
296
- sort: str = "_score", # "_score" | "julgamento_data" | "publicacao_data"
297
- sort_order: str = "desc",
298
- base: str = "acordaos", # acordaos | sumulas | decisoes | novo_informativo | None
299
- include_aggs: bool = True,
300
- include_highlight: bool = True,
301
- ) -> dict:
302
- """
303
- Monta o payload completo para a API do STF.
304
- """
305
- sort_clause = (
306
- [{"_score": sort_order}]
307
- if sort == "_score"
308
- else [{sort: {"order": sort_order}}]
309
- )
310
 
311
- payload = {
312
- "query": _build_query(term),
313
- "_source": _SOURCE_FIELDS,
314
- "size": size,
315
- "from": from_,
316
- "sort": sort_clause,
317
- "track_total_hits": True,
318
- }
 
319
 
320
- if base:
321
- payload["post_filter"] = {
322
- "bool": {
323
- "must": [{"term": {"base": base}}],
324
- "should": [],
325
- }
 
326
  }
 
 
 
 
 
 
327
 
328
- if include_aggs:
329
- payload["aggs"] = _build_aggs()
 
 
 
 
 
330
 
331
- if include_highlight:
332
- payload["highlight"] = _build_highlight(term)
 
 
 
 
333
 
334
- return payload
 
 
 
 
 
 
 
 
 
335
 
 
 
336
 
337
  # ============================================
338
- # TOKEN / WAF
339
  # ============================================
340
- def get_fresh_token() -> str | None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
341
  global token_cache
342
- now = time.time()
343
- if token_cache["token"] and now < token_cache["expires_at"]:
344
  logger.info("Usando token em cache")
345
  return token_cache["token"]
346
-
347
  logger.info("Obtendo novo token via Playwright")
348
  try:
349
  with sync_playwright() as p:
350
- browser = p.chromium.launch(
351
- headless=True, args=["--no-sandbox", "--disable-dev-shm-usage"]
352
- )
353
  context = browser.new_context(
354
- viewport={"width": 1920, "height": 1080},
355
- user_agent=HEADERS_BASE["User-Agent"],
356
  )
357
  page = context.new_page()
358
- page.goto(
359
- "https://jurisprudencia.stf.jus.br/pages/search",
360
- wait_until="domcontentloaded",
361
- timeout=30000,
362
- )
363
  page.wait_for_timeout(3000)
364
  cookies = context.cookies()
 
 
 
 
 
365
  browser.close()
366
-
367
- token = next(
368
- (c["value"] for c in cookies if c["name"] == "aws-waf-token"), None
369
- )
370
- if token:
371
- token_cache["token"] = token
372
- token_cache["expires_at"] = now + 3300 # ~55 min
373
- logger.info(f"Token obtido: {token[:30]}...")
374
- return token
375
-
376
- logger.warning("Cookie aws-waf-token não encontrado")
377
- return None
378
  except Exception as e:
379
- logger.error(f"Erro ao obter token: {e}")
380
  return None
381
 
382
-
383
- def _make_headers(token: str) -> dict:
384
- h = HEADERS_BASE.copy()
385
- h["Cookie"] = f"aws-waf-token={token}"
386
- return h
387
-
388
-
389
- def search_with_token(token: str, payload: dict) -> dict:
390
  """
391
- Retorna {"success": bool, "data": {...}} ou {"success": False, "error": ..., "status": int}
392
- - 200 sucesso
393
- - 403 → token expirado (limpa cache)
394
- - 202 → WAF bloqueou (token aceito como cookie mas requisição não passa)
395
  """
396
- if not token:
397
- return {"success": False, "error": "Token ausente"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398
 
 
 
 
 
 
399
  try:
400
- response = requests.post(
401
- URL_API,
402
- headers=_make_headers(token),
403
- json=payload,
404
- verify=False,
405
- timeout=30,
406
- )
407
- logger.info(f"HTTP {response.status_code}")
408
-
409
  if response.status_code == 200:
410
  return {"success": True, "data": response.json()}
411
-
412
- if response.status_code == 403:
413
  token_cache["token"] = None
414
- token_cache["expires_at"] = 0
415
- return {"success": False, "error": "Token expirado (403)", "status": 403}
416
-
417
- if response.status_code == 202:
418
- return {
419
- "success": False,
420
- "error": "WAF bloqueou requisição (202)",
421
- "status": 202,
422
- }
423
-
424
- return {
425
- "success": False,
426
- "error": f"HTTP {response.status_code}",
427
- "status": response.status_code,
428
- "text": response.text[:500],
429
- }
430
  except Exception as e:
431
- logger.error(f"Erro na requisição: {e}")
432
  return {"success": False, "error": str(e)}
433
 
434
-
435
- def try_search_cascade(
436
- token: str,
437
- term: str,
438
- size: int,
439
- from_: int,
440
- sort: str,
441
- sort_order: str,
442
- base: str,
443
- ) -> tuple[bool, dict | None, str | None]:
444
  """
445
- Tenta 3 níveis de complexidade de payload.
446
- Retorna (success, data, nivel_usado).
447
  """
448
- levels = [
449
- ("completo", dict(include_aggs=True, include_highlight=True)),
450
- ("sem_highlight", dict(include_aggs=False, include_highlight=False)),
451
- ("minimo", None),
452
- ]
453
-
454
- for nivel, kwargs in levels:
455
- if nivel == "minimo":
456
- payload = {
457
- "query": {"bool": {"filter": [{"term": {"base": base}}]}},
458
- "_source": ["id", "titulo", "ementa_texto", "processo_numero",
459
- "julgamento_data", "relator_processo_nome", "inteiro_teor_url"],
460
- "size": size,
461
- "from": from_,
462
- "sort": [{"julgamento_data": {"order": sort_order}}],
463
- "track_total_hits": True,
 
 
 
 
 
 
 
 
 
464
  }
465
- else:
466
- payload = build_payload(
467
- term=term, size=size, from_=from_,
468
- sort=sort, sort_order=sort_order, base=base,
469
- **kwargs,
470
- )
471
-
472
- logger.info(f"Tentando payload [{nivel}]...")
473
- res = search_with_token(token, payload)
474
-
475
- if res.get("success"):
476
- return True, res["data"], nivel
477
-
478
- # Apenas continua em erros de payload (400), não de auth (403/202)
479
- if res.get("status") not in (400, None):
480
- break
481
-
482
- return False, None, None
483
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
484
 
485
- def playwright_fetch(payload: dict) -> dict:
486
- """Fallback: executa fetch() dentro do browser já autenticado pelo WAF."""
487
- logger.info("Fallback Playwright fetch...")
488
  try:
489
  with sync_playwright() as p:
490
- browser = p.chromium.launch(
491
- headless=True, args=["--no-sandbox", "--disable-dev-shm-usage"]
492
- )
493
  context = browser.new_context(
494
- viewport={"width": 1920, "height": 1080},
495
- user_agent=HEADERS_BASE["User-Agent"],
496
  )
497
  page = context.new_page()
498
- page.goto(
499
- "https://jurisprudencia.stf.jus.br/pages/search",
500
- wait_until="domcontentloaded",
501
- timeout=30000,
502
- )
503
  page.wait_for_timeout(3000)
504
-
505
  cookies = context.cookies()
506
- token = next(
507
- (c["value"] for c in cookies if c["name"] == "aws-waf-token"), None
508
- )
509
-
510
- result = page.evaluate(
511
- """async (payload) => {
 
512
  try {
513
- const r = await fetch(
514
- 'https://jurisprudencia.stf.jus.br/api/search/search',
515
- {
516
- method: 'POST',
517
- headers: {
518
- 'Content-Type': 'application/json',
519
- 'Accept': 'application/json'
520
- },
521
- body: JSON.stringify(payload)
522
- }
523
- );
524
- return r.ok
525
- ? { success: true, data: await r.json() }
526
- : { success: false, status: r.status };
527
- } catch(e) {
528
- return { success: false, error: e.toString() };
529
  }
530
- }""",
531
- payload,
532
- )
533
  browser.close()
534
-
535
- if token:
536
- token_cache["token"] = token
537
- token_cache["expires_at"] = time.time() + 3300
538
-
539
- if result.get("success"):
540
- return {"success": True, "data": result["data"], "token": token}
541
-
542
- return {
543
- "success": False,
544
- "error": result.get("error", "Falha desconhecida"),
545
- "status": result.get("status"),
546
- }
547
  except Exception as e:
548
- logger.error(f"Erro no Playwright fetch: {e}")
549
  return {"success": False, "error": str(e)}
550
 
551
-
552
  # ============================================
553
- # HELPERS DE RESPOSTA
554
  # ============================================
555
- def _extract_first(d: dict, *keys, default=""):
556
- for k in keys:
557
- v = d.get(k)
558
- if v:
559
- return v
560
- return default
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
561
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
562
 
563
- def _format_hits(hits: list) -> list:
564
- results = []
565
- for hit in hits:
566
- src = hit.get("_source", {})
567
- hl = hit.get("highlight", {})
568
- ementa = _extract_first(
569
- hl, "ementa_texto",
570
- default=src.get("ementa_texto", ""),
571
- )
572
- if isinstance(ementa, list):
573
- ementa = ementa[0] if ementa else ""
574
-
575
- results.append({
576
- "id": src.get("id"),
577
- "titulo": src.get("titulo"),
578
- "processo": src.get("processo_codigo_completo") or src.get("processo_numero"),
579
- "classe": src.get("processo_classe_processual_unificada_extenso"),
580
- "orgao": src.get("orgao_julgador"),
581
- "relator": src.get("relator_processo_nome") or src.get("relator_acordao_nome"),
582
- "data_julgamento": src.get("julgamento_data"),
583
- "data_publicacao": src.get("publicacao_data"),
584
- "ramo_direito": src.get("ramo_direito"),
585
- "is_repercussao_geral": src.get("is_repercussao_geral"),
586
- "ementa": ementa.strip() if ementa else "",
587
- "tese": src.get("documental_tese_texto"),
588
- "url_inteiro_teor": src.get("inteiro_teor_url"),
589
- "url_acompanhamento": src.get("acompanhamento_processual_url"),
590
- "url_dje": src.get("dje_url"),
591
- "score": hit.get("_score"),
592
- })
593
- return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
594
 
 
 
 
 
 
595
 
596
  # ============================================
597
- # ENDPOINTS
598
  # ============================================
599
- @app.route("/busca")
600
- def busca():
601
  """
602
- GET /busca?q=termo&page=1&size=20&sort=_score&base=acordaos
 
 
603
  """
604
- query = request.args.get("q", "").strip()
605
  if not query:
606
- return jsonify({"error": "Parâmetro 'q' é obrigatório"}), 400
607
-
608
- page = max(1, int(request.args.get("page", 1)))
609
- size = min(int(request.args.get("size", 20)), 250)
610
- sort = request.args.get("sort", "_score") # _score | julgamento_data | publicacao_data
611
- sort_order = request.args.get("order", "desc") # asc | desc
612
- base = request.args.get("base", "acordaos") # acordaos | sumulas | decisoes | novo_informativo
613
-
614
- from_ = (page - 1) * size
615
 
616
  token = get_fresh_token()
617
  if not token:
618
- return jsonify({"success": False, "error": "Falha ao obter token WAF"}), 500
619
-
620
- success, data, nivel = try_search_cascade(
621
- token, query, size, from_, sort, sort_order, base
622
- )
623
-
624
- if not success:
625
- # Fallback Playwright
626
- fallback_payload = build_payload(
627
- term=query, size=size, from_=from_,
628
- sort=sort, sort_order=sort_order, base=base,
629
- include_aggs=False, include_highlight=True,
630
- )
631
- res = playwright_fetch(fallback_payload)
632
- if res.get("success"):
633
- data = res["data"]
634
- nivel = "playwright"
635
- success = True
636
-
637
- if not success:
638
- return jsonify({"success": False, "error": "Todas as tentativas falharam"}), 500
639
-
640
- # Normaliza estrutura da resposta (API retorna result.hits.hits ou hits.hits)
641
- root = data.get("result", data)
642
- hits_obj = root.get("hits", {})
643
- total = hits_obj.get("total", {}).get("value", 0)
644
- hits = hits_obj.get("hits", [])
645
-
646
- # Aggregations (facets), quando disponíveis
647
- aggs = root.get("aggregations", data.get("aggregations"))
648
 
649
- return jsonify({
650
- "success": True,
651
- "query": query,
652
- "base": base,
653
- "total": total,
654
- "page": page,
655
- "size": size,
656
- "nivel_payload": nivel,
657
- "aggregations": aggs,
658
- "results": _format_hits(hits),
659
- })
 
 
 
 
 
 
660
 
 
 
661
 
662
- @app.route("/health")
663
- def health():
664
- playwright_ok = False
665
  try:
666
- with sync_playwright() as p:
667
- p.chromium.launch(headless=True, args=["--no-sandbox"]).close()
668
- playwright_ok = True
669
- except Exception:
670
- pass
671
 
672
- # Ping mínimo na API
673
- api_ok = False
674
- total_docs = None
675
- token = get_fresh_token()
676
- if token:
677
- res = search_with_token(
678
- token,
679
- {"size": 0, "query": {"match_all": {}}, "track_total_hits": True},
680
- )
681
- if res.get("success"):
682
- api_ok = True
683
- total_docs = (
684
- res["data"]
685
- .get("hits", {})
686
- .get("total", {})
687
- .get("value")
688
- )
 
 
 
 
 
 
 
689
 
690
  return jsonify({
691
- "status": "healthy" if playwright_ok and api_ok else "degraded",
692
- "timestamp": time.time(),
693
- "playwright_ready": playwright_ok,
694
- "api_reachable": api_ok,
695
- "token_cached": bool(token_cache["token"]),
696
- "token_expires_in": max(0, token_cache["expires_at"] - time.time()),
697
- "total_docs": total_docs,
698
- "python_version": sys.version,
699
  })
700
 
701
-
702
- # ============================================
703
- # MAIN
704
- # ============================================
705
- if __name__ == "__main__":
706
  try:
707
  import certifi
708
- os.environ["SSL_CERT_FILE"] = certifi.where()
709
- os.environ["REQUESTS_CA_BUNDLE"] = certifi.where()
710
- except ImportError:
711
  pass
712
-
713
- logger.info("=" * 50)
714
- logger.info("🚀 STF Jurisprudência — Para.AI")
715
- logger.info(f"📋 _source fields: {len(_SOURCE_FIELDS)}")
716
- logger.info("=" * 50)
717
-
718
  try:
719
  with sync_playwright() as p:
720
- p.chromium.launch(headless=True, args=["--no-sandbox"]).close()
721
- logger.info("✅ Playwright pronto")
722
  except Exception as e:
723
- logger.warning(f"⚠️ Playwright: {e}")
724
-
725
- port = int(os.environ.get("PORT", 7860))
726
- app.run(host="0.0.0.0", port=port, debug=False)
 
1
+
2
  import os
3
  import sys
4
  import json
5
  import time
 
6
  import logging
7
  import requests
8
  import urllib3
9
+ from flask import Flask, request, jsonify, render_template_string
10
  from playwright.sync_api import sync_playwright
11
+ import traceback
12
 
13
+ # Suprimir warnings de SSL
14
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
15
+
16
+ # Configuração de logging
17
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
18
  logger = logging.getLogger(__name__)
 
19
 
20
+ app = Flask(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  # ============================================
23
+ # PAYLOAD COMPLETO (com todos os campos e highlight)
24
  # ============================================
25
+ PAYLOAD_COMPLETO = {
26
+ "query": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  "function_score": {
28
  "functions": [
29
+ {"exp": {"julgamento_data": {"origin": "now", "scale": "47450d", "offset": "1095d", "decay": 0.1}}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  ],
31
  "query": {
32
  "bool": {
33
+ "filter": [
34
+ {"query_string": {
35
+ "default_operator": "AND",
36
+ "fields": [
37
+ "processo_codigo_completo.plural",
38
+ "acordao_ata.plural^3",
39
+ "documental_acordao_mesmo_sentido_lista_texto.plural",
40
+ "documental_doutrina_texto.plural",
41
+ "documental_indexacao_texto.plural",
42
+ "documental_jurisprudencia_citada_texto.plural",
43
+ "documental_legislacao_citada_texto.plural",
44
+ "documental_observacao_texto.plural",
45
+ "documental_publicacao_lista_texto.plural",
46
+ "documental_tese_tema_texto.plural^3",
47
+ "documental_tese_texto.plural^3",
48
+ "ementa_texto.plural^3",
49
+ "ministro_facet.plural",
50
+ "revisor_processo_nome.plural",
51
+ "orgao_julgador.plural",
52
+ "partes_lista_texto.plural",
53
+ "procedencia_geografica_completo.plural",
54
+ "processo_classe_processual_unificada_extenso.plural",
55
+ "titulo.plural^6",
56
+ "colac_numero.plural",
57
+ "colac_pagina.plural",
58
+ "decisao_texto.plural^2",
59
+ "documental_decisao_mesmo_sentido_lista_texto.plural",
60
+ "processo_precedente_texto.plural",
61
+ "sumula_texto.plural^3",
62
+ "ramo_direito.plural^1",
63
+ "situacao_sumula.plural^1",
64
+ "materia_noticia.plural^1",
65
+ "titulo_noticia.plural^3",
66
+ "resumo_noticia.plural^3",
67
+ "conteudo_noticia.plural^1",
68
+ "ramo_noticia.plural^1"
69
+ ],
70
+ "query": "*",
71
+ "type": "cross_fields",
72
+ "fuzziness": "AUTO:4,7"
73
+ }}
74
+ ]
75
  }
76
+ }
77
  }
78
+ },
79
+ "_source": [
80
+ "base", "_id", "id", "dg_unique",
81
+ "titulo", "ministro_facet", "orgao_julgador",
82
+ "procedencia_geografica_completo",
83
+ "procedencia_geografica_pais_sigla",
84
+ "procedencia_geografica_uf_sigla",
85
+ "procedencia_geografica_uf_extenso",
86
+ "processo_codigo_completo",
87
+ "processo_classe_processual_unificada_extenso",
88
+ "processo_classe_processual_unificada_classe_sigla",
89
+ "processo_classe_processual_unificada_incidente_sigla",
90
+ "processo_classe_processual_unificada_sigla",
91
+ "processo_numero",
92
+ "processo_lista_texto",
93
+ "julgamento_data",
94
+ "publicacao_data",
95
+ "republicacao_data",
96
+ "periodo_inicio_data",
97
+ "periodo_fim_data",
98
+ "dg_atualizado_em",
99
+ "is_decisao_presidencia",
100
+ "relator_processo_nome",
101
+ "relator_decisao_nome",
102
+ "relator_acordao_nome",
103
+ "presidente_nome",
104
+ "revisor_processo_nome",
105
+ "ementa_texto",
106
+ "acordao_ata",
107
+ "decisao_texto",
108
+ "inteiro_teor_url",
109
+ "sumula_texto",
110
+ "partes_lista_texto",
111
+ "acompanhamento_processual_url",
112
+ "dje_url",
113
+ "informativo_url",
114
+ "pesquisa_url",
115
+ "audio_url",
116
+ "video_url",
117
+ "numero_noticias_url",
118
+ "aprovacao_url",
119
+ "documental_publicacao_lista_texto",
120
+ "documental_decisao_mesmo_sentido_lista_texto",
121
+ "documental_decisao_mesmo_sentido_lista_html",
122
+ "documental_decisao_mesmo_sentido_is_secundario",
123
+ "documental_legislacao_citada_texto",
124
+ "documental_jurisprudencia_citada_texto",
125
+ "documental_indexacao_texto",
126
+ "documental_observacao_texto",
127
+ "documental_observacao_html",
128
+ "documental_doutrina_texto",
129
+ "documental_acordao_mesmo_sentido_lista_texto",
130
+ "documental_acordao_mesmo_sentido_lista_html",
131
+ "documental_acordao_mesmo_sentido_is_secundario",
132
+ "documental_assunto_texto",
133
+ "documental_tese_tipo",
134
+ "documental_tese_texto",
135
+ "documental_tese_tema_texto",
136
+ "externo_seq_objeto_incidente",
137
+ "volume_informativo",
138
+ "ramo_noticia",
139
+ "materia_noticia",
140
+ "titulo_noticia",
141
+ "resumo_noticia",
142
+ "conteudo_noticia",
143
+ "numero_noticias_processo",
144
+ "is_covid",
145
+ "tipo_julgamento",
146
+ "julgamento_is_sessao_virtual",
147
+ "sumula_numero",
148
+ "is_vinculante",
149
+ "situacao_sumula",
150
+ "ramo_direito",
151
+ "processo_precedente_texto",
152
+ "processo_precedente_html",
153
+ "is_questao_ordem",
154
+ "is_repercussao_geral_admissibilidade",
155
+ "is_repercussao_geral_merito",
156
+ "is_repercussao_geral_recurso_interno",
157
+ "is_repercussao_geral",
158
+ "is_processo_antigo",
159
+ "is_colac",
160
+ "colac_numero",
161
+ "colac_pagina",
162
+ "old_seq_colac",
163
+ "old_seq_repercussao_geral",
164
+ "old_seq_sjur",
165
+ "ods_onu"
166
+ ],
167
+ "aggs": {
168
  "base_agg": {
169
  "filters": {
170
  "filters": {
171
  "acordaos": {"match": {"base": "acordaos"}},
172
  "sumulas": {"match": {"base": "sumulas"}},
173
  "decisoes": {"match": {"base": "decisoes"}},
174
+ "informativos": {"match": {"base": "novo_informativo"}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  }
176
  }
177
  },
178
  "orgao_julgador_agg": {
179
  "aggs": {
180
  "orgao_julgador_agg": {
181
+ "terms": {"field": "orgao_julgador.keyword", "size": 10}
 
 
 
 
182
  }
183
+ }
 
184
  },
185
  "ministro_facet_agg": {
186
  "aggs": {
187
  "ministro_facet_agg": {
188
+ "terms": {"field": "ministro_facet.keyword", "size": 10}
 
 
 
 
189
  }
190
+ }
 
191
  },
192
+ "procedencia_geografica_uf_sigla_agg": {
193
+ "aggs": {
194
+ "procedencia_geografica_uf_sigla_agg": {
195
+ "terms": {"field": "procedencia_geografica_uf_sigla", "size": 10}
196
+ }
197
+ }
198
+ }
199
+ },
200
+ "highlight": {
 
 
 
201
  "fields": {
202
+ "ementa_texto": {"matched_fields": ["ementa_texto.plural"], "type": "fvh", "fragment_size": 24000},
203
+ "sumula_texto": {"matched_fields": ["sumula_texto.plural"], "type": "fvh", "number_of_fragments": 0},
204
+ "materia_noticia": {"matched_fields": ["materia_noticia.plural"], "type": "fvh"},
205
+ "titulo_noticia": {"matched_fields": ["titulo_noticia.plural"], "type": "fvh"},
206
+ "resumo_noticia": {"matched_fields": ["resumo_noticia.plural"], "type": "fvh", "fragment_size": 5000},
207
+ "conteudo_noticia": {"matched_fields": ["conteudo_noticia.plural"], "type": "fvh", "fragment_size": 50000},
208
+ "acordao_ata": {"matched_fields": ["acordao_ata.plural"], "type": "fvh", "fragment_size": 600},
209
+ "decisao_texto": {"matched_fields": ["decisao_texto.plural"], "type": "fvh", "fragment_size": 1200},
210
+ "documental_tese_texto": {"matched_fields": ["documental_tese_texto.plural"], "type": "fvh", "fragment_size": 2000},
211
+ "documental_tese_tema_texto": {"matched_fields": ["documental_tese_tema_texto.plural"], "type": "fvh", "fragment_size": 2000},
212
+ "documental_observacao_texto": {"matched_fields": ["documental_observacao_texto.plural"], "type": "fvh"},
213
+ "documental_indexacao_texto": {"matched_fields": ["documental_indexacao_texto.plural"], "type": "fvh"},
214
+ "documental_legislacao_citada_texto": {"matched_fields": ["documental_legislacao_citada_texto.plural"], "type": "fvh"},
215
+ "documental_jurisprudencia_citada_texto": {"matched_fields": ["documental_jurisprudencia_citada_texto.plural"], "type": "fvh"},
216
+ "documental_doutrina_texto": {"matched_fields": ["documental_doutrina_texto.plural"], "type": "fvh"},
217
+ "partes_lista_texto": {"matched_fields": ["partes_lista_texto.plural"], "type": "fvh"},
218
+ "documental_publicacao_lista_texto": {"matched_fields": ["documental_publicacao_lista_texto.plural"], "type": "fvh"},
219
+ "documental_acordao_mesmo_sentido_lista_texto": {"matched_fields": ["documental_acordao_mesmo_sentido_lista_texto.plural"], "type": "fvh"},
220
+ "documental_decisao_mesmo_sentido_lista_texto": {"matched_fields": ["documental_decisao_mesmo_sentido_lista_texto.plural"], "type": "fvh"},
221
+ "processo_precedente_texto": {"matched_fields": ["processo_precedente_texto.plural"], "type": "fvh"},
222
+ "procedencia_geografica_completo": {"matched_fields": ["procedencia_geografica_completo.plural"], "type": "fvh"}
 
 
 
 
 
 
 
 
 
223
  },
224
+ "pre_tags": ["<em>"],
225
+ "post_tags": ["</em>"],
226
+ "fragment_size": 300,
227
+ "number_of_fragments": 64,
228
+ "order": "score"
229
+ },
230
+ "size": 100,
231
+ "from": 0,
232
+ "sort": [{"julgamento_data": {"order": "desc"}}],
233
+ "track_total_hits": True
234
+ }
 
 
 
 
 
 
 
 
 
 
235
 
236
# Payload without highlight and without aggs (only _source and query).
# NOTE: "query" and "_source" are the same objects as in PAYLOAD_COMPLETO
# (shared by reference, not copied).
PAYLOAD_SEM_HIGHLIGHT = {
    "query": PAYLOAD_COMPLETO["query"],
    "_source": PAYLOAD_COMPLETO["_source"],
    "size": 100,
    "from": 0,
    "sort": [{"julgamento_data": {"order": "desc"}}],
    "track_total_hits": True
}

# Minimal payload (only the essentials, tested previously).
PAYLOAD_MINIMO = {
    "query": {
        "bool": {
            "filter": [
                {"term": {"base": "acordaos"}}
            ]
        }
    },
    "_source": ["id", "titulo", "ementa_texto", "processo_numero", "julgamento_data", "relator_processo_nome", "inteiro_teor_url"],
    "size": 100,
    "sort": [{"julgamento_data": {"order": "desc"}}],
    "track_total_hits": True
}

# Payload template for lookup by ID (full version, with highlight).
# The empty "values" list is a placeholder for the document ID.
PAYLOAD_POR_ID_COMPLETO = {
    "query": {"ids": {"values": []}},
    "_source": PAYLOAD_COMPLETO["_source"],
    "highlight": PAYLOAD_COMPLETO["highlight"],
    "size": 1
}

# Payload template for lookup by ID (version without highlight).
# The empty "values" list is a placeholder for the document ID.
PAYLOAD_POR_ID_SIMPLES = {
    "query": {"ids": {"values": []}},
    "_source": PAYLOAD_COMPLETO["_source"],
    "size": 1
}

# API constants
URL_API = "https://jurisprudencia.stf.jus.br/api/search/search"
URL_API_GET = "https://jurisprudencia.stf.jus.br/api/search/get"  # endpoint used to fetch a document by ID
# Base request headers; callers copy this dict and add the Cookie header.
HEADERS = {
    "Accept": "application/json, text/plain, */*",
    "Content-Type": "application/json",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "Referer": "https://jurisprudencia.stf.jus.br/pages/search",
    "Origin": "https://jurisprudencia.stf.jus.br"
}

# Token cache: "expires_at" is a time.time() timestamp after which the
# cached token is no longer reused.
token_cache = {"token": None, "expires_at": 0}
289
 
290
  # ============================================
291
+ # HTML TEMPLATE COMPLETO (mantido para interface web)
292
  # ============================================
293
+ HTML_TEMPLATE = """
294
+ <!DOCTYPE html>
295
+ <html>
296
+ <head>
297
+ <title>⚖️ STF Jurisprudência - Visualizador Completo</title>
298
+ <meta charset="utf-8">
299
+ <meta name="viewport" content="width=device-width, initial-scale=1">
300
+ <style>
301
+ /* (estilos completos, mantidos da versão anterior) */
302
+ body { font-family: 'Segoe UI', Roboto, system-ui, sans-serif; max-width: 1600px; margin: 0 auto; padding: 20px; background: #f0f2f5; }
303
+ .container { background: white; border-radius: 12px; padding: 30px; box-shadow: 0 8px 20px rgba(0,0,0,0.1); }
304
+ h1 { color: #1a1a2e; border-bottom: 3px solid #4CAF50; padding-bottom: 15px; display: flex; align-items: center; gap: 10px; }
305
+ .info-box { background: #e8f0fe; border-left: 5px solid #2196F3; padding: 15px 20px; margin: 20px 0; border-radius: 8px; }
306
+ .stats-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; margin: 25px 0; }
307
+ .stat-card { background: white; border: 1px solid #e0e0e0; border-radius: 10px; padding: 20px; text-align: center; box-shadow: 0 2px 8px rgba(0,0,0,0.05); }
308
+ .stat-value { font-size: 32px; font-weight: bold; color: #1a73e8; }
309
+ .stat-label { color: #5f6368; font-size: 14px; margin-top: 8px; text-transform: uppercase; letter-spacing: 0.5px; }
310
+ .button-group { display: flex; gap: 15px; flex-wrap: wrap; margin: 25px 0; }
311
+ button { background: #1a73e8; color: white; border: none; padding: 14px 28px; font-size: 16px; font-weight: 500; border-radius: 8px; cursor: pointer; transition: all 0.3s; display: inline-flex; align-items: center; gap: 10px; box-shadow: 0 2px 8px rgba(26,115,232,0.3); }
312
+ button:hover { background: #1557b0; transform: translateY(-2px); box-shadow: 0 4px 12px rgba(26,115,232,0.4); }
313
+ button:disabled { background: #a0a0a0; cursor: not-allowed; transform: none; box-shadow: none; }
314
+ .loading { display: inline-block; width: 20px; height: 20px; border: 3px solid rgba(255,255,255,0.3); border-top: 3px solid white; border-radius: 50%; animation: spin 1s linear infinite; margin-right: 10px; vertical-align: middle; }
315
+ @keyframes spin { 0% { transform: rotate(0deg); } 100% { transform: rotate(360deg); } }
316
+ pre { background: #f8f9fa; border: 1px solid #e0e0e0; border-radius: 10px; padding: 20px; overflow: auto; max-height: 800px; font-size: 13px; font-family: 'Consolas', 'Monaco', monospace; box-shadow: inset 0 2px 4px rgba(0,0,0,0.05); }
317
+ .success { color: #0f9d58; background: #e6f4ea; border-left: 5px solid #34a853; padding: 15px 20px; margin: 15px 0; border-radius: 8px; font-weight: 500; }
318
+ .error { color: #d93025; background: #fce8e6; border-left: 5px solid #ea4335; padding: 15px 20px; margin: 15px 0; border-radius: 8px; font-weight: 500; }
319
+ .token-box { background: #1a1a2e; color: #e0e0e0; padding: 15px; border-radius: 8px; font-family: 'Consolas', monospace; word-break: break-all; margin: 15px 0; border: 1px solid #2a2a3e; }
320
+ .token-label { color: #f9ab00; font-weight: bold; margin-bottom: 8px; display: block; }
321
+ .filters { background: #f8f9fa; border-radius: 8px; padding: 20px; margin: 20px 0; border: 1px solid #e0e0e0; }
322
+ .filter-group { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; align-items: end; }
323
+ .filter-item { min-width: 200px; }
324
+ .filter-item label { display: block; margin-bottom: 8px; color: #5f6368; font-size: 14px; font-weight: 500; }
325
+ .filter-item input, .filter-item select { width: 100%; padding: 10px; border: 1px solid #ddd; border-radius: 6px; font-size: 14px; }
326
+ .campo-lista { background: #f8f9fa; border: 1px solid #e9ecef; border-radius: 8px; padding: 15px; margin: 10px 0; }
327
+ .campo-nome { font-weight: bold; color: #2c3e50; font-size: 14px; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 5px; }
328
+ .campo-valor { color: #1a1a2e; font-size: 14px; word-break: break-word; font-family: 'Consolas', monospace; background: white; padding: 8px; border-radius: 4px; border: 1px solid #e0e0e0; }
329
+ .texto-completo { max-height: 400px; overflow-y: auto; background: #f1f8fe; padding: 15px; border-radius: 8px; border: 1px solid #b8daf5; margin: 10px 0; white-space: pre-wrap; font-family: inherit; line-height: 1.5; }
330
+ .nav-tabs { display: flex; gap: 5px; margin: 20px 0; flex-wrap: wrap; border-bottom: 2px solid #e0e0e0; padding-bottom: 10px; }
331
+ .nav-tab { padding: 10px 20px; cursor: pointer; background: white; border: 1px solid #e0e0e0; border-radius: 8px 8px 0 0; margin-bottom: -2px; font-weight: 500; transition: all 0.2s; }
332
+ .nav-tab.active { background: #1a73e8; color: white; border-color: #1a73e8; }
333
+ .tab-content { display: none; padding: 20px; background: white; border: 1px solid #e0e0e0; border-top: none; border-radius: 0 0 8px 8px; }
334
+ .tab-content.active { display: block; }
335
+ .campo-card { background: #f8f9fa; border: 1px solid #e9ecef; border-radius: 8px; padding: 15px; margin: 10px 0; }
336
+ .url-link { color: #1a73e8; text-decoration: none; word-break: break-all; }
337
+ .url-link:hover { text-decoration: underline; }
338
+ .highlight { background-color: #f9ab00; color: #1a1a2e; font-weight: bold; padding: 2px 4px; border-radius: 4px; }
339
+ .highlight-box { background: #fef7e0; border: 1px solid #f9ab00; padding: 10px; border-radius: 8px; margin: 10px 0; }
340
+ </style>
341
+ </head>
342
+ <body>
343
+ <div class="container">
344
+ <h1>STF Jurisprudência - Visualizador Completo</h1>
345
+ <div class="info-box">
346
+ <strong>📌 API de Jurisprudência do STF - Todos os Campos com Highlight</strong><br>
347
+ • Total de campos disponíveis: <span id="totalCampos">102</span><br>
348
+ • Documentos disponíveis: <span id="totalDocs">carregando...</span>
349
+ </div>
350
+ <div class="stats-grid">
351
+ <div class="stat-card"><div class="stat-value" id="requestsCount">0</div><div class="stat-label">Requisições</div></div>
352
+ <div class="stat-card"><div class="stat-value" id="successCount">0</div><div class="stat-label">Sucessos</div></div>
353
+ <div class="stat-card"><div class="stat-value" id="failCount">0</div><div class="stat-label">Falhas</div></div>
354
+ <div class="stat-card"><div class="stat-value" id="docsCount">0</div><div class="stat-label">Docs Obtidos</div></div>
355
+ </div>
356
+ <div class="filters">
357
+ <h3>🔍 Filtros de Busca</h3>
358
+ <div class="filter-group">
359
+ <div class="filter-item"><label>Quantidade</label><select id="pageSize"><option value="5">5</option><option value="10">10</option><option value="25">25</option><option value="50">50</option><option value="100" selected>100</option></select></div>
360
+ <div class="filter-item"><label>Ordenar por</label><select id="sortOrder"><option value="desc">Mais recentes</option><option value="asc">Mais antigos</option></select></div>
361
+ <div class="filter-item"><label>Base</label><select id="base"><option value="acordaos">Acórdãos</option><option value="decisoes">Decisões</option><option value="sumulas">Súmulas</option><option value="informativos">Informativos</option><option value="">Todas</option></select></div>
362
+ <div class="filter-item"><label>Busca por ID</label><input type="text" id="docId" placeholder="Ex: sjur505215"></div>
363
+ </div>
364
+ </div>
365
+ <div class="button-group">
366
+ <button id="testBtn" onclick="runSearch()"><span class="loading" id="loading" style="display:none;"></span>🔍 Buscar Documentos</button>
367
+ <button id="getByIdBtn" class="secondary" onclick="getDocumentById()">📄 Buscar por ID</button>
368
+ <button id="downloadBtn" class="download" onclick="downloadJSON()" disabled>📥 Download JSON</button>
369
+ <button id="copyBtn" class="secondary" onclick="copyToken()">🔑 Copiar Token</button>
370
+ </div>
371
+ <div id="tokenDisplay" style="display:none;" class="token-box"><span class="token-label">🔐 Token AWS WAF</span><span id="tokenValue"></span></div>
372
+ <div id="result" style="margin-top:25px;"></div>
373
+ </div>
374
+ <script>
375
+ let lastResult = null, lastToken = null, requestsCount = 0, successCount = 0, failCount = 0, docsCount = 0;
376
+ function renderCampo(nome, valor, tipo='normal') {
377
+ if (!valor && valor!==0) return `<div class="campo-card"><div class="campo-nome">${nome}</div><div class="campo-vazio">(vazio)</div></div>`;
378
+ if (tipo==='url') return `<div class="campo-card"><div class="campo-nome">${nome}</div><div class="campo-valor"><a href="${valor}" target="_blank" class="url-link">${valor}</a></div></div>`;
379
+ if (tipo==='texto' && valor.length>200) return `<div class="campo-card"><div class="campo-nome">${nome}</div><div class="texto-completo">${valor.replace(/\\n/g,'<br>').replace(/<em>/g,'<span class="highlight">').replace(/<\\/em>/g,'</span>')}</div></div>`;
380
+ return `<div class="campo-card"><div class="campo-nome">${nome}</div><div class="campo-valor">${String(valor).replace(/\\n/g,'<br>').replace(/<em>/g,'<span class="highlight">').replace(/<\\/em>/g,'</span>')}</div></div>`;
381
+ }
382
+ function displayResult(data) {
383
+ if (!data?.result?.hits?.hits) return document.getElementById('result').innerHTML = '<div class="warning">⚠️ Nenhum documento encontrado</div>';
384
+ const hits = data.result.hits.hits, total = data.result.hits.total?.value || 0;
385
+ let html = `<div class="success">✅ Encontrados ${total.toLocaleString()} documentos. Exibindo ${hits.length} resultados.</div>`;
386
+ hits.forEach((hit, idx) => {
387
+ const src = hit._source || {}, hl = hit.highlight || {}, docId = src.id || hit._id || `doc_${idx}`;
388
+ html += `<div class="campo-lista"><h3 style="display:flex;justify-content:space-between;"><span>📄 ${idx+1}: ${src.titulo || src.processo_codigo_completo || docId}</span><span class="badge">ID: ${docId}</span></h3>`;
389
+ html += `<div style="margin-bottom:10px;"><button class="secondary" onclick="fetchFullDocument('${docId}')">📄 Obter texto completo</button></div>`;
390
+ html += `<div style="display:flex;gap:5px;margin:15px 0;flex-wrap:wrap;">`;
391
+ ['geral','ementa','acordao','legislacao','highlight','completo'].forEach(t => html += `<button class="badge" style="cursor:pointer;" onclick="showDocTab('${t}-${idx}')">${t}</button>`);
392
+ html += `</div>`;
393
+ html += `<div id="doc-tab-geral-${idx}" class="doc-tab" style="display:block;">`;
394
+ html += renderCampo('Processo', src.processo_codigo_completo);
395
+ html += renderCampo('Classe', src.processo_classe_processual_unificada_extenso);
396
+ html += renderCampo('Órgão Julgador', src.orgao_julgador);
397
+ html += renderCampo('Relator', src.relator_processo_nome);
398
+ html += renderCampo('Relator Acórdão', src.relator_acordao_nome);
399
+ html += renderCampo('Ministros', src.ministro_facet);
400
+ html += renderCampo('Data Julgamento', src.julgamento_data);
401
+ html += renderCampo('Data Publicação', src.publicacao_data);
402
+ html += renderCampo('Procedência', src.procedencia_geografica_completo);
403
+ html += `</div>`;
404
+ html += `<div id="doc-tab-ementa-${idx}" class="doc-tab" style="display:none;">`;
405
+ html += renderCampo('Ementa', src.ementa_texto, 'texto');
406
+ html += renderCampo('Tese', src.documental_tese_texto, 'texto');
407
+ html += renderCampo('Tema', src.documental_tese_tema_texto);
408
+ html += `</div>`;
409
+ html += `<div id="doc-tab-acordao-${idx}" class="doc-tab" style="display:none;">`;
410
+ html += renderCampo('Acórdão/Ata', src.acordao_ata, 'texto');
411
+ html += renderCampo('Decisão', src.decisao_texto, 'texto');
412
+ html += renderCampo('Súmula', src.sumula_texto, 'texto');
413
+ html += renderCampo('Indexação', src.documental_indexacao_texto, 'texto');
414
+ html += renderCampo('Observações', src.documental_observacao_texto, 'texto');
415
+ html += `</div>`;
416
+ html += `<div id="doc-tab-legislacao-${idx}" class="doc-tab" style="display:none;">`;
417
+ html += renderCampo('Legislação Citada', src.documental_legislacao_citada_texto, 'texto');
418
+ html += renderCampo('Jurisprudência Citada', src.documental_jurisprudencia_citada_texto, 'texto');
419
+ html += renderCampo('Doutrina', src.documental_doutrina_texto, 'texto');
420
+ html += renderCampo('Precedentes', src.processo_precedente_texto, 'texto');
421
+ html += `</div>`;
422
+ html += `<div id="doc-tab-highlight-${idx}" class="doc-tab" style="display:none;">`;
423
+ if (Object.keys(hl).length) {
424
+ for (const [campo, valores] of Object.entries(hl)) {
425
+ html += `<div class="campo-card"><div class="campo-nome">${campo}</div>`;
426
+ valores.forEach(valor => html += `<div class="highlight-box">${valor.replace(/<em>/g,'<span class="highlight">').replace(/<\\/em>/g,'</span>')}</div>`);
427
+ html += `</div>`;
428
+ }
429
+ } else html += `<div class="campo-vazio">Nenhum termo destacado</div>`;
430
+ html += `</div>`;
431
+ html += `<div id="doc-tab-completo-${idx}" class="doc-tab" style="display:none;">`;
432
+ html += `<pre>${JSON.stringify(src, null, 2).replace(/<em>/g,'<span class="highlight">').replace(/<\\/em>/g,'</span>')}</pre>`;
433
+ html += `</div>`;
434
+ if (src.inteiro_teor_url) html += `<div style="margin-top:15px;padding:10px;background:#e8f0fe;border-radius:8px;"><strong>🔗 Links:</strong><br><a href="${src.inteiro_teor_url}" target="_blank" class="url-link">📄 Inteiro Teor</a></div>`;
435
+ html += `</div>`;
436
+ });
437
+ document.getElementById('result').innerHTML = html;
438
+ }
439
// Show one tab of a result card and hide the other tabs of that same card.
// tabId has the form "<tab>-<index>", e.g. "ementa-3"; panels are rendered
// with ids of the form "doc-tab-<tab>-<index>".
function showDocTab(tabId) {
    // The result index is the LAST dash-separated segment of tabId.
    const idx = tabId.slice(tabId.lastIndexOf('-') + 1);
    // endsWith (not includes) so that index 1 does not also match 10, 11, ...
    document.querySelectorAll('[id^="doc-tab-"]').forEach(el => {
        if (el.id.endsWith(`-${idx}`)) el.style.display = 'none';
    });
    const sel = document.getElementById(`doc-tab-${tabId}`);
    if (sel) sel.style.display = 'block';
}
445
+ async function runSearch() {
446
+ const btn = document.getElementById('testBtn'), loading = document.getElementById('loading'), resultDiv = document.getElementById('result');
447
+ btn.disabled = true; loading.style.display = 'inline-block'; resultDiv.innerHTML = '<div class="info-box">⏳ Executando busca...</div>';
448
+ try {
449
+ const resp = await fetch('/api/search-advanced', {
450
+ method:'POST', headers:{'Content-Type':'application/json'},
451
+ body:JSON.stringify({ pageSize: parseInt(document.getElementById('pageSize').value), sortOrder: document.getElementById('sortOrder').value, base: document.getElementById('base').value || undefined })
452
+ });
453
+ const data = await resp.json();
454
+ requestsCount++; document.getElementById('requestsCount').textContent = requestsCount;
455
+ if (data.success) {
456
+ successCount++; document.getElementById('successCount').textContent = successCount;
457
+ if (data.data?.result?.hits) {
458
+ docsCount = data.data.result.hits.hits.length;
459
+ document.getElementById('docsCount').textContent = docsCount;
460
+ if (data.data.result.hits.total?.value) document.getElementById('totalDocs').textContent = data.data.result.hits.total.value.toLocaleString();
461
+ }
462
+ lastResult = data.data; lastToken = data.token;
463
+ if (lastToken) { document.getElementById('tokenValue').textContent = lastToken; document.getElementById('tokenDisplay').style.display = 'block'; document.getElementById('downloadBtn').disabled = false; }
464
+ displayResult(data.data);
465
+ } else {
466
+ failCount++; document.getElementById('failCount').textContent = failCount;
467
+ resultDiv.innerHTML = '<div class="error">❌ Falha</div><pre>'+JSON.stringify(data,null,2)+'</pre>';
468
+ }
469
+ } catch(e) {
470
+ failCount++; document.getElementById('failCount').textContent = failCount;
471
+ resultDiv.innerHTML = '<div class="error">❌ Erro: '+e.message+'</div>';
472
+ } finally {
473
+ btn.disabled = false; loading.style.display = 'none';
474
+ }
475
+ }
476
+ async function getDocumentById() {
477
+ const docId = document.getElementById('docId').value.trim();
478
+ if (!docId) return alert('Digite um ID');
479
+ const btn = document.getElementById('getByIdBtn'), loading = document.getElementById('loading'), resultDiv = document.getElementById('result');
480
+ btn.disabled = true; loading.style.display = 'inline-block'; resultDiv.innerHTML = '<div class="info-box">⏳ Buscando...</div>';
481
+ try {
482
+ const resp = await fetch(`/api/document/${docId}`);
483
+ const data = await resp.json();
484
+ requestsCount++; document.getElementById('requestsCount').textContent = requestsCount;
485
+ if (data.success && data.document) {
486
+ successCount++; document.getElementById('successCount').textContent = successCount; docsCount++; document.getElementById('docsCount').textContent = docsCount;
487
+ lastResult = { result: { hits: { hits: [data.document] } } };
488
+ displayResult(lastResult);
489
+ } else {
490
+ failCount++; document.getElementById('failCount').textContent = failCount;
491
+ resultDiv.innerHTML = '<div class="error">❌ Documento não encontrado</div>';
492
+ }
493
+ } catch(e) {
494
+ failCount++; document.getElementById('failCount').textContent = failCount;
495
+ resultDiv.innerHTML = '<div class="error">❌ Erro: '+e.message+'</div>';
496
+ } finally {
497
+ btn.disabled = false; loading.style.display = 'none';
498
+ }
499
+ }
500
+ async function fetchFullDocument(docId) {
501
+ try {
502
+ const resp = await fetch(`/api/document-raw/${docId}`);
503
+ const data = await resp.json();
504
+ if (data.success) {
505
+ alert('Documento completo obtido! Verifique o console.');
506
+ console.log('Documento completo:', data.document);
507
+ } else alert('Erro: ' + (data.error || 'desconhecido'));
508
+ } catch(e) { alert('Erro: ' + e.message); }
509
+ }
510
+ function downloadJSON() {
511
+ if (!lastResult) return alert('Nenhum resultado');
512
+ const dataStr = JSON.stringify(lastResult, null, 2);
513
+ const blob = new Blob([dataStr], {type:'application/json'});
514
+ const url = URL.createObjectURL(blob);
515
+ const a = document.createElement('a');
516
+ a.href = url; a.download = `stf_${new Date().toISOString().slice(0,10)}.json`;
517
+ document.body.appendChild(a); a.click(); document.body.removeChild(a); URL.revokeObjectURL(url);
518
+ }
519
+ function copyToken() { if (lastToken) navigator.clipboard.writeText(lastToken).then(()=>alert('✅ Token copiado!')); else alert('Nenhum token'); }
520
+ window.onload = async () => {
521
+ try {
522
+ const resp = await fetch('/api/health');
523
+ const data = await resp.json();
524
+ if (data.total_docs) document.getElementById('totalDocs').textContent = data.total_docs.toLocaleString();
525
+ document.getElementById('totalCampos').textContent = "102";
526
+ } catch(e) { document.getElementById('totalDocs').textContent = 'indisponível'; }
527
+ };
528
+ </script>
529
+ </body>
530
+ </html>
531
+ """
532
+
533
+ # ============================================
534
+ # Funções auxiliares
535
+ # ============================================
536
def get_fresh_token():
    """Return an ``aws-waf-token`` cookie value, reusing the cached one while valid.

    When the cache is empty or expired, a headless Chromium session loads the
    STF search page, waits three seconds, and reads the cookie from the
    browser context. A token found this way is cached for 3300 seconds.

    Returns:
        The token string, or ``None`` when the cookie is absent or any error
        occurs while driving the browser (the error is logged, not raised).
    """
    global token_cache
    cached_token = token_cache["token"]
    if cached_token and time.time() < token_cache["expires_at"]:
        logger.info("Usando token em cache")
        return cached_token

    logger.info("Obtendo novo token via Playwright")
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True, args=['--no-sandbox'])
            context = browser.new_context(
                viewport={'width': 1920, 'height': 1080},
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            )
            page = context.new_page()
            page.goto("https://jurisprudencia.stf.jus.br/pages/search", wait_until='domcontentloaded', timeout=30000)
            page.wait_for_timeout(3000)
            # Value of the first cookie named aws-waf-token, if any.
            token = next(
                (c.get('value') for c in context.cookies() if c.get('name') == 'aws-waf-token'),
                None,
            )
            browser.close()
            if not token:
                logger.warning("Token não encontrado nos cookies")
                return None
            token_cache["token"] = token
            token_cache["expires_at"] = time.time() + 3300
            logger.info(f"Token obtido: {token[:30]}...")
            return token
    except Exception as e:
        logger.error(f"Erro ao obter token: {str(e)}")
        return None
571
 
572
def try_search_with_payloads(token, base_payload, sort_order='desc', base_filter=None):
    """Try progressively simpler search payloads until one succeeds.

    The attempts are, in order: ``base_payload`` ("completo"),
    ``PAYLOAD_SEM_HIGHLIGHT`` ("sem_highlight") and ``PAYLOAD_MINIMO``
    ("minimo"). The next attempt is made only when the previous one was
    rejected with HTTP 400; any other failure stops the chain.

    Args:
        token: AWS WAF token passed through to ``search_with_token``.
        base_payload: Payload used for the first attempt. It is not modified.
            Its ``"size"`` (when present) is also applied to the fallback
            payloads so the requested page size survives a fallback.
        sort_order: Sort direction applied to ``julgamento_data``.
        base_filter: Optional value for a ``post_filter`` term on ``base``.

    Returns:
        ``(True, data, label)`` on success, where ``label`` names the payload
        that worked, or ``(False, None, None)`` when every attempt failed.
    """
    requested_size = base_payload.get("size")
    attempts = (
        ("completo", base_payload, "Tentando payload completo..."),
        ("sem_highlight", PAYLOAD_SEM_HIGHLIGHT,
         "Payload completo falhou (400). Tentando payload sem highlight..."),
        ("minimo", PAYLOAD_MINIMO,
         "Payload sem highlight falhou. Tentando payload mínimo..."),
    )

    for label, template, message in attempts:
        logger.info(message)
        # Shallow copy is enough: only top-level keys are (re)assigned below.
        payload = dict(template)
        payload["sort"] = [{"julgamento_data": {"order": sort_order}}]
        if requested_size is not None:
            payload["size"] = requested_size
        if base_filter:
            payload["post_filter"] = {"bool": {"must": [{"term": {"base": base_filter}}]}}

        res = search_with_token(token, payload)
        if res.get("success"):
            return True, res["data"], label
        # Only a 400 (payload rejected) justifies retrying with a simpler one.
        if res.get("status") != 400:
            break

    return False, None, None
610
 
611
def search_with_token(token, payload):
    """POST ``payload`` to the search API, authenticating with the WAF token cookie.

    Args:
        token: Value sent as the ``aws-waf-token`` cookie.
        payload: JSON-serialisable request body.

    Returns:
        ``{"success": True, "data": <parsed JSON>}`` on HTTP 200; otherwise a
        dict with ``"success": False`` and an ``"error"`` message, plus
        ``"status"`` (and a 500-character ``"text"`` excerpt for statuses
        other than 403) when a response was received. An HTTP 403 also
        clears the cached token.
    """
    if not token:
        return {"success": False, "error": "Token não fornecido"}

    request_headers = {**HEADERS, 'Cookie': f'aws-waf-token={token}'}
    try:
        logger.debug(f"Enviando payload: {json.dumps(payload)[:200]}...")
        # NOTE(review): TLS certificate verification is disabled here.
        response = requests.post(URL_API, headers=request_headers, json=payload, verify=False, timeout=30)
        status = response.status_code
        logger.info(f"Resposta: status {status}")

        if status == 403:
            # Drop the cached token so the next caller fetches a new one.
            token_cache["token"] = None
            return {"success": False, "error": "Token expirado", "status": 403}
        if status != 200:
            return {"success": False, "error": f"HTTP {status}", "status": status, "text": response.text[:500]}
        return {"success": True, "data": response.json()}
    except Exception as e:
        logger.error(f"Erro na requisição: {str(e)}")
        return {"success": False, "error": str(e)}
630
 
631
def get_document_by_id(token, doc_id):
    """Look up a single document by ID, trying progressively simpler queries.

    Attempts, in order: an ``ids`` query with highlight, an ``ids`` query
    without highlight, and finally a ``bool/should`` term query over
    ``_id``, ``id`` and ``dg_unique``.

    Each request body is built fresh, so the module-level payload templates
    are never mutated (``dict.copy()`` is shallow, and writing into the nested
    ``query`` dict of a copy would alter the shared template).

    Args:
        token: AWS WAF token passed through to ``search_with_token``.
        doc_id: Identifier of the document to fetch.

    Returns:
        The ``search_with_token`` result dict of the first attempt that
        returned at least one hit, or of the last attempt otherwise.
    """
    def _has_hits(res):
        # The web UI in this file reads hits at data["result"]["hits"]["hits"];
        # a top-level data["hits"]["hits"] is accepted as well.
        if not res.get("success"):
            return False
        data = res.get("data")
        if not isinstance(data, dict):
            return False
        body = data.get("result", data)
        if not isinstance(body, dict):
            return False
        return bool((body.get("hits") or {}).get("hits"))

    def _ids_payload(template):
        # New top-level dict with a new "query"; the template stays untouched.
        return {**template, "query": {"ids": {"values": [doc_id]}}}

    # Attempt 1: full payload with highlight.
    res = search_with_token(token, _ids_payload(PAYLOAD_POR_ID_COMPLETO))
    if _has_hits(res):
        return res

    # Attempt 2: same query without highlight.
    logger.info("Payload completo por ID falhou. Tentando payload simples...")
    res2 = search_with_token(token, _ids_payload(PAYLOAD_POR_ID_SIMPLES))
    if _has_hits(res2):
        return res2

    # Last resort: term search on the candidate identifier fields.
    logger.info("Payloads por ID falharam. Tentando busca por termo...")
    payload_termo = {
        "query": {
            "bool": {
                "should": [
                    {"term": {"_id": doc_id}},
                    {"term": {"id": doc_id}},
                    {"term": {"dg_unique": doc_id}}
                ]
            }
        },
        "_source": PAYLOAD_COMPLETO["_source"],
        "size": 1
    }
    return search_with_token(token, payload_termo)
 
 
 
 
 
 
 
 
 
 
 
 
667
 
668
def get_document_raw(token, doc_id):
    """Fetch a document through the ``{URL_API_GET}/{doc_id}`` GET endpoint.

    Args:
        token: Value sent as the ``aws-waf-token`` cookie.
        doc_id: Document identifier; it is percent-encoded before being
            placed in the URL path so characters such as ``/``, ``?`` or
            ``#`` cannot alter the request target.

    Returns:
        ``{"success": True, "data": <parsed JSON>}`` on HTTP 200; otherwise a
        dict with ``"success": False`` and an ``"error"`` message, plus
        ``"status"`` (and a 500-character ``"text"`` excerpt for statuses
        other than 403) when a response was received. An HTTP 403 also
        clears the cached token.
    """
    from urllib.parse import quote

    if not token:
        return {"success": False, "error": "Token não fornecido"}
    headers = HEADERS.copy()
    headers['Cookie'] = f'aws-waf-token={token}'
    # A GET request carries no body, so Content-Type is dropped.
    headers.pop("Content-Type", None)
    url = f"{URL_API_GET}/{quote(str(doc_id), safe='')}"
    try:
        # NOTE(review): TLS certificate verification is disabled here.
        response = requests.get(url, headers=headers, verify=False, timeout=30)
        logger.info(f"GET documento raw: status {response.status_code}")
        if response.status_code == 200:
            return {"success": True, "data": response.json()}
        if response.status_code == 403:
            # Drop the cached token so the next caller fetches a new one.
            token_cache["token"] = None
            return {"success": False, "error": "Token expirado", "status": 403}
        return {"success": False, "error": f"HTTP {response.status_code}", "status": response.status_code, "text": response.text[:500]}
    except Exception as e:
        logger.error(f"Erro na requisição GET: {str(e)}")
        return {"success": False, "error": str(e)}
692
 
693
def test_with_playwright_full(payload):
    """Run the search from inside a headless browser page on the STF site.

    The STF search page is loaded in Chromium, the ``aws-waf-token`` cookie
    (if present) is read from the browser context, and ``payload`` is POSTed
    to the search API with ``fetch`` executed in the page.

    Args:
        payload: JSON-serialisable search body handed to the in-page script.

    Returns:
        On success ``{"success": True, "data": ..., "token": ...}`` (a token
        that was found is also cached for 3300 seconds). On failure a dict
        with ``"success": False`` and ``"error"``; when the page script ran,
        ``"status"`` and ``"token"`` are included as well.
    """
    logger.info("Tentando acesso com Playwright...")
    fetch_script = """
        async (payload) => {
            try {
                const response = await fetch('https://jurisprudencia.stf.jus.br/api/search/search', {
                    method: 'POST',
                    headers: {'Content-Type': 'application/json', 'Accept': 'application/json'},
                    body: JSON.stringify(payload)
                });
                if (response.ok) {
                    return { success: true, data: await response.json() };
                } else {
                    return { success: false, status: response.status };
                }
            } catch (error) {
                return { success: false, error: error.toString() };
            }
        }
    """
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True, args=['--no-sandbox'])
            context = browser.new_context(
                viewport={'width': 1920, 'height': 1080},
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            )
            page = context.new_page()
            page.goto("https://jurisprudencia.stf.jus.br/pages/search", wait_until='domcontentloaded', timeout=30000)
            page.wait_for_timeout(3000)
            # Value of the first cookie named aws-waf-token, if any.
            token = next(
                (c.get('value') for c in context.cookies() if c.get('name') == 'aws-waf-token'),
                None,
            )
            api_result = page.evaluate(fetch_script, payload)
            browser.close()

            if not api_result.get('success'):
                return {"success": False, "error": api_result.get('error', 'Falha desconhecida'), "status": api_result.get('status'), "token": token}
            if token:
                token_cache["token"] = token
                token_cache["expires_at"] = time.time() + 3300
            return {"success": True, "data": api_result.get('data'), "token": token}
    except Exception as e:
        logger.error(f"Erro no Playwright: {str(e)}")
        return {"success": False, "error": str(e)}
740
 
 
741
  # ============================================
742
+ # Rotas
743
  # ============================================
744
@app.route('/')
def index():
    """Serve the web interface rendered from ``HTML_TEMPLATE``.

    Returns:
        The rendered page, or a generic error message with HTTP 500 when
        rendering fails. The stack trace is written to the server log only,
        so internal details are not exposed to the client.
    """
    try:
        return render_template_string(HTML_TEMPLATE)
    except Exception:
        # logger.exception records the traceback server-side.
        logger.exception("Erro ao renderizar o template")
        return "Erro interno ao renderizar a página", 500
750
+
751
@app.route('/api/search-advanced', methods=['POST'])
def search_advanced():
    """Run a search with the filters supplied in the JSON request body.

    Recognised body keys: ``pageSize`` (clamped to 0..250, default 100),
    ``sortOrder`` (``"asc"`` or ``"desc"``, default ``"desc"``) and ``base``
    (optional value for a ``post_filter`` term). Invalid or missing values
    fall back to the defaults instead of raising.

    The search is tried with a cached/fresh token first and falls back to an
    in-browser request (``test_with_playwright_full``) when no token can be
    obtained or every token-based attempt fails.

    Returns:
        JSON ``{"success": True, "token", "data", "timestamp"}`` on success,
        or ``{"success": False, "error"}`` with HTTP 500 on failure.
    """
    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        body = {}

    try:
        page_size = int(body.get('pageSize', 100))
    except (TypeError, ValueError):
        page_size = 100
    page_size = max(0, min(page_size, 250))

    sort_order = body.get('sortOrder', 'desc')
    if sort_order not in ('asc', 'desc'):
        sort_order = 'desc'
    base = body.get('base')

    def _playwright_search():
        # Minimal payload executed from inside a real browser page.
        payload = dict(PAYLOAD_MINIMO)
        payload["size"] = page_size
        payload["sort"] = [{"julgamento_data": {"order": sort_order}}]
        if base:
            payload["post_filter"] = {"bool": {"must": [{"term": {"base": base}}]}}
        return test_with_playwright_full(payload)

    def _success(token_value, result_data):
        return jsonify({"success": True, "token": token_value, "data": result_data, "timestamp": time.time()})

    token = get_fresh_token()
    if not token:
        # No token available: go straight to the browser-based fallback.
        res = _playwright_search()
        if res.get('success'):
            return _success(res.get('token'), res['data'])
        return jsonify({"success": False, "error": res.get('error', 'Falha')}), 500

    # Copy of the full payload carrying the requested page size; the
    # module-level PAYLOAD_COMPLETO itself is left untouched.
    base_payload = {**PAYLOAD_COMPLETO, "size": page_size}
    success, result_data, payload_type = try_search_with_payloads(token, base_payload, sort_order, base)
    if success:
        logger.info(f"Busca bem-sucedida com payload: {payload_type}")
        return _success(token, result_data)

    # Last resort: browser-based request.
    res = _playwright_search()
    if res.get('success'):
        return _success(res.get('token'), res['data'])
    return jsonify({"success": False, "error": "Todas as tentativas falharam"}), 500
789
 
790
@app.route('/api/document/<doc_id>', methods=['GET'])
def get_document(doc_id):
    """Return the first search hit for ``doc_id``.

    Responds with HTTP 500 when no token can be obtained, HTTP 404 when
    the lookup fails or yields no hits, and otherwise a JSON object with
    ``success`` and ``document`` (the first hit).
    """
    token = get_fresh_token()
    if not token:
        return jsonify({"error": "Não foi possível obter token"}), 500

    lookup = get_document_by_id(token, doc_id)

    has_hits = lookup.get("success") and lookup["data"].get("hits", {}).get("hits")
    if not has_hits:
        message = lookup.get("error", "Documento não encontrado")
        return jsonify({"success": False, "error": message}), 404

    # The hit list is known to be non-empty at this point.
    first_hit = lookup["data"]["hits"]["hits"][0]
    return jsonify({"success": True, "document": first_hit})
803
+
804
@app.route('/api/document-raw/<doc_id>', methods=['GET'])
def get_document_raw_endpoint(doc_id):
    """Return the complete document fetched via ``get_document_raw``.

    The upstream payload may include the full text of the decision.
    When the first attempt fails with status 403, the cached token is
    cleared and the request is retried once with a fresh token.

    Responds with HTTP 500 when no token can be obtained or when the
    fetch ultimately fails.
    """
    token = get_fresh_token()
    if not token:
        return jsonify({"error": "Não foi possível obter token"}), 500

    outcome = get_document_raw(token, doc_id)

    # A 403 is treated as an expired token: drop the cache, retry once.
    if not outcome.get("success") and outcome.get("status") == 403:
        token_cache["token"] = None
        token = get_fresh_token()
        if token:
            outcome = get_document_raw(token, doc_id)

    if outcome.get("success"):
        return jsonify({"success": True, "document": outcome["data"]})
    return jsonify({"success": False, "error": outcome.get("error", "Falha")}), 500
827
 
828
@app.route('/api/test-bypass', methods=['POST'])
def test_bypass():
    """Probe which access method currently works against the upstream API.

    Tries a token-based search with the full payload first; if there is
    no token or that search fails, falls back to Playwright with the
    minimal payload. The JSON response names the method that succeeded
    (``"token"`` or ``"playwright"``); HTTP 500 is returned when both fail.
    """
    token = get_fresh_token()
    if token:
        # Token path: full payload, newest first, no base filter.
        ok, found, _ = try_search_with_payloads(token, PAYLOAD_COMPLETO, 'desc', None)
        if ok:
            return jsonify({"success": True, "method": "token", "token": token, "data": found})

    # Playwright fallback with the minimal payload.
    fallback = test_with_playwright_full(PAYLOAD_MINIMO)
    if not fallback.get('success'):
        return jsonify({"success": False, "error": fallback.get('error', 'Falha')}), 500
    return jsonify({
        "success": True,
        "method": "playwright",
        "token": fallback.get('token'),
        "data": fallback['data'],
    })
841
+
842
@app.route('/api/health', methods=['GET'])
def health():
    """Report service health as JSON.

    Checks performed:
        * Playwright: launches and immediately closes a headless Chromium.
        * Upstream API: when a token is available, issues a ``size: 0``
          search to read the total number of documents.

    Both checks are best-effort: a failure is logged and reflected in
    the response (``playwright_ready`` false / ``total_docs`` null)
    instead of failing the endpoint.
    """
    playwright_status = False
    try:
        with sync_playwright() as p:
            p.chromium.launch(headless=True).close()
        playwright_status = True
    except Exception as e:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
        # still propagate; the reason is logged instead of being dropped.
        logger.warning(f"Health check: Playwright indisponível: {e}")

    total_docs = None
    token = get_fresh_token()
    if token:
        try:
            res = search_with_token(token, {"size": 0, "track_total_hits": True})
            if res.get("success") and res["data"].get("hits", {}).get("total"):
                total_docs = res["data"]["hits"]["total"]["value"]
        except Exception as e:
            logger.warning(f"Health check: falha ao obter total de documentos: {e}")

    return jsonify({
        "status": "healthy",
        "timestamp": time.time(),
        "playwright_ready": playwright_status,
        "token_cached": bool(token_cache["token"]),
        "token_expires_in": max(0, token_cache["expires_at"] - time.time()) if token_cache["expires_at"] else 0,
        "total_docs": total_docs,
        "python_version": sys.version
    })
869
+
870
@app.route('/api/token', methods=['GET'])
def get_token():
    """Expose the current access token and its remaining lifetime.

    Returns a JSON object with ``success``, ``token`` and ``expires_in``
    (seconds, never negative), or HTTP 500 when no token can be obtained.
    """
    token = get_fresh_token()
    if not token:
        return jsonify({"success": False, "error": "Não foi possível obter token"}), 500

    remaining = max(0, token_cache["expires_at"] - time.time())
    return jsonify({"success": True, "token": token, "expires_in": remaining})
877
 
878
@app.route('/api/clear-cache', methods=['POST'])
def clear_cache():
    """Reset the module-level token cache to its empty state."""
    global token_cache
    empty_cache = {"token": None, "expires_at": 0}
    # Rebinds the module global to a brand-new dict.
    token_cache = empty_cache
    confirmation = {"success": True, "message": "Cache limpo"}
    return jsonify(confirmation)
883
 
884
  # ============================================
885
+ # NOVO ENDPOINT SIMPLIFICADO /busca
886
  # ============================================
887
def _simplify_hit(hit):
    """Map one raw search hit to the simplified public item, dropping null fields."""
    source = hit.get('_source') or {}
    item = {
        "id": source.get('id') or hit.get('_id'),
        "titulo": source.get('titulo'),
        "processo": source.get('processo_codigo_completo'),
        "relator": source.get('relator_processo_nome'),
        "orgao": source.get('orgao_julgador'),
        "data": source.get('julgamento_data'),
        "ementa": source.get('ementa_texto'),
        "url_documento": source.get('inteiro_teor_url'),
        "score": hit.get('_score')
    }
    return {k: v for k, v in item.items() if v is not None}


@app.route('/busca')
def busca_simplificada():
    """
    Public endpoint for a simplified case-law search.

    Example: /busca?q=dano%20moral

    Returns JSON with ``q``, ``total`` and ``resultados``; each result may
    carry: id, titulo, processo, relator, orgao, data, ementa,
    url_documento, score (null fields are omitted).

    Error responses (JSON with an ``erro`` key):
        400 - missing or blank ``q`` parameter.
        503 - no access token could be obtained.
        502 - the upstream request failed, answered with a non-error
              status other than 200, or returned a body that is not
              valid JSON.
        4xx/5xx - upstream error statuses are passed through.
    """
    query = request.args.get('q', '').strip()
    if not query:
        return jsonify({"erro": "Parâmetro 'q' é obrigatório"}), 400

    token = get_fresh_token()
    if not token:
        return jsonify({"erro": "Não foi possível obter token de acesso"}), 503

    # Lightweight payload restricted to the fields exposed by this endpoint.
    payload = {
        "query": {
            "query_string": {
                "query": query,
                "default_operator": "AND",
                "fields": ["ementa_texto", "acordao_ata", "decisao_texto", "titulo"]
            }
        },
        "_source": [
            "id", "titulo", "processo_codigo_completo", "relator_processo_nome",
            "orgao_julgador", "julgamento_data", "ementa_texto", "inteiro_teor_url"
        ],
        "size": 20,
        "sort": [{"julgamento_data": {"order": "desc"}}],
        "track_total_hits": True
    }

    headers = HEADERS.copy()
    headers['Cookie'] = f'aws-waf-token={token}'

    try:
        # NOTE(review): verify=False disables TLS certificate validation;
        # kept as in the original, but worth revisiting.
        response = requests.post(URL_API, headers=headers, json=payload, verify=False, timeout=15)
    except Exception as e:
        return jsonify({"erro": f"Falha na comunicação com a API: {str(e)}"}), 502

    if response.status_code != 200:
        # Forward real error statuses; any other non-200 status (e.g. 202)
        # must not be relayed as a success code next to an error body.
        status = response.status_code if response.status_code >= 400 else 502
        return jsonify({"erro": f"API retornou status {response.status_code}"}), status

    try:
        data = response.json()
    except ValueError:
        # A 200 with a non-JSON body (e.g. an HTML page) is an upstream failure.
        return jsonify({"erro": "API retornou resposta inválida (não é JSON)"}), 502

    # Tolerate missing/null levels instead of raising AttributeError.
    result = (data.get('result') if isinstance(data, dict) else None) or {}
    hits_section = result.get('hits') or {}
    hits = hits_section.get('hits') or []

    total = hits_section.get('total')
    if isinstance(total, dict):
        total = total.get('value', 0)
    elif not isinstance(total, int):
        total = 0

    return jsonify({
        "q": query,
        "total": total,
        "resultados": [_simplify_hit(hit) for hit in hits]
    })
958
 
959
if __name__ == '__main__':
    # Optional: point the SSL stack at certifi's CA bundle when the
    # package is installed. Only a missing package is tolerated; the
    # previous bare ``except`` also swallowed KeyboardInterrupt/SystemExit.
    try:
        import certifi
        os.environ['SSL_CERT_FILE'] = certifi.where()
        os.environ['REQUESTS_CA_BUNDLE'] = certifi.where()
    except ImportError:
        pass

    logger.info("="*50)
    logger.info("🚀 Iniciando aplicação STF Jurisprudência")
    logger.info(f"📋 Campos _source: {len(PAYLOAD_COMPLETO['_source'])}")
    logger.info("="*50)

    # Smoke-test Playwright at startup; a failure is only a warning
    # because the token-based path does not need a browser.
    try:
        with sync_playwright() as p:
            p.chromium.launch(headless=True).close()
        logger.info("✅ Playwright pronto para uso")
    except Exception as e:
        logger.warning(f"⚠️ Playwright pode não estar configurado: {e}")

    port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=port, debug=False)