Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -240,28 +240,26 @@ def format_context(results: list[dict]) -> str:
|
|
| 240 |
|
| 241 |
SYSTEM = """You are RegTech BR, a specialist AI in Brazilian crypto asset regulation.
|
| 242 |
Analyze the compliance query and produce a structured JSON assessment.
|
| 243 |
-
Respond ONLY with
|
| 244 |
-
|
| 245 |
-
Use EXACTLY these key names (snake_case, no variations):
|
| 246 |
{
|
| 247 |
"risk_level": "LOW | MEDIUM | HIGH | UNCLEAR",
|
| 248 |
"compliance_status": "COMPLIANT | NON-COMPLIANT | REQUIRES_REVIEW | INSUFFICIENT_INFO",
|
| 249 |
-
"applicable_regulations": ["
|
| 250 |
-
"relevant_articles": ["
|
| 251 |
"finding": "2-5 sentence assessment",
|
| 252 |
"corrective_action": "specific steps or 'No action required'",
|
| 253 |
"confidence": "HIGH | MEDIUM | LOW",
|
| 254 |
"authority": "BCB | CVM | COAF | mixed | federal"
|
| 255 |
}
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
-
|
| 259 |
-
-
|
| 260 |
-
-
|
| 261 |
-
- If
|
| 262 |
-
- If
|
| 263 |
-
- If
|
| 264 |
-
- If tokens with dividends, voting rights, or public fundraising: HIGH risk, CVM securities.
|
| 265 |
- Base the answer strictly on the retrieved regulatory context.
|
| 266 |
"""
|
| 267 |
|
|
@@ -279,36 +277,223 @@ def extract_json_object(raw: str) -> str:
|
|
| 279 |
return raw
|
| 280 |
|
| 281 |
|
| 282 |
-
#
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
|
|
|
| 310 |
|
| 311 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
|
| 313 |
|
| 314 |
def call_claude(query: str, context: str) -> dict | None:
|
|
@@ -316,13 +501,17 @@ def call_claude(query: str, context: str) -> dict | None:
|
|
| 316 |
if not api_key:
|
| 317 |
print("Missing ANTHROPIC_API_KEY.", flush=True)
|
| 318 |
return None
|
|
|
|
| 319 |
prompt = (
|
| 320 |
f"COMPLIANCE QUERY:\n{query}\n\n"
|
| 321 |
f"REGULATORY CONTEXT:\n\n{context}\n\n"
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
|
|
|
|
|
|
| 325 |
)
|
|
|
|
| 326 |
try:
|
| 327 |
response = requests.post(
|
| 328 |
"https://api.anthropic.com/v1/messages",
|
|
@@ -339,19 +528,27 @@ def call_claude(query: str, context: str) -> dict | None:
|
|
| 339 |
},
|
| 340 |
timeout=90,
|
| 341 |
)
|
|
|
|
|
|
|
| 342 |
response.raise_for_status()
|
|
|
|
|
|
|
| 343 |
raw = "".join(
|
| 344 |
block.get("text", "")
|
| 345 |
-
for block in
|
| 346 |
if block.get("type") == "text"
|
| 347 |
)
|
| 348 |
-
print(f"CLAUDE JSON KEYS: {list(json.loads(extract_json_object(raw)).keys()) if raw else 'empty'}", flush=True)
|
| 349 |
clean = extract_json_object(raw)
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
except Exception as exc:
|
| 356 |
print(f"Claude error: {type(exc).__name__}: {exc}", flush=True)
|
| 357 |
return None
|
|
@@ -376,14 +573,6 @@ STATUS_ICON = {
|
|
| 376 |
}
|
| 377 |
|
| 378 |
|
| 379 |
-
def as_list(value) -> list[str]:
    """Coerce *value* into a list of strings.

    ``None`` yields an empty list; a list yields the string form of each of
    its truthy items; anything else is wrapped as a single-element list.
    """
    if isinstance(value, list):
        # filter(None, ...) keeps only truthy items, like ``if v``.
        return list(map(str, filter(None, value)))
    return [] if value is None else [str(value)]
|
| 385 |
-
|
| 386 |
-
|
| 387 |
def esc(value) -> str:
    """Return the HTML-escaped string form of *value* (``None`` -> ``""``)."""
    if value is None:
        return ""
    return html.escape(str(value))
|
| 389 |
|
|
@@ -499,14 +688,39 @@ EXAMPLES = [
|
|
| 499 |
def analyze(query: str) -> tuple[str, str]:
    """Run one compliance query through retrieval and the Claude call.

    Returns a pair: the rendered output (from ``render_error`` or
    ``render_report``) and the formatted context string, which is empty when
    the query is blank or nothing was retrieved.
    """
    cleaned = (query or "").strip()
    if not cleaned:
        return render_error("Please enter a compliance query."), ""

    chunks = retrieve(cleaned)
    if not chunks:
        message = "No relevant regulatory chunks found. Try rephrasing your query."
        return render_error(message), ""

    context = format_context(chunks)
    report = call_claude(cleaned, context)
    if report:
        return render_report(report, cleaned, chunks), context

    failure = "Could not reach Claude API. Check that ANTHROPIC_API_KEY is set as a Space Secret."
    return render_error(failure), context
|
| 511 |
|
| 512 |
|
|
|
|
| 240 |
|
| 241 |
SYSTEM = """You are RegTech BR, a specialist AI in Brazilian crypto asset regulation.
|
| 242 |
Analyze the compliance query and produce a structured JSON assessment.
|
| 243 |
+
Respond ONLY with valid JSON — no markdown fences.
|
| 244 |
+
Use EXACTLY these snake_case keys:
|
|
|
|
| 245 |
{
|
| 246 |
"risk_level": "LOW | MEDIUM | HIGH | UNCLEAR",
|
| 247 |
"compliance_status": "COMPLIANT | NON-COMPLIANT | REQUIRES_REVIEW | INSUFFICIENT_INFO",
|
| 248 |
+
"applicable_regulations": ["list of regulation names"],
|
| 249 |
+
"relevant_articles": ["list of specific article references"],
|
| 250 |
"finding": "2-5 sentence assessment",
|
| 251 |
"corrective_action": "specific steps or 'No action required'",
|
| 252 |
"confidence": "HIGH | MEDIUM | LOW",
|
| 253 |
"authority": "BCB | CVM | COAF | mixed | federal"
|
| 254 |
}
|
| 255 |
+
Rules:
|
| 256 |
+
- Always populate applicable_regulations and relevant_articles as non-empty arrays.
|
| 257 |
+
- Use only regulation/article references present in the retrieved context.
|
| 258 |
+
- If an exact article is unclear, cite the closest source/article_hint from the retrieved context instead of leaving the array empty.
|
| 259 |
+
- If the query describes operating without required authorization, flag high risk.
|
| 260 |
+
- If the query describes weak KYC or anonymous transactions, flag high risk.
|
| 261 |
+
- If the query describes no segregation of client assets, flag high risk.
|
| 262 |
+
- If the query describes tokens with dividends, voting rights, or public fundraising, flag CVM securities risk.
|
|
|
|
| 263 |
- Base the answer strictly on the retrieved regulatory context.
|
| 264 |
"""
|
| 265 |
|
|
|
|
| 277 |
return raw
|
| 278 |
|
| 279 |
|
| 280 |
+
# ============================================================
|
| 281 |
+
# Claude output normalization and safety fallback
|
| 282 |
+
# ============================================================
|
| 283 |
+
|
| 284 |
+
# Maps each canonical report key to the alternative spellings accepted for it.
# Order matters: lookups walk each list front to back, and every list starts
# with the canonical key itself.
KEY_ALIASES = {
    "risk_level": [
        # English / camelCase
        "risk_level", "riskLevel", "risk", "level",
        # Portuguese
        "nivel_risco", "nível_risco", "nivel_de_risco", "nível_de_risco",
    ],
    "compliance_status": [
        # English / camelCase
        "compliance_status", "complianceStatus", "status", "compliance",
        # Portuguese
        "status_conformidade", "conformidade",
    ],
    "applicable_regulations": [
        # English / camelCase
        "applicable_regulations", "applicableRegulations",
        "applicable regulation", "applicable regulations",
        "regulations", "regulation", "laws", "legal_basis", "legalBasis",
        # Portuguese
        "normas_aplicaveis", "normas_aplicáveis",
        "regulacoes_aplicaveis", "regulações_aplicáveis",
        "regulamentacoes", "regulamentações",
    ],
    "relevant_articles": [
        # English / camelCase
        "relevant_articles", "relevantArticles", "relevant articles",
        "articles", "article_references", "legal_references",
        "citations", "references",
        # Portuguese
        "artigos_relevantes", "artigos",
        "dispositivos", "dispositivos_relevantes",
    ],
    "finding": [
        # English
        "finding", "findings", "assessment", "analysis",
        # Portuguese
        "analise", "análise", "conclusao", "conclusão",
        "avaliacao", "avaliação",
    ],
    "corrective_action": [
        # English / camelCase
        "corrective_action", "correctiveAction", "action",
        "recommended_action", "recommendation",
        # Portuguese
        "recomendacao", "recomendação", "acao_corretiva", "ação_corretiva",
    ],
    "confidence": [
        # English / camelCase
        "confidence", "confidence_level", "confidenceLevel",
        # Portuguese
        "confianca", "confiança",
    ],
    "authority": [
        # English
        "authority", "authority_type", "regulator", "agency",
        # Portuguese
        "orgao", "órgão", "autoridade", "autoridade_competente",
    ],
}
|
| 320 |
+
|
| 321 |
+
|
| 322 |
+
def _norm_key(key: str) -> str:
|
| 323 |
+
key = unicodedata.normalize("NFD", str(key or ""))
|
| 324 |
+
key = "".join(c for c in key if unicodedata.category(c) != "Mn")
|
| 325 |
+
key = re.sub(r"[^a-zA-Z0-9]+", "_", key).strip("_").lower()
|
| 326 |
+
return key
|
| 327 |
+
|
| 328 |
+
|
| 329 |
+
def _lookup_alias(data: dict, canonical_key: str):
    """Return the value stored in *data* under any alias of *canonical_key*.

    Aliases come from ``KEY_ALIASES[canonical_key]`` and are tried in list
    order, in two passes:

    1. exact key match;
    2. match after both the alias and the data keys go through ``_norm_key``.

    Entries whose value is ``None`` are skipped in both passes, so a null
    under one spelling does not hide a real value stored under another.

    Returns ``None`` when *data* is not a dict, the canonical key has no
    aliases, or no alias maps to a non-``None`` value.
    """
    if not isinstance(data, dict):
        return None

    aliases = KEY_ALIASES.get(canonical_key, [])

    # Pass 1: exact spelling.
    for alias in aliases:
        value = data.get(alias)
        if value is not None:
            return value

    # Pass 2: normalized spelling. If several data keys normalize to the
    # same token, the last non-null one wins.
    normalized = {_norm_key(k): v for k, v in data.items() if v is not None}
    for alias in aliases:
        value = normalized.get(_norm_key(alias))
        if value is not None:
            return value

    return None
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
def as_list(value) -> list[str]:
    """Coerce Claude output into a clean list of strings.

    Handles arrays, strings, numbers, and arrays of objects such as:
    [{"name": "Lei 14.478/2022"}, {"article": "Art. 7º"}]

    Rules, by input type:

    * ``None`` -> ``[]``.
    * list -> each item is coerced recursively; results are stripped,
      blanks dropped, and duplicates removed keeping first-seen order.
    * dict -> the first truthy value among a fixed set of preferred keys is
      coerced recursively; otherwise the truthy items are joined as
      ``"key: value; ..."``. A dict with no truthy values yields ``[]``.
    * anything else -> its stripped string form, or ``[]`` when blank.
    """
    if value is None:
        return []

    if isinstance(value, list):
        flattened = []
        for item in value:
            flattened.extend(as_list(item))
        stripped = (str(v).strip() for v in flattened)
        # dict.fromkeys de-duplicates while preserving insertion order.
        return list(dict.fromkeys(v for v in stripped if v))

    if isinstance(value, dict):
        preferred = (
            "name", "title", "reference", "article", "regulation", "law",
            "text", "label", "value", "source", "source_label",
        )
        for key in preferred:
            if value.get(key):
                return as_list(value[key])
        joined = "; ".join(f"{k}: {v}" for k, v in value.items() if v)
        # Returning [""] here would be truthy and defeat callers that fall
        # back on an empty result.
        return [joined] if joined else []

    text_value = str(value).strip()
    return [text_value] if text_value else []
|
| 375 |
+
|
| 376 |
+
|
| 377 |
+
def infer_regulations_from_results(results: list[dict], max_items: int = 4) -> list[str]:
    """Build regulation labels from retrieved chunks.

    For each result the label is, in order of preference:
    ``source_label`` (with ``normative_reference_hint`` appended after an em
    dash when it is not already contained in the label), then
    ``normative_reference_hint`` alone, then ``source_id``. Blank labels are
    skipped, duplicates are removed keeping first-seen order, and at most
    *max_items* labels are returned.
    """

    def _field(row: dict, name: str) -> str:
        return str(row.get(name) or "").strip()

    ordered: dict[str, None] = {}
    for row in results or []:
        label = _field(row, "source_label")
        norm_ref = _field(row, "normative_reference_hint")

        if label:
            append_ref = bool(norm_ref) and norm_ref not in label
            entry = f"{label} — {norm_ref}" if append_ref else label
        else:
            entry = norm_ref or _field(row, "source_id")

        if entry:
            ordered.setdefault(entry, None)

    return list(ordered)[:max_items]
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
def infer_articles_from_results(results: list[dict], max_items: int = 6) -> list[str]:
    """Build article references from retrieved chunks.

    When a result has an ``article_hint`` it is prefixed (em dash separated)
    with ``normative_reference_hint`` or, failing that, ``source_label``;
    with neither available the hint is used on its own. Results without an
    ``article_hint`` fall back to ``normative_reference_hint`` and then
    ``source_id``. Blank entries are skipped, duplicates are removed keeping
    first-seen order, and at most *max_items* entries are returned.
    """

    def _field(row: dict, name: str) -> str:
        return str(row.get(name) or "").strip()

    ordered: dict[str, None] = {}
    for row in results or []:
        article = _field(row, "article_hint")
        norm_ref = _field(row, "normative_reference_hint")

        if article:
            prefix = norm_ref or _field(row, "source_label")
            entry = f"{prefix} — {article}" if prefix else article
        else:
            entry = norm_ref or _field(row, "source_id")

        if entry:
            ordered.setdefault(entry, None)

    return list(ordered)[:max_items]
|
| 424 |
+
|
| 425 |
+
|
| 426 |
+
def canonicalize_report(report: dict, results: list[dict]) -> dict:
    """Normalize Claude response keys and guarantee non-empty legal-reference arrays.

    Steps:

    1. A non-dict *report* is treated as an empty dict.
    2. Every canonical key in ``KEY_ALIASES`` is filled from whichever alias
       ``_lookup_alias`` finds; keys outside the alias table are kept as-is.
    3. ``risk_level``, ``compliance_status`` and ``confidence`` are
       upper-cased and stripped. A missing, null or empty value falls back
       to ``UNCLEAR`` / ``INSUFFICIENT_INFO`` / ``LOW``.
    4. ``applicable_regulations`` and ``relevant_articles`` are coerced with
       ``as_list``; if either is empty it is filled from *results* and a
       warning is printed.
    5. Empty ``finding``, ``corrective_action`` and ``authority`` get
       defaults; ``authority`` is derived from the ``authority`` field of
       *results* (``"mixed"`` when they differ, ``"?"`` when none is set).

    Returns a new dict; *report* is not mutated.
    """
    if not isinstance(report, dict):
        report = {}

    canonical = dict(report)
    for key in KEY_ALIASES:
        value = _lookup_alias(report, key)
        if value is not None:
            canonical[key] = value

    # ``dict.get(key, default)`` only applies the default when the key is
    # absent. A JSON null would otherwise become the string "NONE", so use
    # ``or`` to cover null and empty values too.
    canonical["risk_level"] = (
        str(canonical.get("risk_level") or "UNCLEAR").strip().upper().replace("-", "_")
    )
    # NOTE(review): this rewrites every underscore as a hyphen, so
    # REQUIRES_REVIEW becomes REQUIRES-REVIEW. Confirm that matches the keys
    # the renderer expects.
    canonical["compliance_status"] = (
        str(canonical.get("compliance_status") or "INSUFFICIENT_INFO")
        .strip()
        .upper()
        .replace("_", "-")
    )
    canonical["confidence"] = str(canonical.get("confidence") or "LOW").strip().upper()

    regs = as_list(canonical.get("applicable_regulations"))
    if not regs:
        regs = infer_regulations_from_results(results)
        print(
            "[WARN] applicable_regulations empty or missing in Claude response; "
            f"filled from retrieved sources: {regs}",
            flush=True,
        )

    articles = as_list(canonical.get("relevant_articles"))
    if not articles:
        articles = infer_articles_from_results(results)
        print(
            "[WARN] relevant_articles empty or missing in Claude response; "
            f"filled from retrieved sources: {articles}",
            flush=True,
        )

    canonical["applicable_regulations"] = regs
    canonical["relevant_articles"] = articles

    if not canonical.get("finding"):
        canonical["finding"] = "Assessment generated from the retrieved regulatory context."
    if not canonical.get("corrective_action"):
        canonical["corrective_action"] = "Review the cited regulatory sources and update the compliance procedure accordingly."
    if not canonical.get("authority"):
        authorities = [str(r.get("authority")) for r in results or [] if r.get("authority")]
        canonical["authority"] = "mixed" if len(set(authorities)) > 1 else (authorities[0] if authorities else "?")

    return canonical
|
| 476 |
+
|
| 477 |
+
|
| 478 |
+
def debug_print_claude(raw: str, clean: str, parsed: dict | None = None) -> None:
    """Print the raw Claude reply, the extracted JSON text and a key summary.

    The raw and extracted sections are always printed, with placeholders
    when empty. The parsed-keys section, including the two legal-reference
    arrays, is printed only when *parsed* is a dict. Every line is flushed
    immediately.
    """
    heavy_rule = "=" * 72
    light_rule = "-" * 72

    def _lines():
        # A generator evaluates and prints each line in turn.
        yield "\n" + heavy_rule
        yield "CLAUDE RAW RESPONSE START"
        yield raw or "<EMPTY RAW RESPONSE>"
        yield "CLAUDE RAW RESPONSE END"
        yield light_rule
        yield "CLAUDE EXTRACTED JSON START"
        yield clean or "<EMPTY EXTRACTED JSON>"
        yield "CLAUDE EXTRACTED JSON END"
        if isinstance(parsed, dict):
            yield light_rule
            yield f"CLAUDE PARSED KEYS: {sorted(parsed.keys())}"
            yield (
                "CLAUDE LEGAL ARRAYS: "
                f"applicable_regulations={parsed.get('applicable_regulations')!r}; "
                f"relevant_articles={parsed.get('relevant_articles')!r}"
            )
        yield heavy_rule + "\n"

    for line in _lines():
        print(line, flush=True)
|
| 497 |
|
| 498 |
|
| 499 |
def call_claude(query: str, context: str) -> dict | None:
|
|
|
|
| 501 |
if not api_key:
|
| 502 |
print("Missing ANTHROPIC_API_KEY.", flush=True)
|
| 503 |
return None
|
| 504 |
+
|
| 505 |
prompt = (
|
| 506 |
f"COMPLIANCE QUERY:\n{query}\n\n"
|
| 507 |
f"REGULATORY CONTEXT:\n\n{context}\n\n"
|
| 508 |
+
"Produce a structured compliance assessment. "
|
| 509 |
+
"Return ONLY valid JSON using EXACTLY these keys: "
|
| 510 |
+
"risk_level, compliance_status, applicable_regulations, relevant_articles, "
|
| 511 |
+
"finding, corrective_action, confidence, authority. "
|
| 512 |
+
"The arrays applicable_regulations and relevant_articles must be non-empty."
|
| 513 |
)
|
| 514 |
+
|
| 515 |
try:
|
| 516 |
response = requests.post(
|
| 517 |
"https://api.anthropic.com/v1/messages",
|
|
|
|
| 528 |
},
|
| 529 |
timeout=90,
|
| 530 |
)
|
| 531 |
+
|
| 532 |
+
print(f"Claude HTTP status: {response.status_code}", flush=True)
|
| 533 |
response.raise_for_status()
|
| 534 |
+
|
| 535 |
+
payload = response.json()
|
| 536 |
raw = "".join(
|
| 537 |
block.get("text", "")
|
| 538 |
+
for block in payload.get("content", [])
|
| 539 |
if block.get("type") == "text"
|
| 540 |
)
|
|
|
|
| 541 |
clean = extract_json_object(raw)
|
| 542 |
+
|
| 543 |
+
try:
|
| 544 |
+
parsed = json.loads(clean)
|
| 545 |
+
debug_print_claude(raw, clean, parsed)
|
| 546 |
+
return parsed
|
| 547 |
+
except json.JSONDecodeError as json_exc:
|
| 548 |
+
debug_print_claude(raw, clean, None)
|
| 549 |
+
print(f"Claude JSON parse error: {json_exc}", flush=True)
|
| 550 |
+
return None
|
| 551 |
+
|
| 552 |
except Exception as exc:
|
| 553 |
print(f"Claude error: {type(exc).__name__}: {exc}", flush=True)
|
| 554 |
return None
|
|
|
|
| 573 |
}
|
| 574 |
|
| 575 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 576 |
def esc(value) -> str:
    """HTML-escape *value* after converting it to text; ``None`` gives ``""``."""
    text = "" if value is None else str(value)
    return html.escape(text)
|
| 578 |
|
|
|
|
| 688 |
def analyze(query: str) -> tuple[str, str]:
    """Run one compliance query end to end, logging each stage to stdout.

    Flow: validate the query, retrieve chunks and log one line per chunk,
    format the context, call Claude, canonicalize the report, then render
    it. Returns a pair: the rendered output (from ``render_error`` or
    ``render_report``) and the formatted context string, which is empty when
    the query is blank or nothing was retrieved.
    """
    cleaned = (query or "").strip()
    if not cleaned:
        return render_error("Please enter a compliance query."), ""

    print("\n" + "=" * 72, flush=True)
    print(f"NEW QUERY: {cleaned}", flush=True)

    chunks = retrieve(cleaned)
    print(f"Retrieved chunks: {len(chunks)}", flush=True)

    logged_fields = ("source_id", "authority", "article_hint", "normative_reference_hint")
    for idx, chunk in enumerate(chunks, 1):
        parts = [f"{name}={chunk.get(name)}" for name in logged_fields]
        parts.append(f"final_score={float(chunk.get('_final', 0.0)):.3f}")
        print(f"[RAG {idx}] " + " | ".join(parts), flush=True)

    if not chunks:
        return render_error("No relevant regulatory chunks found. Try rephrasing your query."), ""

    context = format_context(chunks)
    raw_report = call_claude(cleaned, context)
    if not raw_report:
        return render_error("Could not reach Claude API. Check that ANTHROPIC_API_KEY is set as a Space Secret."), context

    report = canonicalize_report(raw_report, chunks)
    regs_repr = repr(report.get("applicable_regulations"))
    articles_repr = repr(report.get("relevant_articles"))
    print(
        "FINAL NORMALIZED REPORT LEGAL ARRAYS: "
        f"applicable_regulations={regs_repr}; "
        f"relevant_articles={articles_repr}",
        flush=True,
    )

    return render_report(report, cleaned, chunks), context
|
| 725 |
|
| 726 |
|