Spaces:

juakazike
/

gender-sensitization-engine

Sleeping

App Files Files Community

gender-sensitization-engine / api /service.py

AcharO

feat(ha): Hausa/Zulu lexicons, caller=studylabs gate skip, budurwa precision fix — HA P=1.000

63255ea 29 days ago

raw

history blame contribute delete

6.65 kB

	"""JuaKazi rewrite service — core correction logic (no HTTP)."""

	import time
	from typing import Optional

	from config import (
	AIBRIDGE_ENABLED,
	DEFAULT_REWRITE_CONFIDENCE,
	REWRITE_CONFIDENCE_BY_SOURCE,
	get_semantic_threshold,
	)
	from core.semantic_preservation import SemanticPreservationMetrics

	from .bias_detection_client import LANG_CODE_MAP, AibridgeResult, detect_bias
	from .disambiguator import disambiguate
	from .ml_rewriter import ml_rewrite
	from .rules_engine import apply_rules_on_spans, build_reason
	from .schemas import RewriteResponse

	# One module-level metrics instance, reused by every rewrite_text() call to
	# score how well a candidate rewrite preserves the original text.
	semantic_metrics = SemanticPreservationMetrics()

	# Caller identifiers for which the Stage 0 external /detect gate is skipped.
	# These integration partners (StudyLabs and similar) have already run bias
	# detection upstream; when they call POST /rewrite with one of these caller
	# values we only correct. Entries are lowercase because the caller value is
	# stripped and lowercased before the membership check.
	_SKIP_EXTERNAL_BIAS_GATE_CALLERS = frozenset(
	    {
	        "aibridge",
	        "studylabs",
	    }
	)


	def rewrite_text(
	    id: str,
	    text: str,
	    lang: str,
	    flags: Optional[list] = None,
	    region_dialect: Optional[str] = None,
	    caller: Optional[str] = None,
	) -> tuple[RewriteResponse, dict]:
	    """
	    Run bias detection + correction. Returns (response, audit_info).

	    Stages, in order:
	      0.   Optional external bias gate (``detect_bias``). When it answers
	           "no bias" without error, the text is returned unchanged.
	      1.   Lexicon rules (``apply_rules_on_spans``).
	      2.   Semantic-preservation guard: a rule rewrite scoring below the
	           configured threshold is discarded and the original text kept.
	      2.5  LLM disambiguation of warn-only matches (``lang == "sw"`` only).
	      3.   ML fallback (``ml_rewrite``) when no rule matched at all, guarded
	           by the same semantic threshold.

	    Args:
	        id: Request identifier, echoed back on the response.
	        text: Input text to check and, where needed, rewrite.
	        lang: Language code; selects the rule set and is looked up in
	            ``LANG_CODE_MAP`` to decide whether the external gate applies.
	        flags: Forwarded unchanged to ``apply_rules_on_spans``.
	        region_dialect: Recorded in ``audit_info`` ("unknown" when falsy).
	        caller: Integration partner name. After strip/lowercase, a value in
	            ``_SKIP_EXTERNAL_BIAS_GATE_CALLERS`` ('studylabs', 'aibridge')
	            skips Stage 0 — the partner already detected bias, so only the
	            lexicon (and later stages) run to correct.

	    Returns:
	        ``(response, audit_info)``. ``audit_info`` carries ``model_info``,
	        ``latency_ms``, ``region_dialect`` and ``aibridge_error`` for logging.
	    """
	    # perf_counter() is monotonic: unlike time.time(), the measured latency
	    # cannot go negative or jump when the system clock is adjusted.
	    t0 = time.perf_counter()

	    # Stage 0: optional external bias detection gate (Hausa | Swahili | Zulu when enabled).
	    # Skipped when the integration partner already flagged bias (StudyLabs, AIBRIDGE pipeline).
	    # If the external model says no bias, skip correction entirely and return immediately.
	    # On any network/auth error, aibridge_result.error is set and we fall through silently.
	    aibridge_result: Optional[AibridgeResult] = None
	    caller_norm = (caller or "").strip().lower()
	    skip_gate = caller_norm in _SKIP_EXTERNAL_BIAS_GATE_CALLERS
	    ext_lang = LANG_CODE_MAP.get(lang) if (AIBRIDGE_ENABLED and not skip_gate) else None
	    if ext_lang:
	        aibridge_result = detect_bias(text, ext_lang)
	        if aibridge_result.error is None and not aibridge_result.has_bias:
	            latency_ms = int((time.perf_counter() - t0) * 1000)
	            response = RewriteResponse(
	                id=id,
	                original_text=text,
	                rewrite=text,
	                edits=[],
	                # Same lookup-with-default as the main path below, so a missing
	                # table entry degrades to the default instead of raising KeyError.
	                confidence=REWRITE_CONFIDENCE_BY_SOURCE.get(
	                    "aibridge_preserved", DEFAULT_REWRITE_CONFIDENCE
	                ),
	                needs_review=False,
	                source="aibridge_preserved",
	                reason=build_reason("aibridge_preserved", [], []),
	                semantic_score=None,
	                skipped_context=None,
	                has_bias_detected=False,
	                aibridge_confidence=aibridge_result.confidence,
	                aibridge_detected=False,
	            )
	            audit_info = {
	                "model_info": {
	                    "model": "aibridge-external",
	                    "confidence": aibridge_result.confidence,
	                    "message": aibridge_result.message,
	                },
	                "latency_ms": latency_ms,
	                "region_dialect": region_dialect or "unknown",
	                # Always None on this branch (guarded by `error is None`); emitted
	                # so both return paths produce the same audit_info keys.
	                "aibridge_error": None,
	            }
	            return response, audit_info

	    # Stage 1: lexicon rules. matched_rules is compared against 0 below, so it
	    # is presumably a match count — confirm against rules_engine.
	    rewritten, edits, matched_rules, skipped = apply_rules_on_spans(
	        text, lang, flags=flags
	    )
	    source = "rules"
	    ml_info = None
	    semantic_score = None

	    # Stage 2: semantic-preservation guard on the rule rewrite. A rewrite that
	    # scores below the threshold is dropped and the original text is kept.
	    threshold = get_semantic_threshold()
	    if rewritten != text:
	        score = semantic_metrics.calculate_composite_preservation_score(text, rewritten)
	        semantic_score = score["composite_score"]
	        if semantic_score < threshold:
	            rewritten, edits, source, semantic_score = text, [], "preserved", 1.0

	    # Stage 2.5: LLM disambiguation for borderline warn-only matches (SW).
	    # Only fires when rules found warn-severity terms but no replace-severity terms.
	    # The `source != "preserved"` check matters: once the semantic guard has
	    # emptied `edits`, "no replace edits" is vacuously true, and without it we
	    # would call the LLM for nothing and could relabel an untouched text as
	    # "disambiguated".
	    warn_only = (
	        source != "preserved"
	        and matched_rules > 0
	        and not any(e.get("severity") == "replace" for e in edits)
	    )
	    if warn_only and lang == "sw":
	        llm_result = disambiguate(text)
	        if llm_result is True:
	            # LLM confirmed bias — promote the warn edits to replace.
	            # The edit dicts are updated in place.
	            for e in edits:
	                if e.get("severity") == "warn":
	                    e["severity"] = "replace"
	                    e["reason"] = (e.get("reason") or "") + " [LLM confirmed]"
	            source = "disambiguated"
	        elif llm_result is False:
	            # LLM says not bias — suppress the warn edits
	            edits = []
	            rewritten = text
	            source = "preserved"
	        # Any other result (neither True nor False) leaves the rule output as is.

	    # Stage 3: ML fallback, only when no rule matched at all.
	    if matched_rules == 0 and source != "preserved":
	        ml_out = ml_rewrite(text, lang=lang, num_return_sequences=3)
	        ml_score = semantic_metrics.calculate_composite_preservation_score(
	            text, ml_out["best"]
	        )
	        if ml_score["composite_score"] < threshold:
	            # ML candidate drifts too far from the original: keep the input.
	            rewritten, source, semantic_score = text, "preserved", 1.0
	        else:
	            rewritten = ml_out["best"]
	            source = "ml"
	            semantic_score = ml_score["composite_score"]
	            ml_info = ml_out
	            # Record the whole-text substitution as a single edit.
	            edits.append({
	                "from": text,
	                "to": rewritten,
	                "severity": "ml_fallback",
	                "tags": "",
	                "reason": "ML rewrite",
	            })

	    latency_ms = int((time.perf_counter() - t0) * 1000)
	    confidence = REWRITE_CONFIDENCE_BY_SOURCE.get(source, DEFAULT_REWRITE_CONFIDENCE)
	    # ML output and results without any edit are flagged for human review.
	    needs_review = source == "ml" or len(edits) == 0
	    # The external verdict is reported only when the gate ran and did not error;
	    # otherwise it is None on the response (and False for build_reason).
	    aibridge_ok = aibridge_result is not None and aibridge_result.error is None
	    aibridge_detected = aibridge_result.has_bias if aibridge_ok else None
	    reason = build_reason(source, edits, skipped, aibridge_detected=bool(aibridge_detected))
	    has_bias_detected = any(e.get("severity") == "replace" for e in edits)

	    response = RewriteResponse(
	        id=id,
	        original_text=text,
	        rewrite=rewritten,
	        edits=edits,
	        confidence=confidence,
	        needs_review=needs_review,
	        source=source,
	        reason=reason,
	        semantic_score=semantic_score,
	        skipped_context=skipped or None,
	        has_bias_detected=has_bias_detected,
	        aibridge_confidence=aibridge_result.confidence if aibridge_ok else None,
	        aibridge_detected=aibridge_detected,
	    )
	    audit_info = {
	        "model_info": ml_info or {"model": "rulepack-v0.3"},
	        "latency_ms": latency_ms,
	        "region_dialect": region_dialect or "unknown",
	        "aibridge_error": aibridge_result.error if aibridge_result else None,
	    }
	    return response, audit_info