Spaces:

tlogandesigns
/

FairHousing-classifier

Sleeping

App Files Files Community

FairHousing-classifier / app.py

tlogandesigns

style updates

886d1bd 5 months ago

raw

history blame contribute delete

8.54 kB

	import os
	import re
	import html
	import yaml
	from pathlib import Path
	import gradio as gr

	# -----------------------------
	# Config
	# -----------------------------
	# Page heading and introductory copy shown above the input box.
	TITLE = "Fair Housing Text Checker - V1.7"
	DESCRIPTION = (
	    "Paste any ad, post, or listing text. The checker highlights potential Fair Housing risks "
	    "and suggests compliant alternatives. It focuses on protected classes in the U.S. Fair Housing Act. "
	    "This tool does not provide legal advice."
	)

	# Runtime defaults, each overridable through an environment variable.
	# USE_TINY_ML: only the exact string "1" enables the classifier by default.
	ENV_USE_TINY = os.environ.get("USE_TINY_ML", "1") == "1"
	# ML_REPO: model repo id handed to the classifier loader.
	ENV_REPO = os.environ.get("ML_REPO", "tlogandesigns/fairhousing-bert-tiny")
	# THRESH: decision threshold for the "Potential Violation" score.
	ENV_THRESH = float(os.environ.get("THRESH", "0.75"))
	# PHRASES_PATH: location of the YAML file holding the rule patterns.
	PHRASES_PATH = os.environ.get("PHRASES_PATH", "phrases.yaml")

	# -----------------------------
	# Load patterns
	# -----------------------------
	# Load the phrase catalogue. A missing or empty file must not crash the app at
	# import time, so both cases fall back to an empty mapping (no rule patterns).
	_phrases_file = Path(PHRASES_PATH)
	if _phrases_file.exists():
	    # yaml.safe_load returns None for an empty document; normalise that to {}.
	    PHRASES = yaml.safe_load(_phrases_file.read_text(encoding="utf-8")) or {}
	else:
	    print(f"Warning: Phrases file '{PHRASES_PATH}' not found. Using empty patterns.")
	    PHRASES = {}

	# Flatten the catalogue into (category, compiled regex, suggestions) triples
	# so the rules pass can iterate a single list.
	COMPILED = []
	for cat, data in (PHRASES.get("categories") or {}).items():
	    data = data or {}  # a category key with no body parses as None
	    suggestions = data.get("suggest") or []
	    for p in data.get("patterns") or []:
	        COMPILED.append((cat, re.compile(p, re.IGNORECASE), suggestions))

	# -----------------------------
	# Optional tiny transformer
	# -----------------------------
	# Shared classifier pipeline, created on first use, and the repo id it was
	# built from (so a different repo id triggers a rebuild instead of silently
	# reusing the previous model).
	pipe = None
	_pipe_repo = None
	try:
	    from transformers import pipeline
	except Exception:
	    # transformers is optional; without it _load_ml reports a failure.
	    _transformers_ok = False
	else:
	    _transformers_ok = True


	def _load_ml(repo: str):
	    """Make sure the module-level ``pipe`` is a classifier built from *repo*.

	    The pipeline is built on first use and rebuilt whenever *repo* differs
	    from the repo it was last built from. If building fails, the previously
	    loaded pipeline (if any) is left in place and the failure is reported.

	    Args:
	        repo: Model repo id, used for both the model and the tokenizer.

	    Returns:
	        ``(True, None)`` when ``pipe`` is ready for *repo*, otherwise
	        ``(False, message)`` describing why it could not be loaded.
	    """
	    global pipe, _pipe_repo
	    if not _transformers_ok:
	        return False, "transformers not installed"
	    if pipe is None or _pipe_repo != repo:
	        try:
	            fresh = pipeline(
	                "text-classification",
	                model=repo,
	                tokenizer=repo,
	                device=-1,
	                return_all_scores=True,
	                truncation=True,
	            )
	        except Exception as e:
	            return False, str(e)
	        # Publish only after a successful build so a failed load never
	        # leaves a half-updated (pipe, repo) pair behind.
	        pipe, _pipe_repo = fresh, repo
	    return True, None

	# -----------------------------
	# HTML highlighting helpers
	# -----------------------------
	# Base colour for each finding category, used for legend swatches and for
	# highlighted spans. Categories missing from this table fall back to "#bdbdbd"
	# at the lookup sites. Values need to stay 6-digit hex: highlight_html appends
	# two extra hex digits (e.g. "1A") to build the final CSS colour.
	CATEGORY_COLORS = {
	"Familial status": "#e57373",
	"Religion": "#64b5f6",
	"Disability": "#81c784",
	"Sex": "#ba68c8",
	"Race or color": "#4db6ac",
	"National origin": "#ffd54f",
	"Other preference": "#90a4ae",
	}

	# Inline stylesheet prepended to the highlighted output. It styles the
	# .mark / .badge spans, the .legend row with its .swatch squares, and the
	# .notice box.
	STYLE_BLOCK = """
	<style>
	.mark { padding: 0.1em 0.25em; border-radius: 0.25rem; }
	.badge { display: inline-block; padding: 0 0.35em; border-radius: 0.4rem; font-size: 0.8em; margin-left: 0.3em; opacity: 0.9; }
	.legend { display:flex; flex-wrap:wrap; gap:8px; margin: 0.5rem 0 1rem; }
	.legend .swatch { width: 12px; height: 12px; border-radius: 3px; display:inline-block; margin-right:6px; }
	.notice { margin-top: 10px; padding: 8px 10px; border-radius: 8px; background: #ffcccb; }
	</style>
	"""
	# Disabled rule, kept for reference: highlight_html still emits the
	# 'hl-container' class, but no CSS for it is currently included above.
	# .hl-container { background: #ffffff; color: #000000; padding: 12px; border-radius: 8px; line-height: 1.7; }


	def build_legend(categories):
	    """Return the HTML legend: one colour swatch and label per category.

	    Args:
	        categories: Iterable of category names; entries are emitted in sorted
	            order, and names without a configured colour get "#bdbdbd".

	    Returns:
	        A ``<div class='legend'>`` string (present even when there are no
	        categories) with each category name HTML-escaped.
	    """

	    def _entry(name):
	        swatch = CATEGORY_COLORS.get(name, "#bdbdbd")
	        return f"<span><span class='swatch' style='background:{swatch}'></span>{html.escape(name)}</span>"

	    entries = "".join(_entry(name) for name in sorted(categories))
	    return "<div class='legend'>" + entries + "</div>"


	def highlight_html(text, spans):
	    """Render *text* as escaped HTML with each span wrapped in a coloured mark.

	    Args:
	        text: Raw text to display; it is HTML-escaped piece by piece.
	        spans: Collection of ``(start, end, category)`` tuples indexing *text*.

	    Returns:
	        ``STYLE_BLOCK`` followed by a ``<div class='hl-container'>`` holding
	        the escaped text, where every rendered span carries a category badge.

	    Spans may overlap, because several patterns can match the same words.
	    Every character of *text* is emitted exactly once: a span that starts
	    inside an earlier highlight is trimmed to the part not yet rendered and
	    dropped when nothing remains. Empty spans are ignored.
	    """
	    if not spans:
	        return STYLE_BLOCK + f"<div class='hl-container'>{html.escape(text)}</div>"
	    out = [STYLE_BLOCK, "<div class='hl-container'>"]
	    cur = 0  # index of the first character of text not yet written to out
	    # Among spans sharing a start, take the longest first so the widest match
	    # supplies the label and the shorter ones are swallowed.
	    for s, e, cat in sorted(spans, key=lambda sp: (sp[0], -sp[1])):
	        s = max(s, cur)  # never re-emit text that is already in the output
	        if s >= e:
	            continue
	        if s > cur:
	            out.append(html.escape(text[cur:s]))
	        frag = html.escape(text[s:e])
	        color = CATEGORY_COLORS.get(cat, "#bdbdbd")
	        out.append(
	            f"<span class='mark' style='background:{color}1A; outline: 1px solid {color}55'>"
	            f"{frag}<span class='badge' style='background:{color}33'>{html.escape(cat)}</span></span>"
	        )
	        cur = e
	    if cur < len(text):
	        out.append(html.escape(text[cur:]))
	    out.append("</div>")
	    return "".join(out)

	# -----------------------------
	# Core analysis
	# -----------------------------

	def analyze_text(text: str, use_ml: bool, repo_override: str, threshold: float):
	    """Check *text* with the rule patterns and, optionally, the ML classifier.

	    Args:
	        text: Listing or ad copy to check; a falsy value is treated as "".
	        use_ml: When true, also score the text with the classifier pipeline.
	        repo_override: Model repo id to use. Blank or whitespace-only values
	            fall back to ``ENV_REPO``.
	        threshold: Minimum "Potential Violation" score that counts as a flag.

	    Returns:
	        A 4-tuple ``(html_out, report, summary, ml_status)``:
	        the legend plus highlighted text as HTML (with a notice appended when
	        only the model flagged the text), a Markdown report of rule findings,
	        a summary dict, and a short status string for the classifier.
	    """
	    text = text or ""
	    findings = []
	    highlights = []

	    # Rules first: every regex hit becomes a finding and a highlight span.
	    for cat, pat, suggestions in COMPILED:
	        for m in pat.finditer(text):
	            s, e = m.span()
	            findings.append(
	                {
	                    "category": cat,
	                    "match": m.group(0),
	                    "start": s,
	                    "end": e,
	                    # up to 40 characters of surrounding text on each side
	                    "context": text[max(0, s - 40) : min(len(text), e + 40)],
	                    "suggestions": suggestions[:3],
	                }
	            )
	            highlights.append((s, e, cat))

	    # Optional ML pass.
	    ml_score = None
	    ml_status = "ML: off"
	    ml_violation = None
	    # Strip before falling back, so a whitespace-only override selects the
	    # default repo instead of becoming an empty repo id.
	    repo = (repo_override or "").strip() or ENV_REPO.strip()
	    if use_ml:
	        ok, err = _load_ml(repo)
	        if ok:
	            try:
	                scores = pipe(text)[0]
	                ml_score = {item["label"]: float(item["score"]) for item in scores}
	                pv = ml_score.get("Potential Violation", 0.0)
	                ml_violation = pv >= threshold
	                ml_status = f"decision={'flag' if ml_violation else 'no flag'}"
	            except Exception as exc:
	                # Report inference failures in the UI rather than failing the request.
	                ml_score = {"error": str(exc)}
	                ml_status = f"ML: error - {exc}"
	        else:
	            ml_status = f"ML: error - {err}"

	    categories = sorted({f["category"] for f in findings})
	    html_out = build_legend(categories) + highlight_html(text, highlights)

	    # If ML flags and rules found nothing, show a small notice
	    if use_ml and ml_violation and not findings:
	        html_out += "<div class='notice'>Model flagged this text as a potential violation based on the threshold.</div>"

	    summary = {
	        "issues_found": len(findings),
	        "categories": categories,
	        "ml_score": ml_score,
	        "threshold": threshold,
	        "ml_violation": ml_violation,
	    }

	    if findings:
	        # "\\|" emits a literal backslash + pipe, i.e. an escaped pipe in the
	        # Markdown report.
	        rows = [
	            f"- Category: {f['category']} \\| Phrase: \"{f['match']}\"\n"
	            f" Context: ...{f['context']}...\n"
	            f" Suggested alternatives: {', '.join(f['suggestions']) if f['suggestions'] else 'N/A'}"
	            for f in findings
	        ]
	        report = "Potential issues:\n" + "\n".join(rows)
	    else:
	        report = "No obvious risk phrases found by the rules engine."

	    return html_out, report, summary, ml_status

	# -----------------------------
	# UI
	# -----------------------------
	# Page definition: components are declared in display order, then the event
	# handlers are attached. `demo` is the Blocks app launched at the bottom.
	with gr.Blocks(fill_height=True) as demo:
	# Heading and description text from the config section.
	gr.Markdown(f"# {TITLE}\n\n{DESCRIPTION}")

	# Text input for the listing/ad copy to check.
	with gr.Row():
	inp = gr.Textbox(label="Paste text", lines=10, placeholder="Paste listing or ad copy here...")

	# "Advanced Options & Summary" accordion (collapsed initially): classifier
	# toggle, model repo id and decision threshold, all seeded from the
	# environment-derived defaults, plus the JSON summary and ML status outputs.
	with gr.Accordion("Advanced Options & Summary", open=False):
	with gr.Row():
	use_ml = gr.Checkbox(label="Use tiny ML classifier", value=ENV_USE_TINY)
	repo_box = gr.Textbox(label="Model repo", value=ENV_REPO, info="Hugging Face repo id for a sequence classifier")
	# The slider only offers 0.50-0.95; ENV_THRESH is its initial value.
	thresh = gr.Slider(label="Violation threshold", minimum=0.50, maximum=0.95, step=0.01, value=ENV_THRESH)
	with gr.Row():
	summary = gr.JSON(label="Summary")
	with gr.Row():
	ml_status = gr.Markdown()

	# Action buttons.
	with gr.Row():
	btn_check = gr.Button("Check text", variant="primary")
	btn_clear = gr.Button("Clear")

	# Output: highlighted HTML rendering of the input.
	with gr.Row():
	marked_html = gr.HTML(label="Highlighted text")

	# Output: Markdown report of rule findings.
	with gr.Row():
	report = gr.Markdown()



	# Wire actions
	# Submitting the textbox and clicking "Check text" both run analyze_text with
	# the same inputs and fill the same four outputs.
	inp.submit(analyze_text, inputs=[inp, use_ml, repo_box, thresh], outputs=[marked_html, report, summary, ml_status])
	btn_check.click(analyze_text, inputs=[inp, use_ml, repo_box, thresh], outputs=[marked_html, report, summary, ml_status])

	# "Clear" resets the four output components to their empty state. The input
	# textbox is not among its outputs, so the pasted text is left as is.
	btn_clear.click(
	lambda: ("", "", {"issues_found": 0, "categories": [], "ml_score": None, "threshold": ENV_THRESH, "ml_violation": None}, "ML: off"),
	inputs=None,
	outputs=[marked_html, report, summary, ml_status],
	)

	# Sample texts that can be loaded into the input textbox.
	gr.Examples(
	examples=[
	["Beautiful condo in quiet, safe neighborhood. No children please. Ladies only."],
	["Close to multiple community centers and parks. Service animals accommodated per law."],
	["christians only"],
	],
	inputs=inp,
	)

	# Launch the app only when this file is run as a script, not when imported.
	if __name__ == "__main__":
	demo.launch()