"""Gradio app that flags potential Fair Housing risks in listing/ad text.

A rules engine (regex patterns loaded from a YAML phrases file) produces the
highlighted findings; an optional Hugging Face text-classification pipeline
adds a model-based "Potential Violation" decision on top.
"""

import html
import os
import re
from pathlib import Path

import gradio as gr
import yaml

# -----------------------------
# Config
# -----------------------------
TITLE = "Fair Housing Text Checker - V1.7"
DESCRIPTION = (
    "Paste any ad, post, or listing text. The checker highlights potential Fair Housing risks "
    "and suggests compliant alternatives. It focuses on protected classes in the U.S. Fair Housing Act. "
    "This tool does not provide legal advice."
)

# Defaults from env
ENV_USE_TINY = os.getenv("USE_TINY_ML", "1") == "1"
ENV_REPO = os.getenv("ML_REPO", "tlogandesigns/fairhousing-bert-tiny")
ENV_THRESH = float(os.getenv("THRESH", "0.75"))  # decision threshold for Potential Violation
PHRASES_PATH = os.getenv("PHRASES_PATH", "phrases.yaml")


# -----------------------------
# Load patterns
# -----------------------------
def _load_phrases(path):
    """Return the parsed phrases mapping, or ``{}`` if the file is missing or empty."""
    phrases_file = Path(path)
    if not phrases_file.exists():
        print(f"Warning: Phrases file '{path}' not found. Using empty patterns.")
        return {}
    # safe_load returns None for an empty document; normalise to a dict so the
    # ``.get`` calls below always work.
    return yaml.safe_load(phrases_file.read_text(encoding="utf-8")) or {}


def _compile_patterns(phrases):
    """Build ``(category, compiled_regex, suggestions)`` tuples from *phrases*."""
    compiled = []
    for cat, data in (phrases.get("categories") or {}).items():
        data = data or {}
        suggestions = data.get("suggest") or []
        for pattern in data.get("patterns") or []:
            compiled.append((cat, re.compile(pattern, re.IGNORECASE), suggestions))
    return compiled


PHRASES = _load_phrases(PHRASES_PATH)
COMPILED = _compile_patterns(PHRASES)

# -----------------------------
# Optional tiny transformer
# -----------------------------
pipe = None
_pipe_repo = None  # repo id the cached ``pipe`` was built from
_transformers_ok = False
try:
    from transformers import pipeline

    _transformers_ok = True
except Exception:
    # transformers is optional; any import-time failure just disables the ML path.
    _transformers_ok = False


def _load_ml(repo: str):
    """Ensure the global ``pipe`` is a classifier loaded from *repo*.

    The pipeline is cached and rebuilt only when *repo* differs from the one
    the cached pipeline was created from.

    Returns:
        ``(True, None)`` when a pipeline for *repo* is ready, otherwise
        ``(False, message)`` describing why it is unavailable.
    """
    global pipe, _pipe_repo
    if not _transformers_ok:
        return False, "transformers not installed"
    if pipe is None or _pipe_repo != repo:
        try:
            loaded = pipeline(
                "text-classification",
                model=repo,
                tokenizer=repo,
                device=-1,
                return_all_scores=True,
                truncation=True,
            )
        except Exception as e:
            # Leave any previously cached pipeline untouched; the caller only
            # uses ``pipe`` when this function reports success.
            return False, str(e)
        pipe, _pipe_repo = loaded, repo
    return True, None


# -----------------------------
# HTML highlighting helpers
# -----------------------------
CATEGORY_COLORS = {
    "Familial status": "#e57373",
    "Religion": "#64b5f6",
    "Disability": "#81c784",
    "Sex": "#ba68c8",
    "Race or color": "#4db6ac",
    "National origin": "#ffd54f",
    "Other preference": "#90a4ae",
}
_DEFAULT_COLOR = "#bdbdbd"

# NOTE(review): the HTML/CSS inside the string literals of this file was lost
# (only the text content survived), so the markup and class names below are a
# reconstruction - confirm against the deployed version before shipping.
STYLE_BLOCK = """
<style>
.hl-legend { margin-bottom: 8px; }
.hl-chip { display: inline-block; margin: 0 6px 6px 0; padding: 2px 8px;
           border-radius: 10px; color: #000000; font-size: 0.85em; }
.hl-mark { padding: 1px 3px; border-radius: 4px; color: #000000; }
.hl-tag { margin-left: 4px; font-size: 0.7em; font-weight: 600; opacity: 0.75; }
.hl-notice { margin-top: 8px; font-style: italic; }
</style>
"""
# Disabled rule kept from the original file:
# .hl-container { background: #ffffff; color: #000000; padding: 12px; border-radius: 8px; line-height: 1.7; }


def build_legend(categories):
    """Return an HTML legend with one colored chip per category, sorted by name."""
    parts = ["<div class='hl-legend'>"]
    for cat in sorted(categories):
        color = CATEGORY_COLORS.get(cat, _DEFAULT_COLOR)
        parts.append(
            f"<span class='hl-chip' style='background:{color}'>{html.escape(cat)}</span>"
        )
    parts.append("</div>")
    return "".join(parts)


def highlight_html(text, spans):
    """Render *text* as escaped HTML with each span wrapped in a colored mark.

    Args:
        text: The raw user text.
        spans: Iterable of ``(start, end, category)`` character ranges.

    Returns:
        An HTML string (style block + container). Overlapping spans are
        clipped so no character is emitted twice; empty spans are dropped.
    """
    if not spans:
        return STYLE_BLOCK + f"<div class='hl-container'>{html.escape(text)}</div>"

    limit = len(text)
    out = [STYLE_BLOCK, "<div class='hl-container'>"]
    cur = 0
    # Among spans starting at the same offset, take the longest first so the
    # widest match is the one that gets rendered.
    for start, end, cat in sorted(spans, key=lambda sp: (sp[0], -sp[1])):
        start = max(start, cur)  # clip the part already emitted by an earlier span
        end = min(end, limit)
        if start >= end:
            continue
        if start > cur:
            out.append(html.escape(text[cur:start]))
        frag = html.escape(text[start:end])
        color = CATEGORY_COLORS.get(cat, _DEFAULT_COLOR)
        out.append(
            f"<mark class='hl-mark' style='background:{color}'>"
            f"{frag}<span class='hl-tag'>{html.escape(cat)}</span></mark>"
        )
        cur = end
    if cur < limit:
        out.append(html.escape(text[cur:]))
    out.append("</div>")
    return "".join(out)


# -----------------------------
# Core analysis
# -----------------------------
def analyze_text(text: str, use_ml: bool, repo_override: str, threshold: float):
    """Run the rules engine (and optionally the ML classifier) over *text*.

    Args:
        text: Text to check; ``None`` is treated as an empty string.
        use_ml: Whether to also score the text with the classifier pipeline.
        repo_override: Model repo id; blank falls back to ``ENV_REPO``.
        threshold: Minimum "Potential Violation" score that counts as a flag.

    Returns:
        ``(html_out, report, summary, ml_status)``: highlighted HTML, a
        plain-text report of findings, a summary dict, and an ML status line.
    """
    text = text or ""
    findings = []
    highlights = []

    # Rules-first
    for cat, pat, suggestions in COMPILED:
        for m in pat.finditer(text):
            start, end = m.span()
            if start == end:
                # A pattern that can match the empty string is not a finding.
                continue
            findings.append(
                {
                    "category": cat,
                    "match": m.group(0),
                    "start": start,
                    "end": end,
                    "context": text[max(0, start - 40) : min(len(text), end + 40)],
                    "suggestions": suggestions[:3],
                }
            )
            highlights.append((start, end, cat))

    # Optional ML
    ml_score = None
    ml_status = "ML: off"
    ml_violation = None
    # Strip before falling back so a whitespace-only textbox still uses the default.
    repo = (repo_override or "").strip() or ENV_REPO.strip()
    if use_ml:
        ok, err = _load_ml(repo)
        if ok:
            try:
                result = pipe(text)
                # Accept both the nested ([[{...}, ...]]) and the flat
                # ([{...}, ...]) result shapes for a single input string.
                scores = result[0] if result and isinstance(result[0], list) else result
                ml_score = {item["label"]: float(item["score"]) for item in scores}
                pv = ml_score.get("Potential Violation", 0.0)
                ml_violation = pv >= threshold
                ml_status = f"decision={'flag' if ml_violation else 'no flag'}"
            except Exception as e:
                ml_score = {"error": str(e)}
                ml_status = f"ML: error - {e}"
        else:
            ml_status = f"ML: error - {err}"

    found_categories = {f["category"] for f in findings}
    html_out = build_legend(found_categories) + highlight_html(text, highlights)

    # If ML flags and rules found nothing, show a small notice
    if use_ml and ml_violation and not findings:
        html_out += (
            "<div class='hl-notice'>"
            "Model flagged this text as a potential violation based on the threshold."
            "</div>"
        )

    summary = {
        "issues_found": len(findings),
        "categories": sorted(found_categories),
        "ml_score": ml_score,
        "threshold": threshold,
        "ml_violation": ml_violation,
    }

    if findings:
        rows = [
            f"- Category: {f['category']} | Phrase: \"{f['match']}\"\n"
            f"  Context: ...{f['context']}...\n"
            f"  Suggested alternatives: {', '.join(f['suggestions']) if f['suggestions'] else 'N/A'}"
            for f in findings
        ]
        report = "Potential issues:\n" + "\n".join(rows)
    else:
        report = "No obvious risk phrases found by the rules engine."

    return html_out, report, summary, ml_status


# -----------------------------
# UI
# -----------------------------
# NOTE(review): the nesting of rows inside the accordion is inferred from the
# component order and the accordion title - confirm against the deployed layout.
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(f"# {TITLE}\n\n{DESCRIPTION}")

    with gr.Row():
        inp = gr.Textbox(label="Paste text", lines=10, placeholder="Paste listing or ad copy here...")

    with gr.Accordion("Advanced Options & Summary", open=False):
        with gr.Row():
            use_ml = gr.Checkbox(label="Use tiny ML classifier", value=ENV_USE_TINY)
            repo_box = gr.Textbox(
                label="Model repo",
                value=ENV_REPO,
                info="Hugging Face repo id for a sequence classifier",
            )
            thresh = gr.Slider(
                label="Violation threshold",
                minimum=0.50,
                maximum=0.95,
                step=0.01,
                value=ENV_THRESH,
            )
        with gr.Row():
            summary = gr.JSON(label="Summary")
        with gr.Row():
            ml_status = gr.Markdown()

    with gr.Row():
        btn_check = gr.Button("Check text", variant="primary")
        btn_clear = gr.Button("Clear")

    with gr.Row():
        marked_html = gr.HTML(label="Highlighted text")

    with gr.Row():
        report = gr.Markdown()

    # Wire actions
    _analysis_inputs = [inp, use_ml, repo_box, thresh]
    _analysis_outputs = [marked_html, report, summary, ml_status]

    inp.submit(analyze_text, inputs=_analysis_inputs, outputs=_analysis_outputs)
    btn_check.click(analyze_text, inputs=_analysis_inputs, outputs=_analysis_outputs)
    btn_clear.click(
        lambda: (
            "",
            "",
            {
                "issues_found": 0,
                "categories": [],
                "ml_score": None,
                "threshold": ENV_THRESH,
                "ml_violation": None,
            },
            "ML: off",
        ),
        inputs=None,
        outputs=_analysis_outputs,
    )

    gr.Examples(
        examples=[
            ["Beautiful condo in quiet, safe neighborhood. No children please. Ladies only."],
            ["Close to multiple community centers and parks. Service animals accommodated per law."],
            ["christians only"],
        ],
        inputs=inp,
    )

if __name__ == "__main__":
    demo.launch()