File size: 8,541 Bytes
11429f1 c7f016f 53535bc c7f016f 11429f1 ad1550d c7f016f 53535bc 3db64f4 53535bc ad1550d 53535bc 11429f1 c7f016f 11429f1 53535bc 911432a 11429f1 c7f016f 11429f1 c7f016f 11429f1 53535bc 11429f1 53535bc c7f016f 53535bc 11429f1 53535bc 11429f1 53535bc 886d1bd 53535bc 886d1bd 53535bc 11429f1 53535bc 11429f1 53535bc c7f016f ad1550d c7f016f 53535bc c7f016f 11429f1 c7f016f 53535bc c7f016f 53535bc ad1550d 53535bc ad1550d c8165f4 53535bc c7f016f ad1550d c7f016f 11429f1 ad1550d c7f016f 53535bc 11429f1 53535bc c7f016f fd03162 c7f016f fd03162 3db64f4 53535bc fd03162 c7f016f 53535bc fd03162 c7f016f fd03162 c8165f4 c7f016f 53535bc ad1550d fd03162 ad1550d fd03162 53535bc fd03162 11429f1 ad1550d fd03162 11429f1 fd03162 c7f016f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 | import os
import re
import html
import yaml
from pathlib import Path
import gradio as gr
# -----------------------------
# Config
# -----------------------------
# Page heading and introductory blurb shown at the top of the UI.
TITLE = "Fair Housing Text Checker - V1.7"
DESCRIPTION = " ".join(
    [
        "Paste any ad, post, or listing text.",
        "The checker highlights potential Fair Housing risks and suggests compliant alternatives.",
        "It focuses on protected classes in the U.S. Fair Housing Act.",
        "This tool does not provide legal advice.",
    ]
)

# Defaults from env
# USE_TINY_ML: the ML toggle defaults to on only when the value is exactly "1".
ENV_USE_TINY = os.environ.get("USE_TINY_ML", "1") == "1"
# ML_REPO: default model repo id offered in the UI.
ENV_REPO = os.environ.get("ML_REPO", "tlogandesigns/fairhousing-bert-tiny")
# THRESH: decision threshold for the "Potential Violation" score.
ENV_THRESH = float(os.environ.get("THRESH", "0.75"))
# Location of the YAML phrase file; overridable through the PHRASES_PATH env var.
PHRASES_PATH = os.getenv("PHRASES_PATH", "phrases.yaml")

# -----------------------------
# Load patterns
# -----------------------------
# The lookups below read the file as:
#   categories:
#     <category name>:
#       patterns: [<regex>, ...]
#       suggest:  [<alternative wording>, ...]
_phrases_file = Path(PHRASES_PATH)
if _phrases_file.exists():
    # safe_load returns None for an empty document; treat that as "no patterns".
    PHRASES = yaml.safe_load(_phrases_file.read_text(encoding="utf-8")) or {}
else:
    print(f"Warning: Phrases file '{PHRASES_PATH}' not found. Using empty patterns.")
    # Bind PHRASES here too, otherwise the loop below raises NameError.
    PHRASES = {}

# List of (category, compiled case-insensitive regex, suggestions) triples.
COMPILED = []
for cat, data in (PHRASES.get("categories") or {}).items():
    data = data or {}  # tolerate a category declared with no body
    suggestions = data.get("suggest") or []
    for p in data.get("patterns") or []:
        COMPILED.append((cat, re.compile(p, re.IGNORECASE), suggestions))
# -----------------------------
# Optional tiny transformer
# -----------------------------
# `pipe` is the module-wide classifier slot; it starts out empty.
pipe = None
try:
    from transformers import pipeline
except Exception:
    # Any failure while importing transformers disables the ML path.
    _transformers_ok = False
else:
    _transformers_ok = True
# Repo id of the model currently held in `pipe`; None until a load succeeds.
_loaded_repo = None


def _load_ml(repo: str):
    """Make sure the global ``pipe`` is a text-classification pipeline for *repo*.

    The pipeline is built on first use and then reused. It is rebuilt when
    *repo* differs from the repo that is currently loaded, so a different
    model id is not silently ignored after the first load.

    Args:
        repo: Model id passed as both ``model`` and ``tokenizer``.

    Returns:
        ``(True, None)`` when a pipeline for *repo* is available, otherwise
        ``(False, message)`` describing why it is not.
    """
    global pipe, _loaded_repo
    if not _transformers_ok:
        return False, "transformers not installed"
    if pipe is None or _loaded_repo != repo:
        try:
            fresh = pipeline(
                "text-classification",
                model=repo,
                tokenizer=repo,
                device=-1,
                return_all_scores=True,
                truncation=True,
            )
        except Exception as e:
            # Leave any previously loaded pipeline untouched and report the failure.
            return False, str(e)
        pipe = fresh
        _loaded_repo = repo
    return True, None
# -----------------------------
# HTML highlighting helpers
# -----------------------------
# Base color per finding category. build_legend uses the value as the swatch
# background; highlight_html appends two extra hex digits ("1A", "55", "33")
# to it for the mark background, outline and badge. Categories not listed
# here fall back to "#bdbdbd" in both helpers.
CATEGORY_COLORS = {
    "Familial status": "#e57373",
    "Religion": "#64b5f6",
    "Disability": "#81c784",
    "Sex": "#ba68c8",
    "Race or color": "#4db6ac",
    "National origin": "#ffd54f",
    "Other preference": "#90a4ae",
}
# Inline stylesheet that highlight_html places in front of its output.
# It styles the classes emitted by the helpers in this file: .mark/.badge
# (highlight_html), .legend/.swatch (build_legend) and .notice (analyze_text).
STYLE_BLOCK = """
<style>
.mark { padding: 0.1em 0.25em; border-radius: 0.25rem; }
.badge { display: inline-block; padding: 0 0.35em; border-radius: 0.4rem; font-size: 0.8em; margin-left: 0.3em; opacity: 0.9; }
.legend { display:flex; flex-wrap:wrap; gap:8px; margin: 0.5rem 0 1rem; }
.legend .swatch { width: 12px; height: 12px; border-radius: 3px; display:inline-block; margin-right:6px; }
.notice { margin-top: 10px; padding: 8px 10px; border-radius: 8px; background: #ffcccb; }
</style>
"""
# Disabled rule, not part of STYLE_BLOCK (highlight_html still emits the
# 'hl-container' class on its wrapper div):
# .hl-container { background: #ffffff; color: #000000; padding: 12px; border-radius: 8px; line-height: 1.7; }
def build_legend(categories):
    """Return the HTML legend: one colored swatch per category, sorted by name.

    Category names are HTML-escaped. A category with no entry in
    CATEGORY_COLORS gets the fallback color "#bdbdbd".
    """

    def swatch(name):
        color = CATEGORY_COLORS.get(name, "#bdbdbd")
        return f"<span><span class='swatch' style='background:{color}'></span>{html.escape(name)}</span>"

    return "<div class='legend'>" + "".join(map(swatch, sorted(categories))) + "</div>"
def highlight_html(text, spans):
    """Render *text* as escaped HTML with every span wrapped in a colored mark.

    Args:
        text: Raw text to render. Every emitted piece is HTML-escaped.
        spans: Iterable of ``(start, end, category)`` tuples indexing into
            *text*. May be empty or ``None``.

    Returns:
        STYLE_BLOCK followed by a ``<div class='hl-container'>`` that holds
        the marked-up text.

    Spans may overlap, because different patterns can match the same words.
    Each character of *text* is emitted exactly once: a span that ends inside
    text already emitted is dropped, and a span that starts inside it is
    clipped to begin where the previous mark ended.
    """
    out = [STYLE_BLOCK, "<div class='hl-container'>"]
    cur = 0
    # Among spans sharing a start, take the longest first so it wins the mark.
    for s, e, cat in sorted(spans or (), key=lambda sp: (sp[0], -sp[1])):
        if e <= cur:
            # Fully covered by earlier output (also skips zero-length spans).
            continue
        s = max(s, cur)  # clip a partial overlap instead of re-emitting text
        if s > cur:
            out.append(html.escape(text[cur:s]))
        frag = html.escape(text[s:e])
        color = CATEGORY_COLORS.get(cat, "#bdbdbd")
        out.append(
            f"<span class='mark' style='background:{color}1A; outline: 1px solid {color}55'>"
            f"{frag}<span class='badge' style='background:{color}33'>{html.escape(cat)}</span></span>"
        )
        cur = e
    if cur < len(text):
        out.append(html.escape(text[cur:]))
    out.append("</div>")
    return "".join(out)
# -----------------------------
# Core analysis
# -----------------------------
def _scan_rules(text):
    """Run every compiled pattern over *text* and return ``(findings, highlights)``."""
    findings = []
    highlights = []
    for cat, pat, suggestions in COMPILED:
        for m in pat.finditer(text):
            s, e = m.span()
            findings.append(
                {
                    "category": cat,
                    "match": m.group(0),
                    "start": s,
                    "end": e,
                    # Up to 40 characters either side; slicing clamps at the end of text.
                    "context": text[max(0, s - 40) : e + 40],
                    "suggestions": suggestions[:3],
                }
            )
            highlights.append((s, e, cat))
    return findings, highlights


def _classify_with_ml(text, repo, threshold):
    """Score *text* with the ML pipeline and return ``(ml_score, ml_violation, ml_status)``."""
    ok, err = _load_ml(repo)
    if not ok:
        return None, None, f"ML: error - {err}"
    try:
        scores = pipe(text)[0]
        ml_score = {s["label"]: float(s["score"]) for s in scores}
        # A model without a "Potential Violation" label scores 0.0 and never flags.
        ml_violation = ml_score.get("Potential Violation", 0.0) >= threshold
    except Exception as e:
        return {"error": str(e)}, None, f"ML: error - {e}"
    return ml_score, ml_violation, f"decision={'flag' if ml_violation else 'no flag'}"


def _format_report(findings):
    """Build the plain-text report listing each finding, or the no-findings message."""
    if not findings:
        return "No obvious risk phrases found by the rules engine."
    rows = [
        f"- Category: {f['category']} | Phrase: \"{f['match']}\"\n"
        f" Context: ...{f['context']}...\n"
        f" Suggested alternatives: {', '.join(f['suggestions']) if f['suggestions'] else 'N/A'}"
        for f in findings
    ]
    return "Potential issues:\n" + "\n".join(rows)


def analyze_text(text: str, use_ml: bool, repo_override: str, threshold: float):
    """Check *text* against the phrase rules and, optionally, the ML classifier.

    Args:
        text: Text to analyze; ``None`` is treated as an empty string.
        use_ml: When true, also score the text with the classifier.
        repo_override: Model repo id to use. Empty, ``None`` or
            whitespace-only values fall back to ENV_REPO.
        threshold: Minimum "Potential Violation" score that counts as a flag.

    Returns:
        A 4-tuple ``(html_out, report, summary, ml_status)``: the legend plus
        highlighted text as HTML, the text report, a summary dict, and a
        one-line ML status string.
    """
    text = text or ""

    # Rules-first
    findings, highlights = _scan_rules(text)

    # Optional ML
    ml_score = None
    ml_status = "ML: off"
    ml_violation = None
    if use_ml:
        # Strip before falling back, so a blank override does not become model id "".
        repo = (repo_override or "").strip() or ENV_REPO.strip()
        ml_score, ml_violation, ml_status = _classify_with_ml(text, repo, threshold)

    found_categories = {f["category"] for f in findings}
    html_out = build_legend(found_categories) + highlight_html(text, highlights)
    # If ML flags and rules found nothing, show a small notice
    if use_ml and ml_violation and not findings:
        html_out += "<div class='notice'>Model flagged this text as a potential violation based on the threshold.</div>"

    summary = {
        "issues_found": len(findings),
        "categories": sorted(found_categories),
        "ml_score": ml_score,
        "threshold": threshold,
        "ml_violation": ml_violation,
    }
    return html_out, _format_report(findings), summary, ml_status
# -----------------------------
# UI
# -----------------------------
# NOTE(review): the layout nesting below is reconstructed; confirm that the
# summary and ML status rows belong inside the accordion.
def _cleared_outputs():
    """Return blank values for the four output components (used by "Clear")."""
    blank_summary = {
        "issues_found": 0,
        "categories": [],
        "ml_score": None,
        "threshold": ENV_THRESH,
        "ml_violation": None,
    }
    return "", "", blank_summary, "ML: off"


with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(f"# {TITLE}\n\n{DESCRIPTION}")

    with gr.Row():
        inp = gr.Textbox(label="Paste text", lines=10, placeholder="Paste listing or ad copy here...")

    with gr.Accordion("Advanced Options & Summary", open=False):
        with gr.Row():
            use_ml = gr.Checkbox(label="Use tiny ML classifier", value=ENV_USE_TINY)
            repo_box = gr.Textbox(
                label="Model repo",
                value=ENV_REPO,
                info="Hugging Face repo id for a sequence classifier",
            )
            thresh = gr.Slider(
                label="Violation threshold",
                minimum=0.50,
                maximum=0.95,
                step=0.01,
                value=ENV_THRESH,
            )
        with gr.Row():
            summary = gr.JSON(label="Summary")
        with gr.Row():
            ml_status = gr.Markdown()

    with gr.Row():
        btn_check = gr.Button("Check text", variant="primary")
        btn_clear = gr.Button("Clear")

    with gr.Row():
        marked_html = gr.HTML(label="Highlighted text")

    with gr.Row():
        report = gr.Markdown()

    # Wire actions: pressing Enter in the textbox and clicking "Check text"
    # both run the same analysis with the same inputs and outputs.
    analysis_inputs = [inp, use_ml, repo_box, thresh]
    analysis_outputs = [marked_html, report, summary, ml_status]
    inp.submit(analyze_text, inputs=analysis_inputs, outputs=analysis_outputs)
    btn_check.click(analyze_text, inputs=analysis_inputs, outputs=analysis_outputs)
    # "Clear" resets the four outputs; the input textbox is not among them.
    btn_clear.click(_cleared_outputs, inputs=None, outputs=analysis_outputs)

    gr.Examples(
        examples=[
            ["Beautiful condo in quiet, safe neighborhood. No children please. Ladies only."],
            ["Close to multiple community centers and parks. Service animals accommodated per law."],
            ["christians only"],
        ],
        inputs=inp,
    )

if __name__ == "__main__":
    demo.launch()
|