File size: 8,541 Bytes
11429f1
c7f016f
53535bc
c7f016f
 
 
 
11429f1
 
 
ad1550d
c7f016f
 
 
 
 
 
53535bc
3db64f4
53535bc
ad1550d
53535bc
11429f1
 
c7f016f
11429f1
53535bc
 
 
911432a
11429f1
c7f016f
 
11429f1
 
c7f016f
11429f1
53535bc
11429f1
 
53535bc
 
 
 
 
 
c7f016f
53535bc
11429f1
53535bc
 
11429f1
53535bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
886d1bd
53535bc
 
886d1bd
 
53535bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11429f1
53535bc
 
 
 
 
11429f1
53535bc
 
 
c7f016f
ad1550d
c7f016f
 
 
 
53535bc
c7f016f
 
11429f1
 
 
 
 
 
 
 
 
 
 
 
 
c7f016f
53535bc
c7f016f
53535bc
ad1550d
53535bc
 
 
 
 
 
 
ad1550d
 
c8165f4
53535bc
 
 
 
 
 
 
c7f016f
ad1550d
 
 
 
c7f016f
 
 
11429f1
ad1550d
 
c7f016f
 
 
 
 
 
 
 
 
 
 
 
 
 
53535bc
11429f1
53535bc
 
 
c7f016f
 
fd03162
c7f016f
 
fd03162
3db64f4
 
 
 
 
 
 
 
 
53535bc
fd03162
 
 
 
c7f016f
53535bc
fd03162
c7f016f
 
fd03162
c8165f4
c7f016f
53535bc
ad1550d
 
fd03162
 
ad1550d
fd03162
53535bc
fd03162
 
 
 
 
11429f1
ad1550d
fd03162
11429f1
fd03162
 
c7f016f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
import os
import re
import html
import yaml
from pathlib import Path
import gradio as gr

# -----------------------------
# Config
# -----------------------------
# Text shown at the top of the UI.
TITLE = "Fair Housing Text Checker - V1.7"
DESCRIPTION = " ".join(
    [
        "Paste any ad, post, or listing text.",
        "The checker highlights potential Fair Housing risks and suggests compliant alternatives.",
        "It focuses on protected classes in the U.S. Fair Housing Act.",
        "This tool does not provide legal advice.",
    ]
)

# Runtime defaults, each overridable through an environment variable.
ENV_USE_TINY = os.environ.get("USE_TINY_ML", "1") == "1"  # only the exact string "1" enables ML
ENV_REPO = os.environ.get("ML_REPO", "tlogandesigns/fairhousing-bert-tiny")
# Decision threshold for the "Potential Violation" label; a non-numeric value
# raises ValueError at import time.
ENV_THRESH = float(os.environ.get("THRESH", "0.75"))
PHRASES_PATH = os.environ.get("PHRASES_PATH", "phrases.yaml")

# -----------------------------
# Load patterns
# -----------------------------
# Start from an empty rule set so a missing or empty phrases file leaves the
# module importable: the rules engine then simply finds nothing.
PHRASES = {}
if Path(PHRASES_PATH).exists():
    _loaded_phrases = yaml.safe_load(Path(PHRASES_PATH).read_text(encoding="utf-8"))
    # safe_load returns None for an empty document and may return a non-mapping
    # for unexpected content; only a mapping is usable below.
    if isinstance(_loaded_phrases, dict):
        PHRASES = _loaded_phrases
else:
    print(f"Warning: Phrases file '{PHRASES_PATH}' not found. Using empty patterns.")

# Flat list of (category, compiled case-insensitive regex, suggestions) tuples,
# one entry per pattern. `or` guards cover keys that are present but null.
COMPILED = []
for cat, data in (PHRASES.get("categories") or {}).items():
    data = data or {}
    suggestions = data.get("suggest") or []
    for p in data.get("patterns") or []:
        COMPILED.append((cat, re.compile(p, re.IGNORECASE), suggestions))

# -----------------------------
# Optional tiny transformer
# -----------------------------
# Cached text-classification pipeline (None until the first successful load).
pipe = None
# Repo id the cached `pipe` was built from, so a different repo triggers a reload.
_pipe_repo = None
_transformers_ok = False
try:
    from transformers import pipeline
    _transformers_ok = True
except Exception:
    # transformers is optional; any import failure just disables the ML path.
    _transformers_ok = False


def _load_ml(repo: str):
    """Ensure the global ``pipe`` is a classifier built from *repo*.

    The pipeline is cached per repo id: calling again with the same repo reuses
    it, while a different repo builds a new pipeline. On a failed load the
    previously cached pipeline (if any) is left in place.

    Args:
        repo: Model repo id passed as both ``model`` and ``tokenizer``.

    Returns:
        ``(True, None)`` when ``pipe`` is ready for *repo*, otherwise
        ``(False, message)`` describing why it could not be loaded.
    """
    global pipe, _pipe_repo
    if not _transformers_ok:
        return False, "transformers not installed"
    if pipe is not None and _pipe_repo == repo:
        return True, None
    try:
        loaded = pipeline(
            "text-classification",
            model=repo,
            tokenizer=repo,
            device=-1,
            return_all_scores=True,
            truncation=True,
        )
    except Exception as e:
        return False, str(e)
    # Publish pipeline and repo together, only after a successful build.
    pipe, _pipe_repo = loaded, repo
    return True, None

# -----------------------------
# HTML highlighting helpers
# -----------------------------
# Highlight color per finding category, as "#rrggbb" hex strings.
# build_legend and highlight_html look categories up here and fall back to
# "#bdbdbd" for any category that is not listed. highlight_html appends two
# more hex digits (1A / 55 / 33) to these values to form 8-digit hex colors,
# so entries must stay in 6-digit form.
# NOTE(review): keys presumably mirror the category names in the phrases
# file — confirm against phrases.yaml.
CATEGORY_COLORS = {
    "Familial status": "#e57373",
    "Religion": "#64b5f6",
    "Disability": "#81c784",
    "Sex": "#ba68c8",
    "Race or color": "#4db6ac",
    "National origin": "#ffd54f",
    "Other preference": "#90a4ae",
}

# Inline stylesheet that highlight_html prepends to its output. It styles the
# classes emitted by this module: .mark/.badge (highlighted spans),
# .legend/.swatch (category legend) and .notice (ML-only warning).
STYLE_BLOCK = """
<style>
.mark { padding: 0.1em 0.25em; border-radius: 0.25rem; }
.badge { display: inline-block; padding: 0 0.35em; border-radius: 0.4rem; font-size: 0.8em; margin-left: 0.3em; opacity: 0.9; }
.legend { display:flex; flex-wrap:wrap; gap:8px; margin: 0.5rem 0 1rem; }
.legend .swatch { width: 12px; height: 12px; border-radius: 3px; display:inline-block; margin-right:6px; }
.notice { margin-top: 10px; padding: 8px 10px; border-radius: 8px; background: #ffcccb; }
</style>
"""
# Disabled rule for the container div (highlight_html still emits the
# 'hl-container' class, it just has no styling of its own):
# .hl-container { background: #ffffff; color: #000000; padding: 12px; border-radius: 8px; line-height: 1.7; }


def build_legend(categories):
    """Return an HTML legend with one color swatch per category.

    Categories are listed in sorted order; a category without an entry in
    CATEGORY_COLORS gets the grey "#bdbdbd" swatch. Names are HTML-escaped.
    An empty collection yields an empty legend ``div``.
    """
    entries = "".join(
        "<span><span class='swatch' style='background:{}'></span>{}</span>".format(
            CATEGORY_COLORS.get(name, "#bdbdbd"), html.escape(name)
        )
        for name in sorted(categories)
    )
    return "<div class='legend'>" + entries + "</div>"


def highlight_html(text, spans):
    """Render *text* as HTML with each flagged span wrapped in a colored mark.

    Args:
        text: Raw text to display; every emitted piece is HTML-escaped.
        spans: Iterable of ``(start, end, category)`` tuples indexing into
            *text*; may be empty or ``None``.

    Returns:
        ``STYLE_BLOCK`` followed by a ``div.hl-container`` holding the text,
        where each span is a ``span.mark`` with a trailing category badge.

    Spans are processed left to right (longest first when starts tie). A span
    that overlaps text already emitted is clipped to the part after it, and
    dropped if nothing remains, so no character of *text* is output twice.
    Zero-length spans are ignored.
    """
    out = [STYLE_BLOCK, "<div class='hl-container'>"]
    cur = 0  # index of the first character of `text` not yet emitted
    for s, e, cat in sorted(spans or [], key=lambda span: (span[0], -span[1])):
        # Clip to the unemitted region; without this an overlapping match
        # would re-emit text[s:cur] and duplicate it in the output.
        s = max(s, cur)
        if s >= e:
            continue
        if s > cur:
            out.append(html.escape(text[cur:s]))
        frag = html.escape(text[s:e])
        color = CATEGORY_COLORS.get(cat, "#bdbdbd")
        # The two hex digits appended to the color act as its alpha channel.
        out.append(
            f"<span class='mark' style='background:{color}1A; outline: 1px solid {color}55'>"
            f"{frag}<span class='badge' style='background:{color}33'>{html.escape(cat)}</span></span>"
        )
        cur = e
    if cur < len(text):
        out.append(html.escape(text[cur:]))
    out.append("</div>")
    return "".join(out)

# -----------------------------
# Core analysis
# -----------------------------

def analyze_text(text: str, use_ml: bool, repo_override: str, threshold: float):
    """Check *text* with the regex rules and, optionally, the ML classifier.

    Args:
        text: Listing or ad copy to check; ``None`` is treated as "".
        use_ml: When true, also score the text with the classification pipeline.
        repo_override: Model repo id to use; ``None``, empty, or whitespace-only
            falls back to ``ENV_REPO``.
        threshold: Minimum "Potential Violation" score for the model to flag.

    Returns:
        A 4-tuple ``(html_out, report, summary, ml_status)``: highlighted HTML
        (legend + marked text, plus a notice when only the model flags), a
        plain-text report of rule hits, a summary dict, and a status string
        for the ML step.
    """
    text = text or ""
    findings = []
    highlights = []

    # Rules-first: every regex hit becomes a finding plus a highlight span.
    for cat, pat, suggestions in COMPILED:
        for m in pat.finditer(text):
            s, e = m.span()
            findings.append(
                {
                    "category": cat,
                    "match": m.group(0),
                    "start": s,
                    "end": e,
                    # Up to 40 characters of surrounding text on each side.
                    "context": text[max(0, s - 40) : min(len(text), e + 40)],
                    "suggestions": suggestions[:3],
                }
            )
            highlights.append((s, e, cat))

    categories = {f["category"] for f in findings}

    # Optional ML
    ml_score = None
    ml_status = "ML: off"
    ml_violation = None
    if use_ml:
        # Strip before falling back: a whitespace-only override is truthy and
        # would otherwise become an empty repo id.
        repo = (repo_override or "").strip() or ENV_REPO.strip()
        ok, err = _load_ml(repo)
        if ok:
            try:
                scores = pipe(text)[0]
                ml_score = {s["label"]: float(s["score"]) for s in scores}
                pv = ml_score.get("Potential Violation", 0.0)
                ml_violation = pv >= threshold
                ml_status = f"decision={'flag' if ml_violation else 'no flag'}"
            except Exception as e:
                # Surface inference failures in the UI instead of failing the request.
                ml_score = {"error": str(e)}
                ml_status = f"ML: error - {e}"
        else:
            ml_status = f"ML: error - {err}"

    html_out = build_legend(categories) + highlight_html(text, highlights)

    # If ML flags and rules found nothing, show a small notice
    if use_ml and ml_violation and not findings:
        html_out += "<div class='notice'>Model flagged this text as a potential violation based on the threshold.</div>"

    summary = {
        "issues_found": len(findings),
        "categories": sorted(categories),
        "ml_score": ml_score,
        "threshold": threshold,
        "ml_violation": ml_violation,
    }

    if findings:
        rows = [
            f"- Category: {f['category']} | Phrase: \"{f['match']}\"\n"
            f"  Context: ...{f['context']}...\n"
            f"  Suggested alternatives: {', '.join(f['suggestions']) if f['suggestions'] else 'N/A'}"
            for f in findings
        ]
        report = "Potential issues:\n" + "\n".join(rows)
    else:
        report = "No obvious risk phrases found by the rules engine."

    return html_out, report, summary, ml_status

# -----------------------------
# UI
# -----------------------------
# Build the Gradio interface. Components are created in display order, and
# event listeners are registered in the same order as before.
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(f"# {TITLE}\n\n{DESCRIPTION}")

    # Text to analyze.
    with gr.Row():
        inp = gr.Textbox(
            label="Paste text",
            lines=10,
            placeholder="Paste listing or ad copy here...",
        )

    # Collapsed panel: ML settings plus the JSON summary and ML status line.
    with gr.Accordion("Advanced Options & Summary", open=False):
        with gr.Row():
            use_ml = gr.Checkbox(label="Use tiny ML classifier", value=ENV_USE_TINY)
            repo_box = gr.Textbox(
                label="Model repo",
                value=ENV_REPO,
                info="Hugging Face repo id for a sequence classifier",
            )
            thresh = gr.Slider(
                label="Violation threshold",
                minimum=0.50,
                maximum=0.95,
                step=0.01,
                value=ENV_THRESH,
            )
        with gr.Row():
            summary = gr.JSON(label="Summary")
        with gr.Row():
            ml_status = gr.Markdown()

    with gr.Row():
        btn_check = gr.Button("Check text", variant="primary")
        btn_clear = gr.Button("Clear")

    # Results: highlighted text first, then the plain-text report.
    with gr.Row():
        marked_html = gr.HTML(label="Highlighted text")

    with gr.Row():
        report = gr.Markdown()

    # Wire actions: Enter in the textbox and the primary button run the same
    # analysis with the same inputs and outputs.
    analysis_inputs = [inp, use_ml, repo_box, thresh]
    analysis_outputs = [marked_html, report, summary, ml_status]
    inp.submit(analyze_text, inputs=analysis_inputs, outputs=analysis_outputs)
    btn_check.click(analyze_text, inputs=analysis_inputs, outputs=analysis_outputs)

    # "Clear" resets the four result components only; the input box is left as is.
    btn_clear.click(
        lambda: ("", "", {"issues_found": 0, "categories": [], "ml_score": None, "threshold": ENV_THRESH, "ml_violation": None}, "ML: off"),
        inputs=None,
        outputs=[marked_html, report, summary, ml_status],
    )

    # Clickable sample texts that populate the input box.
    gr.Examples(
        examples=[
            ["Beautiful condo in quiet, safe neighborhood. No children please. Ladies only."],
            ["Close to multiple community centers and parks. Service animals accommodated per law."],
            ["christians only"],
        ],
        inputs=inp,
    )

if __name__ == "__main__":
    demo.launch()