import gradio as gr # ───────────────────────────────────────────── # TAB 1 LOGIC: RLHF Pairwise Rater # ───────────────────────────────────────────── def check_consistency( prompt, resp_a, resp_b, help_a, harm_a, acc_a, inst_a, help_b, harm_b, acc_b, inst_b, preference, confidence ): if not resp_a.strip() or not resp_b.strip(): return "⚠️ Please enter both responses before checking.", "" axes = { "Helpfulness": (help_a, help_b), "Harmlessness": (harm_a, harm_b), "Accuracy": (acc_a, acc_b), "Instruction-Following": (inst_a, inst_b), } avg_a = sum(v[0] for v in axes.values()) / 4 avg_b = sum(v[1] for v in axes.values()) / 4 # Which axes favour each response? a_wins = [(k, va, vb) for k, (va, vb) in axes.items() if va > vb] b_wins = [(k, va, vb) for k, (va, vb) in axes.items() if vb > va] ties = [(k, va, vb) for k, (va, vb) in axes.items() if va == vb] conf_label = {1: "Low", 2: "Medium", 3: "High"}[confidence] # Build score table table_rows = "" for ax, (va, vb) in axes.items(): winner = "A ✓" if va > vb else ("B ✓" if vb > va else "Tie") table_rows += f"| {ax} | {va} | {vb} | {winner} |\n" table_rows += f"| **Average** | **{avg_a:.2f}** | **{avg_b:.2f}** | {'A ✓' if avg_a > avg_b else ('B ✓' if avg_b > avg_a else 'Tie')} |\n" score_table = ( "### Score Summary\n\n" "| Axis | Response A | Response B | Higher |\n" "|------|-----------|-----------|--------|\n" + table_rows ) # Consistency check if preference == "Tie": if abs(avg_a - avg_b) >= 1.0: msg = ( f"⚠️ **Possible inconsistency:** You selected 'Tie', but the average scores differ by " f"{abs(avg_a - avg_b):.2f} points (A avg: {avg_a:.2f}, B avg: {avg_b:.2f}). " f"A tie is most appropriate when averages are within ~0.5 of each other." ) else: msg = f"✅ **Consistent:** A 'Tie' verdict aligns with close average scores (A: {avg_a:.2f}, B: {avg_b:.2f})." elif preference == "A is better": if avg_b >= avg_a: detail = ", ".join(f"{k}: A={va} vs B={vb}" for k, va, vb in b_wins) or "none" msg = ( f"⚠️ **Inconsistency detected:** You selected 'A is better' overall, but Response B " f"scored higher on {len(b_wins)}/4 axes. " f"Axes favouring B: {detail}. " f"Overall averages — A: {avg_a:.2f}, B: {avg_b:.2f}. Consider reviewing your overall verdict." ) else: msg = f"✅ **Consistent:** 'A is better' aligns with higher per-axis averages (A: {avg_a:.2f} vs B: {avg_b:.2f})." else: # B is better if avg_a >= avg_b: detail = ", ".join(f"{k}: A={va} vs B={vb}" for k, va, vb in a_wins) or "none" msg = ( f"⚠️ **Inconsistency detected:** You selected 'B is better' overall, but Response A " f"scored higher on {len(a_wins)}/4 axes. " f"Axes favouring A: {detail}. " f"Overall averages — A: {avg_a:.2f}, B: {avg_b:.2f}. Consider reviewing your overall verdict." ) else: msg = f"✅ **Consistent:** 'B is better' aligns with higher per-axis averages (B: {avg_b:.2f} vs A: {avg_a:.2f})." msg += f"\n\n**Confidence:** {conf_label} ({confidence}/3)" return msg, score_table # ───────────────────────────────────────────── # TAB 2 LOGIC: Content Policy Rater # ───────────────────────────────────────────── VERDICTS = ["PASS", "FLAG", "BLOCK"] VERDICT_RANK = {"PASS": 0, "FLAG": 1, "BLOCK": 2} VERDICT_EMOJI = {"PASS": "🟢", "FLAG": "🟡", "BLOCK": "🔴"} CRITERIA = [ "Factual Accuracy", "Safety", "Bias", "PII Exposure", "On-Policy", "Clarity", ] def get_overall_verdict(*args): content = args[0] if not content.strip(): return "⚠️ Please enter content to evaluate.", "" # args layout: content, then pairs of (verdict, reasoning) × 6, then mode verdicts = [args[1 + i*2] for i in range(6)] reasonings= [args[1 + i*2 + 1] for i in range(6)] mode = args[13] ranks = [VERDICT_RANK[v] for v in verdicts] if mode == "worst_wins": final_rank = max(ranks) explanation = "**worst_wins:** The overall verdict equals the most severe individual criterion rating." elif mode == "majority": from collections import Counter c = Counter(verdicts) final_rank = VERDICT_RANK[c.most_common(1)[0][0]] explanation = f"**majority:** The most frequent verdict ({c.most_common(1)[0][0]}, {c.most_common(1)[0][1]}/6 criteria) wins." else: # weighted weights = {"Factual Accuracy": 2, "Safety": 3, "Bias": 2, "PII Exposure": 3, "On-Policy": 2, "Clarity": 1} total_w = sum(weights.values()) weighted_score = sum(weights[c] * ranks[i] for i, c in enumerate(CRITERIA)) / total_w final_rank = 0 if weighted_score < 0.5 else (1 if weighted_score < 1.5 else 2) explanation = f"**weighted:** Safety and PII carry 3× weight. Weighted score: {weighted_score:.2f} → {'PASS' if final_rank==0 else 'FLAG' if final_rank==1 else 'BLOCK'}." final_verdict = VERDICTS[final_rank] emoji = VERDICT_EMOJI[final_verdict] header = f"## {emoji} Overall Verdict: **{final_verdict}**\n\n{explanation}\n\n" table = "### Per-Criterion Breakdown\n\n| Criterion | Verdict | Reasoning |\n|-----------|---------|----------|\n" for i, crit in enumerate(CRITERIA): v = verdicts[i] r = reasonings[i].strip() if reasonings[i].strip() else "—" ev = VERDICT_EMOJI[v] table += f"| {crit} | {ev} {v} | {r} |\n" return header, table # ───────────────────────────────────────────── # TAB 3 LOGIC: Observation vs Inference # ───────────────────────────────────────────── INFERENCE_SIGNALS = [ # (signal phrase, example clean alternative) ("seems", "Describe exactly what you see, not what it suggests."), ("appears", "Describe exactly what you see, not what it suggests."), ("looks like", "Describe the specific visual or measurable property instead."), ("looks ", "Describe the specific visual or measurable property instead."), ("probably", "Remove speculation — state only what is directly observable."), ("likely", "Remove speculation — state only what is directly observable."), ("might", "Remove hedging — state only what is directly observable."), ("may ", "Remove hedging — state only what is directly observable."), ("should ", "Avoid prescriptive language in an observation."), ("is bad", "Describe the specific measurable problem, not a judgment."), ("is good", "Describe the specific measurable quality, not a judgment."), ("is wrong", "Describe the exact discrepancy observed."), ("is broken", "Describe what specifically does not work as expected."), ("is inconsistent","Specify the exact values or positions that differ."), ("unclear", "Describe what specific information is missing or ambiguous."), ("confusing", "Describe the specific element that causes confusion."), ("feels ", "Feelings are inferences. Describe the observable trigger instead."), ("indicates", "'Indicates' draws a conclusion. State the raw signal only."), ("suggests", "'Suggests' draws a conclusion. State the raw signal only."), ("implies", "'Implies' draws a conclusion. State the raw signal only."), ("because", "Causal claims belong in the inference, not the observation."), ] def analyze_obs_inf(observation, inference): if not observation.strip(): return "⚠️ Please enter an observation.", "" obs_lower = observation.lower() found = [(sig, tip) for sig, tip in INFERENCE_SIGNALS if sig in obs_lower] if not found: obs_result = ( "✅ **Clean observation** — specific and factual. " "No inference language detected." ) else: issues = "\n".join( f"- **'{sig.strip()}'** — {tip}" for sig, tip in found[:3] ) obs_result = ( f"⚠️ **Observation contains inference language** ({len(found)} signal(s) found):\n\n" + issues + "\n\n**Tip:** An observation should answer 'What did you literally see/measure?' — " "no judgments, no causes, no speculation." ) inf_result = "" if inference.strip(): inf_lower = inference.lower() # Inferences *should* contain reasoning words — flag if completely bare reasoning_words = ["because", "therefore", "so ", "thus", "indicates", "suggests", "means", "implies", "likely", "probably", "conclude"] has_reasoning = any(w in inf_lower for w in reasoning_words) if has_reasoning: inf_result = "\n\n✅ **Inference** — contains reasoning language, which is appropriate here." else: inf_result = ( "\n\n💡 **Inference tip:** Your inference reads like a bare statement. " "Strong inferences explain *why* — try adding 'because', 'therefore', or 'this suggests'." ) examples = """ --- ### Reference: Good vs Bad Examples | # | ❌ Contaminated Observation | ✅ Clean Observation | |---|----------------------------|----------------------| | 1 | "The button looks inconsistent with the rest of the UI." | "The Save button is 8 px lower than the Cancel button; Save uses Inter 14px, Cancel uses Inter 16px." | | 2 | "The error message is confusing." | "The error message reads 'Error 403' with no additional context and no retry option." | | 3 | "The response seems off-topic." | "The response does not mention the word 'Python' despite the prompt asking for a Python code example." | **Rule of thumb:** If you can't photograph or measure it, it's probably an inference. """ return obs_result + inf_result, examples # ───────────────────────────────────────────── # BUILD THE GRADIO APP # ───────────────────────────────────────────── HEADER = """
Interactive demos of AI training data quality-control workflows.
Built by Laela Zorana ·
HuggingFace ·
Kaggle