# app.py
# Slop Detector
# Gradio app
# 24-02-2026
#
# EVERNOTE:
# https://share.evernote.com/note/0fb9b438-7842-4eff-a93f-ba0850e6ae83
#
# F:\DATA SCIENCE\MIJN DATA SCIENCE PROJECTS\FAKE NEWS DETECTOR - LOCAL LLM - SIRAJ RAVAL FEB 2026\SlopShield-main\SlopShield-PYTHON\GRADIO_APP
# app.py
# Gradio app for automated slop detection (Hugging Face Spaces ready).
#
# ✅ Features:
# - User can input a URL OR paste text
# - Extracts main content (trafilatura preferred, BeautifulSoup fallback)
# - Calls an OpenAI "mini" model (default: gpt-4o-mini) using Structured Outputs (JSON Schema)
# - Displays results neatly (score, subscores, contributions, interpretation, radar chart)
# - Allows downloading a Markdown (.md) report and a PDF (.pdf) report
#
# --- HF Spaces setup notes ---
# 1) Add an environment variable in your Space:
#    OPENAI_API_KEY = "..."
# 2) Recommended requirements.txt:
#    gradio
#    openai
#    requests
#    trafilatura
#    beautifulsoup4
#    lxml
#    matplotlib
#    reportlab
#
# OpenAI docs referenced for Structured Outputs + model listing:
# - Structured Outputs: https://developers.openai.com/api/docs/guides/structured-outputs/
# - Models (incl. gpt-4o-mini): https://developers.openai.com/api/docs/models
# - gpt-4o-mini model page: https://developers.openai.com/api/docs/models/gpt-4o-mini
# - Responses API: https://platform.openai.com/docs/api-reference/responses
#
# pip install -r requirements.txt --user
# --- Standard library ---
import os
import re
import json
import math
import time
import textwrap  # NOTE(review): appears unused in this file — confirm before removing
import urllib.parse  # NOTE(review): appears unused in this file — confirm before removing
from dataclasses import dataclass
from typing import Optional, Dict, Any, Tuple, List

# --- Third-party (see requirements.txt in the header notes) ---
import requests
import gradio as gr

# Optional extraction libs: the app degrades gracefully when either is
# missing (see extract_main_text for the fallback chain).
try:
    import trafilatura
except Exception:
    trafilatura = None
try:
    from bs4 import BeautifulSoup
except Exception:
    BeautifulSoup = None

# Charting and PDF generation.
import matplotlib.pyplot as plt
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Preformatted
from reportlab.lib.units import inch

# OpenAI SDK client (key supplied per request by the user).
from openai import OpenAI
# -----------------------------
# Config
# -----------------------------
# Scoring model; override via the OPENAI_MODEL env var.
DEFAULT_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
MAX_CHARS_SENT_TO_LLM = int(os.getenv("MAX_CHARS_SENT_TO_LLM", "35000"))  # safety for context
# Timeout (seconds) for the webpage fetch in fetch_url.
HTTP_TIMEOUT = int(os.getenv("HTTP_TIMEOUT", "20"))
# Output dir for reports and radar chart (works on Windows and Linux)
_OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "slop_output")
os.makedirs(_OUTPUT_DIR, exist_ok=True)
# Preset URLs the user can choose from (DEV_LOG examples).
# Each entry is (dropdown label, url); the empty-url "Custom" entry tells
# the UI to use the free-text URL box instead.
DEFAULT_URL_CHOICES = [
    ("Custom — enter your own URL below", ""),
    ("CNN Home", "https://www.cnn.com/"),
    ("CNN Politics", "https://www.cnn.com/politics"),
    ("CNN — US-Iran strike article", "https://edition.cnn.com/2026/02/19/politics/us-iran-strike-options-trump-military"),
    ("CNN — China AI Seedance", "https://www.cnn.com/2026/02/20/china/china-ai-seedance-intl-hnk-dst"),
    ("MattsWorld101 — SEO examples", "https://mattsworld101.com/examples-of-seo/"),
    ("Scitechtalk — Genealogy", "http://www.scitechtalk.org/UITGEBREIDE_GENEALOGIE_VAN%20_SERVAAS_BOURS/HTu1-10.html"),
    ("Scitechtalk — arXiv aggregator", "http://scitechtalk.org/ARXIV_AGGREGATOR/index.html"),
    ("arXiv — paper abs/2410.14255", "https://arxiv.org/abs/2410.14255"),
    ("Dumpert", "https://www.dumpert.nl/"),
    ("Medium — P vs NP of AI", "https://medium.com/data-and-beyond/the-p-vs-np-of-ai-why-reasoning-is-mathematically-impossible-for-a-decoder-ee440f1d27ce"),
    ("Medium — Creativity vector hallucination", "https://medium.com/data-and-beyond/i-extracted-a-creativity-vector-from-gpt-it-was-a-hallucination-95a033fb890a"),
    ("Medium — Topology of matrix multiplication", "https://medium.com/data-and-beyond/the-topology-of-matrix-multiplication-why-your-ai-is-just-folding-space-cf8e408f2c91"),
]
# Browser-like User-Agent sent with fetches; carries a SlopDetector token.
UA = (
    "Mozilla/5.0 (X11; Linux x86_64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/120.0 Safari/537.36 SlopDetector/1.0"
)
# -----------------------------
# Helpers
# -----------------------------
def clamp01(x: float) -> float:
    """Clamp *x* into the closed interval [0.0, 1.0]."""
    value = float(x)
    if value < 0.0:
        return 0.0
    if value > 1.0:
        return 1.0
    return value
def safe_slug(s: str, max_len: int = 60) -> str:
    """Turn an arbitrary string (URL or title) into a filesystem-safe slug.

    Lowercases, drops the http(s) scheme, collapses non-alphanumeric runs
    to single hyphens, and truncates to *max_len*. Falls back to
    "slop-report" when nothing usable remains.
    """
    text = (s or "").strip().lower()
    text = re.sub(r"https?://", "", text)
    text = re.sub(r"[^a-z0-9]+", "-", text).strip("-")
    slug = text if text else "slop-report"
    return slug[:max_len].rstrip("-")
def now_ts() -> str:
    """Local-time timestamp for filenames, e.g. '20260224-153000'."""
    return time.strftime("%Y%m%d-%H%M%S", time.localtime())
def infer_title_from_text(text: str) -> str:
    """Heuristic title: first stripped line with at least 8 characters.

    Returns at most 120 characters, or "Untitled" when no line qualifies.
    """
    stripped_lines = (ln.strip() for ln in (text or "").splitlines())
    for candidate in stripped_lines:
        if len(candidate) >= 8:
            return candidate[:120]
    return "Untitled"
def compute_interpretation(slop_score_0_100: float) -> str:
    """Map a 0-100 slop score onto a markdown interpretation string.

    Bands are inclusive of their upper bound (e.g. a score of 5 is still
    "Extremely Low Slop"). The caller can normalize by /100 if desired.
    """
    # (upper bound, band label, description) — checked in ascending order.
    bands = [
        (5, "Extremely Low Slop",
         "Meaning-dense, highly specific, minimal repetition/templating."),
        (15, "Very Low Slop",
         "High information density; only mild stylistic templates."),
        (30, "Low Slop",
         "Mostly meaning-driven, with some rhetorical repetition or structure."),
        (45, "Mild–Moderate Slop",
         "Noticeable templating and/or generic framing; still contains substance."),
        (60, "Moderate Slop",
         "Substantial filler/templating; reduced specificity; repetition noticeable."),
        (75, "High Slop",
         "Strong low-value signals: repetition, template voice, low specificity."),
        (90, "Very High Slop",
         "Predominantly template/filler; weak grounding; attention/SEO patterns likely."),
    ]
    for upper, band, desc in bands:
        if slop_score_0_100 <= upper:
            return f"**{band}** — {desc}"
    return ("**Extreme Slop** — "
            "Near-pure filler or spam-like content; minimal meaningful information.")
def weighted_contributions(result: Dict[str, Any]) -> Dict[str, float]:
    """Recompute the weighted slop terms from the model's subscores.

    Weights follow the canonical spec (0.30 / 0.30 / 0.20 / 0.10 / 0.10).
    Also returns "slop_normalized_sum" (0..1) and its 0..100 scaling,
    which should equal the model's slop_score if it followed the formula.
    """
    def _sub(key: str) -> float:
        # Subscores are clamped into [0, 1] defensively.
        return clamp01(result.get(key, 0.0))

    contrib = {
        "info_density_deficit": 0.30 * (1.0 - _sub("info_density")),
        "redundancy": 0.30 * _sub("redundancy"),
        "template_markers": 0.20 * _sub("template_markers"),
        "incoherence": 0.10 * _sub("incoherence"),
        "monetization": 0.10 * _sub("monetization"),
    }
    total = sum(contrib.values())
    contrib["slop_normalized_sum"] = total
    contrib["slop_score_0_100_sum"] = 100.0 * total
    return contrib
def make_radar_chart(subscores: Dict[str, float], out_path: str) -> str:
    """Render the five subscores (each 0-1) as a filled radar chart PNG.

    Saves the figure to *out_path* and returns that path.
    """
    axes_order = ["info_density", "redundancy", "template_markers", "incoherence", "monetization"]
    values = [clamp01(subscores.get(name, 0.0)) for name in axes_order]
    n = len(axes_order)
    # One spoke per subscore, evenly spaced; repeat the first point so the
    # polygon closes on itself.
    spokes = [2.0 * math.pi * i / n for i in range(n)]
    closed_angles = spokes + spokes[:1]
    closed_values = values + values[:1]
    plt.figure(figsize=(6, 6))
    ax = plt.subplot(111, polar=True)
    ax.set_theta_offset(math.pi / 2)  # first axis at the top
    ax.set_theta_direction(-1)        # clockwise
    plt.xticks(spokes, axes_order)
    ax.set_rlabel_position(0)
    plt.yticks([0.25, 0.5, 0.75], ["0.25", "0.50", "0.75"], alpha=0.7)
    plt.ylim(0, 1)
    # Do not set explicit colors (per system guidance)
    ax.plot(closed_angles, closed_values, linewidth=2)
    ax.fill(closed_angles, closed_values, alpha=0.15)
    plt.title("Subscores Radar (0–1)", y=1.08)
    plt.tight_layout()
    plt.savefig(out_path, dpi=160)
    plt.close()
    return out_path
# -----------------------------
# Webpage extraction
# -----------------------------
def normalize_url(url: str) -> str:
    """Ensure the URL carries a scheme; prepend https:// when absent.

    Empty/None input is returned as the empty string unchanged.
    """
    cleaned = (url or "").strip()
    if cleaned and not cleaned.startswith(("http://", "https://")):
        return "https://" + cleaned
    return cleaned
def fetch_url(url: str) -> Tuple[str, str]:
    """Fetch *url* (redirects followed) and return (final_url, html).

    Raises requests.HTTPError on a non-success status code.
    """
    target = normalize_url(url)
    resp = requests.get(
        target,
        headers={"User-Agent": UA},
        timeout=HTTP_TIMEOUT,
        allow_redirects=True,
    )
    resp.raise_for_status()
    # resp.url is the post-redirect URL; resp.text is the decoded body.
    return resp.url, resp.text
def extract_main_text(url: str) -> Tuple[str, str, str]:
    """
    Fetch *url* and extract its main textual content.

    Returns (final_url, extracted_text, extraction_method), where the
    method is "trafilatura", "beautifulsoup_fallback", or "regex_fallback"
    depending on which stage of the fallback chain succeeded.
    """
    url = normalize_url(url)
    final_url, html = fetch_url(url)
    # Preferred: trafilatura's boilerplate-aware main-content extraction.
    if trafilatura is not None:
        try:
            downloaded = trafilatura.extract(
                html,
                include_comments=False,
                include_tables=False,
                include_formatting=False,
                url=final_url,
            )
            # Require a minimum amount of text so near-empty extractions
            # fall through to the next method.
            if downloaded and len(downloaded.strip()) > 200:
                return final_url, downloaded.strip(), "trafilatura"
        except Exception:
            pass
    # Fallback: BeautifulSoup get_text
    if BeautifulSoup is not None:
        # BUGFIX: the previous check (`"lxml" in globals()`) was always
        # False because lxml is never imported at module level, so the lxml
        # parser was never selected even though it is in requirements.txt.
        # Try lxml and fall back to the stdlib parser if it is unavailable.
        try:
            soup = BeautifulSoup(html, "lxml")
        except Exception:
            soup = BeautifulSoup(html, "html.parser")
        # Remove non-content tags before extracting text.
        for tag in soup(["script", "style", "noscript"]):
            tag.decompose()
        text = soup.get_text("\n")
        # Normalize whitespace: strip lines, drop blanks, collapse runs of
        # blank lines to at most one.
        lines = [ln.strip() for ln in text.splitlines()]
        lines = [ln for ln in lines if ln]
        cleaned = "\n".join(lines)
        cleaned = re.sub(r"\n{3,}", "\n\n", cleaned).strip()
        return final_url, cleaned, "beautifulsoup_fallback"
    # Last resort: strip tags from the raw HTML with a regex.
    stripped = re.sub(r"<[^>]+>", " ", html)
    stripped = re.sub(r"\s+", " ", stripped).strip()
    return final_url, stripped, "regex_fallback"
# -----------------------------
# OpenAI call (Structured Outputs JSON Schema)
# -----------------------------
# JSON Schema handed to the API via response_format so the reply is
# guaranteed to match this structure. "strict": True requires
# additionalProperties: False and every property listed in "required" —
# both satisfied below. Subscores/confidence are re-clamped client-side
# in call_openai_slop, so out-of-range model values cannot leak through.
SLOP_SCHEMA = {
    "name": "slop_score_output",
    "schema": {
        "type": "object",
        "additionalProperties": False,
        "properties": {
            "info_density": {"type": "number"},
            "redundancy": {"type": "number"},
            "template_markers": {"type": "number"},
            "incoherence": {"type": "number"},
            "monetization": {"type": "number"},
            "slop_score": {"type": "number"},
            "top_contributing_factors": {
                "type": "array",
                "items": {"type": "string"},
                # NOTE(review): "minItems" may not be enforced/accepted by
                # strict Structured Outputs — confirm against OpenAI docs.
                "minItems": 1,
            },
            "confidence": {"type": "number"},
        },
        "required": [
            "info_density",
            "redundancy",
            "template_markers",
            "incoherence",
            "monetization",
            "slop_score",
            "top_contributing_factors",
            "confidence",
        ],
    },
    "strict": True,
}
def build_prompt(url: str, text: str) -> str:
    """Build the scoring prompt sent as the user message to the LLM.

    The prompt instructs the model to produce five normalized subscores,
    apply the fixed weighting formula, and emit JSON only. The human-readable
    interpretation is added by the app (compute_interpretation), not the model.
    """
    # Your prompt, adapted to accept either URL or pasted text.
    # We do NOT ask the model to add interpretation outside JSON; the app does that deterministically.
    return f"""
You are given extracted main text from a webpage.
WEBPAGE:
{url if url else ""}
TEXT:
Read the text from webpage:
{url if url else "(user-provided text)"}
MAIN_TEXT:
\"\"\"
{text}
\"\"\"
Goal:
Estimate Sloppiness (0–100).
Definition:
Sloppiness = degree to which text is low-information, generic, repetitive, templated, incoherent, or monetization-optimized rather than meaning-dense.
Constraints:
- Evaluate only intrinsic writing properties.
- Ignore topic, politics, and site type.
- Do not speculate beyond text evidence.
Step 1 — Produce normalized subscores (0–1):
- info_density: 1 = high specificity, 0 = generic.
- redundancy: 1 = heavy repetition.
- template_markers: 1 = strongly templated.
- incoherence: 1 = incoherent.
- monetization: 1 = heavy monetization cues.
Step 2 — Compute score:
slop_score = 100 * (
0.30 * (1 - info_density) +
0.30 * redundancy +
0.20 * template_markers +
0.10 * incoherence +
0.10 * monetization
)
Step 3 — Output ONLY valid JSON matching the provided schema.
""".strip()
def call_openai_slop(api_key: str, model: str, url: str, text: str, temperature: float) -> Dict[str, Any]:
    """Score *text* via the OpenAI Chat Completions API with Structured Outputs.

    Returns the parsed JSON dict matching SLOP_SCHEMA, with subscores and
    confidence clamped into [0, 1] and slop_score clamped into [0, 100].

    Raises RuntimeError when the API key is missing, the model returns
    empty content, or the content is not valid JSON.
    """
    api_key = (api_key or "").strip()
    if not api_key:
        raise RuntimeError("Please enter your OpenAI API key above before running analysis.")
    client = OpenAI(api_key=api_key)
    # Trim text for safety (context-window cap; see MAX_CHARS_SENT_TO_LLM).
    trimmed = text[:MAX_CHARS_SENT_TO_LLM]
    prompt = build_prompt(url=url, text=trimmed)
    # Chat Completions API with Structured Outputs (JSON Schema)
    resp = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a careful evaluator. Follow the schema exactly."},
            {"role": "user", "content": prompt},
        ],
        temperature=temperature,
        response_format={"type": "json_schema", "json_schema": SLOP_SCHEMA},
    )
    raw = (resp.choices[0].message.content or "").strip()
    if not raw:
        raise RuntimeError("Model returned empty content.")
    try:
        data = json.loads(raw)
    except Exception as e:
        raise RuntimeError(f"Model returned non-JSON or malformed JSON. Raw output:\n{raw}") from e
    # Clamp and sanity-check: force every subscore and confidence into [0, 1]
    # even if the model drifted outside the expected range.
    for k in ["info_density", "redundancy", "template_markers", "incoherence", "monetization", "confidence"]:
        data[k] = clamp01(data.get(k, 0.0))
    # slop_score should be 0..100
    data["slop_score"] = float(data.get("slop_score", 0.0))
    data["slop_score"] = max(0.0, min(100.0, data["slop_score"]))
    # Ensure list exists (defensive: model could omit or mistype the field).
    if not isinstance(data.get("top_contributing_factors"), list):
        data["top_contributing_factors"] = []
    return data
# -----------------------------
# Report generation (MD + PDF)
# -----------------------------
def format_report_markdown(
    url: str,
    title: str,
    extraction_method: str,
    text_preview: str,
    result: Dict[str, Any],
) -> str:
    """Render the analysis result as a full Markdown report string.

    Sections: header metadata, overall score + interpretation, subscores,
    weighted contribution breakdown, model factors, raw JSON, text preview.
    """
    contrib = weighted_contributions(result)
    slop = result["slop_score"]
    interp = compute_interpretation(slop)
    normalized = slop / 100.0

    report: List[str] = [
        "# Slop Detection Report",
        "",
        f"- **Title (heuristic):** {title}",
        f"- **URL:** {url if url else '(user-provided text)'}",
        f"- **Extraction method:** {extraction_method}",
        f"- **Generated at:** {time.strftime('%Y-%m-%d %H:%M:%S')}",
        "",
        "## Overall Score",
        "",
        f"- **slop_score (0–100):** {slop:.1f}",
        f"- **slop (0–1):** {normalized:.3f}",
        f"- **confidence (0–1):** {result.get('confidence', 0.0):.2f}",
        "",
        "### Interpretation",
        "",
        interp,
        "",
        "## Subscores (0–1)",
        "",
        "| Subscore | Value |",
        "|---|---:|",
        f"| info_density | {result['info_density']:.2f} |",
        f"| redundancy | {result['redundancy']:.2f} |",
        f"| template_markers | {result['template_markers']:.2f} |",
        f"| incoherence | {result['incoherence']:.2f} |",
        f"| monetization | {result['monetization']:.2f} |",
        "",
        "## Weighted Contribution Breakdown (normalized)",
        "",
        "| Term | Weight Contribution | Share |",
        "|---|---:|---:|",
    ]
    # Avoid division by zero when every contribution is exactly zero.
    total = contrib["slop_normalized_sum"] if contrib["slop_normalized_sum"] > 0 else 1.0
    for term in ["info_density_deficit", "redundancy", "template_markers", "incoherence", "monetization"]:
        value = contrib[term]
        report.append(f"| {term} | {value:.4f} | {value / total:.1%} |")
    report += ["", "## Top Contributing Factors (model)", ""]
    report += [f"- {factor}" for factor in result.get("top_contributing_factors", [])[:10]]
    report += [
        "",
        "## Raw JSON Output (model)",
        "",
        "```json",
        json.dumps(result, ensure_ascii=False, indent=2),
        "```",
        "",
        "## Text Preview (first ~1200 chars after extraction)",
        "",
        "```",
        text_preview,
        "```",
        "",
    ]
    return "\n".join(report)
def save_markdown(md_text: str, base_slug: str) -> str:
    """Write the Markdown report into the output dir; return its path."""
    out_path = os.path.join(_OUTPUT_DIR, f"slop_report_{base_slug}_{now_ts()}.md")
    with open(out_path, "w", encoding="utf-8") as fh:
        fh.write(md_text)
    return out_path
def save_pdf(md_text: str, base_slug: str) -> str:
    """Render the Markdown report as a simple PDF; return its path.

    Markdown handling is intentionally minimal: #/##/### headings, "- "
    bullets, fenced ``` code blocks (rendered as preformatted text), blank
    lines as spacers, and plain paragraphs with ** bold markers stripped.
    """
    path = os.path.join(_OUTPUT_DIR, f"slop_report_{base_slug}_{now_ts()}.pdf")
    doc = SimpleDocTemplate(path, pagesize=letter, rightMargin=54, leftMargin=54, topMargin=54, bottomMargin=54)
    styles = getSampleStyleSheet()
    story = []
    code_lines = None  # None = outside a fenced block; list = accumulating

    def emit_code(buf):
        # Emit accumulated code lines as one Preformatted chunk (skip empty).
        if buf:
            story.append(Preformatted("\n".join(buf), styles["Code"]))
            story.append(Spacer(1, 0.15 * inch))

    for raw in md_text.splitlines():
        if raw.strip().startswith("```"):
            if code_lines is None:
                code_lines = []            # opening fence
            else:
                emit_code(code_lines)      # closing fence
                code_lines = None
            continue
        if code_lines is not None:
            code_lines.append(raw.rstrip("\n"))
            continue
        stripped = raw.strip()
        if raw.startswith("# "):
            story.append(Paragraph(raw[2:].strip(), styles["Title"]))
            story.append(Spacer(1, 0.15 * inch))
        elif raw.startswith("## "):
            story.append(Paragraph(raw[3:].strip(), styles["Heading2"]))
            story.append(Spacer(1, 0.10 * inch))
        elif raw.startswith("### "):
            story.append(Paragraph(raw[4:].strip(), styles["Heading3"]))
            story.append(Spacer(1, 0.08 * inch))
        elif stripped.startswith("- "):
            story.append(Paragraph("• " + stripped[2:], styles["BodyText"]))
        elif stripped == "":
            story.append(Spacer(1, 0.08 * inch))
        else:
            # light markdown bold -> remove ** for PDF
            story.append(Paragraph(raw.replace("**", ""), styles["BodyText"]))
    # Unterminated fence at EOF: flush whatever was collected.
    if code_lines is not None:
        emit_code(code_lines)
    doc.build(story)
    return path
# -----------------------------
# Gradio pipeline
# -----------------------------
@dataclass
class AnalysisInputs:
    """Bundle of user-supplied parameters for one analysis run.

    BUGFIX: the class previously carried only annotations with no
    @dataclass decorator, so ``AnalysisInputs(api_key=..., ...)`` in
    run_analysis raised TypeError at runtime (plain classes get no
    keyword __init__). ``dataclass`` was already imported but unused.
    """
    api_key: str       # user's OpenAI API key (never stored by the app)
    url: str           # URL to fetch, possibly empty
    pasted_text: str   # pasted text alternative, possibly empty
    model: str         # OpenAI model name (falls back to DEFAULT_MODEL)
    temperature: float # sampling temperature for the scoring call
def analyze(inputs: AnalysisInputs) -> Tuple[str, Dict[str, Any], str, str, str]:
    """Run the full pipeline: extract -> score -> chart -> reports.

    Returns (summary_markdown, model_result_dict, radar_png_path,
    markdown_report_path, pdf_report_path).

    Raises ValueError when neither a URL nor pasted text is supplied.
    When both are supplied, the pasted text wins and no fetch happens.
    """
    url = (inputs.url or "").strip()
    pasted_text = (inputs.pasted_text or "").strip()
    if not url and not pasted_text:
        raise ValueError("Please provide either a URL or paste text to analyze.")
    extraction_method = "user_text"
    final_url = normalize_url(url) if url else ""
    text = pasted_text
    # Only fetch/extract when a URL was given and no text was pasted.
    if url and not pasted_text:
        final_url, text, extraction_method = extract_main_text(url)
    # Basic title heuristic (first long-enough line of the text).
    title = infer_title_from_text(text)
    base_slug = safe_slug(final_url or title)
    # Make a preview (first 1200 chars, with a truncation marker).
    preview = text[:1200].strip()
    if len(text) > 1200:
        preview += "\n\n…(truncated preview)…"
    # Call OpenAI (API key from user input)
    result = call_openai_slop(
        api_key=inputs.api_key or "",
        model=inputs.model or DEFAULT_MODEL,
        url=final_url,
        text=text,
        temperature=float(inputs.temperature),
    )
    # Build UI markdown summary; interpretation is derived deterministically
    # from the score bands, not requested from the model.
    interp = compute_interpretation(result["slop_score"])
    normalized = result["slop_score"] / 100.0
    contrib = weighted_contributions(result)
    summary_md = f"""
## Results
**slop_score (0–100):** `{result["slop_score"]:.1f}`
**slop (0–1):** `{normalized:.3f}`
**confidence (0–1):** `{result.get("confidence", 0.0):.2f}`
### Interpretation
{interp}
### Subscores (0–1)
- info_density: `{result["info_density"]:.2f}`
- redundancy: `{result["redundancy"]:.2f}`
- template_markers: `{result["template_markers"]:.2f}`
- incoherence: `{result["incoherence"]:.2f}`
- monetization: `{result["monetization"]:.2f}`
### Dominant contributors (weighted shares)
- redundancy: `{(contrib["redundancy"]/contrib["slop_normalized_sum"] if contrib["slop_normalized_sum"] else 0):.1%}`
- template_markers: `{(contrib["template_markers"]/contrib["slop_normalized_sum"] if contrib["slop_normalized_sum"] else 0):.1%}`
- info_density_deficit: `{(contrib["info_density_deficit"]/contrib["slop_normalized_sum"] if contrib["slop_normalized_sum"] else 0):.1%}`
- incoherence: `{(contrib["incoherence"]/contrib["slop_normalized_sum"] if contrib["slop_normalized_sum"] else 0):.1%}`
- monetization: `{(contrib["monetization"]/contrib["slop_normalized_sum"] if contrib["slop_normalized_sum"] else 0):.1%}`
### Top contributing factors (model)
{chr(10).join([f"- {x}" for x in result.get("top_contributing_factors", [])[:8]]) if result.get("top_contributing_factors") else "- (none provided)"}
### Extraction preview
<details>
<summary>Show extracted text preview</summary>
{preview}
</details>
""".strip()
    # Radar chart (PNG written into the shared output dir).
    radar_path = os.path.join(_OUTPUT_DIR, f"radar_{base_slug}_{now_ts()}.png")
    make_radar_chart(
        {
            "info_density": result["info_density"],
            "redundancy": result["redundancy"],
            "template_markers": result["template_markers"],
            "incoherence": result["incoherence"],
            "monetization": result["monetization"],
        },
        radar_path,
    )
    # Reports (Markdown + PDF share the same rendered content).
    report_md = format_report_markdown(
        url=final_url,
        title=title,
        extraction_method=extraction_method,
        text_preview=preview,
        result=result,
    )
    md_path = save_markdown(report_md, base_slug)
    pdf_path = save_pdf(report_md, base_slug)
    return summary_md, result, radar_path, md_path, pdf_path
# -----------------------------
# Gradio UI
# -----------------------------
def run_analysis(api_key: str, url: str, pasted_text: str, model: str, temperature: float):
    """Thin adapter: bundle flat Gradio inputs and delegate to analyze()."""
    bundle = AnalysisInputs(
        api_key=api_key,
        url=url,
        pasted_text=pasted_text,
        model=model,
        temperature=temperature,
    )
    return analyze(bundle)
# UI layout: inputs (key, preset/custom URL, pasted text, model, temperature),
# an Analyze button, and outputs (summary, JSON, radar chart, report files).
with gr.Blocks(title="Automated Slop Detection") as demo:
    gr.Markdown(
        "# Automated Slop Detection\n"
        "Analyze a webpage (URL) or pasted text and estimate **Sloppiness** with subscores.\n\n"
        "**Tip:** For best results, analyze a single article page (not a homepage/feed)."
    )
    # Masked per-session API key field (see its info text).
    api_key_in = gr.Textbox(
        label="OpenAI API Key (required)",
        type="password",
        placeholder="sk-...",
        info="Enter your OpenAI API key to run analysis. It is not stored.",
    )
    # Preset dropdown: the "Custom" entry carries value "" so _on_click
    # falls back to the free-text URL box below.
    url_preset_in = gr.Dropdown(
        label="Choose a preset URL (or Custom to enter your own)",
        choices=[(label, url) for label, url in DEFAULT_URL_CHOICES],
        value="",
        allow_custom_value=False,
    )
    url_in = gr.Textbox(
        label="URL (optional — used when preset is Custom)",
        value="",
        placeholder="https://example.com/article",
        lines=1,
    )
    text_in = gr.Textbox(
        label="Paste text (optional)",
        placeholder="Paste extracted main text here (leave URL empty if using pasted text).",
        lines=10,
    )
    with gr.Row():
        model_in = gr.Textbox(label="OpenAI model", value=DEFAULT_MODEL)
        temp_in = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, value=0.0, step=0.05, info="Set to 0 for stable, deterministic results.")
    analyze_btn = gr.Button("Analyze", variant="primary")
    gr.Markdown("---")
    # --- Outputs ---
    out_md = gr.Markdown(label="Summary")
    out_json = gr.JSON(label="Model JSON output (schema)")
    out_plot = gr.Image(label="Subscores radar chart", type="filepath")
    with gr.Row():
        out_md_file = gr.File(label="Download Markdown report (.md)")
        out_pdf_file = gr.File(label="Download PDF report (.pdf)")
    def _on_click(api_key, url_preset, url_custom, text, model, temp):
        # A non-empty preset URL takes precedence over the custom URL box.
        url = (url_preset or "").strip() or (url_custom or "").strip()
        summary_md, result_json, radar_path, md_path, pdf_path = run_analysis(api_key, url, text, model, temp)
        return summary_md, result_json, radar_path, md_path, pdf_path
    analyze_btn.click(
        _on_click,
        inputs=[api_key_in, url_preset_in, url_in, text_in, model_in, temp_in],
        outputs=[out_md, out_json, out_plot, out_md_file, out_pdf_file],
    )
    gr.Markdown(
        "### Notes\n"
        "- **slop_score (0–100)** is the scaled score. Divide by 100 for normalized slop in **[0,1]**.\n"
        "- The app generates its own interpretation from slop_score bands to keep the model output strictly JSON.\n"
        "- OpenAI usage and billing: [platform.openai.com/usage](https://platform.openai.com/usage)\n"
    )
if __name__ == "__main__":
    # Launch the Gradio server (local run: `python app.py`).
    demo.launch()
# python app.py
# =========================================================================================