jimnoneill
/

pubguard-classifier

@@ -1,21 +1,14 @@
 #!/usr/bin/env python3
 """
-PubGuard gate for run_pubverse_pipeline.sh integration.
 Reads extracted PDF text from stdin or a file, screens it, and:
-  - Prints the error code string to STDOUT (always, for pipeline capture)
-  - Prints verdict JSON and diagnostics to STDERR
-  - Exits 0 (pass) → pipeline continues
-  - Exits 1 (fail) → pipeline halts with error code
-Error code format:
-    PV-0[doc_type][ai_detect][toxicity] | NAME | snarky message
-    PV-0000 = scientific_paper + human + clean = PASS
-Usage in run_pubverse_pipeline.sh:
-    PUBGUARD_RESULT=$(echo "$PDF_TEXT" | python3 pub_check/scripts/pubguard_gate.py)
-    PUBGUARD_EXIT=$?
-    echo "$PUBGUARD_RESULT"   # Error code line on stdout
 Environment variables:
     PUBGUARD_MODELS_DIR  – Override models directory
@@ -34,15 +27,9 @@ logging.basicConfig(
 )
 from pubguard import PubGuard, PubGuardConfig
-from pubguard.errors import (
-    build_pubguard_error,
-    empty_input_error,
-    gate_bypassed,
-)
 def main():
-    # Read text from stdin or file argument
     if len(sys.argv) > 1 and sys.argv[1] != "-":
         with open(sys.argv[1], errors="replace") as f:
             text = f.read()
@@ -50,46 +37,36 @@ def main():
         text = sys.stdin.read()
     if not text.strip():
-        err = empty_input_error()
-        print(str(err))  # stdout: error code line
-        print(json.dumps(err.to_dict()), file=sys.stderr)
         sys.exit(1)
-    # Configure
     config = PubGuardConfig()
     strict = os.environ.get("PUBGUARD_STRICT", "1") != "0"
-    # Screen
     guard = PubGuard(config=config)
     guard.initialize()
     verdict = guard.screen(text)
-    # Build structured error code from verdict
-    err = build_pubguard_error(verdict)
-    # STDOUT: always print the error code line (pipeline captures this)
-    print(str(err))
-    # STDERR: full verdict JSON for debugging
     print(json.dumps(verdict), file=sys.stderr)
-    # Gate decision
     if verdict["pass"]:
-        print(f"PUBGUARD: PASS ({err.code})", file=sys.stderr)
         sys.exit(0)
     else:
-        print(f"PUBGUARD: FAIL ({err.code})", file=sys.stderr)
-        if strict and err.fatal:
             sys.exit(1)
-        elif not strict:
-            bypass = gate_bypassed()
-            print(str(bypass))  # Also print bypass code to stdout
-            print(f"PUBGUARD: {bypass.message}", file=sys.stderr)
-            sys.exit(0)
         else:
-            # Non-fatal flag (e.g. AI detection, toxicity) — warn but pass
-            print(f"PUBGUARD: WARNING (non-fatal flag, proceeding)", file=sys.stderr)
             sys.exit(0)

 #!/usr/bin/env python3
 """
+PubGuard gate for pipeline integration.
 Reads extracted PDF text from stdin or a file, screens it, and:
+  - Prints verdict JSON to STDERR (for debugging)
+  - Prints PASS/FAIL to STDERR
+  - Exits 0 (pass) or 1 (fail); with PUBGUARD_STRICT=0 a failed screen still exits 0
+Usage:
+    echo "$PDF_TEXT" | python3 pub_check/scripts/pubguard_gate.py
 Environment variables:
     PUBGUARD_MODELS_DIR  – Override models directory
 )
 from pubguard import PubGuard, PubGuardConfig
 def main():
     if len(sys.argv) > 1 and sys.argv[1] != "-":
         with open(sys.argv[1], errors="replace") as f:
             text = f.read()
         text = sys.stdin.read()
     if not text.strip():
+        print("PUBGUARD: Empty input", file=sys.stderr)
         sys.exit(1)
     config = PubGuardConfig()
     strict = os.environ.get("PUBGUARD_STRICT", "1") != "0"
     guard = PubGuard(config=config)
     guard.initialize()
     verdict = guard.screen(text)
     print(json.dumps(verdict), file=sys.stderr)
     if verdict["pass"]:
+        print("PUBGUARD: PASS", file=sys.stderr)
         sys.exit(0)
     else:
+        reasons = []
+        if verdict["doc_type"]["label"] != "scientific_paper":
+            reasons.append(f"doc_type={verdict['doc_type']['label']}")
+        if verdict["ai_generated"]["label"] == "ai_generated":
+            reasons.append(f"ai_generated (score={verdict['ai_generated']['score']:.2f})")
+        if verdict["toxicity"]["label"] == "toxic":
+            reasons.append(f"toxic (score={verdict['toxicity']['score']:.2f})")
+        print(f"PUBGUARD: FAIL — {', '.join(reasons)}", file=sys.stderr)
+        if strict:
             sys.exit(1)
         else:
+            print("PUBGUARD: Running in non-strict mode, continuing...", file=sys.stderr)
             sys.exit(0)