File size: 2,063 Bytes
5dbd484 b91194e 5dbd484 b91194e 5dbd484 b91194e 5dbd484 b91194e 5dbd484 b91194e 5dbd484 b91194e 5dbd484 b91194e 5dbd484 b91194e 5dbd484 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
#!/usr/bin/env python3
"""
PubGuard gate for pipeline integration.
Reads extracted PDF text from stdin or a file, screens it, and:
- Prints verdict JSON to STDERR (for debugging)
- Prints PASS/FAIL to STDERR
- Exits 0 (pass) or 1 (fail)
Usage:
echo "$PDF_TEXT" | python3 pub_check/scripts/pubguard_gate.py
Environment variables:
PUBGUARD_MODELS_DIR – Override models directory
PUBGUARD_STRICT – Set to "0" to warn instead of gate: a FAIL verdict then still exits 0 (empty input always exits 1)
"""
import json
import sys
import os
import logging
# Root logging setup: only WARNING and above are emitted, one record per line,
# prefixed with a wall-clock HH:MM:SS timestamp and the level name.
logging.basicConfig(
    datefmt="%H:%M:%S",
    format="%(asctime)s | %(levelname)s | %(message)s",
    level=logging.WARNING,
)
from pubguard import PubGuard, PubGuardConfig
def _failure_reasons(verdict):
    """Return the list of reason strings for the verdict heads this gate checks."""
    reasons = []

    doc_label = verdict["doc_type"]["label"]
    if doc_label != "scientific_paper":
        reasons.append(f"doc_type={doc_label}")

    # For these heads the failing label doubles as the tag shown to the user.
    for head, failing_label in (("ai_generated", "ai_generated"), ("toxicity", "toxic")):
        result = verdict[head]
        if result["label"] == failing_label:
            reasons.append(f"{failing_label} (score={result['score']:.2f})")

    return reasons


def main():
    """Screen input text with PubGuard and exit with the gate status.

    The text is read from the file named by the first command-line argument,
    or from stdin when no argument is given or the argument is "-".

    Everything is reported on stderr: the verdict as JSON, then a
    "PUBGUARD: PASS" / "PUBGUARD: FAIL — ..." line.

    Exit status:
        0 -- the verdict passed, or it failed while PUBGUARD_STRICT is "0".
        1 -- the verdict failed in strict mode (the default), or the input
             was empty / whitespace-only (checked before strict mode is
             consulted, so this applies in non-strict mode too).
    """
    if len(sys.argv) > 1 and sys.argv[1] != "-":
        # Explicit UTF-8 keeps decoding independent of the host locale;
        # undecodable bytes are replaced rather than aborting the gate.
        with open(sys.argv[1], encoding="utf-8", errors="replace") as f:
            text = f.read()
    else:
        text = sys.stdin.read()

    if not text.strip():
        print("PUBGUARD: Empty input", file=sys.stderr)
        sys.exit(1)

    config = PubGuardConfig()
    strict = os.environ.get("PUBGUARD_STRICT", "1") != "0"

    guard = PubGuard(config=config)
    guard.initialize()
    verdict = guard.screen(text)

    # Full verdict goes to stderr for debugging; stdout is left untouched.
    print(json.dumps(verdict), file=sys.stderr)

    if verdict["pass"]:
        print("PUBGUARD: PASS", file=sys.stderr)
        sys.exit(0)

    # The verdict may fail for a reason none of the inspected heads explains;
    # say so instead of printing a dangling "FAIL — ".
    reasons = _failure_reasons(verdict) or ["no specific reason reported"]
    print(f"PUBGUARD: FAIL — {', '.join(reasons)}", file=sys.stderr)

    if strict:
        sys.exit(1)

    print("PUBGUARD: Running in non-strict mode, continuing...", file=sys.stderr)
    sys.exit(0)


if __name__ == "__main__":
    main()
|