| | |
"""
PubGuard gate for pipeline integration.

Reads extracted PDF text from stdin or a file, screens it, and:
  - Prints verdict JSON to STDERR (for debugging)
  - Prints PASS/FAIL to STDERR
  - Exits 0 (pass) or 1 (fail)

Usage:
    echo "$PDF_TEXT" | python3 pub_check/scripts/pubguard_gate.py
    python3 pub_check/scripts/pubguard_gate.py extracted_text.txt   # "-" or no argument reads stdin

Environment variables:
    PUBGUARD_MODELS_DIR — Override models directory
    PUBGUARD_STRICT     — Set to "0" to warn instead of gate (a FAIL verdict
                          then exits 0; empty input still exits 1)
"""
| |
|
| | import json |
| | import sys |
| | import os |
| | import logging |
| |
|
# Root logger setup: only WARNING and above, with a compact time-of-day prefix.
# NOTE(review): this call sits ahead of the `pubguard` import in this file —
# presumably so records emitted while that package loads use this
# configuration; confirm before reordering.
logging.basicConfig(
    datefmt="%H:%M:%S",
    format="%(asctime)s | %(levelname)s | %(message)s",
    level=logging.WARNING,
)
| |
|
| | from pubguard import PubGuard, PubGuardConfig |
| |
|
| |
|
def _read_input(argv) -> str:
    """Return the text to screen, read from the file in ``argv[1]`` or from stdin.

    Stdin is used when no argument is given or when the argument is ``"-"``.
    """
    if len(argv) > 1 and argv[1] != "-":
        # Decode explicitly as UTF-8 instead of the platform-dependent default;
        # undecodable bytes are replaced rather than aborting the gate.
        with open(argv[1], encoding="utf-8", errors="replace") as handle:
            return handle.read()

    # Give stdin the same tolerance for undecodable bytes as the file path.
    # Replacement streams (e.g. io.StringIO) have no reconfigure(); skip them.
    reconfigure = getattr(sys.stdin, "reconfigure", None)
    if reconfigure is not None:
        reconfigure(errors="replace")
    return sys.stdin.read()


def _failure_reasons(verdict) -> list:
    """Build the human-readable reasons shown on the FAIL line for *verdict*."""
    reasons = []
    doc_label = verdict["doc_type"]["label"]
    if doc_label != "scientific_paper":
        reasons.append(f"doc_type={doc_label}")
    ai_result = verdict["ai_generated"]
    if ai_result["label"] == "ai_generated":
        reasons.append(f"ai_generated (score={ai_result['score']:.2f})")
    toxicity = verdict["toxicity"]
    if toxicity["label"] == "toxic":
        reasons.append(f"toxic (score={toxicity['score']:.2f})")
    return reasons


def main() -> None:
    """Screen the input text with PubGuard and exit with the gate status.

    The verdict JSON and a PASS/FAIL line are written to stderr. The process
    always terminates via ``sys.exit``:

    - exit 1 when the input is empty or whitespace-only (checked before
      ``PUBGUARD_STRICT`` is consulted, so this holds in non-strict mode too);
    - exit 0 when ``verdict["pass"]`` is truthy;
    - on a failing verdict, exit 1 unless ``PUBGUARD_STRICT`` is ``"0"``,
      in which case a notice is printed and the exit status is 0.
    """
    text = _read_input(sys.argv)

    if not text.strip():
        print("PUBGUARD: Empty input", file=sys.stderr)
        sys.exit(1)

    config = PubGuardConfig()
    # Any value other than the literal "0" (including unset) keeps gating on.
    strict = os.environ.get("PUBGUARD_STRICT", "1") != "0"

    guard = PubGuard(config=config)
    guard.initialize()
    verdict = guard.screen(text)

    print(json.dumps(verdict), file=sys.stderr)

    if verdict["pass"]:
        print("PUBGUARD: PASS", file=sys.stderr)
        sys.exit(0)

    # A verdict can fail without tripping any of the three checks reported
    # here; say so instead of printing an empty reason list.
    summary = ", ".join(_failure_reasons(verdict)) or "no specific reason reported"
    print(f"PUBGUARD: FAIL — {summary}", file=sys.stderr)

    if strict:
        sys.exit(1)

    print("PUBGUARD: Running in non-strict mode, continuing...", file=sys.stderr)
    sys.exit(0)
| |
|
| |
|
if __name__ == "__main__":
    # Script entry point: run the gate only when this file is executed
    # directly, not when it is imported as a module.
    main()
| |
|