jimnoneill
/

pubguard-classifier

Text Classification

document-classification

scientific-papers

toxicity-detection

Model card Files Files and versions

pubguard-classifier / scripts /pubguard_gate.py

jimnoneill's picture

Clean gate script — no errors module dependency

b91194e verified about 15 hours ago

history blame contribute delete

2.06 kB

	#!/usr/bin/env python3
	"""
	PubGuard gate for pipeline integration.

	Reads extracted PDF text from stdin or a file, screens it, and:
	- Prints verdict JSON to STDERR (for debugging)
	- Prints PASS/FAIL to STDERR
	- Exits 0 (pass) or 1 (fail)

	Usage:
	echo "$PDF_TEXT" | python3 pub_check/scripts/pubguard_gate.py

	Environment variables:
	PUBGUARD_MODELS_DIR – Override models directory
	PUBGUARD_STRICT – Set to "0" to warn instead of gate (exit 0 always)
	"""

	import json
	import sys
	import os
	import logging

	# Root logger setup: only WARNING and above are emitted, each line stamped
	# with a wall-clock time (HH:MM:SS). This runs ahead of the pubguard import
	# further down — presumably so that anything logged while that package is
	# imported already uses this format (confirm before reordering).
	# The field separator is a plain "|"; writing it as "\|" is an invalid
	# escape sequence and would put a literal backslash into every log line.
	logging.basicConfig(
	    level=logging.WARNING,
	    format="%(asctime)s | %(levelname)s | %(message)s",
	    datefmt="%H:%M:%S",
	)

	from pubguard import PubGuard, PubGuardConfig


	def main():
	    """Screen one document with PubGuard and exit with a gate status.

	    The text to screen is read from the file named by ``sys.argv[1]``; when
	    no argument is given, or the argument is ``-``, it is read from stdin.
	    The verdict (as JSON) and a one-line PASS/FAIL summary are written to
	    stderr.

	    Environment:
	        PUBGUARD_STRICT: any value other than ``"0"`` (the default) gates
	            the pipeline. ``"0"`` downgrades every failure — including
	            empty input — to a warning and the process exits 0.

	    Raises:
	        SystemExit: on every normal path. Code 0 when the verdict passes or
	            strict mode is off; code 1 on empty input or a failed verdict
	            in strict mode.
	    """
	    # Read the mode first so that *every* failure path below honours it.
	    strict = os.environ.get("PUBGUARD_STRICT", "1") != "0"

	    if len(sys.argv) > 1 and sys.argv[1] != "-":
	        # Undecodable bytes are replaced rather than raising, so a partly
	        # garbled extraction still gets screened.
	        with open(sys.argv[1], errors="replace") as f:
	            text = f.read()
	    else:
	        text = sys.stdin.read()

	    if not text.strip():
	        print("PUBGUARD: Empty input", file=sys.stderr)
	        if strict:
	            sys.exit(1)
	        print("PUBGUARD: Running in non-strict mode, continuing...", file=sys.stderr)
	        sys.exit(0)

	    config = PubGuardConfig()
	    guard = PubGuard(config=config)
	    guard.initialize()
	    verdict = guard.screen(text)

	    # Full verdict goes to stderr for debugging; stdout stays untouched.
	    print(json.dumps(verdict), file=sys.stderr)

	    if verdict["pass"]:
	        print("PUBGUARD: PASS", file=sys.stderr)
	        sys.exit(0)

	    # Collect a human-readable reason for each check that tripped.
	    reasons = []
	    if verdict["doc_type"]["label"] != "scientific_paper":
	        reasons.append(f"doc_type={verdict['doc_type']['label']}")
	    if verdict["ai_generated"]["label"] == "ai_generated":
	        reasons.append(f"ai_generated (score={verdict['ai_generated']['score']:.2f})")
	    if verdict["toxicity"]["label"] == "toxic":
	        reasons.append(f"toxic (score={verdict['toxicity']['score']:.2f})")

	    # A failing verdict that matches none of the checks above would
	    # otherwise print a dangling dash with nothing after it.
	    summary = ", ".join(reasons) or "no specific reason reported"
	    print(f"PUBGUARD: FAIL — {summary}", file=sys.stderr)

	    if strict:
	        sys.exit(1)
	    print("PUBGUARD: Running in non-strict mode, continuing...", file=sys.stderr)
	    sys.exit(0)


	# Script entry point: run the gate only when this file is executed directly,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()