|
|
|
|
|
""" |
|
|
PubGuard gate for pipeline integration. |
|
|
|
|
|
Reads extracted PDF text from stdin or a file, screens it, and: |
|
|
- Prints verdict JSON to STDERR (for debugging) |
|
|
- Prints PASS/FAIL to STDERR |
|
|
- Exits 0 (pass) or 1 (fail) |
|
|
|
|
|
Usage: |
|
|
echo "$PDF_TEXT" | python3 pub_check/scripts/pubguard_gate.py |
|
|
|
|
|
Environment variables: |
|
|
PUBGUARD_MODELS_DIR — Override models directory |
|
|
PUBGUARD_STRICT — Set to "0" to warn instead of gate (exit 0 always) |
|
|
""" |
|
|
|
|
|
import json |
|
|
import sys |
|
|
import os |
|
|
import logging |
|
|
|
|
|
# Root logger setup: WARNING and above only, each record prefixed with an
# HH:MM:SS timestamp and its level name.
logging.basicConfig(
    datefmt="%H:%M:%S",
    format="%(asctime)s | %(levelname)s | %(message)s",
    level=logging.WARNING,
)
|
|
|
|
|
from pubguard import PubGuard, PubGuardConfig |
|
|
|
|
|
|
|
|
def _exit_failed(strict):
    """Terminate after a failed gate check, honouring non-strict mode.

    Args:
        strict: When true, exit with status 1. Otherwise print a notice to
            stderr and exit with status 0 so the calling pipeline continues.
    """
    if strict:
        sys.exit(1)
    print("PUBGUARD: Running in non-strict mode, continuing...", file=sys.stderr)
    sys.exit(0)


def main():
    """Screen extracted PDF text with PubGuard and exit with the gate status.

    The text is read from the file named by the first command-line argument,
    or from stdin when there is no argument or the argument is ``-``. The
    verdict is printed as JSON to stderr, followed by a ``PUBGUARD: PASS`` or
    ``PUBGUARD: FAIL`` line; nothing is written to stdout.

    Environment:
        PUBGUARD_STRICT: any value other than ``"0"`` (the default is
            ``"1"``) makes a failure exit with status 1. ``"0"`` downgrades
            every failure, including empty input, to a warning and exits 0.
        PUBGUARD_MODELS_DIR: not read here; presumably consumed by
            ``PubGuardConfig`` -- confirm against the pubguard package.

    Exit status:
        0 if the verdict passed or the gate is non-strict, 1 otherwise.
    """
    # Resolve the mode first so that every failure path below can honour it.
    strict = os.environ.get("PUBGUARD_STRICT", "1") != "0"

    if len(sys.argv) > 1 and sys.argv[1] != "-":
        with open(sys.argv[1], errors="replace") as f:
            text = f.read()
    else:
        # Extracted PDF text may contain undecodable bytes. Decode stdin as
        # leniently as the file branch instead of dying with
        # UnicodeDecodeError. The getattr guard covers stdin replacements
        # that are not TextIOWrapper instances.
        reconfigure = getattr(sys.stdin, "reconfigure", None)
        if reconfigure is not None:
            reconfigure(errors="replace")
        text = sys.stdin.read()

    if not text.strip():
        print("PUBGUARD: Empty input", file=sys.stderr)
        # Empty input is a gate failure like any other: fatal only when strict.
        _exit_failed(strict)

    guard = PubGuard(config=PubGuardConfig())
    guard.initialize()
    verdict = guard.screen(text)

    # Full verdict goes to stderr for debugging; stdout stays clean.
    print(json.dumps(verdict), file=sys.stderr)

    if verdict["pass"]:
        print("PUBGUARD: PASS", file=sys.stderr)
        sys.exit(0)

    # Collect a short explanation from each classifier head that tripped.
    reasons = []
    if verdict["doc_type"]["label"] != "scientific_paper":
        reasons.append(f"doc_type={verdict['doc_type']['label']}")
    if verdict["ai_generated"]["label"] == "ai_generated":
        reasons.append(f"ai_generated (score={verdict['ai_generated']['score']:.2f})")
    if verdict["toxicity"]["label"] == "toxic":
        reasons.append(f"toxic (score={verdict['toxicity']['score']:.2f})")

    print(f"PUBGUARD: FAIL — {', '.join(reasons)}", file=sys.stderr)
    _exit_failed(strict)
|
|
|
|
|
|
|
|
# Run the gate only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
|