jimnoneill committed on
Commit
b91194e
·
verified ·
1 Parent(s): 863b047

Clean gate script — no errors module dependency

Browse files
Files changed (1) hide show
  1. scripts/pubguard_gate.py +19 -42
scripts/pubguard_gate.py CHANGED
@@ -1,21 +1,14 @@
1
  #!/usr/bin/env python3
2
  """
3
- PubGuard gate for run_pubverse_pipeline.sh integration.
4
 
5
  Reads extracted PDF text from stdin or a file, screens it, and:
6
- - Prints the error code string to STDOUT (always, for pipeline capture)
7
- - Prints verdict JSON and diagnostics to STDERR
8
- Exits 0 (pass) → pipeline continues
9
- - Exits 1 (fail) → pipeline halts with error code
10
 
11
- Error code format:
12
- PV-0[doc_type][ai_detect][toxicity] | NAME | snarky message
13
- PV-0000 = scientific_paper + human + clean = PASS
14
-
15
- Usage in run_pubverse_pipeline.sh:
16
- PUBGUARD_RESULT=$(echo "$PDF_TEXT" | python3 pub_check/scripts/pubguard_gate.py)
17
- PUBGUARD_EXIT=$?
18
- echo "$PUBGUARD_RESULT" # Error code line on stdout
19
 
20
  Environment variables:
21
  PUBGUARD_MODELS_DIR – Override models directory
@@ -34,15 +27,9 @@ logging.basicConfig(
34
  )
35
 
36
  from pubguard import PubGuard, PubGuardConfig
37
- from pubguard.errors import (
38
- build_pubguard_error,
39
- empty_input_error,
40
- gate_bypassed,
41
- )
42
 
43
 
44
  def main():
45
- # Read text from stdin or file argument
46
  if len(sys.argv) > 1 and sys.argv[1] != "-":
47
  with open(sys.argv[1], errors="replace") as f:
48
  text = f.read()
@@ -50,46 +37,36 @@ def main():
50
  text = sys.stdin.read()
51
 
52
  if not text.strip():
53
- err = empty_input_error()
54
- print(str(err)) # stdout: error code line
55
- print(json.dumps(err.to_dict()), file=sys.stderr)
56
  sys.exit(1)
57
 
58
- # Configure
59
  config = PubGuardConfig()
60
  strict = os.environ.get("PUBGUARD_STRICT", "1") != "0"
61
 
62
- # Screen
63
  guard = PubGuard(config=config)
64
  guard.initialize()
65
  verdict = guard.screen(text)
66
 
67
- # Build structured error code from verdict
68
- err = build_pubguard_error(verdict)
69
-
70
- # STDOUT: always print the error code line (pipeline captures this)
71
- print(str(err))
72
-
73
- # STDERR: full verdict JSON for debugging
74
  print(json.dumps(verdict), file=sys.stderr)
75
 
76
- # Gate decision
77
  if verdict["pass"]:
78
- print(f"PUBGUARD: PASS ({err.code})", file=sys.stderr)
79
  sys.exit(0)
80
  else:
81
- print(f"PUBGUARD: FAIL ({err.code})", file=sys.stderr)
 
 
 
 
 
 
 
 
82
 
83
- if strict and err.fatal:
84
  sys.exit(1)
85
- elif not strict:
86
- bypass = gate_bypassed()
87
- print(str(bypass)) # Also print bypass code to stdout
88
- print(f"PUBGUARD: {bypass.message}", file=sys.stderr)
89
- sys.exit(0)
90
  else:
91
- # Non-fatal flag (e.g. AI detection, toxicity) — warn but pass
92
- print(f"PUBGUARD: WARNING (non-fatal flag, proceeding)", file=sys.stderr)
93
  sys.exit(0)
94
 
95
 
 
1
  #!/usr/bin/env python3
2
  """
3
+ PubGuard gate for pipeline integration.
4
 
5
  Reads extracted PDF text from stdin or a file, screens it, and:
6
+ - Prints verdict JSON to STDERR (for debugging)
7
+ - Prints PASS/FAIL to STDERR
8
+ - Exits 0 (pass) or 1 (fail)
 
9
 
10
+ Usage:
11
+ echo "$PDF_TEXT" | python3 pub_check/scripts/pubguard_gate.py
 
 
 
 
 
 
12
 
13
  Environment variables:
14
  PUBGUARD_MODELS_DIR – Override models directory
 
27
  )
28
 
29
  from pubguard import PubGuard, PubGuardConfig
 
 
 
 
 
30
 
31
 
32
  def main():
 
33
  if len(sys.argv) > 1 and sys.argv[1] != "-":
34
  with open(sys.argv[1], errors="replace") as f:
35
  text = f.read()
 
37
  text = sys.stdin.read()
38
 
39
  if not text.strip():
40
+ print("PUBGUARD: Empty input", file=sys.stderr)
 
 
41
  sys.exit(1)
42
 
 
43
  config = PubGuardConfig()
44
  strict = os.environ.get("PUBGUARD_STRICT", "1") != "0"
45
 
 
46
  guard = PubGuard(config=config)
47
  guard.initialize()
48
  verdict = guard.screen(text)
49
 
 
 
 
 
 
 
 
50
  print(json.dumps(verdict), file=sys.stderr)
51
 
 
52
  if verdict["pass"]:
53
+ print("PUBGUARD: PASS", file=sys.stderr)
54
  sys.exit(0)
55
  else:
56
+ reasons = []
57
+ if verdict["doc_type"]["label"] != "scientific_paper":
58
+ reasons.append(f"doc_type={verdict['doc_type']['label']}")
59
+ if verdict["ai_generated"]["label"] == "ai_generated":
60
+ reasons.append(f"ai_generated (score={verdict['ai_generated']['score']:.2f})")
61
+ if verdict["toxicity"]["label"] == "toxic":
62
+ reasons.append(f"toxic (score={verdict['toxicity']['score']:.2f})")
63
+
64
+ print(f"PUBGUARD: FAIL — {', '.join(reasons)}", file=sys.stderr)
65
 
66
+ if strict:
67
  sys.exit(1)
 
 
 
 
 
68
  else:
69
+ print("PUBGUARD: Running in non-strict mode, continuing...", file=sys.stderr)
 
70
  sys.exit(0)
71
 
72