BuaaCXF committed on
Commit
d35df54
·
verified ·
1 Parent(s): 5c60597

Update redact_generic.py

Browse files
Files changed (1) hide show
  1. redact_generic.py +14 -14
redact_generic.py CHANGED
@@ -1,16 +1,16 @@
1
  #!/usr/bin/env python3
2
  """
3
- 通用CSV脱敏脚本
4
 
5
- 用法示例:
6
  python redact_generic.py input.csv
7
  python redact_generic.py input.csv --output out.csv --columns log_message
8
  python redact_generic.py input.csv --full-mask
9
 
10
- 特性:
11
- - 默认一组正则规则(URL, IP, email, 手机号, 身份证, 长数字, 设备ID, App名)
12
- - 支持按列脱敏(通过列名或索引),默认对所有文本列处理
13
- - 提供部分掩码(默认)或完全替换选项
14
  """
15
  import re
16
  import csv
@@ -20,7 +20,7 @@ from typing import List, Pattern
20
 
21
 
22
  def build_rules(full_mask: bool = False):
23
- # 返回 (name, pattern, repl_or_callable)
24
  rules = []
25
 
26
  # URLs
@@ -124,22 +124,22 @@ def process_csv(input_path: Path, output_path: Path, columns: List[str], full_ma
124
 
125
 
126
  def main():
127
- ap = argparse.ArgumentParser(description='通用CSV脱敏工具')
128
- ap.add_argument('input', help='输入CSV文件')
129
- ap.add_argument('--output', '-o', help='输出CSV文件(默认与输入同目录,后缀 .redacted.csv')
130
- ap.add_argument('--columns', '-c', nargs='+', help='要脱敏的列名或列索引(索引从0开始)。不指定则对所有列应用')
131
- ap.add_argument('--full-mask', action='store_true', help='使用完全替换而不是部分掩码(对手机号/身份证等)')
132
 
133
  args = ap.parse_args()
134
  inp = Path(args.input)
135
  if not inp.exists():
136
- print('输入文件不存在:', inp)
137
  return
138
 
139
  out = Path(args.output) if args.output else inp.with_suffix('.redacted.csv')
140
 
141
  process_csv(inp, out, args.columns or [], args.full_mask)
142
- print('已写脱敏文件:', out)
143
 
144
 
145
  if __name__ == '__main__':
 
1
  #!/usr/bin/env python3
2
  """
3
+ Generic CSV Redaction Script
4
 
5
+ Usage Examples:
6
  python redact_generic.py input.csv
7
  python redact_generic.py input.csv --output out.csv --columns log_message
8
  python redact_generic.py input.csv --full-mask
9
 
10
+ Features:
11
+ - Default set of regex rules (URL, IP, email, phone number, ID card, long number, device ID, App name)
12
+ - Supports column-specific redaction (by column name or index), defaults to processing all text columns
13
+ - Provides partial masking (default) or full replacement options
14
  """
15
  import re
16
  import csv
 
20
 
21
 
22
  def build_rules(full_mask: bool = False):
23
+ # Returns (name, pattern, repl_or_callable)
24
  rules = []
25
 
26
  # URLs
 
124
 
125
 
126
  def main():
127
+ ap = argparse.ArgumentParser(description='Generic CSV Redaction Tool')
128
+ ap.add_argument('input', help='Input CSV file')
129
+ ap.add_argument('--output', '-o', help='Output CSV file (default in the same directory as input with .redacted.csv suffix)')
130
+ ap.add_argument('--columns', '-c', nargs='+', help='Column names or indices to redact (indices start from 0). If not specified, apply to all columns')
131
+ ap.add_argument('--full-mask', action='store_true', help='Use full replacement instead of partial masking (for phone numbers, ID cards, etc.)')
132
 
133
  args = ap.parse_args()
134
  inp = Path(args.input)
135
  if not inp.exists():
136
+ print('Input file does not exist:', inp)
137
  return
138
 
139
  out = Path(args.output) if args.output else inp.with_suffix('.redacted.csv')
140
 
141
  process_csv(inp, out, args.columns or [], args.full_mask)
142
+ print('Redacted file written to:', out)
143
 
144
 
145
  if __name__ == '__main__':