Spaces:
No application file
No application file
Update redact_generic.py
Browse files- redact_generic.py +14 -14
redact_generic.py
CHANGED
|
@@ -1,16 +1,16 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
-
|
| 4 |
|
| 5 |
-
|
| 6 |
python redact_generic.py input.csv
|
| 7 |
python redact_generic.py input.csv --output out.csv --columns log_message
|
| 8 |
python redact_generic.py input.csv --full-mask
|
| 9 |
|
| 10 |
-
|
| 11 |
-
-
|
| 12 |
-
-
|
| 13 |
-
-
|
| 14 |
"""
|
| 15 |
import re
|
| 16 |
import csv
|
|
@@ -20,7 +20,7 @@ from typing import List, Pattern
|
|
| 20 |
|
| 21 |
|
| 22 |
def build_rules(full_mask: bool = False):
|
| 23 |
-
#
|
| 24 |
rules = []
|
| 25 |
|
| 26 |
# URLs
|
|
@@ -124,22 +124,22 @@ def process_csv(input_path: Path, output_path: Path, columns: List[str], full_ma
|
|
| 124 |
|
| 125 |
|
| 126 |
def main():
|
| 127 |
-
ap = argparse.ArgumentParser(description='
|
| 128 |
-
ap.add_argument('input', help='
|
| 129 |
-
ap.add_argument('--output', '-o', help='
|
| 130 |
-
ap.add_argument('--columns', '-c', nargs='+', help='
|
| 131 |
-
ap.add_argument('--full-mask', action='store_true', help='
|
| 132 |
|
| 133 |
args = ap.parse_args()
|
| 134 |
inp = Path(args.input)
|
| 135 |
if not inp.exists():
|
| 136 |
-
print('
|
| 137 |
return
|
| 138 |
|
| 139 |
out = Path(args.output) if args.output else inp.with_suffix('.redacted.csv')
|
| 140 |
|
| 141 |
process_csv(inp, out, args.columns or [], args.full_mask)
|
| 142 |
-
print('
|
| 143 |
|
| 144 |
|
| 145 |
if __name__ == '__main__':
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
+
Generic CSV Redaction Script
|
| 4 |
|
| 5 |
+
Usage Examples:
|
| 6 |
python redact_generic.py input.csv
|
| 7 |
python redact_generic.py input.csv --output out.csv --columns log_message
|
| 8 |
python redact_generic.py input.csv --full-mask
|
| 9 |
|
| 10 |
+
Features:
|
| 11 |
+
- Default set of regex rules (URL, IP, email, phone number, ID card, long number, device ID, App name)
|
| 12 |
+
- Supports column-specific redaction (by column name or index), defaults to processing all text columns
|
| 13 |
+
- Provides partial masking (default) or full replacement options
|
| 14 |
"""
|
| 15 |
import re
|
| 16 |
import csv
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
def build_rules(full_mask: bool = False):
|
| 23 |
+
# Returns (name, pattern, repl_or_callable)
|
| 24 |
rules = []
|
| 25 |
|
| 26 |
# URLs
|
|
|
|
| 124 |
|
| 125 |
|
| 126 |
def main():
    """Command-line entry point: parse arguments and redact one CSV file.

    Parses the input path, the optional ``--output`` path, the optional
    ``--columns`` selection and the ``--full-mask`` flag, then passes them to
    ``process_csv``. When ``--output`` is omitted, the output path is the
    input path with its suffix replaced by ``.redacted.csv``.

    Raises:
        SystemExit: with a non-zero status (via ``ArgumentParser.error``) when
            the input path does not exist or is not a regular file, so that
            shell callers can detect the failure.
    """
    ap = argparse.ArgumentParser(description='Generic CSV Redaction Tool')
    ap.add_argument('input', help='Input CSV file')
    ap.add_argument('--output', '-o', help='Output CSV file (default in the same directory as input with .redacted.csv suffix)')
    ap.add_argument('--columns', '-c', nargs='+', help='Column names or indices to redact (indices start from 0). If not specified, apply to all columns')
    ap.add_argument('--full-mask', action='store_true', help='Use full replacement instead of partial masking (for phone numbers, ID cards, etc.)')

    args = ap.parse_args()
    inp = Path(args.input)
    # Report bad input through argparse: the message goes to stderr and the
    # process exits with a non-zero status instead of silently succeeding.
    if not inp.exists():
        ap.error(f'Input file does not exist: {inp}')
    # A directory passes exists() but cannot be read as a CSV file.
    if not inp.is_file():
        ap.error(f'Input path is not a file: {inp}')

    out = Path(args.output) if args.output else inp.with_suffix('.redacted.csv')

    # An omitted --columns is passed on as an empty list.
    process_csv(inp, out, args.columns or [], args.full_mask)
    print('Redacted file written to:', out)
|
| 143 |
|
| 144 |
|
| 145 |
if __name__ == '__main__':
|