PARA.Ai_api / filter_fields.py
caarleexx's picture
Upload 11 files
9514a77 verified
raw
history blame contribute delete
981 Bytes
#!/usr/bin/env python3
import json
import argparse
import yaml
def filter_jsonl(input_file, output_file, fields_to_keep):
    """Copy a JSON Lines file, keeping only selected top-level keys.

    Each non-blank line of ``input_file`` is parsed as a JSON object and
    written to ``output_file`` as one JSON line holding only the keys named
    in ``fields_to_keep``.  Keys absent from a record are omitted (not
    emitted as ``null``), blank lines are skipped, and output keys follow
    the order of ``fields_to_keep``.  Both files are handled as UTF-8 and
    non-ASCII characters are written unescaped.

    Args:
        input_file: Path of the JSONL file to read.
        output_file: Path of the JSONL file to create or overwrite.
        fields_to_keep: Iterable of key names to retain.  A bare string is
            treated as one key name, not as a sequence of characters.

    Returns:
        The number of records written.

    Raises:
        ValueError: If both paths refer to the same file, if a line is not
            valid JSON, or if a line is valid JSON but not an object.  For
            per-line errors the message names the file and line number.
        OSError: If either file cannot be opened.

    Note:
        If an error is raised part-way through, the output file keeps the
        records that were written before the failing line.
    """
    import os

    # Materialize once: a generator would otherwise be exhausted after the
    # first record, and a lone string would be iterated character by character.
    if isinstance(fields_to_keep, str):
        fields = (fields_to_keep,)
    else:
        fields = tuple(fields_to_keep)

    # Opening the output in 'w' mode truncates it immediately; if it is the
    # input file, every record would be lost before being read.
    if os.path.exists(output_file) and os.path.samefile(input_file, output_file):
        raise ValueError(
            f"input and output refer to the same file: {input_file}"
        )

    written = 0
    with open(input_file, 'r', encoding='utf-8') as fin, \
            open(output_file, 'w', encoding='utf-8') as fout:
        for line_no, line in enumerate(fin, start=1):
            if not line.strip():
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError as err:
                # The decoder only knows about this one line, so its own
                # position info always says "line 1"; report the real one.
                raise ValueError(
                    f"{input_file}, line {line_no}: invalid JSON ({err.msg})"
                ) from err
            if not isinstance(record, dict):
                raise ValueError(
                    f"{input_file}, line {line_no}: expected a JSON object, "
                    f"got {type(record).__name__}"
                )
            filtered = {k: record[k] for k in fields if k in record}
            fout.write(json.dumps(filtered, ensure_ascii=False) + '\n')
            written += 1
    return written
def main():
    """Command-line entry point: filter a JSONL file using a YAML config.

    Parses ``--input``, ``--output`` and ``--config`` (default
    ``config.yaml``), loads the YAML config, and passes the value of its
    ``campos_filter`` key to ``filter_jsonl`` as the fields to keep.
    Prints a confirmation line naming the output file on success.

    A config that is empty, is not a mapping, or has no value under
    ``campos_filter`` is reported through ``parser.error``, which prints a
    usage message and exits with status 2.
    """
    parser = argparse.ArgumentParser(
        description='Keep only selected fields of each record in a JSONL file.'
    )
    parser.add_argument('--input', required=True,
                        help='path of the JSONL file to read')
    parser.add_argument('--output', required=True,
                        help='path of the filtered JSONL file to write')
    parser.add_argument('--config', default='config.yaml',
                        help="YAML file with a 'campos_filter' list "
                             "(default: %(default)s)")
    args = parser.parse_args()

    # Explicit UTF-8 so the config is read the same way on every platform,
    # matching the encoding used for the JSONL files.
    with open(args.config, encoding='utf-8') as f:
        config = yaml.safe_load(f)

    # safe_load returns None for an empty file and may return a non-mapping;
    # a key with no value also loads as None.
    fields = config.get('campos_filter') if isinstance(config, dict) else None
    if fields is None:
        parser.error(f"{args.config}: missing required key 'campos_filter'")

    filter_jsonl(args.input, args.output, fields)
    print(f"✅ Filtrado: {args.output}")
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()