File size: 967 Bytes
9fdb4cf
807509a
9fdb4cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#!/usr/bin/env python3
import json, argparse, yaml

def filter_jsonl(input_file, output_file, fields_to_keep):
    """Copy a JSON Lines file, keeping only selected keys of each record.

    Each non-blank line of ``input_file`` is parsed as JSON, reduced to the
    keys listed in ``fields_to_keep`` that are actually present in the
    record, and written as one JSON line to ``output_file``. Keys appear in
    the order given by ``fields_to_keep``; blank or whitespace-only lines
    are skipped. Both files are read/written as UTF-8 and non-ASCII
    characters are emitted unescaped.

    Args:
        input_file: Path of the JSONL file to read.
        output_file: Path of the JSONL file to create or overwrite.
        fields_to_keep: Iterable of keys to retain in every record.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(input_file, 'r', encoding='utf-8') as source, \
            open(output_file, 'w', encoding='utf-8') as sink:
        for raw_line in source:
            # Skip empty / whitespace-only lines instead of failing on them.
            if not raw_line.strip():
                continue
            entry = json.loads(raw_line)
            kept = {}
            for field in fields_to_keep:
                # Absent keys are dropped rather than filled with null.
                if field in entry:
                    kept[field] = entry.get(field)
            sink.write(json.dumps(kept, ensure_ascii=False) + '\n')

def main():
    """Command-line entry point: filter a JSONL file using a YAML config.

    Parses ``--input``, ``--output`` and ``--config`` (default
    ``config.yaml``), loads the YAML config, and passes its
    ``campos_filter`` entry to ``filter_jsonl`` as the list of fields to
    keep. Prints a confirmation line with the output path on success.

    Exits with an argparse usage error (status 2) when the config file is
    empty, is not a mapping, or lacks the ``campos_filter`` key.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True)
    parser.add_argument('--output', required=True)
    parser.add_argument('--config', default='config.yaml')
    args = parser.parse_args()

    # Read the config as UTF-8 explicitly, matching the data files; the
    # platform default encoding could mis-decode non-ASCII field names.
    with open(args.config, encoding='utf-8') as f:
        config = yaml.safe_load(f)

    # safe_load returns None for an empty file; fail with a clear message
    # instead of a bare TypeError/KeyError traceback.
    if not isinstance(config, dict) or 'campos_filter' not in config:
        parser.error(f"'campos_filter' key not found in config file: {args.config}")

    filter_jsonl(args.input, args.output, config['campos_filter'])
    print(f"✅ Filtrado: {args.output}")

# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()