#!/usr/bin/env python3
"""Filter a JSON Lines file down to a configured subset of fields.

The list of fields to keep is read from the ``campos_filter`` key of a YAML
configuration file (``config.yaml`` by default).
"""
import argparse
import json


def filter_jsonl(input_file, output_file, fields_to_keep):
    """Copy a JSON Lines file, keeping only selected keys of each record.

    Lines that are empty or contain only whitespace are skipped. Keys listed
    in ``fields_to_keep`` that are absent from a record are omitted from that
    record's output (they are not written as ``null``). Output keys follow
    the order of ``fields_to_keep``.

    Args:
        input_file: Path of the UTF-8 JSON Lines file to read.
        output_file: Path of the UTF-8 JSON Lines file to write; an existing
            file is overwritten.
        fields_to_keep: Iterable of key names to retain in each record.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message is prefixed with the input path and 1-based line number.
    """
    with open(input_file, 'r', encoding='utf-8') as fin, \
            open(output_file, 'w', encoding='utf-8') as fout:
        for line_number, line in enumerate(fin, start=1):
            if not line.strip():
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError as err:
                # Re-raise the same exception type, but say where in the
                # file the bad record is; the stock message only knows the
                # position inside this single line.
                raise json.JSONDecodeError(
                    f"{input_file}:{line_number}: {err.msg}",
                    err.doc,
                    err.pos,
                ) from err
            filtered = {k: record[k] for k in fields_to_keep if k in record}
            # ensure_ascii=False keeps non-ASCII text readable in the output.
            fout.write(json.dumps(filtered, ensure_ascii=False) + '\n')


def main(argv=None):
    """Command-line entry point.

    Parses ``--input``, ``--output`` and ``--config``, loads the YAML
    configuration, filters the input file with the field list found under
    the ``campos_filter`` key, and prints a confirmation line.

    Args:
        argv: Optional list of argument strings; ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: Via ``parser.error`` when the arguments are invalid, the
            config file cannot be read or parsed, or ``campos_filter`` is
            missing or is a scalar string.
    """
    # Imported here rather than at module level so that filter_jsonl can be
    # imported and used without PyYAML installed.
    import yaml

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--input', required=True,
                        help='JSON Lines file to read')
    parser.add_argument('--output', required=True,
                        help='JSON Lines file to write')
    parser.add_argument('--config', default='config.yaml',
                        help='YAML file providing the campos_filter list')
    args = parser.parse_args(argv)

    try:
        # Explicit encoding: the platform default is not UTF-8 everywhere and
        # would corrupt non-ASCII field names.
        with open(args.config, encoding='utf-8') as f:
            config = yaml.safe_load(f)
    except (OSError, yaml.YAMLError) as err:
        parser.error(f"cannot load config {args.config!r}: {err}")

    # safe_load returns None for an empty document and may return a
    # non-mapping value, so check the shape before indexing.
    fields = config.get('campos_filter') if isinstance(config, dict) else None
    if fields is None or isinstance(fields, str):
        # A bare string would be iterated character by character and
        # silently select the wrong keys.
        parser.error(
            f"config {args.config!r} must define 'campos_filter' "
            "as a list of field names"
        )

    filter_jsonl(args.input, args.output, fields)
    print(f"✅ Filtrado: {args.output}")


if __name__ == '__main__':
    main()