Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| import json | |
| import argparse | |
| import yaml | |
def filter_jsonl(input_file, output_file, fields_to_keep):
    """Copy a JSON Lines file, keeping only selected fields of each record.

    Every non-blank line of ``input_file`` is parsed as JSON, reduced to the
    keys from ``fields_to_keep`` that are present in the record, and written
    to ``output_file`` as one JSON document per line. Blank lines are
    skipped. Keys are emitted in the order given by ``fields_to_keep``, and
    non-ASCII characters are written unescaped.

    Args:
        input_file: Path of the JSONL file to read (UTF-8, with or without
            a leading byte-order mark).
        output_file: Path of the JSONL file to write (UTF-8); it is
            truncated if it already exists.
        fields_to_keep: Collection of key names to retain. It is iterated
            once per record, so it must be re-iterable (e.g. a list).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message names the input file and the 1-based line number.
    """
    # 'utf-8-sig' decodes plain UTF-8 as well, but also drops a leading BOM,
    # which would otherwise make json.loads reject the first record.
    with open(input_file, 'r', encoding='utf-8-sig') as fin, \
            open(output_file, 'w', encoding='utf-8') as fout:
        for line_number, line in enumerate(fin, start=1):
            if not line.strip():
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError as err:
                # Same exception type as before, so existing handlers still
                # match; only the message gains the file position.
                raise json.JSONDecodeError(
                    f"{input_file}, input line {line_number}: {err.msg}",
                    err.doc,
                    err.pos,
                ) from err
            # The membership test and .get() below rely on each record being
            # a JSON object (dict).
            filtered = {k: record.get(k) for k in fields_to_keep if k in record}
            fout.write(json.dumps(filtered, ensure_ascii=False) + '\n')
def main():
    """Command-line entry point: filter a JSONL file using a YAML config.

    Parses ``--input``, ``--output`` (both required) and ``--config``
    (default ``config.yaml``), loads the YAML config, and passes its
    ``campos_filter`` entry to ``filter_jsonl`` as the list of fields to
    keep. Prints a confirmation line naming the output path when done.

    Exits through ``argparse`` (usage message, status 2) when the config is
    not a mapping, lacks ``campos_filter``, or that entry is not a list.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True)
    parser.add_argument('--output', required=True)
    parser.add_argument('--config', default='config.yaml')
    args = parser.parse_args()

    # Decode the config as UTF-8 explicitly so non-ASCII field names do not
    # depend on the platform's default encoding.
    with open(args.config, encoding='utf-8') as f:
        config = yaml.safe_load(f)

    # safe_load returns None for an empty file and may return a non-mapping
    # for other documents; report that instead of failing on the lookup.
    if not isinstance(config, dict) or 'campos_filter' not in config:
        parser.error(f"{args.config}: missing required key 'campos_filter'")

    fields = config['campos_filter']
    # A bare string would be iterated character by character downstream.
    if not isinstance(fields, list):
        parser.error(f"{args.config}: 'campos_filter' must be a list of field names")

    filter_jsonl(args.input, args.output, fields)
    print(f"✅ Filtrado: {args.output}")
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()