Spaces:
Sleeping
Sleeping
| import jsonlines | |
# Input file containing classified data (read with jsonlines, one record per line).
input_file = "data/thirdStep_file.jsonl"

# Output file to store transformed data.
output_file = "data/Full-Labeled-Data-Final-4465.jsonl"

# Threshold for considering a label: process_record accepts a label only when
# its score is strictly greater than this value.
threshold = 0.21

# Options for the different categories. process_record copies these entries
# into the "options" list of every output record. Note that "meta" is stored
# as the string "0.00", not as a number.
options = [
    {"id": "CapitalRequirements", "text": "Capital Requirements", "meta": "0.00"},
    {"id": "ConsumerProtection", "text": "Consumer Protection", "meta": "0.00"},
    {"id": "RiskManagement", "text": "Risk Management", "meta": "0.00"},
    {"id": "ReportingAndCompliance", "text": "Reporting And Compliance", "meta": "0.00"},
    {"id": "CorporateGovernance", "text": "Corporate Governance", "meta": "0.00"},
]
def process_record(record, min_score=None, category_options=None):
    """Convert one classified record into the labelled output format.

    Args:
        record: Mapping with a "text" entry and a "predicted_labels" entry;
            the latter maps each label name to its numeric score.
        min_score: Cut-off a score must strictly exceed for its label to be
            accepted. When omitted, the module-level ``threshold`` is used.
        category_options: Iterable of option dicts, each providing "id",
            "text" and "meta". When omitted, the module-level ``options``
            list is used.

    Returns:
        A dict with these keys:
            "text": the record's text, unchanged.
            "cats": the record's "predicted_labels" mapping (same object).
            "accept": list of labels whose score is greater than the cut-off,
                in the mapping's iteration order.
            "answer": "accept" if at least one label was accepted,
                otherwise "reject".
            "options": a new list of new dicts copied from the options.

    Raises:
        KeyError: If ``record`` lacks "text" or "predicted_labels", or an
            option lacks "id", "text" or "meta".
    """
    # Fall back to the module-level configuration only when the caller did
    # not supply a value; `is None` keeps an explicit 0.0 or [] meaningful.
    cutoff = threshold if min_score is None else min_score
    choices = options if category_options is None else category_options

    # Extract text and predicted labels.
    text = record["text"]
    predicted_labels = record["predicted_labels"]

    # Determine accepted categories based on the cut-off (strict comparison).
    accepted_categories = [
        label for label, score in predicted_labels.items() if score > cutoff
    ]

    # An empty accepted list means the whole record is rejected.
    answer = "accept" if accepted_categories else "reject"

    # Build fresh dicts so output records never share the option dicts they
    # were derived from.
    options_with_meta = [
        {"id": option["id"], "text": option["text"], "meta": option["meta"]}
        for option in choices
    ]

    # Construct the output record.
    return {
        "text": text,
        "cats": predicted_labels,
        "accept": accepted_categories,
        "answer": answer,
        "options": options_with_meta,
    }
# Process input file and write transformed data to output file.
# Each record is transformed and written as soon as it is read; both files
# are closed by the `with` statement, including when an error is raised.
with jsonlines.open(input_file, "r") as infile, jsonlines.open(output_file, "w") as outfile:
    for record in infile:
        output_record = process_record(record)
        outfile.write(output_record)