import os


def remove_duplicates(input_file, output_file):
    """Write the distinct lines of *input_file* to *output_file*.

    Every line is stripped of leading/trailing whitespace before it is
    compared, so lines differing only in surrounding whitespace count as
    duplicates. The first occurrence of each line is kept and the original
    relative order is preserved. A short summary is printed to stdout.

    *input_file* and *output_file* may be the same path: the input is read
    completely (and closed) before the output is opened for writing.

    Args:
        input_file: Path of the UTF-8 text file to read.
        output_file: Path of the UTF-8 text file to (over)write.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If the input is not valid UTF-8.
    """
    # A dict keeps insertion order, giving deterministic, first-seen ordering
    # while still deduplicating in O(1) per line.
    unique_lines = {}
    original_count = 0

    # Count lines during the one read pass. Re-opening the input afterwards
    # would report the wrong total when input and output are the same file.
    with open(input_file, 'r', encoding='utf-8') as f:
        for line in f:
            original_count += 1
            unique_lines[line.strip()] = None

    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(line + '\n' for line in unique_lines)

    print(f"Duplicates removed. Original file had {original_count} lines.")
    print(f"New file has {len(unique_lines)} lines.")
    print(f"Removed {original_count - len(unique_lines)} duplicate lines.")


def main():
    """Deduplicate the hard-coded tag dictionary file in place."""
    input_file = '/home/vikrant-MNMT/myenv/BPCC/inline_tages/eng_Latn-hin_Deva/tag_dictonary_1.txt'
    # The output path is intentionally identical to the input path: the file
    # is rewritten in place with duplicates removed.
    output_file = '/home/vikrant-MNMT/myenv/BPCC/inline_tages/eng_Latn-hin_Deva/tag_dictonary_1.txt'

    # Top-level boundary of the script: report any failure instead of
    # letting a traceback escape.
    try:
        if not os.path.exists(input_file):
            raise FileNotFoundError(f"Input file not found: {input_file}")

        remove_duplicates(input_file, output_file)
        print(f"Duplicates removed successfully. Output file: {output_file}")
    except Exception as e:
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    main()