| import os | |
def remove_duplicates(input_file, output_file):
    """Write the distinct lines of ``input_file`` to ``output_file``.

    Every line is stripped of leading and trailing whitespace before it is
    compared, so lines that differ only in surrounding whitespace count as
    duplicates. The first occurrence of each line is kept and the order of
    first appearance is preserved. ``input_file`` and ``output_file`` may be
    the same path: the input is read completely before the output is opened.

    A three-line summary (original, new and removed line counts) is printed.

    Args:
        input_file: Path of the UTF-8 text file to read.
        output_file: Path of the UTF-8 text file to write; it is overwritten.

    Returns:
        None.
    """
    # A dict keeps insertion order, so the output is deterministic and follows
    # first appearance; a set would emit the lines in arbitrary order.
    unique_lines = {}
    original_count = 0

    # Count while reading: once the output has been written the input may
    # already be overwritten (same path), so re-reading it afterwards would
    # report the deduplicated size as the original size.
    with open(input_file, 'r', encoding='utf-8') as f:
        for line in f:
            original_count += 1
            unique_lines[line.strip()] = None

    # Write unique lines to the output file
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(f"{line}\n" for line in unique_lines)

    print(f"Duplicates removed. Original file had {original_count} lines.")
    print(f"New file has {len(unique_lines)} lines.")
    print(f"Removed {original_count - len(unique_lines)} duplicate lines.")
def main():
    """De-duplicate the hard-coded tag dictionary file.

    The source and destination paths are identical, so the file is rewritten
    in place. Any exception, including the missing-input case raised here,
    is reported on standard output instead of propagating.
    """
    source_path = '/home/vikrant-MNMT/myenv/BPCC/inline_tages/eng_Latn-hin_Deva/tag_dictonary_1.txt'
    target_path = '/home/vikrant-MNMT/myenv/BPCC/inline_tages/eng_Latn-hin_Deva/tag_dictonary_1.txt'

    try:
        if os.path.exists(source_path):
            remove_duplicates(source_path, target_path)
            print(f"Duplicates removed successfully. Output file: {target_path}")
        else:
            # Raised inside the try so it is reported by the handler below.
            raise FileNotFoundError(f"Input file not found: {source_path}")
    except Exception as err:
        message = str(err)
        print(f"An error occurred: {message}")
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()