File size: 1,279 Bytes
76023b0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import os
def remove_duplicates(input_file, output_file):
    """Write the distinct lines of *input_file* to *output_file*.

    Every line is stripped of leading and trailing whitespace before it is
    compared, so lines that differ only in surrounding whitespace count as
    duplicates. The first occurrence of each line is kept and the relative
    order of the surviving lines matches the input.

    The input is read completely before the output is opened, so both
    arguments may name the same file (in-place deduplication).

    A three-line summary (original count, new count, removed count) is
    printed to stdout.

    Args:
        input_file: Path of the UTF-8 text file to read.
        output_file: Path of the UTF-8 text file to write; it is overwritten.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If the input is not valid UTF-8.
    """
    # Count lines during the one read pass. Re-reading the input afterwards
    # gives the wrong number when output_file is the same path, because the
    # file has already been overwritten by then.
    total_lines = 0
    # A dict keeps insertion order, which makes the output deterministic;
    # only the keys are used.
    unique_lines = {}
    with open(input_file, 'r', encoding='utf-8') as f:
        for line in f:
            total_lines += 1
            unique_lines.setdefault(line.strip(), None)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(f"{line}\n" for line in unique_lines)

    print(f"Duplicates removed. Original file had {total_lines} lines.")
    print(f"New file has {len(unique_lines)} lines.")
    print(f"Removed {total_lines - len(unique_lines)} duplicate lines.")
def main():
    """Deduplicate the hard-coded dictionary file in place.

    The input and output paths are the same file, so the file is rewritten
    with its duplicate lines removed. Any failure, including a missing
    input file, is reported on stdout instead of being propagated.
    """
    dictionary_path = '/home/vikrant-MNMT/myenv/BPCC/inline_tages/eng_Latn-hin_Deva/tag_dictonary_1.txt'
    # Source and destination are deliberately identical.
    input_file = output_file = dictionary_path

    try:
        if os.path.exists(input_file):
            remove_duplicates(input_file, output_file)
            print(f"Duplicates removed successfully. Output file: {output_file}")
        else:
            raise FileNotFoundError(f"Input file not found: {input_file}")
    except Exception as err:
        # Top-level boundary: report the problem rather than crash.
        print(f"An error occurred: {err!s}")
# Run the deduplication only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|