File size: 689 Bytes
41f6dd8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Author: Barry Haddow
# Distributed under MIT license

#
# Remove Romanian diacritics. Assumes s-comma and t-comma are normalised

import io
import sys
istream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
ostream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

for line in istream:
  line = line.replace("\u0218", "S").replace("\u0219", "s") #s-comma
  line = line.replace("\u021a", "T").replace("\u021b", "t") #t-comma
  line = line.replace("\u0102", "A").replace("\u0103", "a")
  line = line.replace("\u00C2", "A").replace("\u00E2", "a")
  line = line.replace("\u00CE", "I").replace("\u00EE", "i")
  ostream.write(line)