File size: 764 Bytes
# -*- coding: utf-8 -*-
import unicodedata
def ispunct(token):
    """Return True if every character of ``token`` is Unicode punctuation.

    A character counts as punctuation when its Unicode general category
    starts with ``'P'``. An empty ``token`` yields True because there is
    no character to fail the check.
    """
    for ch in token:
        if not unicodedata.category(ch).startswith('P'):
            return False
    return True
def isfullwidth(token):
    """Return True if every character of ``token`` has a wide East Asian width.

    The accepted ``unicodedata.east_asian_width`` classes are ``'W'``
    (wide), ``'F'`` (fullwidth) and ``'A'`` (ambiguous). An empty
    ``token`` yields True.
    """
    wide_classes = ('W', 'F', 'A')
    return not any(unicodedata.east_asian_width(ch) not in wide_classes
                   for ch in token)
def islatin(token):
    """Return True if every character of ``token`` is a Latin-script character.

    A character qualifies when its Unicode name contains ``'LATIN'``.
    Characters that have no Unicode name (for example control characters
    such as ``'\\n'``) are treated as non-Latin instead of raising
    ``ValueError``. An empty ``token`` yields True.
    """
    # The '' default keeps unicodedata.name from raising on unnamed characters.
    return all('LATIN' in unicodedata.name(char, '')
               for char in token)
def isdigit(token):
    """Return True if every character of ``token`` is a digit character.

    A character qualifies when its Unicode name contains ``'DIGIT'``,
    which also covers non-ASCII digits such as fullwidth ones. Characters
    that have no Unicode name (for example control characters such as
    ``'\\n'``) are treated as non-digits instead of raising
    ``ValueError``. An empty ``token`` yields True.
    """
    # The '' default keeps unicodedata.name from raising on unnamed characters.
    return all('DIGIT' in unicodedata.name(char, '')
               for char in token)
def isprojective(sequence):
    """Return True if the head sequence contains no crossing arcs.

    ``sequence[k]`` is read as the head position of the token at position
    ``k``. Position 0 is never inspected as a dependent (presumably it is
    a root/BOS placeholder -- confirm against callers), and tokens whose
    head is negative are ignored.

    Two arcs cross when exactly one endpoint of one arc lies strictly
    inside the span of the other. Every pair of arcs is compared, so
    crossings are found regardless of whether the heads lie to the left
    or to the right of their dependents.
    """
    # Represent each arc by its ordered (left, right) span.
    spans = []
    for dep in range(1, len(sequence)):
        head = sequence[dep]
        if head >= 0:
            spans.append((min(head, dep), max(head, dep)))

    for k, (li, ri) in enumerate(spans):
        for lj, rj in spans[k + 1:]:
            # Strict inequalities: arcs that merely share an endpoint or
            # are nested inside one another do not cross.
            if li < lj < ri < rj or lj < li < rj < ri:
                return False
    return True
|