|
|
|
|
|
|
|
|
import unicodedata |
|
|
|
|
|
|
|
|
def ispunct(token):
    """Tell whether ``token`` consists solely of punctuation characters.

    A character counts as punctuation when its Unicode general category
    (as reported by ``unicodedata.category``) begins with ``'P'``.

    Args:
        token: The string to inspect, examined character by character.

    Returns:
        bool: ``False`` as soon as one non-punctuation character is found,
        ``True`` otherwise. An empty token therefore yields ``True``.
    """
    for symbol in token:
        general_category = unicodedata.category(symbol)
        if not general_category.startswith('P'):
            return False
    return True
|
|
|
|
|
|
|
|
def isfullwidth(token):
    """Tell whether every character of ``token`` is a full-width character.

    A character qualifies when ``unicodedata.east_asian_width`` reports one
    of the classes ``'W'``, ``'F'`` or ``'A'``.

    Args:
        token: The string to inspect, examined character by character.

    Returns:
        bool: ``False`` as soon as a character of any other width class is
        found, ``True`` otherwise. An empty token therefore yields ``True``.
    """
    accepted_widths = ['W', 'F', 'A']
    for symbol in token:
        width_class = unicodedata.east_asian_width(symbol)
        if width_class not in accepted_widths:
            return False
    return True
|
|
|
|
|
|
|
|
def islatin(token):
    """Return whether every character of ``token`` is a Latin-script character.

    A character qualifies when its Unicode character name contains the
    substring ``'LATIN'`` (e.g. ``LATIN SMALL LETTER A``,
    ``FULLWIDTH LATIN CAPITAL LETTER A``).

    Args:
        token: The string to inspect, examined character by character.

    Returns:
        bool: ``True`` if all characters qualify, ``False`` otherwise.
        An empty token yields ``True``.
    """
    # `unicodedata.name` raises ValueError for characters without a name
    # (control characters, unassigned code points); supplying a default
    # makes such characters simply fail the test instead of crashing.
    return all('LATIN' in unicodedata.name(char, '') for char in token)
|
|
|
|
|
|
|
|
def isdigit(token):
    """Return whether every character of ``token`` is a digit character.

    A character qualifies when its Unicode character name contains the
    substring ``'DIGIT'`` (e.g. ``DIGIT ONE``, ``ARABIC-INDIC DIGIT ONE``,
    ``FULLWIDTH DIGIT ONE``).

    Args:
        token: The string to inspect, examined character by character.

    Returns:
        bool: ``True`` if all characters qualify, ``False`` otherwise.
        An empty token yields ``True``.
    """
    # `unicodedata.name` raises ValueError for characters without a name
    # (control characters, unassigned code points); supplying a default
    # makes such characters simply fail the test instead of crashing.
    return all('DIGIT' in unicodedata.name(char, '') for char in token)
|
|
|
|
|
|
|
|
def isprojective(sequence):
    """Check that the arcs encoded by a head sequence do not cross.

    ``sequence[d]`` is read as the head position of the token at position
    ``d``. Position 0 is never treated as a dependent (presumably it is a
    root/BOS placeholder -- confirm against callers), and entries with a
    negative head are ignored.

    Two arcs cross when exactly one endpoint of one arc lies strictly
    between the endpoints of the other. The test is symmetric in arc
    direction, so crossings are found whether heads lie to the left or to
    the right of their dependents.

    Args:
        sequence: Indexable sequence of integer head positions.

    Returns:
        bool: ``False`` if any two known arcs cross, ``True`` otherwise
        (including for sequences with no known arcs).
    """
    # Normalise every known arc to its (left, right) endpoints. The
    # `dep > 0` test short-circuits so that `sequence[0]` is never compared.
    spans = sorted(
        (min(head, dep), max(head, dep))
        for dep, head in enumerate(sequence)
        if dep > 0 and head >= 0
    )
    for k, (left, right) in enumerate(spans):
        for other_left, other_right in spans[k + 1:]:
            # Spans are sorted by left endpoint: once one starts at or
            # beyond `right`, neither it nor any later span can cross.
            if other_left >= right:
                break
            # Here left <= other_left < right; the arcs cross iff the
            # other span starts strictly inside and ends strictly outside.
            if left < other_left and right < other_right:
                return False
    return True
|
|
|