|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import re |
|
|
import sys |
|
|
|
|
|
from GenerateCommon import \ |
|
|
script_names, \ |
|
|
script_abbrevs |
|
|
|
|
|
def write_both(text):
    """Append *text* to both generated files.

    The text goes to the module-level ``input_file`` first and then to
    ``output_file``; use it for content that must appear verbatim in the
    test input as well as in the expected output.
    """
    for stream in (input_file, output_file):
        stream.write(text)
|
|
|
|
|
def to_string_char(ch_idx):
    """Return the textual form of code point *ch_idx* used in the test files.

    Code points 32..127 are returned as the literal character.  Everything
    else is written as a ``\\x{...}`` hexadecimal escape; values below 16
    get a leading zero so the escape always has at least two hex digits.
    """
    if 32 <= ch_idx < 128:
        return chr(ch_idx)

    if ch_idx < 16:
        # Single hex digit: pad to two digits.
        return "\\x{0%x}" % ch_idx

    return "\\x{%x}" % ch_idx
|
|
|
|
|
# Create both generated files up front.  They stay open as module-level
# handles because write_both() and the generator functions write to them.
try:
    input_file = open("testinput", "w")
    output_file = open("testoutput", "w")
except IOError:
    print("** Couldn't create output files")
    sys.exit(1)

# Banner placed at the top of both files, warning that they are generated.
write_both(
    "# These tests were generated by maint/GenerateTest.py using PCRE2's UCP\n"
    "# data, do not edit unless that data has changed and they are reflecting\n"
    "# a previous version.\n\n"
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def gen_script_tests():
    """Generate the Unicode script and script-extension property tests.

    Reads ``Unicode.tables/Scripts.txt`` and
    ``Unicode.tables/ScriptExtensions.txt`` (relative to the current
    directory), picks a few representative code points for every script in
    ``script_names`` except "Unknown", and writes the resulting test cases
    to the module-level ``input_file`` / ``output_file``.

    Exits the process with status 1 when the first line of Scripts.txt does
    not carry the expected ``# Scripts-X.Y.Z.txt`` version header.
    """
    # One record per script, indexed like script_names / script_abbrevs:
    #   [0] 'low' of the first Scripts.txt line seen for the script
    #   [1] 'high' of the last Scripts.txt line seen for the script
    #   [2] lowest code point of the script's ScriptExtensions.txt ranges
    #   [3] highest code point of the script's ScriptExtensions.txt ranges
    #   [4] first code point found in those ranges whose base script (per
    #       Scripts.txt) is a different one, or None if none was found
    script_data = [None] * len(script_names)
    # Base script name of every code point; None when Scripts.txt has no entry.
    char_data = [None] * 0x110000

    property_re = re.compile(r"^([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))? +; ([A-Za-z_ ]+[A-Za-z]) +#")
    prev_name = ""
    script_idx = -1

    with open("Unicode.tables/Scripts.txt") as f:
        version_pat = r"^# Scripts-(\d+\.\d+\.\d+)\.txt$"
        v = re.match(version_pat, f.readline())
        if v is None:
            # Without this check a malformed header surfaced as an opaque
            # AttributeError on None.
            print("** Couldn't find the Unicode version in Unicode.tables/Scripts.txt")
            sys.exit(1)
        unicode_version = v.group(1)

        write_both("# Unicode Script Extension tests for version " + unicode_version + "\n\n")
        write_both("#perltest\n\n")

        for line in f:
            match_obj = property_re.match(line)
            if match_obj is None:
                continue

            name = match_obj.group(3)
            if name != prev_name:
                # Only look the index up again when the script name changes.
                script_idx = script_names.index(name)
                prev_name = name

            low = int(match_obj.group(1), 16)
            high = low
            char_data[low] = name

            if match_obj.group(2) is not None:
                high = int(match_obj.group(2), 16)
                for ch in range(low + 1, high + 1):
                    char_data[ch] = name

            if script_data[script_idx] is None:
                script_data[script_idx] = [low, None, None, None, None]
            script_data[script_idx][1] = high

    # Cache of abbreviation -> script index for abbreviations already seen.
    extended_script_indices = {}

    with open("Unicode.tables/ScriptExtensions.txt") as f:
        for line in f:
            match_obj = property_re.match(line)
            if match_obj is None:
                continue

            low = int(match_obj.group(1), 16)
            high = low
            if match_obj.group(2) is not None:
                high = int(match_obj.group(2), 16)

            # The property field is a space separated list of abbreviations.
            for abbrev in match_obj.group(3).split(" "):
                if abbrev not in extended_script_indices:
                    idx = script_abbrevs.index(abbrev)
                    extended_script_indices[abbrev] = idx
                    rec = script_data[idx]
                    rec[2] = low
                    rec[3] = high
                else:
                    idx = extended_script_indices[abbrev]
                    rec = script_data[idx]
                    if rec[2] > low:
                        rec[2] = low
                    if rec[3] < high:
                        rec[3] = high

                if rec[4] is None:
                    # Look for a character that belongs to this script only
                    # through its extensions, i.e. has another base script.
                    name = script_names[idx]
                    for ch in range(low, high + 1):
                        if char_data[ch] != name:
                            rec[4] = ch
                            break

    # Alternates between the short ("scx") and long ("Script_Extensions")
    # property name so that both spellings end up in the generated tests.
    long_property_name = False

    for idx, rec in enumerate(script_data):
        script_name = script_names[idx]

        if script_name == "Unknown":
            continue

        script_abbrev = script_abbrevs[idx]

        write_both("# Base script check\n")
        _write_property_test("sc=%s" % script_name, rec[0], True)
        _write_property_test("Script=%s" % script_abbrev, rec[1], True)

        if rec[2] is not None:
            property_name = "scx"
            if long_property_name:
                property_name = "Script_Extensions"

            write_both("# Script extension check\n")
            _write_property_test(script_name, rec[2], True)
            _write_property_test("%s=%s" % (property_name, script_abbrev), rec[3], True)

            long_property_name = not long_property_name

            if rec[4] is not None:
                # Matches the bare (extension-aware) property but not sc=.
                write_both("# Script extension only character\n")
                _write_property_test(script_name, rec[4], True)
                _write_property_test("sc=%s" % script_name, rec[4], False)
            else:
                print("External character has not found for %s" % script_name)

        # The code point right after the highest one recorded for the script
        # is used as a character expected not to match it.
        high = rec[1]
        if rec[3] is not None and rec[3] > rec[1]:
            high = rec[3]
        write_both("# Character not in script\n")
        _write_property_test(script_name, high + 1, False)


def _write_property_test(prop, ch_idx, expect_match):
    """Write one ``/^\\p{prop}/utf`` test whose subject is code point *ch_idx*.

    The pattern and subject lines go to both files; the expected result
    (the matched text, or "No match") goes to the output file only.
    """
    write_both("/^\\p{%s}/utf\n" % prop)
    subject = to_string_char(ch_idx)
    write_both(" %s\n" % subject)
    if expect_match:
        output_file.write(" 0: %s\n" % subject)
    else:
        output_file.write("No match\n")
    write_both("\n")
|
|
|
|
|
# Write every script / script-extension test case, then finish both
# generated files with the closing marker line.
gen_script_tests()

write_both("# End of test\n")
|
|
|