ShawtyIsBad-bgem3 / test_bg /result1.json
aloobun's picture
Upload 2 files
09a92a5 verified
{
"coverage": {},
"complexity": {},
"language_analysis": {},
"edge_cases": {
"hindi": {
"script_test": {
"tokens": [
"नम",
"सà¥įतà¥ĩ",
",",
"Ġमà¥Īà¤Ĥ",
"Ġà¤Ńारत",
"Ġसà¥ĩ",
"Ġहà¥Ĥà¤ģ",
"।",
"Ġदिलà¥įलà¥Ģ",
"Ġबहà¥ģत",
"Ġबड़ा",
"Ġशहर",
"Ġहà¥Ī",
"।"
],
"token_count": 14,
"unique_tokens": 13
},
"unicode_test": {
"tokens": [
"हिनà¥įद",
"à¥Ģ",
"Ġ",
"१",
"२",
"३",
"४",
"५",
"६",
"à¥Ń",
"८",
"९",
"Ġvow",
"els",
":",
"Ġà¤ħ",
"Ġà¤Ĩ",
"Ġà¤ĩ",
"Ġà¤Ī",
"Ġà¤ī",
"Ġà¤Ĭ"
],
"token_count": 21,
"unique_tokens": 21
},
"special_chars": {
"tokens": [
"हिनà¥įद",
"à¥Ģ",
"!",
"Ġ@",
"Ġ#",
"Ġ$",
"Ġ%",
"Ġ^",
"Ġ&",
"Ġ*",
"Ġ(",
"Ġ)",
"Ġ_",
"Ġ+",
"Ġ=",
"Ġ[",
"Ġ]",
"Ġ{",
"Ġ}"
],
"token_count": 19,
"unique_tokens": 19
}
},
"english": {
"script_test": {
"tokens": [
"Hello",
",",
"ĠI",
"Ġam",
"Ġfrom",
"Ġthe",
"ĠUnited",
"ĠStates",
".",
"ĠNew",
"ĠYork",
"Ġis",
"Ġa",
"Ġbeautiful",
"Ġcity",
"."
],
"token_count": 16,
"unique_tokens": 15
},
"unicode_test": {
"tokens": [
"English",
"Ġ",
"012",
"345",
"678",
"9",
"Ġvow",
"els",
":",
"Ġa",
"Ġe",
"Ġi",
"Ġo",
"Ġu"
],
"token_count": 14,
"unique_tokens": 14
},
"special_chars": {
"tokens": [
"English",
"!",
"Ġ@",
"Ġ#",
"Ġ$",
"Ġ%",
"Ġ^",
"Ġ&",
"Ġ*",
"Ġ(",
"Ġ)",
"Ġ_",
"Ġ+",
"Ġ=",
"Ġ[",
"Ġ]",
"Ġ{",
"Ġ}"
],
"token_count": 18,
"unique_tokens": 18
}
}
},
"unicode_coverage": {
"hindi": {
"original_text": "हिन्दी १२३४५६७८९ vowels: अ आ इ ई उ ऊ",
"tokens": [
"हिनà¥įद",
"à¥Ģ",
"Ġ",
"१",
"२",
"३",
"४",
"५",
"६",
"à¥Ń",
"८",
"९",
"Ġvow",
"els",
":",
"Ġà¤ħ",
"Ġà¤Ĩ",
"Ġà¤ĩ",
"Ġà¤Ī",
"Ġà¤ī",
"Ġà¤Ĭ"
],
"token_count": 21,
"unique_tokens": 21,
"coverage_ratio": 1.0
},
"english": {
"original_text": "English 0123456789 vowels: a e i o u",
"tokens": [
"English",
"Ġ",
"012",
"345",
"678",
"9",
"Ġvow",
"els",
":",
"Ġa",
"Ġe",
"Ġi",
"Ġo",
"Ġu"
],
"token_count": 14,
"unique_tokens": 14,
"coverage_ratio": 1.0
}
},
"script_complexity": {
"hindi": {
"original_text_length": 49,
"tokens": [
"नम",
"सà¥įतà¥ĩ",
",",
"Ġमà¥Īà¤Ĥ",
"Ġà¤Ńारत",
"Ġसà¥ĩ",
"Ġहà¥Ĥà¤ģ",
"।",
"Ġदिलà¥įलà¥Ģ",
"Ġबहà¥ģत",
"Ġबड़ा",
"Ġशहर",
"Ġहà¥Ī",
"।"
],
"token_count": 14,
"avg_token_length": 9.071428571428571,
"token_diversity": 0.9285714285714286
},
"english": {
"original_text_length": 65,
"tokens": [
"Hello",
",",
"ĠI",
"Ġam",
"Ġfrom",
"Ġthe",
"ĠUnited",
"ĠStates",
".",
"ĠNew",
"ĠYork",
"Ġis",
"Ġa",
"Ġbeautiful",
"Ġcity",
"."
],
"token_count": 16,
"avg_token_length": 4.0625,
"token_diversity": 0.9375
}
}
}