[ { "seed_identities_with_labels": [ { "name": "Moalim Ibrahim (latin)", "dob": "1985-3-13", "address": "Somalia", "label": "positive", "script": "latin" }, { "name": "jérôme evrard (latin)", "dob": "1999-04-01", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "marine valette (latin)", "dob": "1979-02-16", "address": "Tonga", "label": "negative", "script": "latin" }, { "name": "руслан овсієнко (cyrillic)", "dob": "1934-04-12", "address": "Ukraine", "label": "negative", "script": "cyrillic" }, { "name": "álvaro oliveira (latin)", "dob": "1935-09-03", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Elena Yampolskaya (latin)", "dob": "1971-6-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "virginie mace (latin)", "dob": "1956-02-03", "address": "Chine (Rép. pop.)", "label": "negative", "script": "latin" }, { "name": "suzanne lagarde (latin)", "dob": "1959-07-13", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "راتب بنو هلال (arabic)", "dob": "1977-06-15", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "anne couturier (latin)", "dob": "1989-09-16", "address": "Bahrain", "label": "negative", "script": "latin" }, { "name": "Abu Emad (latin)", "dob": "1984-1-1", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "maryse picard (latin)", "dob": "1982-11-29", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "حسین اسداله (arabic)", "dob": "1983-5-14", "address": "Iran", "label": "positive", "script": "arabic" }, { "name": "'Abdul-Wahab AL-HUMAYQANI (latin)", "dob": "1972-8-4", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "michelle poulain (latin)", "dob": "1977-02-27", "address": "Tadjikistan", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 100% Medium, and also include 23% of variations that follow: Additionally, generate variations that perform these transformations: Delete a random letter, and Use first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 23, "selected_rules": [ "delete_random_letter", "initial_only_first_name" ], "rule_descriptions": { "delete_random_letter": "Delete a random letter", "initial_only_first_name": "Use first name initial with last name" }, "percentage": 23 } } }, { "seed_identities_with_labels": [ { "name": "lorenzo jáuregui (latin)", "dob": "1982-06-30", "address": "Qatar", "label": "negative", "script": "latin" }, { "name": "каллистрат кудрявцева (cyrillic)", "dob": "2005-02-24", "address": "Nauru", "label": "negative", "script": "cyrillic" }, { "name": "emanuel andrade (latin)", "dob": "1952-08-08", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Elena Drapeko (latin)", "dob": "1948-10-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "benigno montenegro (latin)", "dob": "1939-05-25", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "alexandrie barbe (latin)", "dob": "1960-03-30", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "glauco batalla (latin)", "dob": "1975-10-04", "address": "Eritrea", "label": "negative", "script": "latin" }, { "name": "josefina valverde (latin)", "dob": "1973-05-28", "address": "Kuwait", "label": "negative", "script": "latin" }, { "name": "Abdulwahhab AL-HUMAIKANI (latin)", "dob": "1972-8-4", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "Hataiwan WORAWATVICHAI (latin)", "dob": "1956-1-9", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "демьян мясникова (cyrillic)", "dob": "1931-02-07", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "elisabet sainz (latin)", "dob": "2005-07-03", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "alonso larrañaga (latin)", "dob": "1977-11-21", "address": "Santo Tomé y Príncipe", "label": "negative", "script": "latin" }, { "name": "Владимир Ежиков (cyrillic)", "dob": "1987-6-20", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Alexander Udodov (latin)", "dob": "1969-6-10", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 50% of variations that follow: Additionally, generate variations that: Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 50, "selected_rules": [ "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 50 } } }, { "seed_identities_with_labels": [ { "name": "colleen rivera (latin)", "dob": "1942-02-28", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Olexiy KOSTRUBITSKY (latin)", "dob": "1978-8-24", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "حازم التركمان (arabic)", "dob": "1981-04-06", "address": "San Marino", "label": "negative", "script": "arabic" }, { "name": "timothy young (latin)", "dob": "1998-01-28", "address": "Madagascar", "label": "negative", "script": "latin" }, { "name": "lidia romeu (latin)", "dob": "1936-04-22", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Максим Егоров (cyrillic)", "dob": "1977-5-23", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Oleg Romanenko (latin)", "dob": "1963-10-5", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jérôme leroy (latin)", "dob": "1951-01-27", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "toni williams (latin)", "dob": "1997-11-05", "address": "Jersey", "label": "negative", "script": "latin" }, { "name": "Nufail Akbar (latin)", "dob": "1972-3-26", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "дарья пономарев (cyrillic)", "dob": "1974-07-09", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "Hamzah al-Khalidi (latin)", "dob": "1984-7-22", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "christian fletcher (latin)", "dob": "1953-11-22", "address": "Spain", "label": "negative", "script": "latin" }, { "name": "alexandra glover (latin)", "dob": "1995-11-30", "address": "New Zealand", "label": "negative", "script": "latin" }, { "name": "graciana pizarro (latin)", "dob": "1956-03-24", "address": "Bolivia", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 21% of variations that follow: Additionally, generate variations that perform these transformations: Convert name to initials, Swap random adjacent letters, and Use first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 21, "selected_rules": [ "shorten_name_to_initials", "swap_random_letter", "initial_only_first_name" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "swap_random_letter": "Swap random adjacent letters", "initial_only_first_name": "Use first name initial with last name" }, "percentage": 21 } } }, { "seed_identities_with_labels": [ { "name": "chantal georges (latin)", "dob": "1994-07-04", "address": "Slovaquie", "label": "negative", "script": "latin" }, { "name": "Виталий Перфильев (cyrillic)", "dob": "1983-9-11", "address": "Central African Republic", "label": "positive", "script": "cyrillic" }, { "name": "مصعب حوسة (arabic)", "dob": "1961-09-08", "address": "Greenland", "label": "negative", "script": "arabic" }, { "name": "thomas gay (latin)", "dob": "1996-12-10", "address": "Benin", "label": "negative", "script": "latin" }, { "name": "Rezaei Mehdi (latin)", "dob": "1976-9-21", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "غطفان بنو شيبان (arabic)", "dob": "1926-01-03", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "matthew lopez (latin)", "dob": "1937-05-08", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "lucas blin (latin)", "dob": "1976-10-29", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "étienne chevalier (latin)", "dob": "1953-03-25", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Denis RJAUZOW (latin)", "dob": "1974-5-23", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Ho'-kyu Kim (latin)", "dob": "1970-9-15", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "zacharie dubois (latin)", "dob": "1967-03-08", "address": "Madagascar", "label": "negative", "script": "latin" }, { "name": "cheryl weaver (latin)", "dob": "1963-11-03", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "jean regnier (latin)", "dob": "1929-11-02", "address": "Belize", "label": "negative", "script": "latin" }, { "name": "Jinhua Qin (latin)", "dob": "1981-11-16", "address": "China", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 45% of variations that follow: Additionally, generate variations that perform these transformations: Reorder name parts, Duplicate a random letter, and Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 45, "selected_rules": [ "name_parts_permutations", "duplicate_random_letter_as_double_letter", "add_random_leading_title" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts", "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 45 } } }, { "seed_identities_with_labels": [ { "name": "zaida zabaleta (latin)", "dob": "1991-01-05", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "пелагея матвеева (cyrillic)", "dob": "1999-11-20", "address": "Bahamas", "label": "negative", "script": "cyrillic" }, { "name": "yves voisin (latin)", "dob": "1981-08-29", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "jonathan owens (latin)", "dob": "1963-09-12", "address": "Micronesia", "label": "negative", "script": "latin" }, { "name": "jeffrey smith (latin)", "dob": "1985-11-26", "address": "Wallis and Futuna", "label": "negative", "script": "latin" }, { "name": "Дмитрий Белик (cyrillic)", "dob": "1969-10-17", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Hassan Daqou (latin)", "dob": "1985-2-1", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "samuel nunes (latin)", "dob": "1983-06-18", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "stephanie cox (latin)", "dob": "1990-07-27", "address": "Uganda", "label": "negative", "script": "latin" }, { "name": "tanya butler (latin)", "dob": "1944-09-27", "address": "American Samoa", "label": "negative", "script": "latin" }, { "name": "Gennadii Kudriavtsev (latin)", "dob": "1947-8-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "tina hayden (latin)", "dob": "1945-04-09", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Abdullah Al-Faisal (latin)", "dob": "1963-9-10", "address": "Jamaica", "label": "positive", "script": "latin" }, { "name": "жичка педалов (cyrillic)", "dob": "1973-09-18", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "Muhammad Ahmad (latin)", "dob": "1990-10-7", "address": "Syria", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 43% of variations that follow: Additionally, generate variations that perform these transformations: Delete a random letter, and Swap adjacent consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 43, "selected_rules": [ "delete_random_letter", "swap_adjacent_consonants" ], "rule_descriptions": { "delete_random_letter": "Delete a random letter", "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 43 } } }, { "seed_identities_with_labels": [ { "name": "Naser Neser (latin)", "dob": "1963-4-20", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "jeremy park (latin)", "dob": "1985-10-06", "address": "Congo", "label": "negative", "script": "latin" }, { "name": "théophile gosselin (latin)", "dob": "1927-08-18", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Viktoriya Savruk (latin)", "dob": "1980-2-12", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "larry gordon (latin)", "dob": "1970-10-15", "address": "Latvia", "label": "negative", "script": "latin" }, { "name": "marc johnson (latin)", "dob": "1958-07-31", "address": "Benin", "label": "negative", "script": "latin" }, { "name": "ebony lawson (latin)", "dob": "1927-01-21", "address": "Congo", "label": "negative", "script": "latin" }, { "name": "auguste cousin (latin)", "dob": "1932-04-10", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Дмитрий Белик (cyrillic)", "dob": "1969-10-17", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "pauline diaz (latin)", "dob": "1987-08-06", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "веселинка точева-клопова (cyrillic)", "dob": "1938-01-28", "address": "Spain", "label": "negative", "script": "cyrillic" }, { "name": "elizabeth zhang (latin)", "dob": "1954-02-21", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Hafiz SAEED (latin)", "dob": "1950-6-5", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "Evgeniy Polyanin (latin)", "dob": "1993-3-4", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "евтим младенов (cyrillic)", "dob": "1963-07-04", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 48% of variations that follow: Additionally, generate variations that: Abbreviate name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 48, "selected_rules": [ "shorten_name_to_abbreviations" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 48 } } }, { "seed_identities_with_labels": [ { "name": "laetitia chevallier (latin)", "dob": "1966-08-05", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "mark patton (latin)", "dob": "1982-11-23", "address": "Slovakia (Slovak Republic)", "label": "negative", "script": "latin" }, { "name": "Andrey Klishas (latin)", "dob": "1972-11-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "‫بلال‬ ‫حدرج‬ (arabic)", "dob": "1968-7-10", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "aaron garcia (latin)", "dob": "1936-10-14", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "mary velez (latin)", "dob": "1980-06-04", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "софія микитюк (cyrillic)", "dob": "1961-03-03", "address": "Uganda", "label": "negative", "script": "cyrillic" }, { "name": "dominique maury (latin)", "dob": "1966-06-22", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "kelly salas (latin)", "dob": "1966-12-23", "address": "Uganda", "label": "negative", "script": "latin" }, { "name": "изот шестакова (cyrillic)", "dob": "1942-12-20", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "Aleyona Chuguleva (latin)", "dob": "1986-5-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "timothy ballard (latin)", "dob": "1946-08-15", "address": "Panama", "label": "negative", "script": "latin" }, { "name": "Alireza Fatahinojokambari (latin)", "dob": "1980-9-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "jenna haley (latin)", "dob": "1992-08-18", "address": "Kiribati", "label": "negative", "script": "latin" }, { "name": "Alexander GUSEV (latin)", "dob": "1963-7-27", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 100% Far, and also include 21% of variations that follow: Additionally, generate variations that: Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Far": 1.0 }, "rule_based": { "rule_percentage": 21, "selected_rules": [ "insert_random_letter" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter" }, "percentage": 21 } } }, { "seed_identities_with_labels": [ { "name": "алевтина агафонова (cyrillic)", "dob": "1927-11-29", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "Hamid Zareikajosangi (latin)", "dob": "1987-5-20", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "hugo correia (latin)", "dob": "1992-08-17", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "cristiano monteiro (latin)", "dob": "1971-02-23", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "elba castelló (latin)", "dob": "1982-08-18", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Hussam Al-Qatirji (latin)", "dob": "1982-1-11", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "теодослав монтянов (cyrillic)", "dob": "1947-04-25", "address": "Palau", "label": "negative", "script": "cyrillic" }, { "name": "竺 陳 (chinese)", "dob": "1953-8-17", "address": "China", "label": "positive", "script": "chinese" }, { "name": "emanuel torres (latin)", "dob": "1936-01-27", "address": "República Checa", "label": "negative", "script": "latin" }, { "name": "bryan torres (latin)", "dob": "1941-02-13", "address": "Ilha Norfolk", "label": "negative", "script": "latin" }, { "name": "Mohammed Raad (latin)", "dob": "1955-8-28", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "ana carneiro (latin)", "dob": "1925-04-18", "address": "Tuvalu", "label": "negative", "script": "latin" }, { "name": "salomé reis (latin)", "dob": "1998-03-04", "address": "Ashmore and Cartier Islands", "label": "negative", "script": "latin" }, { "name": "Rayimbek Matraimov (latin)", "dob": "1971-5-3", "address": "Kyrgyzstan", "label": "positive", "script": "latin" }, { "name": "otilia valle (latin)", "dob": "2005-12-31", "address": "Cuba", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 70% Light, 30% Medium, and orthographic similarity: 100% Medium, and also include 41% of variations that follow: Additionally, generate variations that: Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 41, "selected_rules": [ "remove_random_vowel" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel" }, "percentage": 41 } } }, { "seed_identities_with_labels": [ { "name": "petra pinto (latin)", "dob": "1994-08-07", "address": "Nicarágua", "label": "negative", "script": "latin" }, { "name": "Alexander Zharov (latin)", "dob": "1964-8-11", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "leonardo neves (latin)", "dob": "1984-10-23", "address": "Áustria", "label": "negative", "script": "latin" }, { "name": "azahar amor (latin)", "dob": "1982-11-24", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "sandro pinho (latin)", "dob": "1973-08-16", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Yaroslav Bulygin (latin)", "dob": "1973-8-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Wendy Morales (latin)", "dob": "1980-5-28", "address": "Nicaragua", "label": "positive", "script": "latin" }, { "name": "ипат князева (cyrillic)", "dob": "1931-09-26", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "anthony dillon (latin)", "dob": "1936-10-15", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "daniel rocha (latin)", "dob": "1962-07-25", "address": "Luxemburgo", "label": "negative", "script": "latin" }, { "name": "Yelena Yevtyukhova (latin)", "dob": "1970-8-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "zoé grenier (latin)", "dob": "1940-03-06", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "ariana freitas (latin)", "dob": "2005-08-21", "address": "Sri Lanca", "label": "negative", "script": "latin" }, { "name": "学礼 江 (chinese)", "dob": "1972-5-22", "address": "Hong Kong", "label": "positive", "script": "chinese" }, { "name": "ярослав дуплій (cyrillic)", "dob": "1966-02-01", "address": "Zambia", "label": "negative", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 42% of variations that follow: Additionally, generate variations that perform these transformations: Swap adjacent consonants, Delete a random letter, and Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 42, "selected_rules": [ "swap_adjacent_consonants", "delete_random_letter", "remove_random_vowel" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "delete_random_letter": "Delete a random letter", "remove_random_vowel": "Remove a random vowel" }, "percentage": 42 } } }, { "seed_identities_with_labels": [ { "name": "jaime vergara (latin)", "dob": "1990-09-23", "address": "República Democrática Popular Lao", "label": "negative", "script": "latin" }, { "name": "Zakhar KALASHOV (latin)", "dob": "1953-3-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "نورس بنو عجل (arabic)", "dob": "1949-08-30", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "cosme ochoa (latin)", "dob": "1933-03-04", "address": "Guinea Ecuatorial", "label": "negative", "script": "latin" }, { "name": "kaylee campbell (latin)", "dob": "1944-06-27", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "michael jordan (latin)", "dob": "1980-05-20", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "juan josé bartolomé (latin)", "dob": "1981-06-01", "address": "Botswana", "label": "negative", "script": "latin" }, { "name": "emma llabrés (latin)", "dob": "1949-07-19", "address": "Francia", "label": "negative", "script": "latin" }, { "name": "león angulo (latin)", "dob": "1982-07-08", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Alexander Udodov (latin)", "dob": "1969-6-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Леонид Пасечник (cyrillic)", "dob": "1970-3-15", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "Sufian QUMU (latin)", "dob": "1959-6-26", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "عتريس شاهين (arabic)", "dob": "1969-07-18", "address": "Ireland", "label": "negative", "script": "arabic" }, { "name": "Esam Ettehadi (latin)", "dob": "1989-7-31", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "lisa rivera (latin)", "dob": "1987-01-10", "address": "Kenya", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 7 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 29% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random consonant, Abbreviate name parts, and Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 29, "selected_rules": [ "remove_random_consonant", "shorten_name_to_abbreviations", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "shorten_name_to_abbreviations": "Abbreviate name parts", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 29 } } }, { "seed_identities_with_labels": [ { "name": "Олег Колесников (cyrillic)", "dob": "1968-9-11", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "áurea frías (latin)", "dob": "1929-05-26", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "edward carroll (latin)", "dob": "1985-12-21", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "остап ермаков (cyrillic)", "dob": "1969-01-24", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "Boris Vologdin (latin)", "dob": "1955-7-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "kelly baptista (latin)", "dob": "1953-05-07", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "amy duncan (latin)", "dob": "1999-05-26", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "camila paiva (latin)", "dob": "1946-06-29", "address": "Jersey", "label": "negative", "script": "latin" }, { "name": "Muhammad Ahmad (latin)", "dob": "1990-10-7", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "gabriela sousa (latin)", "dob": "1959-08-27", "address": "Salvador", "label": "negative", "script": "latin" }, { "name": "Abu Emad (latin)", "dob": "1984-1-1", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "طاهر النمر (arabic)", "dob": "2003-11-15", "address": "Isle of Man", "label": "negative", "script": "arabic" }, { "name": "In O (latin)", "dob": "1969-7-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "violeta pereira (latin)", "dob": "1972-12-19", "address": "Colômbia", "label": "negative", "script": "latin" }, { "name": "luís borges (latin)", "dob": "1953-11-09", "address": "Guiné-Bissau", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 22% of variations that follow: Additionally, generate variations that perform these transformations: Replace random consonants with different consonants, and Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 22, "selected_rules": [ "replace_random_consonant_with_random_consonant", "remove_all_spaces" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "remove_all_spaces": "Remove all spaces" }, "percentage": 22 } } }, { "seed_identities_with_labels": [ { "name": "sofia araújo (latin)", "dob": "1978-11-16", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "isabela mendes (latin)", "dob": "1936-12-31", "address": "Marianas do Norte", "label": "negative", "script": "latin" }, { "name": "ابراهيم غدار (arabic)", "dob": "1978-1-22", "address": "United Arab Emirates", "label": "positive", "script": "arabic" }, { "name": "Mykola Vorobei (latin)", "dob": "1963-5-4", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "tyler williams (latin)", "dob": "1986-01-24", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "gloria elías (latin)", "dob": "2001-05-29", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Apinya CHANTRAPRAPAPORN (latin)", "dob": "1955-1-14", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "Andrii Sushko (latin)", "dob": "1976-1-23", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Anatol Lapo (latin)", "dob": "1963-5-24", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "álvaro henriques (latin)", "dob": "1991-02-01", "address": "Indian Ocean", "label": "negative", "script": "latin" }, { "name": "petra tavares (latin)", "dob": "1983-05-13", "address": "Anguila", "label": "negative", "script": "latin" }, { "name": "anthony robinson (latin)", "dob": "1947-01-28", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "vasco figueiredo (latin)", "dob": "1950-01-09", "address": "Países Baixos", "label": "negative", "script": "latin" }, { "name": "натан якушева (cyrillic)", "dob": "1954-10-10", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "єфрем романенко (cyrillic)", "dob": "2004-06-30", "address": "Togo", "label": "negative", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 50% Light, 50% Medium, and also include 33% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random consonant, Swap adjacent syllables, and Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 33, "selected_rules": [ "remove_random_consonant", "swap_adjacent_syllables", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "swap_adjacent_syllables": "Swap adjacent syllables", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 33 } } }, { "seed_identities_with_labels": [ { "name": "Viktor Boyarkin (latin)", "dob": "1958-10-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "مناف بجيلة (arabic)", "dob": "1990-02-16", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "kyara silva (latin)", "dob": "2000-01-20", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Zakhar KALASHOV (latin)", "dob": "1953-3-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Abdelmalek DERDOUKAL (latin)", "dob": "1970-4-20", "address": "Algeria", "label": "positive", "script": "latin" }, { "name": "jaime fernandes (latin)", "dob": "1958-09-06", "address": "Dhekelia", "label": "negative", "script": "latin" }, { "name": "maria vaz (latin)", "dob": "1971-06-24", "address": "Áustria", "label": "negative", "script": "latin" }, { "name": "carrie rose (latin)", "dob": "1981-12-31", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "leonor araújo (latin)", "dob": "1947-10-15", "address": "Filipinas", "label": "negative", "script": "latin" }, { "name": "pénélope delmas (latin)", "dob": "1973-01-18", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Hossein Akbari (latin)", "dob": "1961-6-17", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Айсен Николаев (cyrillic)", "dob": "1972-1-22", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "naiara moreira (latin)", "dob": "1989-10-09", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "معزّ الحكير (arabic)", "dob": "2005-03-05", "address": "Comoros", "label": "negative", "script": "arabic" }, { "name": "miguel gaspar (latin)", "dob": "2000-03-04", "address": "Afeganistão", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 50% Light, 50% Medium, and also include 43% of variations that follow: Additionally, generate variations that perform these transformations: Reorder name parts, Add a title prefix (Mr., Dr., etc.), and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 43, "selected_rules": [ "name_parts_permutations", "add_random_leading_title", "swap_random_letter" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 43 } } }, { "seed_identities_with_labels": [ { "name": "Chilli Yuan (latin)", "dob": "1985-5-8", "address": "China", "label": "positive", "script": "latin" }, { "name": "david ross (latin)", "dob": "1978-01-26", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Peter Mbaga (latin)", "dob": "1976-9-25", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "jeffery garza (latin)", "dob": "2004-07-22", "address": "San Marino", "label": "negative", "script": "latin" }, { "name": "Andrej Grigor'ev (latin)", "dob": "1963-1-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "азарий мясников (cyrillic)", "dob": "1988-05-10", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "barbara roberts (latin)", "dob": "1979-08-19", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "thomas turner (latin)", "dob": "2002-03-08", "address": "Bangladesh", "label": "negative", "script": "latin" }, { "name": "Александр Бортников (cyrillic)", "dob": "1951-11-15", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "رهيف جرار (arabic)", "dob": "1990-07-18", "address": "Greenland", "label": "negative", "script": "arabic" }, { "name": "Sergei Arenin (latin)", "dob": "1958-8-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "noël perrier (latin)", "dob": "1954-12-29", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "cindy aguilar (latin)", "dob": "1953-04-08", "address": "Nauru", "label": "negative", "script": "latin" }, { "name": "brian williams (latin)", "dob": "1936-08-19", "address": "Nauru", "label": "negative", "script": "latin" }, { "name": "jean durand (latin)", "dob": "1936-08-25", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 9 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 43% of variations that follow: Additionally, generate variations that perform these transformations: Replace random vowels with different vowels, Remove a random consonant, and Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 43, "selected_rules": [ "replace_random_vowel_with_random_vowel", "remove_random_consonant", "remove_random_vowel" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "remove_random_consonant": "Remove a random consonant", "remove_random_vowel": "Remove a random vowel" }, "percentage": 43 } } }, { "seed_identities_with_labels": [ { "name": "Руслан Гаджиев (cyrillic)", "dob": "1978-8-29", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "travis smith (latin)", "dob": "1931-02-21", "address": "Malta", "label": "negative", "script": "latin" }, { "name": "Lucy Miller (latin)", "dob": "1973-7-2", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "Elizaveta Danilova (latin)", "dob": "1984-11-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Ahmad Noroozi (latin)", "dob": "1987-5-5", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "luís gonçalves (latin)", "dob": "1964-06-22", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "tina goodwin (latin)", "dob": "1963-07-08", "address": "Svalbard & Jan Mayen Islands", "label": "negative", "script": "latin" }, { "name": "andrea baldwin (latin)", "dob": "1939-10-31", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "марія євдокименко (cyrillic)", "dob": "1931-05-26", "address": "Korea", "label": "negative", "script": "cyrillic" }, { "name": "stephen brown (latin)", "dob": "1959-03-10", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "david cook (latin)", "dob": "1929-01-07", "address": "New Zealand", "label": "negative", "script": "latin" }, { "name": "Volodymyr Bandura (latin)", "dob": "1990-7-15", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "april wilson (latin)", "dob": "1992-03-05", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "ronald munoz (latin)", "dob": "1937-12-15", "address": "United Kingdom", "label": "negative", "script": "latin" }, { "name": "милен захарова (cyrillic)", "dob": "1948-06-17", "address": "Belarus", "label": "High Risk", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 50% Light, 50% Medium, and also include 45% of variations that follow: Additionally, generate variations that: Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 45, "selected_rules": [ "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 45 } } }, { "seed_identities_with_labels": [ { "name": "tania márquez (latin)", "dob": "1933-05-02", "address": "Guinea Bissau", "label": "negative", "script": "latin" }, { "name": "florinda solana (latin)", "dob": "1963-08-22", "address": "Jordania", "label": "negative", "script": "latin" }, { "name": "marianne rodriguez (latin)", "dob": "1929-03-21", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "abel saavedra (latin)", "dob": "1981-10-12", "address": "Samoa", "label": "negative", "script": "latin" }, { "name": "cruz velázquez (latin)", "dob": "1936-10-14", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Tetyana Tumilina (latin)", "dob": "1966-4-18", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Walid Al-Rawi (latin)", "dob": "1988-11-11", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "raquel garcia (latin)", "dob": "1963-08-21", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Наталья Беглова (cyrillic)", "dob": "1955-11-12", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Vasiliy Yurchenko (latin)", "dob": "1960-9-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Jihad Kansou (latin)", "dob": "1966-2-10", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "елисей захарова (cyrillic)", "dob": "1986-08-02", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "danilo peralta (latin)", "dob": "1985-05-25", "address": "Vietman", "label": "negative", "script": "latin" }, { "name": "хома яременко (cyrillic)", "dob": "1926-02-19", "address": "Kuwait", "label": "negative", "script": "cyrillic" }, { "name": "renée leclercq (latin)", "dob": "1994-11-16", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 100% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 39% of variations that follow: Additionally, generate variations that: Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 39, "selected_rules": [ "remove_random_vowel" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel" }, "percentage": 39 } } }, { "seed_identities_with_labels": [ { "name": "simone rousset (latin)", "dob": "1956-11-23", "address": "Syrie", "label": "negative", "script": "latin" }, { "name": "amaya pastor (latin)", "dob": "1934-05-19", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Vladimir Pavlenko (latin)", "dob": "1962-4-14", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Mokhtar BUSHUSHA (latin)", "dob": "1969-10-13", "address": "Italy", "label": "positive", "script": "latin" }, { "name": "carlos phillips (latin)", "dob": "1935-10-30", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Артем Кирьянов (cyrillic)", "dob": "1977-1-12", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Amir Khamzat (latin)", "dob": "1974-10-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "лев рябова (cyrillic)", "dob": "1956-11-10", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "гордей фролов (cyrillic)", "dob": "1940-06-04", "address": "Saudi Arabia", "label": "negative", "script": "cyrillic" }, { "name": "guillaume guilbert (latin)", "dob": "2001-02-05", "address": "Hong Kong", "label": "negative", "script": "latin" }, { "name": "thibault aubert (latin)", "dob": "1953-10-07", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Myo'ng-chin Kim (latin)", "dob": "1980-2-18", "address": "China", "label": "positive", "script": "latin" }, { "name": "joseph bazin (latin)", "dob": "1945-01-29", "address": "Irak", "label": "negative", "script": "latin" }, { "name": "susan peron (latin)", "dob": "1963-03-31", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "capucine besson (latin)", "dob": "1960-06-08", "address": "Irak", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 10% Light, 50% Medium, 40% Far, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 21% of variations that follow: Additionally, generate variations that perform these transformations: Remove all spaces, Add a title prefix (Mr., Dr., etc.), and Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 21, "selected_rules": [ "remove_all_spaces", "add_random_leading_title", "remove_random_consonant" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "remove_random_consonant": "Remove a random consonant" }, "percentage": 21 } } }, { "seed_identities_with_labels": [ { "name": "bryan rodriguez (latin)", "dob": "1967-01-08", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "michael delgado (latin)", "dob": "1938-11-24", "address": "Kiribati", "label": "negative", "script": "latin" }, { "name": "Мария ШУВАЛОВА (cyrillic)", "dob": "1998-8-4", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Hamza Akbar (latin)", "dob": "1998-9-6", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "malena barrios (latin)", "dob": "1972-10-17", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "brittany wilson (latin)", "dob": "1980-02-28", "address": "Svalbard & Jan Mayen Islands", "label": "negative", "script": "latin" }, { "name": "steven murillo (latin)", "dob": "1941-02-06", "address": "Aruba", "label": "negative", "script": "latin" }, { "name": "angela ford (latin)", "dob": "1958-08-19", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Bella Zlatkis (latin)", "dob": "1948-7-5", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "паулин бележкова (cyrillic)", "dob": "1942-01-01", "address": "Malta", "label": "negative", "script": "cyrillic" }, { "name": "william moreau (latin)", "dob": "1999-05-21", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Aleksey Budnev (latin)", "dob": "1964-7-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "демид сысоев (cyrillic)", "dob": "1944-06-12", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "anthony moore (latin)", "dob": "1957-02-12", "address": "Denmark", "label": "negative", "script": "latin" }, { "name": "Peter Tultaev (latin)", "dob": "1961-1-1", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 7 variations of the name {name}, ensuring phonetic similarity: 70% Light, 30% Medium, and orthographic similarity: 50% Light, 50% Medium, and also include 37% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random consonant, Duplicate a random letter, and Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 37, "selected_rules": [ "remove_random_consonant", "duplicate_random_letter_as_double_letter", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 37 } } }, { "seed_identities_with_labels": [ { "name": "richard fernandez (latin)", "dob": "1981-08-22", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "lee luna (latin)", "dob": "1990-12-02", "address": "Jamaica", "label": "negative", "script": "latin" }, { "name": "Дмитрий Гусев (cyrillic)", "dob": "1972-7-23", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "richard raymond (latin)", "dob": "1960-07-11", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Viktor Boyarkin (latin)", "dob": "1958-10-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "kelly powell (latin)", "dob": "2004-12-31", "address": "Bahamas", "label": "negative", "script": "latin" }, { "name": "sherry roth (latin)", "dob": "1989-11-10", "address": "Guyana", "label": "negative", "script": "latin" }, { "name": "پرهام عبدالعلی (arabic)", "dob": "1965-12-07", "address": "Mauritania", "label": "negative", "script": "arabic" }, { "name": "Hassan AYACH (latin)", "dob": "1963-5-1", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "leah woodard (latin)", "dob": "1956-04-01", "address": "Finland", "label": "negative", "script": "latin" }, { "name": "данимира певецова (cyrillic)", "dob": "1957-06-25", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "Raja Salame (latin)", "dob": "1960-8-15", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "Dzheykhun Aslanov (latin)", "dob": "1990-1-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "adèle baudry (latin)", "dob": "1996-01-13", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "angélica vega (latin)", "dob": "1993-04-08", "address": "Cuba", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 49% of variations that follow: Additionally, generate variations that perform these transformations: Replace random vowels with different vowels, and Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 49, "selected_rules": [ "replace_random_vowel_with_random_vowel", "add_random_leading_title" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 49 } } }, { "seed_identities_with_labels": [ { "name": "amy gilbert (latin)", "dob": "1975-03-14", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "justin armstrong (latin)", "dob": "1941-01-20", "address": "Barbados", "label": "negative", "script": "latin" }, { "name": "دانیال صنایعی (arabic)", "dob": "1938-01-07", "address": "Belize", "label": "negative", "script": "arabic" }, { "name": "Евгений Попов (cyrillic)", "dob": "1978-9-11", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "nancy odom (latin)", "dob": "1948-04-06", "address": "Zambia", "label": "negative", "script": "latin" }, { "name": "قاسم يافع (arabic)", "dob": "2000-05-22", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "patricia lévêque (latin)", "dob": "1957-04-01", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Laith Al-Khazali (latin)", "dob": "1975-10-14", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "lydia williams (latin)", "dob": "1997-11-02", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Il Cho (latin)", "dob": "1945-5-10", "address": "Korea, North", "label": "positive", "script": "latin" }, { "name": "Dmitry Perminov (latin)", "dob": "1979-4-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Viktoriya Savruk (latin)", "dob": "1980-2-12", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "michael hanson (latin)", "dob": "1988-07-22", "address": "Brunei Darussalam", "label": "negative", "script": "latin" }, { "name": "sofia valente (latin)", "dob": "1986-03-21", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "sarah lee (latin)", "dob": "1968-02-13", "address": "Belize", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 100% Light, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 49% of variations that follow: Additionally, generate variations that perform these transformations: Add a title prefix (Mr., Dr., etc.), and Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 1.0 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 49, "selected_rules": [ "add_random_leading_title", "remove_all_spaces" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "remove_all_spaces": "Remove all spaces" }, "percentage": 49 } } }, { "seed_identities_with_labels": [ { "name": "stephanie grant (latin)", "dob": "1940-05-24", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "نبهان غطفان (arabic)", "dob": "2007-01-26", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "debra pruitt (latin)", "dob": "1938-07-09", "address": "Mongolia", "label": "negative", "script": "latin" }, { "name": "Abdelmalek DERDOUKAL (latin)", "dob": "1970-4-20", "address": "Algeria", "label": "positive", "script": "latin" }, { "name": "еріка перепелиця (cyrillic)", "dob": "1985-04-30", "address": "Niue", "label": "negative", "script": "cyrillic" }, { "name": "laura cobb (latin)", "dob": "1979-12-19", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "suzanne gates (latin)", "dob": "1936-06-28", "address": "Wallis and Futuna", "label": "negative", "script": "latin" }, { "name": "jeannine duval (latin)", "dob": "1978-09-04", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Yakub Zakriyev (latin)", "dob": "1990-10-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Хазалбек Атабекаў (cyrillic)", "dob": "1967-3-18", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "mark blevins (latin)", "dob": "1935-12-31", "address": "Zambia", "label": "negative", "script": "latin" }, { "name": "carmen lucero (latin)", "dob": "1985-03-28", "address": "Korea", "label": "negative", "script": "latin" }, { "name": "Abu Aisyah (latin)", "dob": "1983-9-6", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "emily martinez (latin)", "dob": "1989-01-24", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Ismatullah Khalozai (latin)", "dob": "1995-1-1", "address": "Afghanistan", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 50% Light, 50% Medium, and also include 43% of variations that follow: Additionally, generate variations that perform these transformations: Replace random consonants with different consonants, Abbreviate name parts, and Use first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 43, "selected_rules": [ "replace_random_consonant_with_random_consonant", "shorten_name_to_abbreviations", "initial_only_first_name" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "shorten_name_to_abbreviations": "Abbreviate name parts", "initial_only_first_name": "Use first name initial with last name" }, "percentage": 43 } } }, { "seed_identities_with_labels": [ { "name": "cory decker (latin)", "dob": "1989-08-03", "address": "Korea", "label": "negative", "script": "latin" }, { "name": "Leonid MIKHAILIUK (latin)", "dob": "1970-1-1", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "david carter (latin)", "dob": "1963-06-14", "address": "Faroe Islands", "label": "negative", "script": "latin" }, { "name": "Taher Kayali (latin)", "dob": "1960-7-11", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Abdul AL-MAGHREBI (latin)", "dob": "1970-7-1", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "émile fernandez (latin)", "dob": "2001-07-02", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "carol vance (latin)", "dob": "1968-11-17", "address": "Grenada", "label": "negative", "script": "latin" }, { "name": "luis miguel corominas (latin)", "dob": "1925-03-25", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "hortense martineau (latin)", "dob": "1933-03-31", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Viktoriya Savruk (latin)", "dob": "1980-2-12", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "tina reynolds (latin)", "dob": "1951-05-13", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "طيّع جزار (arabic)", "dob": "1985-09-15", "address": "Liechtenstein", "label": "negative", "script": "arabic" }, { "name": "jason green (latin)", "dob": "2006-02-11", "address": "Mauritania", "label": "negative", "script": "latin" }, { "name": "مازن الألجاوي (arabic)", "dob": "1962-03-02", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "Андрэй Рыбакоў (cyrillic)", "dob": "1976-7-11", "address": "Belarus", "label": "positive", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 32% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random consonant, Abbreviate name parts, and Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 32, "selected_rules": [ "remove_random_consonant", "shorten_name_to_abbreviations", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "shorten_name_to_abbreviations": "Abbreviate name parts", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 32 } } }, { "seed_identities_with_labels": [ { "name": "Tatiana Butskaya (latin)", "dob": "1975-5-8", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "лев авдеева (cyrillic)", "dob": "1980-02-15", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "teresa nascimento (latin)", "dob": "1959-12-09", "address": "Ilhas Salomão", "label": "negative", "script": "latin" }, { "name": "Abdulwahhab AL-HUMAIKANI (latin)", "dob": "1972-8-4", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "Ruslan Lechkhadzhiev (latin)", "dob": "1965-7-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Gholamreza Eyni (latin)", "dob": "1975-7-11", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Игорь Кайгородов (cyrillic)", "dob": "1974-11-29", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "яномил многознаева (cyrillic)", "dob": "1968-02-05", "address": "Cayman Islands", "label": "negative", "script": "cyrillic" }, { "name": "camila guerreiro (latin)", "dob": "1935-07-26", "address": "Níger", "label": "negative", "script": "latin" }, { "name": "bianca borges (latin)", "dob": "1925-12-07", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "loida bueno (latin)", "dob": "1964-04-26", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "iara barbosa (latin)", "dob": "1929-12-20", "address": "Luxemburgo", "label": "negative", "script": "latin" }, { "name": "santiago domingues (latin)", "dob": "1985-01-25", "address": "Ilhas Heard e McDonald", "label": "negative", "script": "latin" }, { "name": "erika mendes (latin)", "dob": "1976-05-02", "address": "Pacific Ocean", "label": "negative", "script": "latin" }, { "name": "lauren ford (latin)", "dob": "1948-01-06", "address": "South Africa", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 7 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 100% Medium, and also include 15% of variations that follow: Additionally, generate variations that perform these transformations: Add a title prefix (Mr., Dr., etc.), Remove all spaces, and Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 15, "selected_rules": [ "add_random_leading_title", "remove_all_spaces", "name_parts_permutations" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "remove_all_spaces": "Remove all spaces", "name_parts_permutations": "Reorder name parts" }, "percentage": 15 } } }, { "seed_identities_with_labels": [ { "name": "noémi françois (latin)", "dob": "1995-03-28", "address": "Italie", "label": "negative", "script": "latin" }, { "name": "Abdlwahab Abdi (latin)", "dob": "1970-1-1", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "benjamin duval (latin)", "dob": "1973-07-21", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "dominique julien (latin)", "dob": "1931-10-07", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "gilbert bouvier (latin)", "dob": "1988-07-11", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "douglas rogers (latin)", "dob": "1943-01-10", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "غامد آل معيض (arabic)", "dob": "1940-04-16", "address": "United States Virgin Islands", "label": "negative", "script": "arabic" }, { "name": "susan guillou (latin)", "dob": "1956-12-12", "address": "Turquie", "label": "negative", "script": "latin" }, { "name": "Theint Htet (latin)", "dob": "1999-5-21", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "Rajabi Al-Zahir (latin)", "dob": "1969-7-5", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "Алексей Мордашов (cyrillic)", "dob": "1965-9-26", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "luc payet (latin)", "dob": "1935-05-21", "address": "Moldavie", "label": "negative", "script": "latin" }, { "name": "عبد القهّار الأيوبي (arabic)", "dob": "1944-06-05", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "Yuri Shvytkin (latin)", "dob": "1965-5-24", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "virginie dos santos (latin)", "dob": "1963-02-27", "address": "Turks et Caïques (Îles)", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 100% Medium, and also include 15% of variations that follow: Additionally, generate variations that perform these transformations: Add a title prefix (Mr., Dr., etc.), and Swap adjacent consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 15, "selected_rules": [ "add_random_leading_title", "swap_adjacent_consonants" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 15 } } }, { "seed_identities_with_labels": [ { "name": "margaret voisin (latin)", "dob": "1980-02-17", "address": "Brésil", "label": "negative", "script": "latin" }, { "name": "Kseniya Shoigu (latin)", "dob": "1991-1-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "david traore (latin)", "dob": "1962-12-16", "address": "Fidji (République des)", "label": "negative", "script": "latin" }, { "name": "Mukhtar Shah (latin)", "dob": "1939-11-8", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "God Nisanov (latin)", "dob": "1972-4-24", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "thibault laporte (latin)", "dob": "1950-04-09", "address": "Swaziland", "label": "negative", "script": "latin" }, { "name": "odette bertin (latin)", "dob": "1989-05-05", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "henriette bodin (latin)", "dob": "1932-01-07", "address": "Paraguay", "label": "negative", "script": "latin" }, { "name": "christelle muller (latin)", "dob": "1933-06-18", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "roland poirier (latin)", "dob": "1997-10-27", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Светлана Горячева (cyrillic)", "dob": "1947-6-3", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "naiara amorim (latin)", "dob": "1983-05-04", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "سراج الدّين النقيب (arabic)", "dob": "1926-09-08", "address": "Syria", "label": "High Risk", "script": "arabic" }, { "name": "Vladimir Chudakov (latin)", "dob": "1970-7-15", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "سوگند زنجانی (arabic)", "dob": "1953-04-22", "address": "Ghana", "label": "negative", "script": "arabic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 33% of variations that follow: Additionally, generate variations that perform these transformations: Add a title prefix (Mr., Dr., etc.), Swap random adjacent letters, and Replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 33, "selected_rules": [ "add_random_leading_title", "swap_random_letter", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "swap_random_letter": "Swap random adjacent letters", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 33 } } }, { "seed_identities_with_labels": [ { "name": "Tabish Qayyuum (latin)", "dob": "1983-4-9", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "filipe branco (latin)", "dob": "2003-04-08", "address": "Alemanha", "label": "negative", "script": "latin" }, { "name": "patrícia vieira (latin)", "dob": "1964-06-04", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Катерина ТИХОНОВА (cyrillic)", "dob": "1986-8-31", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "شجاع بدر (arabic)", "dob": "1984-05-17", "address": "Slovakia (Slovak Republic)", "label": "negative", "script": "arabic" }, { "name": "Kamchibek KOLBAEV (latin)", "dob": "1973-1-1", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "ângelo garcia (latin)", "dob": "1954-09-11", "address": "Zimbabué", "label": "negative", "script": "latin" }, { "name": "Tatiana Butskaya (latin)", "dob": "1975-5-8", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "лучезар филиппова (cyrillic)", "dob": "1938-08-16", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "emanuel moura (latin)", "dob": "1975-02-26", "address": "Granada", "label": "negative", "script": "latin" }, { "name": "danielle rocher (latin)", "dob": "1975-05-07", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "larissa lima (latin)", "dob": "2005-04-18", "address": "Man, Isle of", "label": "negative", "script": "latin" }, { "name": "tere roma (latin)", "dob": "1997-08-02", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Apinya CHANTARAPRAPAPORN (latin)", "dob": "1955-1-14", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "amber johnson (latin)", "dob": "1926-11-19", "address": "South Sudan", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 70% Light, 30% Medium, and orthographic similarity: 50% Light, 50% Medium, and also include 50% of variations that follow: Additionally, generate variations that perform these transformations: Insert a random letter, and Replace double letters with a single letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 50, "selected_rules": [ "insert_random_letter", "replace_double_letters_with_single_letter" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter", "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 50 } } }, { "seed_identities_with_labels": [ { "name": "bertrand leclercq (latin)", "dob": "1974-12-11", "address": "Micronésie (États fédérés de)", "label": "negative", "script": "latin" }, { "name": "Mansur Soltajev (latin)", "dob": "1978-6-13", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "автоном селезнева (cyrillic)", "dob": "1974-05-13", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "وحيد جرار (arabic)", "dob": "1994-05-10", "address": "Oman", "label": "negative", "script": "arabic" }, { "name": "Andrey Sapelin (latin)", "dob": "1965-9-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Chol Kim (latin)", "dob": "1964-9-27", "address": "China", "label": "positive", "script": "latin" }, { "name": "susan clerc (latin)", "dob": "1962-06-01", "address": "Bangladesh", "label": "negative", "script": "latin" }, { "name": "瑞光 郭 (chinese)", "dob": "1954-3-20", "address": "China", "label": "positive", "script": "chinese" }, { "name": "laurence fouquet (latin)", "dob": "1931-04-18", "address": "Vierges (Îles)", "label": "negative", "script": "latin" }, { "name": "thibault lemaître (latin)", "dob": "1940-11-02", "address": "Équateur", "label": "negative", "script": "latin" }, { "name": "amaro talavera (latin)", "dob": "1987-05-27", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "federico folch (latin)", "dob": "1965-11-01", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "Alireza Chegha-Marani (latin)", "dob": "1962-8-11", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "laurent coulon (latin)", "dob": "1931-08-25", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "sylvie breton (latin)", "dob": "1936-08-10", "address": "Central African Republic", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 38% of variations that follow: Additionally, generate variations that perform these transformations: Reorder name parts, and Replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 38, "selected_rules": [ "name_parts_permutations", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 38 } } }, { "seed_identities_with_labels": [ { "name": "marvin luna (latin)", "dob": "1999-10-04", "address": "Denmark", "label": "negative", "script": "latin" }, { "name": "nádia loureiro (latin)", "dob": "1924-11-17", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "دانية هندية (arabic)", "dob": "1977-11-19", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "Pavel Kachkayev (latin)", "dob": "1951-10-4", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Nikolai Levine (latin)", "dob": "1985-5-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "louis walters (latin)", "dob": "1965-08-11", "address": "Madagascar", "label": "negative", "script": "latin" }, { "name": "Dzmitryri Braim (latin)", "dob": "1976-4-18", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "melanie snyder (latin)", "dob": "1965-12-02", "address": "Myanmar", "label": "negative", "script": "latin" }, { "name": "سلطان اسعد (arabic)", "dob": "1962-10-31", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "tyler ramirez (latin)", "dob": "1998-09-11", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "april baker (latin)", "dob": "1982-07-29", "address": "Estonia", "label": "negative", "script": "latin" }, { "name": "adam owens (latin)", "dob": "2003-05-22", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Boris Vologdin (latin)", "dob": "1955-7-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "имилиан балахуров (cyrillic)", "dob": "1976-08-01", "address": "Gabon", "label": "negative", "script": "cyrillic" }, { "name": "elizabeth duncan (latin)", "dob": "1925-09-25", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 70% Light, 30% Medium, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 33% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random vowel, Remove all spaces, and Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 33, "selected_rules": [ "remove_random_vowel", "remove_all_spaces", "name_parts_permutations" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "remove_all_spaces": "Remove all spaces", "name_parts_permutations": "Reorder name parts" }, "percentage": 33 } } }, { "seed_identities_with_labels": [ { "name": "Kan Zaw (latin)", "dob": "1954-10-11", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "brian moura (latin)", "dob": "1953-01-06", "address": "Ruanda", "label": "negative", "script": "latin" }, { "name": "christina murray (latin)", "dob": "1954-04-12", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Tabish Qayyuum (latin)", "dob": "1983-4-9", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "يارا العسلي (arabic)", "dob": "1933-10-28", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "ناجح السمان (arabic)", "dob": "1967-09-21", "address": "Paraguay", "label": "negative", "script": "arabic" }, { "name": "Alexander Udodov (latin)", "dob": "1969-6-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "alice carvalho (latin)", "dob": "1974-06-19", "address": "Arábia Saudita", "label": "negative", "script": "latin" }, { "name": "Iyad Makhlouf (latin)", "dob": "1973-1-21", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "cornelio cantón (latin)", "dob": "1968-09-09", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "luísa matos (latin)", "dob": "1949-06-27", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Владимир Артяков (cyrillic)", "dob": "1959-7-30", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "graciano blanco (latin)", "dob": "1970-11-19", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "yasmin pacheco (latin)", "dob": "1978-09-07", "address": "Marrocos", "label": "negative", "script": "latin" }, { "name": "matias carneiro (latin)", "dob": "1945-10-08", "address": "Macau", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 100% Light, and also include 42% of variations that follow: Additionally, generate variations that: Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 1.0 }, "rule_based": { "rule_percentage": 42, "selected_rules": [ "name_parts_permutations" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts" }, "percentage": 42 } } }, { "seed_identities_with_labels": [ { "name": "maricruz viñas (latin)", "dob": "1926-11-11", "address": "Kuwait", "label": "negative", "script": "latin" }, { "name": "diego faria (latin)", "dob": "1994-08-17", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "سجاد رضا زاده (arabic)", "dob": "2001-11-06", "address": "Albania", "label": "negative", "script": "arabic" }, { "name": "domitila guillen (latin)", "dob": "1937-07-10", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Alexander GUSEV (latin)", "dob": "1963-7-27", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Moalim Ibrahim (latin)", "dob": "1985-3-13", "address": "Somalia", "label": "positive", "script": "latin" }, { "name": "William ZHOU (latin)", "dob": "1977-3-25", "address": "China", "label": "positive", "script": "latin" }, { "name": "venceslás espinosa (latin)", "dob": "1999-06-28", "address": "Burundi", "label": "negative", "script": "latin" }, { "name": "marta andrade (latin)", "dob": "1966-10-31", "address": "Singapur", "label": "negative", "script": "latin" }, { "name": "وصاف بديرية (arabic)", "dob": "1954-02-02", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "juan parrish (latin)", "dob": "1974-10-03", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Viktor Zolotov (latin)", "dob": "1954-1-27", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Виктор Игнатов (cyrillic)", "dob": "1968-10-15", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "crescencia gras (latin)", "dob": "1994-08-26", "address": "Turkmenistán", "label": "negative", "script": "latin" }, { "name": "bibiana murcia (latin)", "dob": "1996-10-15", "address": "Bolivia", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 33% of variations that follow: Additionally, generate variations that: Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 33, "selected_rules": [ "remove_random_vowel" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel" }, "percentage": 33 } } }, { "seed_identities_with_labels": [ { "name": "عزيز الحنبلي (arabic)", "dob": "1947-01-25", "address": "Bahrain", "label": "negative", "script": "arabic" }, { "name": "Mayya Bolotova (latin)", "dob": "1975-1-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Farzin Mazlghanchay (latin)", "dob": "1992-12-7", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Олег Ларин (cyrillic)", "dob": "1973-10-9", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Dzhasharbek Uzdenov (latin)", "dob": "1967-1-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "داهي بديرية (arabic)", "dob": "1940-01-04", "address": "Syria", "label": "High Risk", "script": "arabic" }, { "name": "claudine fournier (latin)", "dob": "1950-03-12", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "nair azevedo (latin)", "dob": "1955-06-27", "address": "Uruguai", "label": "negative", "script": "latin" }, { "name": "Sarah al-Sayyid (latin)", "dob": "1985-7-7", "address": "Egypt", "label": "positive", "script": "latin" }, { "name": "hortense da silva (latin)", "dob": "1979-02-21", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "eva maría torralba (latin)", "dob": "2002-04-09", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "diana fernandes (latin)", "dob": "1969-12-14", "address": "Espanha", "label": "negative", "script": "latin" }, { "name": "beatriz ramos (latin)", "dob": "1981-02-09", "address": "Somália", "label": "negative", "script": "latin" }, { "name": "gaspar monteiro (latin)", "dob": "2000-03-15", "address": "Síria", "label": "negative", "script": "latin" }, { "name": "vitória gonçalves (latin)", "dob": "1990-11-28", "address": "Mozambique", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 9 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 19% of variations that follow: Additionally, generate variations that: Convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 19, "selected_rules": [ "shorten_name_to_initials" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials" }, "percentage": 19 } } }, { "seed_identities_with_labels": [ { "name": "Guowei CENG (latin)", "dob": "1963-9-1", "address": "Hong Kong", "label": "positive", "script": "latin" }, { "name": "аніта карась (cyrillic)", "dob": "1931-04-28", "address": "Cyprus", "label": "negative", "script": "cyrillic" }, { "name": "sérgio almeida (latin)", "dob": "1932-10-01", "address": "Bósnia e Herzegovina", "label": "negative", "script": "latin" }, { "name": "mateus branco (latin)", "dob": "1989-07-04", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "mariana soares (latin)", "dob": "2000-12-14", "address": "Butão", "label": "negative", "script": "latin" }, { "name": "جلال مرازيق البقوم (arabic)", "dob": "1945-05-05", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "tracy flores (latin)", "dob": "1954-03-25", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "célina hebert (latin)", "dob": "1970-11-04", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "josé rocha (latin)", "dob": "1995-11-21", "address": "Aruba", "label": "negative", "script": "latin" }, { "name": "家超 李 (chinese)", "dob": "1957-12-7", "address": "Hong Kong", "label": "positive", "script": "chinese" }, { "name": "Muhammad Fachry (latin)", "dob": "1968-2-18", "address": "Indonesia", "label": "positive", "script": "latin" }, { "name": "ágata gómez (latin)", "dob": "1999-07-17", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "'Ali al-Shufa (latin)", "dob": "1991-7-25", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Marko Svorcan (latin)", "dob": "1967-5-7", "address": "Serbia", "label": "positive", "script": "latin" }, { "name": "mário melo (latin)", "dob": "1954-10-14", "address": "Malásia", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 22% of variations that follow: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), Replace random consonants with different consonants, and Delete a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 22, "selected_rules": [ "add_random_trailing_title", "replace_random_consonant_with_random_consonant", "delete_random_letter" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "delete_random_letter": "Delete a random letter" }, "percentage": 22 } } }, { "seed_identities_with_labels": [ { "name": "леонтій ейбоженко (cyrillic)", "dob": "1993-10-10", "address": "Argentina", "label": "negative", "script": "cyrillic" }, { "name": "perlita cervantes (latin)", "dob": "1934-09-15", "address": "Andorra", "label": "negative", "script": "latin" }, { "name": "Steven Liu (latin)", "dob": "1984-11-13", "address": "China", "label": "positive", "script": "latin" }, { "name": "Владислав Даванков (cyrillic)", "dob": "1984-2-25", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Viktor Mozhelyansky (latin)", "dob": "1964-5-10", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "abel fiol (latin)", "dob": "1971-01-02", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "crystal richardson (latin)", "dob": "1959-12-28", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "حكيم حب رمان (arabic)", "dob": "1956-04-01", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "candelario durán (latin)", "dob": "1978-10-06", "address": "Mongolia", "label": "negative", "script": "latin" }, { "name": "Xenia Iudaeva (latin)", "dob": "1970-3-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "cesar stewart (latin)", "dob": "1974-10-24", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Rafael Bastardo (latin)", "dob": "1978-9-22", "address": "Venezuela", "label": "positive", "script": "latin" }, { "name": "flor cunha (latin)", "dob": "1972-06-24", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "miguel ángel mancebo (latin)", "dob": "1995-05-25", "address": "San Marino", "label": "negative", "script": "latin" }, { "name": "georgina arnal (latin)", "dob": "1975-02-24", "address": "Eritrea", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 50% Light, 50% Medium, and also include 19% of variations that follow: Additionally, generate variations that perform these transformations: Replace spaces with special characters, Replace random consonants with different consonants, and Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 19, "selected_rules": [ "replace_spaces_with_random_special_characters", "replace_random_consonant_with_random_consonant", "insert_random_letter" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "insert_random_letter": "Insert a random letter" }, "percentage": 19 } } }, { "seed_identities_with_labels": [ { "name": "denise vincent (latin)", "dob": "1961-09-25", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Andrey Doukhvalov (latin)", "dob": "1957-12-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Сергей Алтухов (cyrillic)", "dob": "1982-2-23", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "virginia bush (latin)", "dob": "1956-04-25", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "lucy pinto (latin)", "dob": "1981-01-29", "address": "République centrafricaine", "label": "negative", "script": "latin" }, { "name": "Vasiliy Yurchenko (latin)", "dob": "1960-9-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "clémence regnier (latin)", "dob": "1977-11-17", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "fábio correia (latin)", "dob": "1943-10-01", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Mansur Soltajev (latin)", "dob": "1978-6-13", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "diane munoz (latin)", "dob": "1999-09-03", "address": "France", "label": "negative", "script": "latin" }, { "name": "suzanne couturier (latin)", "dob": "2003-05-24", "address": "Chili", "label": "negative", "script": "latin" }, { "name": "étienne carpentier (latin)", "dob": "2004-09-12", "address": "Guam", "label": "negative", "script": "latin" }, { "name": "Dragan Dragas (latin)", "dob": "1982-6-20", "address": "Serbia", "label": "positive", "script": "latin" }, { "name": "веніямин заїка (cyrillic)", "dob": "1957-04-05", "address": "Guinea-Bissau", "label": "negative", "script": "cyrillic" }, { "name": "يوسف یزدی (arabic)", "dob": "1980-06-04", "address": "Afghanistan", "label": "High Risk", "script": "arabic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 70% Light, 30% Medium, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 14% of variations that follow: Additionally, generate variations that perform these transformations: Convert name to initials, and Swap adjacent consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 14, "selected_rules": [ "shorten_name_to_initials", "swap_adjacent_consonants" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 14 } } }, { "seed_identities_with_labels": [ { "name": "lara gonçalves (latin)", "dob": "1998-06-08", "address": "Alemanha", "label": "negative", "script": "latin" }, { "name": "igor monteiro (latin)", "dob": "1969-03-25", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Laith Al-Khazali (latin)", "dob": "1975-10-14", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "amélie mathieu (latin)", "dob": "1962-02-18", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "núria gomes (latin)", "dob": "1945-12-18", "address": "Ilhas dos Cocos", "label": "negative", "script": "latin" }, { "name": "Dzhasharbek Uzdenov (latin)", "dob": "1967-1-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "joão valente (latin)", "dob": "1987-05-14", "address": "Malta", "label": "negative", "script": "latin" }, { "name": "Ekaterina Kharchenko (latin)", "dob": "1977-8-11", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "maryse poulain (latin)", "dob": "1935-06-03", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "melissa davis (latin)", "dob": "1951-06-30", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Владимир Ежиков (cyrillic)", "dob": "1987-6-20", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "martim nunes (latin)", "dob": "1941-11-14", "address": "Vanuatu", "label": "negative", "script": "latin" }, { "name": "игнатий лебедев (cyrillic)", "dob": "1988-03-03", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "софрон галкин (cyrillic)", "dob": "1979-10-17", "address": "Kazakhstan", "label": "negative", "script": "cyrillic" }, { "name": "Amjad Sazgar (latin)", "dob": "1979-4-16", "address": "Iran", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 50% Light, 50% Medium, and also include 18% of variations that follow: Additionally, generate variations that perform these transformations: Reorder name parts, and Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 18, "selected_rules": [ "name_parts_permutations", "add_random_leading_title" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 18 } } }, { "seed_identities_with_labels": [ { "name": "Эдди Астанин (cyrillic)", "dob": "1961-12-16", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "antoinette guichard (latin)", "dob": "1979-05-29", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Aleksey Budnev (latin)", "dob": "1964-7-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ronald harding (latin)", "dob": "2002-04-08", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "харлампий овчинников (cyrillic)", "dob": "1945-09-24", "address": "Wallis and Futuna", "label": "negative", "script": "cyrillic" }, { "name": "zoé garnier (latin)", "dob": "2006-01-18", "address": "Belgique", "label": "negative", "script": "latin" }, { "name": "irma cabeza (latin)", "dob": "1956-11-20", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "apolinar briones (latin)", "dob": "1965-07-16", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Aleksey Mordashov (latin)", "dob": "1965-9-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "olivie maurice (latin)", "dob": "2003-11-04", "address": "Tchad", "label": "negative", "script": "latin" }, { "name": "حقّي البشيتي (arabic)", "dob": "1942-10-09", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "Marcelin Makolo (latin)", "dob": "1985-11-30", "address": "Congo, Democratic Republic of the", "label": "positive", "script": "latin" }, { "name": "Ahmed Afraah (latin)", "dob": "1985-8-17", "address": "Maldives", "label": "positive", "script": "latin" }, { "name": "antoine gonzalez (latin)", "dob": "1951-12-25", "address": "Luxembourg", "label": "negative", "script": "latin" }, { "name": "jérôme couturier (latin)", "dob": "2000-12-20", "address": "Italie", "label": "negative", "script": "latin" } ], "query_template": "Generate exactly 13 execution vectors for each target identity {name}, ensuring phonetic similarity (sound-alike names) with the following distribution: 20% Light, 60% Medium, 20% Far and orthographic similarity (visually similar spellings) with the same distribution. Approximately 54% of the total variations should follow the rule-based transformations below. Additionally, generate variations that: Remove all spaces from {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 54, "selected_rules": [ "remove_all_spaces" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces" }, "percentage": 54 } } }, { "seed_identities_with_labels": [ { "name": "alex carre (latin)", "dob": "1929-08-27", "address": "Lithuanie", "label": "negative", "script": "latin" }, { "name": "marianne roger (latin)", "dob": "1987-03-27", "address": "Macédoine du Nord", "label": "negative", "script": "latin" }, { "name": "Rimma Utyasheva (latin)", "dob": "1952-1-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Il Cho (latin)", "dob": "1945-5-10", "address": "Korea, North", "label": "positive", "script": "latin" }, { "name": "astrid courtois (latin)", "dob": "1973-07-15", "address": "Belize", "label": "negative", "script": "latin" }, { "name": "Mu Xiaolu (latin)", "dob": "1990-3-8", "address": "China", "label": "positive", "script": "latin" }, { "name": "Nataliya KHORSHEVA (latin)", "dob": "1972-7-14", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "تحسين قرش (arabic)", "dob": "1938-03-29", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "tiffany anderson (latin)", "dob": "1998-07-01", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "nicole maillard (latin)", "dob": "1973-11-20", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Ирина Черкасова (cyrillic)", "dob": "1963-5-30", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "susan lopez (latin)", "dob": "1969-12-12", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "margaux hardy (latin)", "dob": "1936-06-05", "address": "Seychelles", "label": "negative", "script": "latin" }, { "name": "франц іващенко (cyrillic)", "dob": "1982-08-21", "address": "Madagascar", "label": "negative", "script": "cyrillic" }, { "name": "spencer morgan (latin)", "dob": "1986-09-29", "address": "South Africa", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 48% of variations that follow: Additionally, generate variations that perform these transformations: Replace random consonants with different consonants, and Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 48, "selected_rules": [ "replace_random_consonant_with_random_consonant", "add_random_trailing_title" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 48 } } }, { "seed_identities_with_labels": [ { "name": "brenda sherman (latin)", "dob": "1990-07-13", "address": "Sierra Leone", "label": "negative", "script": "latin" }, { "name": "Aleksandr Mishustin (latin)", "dob": "2000-12-13", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "robert white (latin)", "dob": "1997-10-14", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "حامد المغاولة (arabic)", "dob": "1937-08-19", "address": "Sao Tome and Principe", "label": "negative", "script": "arabic" }, { "name": "Walid Al-Rawi (latin)", "dob": "1988-11-11", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "aurora ferrández (latin)", "dob": "2001-10-31", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "ساطع الشحوح (arabic)", "dob": "1974-03-14", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "Ihor Rotenberg (latin)", "dob": "1973-5-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "kimberly watson (latin)", "dob": "1926-05-28", "address": "Croatia", "label": "negative", "script": "latin" }, { "name": "encarnación moles (latin)", "dob": "1937-12-12", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Anatoliy Lappo (latin)", "dob": "1963-5-24", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "rachel george (latin)", "dob": "1945-02-02", "address": "Pitcairn Islands", "label": "negative", "script": "latin" }, { "name": "bryan gonzales (latin)", "dob": "1946-11-16", "address": "Morocco", "label": "negative", "script": "latin" }, { "name": "Людмила Зайцева (cyrillic)", "dob": "1979-7-25", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "salomé morais (latin)", "dob": "1968-12-22", "address": "Angola", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 13 execution vectors ({name} variations) for each target identity.\nEnsure phonetic similarity (sound-alike names): 100% Far\nEnsure orthographic similarity (visually similar spellings): 20% Light, 60% Medium, 20% Far\nApproximately 54% of the total 13 variations should follow these rule-based transformations: \nAdditionally, generate variations that perform these transformations:\nAbbreviate name parts, and Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 54, "selected_rules": [ "shorten_name_to_abbreviations", "remove_random_vowel" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "remove_random_vowel": "Remove a random vowel" }, "percentage": 54 } } }, { "seed_identities_with_labels": [ { "name": "рубен дьячкова (cyrillic)", "dob": "1976-09-28", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "dawn stevenson (latin)", "dob": "1981-08-22", "address": "Slovenia", "label": "negative", "script": "latin" }, { "name": "alan zimmerman (latin)", "dob": "2003-07-29", "address": "Romania", "label": "negative", "script": "latin" }, { "name": "Hamid Ahmar (latin)", "dob": "1967-1-1", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "stephen brown (latin)", "dob": "1993-08-11", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Ciro FERREIRA (latin)", "dob": "1987-8-27", "address": "Brazil", "label": "positive", "script": "latin" }, { "name": "койчо рангелов (cyrillic)", "dob": "1961-01-26", "address": "Svalbard & Jan Mayen Islands", "label": "negative", "script": "cyrillic" }, { "name": "katrina clark (latin)", "dob": "1935-05-09", "address": "Pitcairn Islands", "label": "negative", "script": "latin" }, { "name": "Mokhtar BUSHUSHA (latin)", "dob": "1969-10-13", "address": "Italy", "label": "positive", "script": "latin" }, { "name": "Елена Перминова (cyrillic)", "dob": "1980-12-5", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "rebekah delgado (latin)", "dob": "1964-04-01", "address": "Kuwait", "label": "negative", "script": "latin" }, { "name": "isabela gomes (latin)", "dob": "1986-02-21", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "gerardo pinedo (latin)", "dob": "1935-09-28", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Vinai PICHAYOT (latin)", "dob": "1957-12-1", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "craig garcia (latin)", "dob": "1928-03-26", "address": "Kenya", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 70% Light, 30% Medium, and also include 43% of variations that follow: Additionally, generate variations that perform these transformations: Duplicate a random letter, and Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 43, "selected_rules": [ "duplicate_random_letter_as_double_letter", "add_random_leading_title" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 43 } } }, { "seed_identities_with_labels": [ { "name": "Zaki Ararawi (latin)", "dob": "1961-5-23", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "diego moraleda (latin)", "dob": "1933-11-12", "address": "México", "label": "negative", "script": "latin" }, { "name": "toño escolano (latin)", "dob": "1989-05-18", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "azucena hervia (latin)", "dob": "2001-09-24", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Farhaad DOCKRAT (latin)", "dob": "1959-2-28", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "jose manuel pozuelo (latin)", "dob": "2000-03-12", "address": "Micronesia", "label": "negative", "script": "latin" }, { "name": "leslie brandt (latin)", "dob": "1929-08-10", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "jafet barberá (latin)", "dob": "1943-05-10", "address": "Bangladesh", "label": "negative", "script": "latin" }, { "name": "селиван селиверстова (cyrillic)", "dob": "1995-01-04", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "ярослав воробьева (cyrillic)", "dob": "1928-06-28", "address": "Northern Mariana Islands", "label": "negative", "script": "cyrillic" }, { "name": "Vyacheslav Rossolay (latin)", "dob": "1981-10-17", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "jennifer richardson (latin)", "dob": "2003-01-13", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Andrei Siguta (latin)", "dob": "1979-5-5", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "germán romeu (latin)", "dob": "1992-04-13", "address": "Maldivas", "label": "negative", "script": "latin" }, { "name": "Герман Греф (cyrillic)", "dob": "1964-2-8", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 51% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random vowel, Abbreviate name parts, and Delete a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 51, "selected_rules": [ "remove_random_vowel", "shorten_name_to_abbreviations", "delete_random_letter" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "shorten_name_to_abbreviations": "Abbreviate name parts", "delete_random_letter": "Delete a random letter" }, "percentage": 51 } } }, { "seed_identities_with_labels": [ { "name": "anita rojas (latin)", "dob": "1980-08-23", "address": "Trinidad and Tobago", "label": "negative", "script": "latin" }, { "name": "fábio mota (latin)", "dob": "1931-05-31", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Igor KORNET (latin)", "dob": "1973-4-29", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "michelle goncalves (latin)", "dob": "1949-09-11", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Franki Eskeda (latin)", "dob": "1994-10-23", "address": "Mexico", "label": "positive", "script": "latin" }, { "name": "تحسين القلموني (arabic)", "dob": "1944-05-03", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "علي دياب (arabic)", "dob": "1988-12-15", "address": "Zambia", "label": "positive", "script": "arabic" }, { "name": "tiffany ramirez (latin)", "dob": "1988-03-13", "address": "Indonesia", "label": "negative", "script": "latin" }, { "name": "cathy myers (latin)", "dob": "1975-10-29", "address": "Chile", "label": "negative", "script": "latin" }, { "name": "cindy jones (latin)", "dob": "1947-06-02", "address": "Martinique", "label": "negative", "script": "latin" }, { "name": "aura mariño (latin)", "dob": "1940-06-03", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "jamie monroe (latin)", "dob": "1971-12-01", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "رائد البرغوثي (arabic)", "dob": "1949-11-17", "address": "Guinea", "label": "negative", "script": "arabic" }, { "name": "Rimma Utyasheva (latin)", "dob": "1952-1-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Maxime Mocom (latin)", "dob": "1978-12-30", "address": "Central African Republic", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 24% of variations that follow: Additionally, generate variations that perform these transformations: Swap random adjacent letters, Add a title suffix (Jr., PhD, etc.), and Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 24, "selected_rules": [ "swap_random_letter", "add_random_trailing_title", "insert_random_letter" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "insert_random_letter": "Insert a random letter" }, "percentage": 24 } } }, { "seed_identities_with_labels": [ { "name": "Marinko Cavara (latin)", "dob": "1967-2-2", "address": "Bosnia and Herzegovina", "label": "positive", "script": "latin" }, { "name": "william chen (latin)", "dob": "1938-09-29", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Ігар Маршалаў (cyrillic)", "dob": "1972-1-12", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "kristi walters (latin)", "dob": "1933-03-13", "address": "Uganda", "label": "negative", "script": "latin" }, { "name": "راجي قرادة (arabic)", "dob": "1996-10-04", "address": "Liechtenstein", "label": "negative", "script": "arabic" }, { "name": "tracy ross (latin)", "dob": "2003-05-31", "address": "Iceland", "label": "negative", "script": "latin" }, { "name": "Yuriy Karaev (latin)", "dob": "1966-6-21", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "juliette robert (latin)", "dob": "1986-10-07", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "акулина анисимова (cyrillic)", "dob": "1967-12-07", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "Aysen Nikolayev (latin)", "dob": "1972-1-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ian price (latin)", "dob": "1999-07-04", "address": "Guinea-Bissau", "label": "negative", "script": "latin" }, { "name": "anastasie lemonnier (latin)", "dob": "1960-12-30", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "erin king (latin)", "dob": "2001-08-02", "address": "Niue", "label": "negative", "script": "latin" }, { "name": "Javier RIVERA (latin)", "dob": "1972-4-20", "address": "Honduras", "label": "positive", "script": "latin" }, { "name": "steven underwood (latin)", "dob": "1951-05-17", "address": "Kenya", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 57% of variations that follow: Additionally, generate variations that perform these transformations: Convert name to initials, Abbreviate name parts, and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 57, "selected_rules": [ "shorten_name_to_initials", "shorten_name_to_abbreviations", "swap_random_letter" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "shorten_name_to_abbreviations": "Abbreviate name parts", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 57 } } }, { "seed_identities_with_labels": [ { "name": "ronald perez (latin)", "dob": "1954-02-12", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "'Ali al-Shufa (latin)", "dob": "1991-7-25", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Arif Khot (latin)", "dob": "1970-8-21", "address": "India", "label": "positive", "script": "latin" }, { "name": "jessica harper (latin)", "dob": "2002-04-29", "address": "Greenland", "label": "negative", "script": "latin" }, { "name": "christopher adams (latin)", "dob": "1961-07-24", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "kevin hampton (latin)", "dob": "2007-07-27", "address": "Jamaica", "label": "negative", "script": "latin" }, { "name": "адриан белозерова (cyrillic)", "dob": "1989-04-06", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "Vladimir Pavlov (latin)", "dob": "1976-6-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "james morales (latin)", "dob": "1945-04-29", "address": "Bouvet Island (Bouvetoya)", "label": "negative", "script": "latin" }, { "name": "Виктор Игнатов (cyrillic)", "dob": "1968-10-15", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "joshua walker (latin)", "dob": "1970-08-10", "address": "India", "label": "negative", "script": "latin" }, { "name": "alexander wilkerson (latin)", "dob": "2001-09-05", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "zoé guillaume (latin)", "dob": "1984-09-23", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Myo'ng-chin Kim (latin)", "dob": "1980-2-18", "address": "China", "label": "positive", "script": "latin" }, { "name": "مليکا ملکیان (arabic)", "dob": "2000-10-11", "address": "Malta", "label": "negative", "script": "arabic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 31% of variations that follow: Additionally, generate variations that: Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 31, "selected_rules": [ "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 31 } } }, { "seed_identities_with_labels": [ { "name": "marguerite masson (latin)", "dob": "1974-06-21", "address": "Indonésie", "label": "negative", "script": "latin" }, { "name": "jeanne bigot (latin)", "dob": "1949-03-04", "address": "Mauritanie", "label": "negative", "script": "latin" }, { "name": "Harun Nasir-Al-Din (latin)", "dob": "1970-6-5", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "pauline sanchez (latin)", "dob": "1965-10-05", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Alexey Shkadarevich (latin)", "dob": "1947-10-27", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "манолина тумангелов (cyrillic)", "dob": "1956-01-11", "address": "Marshall Islands", "label": "negative", "script": "cyrillic" }, { "name": "vítor domingues (latin)", "dob": "1925-04-05", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "eugène bigot (latin)", "dob": "1977-01-19", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "christelle joubert (latin)", "dob": "1984-01-04", "address": "Suriname", "label": "negative", "script": "latin" }, { "name": "emmanuel noël (latin)", "dob": "1993-06-13", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Белан Хамчиев (cyrillic)", "dob": "1960-12-7", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Oleksandr Basov (latin)", "dob": "1971-10-16", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "michel pinto (latin)", "dob": "1971-03-23", "address": "Maurice", "label": "negative", "script": "latin" }, { "name": "سهل عبد اللطيف (arabic)", "dob": "1996-05-14", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "Aegli Tamani-Phella (latin)", "dob": "1978-6-21", "address": "Cyprus", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 32% of variations that follow: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), and Swap adjacent syllables. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 32, "selected_rules": [ "add_random_trailing_title", "swap_adjacent_syllables" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 32 } } }, { "seed_identities_with_labels": [ { "name": "thérèse lelièvre (latin)", "dob": "1945-05-12", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "jérôme fabre (latin)", "dob": "2001-10-08", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Мария ШУВАЛОВА (cyrillic)", "dob": "1998-8-4", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Vadim Shuvalov (latin)", "dob": "1958-2-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "victor descamps (latin)", "dob": "1963-03-30", "address": "Brésil", "label": "negative", "script": "latin" }, { "name": "Aleksandr Mishustin (latin)", "dob": "2000-12-13", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "danielle munoz (latin)", "dob": "1965-10-11", "address": "Somalie", "label": "negative", "script": "latin" }, { "name": "hugues boutin (latin)", "dob": "2002-12-08", "address": "Liechtenstein", "label": "negative", "script": "latin" }, { "name": "Irina Shoygu (latin)", "dob": "1955-5-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "велинна токов (cyrillic)", "dob": "1993-01-19", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "Aleksey MOZGOVOY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "دهمان آل عواض (arabic)", "dob": "1963-01-08", "address": "Liechtenstein", "label": "negative", "script": "arabic" }, { "name": "carolyn parks (latin)", "dob": "1981-08-01", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "barbara long (latin)", "dob": "1998-12-08", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "célina peltier (latin)", "dob": "1972-11-07", "address": "Corée du Nord", "label": "negative", "script": "latin" } ], "query_template": "Generate 7 execution vectors for {name}, ensuring phonetic similarity (100% Far) and orthographic similarity (100% Light). Approximately 52% of the total 7 variations should follow these rule-based transformations: Duplicate a random letter, Reorder name parts, and Swap adjacent consonants. Include exactly one variation that duplicates a random letter, one that reorders name parts, and one that swaps adjacent consonants in each set of 4 consecutive rule-based variations. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Light": 1.0 }, "rule_based": { "rule_percentage": 52, "selected_rules": [ "duplicate_random_letter_as_double_letter", "name_parts_permutations", "swap_adjacent_consonants" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "name_parts_permutations": "Reorder name parts", "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 52 } } }, { "seed_identities_with_labels": [ { "name": "michelle blanchet (latin)", "dob": "1941-12-30", "address": "Libye", "label": "negative", "script": "latin" }, { "name": "Pavel Kachkayev (latin)", "dob": "1951-10-4", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jules fleury (latin)", "dob": "1992-01-12", "address": "Panama", "label": "negative", "script": "latin" }, { "name": "christiane laroche (latin)", "dob": "1940-10-07", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "надежда лапина (cyrillic)", "dob": "1974-11-18", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "Javier RIVERA (latin)", "dob": "1972-4-20", "address": "Honduras", "label": "positive", "script": "latin" }, { "name": "Светлана Горячева (cyrillic)", "dob": "1947-6-3", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "édith gimenez (latin)", "dob": "1984-09-05", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "christelle ruiz (latin)", "dob": "1956-02-19", "address": "Guyane française", "label": "negative", "script": "latin" }, { "name": "агата соколова (cyrillic)", "dob": "1962-07-19", "address": "Romania", "label": "negative", "script": "cyrillic" }, { "name": "camila loureiro (latin)", "dob": "1943-04-04", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "hortense guichard (latin)", "dob": "1948-10-02", "address": "Sainte Lucie", "label": "negative", "script": "latin" }, { "name": "nathalie pierre (latin)", "dob": "1998-10-09", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Hosseyn Akbari (latin)", "dob": "1961-6-17", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Ivan Demchenko (latin)", "dob": "1960-9-27", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 70% Light, 30% Medium, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 34% of variations that follow: Additionally, generate variations that perform these transformations: Replace random consonants with different consonants, Replace random vowels with different vowels, and Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 34, "selected_rules": [ "replace_random_consonant_with_random_consonant", "replace_random_vowel_with_random_vowel", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 34 } } }, { "seed_identities_with_labels": [ { "name": "Javier RIVERA (latin)", "dob": "1972-4-20", "address": "Honduras", "label": "positive", "script": "latin" }, { "name": "madison lee (latin)", "dob": "1997-10-15", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "гюгра кривошапкова (cyrillic)", "dob": "1987-09-15", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "Andrei Khokhlun (latin)", "dob": "1966-12-4", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "julieta millán (latin)", "dob": "1998-11-12", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "sierra simpson (latin)", "dob": "1966-02-11", "address": "French Southern Territories", "label": "negative", "script": "latin" }, { "name": "Владимир Ежиков (cyrillic)", "dob": "1987-6-20", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "melissa dixon (latin)", "dob": "1963-10-05", "address": "Iceland", "label": "negative", "script": "latin" }, { "name": "pamela hunt (latin)", "dob": "1952-02-03", "address": "Lithuania", "label": "negative", "script": "latin" }, { "name": "виктория европовкирилов (cyrillic)", "dob": "1992-11-17", "address": "Guatemala", "label": "negative", "script": "cyrillic" }, { "name": "Irina Bubnova (latin)", "dob": "1983-4-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Seyed Ghoreishi (latin)", "dob": "1964-9-27", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "ramona salom (latin)", "dob": "1996-12-29", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "marcelle mace (latin)", "dob": "1982-10-06", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "duane melton (latin)", "dob": "1944-01-21", "address": "Slovakia (Slovak Republic)", "label": "negative", "script": "latin" } ], "query_template": "Generate 10 variations of {name} ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 14% of the total 10 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Convert name to initials, Replace double letters with a single letter, and Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 14, "selected_rules": [ "shorten_name_to_initials", "replace_double_letters_with_single_letter", "add_random_leading_title" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 14 } } }, { "seed_identities_with_labels": [ { "name": "Oleksiy MOZGOVY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Omid ABEDSHAHI (latin)", "dob": "1983-1-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "george friedman (latin)", "dob": "1963-03-16", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "john owens (latin)", "dob": "1967-09-12", "address": "Gabon", "label": "negative", "script": "latin" }, { "name": "luke brown (latin)", "dob": "1965-07-27", "address": "New Caledonia", "label": "negative", "script": "latin" }, { "name": "Andrei Suvorov (latin)", "dob": "1967-10-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "kelly king (latin)", "dob": "1926-05-09", "address": "Colombia", "label": "negative", "script": "latin" }, { "name": "Kazbek Kokov (latin)", "dob": "1973-7-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "bill may (latin)", "dob": "1990-04-06", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "ابراهيم غدار (arabic)", "dob": "1978-1-22", "address": "United Arab Emirates", "label": "positive", "script": "arabic" }, { "name": "phillip warren (latin)", "dob": "1933-10-03", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "عمّار أبو داوود (arabic)", "dob": "1944-10-09", "address": "Barbados", "label": "negative", "script": "arabic" }, { "name": "cécile bailly (latin)", "dob": "1929-08-13", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "kayla garcia (latin)", "dob": "1947-10-13", "address": "Egypt", "label": "negative", "script": "latin" }, { "name": "شهاب أكلب (arabic)", "dob": "1964-01-07", "address": "Iraq", "label": "High Risk", "script": "arabic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 100% Medium, and also include 39% of variations that follow: Additionally, generate variations that perform these transformations: Insert a random letter, Abbreviate name parts, and Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 39, "selected_rules": [ "insert_random_letter", "shorten_name_to_abbreviations", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter", "shorten_name_to_abbreviations": "Abbreviate name parts", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 39 } } }, { "seed_identities_with_labels": [ { "name": "patricia douglas (latin)", "dob": "1941-11-17", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "gilbert boulay (latin)", "dob": "1932-04-13", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "pepita franch (latin)", "dob": "1976-08-26", "address": "Kazajstán", "label": "negative", "script": "latin" }, { "name": "جواهر أولاد زيان (arabic)", "dob": "1938-01-08", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "benito álamo (latin)", "dob": "1964-07-27", "address": "San Vicente y las Granadinas", "label": "negative", "script": "latin" }, { "name": "فوّاز أبو داوود (arabic)", "dob": "1963-05-01", "address": "Western Sahara", "label": "negative", "script": "arabic" }, { "name": "حبيب دراجي (arabic)", "dob": "1976-9-18", "address": "Iran", "label": "positive", "script": "arabic" }, { "name": "Houssam Katrangi (latin)", "dob": "1973-11-27", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "Irina Kosenko (latin)", "dob": "1973-1-16", "address": "North Macedonia, The Republic of", "label": "positive", "script": "latin" }, { "name": "corey garrett (latin)", "dob": "1950-04-04", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Abdulwahhab AL-HUMAIKANI (latin)", "dob": "1972-8-4", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "jose francisco mendoza (latin)", "dob": "1993-03-22", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "graciano casas (latin)", "dob": "1938-11-24", "address": "Letonia", "label": "negative", "script": "latin" }, { "name": "Igor Kuzmenko (latin)", "dob": "1967-11-11", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "sebastián castells (latin)", "dob": "1982-02-25", "address": "Australia", "label": "negative", "script": "latin" } ], "query_template": "Generate 9 name variations for {name}, ensuring phonetic similarity of 100% Far. \nFor orthographic similarity, generate 30% Light variations by adding or removing diacritical marks, 40% Medium variations by changing one consonant sound to another similar sound, and 30% Far variations by changing multiple consonant sounds.\nApproximately 42% of the total 9 name variations should follow these rule-based transformations: Replace random consonants with different consonants in {name}, Delete a random letter from {name}, and Swap adjacent consonants in {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 42, "selected_rules": [ "replace_random_consonant_with_random_consonant", "delete_random_letter", "swap_adjacent_consonants" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "delete_random_letter": "Delete a random letter", "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 42 } } }, { "seed_identities_with_labels": [ { "name": "griselda granados (latin)", "dob": "1973-12-19", "address": "Madagascar", "label": "negative", "script": "latin" }, { "name": "iván miranda (latin)", "dob": "1985-11-18", "address": "Camboya", "label": "negative", "script": "latin" }, { "name": "yéssica marco (latin)", "dob": "1965-08-25", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "قاسم الشاويش (arabic)", "dob": "1933-10-31", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "norberto nebot (latin)", "dob": "1959-08-25", "address": "Sudán", "label": "negative", "script": "latin" }, { "name": "robert daniels (latin)", "dob": "1967-02-07", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "German Belous (latin)", "dob": "1977-11-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "milagros carranza (latin)", "dob": "1975-11-08", "address": "Guinea", "label": "negative", "script": "latin" }, { "name": "Mahmoud Kzemabad (latin)", "dob": "1965-6-26", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Елена Цунаева (cyrillic)", "dob": "1969-1-13", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "michelle couturier (latin)", "dob": "1948-11-05", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "stéphane roger (latin)", "dob": "1970-08-30", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Osman Hamid (latin)", "dob": "1966-1-1", "address": "Sudan", "label": "positive", "script": "latin" }, { "name": "وسيم البيسار القعقور (arabic)", "dob": "1971-04-03", "address": "Argentina", "label": "negative", "script": "arabic" }, { "name": "Siarhei Kalinouski (latin)", "dob": "1969-1-3", "address": "Belarus", "label": "positive", "script": "latin" } ], "query_template": "Generate 9 variations of {name}. Ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 21% of the total 9 variations should follow these rule-based transformations: Additionally, generate variations that: Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 21, "selected_rules": [ "insert_random_letter" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter" }, "percentage": 21 } } }, { "seed_identities_with_labels": [ { "name": "monique johnson (latin)", "dob": "1929-01-28", "address": "Czech Republic", "label": "negative", "script": "latin" }, { "name": "clinton peck (latin)", "dob": "1928-03-07", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "franck georges (latin)", "dob": "1946-03-14", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "sara campos (latin)", "dob": "1938-01-30", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "robert potter (latin)", "dob": "1976-08-17", "address": "Iceland", "label": "negative", "script": "latin" }, { "name": "Anatoly Bifov (latin)", "dob": "1963-1-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "matthew ramsey (latin)", "dob": "1944-02-03", "address": "Myanmar", "label": "negative", "script": "latin" }, { "name": "Nikolay Kosov (latin)", "dob": "1955-6-30", "address": "Hungary", "label": "positive", "script": "latin" }, { "name": "حمود الحويطات (arabic)", "dob": "1980-05-09", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "nidia noguera (latin)", "dob": "1978-12-09", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Андрей Сапелин (cyrillic)", "dob": "1965-9-16", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "yvonne hensley (latin)", "dob": "1954-02-26", "address": "Spain", "label": "negative", "script": "latin" }, { "name": "Jonha Lang (latin)", "dob": "1978-12-15", "address": "China", "label": "positive", "script": "latin" }, { "name": "تحسين آل سلطان (arabic)", "dob": "1946-04-29", "address": "Egypt", "label": "negative", "script": "arabic" }, { "name": "Ko Oo (latin)", "dob": "1972-12-2", "address": "Burma", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 7 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 50% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random vowel, Delete a random letter, and Use first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 50, "selected_rules": [ "remove_random_vowel", "delete_random_letter", "initial_only_first_name" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "delete_random_letter": "Delete a random letter", "initial_only_first_name": "Use first name initial with last name" }, "percentage": 50 } } }, { "seed_identities_with_labels": [ { "name": "Sergei Savchenkov (latin)", "dob": "1954-10-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "عادل النمر (arabic)", "dob": "1968-08-24", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "Olimjon Tohtahunov (latin)", "dob": "1949-12-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "olivie lemaître (latin)", "dob": "1974-08-20", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Abu Aisyah (latin)", "dob": "1983-9-6", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "océane caron (latin)", "dob": "1966-03-01", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "margaret bousquet (latin)", "dob": "1983-07-07", "address": "Guadeloupe", "label": "negative", "script": "latin" }, { "name": "marthe barthelemy (latin)", "dob": "1981-07-14", "address": "Autriche", "label": "negative", "script": "latin" }, { "name": "Kia Sadeghi (latin)", "dob": "1986-3-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Максим Смирнов (cyrillic)", "dob": "1965-10-4", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "محمدجواد میردامادی (arabic)", "dob": "2007-05-07", "address": "Timor-Leste", "label": "negative", "script": "arabic" }, { "name": "julien rivière (latin)", "dob": "1961-04-27", "address": "Italie", "label": "negative", "script": "latin" }, { "name": "claudine becker (latin)", "dob": "1928-10-31", "address": "Îles Mineures Éloignées des États-Unis", "label": "negative", "script": "latin" }, { "name": "denis perret (latin)", "dob": "1983-10-19", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "louis clerc (latin)", "dob": "1958-01-10", "address": "Cameroon", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 8 execution vectors (name variations) for each target identity {name}, ensuring phonetic similarity with the following distributions: 30% Light, 40% Medium, and 30% Far. Additionally, ensure orthographic similarity with the following distributions: 70% Light and 30% Medium. Approximately 41% of the total 8 variations should follow these rule-based transformations: Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 41, "selected_rules": [ "add_random_leading_title" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 41 } } }, { "seed_identities_with_labels": [ { "name": "claude marty (latin)", "dob": "1947-12-02", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "kristen hodges (latin)", "dob": "1985-07-21", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Сергей Мальцев (cyrillic)", "dob": "1973-2-28", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Kifah Milhem (latin)", "dob": "1961-11-28", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Edgar Sarrias (latin)", "dob": "1976-1-24", "address": "Venezuela", "label": "positive", "script": "latin" }, { "name": "lisa church (latin)", "dob": "1989-02-05", "address": "Philippines", "label": "negative", "script": "latin" }, { "name": "مُتعب جعفر (arabic)", "dob": "1960-11-13", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "henry ellison (latin)", "dob": "1944-09-28", "address": "Andorra", "label": "negative", "script": "latin" }, { "name": "Samer Ismail (latin)", "dob": "1980-10-25", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "дорина кьоров (cyrillic)", "dob": "1945-08-01", "address": "Guatemala", "label": "negative", "script": "cyrillic" }, { "name": "rebecca shaffer (latin)", "dob": "1991-09-13", "address": "Madagascar", "label": "negative", "script": "latin" }, { "name": "célina lenoir (latin)", "dob": "1943-06-10", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Ochur-Suge Mongush (latin)", "dob": "1993-3-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "catherine mason (latin)", "dob": "1974-01-22", "address": "Guadeloupe", "label": "negative", "script": "latin" }, { "name": "craig anderson (latin)", "dob": "1927-05-03", "address": "Namibia", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 39% of variations that follow: Additionally, generate variations that perform these transformations: Convert name to initials, and Use first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 39, "selected_rules": [ "shorten_name_to_initials", "initial_only_first_name" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "initial_only_first_name": "Use first name initial with last name" }, "percentage": 39 } } }, { "seed_identities_with_labels": [ { "name": "Андрей Колесник (cyrillic)", "dob": "1960-2-26", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Oleg Krinitsyn (latin)", "dob": "1971-5-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Mikhail Shchapov (latin)", "dob": "1975-9-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "maurice camus (latin)", "dob": "1961-06-06", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Aleksandr Sarkisyan (latin)", "dob": "1946-8-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "lucy martins (latin)", "dob": "1928-08-08", "address": "Seychelles", "label": "negative", "script": "latin" }, { "name": "auguste bonneau (latin)", "dob": "1946-11-17", "address": "Samoa", "label": "negative", "script": "latin" }, { "name": "heliodoro cabrera (latin)", "dob": "1945-12-30", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "thibault lopez (latin)", "dob": "1992-09-14", "address": "Saint-Marin (Rép. de)", "label": "negative", "script": "latin" }, { "name": "Dmitry Khotimskiy (latin)", "dob": "1973-6-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "denis martel (latin)", "dob": "1979-08-16", "address": "Guinée", "label": "negative", "script": "latin" }, { "name": "pauline da costa (latin)", "dob": "1941-06-09", "address": "Sri Lanka", "label": "negative", "script": "latin" }, { "name": "antoinette chrétien (latin)", "dob": "1947-04-27", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "клоя цицков (cyrillic)", "dob": "1925-02-20", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "héctor muro (latin)", "dob": "1934-08-17", "address": "Venezuela", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 10 variations of {name} ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 59% of the total 10 variations should follow these rule-based transformations: Replace double letters with a single letter, and Swap adjacent consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 59, "selected_rules": [ "replace_double_letters_with_single_letter", "swap_adjacent_consonants" ], "rule_descriptions": { "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 59 } } }, { "seed_identities_with_labels": [ { "name": "natanael estrada (latin)", "dob": "1981-06-02", "address": "Eslovaquia", "label": "negative", "script": "latin" }, { "name": "Кирило Ковальчук (cyrillic)", "dob": "1968-12-22", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Mohammed Raad (latin)", "dob": "1955-8-28", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "pastor arévalo (latin)", "dob": "2002-03-09", "address": "Kuwait", "label": "negative", "script": "latin" }, { "name": "heliodoro cifuentes (latin)", "dob": "1967-01-14", "address": "Mauritania", "label": "negative", "script": "latin" }, { "name": "Aleksey Budnev (latin)", "dob": "1964-7-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "نعيم حميضة (arabic)", "dob": "2003-06-08", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "sheryl fletcher (latin)", "dob": "1975-11-03", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "ileana reig (latin)", "dob": "1977-02-12", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "amelia ojeda (latin)", "dob": "1984-01-22", "address": "Francia", "label": "negative", "script": "latin" }, { "name": "Chapito ISIDRO (latin)", "dob": "1982-6-19", "address": "Mexico", "label": "positive", "script": "latin" }, { "name": "عرفان کابلی (arabic)", "dob": "1973-10-16", "address": "French Guiana", "label": "negative", "script": "arabic" }, { "name": "suzanne dumas (latin)", "dob": "1946-10-15", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Yongbok Kim (latin)", "dob": "1957-7-27", "address": "Korea, North", "label": "positive", "script": "latin" }, { "name": "madeleine hervé (latin)", "dob": "1964-03-27", "address": "Burkina Faso", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 100% Light, and orthographic similarity: 100% Medium, and also include 25% of variations that follow: Additionally, generate variations that: Delete a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 1.0 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 25, "selected_rules": [ "delete_random_letter" ], "rule_descriptions": { "delete_random_letter": "Delete a random letter" }, "percentage": 25 } } }, { "seed_identities_with_labels": [ { "name": "antoinette bernard (latin)", "dob": "2003-06-09", "address": "Somalie", "label": "negative", "script": "latin" }, { "name": "Dzheykhun Aslanov (latin)", "dob": "1990-1-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Gulbuddin HEKHMARTYAR (latin)", "dob": "1949-8-1", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Ирина Шойгу (cyrillic)", "dob": "1955-5-31", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "опанас михалюк (cyrillic)", "dob": "1991-05-27", "address": "Guatemala", "label": "negative", "script": "cyrillic" }, { "name": "josette marchal (latin)", "dob": "2003-08-22", "address": "Gabon", "label": "negative", "script": "latin" }, { "name": "dominique navarro (latin)", "dob": "1974-04-23", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "бисенти сланинкова (cyrillic)", "dob": "2001-07-10", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "jordan chaney (latin)", "dob": "1964-04-24", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "guillaume bodin (latin)", "dob": "1984-10-01", "address": "Lesotho", "label": "negative", "script": "latin" }, { "name": "james gibson (latin)", "dob": "1928-03-25", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Artem Malyshev (latin)", "dob": "1988-2-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "christophe weber (latin)", "dob": "1934-05-28", "address": "Moldavie", "label": "negative", "script": "latin" }, { "name": "Mohsen NAFTCHI (latin)", "dob": "1988-2-16", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "débora arellano (latin)", "dob": "1983-11-11", "address": "Nicaragua", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 54% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random consonant, and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 54, "selected_rules": [ "remove_random_consonant", "swap_random_letter" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 54 } } }, { "seed_identities_with_labels": [ { "name": "mark martin (latin)", "dob": "1966-07-22", "address": "Marshall Islands", "label": "negative", "script": "latin" }, { "name": "Vildan Zinnurov (latin)", "dob": "1964-1-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "dana cook (latin)", "dob": "1988-11-13", "address": "Sao Tome and Principe", "label": "negative", "script": "latin" }, { "name": "ryan snyder (latin)", "dob": "1949-04-20", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "cody walker (latin)", "dob": "2004-06-12", "address": "Serbia", "label": "negative", "script": "latin" }, { "name": "mackenzie baker (latin)", "dob": "1943-07-04", "address": "Guatemala", "label": "negative", "script": "latin" }, { "name": "Hassan Daqou (latin)", "dob": "1985-2-1", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "رضي عاملة (arabic)", "dob": "1955-12-19", "address": "Grenada", "label": "negative", "script": "arabic" }, { "name": "Su WEI (latin)", "dob": "1959-12-3", "address": "Hong Kong", "label": "positive", "script": "latin" }, { "name": "كیان مجتبوی (arabic)", "dob": "1982-09-13", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "Юрый Назаранка (cyrillic)", "dob": "1976-4-17", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "marisol ángel (latin)", "dob": "1936-06-09", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Alexei Sheshenya (latin)", "dob": "1971-4-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "joseph lee (latin)", "dob": "1966-08-27", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "jérôme regnier (latin)", "dob": "2007-04-15", "address": "Mali", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 41% of variations that follow: Additionally, generate variations that perform these transformations: Use first name initial with last name, Reorder name parts, and Abbreviate name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 41, "selected_rules": [ "initial_only_first_name", "name_parts_permutations", "shorten_name_to_abbreviations" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name", "name_parts_permutations": "Reorder name parts", "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 41 } } }, { "seed_identities_with_labels": [ { "name": "Muhsin al-Zibin (latin)", "dob": "1973-7-1", "address": "Kuwait", "label": "positive", "script": "latin" }, { "name": "Rimma Utyasheva (latin)", "dob": "1952-1-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "жанна григорьев (cyrillic)", "dob": "2004-07-04", "address": "Ireland", "label": "negative", "script": "cyrillic" }, { "name": "Михаил Кисляков (cyrillic)", "dob": "1975-11-18", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "noémi roussel (latin)", "dob": "2005-06-19", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "brandy bailey (latin)", "dob": "1925-10-19", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "monique phillips (latin)", "dob": "1937-09-13", "address": "Tajikistan", "label": "negative", "script": "latin" }, { "name": "troy rice (latin)", "dob": "1963-03-22", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Dmitry Perminov (latin)", "dob": "1979-4-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "федор зуев (cyrillic)", "dob": "1938-04-10", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "elizabeth long (latin)", "dob": "1962-10-28", "address": "Qatar", "label": "negative", "script": "latin" }, { "name": "julien navarro (latin)", "dob": "1960-05-06", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "teresa jimenez (latin)", "dob": "1953-02-20", "address": "Nauru", "label": "negative", "script": "latin" }, { "name": "Sergey Perminov (latin)", "dob": "1968-9-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "antonio ellis (latin)", "dob": "1985-10-17", "address": "Nauru", "label": "negative", "script": "latin" } ], "query_template": "Generate exactly 7 execution vectors (name variations) for {name}, ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 52% of the total 7 variations should follow these rule-based transformations: Additionally, generate variations that: Convert {name} to its initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 52, "selected_rules": [ "shorten_name_to_initials" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials" }, "percentage": 52 } } }, { "seed_identities_with_labels": [ { "name": "simão correia (latin)", "dob": "1954-03-05", "address": "Butão", "label": "negative", "script": "latin" }, { "name": "daniela santos (latin)", "dob": "1963-01-21", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "ريفال جذام (arabic)", "dob": "1991-11-20", "address": "Bermuda", "label": "negative", "script": "arabic" }, { "name": "Yevgeny KHODOTOV (latin)", "dob": "1964-3-21", "address": "Central African Republic", "label": "positive", "script": "latin" }, { "name": "нелида топчийски (cyrillic)", "dob": "1946-08-16", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "Николай Коломейцев (cyrillic)", "dob": "1956-9-1", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "isabelle perrier (latin)", "dob": "1928-07-04", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Umm Layth (latin)", "dob": "1994-5-11", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "tatiana costa (latin)", "dob": "1978-10-06", "address": "Comores", "label": "negative", "script": "latin" }, { "name": "Ismatullah Khalozai (latin)", "dob": "1995-1-1", "address": "Afghanistan", "label": "positive", "script": "latin" }, { "name": "teo torrens (latin)", "dob": "1959-02-07", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "anthony hill (latin)", "dob": "1994-08-20", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Andrey Sapelin (latin)", "dob": "1965-9-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "noah moreira (latin)", "dob": "2005-12-21", "address": "Congo-Kinshasa", "label": "negative", "script": "latin" }, { "name": "margarida rocha (latin)", "dob": "1959-07-15", "address": "Canadá", "label": "negative", "script": "latin" } ], "query_template": "Generate exactly 6 execution vectors (name variations) for each target identity {name}. ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (100% Medium). Approximately 41% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that: Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 41, "selected_rules": [ "remove_random_consonant" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant" }, "percentage": 41 } } }, { "seed_identities_with_labels": [ { "name": "rémy lopes (latin)", "dob": "1942-03-10", "address": "Koweit", "label": "negative", "script": "latin" }, { "name": "зоя кудряшова (cyrillic)", "dob": "2003-12-22", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "nathalie leleu (latin)", "dob": "1981-03-16", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "борис фокин (cyrillic)", "dob": "1945-06-30", "address": "Cyprus", "label": "negative", "script": "cyrillic" }, { "name": "richard le goff (latin)", "dob": "1955-07-20", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "paulette pasquier (latin)", "dob": "1928-01-19", "address": "République tchèque", "label": "negative", "script": "latin" }, { "name": "sebastião gaspar (latin)", "dob": "1969-07-22", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Irina MELNIK (latin)", "dob": "1970-7-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Gholamreza Ardakani (latin)", "dob": "1992-8-20", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "michelle bourdon (latin)", "dob": "1978-10-22", "address": "République centrafricaine", "label": "negative", "script": "latin" }, { "name": "adèle joubert (latin)", "dob": "1977-12-10", "address": "Inde", "label": "negative", "script": "latin" }, { "name": "denis lacombe (latin)", "dob": "1953-04-25", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Андрей Бунаков (cyrillic)", "dob": "1971-7-5", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Aegli Tamani-Phella (latin)", "dob": "1978-6-21", "address": "Cyprus", "label": "positive", "script": "latin" }, { "name": "Igor STRELKOV (latin)", "dob": "1970-12-17", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 20% of variations that follow: Additionally, generate variations that: Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 20, "selected_rules": [ "insert_random_letter" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter" }, "percentage": 20 } } }, { "seed_identities_with_labels": [ { "name": "stéphanie olivier (latin)", "dob": "1957-10-30", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Rida Gull (latin)", "dob": "1981-12-25", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "rúben araújo (latin)", "dob": "1992-09-28", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Oleksandr Melnychuk (latin)", "dob": "1965-1-17", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "théophile vidal (latin)", "dob": "2004-08-28", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "رمزي مراد (arabic)", "dob": "1940-02-01", "address": "Hungary", "label": "negative", "script": "arabic" }, { "name": "vicki peterson (latin)", "dob": "1983-05-29", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Sergey Kravtsov (latin)", "dob": "1974-3-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "عبد الباقي زهران (arabic)", "dob": "1928-07-29", "address": "Syria", "label": "High Risk", "script": "arabic" }, { "name": "simão batista (latin)", "dob": "1987-10-03", "address": "Países Baixos", "label": "negative", "script": "latin" }, { "name": "jéssica macedo (latin)", "dob": "2004-01-13", "address": "Ilhas dos Cocos", "label": "negative", "script": "latin" }, { "name": "Александр Усс (cyrillic)", "dob": "1954-11-3", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Aleksander Lukashenko (latin)", "dob": "1954-8-31", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "matilde moura (latin)", "dob": "1962-10-21", "address": "Clipperton Island", "label": "negative", "script": "latin" }, { "name": "rafaela costa (latin)", "dob": "1957-12-14", "address": "Butão", "label": "negative", "script": "latin" } ], "query_template": "Generate 8 execution vectors for each target identity \"{name}\", ensuring phonetic similarity (100% Medium) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 25% of the total 8 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Replace spaces with special characters, and Swap adjacent consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 25, "selected_rules": [ "replace_spaces_with_random_special_characters", "swap_adjacent_consonants" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 25 } } }, { "seed_identities_with_labels": [ { "name": "Aleksey MOZGOVOY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Cholung Choe (latin)", "dob": "1973-5-16", "address": "Korea, North", "label": "positive", "script": "latin" }, { "name": "claude carpentier (latin)", "dob": "1935-03-29", "address": "Palau", "label": "negative", "script": "latin" }, { "name": "СЯРГЕЙ ЧАРГЕЙКА (cyrillic)", "dob": "1986-8-27", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "aurélie imbert (latin)", "dob": "1998-01-07", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "جرير الشايع (arabic)", "dob": "1956-10-11", "address": "Hungary", "label": "negative", "script": "arabic" }, { "name": "ramona riera (latin)", "dob": "1940-11-12", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "'Ali Sharara (latin)", "dob": "1968-9-25", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "Mokhtar BUSHUSHA (latin)", "dob": "1969-10-13", "address": "Italy", "label": "positive", "script": "latin" }, { "name": "constance gillet (latin)", "dob": "1995-08-16", "address": "Allemagne", "label": "negative", "script": "latin" }, { "name": "marc meyer (latin)", "dob": "1976-07-01", "address": "Brésil", "label": "negative", "script": "latin" }, { "name": "наум попов (cyrillic)", "dob": "1983-09-07", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "isabelle lecoq (latin)", "dob": "1927-09-03", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "denise loiseau (latin)", "dob": "1975-10-21", "address": "Malte", "label": "negative", "script": "latin" }, { "name": "maggie morvan (latin)", "dob": "1946-10-20", "address": "Mali", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 9 variations of {name}, ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 17% of the total 9 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Abbreviate name parts, and Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 17, "selected_rules": [ "shorten_name_to_abbreviations", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 17 } } }, { "seed_identities_with_labels": [ { "name": "Johnson Byabashaija (latin)", "dob": "1957-9-27", "address": "Uganda", "label": "positive", "script": "latin" }, { "name": "adrienne rocher (latin)", "dob": "1976-12-18", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "maría fernanda losa (latin)", "dob": "1976-05-30", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Дмитрий Лебедев (cyrillic)", "dob": "1968-3-30", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "alex pereira (latin)", "dob": "1982-01-09", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "bradley lewis (latin)", "dob": "1934-09-23", "address": "Bahamas", "label": "negative", "script": "latin" }, { "name": "фимка градинарова (cyrillic)", "dob": "1951-10-15", "address": "Botswana", "label": "negative", "script": "cyrillic" }, { "name": "In O (latin)", "dob": "1969-7-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "харлампий ермаков (cyrillic)", "dob": "1973-02-19", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "Imre Laszloczki (latin)", "dob": "1961-9-26", "address": "Hungary", "label": "positive", "script": "latin" }, { "name": "Lyudmila Zaitseva (latin)", "dob": "1979-7-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "stephen campbell (latin)", "dob": "1937-12-02", "address": "Zimbabwe", "label": "negative", "script": "latin" }, { "name": "joshua wiggins (latin)", "dob": "1955-11-15", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "daniel bennett (latin)", "dob": "1931-08-14", "address": "Tokelau", "label": "negative", "script": "latin" }, { "name": "kristin ramirez (latin)", "dob": "1929-04-02", "address": "Fiji", "label": "negative", "script": "latin" } ], "query_template": "Generate 10 variations of {name}, ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (50% Light, 50% Medium). Approximately 13% of the total 10 variations should follow these rule-based transformations: \nDuplicate a random letter, \nRemove a random vowel, \nand Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 13, "selected_rules": [ "duplicate_random_letter_as_double_letter", "remove_random_vowel", "add_random_trailing_title" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "remove_random_vowel": "Remove a random vowel", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 13 } } }, { "seed_identities_with_labels": [ { "name": "raymond rousseau (latin)", "dob": "1975-07-20", "address": "Égypte", "label": "negative", "script": "latin" }, { "name": "Yuri Nabokov (latin)", "dob": "1981-1-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "élise da silva (latin)", "dob": "1959-03-16", "address": "Italie", "label": "negative", "script": "latin" }, { "name": "伟 张 (chinese)", "dob": "1977-6-16", "address": "China", "label": "positive", "script": "chinese" }, { "name": "victor mccarthy (latin)", "dob": "1941-05-05", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "dinis maia (latin)", "dob": "2000-08-22", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "георгий степанов (cyrillic)", "dob": "2007-01-02", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "Nikolay Kosov (latin)", "dob": "1955-6-30", "address": "Hungary", "label": "positive", "script": "latin" }, { "name": "امیررضا الوندی (arabic)", "dob": "1948-03-28", "address": "Hong Kong", "label": "negative", "script": "arabic" }, { "name": "luc perrot (latin)", "dob": "1986-08-08", "address": "Russie", "label": "negative", "script": "latin" }, { "name": "Zajar KALASHOV (latin)", "dob": "1953-3-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "adèle lemaire (latin)", "dob": "1966-07-12", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Gulbuddin HEKHMARTYAR (latin)", "dob": "1949-8-1", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "alix langlois (latin)", "dob": "1974-09-20", "address": "Chypre", "label": "negative", "script": "latin" }, { "name": "laurence aubert (latin)", "dob": "1955-11-28", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 50% Light, 50% Medium, and also include 25% of variations that follow: Additionally, generate variations that perform these transformations: Abbreviate name parts, Replace random vowels with different vowels, and Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 25, "selected_rules": [ "shorten_name_to_abbreviations", "replace_random_vowel_with_random_vowel", "remove_random_vowel" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "remove_random_vowel": "Remove a random vowel" }, "percentage": 25 } } }, { "seed_identities_with_labels": [ { "name": "مصباح شمران (arabic)", "dob": "1948-06-30", "address": "United Kingdom", "label": "negative", "script": "arabic" }, { "name": "tatiana ariño (latin)", "dob": "1937-08-31", "address": "Eslovenia", "label": "negative", "script": "latin" }, { "name": "Анатоль Лапо (cyrillic)", "dob": "1963-5-24", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "lisandro cunha (latin)", "dob": "1947-12-08", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Dmitry Perminov (latin)", "dob": "1979-4-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "lucía paniagua (latin)", "dob": "2006-06-05", "address": "Chad", "label": "negative", "script": "latin" }, { "name": "fito carranza (latin)", "dob": "1937-04-11", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "imelda quiroga (latin)", "dob": "1935-08-14", "address": "Togo", "label": "negative", "script": "latin" }, { "name": "Omid ABEDSHAHI (latin)", "dob": "1983-1-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "delfina agustí (latin)", "dob": "1983-08-31", "address": "Micronesia", "label": "negative", "script": "latin" }, { "name": "madeleine legrand (latin)", "dob": "1973-04-04", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "аксидан монтянов (cyrillic)", "dob": "1973-07-13", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "Waad Qado (latin)", "dob": "1971-1-1", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "brigitte roche (latin)", "dob": "1934-08-22", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Alexey Sukhodolov (latin)", "dob": "1974-4-19", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 8 variations of {name} ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 24% of the total 8 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap random adjacent letters, and Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 24, "selected_rules": [ "swap_random_letter", "name_parts_permutations" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "name_parts_permutations": "Reorder name parts" }, "percentage": 24 } } }, { "seed_identities_with_labels": [ { "name": "Aleksandr Sokolov (latin)", "dob": "1970-8-4", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "margot baudry (latin)", "dob": "1989-01-26", "address": "Pérou", "label": "negative", "script": "latin" }, { "name": "Oleg Romanenko (latin)", "dob": "1963-10-5", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "anouk hardy (latin)", "dob": "1931-02-26", "address": "Belize", "label": "negative", "script": "latin" }, { "name": "милиана пондьов (cyrillic)", "dob": "1941-12-28", "address": "Nauru", "label": "negative", "script": "cyrillic" }, { "name": "Валерий Коровин (cyrillic)", "dob": "1977-5-31", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "stéphane vidal (latin)", "dob": "1985-12-07", "address": "Bulgarie", "label": "negative", "script": "latin" }, { "name": "Alexey Russkikh (latin)", "dob": "1968-7-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "david rowe (latin)", "dob": "1948-09-01", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "monica bailey (latin)", "dob": "1931-10-03", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "eugène hamel (latin)", "dob": "2000-09-06", "address": "Benin", "label": "negative", "script": "latin" }, { "name": "ظهير بنو كنز (arabic)", "dob": "1995-01-07", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "Ibrahim Jathran (latin)", "dob": "1982-10-29", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "christine navarro (latin)", "dob": "2005-09-05", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "paula frank (latin)", "dob": "1985-12-06", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 33% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random vowel, and Replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 33, "selected_rules": [ "remove_random_vowel", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 33 } } }, { "seed_identities_with_labels": [ { "name": "severiano jordán (latin)", "dob": "1951-08-17", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Katerina Pawlowska (latin)", "dob": "1977-3-28", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "راما عاملة (arabic)", "dob": "1981-01-18", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "Сергей Веремеенко (cyrillic)", "dob": "1955-9-26", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "louise boulay (latin)", "dob": "1950-08-18", "address": "Bulgarie", "label": "negative", "script": "latin" }, { "name": "eugène rivière (latin)", "dob": "2004-07-28", "address": "Ouganda", "label": "negative", "script": "latin" }, { "name": "Irina Shoygu (latin)", "dob": "1955-5-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "бисер колев (cyrillic)", "dob": "1967-07-22", "address": "Sierra Leone", "label": "negative", "script": "cyrillic" }, { "name": "denis dos santos (latin)", "dob": "1970-11-11", "address": "Gabon", "label": "negative", "script": "latin" }, { "name": "Yaqub Al-Rashidi (latin)", "dob": "1964-3-22", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "margaret paul (latin)", "dob": "1983-04-28", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "éric faure (latin)", "dob": "1987-12-12", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Apinya CHANTRAPRAPAPORN (latin)", "dob": "1955-1-14", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "corinne briand (latin)", "dob": "1976-09-09", "address": "Bulgarie", "label": "negative", "script": "latin" }, { "name": "victoria jones (latin)", "dob": "1981-07-21", "address": "South Sudan", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 70% Light, 30% Medium, and orthographic similarity: 50% Light, 50% Medium, and also include 21% of variations that follow: Additionally, generate variations that perform these transformations: Remove all spaces, and Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 21, "selected_rules": [ "remove_all_spaces", "name_parts_permutations" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "name_parts_permutations": "Reorder name parts" }, "percentage": 21 } } }, { "seed_identities_with_labels": [ { "name": "بيلسان بدر (arabic)", "dob": "1935-12-24", "address": "Poland", "label": "negative", "script": "arabic" }, { "name": "françois dupuis (latin)", "dob": "2001-05-12", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "andré roy (latin)", "dob": "1937-02-13", "address": "Kiribati", "label": "negative", "script": "latin" }, { "name": "jeanne martel (latin)", "dob": "1927-09-24", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "élodie cohen (latin)", "dob": "1949-03-19", "address": "Montserrat", "label": "negative", "script": "latin" }, { "name": "صلاح أشجع (arabic)", "dob": "1974-03-12", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "Mohammad Ansari (latin)", "dob": "1975-11-22", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "tomás cifuentes (latin)", "dob": "1947-03-28", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "gabriel leon (latin)", "dob": "1931-06-09", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Peter Mbaga (latin)", "dob": "1976-9-25", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "martin salmon (latin)", "dob": "1956-07-29", "address": "Érythrée", "label": "negative", "script": "latin" }, { "name": "nicolas bègue (latin)", "dob": "1954-12-18", "address": "Îles Mineures Éloignées des États-Unis", "label": "negative", "script": "latin" }, { "name": "حسين عطية (arabic)", "dob": "1965-12-19", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "Vladimir Uyba (latin)", "dob": "1958-10-4", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Muhammad Adhiguna (latin)", "dob": "1996-7-30", "address": "Turkey", "label": "positive", "script": "latin" } ], "query_template": "Generate 13 execution vectors for {name} with approximately 55% of the total variations following these rule-based transformations: \nDuplicate a random letter, Swap adjacent consonants, and Delete a random letter. Ensure phonetic similarity (70% Light variations using Soundex algorithm, 30% Medium variations using Metaphone) and orthographic similarity (10% Light variations with single character substitution, 50% Medium variations with double character substitution, 40% Far variations with multiple character insertion or deletion). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 55, "selected_rules": [ "duplicate_random_letter_as_double_letter", "swap_adjacent_consonants", "delete_random_letter" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "swap_adjacent_consonants": "Swap adjacent consonants", "delete_random_letter": "Delete a random letter" }, "percentage": 55 } } }, { "seed_identities_with_labels": [ { "name": "Saad AL-FAGIH (latin)", "dob": "1957-1-31", "address": "United Kingdom", "label": "positive", "script": "latin" }, { "name": "chuy briones (latin)", "dob": "1972-05-06", "address": "Uzbekistán", "label": "negative", "script": "latin" }, { "name": "maría dolores blanco (latin)", "dob": "1942-06-14", "address": "Eritrea", "label": "negative", "script": "latin" }, { "name": "zacarías valera (latin)", "dob": "1981-06-15", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "اعتكاف تغلب بن وائل (arabic)", "dob": "1951-08-26", "address": "Hungary", "label": "negative", "script": "arabic" }, { "name": "nico porcel (latin)", "dob": "1928-05-23", "address": "Palau", "label": "negative", "script": "latin" }, { "name": "rebeca castillo (latin)", "dob": "1941-07-22", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "ágata palomo (latin)", "dob": "1979-06-25", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Muhammad Al-Nabi (latin)", "dob": "1952-1-1", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "леонид аксенов (cyrillic)", "dob": "1996-05-06", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "Francois Okunji (latin)", "dob": "1949-7-10", "address": "Congo, Democratic Republic of the", "label": "positive", "script": "latin" }, { "name": "serafina saavedra (latin)", "dob": "1997-04-24", "address": "Costa Rica", "label": "negative", "script": "latin" }, { "name": "joel cardoso (latin)", "dob": "1975-06-21", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Михаил Авдеев (cyrillic)", "dob": "1977-3-6", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Muhammad Al-Sunaydar (latin)", "dob": "1987-3-5", "address": "Yemen", "label": "positive", "script": "latin" } ], "query_template": "Generate 14 name variations of {name}, ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 34% of the total 14 variations should follow these rule-based transformations: Replace spaces with special characters, Replace double letters with a single letter, and Delete a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 34, "selected_rules": [ "replace_spaces_with_random_special_characters", "replace_double_letters_with_single_letter", "delete_random_letter" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "delete_random_letter": "Delete a random letter" }, "percentage": 34 } } }, { "seed_identities_with_labels": [ { "name": "Abu Salih (latin)", "dob": "1983-12-13", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "frédéric delattre (latin)", "dob": "1972-04-13", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "donald davis (latin)", "dob": "1946-12-30", "address": "Zimbabwe", "label": "negative", "script": "latin" }, { "name": "odette dumas (latin)", "dob": "1988-05-27", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "نرگس پویان (arabic)", "dob": "1964-10-14", "address": "Bahrain", "label": "negative", "script": "arabic" }, { "name": "molly edwards (latin)", "dob": "1954-01-31", "address": "Mayotte", "label": "negative", "script": "latin" }, { "name": "Aleksander Lukashenko (latin)", "dob": "1954-8-31", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "candace green (latin)", "dob": "1946-03-27", "address": "Belgium", "label": "negative", "script": "latin" }, { "name": "ângelo lima (latin)", "dob": "1960-11-01", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "عبد القادر العقيدات (arabic)", "dob": "2003-08-18", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "Dmitry Pleshevskiy (latin)", "dob": "1992-7-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Вячеслав Фомичёв (cyrillic)", "dob": "1965-4-26", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "tammie fitzgerald (latin)", "dob": "2007-03-30", "address": "Rwanda", "label": "negative", "script": "latin" }, { "name": "Iurii Hotsaniuk (latin)", "dob": "1966-7-18", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "christopher shaw (latin)", "dob": "2001-04-25", "address": "Nigeria", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 13 variations of {name}, ensuring phonetic similarity (70% Light, 30% Medium) and orthographic similarity (50% Light, 50% Medium). Approximately 34% of the total 13 variations should follow these rule-based transformations: \nAdditionally, generate variations that: Convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 34, "selected_rules": [ "shorten_name_to_initials" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials" }, "percentage": 34 } } }, { "seed_identities_with_labels": [ { "name": "Oleksiy MOZHOVY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "rúben soares (latin)", "dob": "1995-04-10", "address": "Geórgia do Sul e Sandwich do Sul", "label": "negative", "script": "latin" }, { "name": "baltasar aguilera (latin)", "dob": "1935-03-19", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "jéssica fernandes (latin)", "dob": "1956-07-28", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "amor huerta (latin)", "dob": "1995-03-15", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Jinhua Qin (latin)", "dob": "1981-11-16", "address": "China", "label": "positive", "script": "latin" }, { "name": "josé freitas (latin)", "dob": "1946-10-16", "address": "Equador", "label": "negative", "script": "latin" }, { "name": "gonçalo lopes (latin)", "dob": "1995-04-25", "address": "Burúndi", "label": "negative", "script": "latin" }, { "name": "Клим Комаров (cyrillic)", "dob": "1996-6-21", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "иванна герасимов (cyrillic)", "dob": "1940-08-19", "address": "Panama", "label": "negative", "script": "cyrillic" }, { "name": "Gennadii Kudriavtsev (latin)", "dob": "1947-8-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Yuriy Gudilin (latin)", "dob": "1983-6-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "мефодий морозова (cyrillic)", "dob": "1991-02-12", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "emmanuel jacques (latin)", "dob": "1936-05-10", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "vasco jesus (latin)", "dob": "1937-07-26", "address": "Rússia", "label": "negative", "script": "latin" } ], "query_template": "Generate exactly 6 execution vectors (name variations) for each target identity {name}, ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (100% Medium). Approximately 29% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 29, "selected_rules": [ "shorten_name_to_initials" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials" }, "percentage": 29 } } }, { "seed_identities_with_labels": [ { "name": "áfrica tovar (latin)", "dob": "1945-01-18", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "marc lombard (latin)", "dob": "1924-11-13", "address": "Kazakhstan", "label": "negative", "script": "latin" }, { "name": "catherine torres (latin)", "dob": "2007-01-03", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Svetlana Bessarab (latin)", "dob": "1970-12-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "marine thibault (latin)", "dob": "1931-05-27", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "marcel noël (latin)", "dob": "1962-04-18", "address": "Fidji (République des)", "label": "negative", "script": "latin" }, { "name": "marine bouvier (latin)", "dob": "2004-10-16", "address": "Kazakhstan", "label": "negative", "script": "latin" }, { "name": "محدثه مهدیان (arabic)", "dob": "1952-07-06", "address": "Iran", "label": "High Risk", "script": "arabic" }, { "name": "Aleksandr Mishustin (latin)", "dob": "2000-12-13", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "luc rivière (latin)", "dob": "1974-09-26", "address": "Haïti", "label": "negative", "script": "latin" }, { "name": "Marianne Houwayek (latin)", "dob": "1980-5-14", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "Abdulqader Mortada (latin)", "dob": "1978-3-4", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "مبینا هاشمی (arabic)", "dob": "1946-07-12", "address": "Lithuania", "label": "negative", "script": "arabic" }, { "name": "josette fabre (latin)", "dob": "1948-02-10", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Алексей Русских (cyrillic)", "dob": "1968-7-17", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 50% of variations that follow: Additionally, generate variations that perform these transformations: Delete a random letter, Add a title suffix (Jr., PhD, etc.), and Swap adjacent consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 50, "selected_rules": [ "delete_random_letter", "add_random_trailing_title", "swap_adjacent_consonants" ], "rule_descriptions": { "delete_random_letter": "Delete a random letter", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 50 } } }, { "seed_identities_with_labels": [ { "name": "Nasif Barakat (latin)", "dob": "1970-11-30", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "amy gross (latin)", "dob": "1962-06-09", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Mariam Barreh (latin)", "dob": "1971-4-10", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "Ratka Kamceva (latin)", "dob": "1945-10-16", "address": "North Macedonia, The Republic of", "label": "positive", "script": "latin" }, { "name": "jérôme evrard (latin)", "dob": "1988-06-17", "address": "Swaziland", "label": "negative", "script": "latin" }, { "name": "dominique delahaye (latin)", "dob": "1992-08-20", "address": "Belize", "label": "negative", "script": "latin" }, { "name": "margot renault (latin)", "dob": "1927-11-28", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "ирла тутурилов (cyrillic)", "dob": "1929-09-11", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "اسماء الاسد (arabic)", "dob": "1975-8-11", "address": "Syria", "label": "positive", "script": "arabic" }, { "name": "محفوظ بقشان (arabic)", "dob": "1977-09-06", "address": "North Macedonia", "label": "negative", "script": "arabic" }, { "name": "christian ford (latin)", "dob": "1932-04-10", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "gilles schneider (latin)", "dob": "2004-06-04", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Mohamed Al-Kani (latin)", "dob": "1979-5-3", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "henri guillaume (latin)", "dob": "1934-06-29", "address": "Vierges (Îles)", "label": "negative", "script": "latin" }, { "name": "suzanne pineau (latin)", "dob": "1960-03-25", "address": "Pays-Bas", "label": "negative", "script": "latin" } ], "query_template": "Generate 6 name variations for {name} ensuring phonetic similarity with 70% Light variations sounding like \"{name}\" and 30% Medium variations sounding similar to \"{name}\", and orthographic similarity with 70% Light variations visually identical to \"{name}\" and 30% Medium variations having a visually similar spelling, approximately 47% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that duplicate a random letter in \"{name}\". The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 47, "selected_rules": [ "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 47 } } }, { "seed_identities_with_labels": [ { "name": "charlotte jones (latin)", "dob": "1988-01-06", "address": "Palau", "label": "negative", "script": "latin" }, { "name": "Тимур Каноков (cyrillic)", "dob": "1972-9-24", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "مظهر أبو قمر (arabic)", "dob": "1925-08-02", "address": "Austria", "label": "negative", "script": "arabic" }, { "name": "clotilde gonzález (latin)", "dob": "1962-09-11", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "Katerina Tikhonova (latin)", "dob": "1986-8-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Ahmed Afraah (latin)", "dob": "1985-8-17", "address": "Maldives", "label": "positive", "script": "latin" }, { "name": "christina thomas (latin)", "dob": "1951-06-04", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "perla heredia (latin)", "dob": "1975-07-24", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "brian kelly (latin)", "dob": "1955-05-20", "address": "Antarctica (the territory South of 60 deg S)", "label": "negative", "script": "latin" }, { "name": "Ivan Yermakov (latin)", "dob": "1986-4-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "patrick benjamin (latin)", "dob": "1925-11-06", "address": "Cook Islands", "label": "negative", "script": "latin" }, { "name": "Sergei Arenin (latin)", "dob": "1958-8-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "eric fisher (latin)", "dob": "1953-10-12", "address": "Korea", "label": "negative", "script": "latin" }, { "name": "breanna santiago (latin)", "dob": "1995-01-03", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "وصفي أولاد زيان (arabic)", "dob": "1940-04-05", "address": "Yemen", "label": "High Risk", "script": "arabic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 38% of variations that follow: Additionally, generate variations that perform these transformations: Swap adjacent consonants, and Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 38, "selected_rules": [ "swap_adjacent_consonants", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 38 } } }, { "seed_identities_with_labels": [ { "name": "Waad Qado (latin)", "dob": "1971-1-1", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "Андрей Бунаков (cyrillic)", "dob": "1971-7-5", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "morgan winters (latin)", "dob": "1957-02-19", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "dustin wyatt (latin)", "dob": "1988-07-30", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "benjamin arroyo (latin)", "dob": "1941-08-14", "address": "Norfolk Island", "label": "negative", "script": "latin" }, { "name": "nath merle (latin)", "dob": "1962-10-15", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "mary norton (latin)", "dob": "1993-09-22", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "joshua pearson (latin)", "dob": "1930-11-10", "address": "Turkmenistan", "label": "negative", "script": "latin" }, { "name": "Siarhei Kalinouski (latin)", "dob": "1969-1-3", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "аскольд сидорова (cyrillic)", "dob": "1949-01-11", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "амос лобанова (cyrillic)", "dob": "1959-04-26", "address": "Gabon", "label": "negative", "script": "cyrillic" }, { "name": "douglas carter (latin)", "dob": "1961-05-03", "address": "Qatar", "label": "negative", "script": "latin" }, { "name": "Aleksei Budnev (latin)", "dob": "1964-7-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "russell andrews (latin)", "dob": "1959-10-27", "address": "Guatemala", "label": "negative", "script": "latin" }, { "name": "Dmitry Pirog (latin)", "dob": "1980-6-27", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 10 variations of {name}, ensuring phonetic similarity (100% Far) and orthographic similarity (10% Light, 30% Medium, 60% Far). Approximately 16% of the total 10 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), and Replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 16, "selected_rules": [ "add_random_trailing_title", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 16 } } }, { "seed_identities_with_labels": [ { "name": "kathryn sawyer (latin)", "dob": "1940-04-14", "address": "Falkland Islands (Malvinas)", "label": "negative", "script": "latin" }, { "name": "واصف الحجار (arabic)", "dob": "1990-01-05", "address": "Croatia", "label": "negative", "script": "arabic" }, { "name": "Vasiliy Yurchenko (latin)", "dob": "1960-9-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "matthew mitchell (latin)", "dob": "1947-08-12", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "'Ali al-Shufa (latin)", "dob": "1991-7-25", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "debra ayala (latin)", "dob": "1942-11-09", "address": "French Southern Territories", "label": "negative", "script": "latin" }, { "name": "سعدون النمر (arabic)", "dob": "1942-07-29", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "karen chandler (latin)", "dob": "1985-10-14", "address": "Hungary", "label": "negative", "script": "latin" }, { "name": "guillaume meyer (latin)", "dob": "1924-10-06", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "christopher baker (latin)", "dob": "1980-03-04", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Gibran Bassil (latin)", "dob": "1970-6-21", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "Андрей Шевченко (cyrillic)", "dob": "1965-5-29", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "brianna anderson (latin)", "dob": "1976-01-25", "address": "Zambia", "label": "negative", "script": "latin" }, { "name": "Aleksey Mordashov (latin)", "dob": "1965-9-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "georgina cañellas (latin)", "dob": "1975-07-07", "address": "Venezuela", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 12 execution vectors for {name}, ensuring phonetic similarity (10% Light variations such as \"Ayn\", 50% Medium variations like \"Eina\" or \"Inay\", 40% Far variations like \"Onyeka\") and orthographic similarity (30% Light variations like \"Annie\", 40% Medium variations like \"Einah\", 30% Far variations like \"Anyka\"). Approximately 55% of the total 12 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Convert {name} to initials and Replace spaces in {name} with special characters (e.g., \"{name}\" -> \"{name}@\").. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 55, "selected_rules": [ "shorten_name_to_initials", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 55 } } }, { "seed_identities_with_labels": [ { "name": "hugues lelièvre (latin)", "dob": "1948-01-08", "address": "Niue", "label": "negative", "script": "latin" }, { "name": "baltasar lledó (latin)", "dob": "1976-05-23", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "alex lebrun (latin)", "dob": "1958-12-21", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "gregory welch (latin)", "dob": "1942-03-04", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Kifah Milhem (latin)", "dob": "1961-11-28", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Ivan Yermakov (latin)", "dob": "1986-4-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ممدوح عنس (arabic)", "dob": "2007-05-21", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "Sara Manakhimova (latin)", "dob": "1977-10-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "марк-антоний чупетловска (cyrillic)", "dob": "1955-01-02", "address": "Ireland", "label": "negative", "script": "cyrillic" }, { "name": "susan joly (latin)", "dob": "1939-10-20", "address": "Zaïre", "label": "negative", "script": "latin" }, { "name": "maurice bigot (latin)", "dob": "1969-12-10", "address": "Soudan", "label": "negative", "script": "latin" }, { "name": "Олег Смолин (cyrillic)", "dob": "1952-2-10", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Aleksandr Vetenevich (latin)", "dob": "1976-6-20", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "wendy lee (latin)", "dob": "1987-06-16", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "emmanuel foucher (latin)", "dob": "1931-09-16", "address": "Cambodge", "label": "negative", "script": "latin" } ], "query_template": "Generate exactly 9 execution vectors (name variations) for {name}, ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 57% of the total 9 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Replace random vowels with different vowels, and Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 57, "selected_rules": [ "replace_random_vowel_with_random_vowel", "remove_random_consonant" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "remove_random_consonant": "Remove a random consonant" }, "percentage": 57 } } }, { "seed_identities_with_labels": [ { "name": "Sveta Boyko (latin)", "dob": "1990-4-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Asadollah Seify (latin)", "dob": "1965-4-4", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "jéssica gomes (latin)", "dob": "1991-01-19", "address": "Ilha Norfolk", "label": "negative", "script": "latin" }, { "name": "Татьяна Егерева (cyrillic)", "dob": "1966-4-20", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Irina Shoygu (latin)", "dob": "1955-5-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "teresa dalton (latin)", "dob": "1989-08-25", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "eva batista (latin)", "dob": "1968-03-23", "address": "União Europeia", "label": "negative", "script": "latin" }, { "name": "étienne louis (latin)", "dob": "1947-03-30", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "ермолай виноградова (cyrillic)", "dob": "1936-02-04", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "joaquim silva (latin)", "dob": "1950-03-15", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "vicente simões (latin)", "dob": "1961-04-29", "address": "Argélia", "label": "negative", "script": "latin" }, { "name": "Imre Laszloczki (latin)", "dob": "1961-9-26", "address": "Hungary", "label": "positive", "script": "latin" }, { "name": "kevin johnson (latin)", "dob": "1954-12-10", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "violeta domingues (latin)", "dob": "1943-04-08", "address": "Paraguai", "label": "negative", "script": "latin" }, { "name": "дечю мустакова (cyrillic)", "dob": "1973-07-30", "address": "Bangladesh", "label": "negative", "script": "cyrillic" } ], "query_template": "Generate exactly 6 execution vectors for each target identity {name}, ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (100% Medium). Approximately 50% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Delete a random letter from {name}, Replace one or more vowels in {name} with different vowels, and Add a title suffix (Jr., PhD, etc.) to {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 50, "selected_rules": [ "delete_random_letter", "replace_random_vowel_with_random_vowel", "add_random_trailing_title" ], "rule_descriptions": { "delete_random_letter": "Delete a random letter", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 50 } } }, { "seed_identities_with_labels": [ { "name": "данила шашков (cyrillic)", "dob": "2004-09-08", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "théodore dupuy (latin)", "dob": "1938-02-22", "address": "Nauru", "label": "negative", "script": "latin" }, { "name": "'Abdullah al-'Anizi (latin)", "dob": "1984-8-2", "address": "Kuwait", "label": "positive", "script": "latin" }, { "name": "Иван Заворотный (cyrillic)", "dob": "1979-10-22", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Ahmet DURI (latin)", "dob": "1987-1-12", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "michelle cohen (latin)", "dob": "1943-03-12", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Johnson Byabashaija (latin)", "dob": "1957-9-27", "address": "Uganda", "label": "positive", "script": "latin" }, { "name": "emmanuel aubert (latin)", "dob": "1932-01-04", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "margaud leblanc (latin)", "dob": "1958-11-20", "address": "Cocos (Îles)", "label": "negative", "script": "latin" }, { "name": "édith raynaud (latin)", "dob": "1927-09-24", "address": "Libéria", "label": "negative", "script": "latin" }, { "name": "lourenço teixeira (latin)", "dob": "1935-06-01", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "étienne caron (latin)", "dob": "1967-12-20", "address": "Svalbard et Jan Mayen (Îles)", "label": "negative", "script": "latin" }, { "name": "Aleksandr Kobets (latin)", "dob": "1959-9-27", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "عاتب شهران (arabic)", "dob": "1989-11-10", "address": "Sao Tome and Principe", "label": "negative", "script": "arabic" }, { "name": "caroline bigot (latin)", "dob": "1999-04-03", "address": "Mali", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 6 variations of {name}. ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (50% Light, 50% Medium). Approximately 45% of the total 6 variations should follow these rule-based transformations: Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 45, "selected_rules": [ "add_random_leading_title" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 45 } } }, { "seed_identities_with_labels": [ { "name": "Muhsin al-Zibin (latin)", "dob": "1973-7-1", "address": "Kuwait", "label": "positive", "script": "latin" }, { "name": "daniel mathieu (latin)", "dob": "1981-02-09", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "samuel clark (latin)", "dob": "1982-04-20", "address": "Philippines", "label": "negative", "script": "latin" }, { "name": "paula cross (latin)", "dob": "1997-07-25", "address": "Norfolk Island", "label": "negative", "script": "latin" }, { "name": "Abulghasem Valagohar (latin)", "dob": "1969-8-15", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Медни Кадырова (cyrillic)", "dob": "1978-9-7", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "jonathan lewis (latin)", "dob": "2004-11-13", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "elizabeth neal (latin)", "dob": "1927-04-04", "address": "Puerto Rico", "label": "negative", "script": "latin" }, { "name": "امیرمحمد بهرامی (arabic)", "dob": "1938-03-06", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "kenneth young (latin)", "dob": "1983-11-05", "address": "Belgium", "label": "negative", "script": "latin" }, { "name": "guy huet (latin)", "dob": "1953-07-16", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Mehdi Lashgarian (latin)", "dob": "1989-6-2", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "عبدالرّحمن ضبيعة (arabic)", "dob": "1965-02-24", "address": "Czech Republic", "label": "negative", "script": "arabic" }, { "name": "Umm Layth (latin)", "dob": "1994-5-11", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "macaria pozo (latin)", "dob": "1993-01-27", "address": "Bolivia", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 7 execution vectors for each target identity {name}, ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 45% of the total 7 variations should follow these rule-based transformations: Additionally, generate variations that: Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 45, "selected_rules": [ "add_random_trailing_title" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 45 } } }, { "seed_identities_with_labels": [ { "name": "Muhammad Al-Sunaydar (latin)", "dob": "1987-3-5", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "مسرور خثعم (arabic)", "dob": "1948-07-14", "address": "Finland", "label": "negative", "script": "arabic" }, { "name": "emiliano ballester (latin)", "dob": "2006-10-18", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "marita donaire (latin)", "dob": "1940-03-03", "address": "Mauritania", "label": "negative", "script": "latin" }, { "name": "andrés felipe salinas (latin)", "dob": "1990-11-13", "address": "Irán", "label": "negative", "script": "latin" }, { "name": "звездан патков (cyrillic)", "dob": "1960-01-09", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "Abu Emad (latin)", "dob": "1984-1-1", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "Анна Суровикина (cyrillic)", "dob": "1973-7-4", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Gennadii NIKULOV (latin)", "dob": "1967-2-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "michèle leroy (latin)", "dob": "1931-09-12", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "ramiro azorin (latin)", "dob": "1942-02-15", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "soraya hervás (latin)", "dob": "1994-10-22", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "arsenio busquets (latin)", "dob": "1980-09-03", "address": "Etiopía", "label": "negative", "script": "latin" }, { "name": "Yakiv Antonov (latin)", "dob": "1972-11-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jordana quesada (latin)", "dob": "1928-02-16", "address": "Sierra Leona", "label": "negative", "script": "latin" } ], "query_template": "Generate 15 variations of {name}, ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (50% Light, 50% Medium). Approximately 56% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Remove a random consonant from {name}, Duplicate a random letter in {name}, and Convert {name} to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 56, "selected_rules": [ "remove_random_consonant", "duplicate_random_letter_as_double_letter", "shorten_name_to_initials" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "shorten_name_to_initials": "Convert name to initials" }, "percentage": 56 } } }, { "seed_identities_with_labels": [ { "name": "andré lourenço (latin)", "dob": "1978-10-21", "address": "Sérvia", "label": "negative", "script": "latin" }, { "name": "petra monteiro (latin)", "dob": "1990-09-10", "address": "Dhekelia", "label": "negative", "script": "latin" }, { "name": "manuel domingues (latin)", "dob": "1980-05-10", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "аркадій забіла (cyrillic)", "dob": "1979-03-27", "address": "Falkland Islands (Malvinas)", "label": "negative", "script": "cyrillic" }, { "name": "artur melo (latin)", "dob": "1976-07-09", "address": "Índia", "label": "negative", "script": "latin" }, { "name": "Aleksei MOZHOVYY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Abdulla Shareef (latin)", "dob": "1986-6-11", "address": "Maldives", "label": "positive", "script": "latin" }, { "name": "sabine ledoux (latin)", "dob": "1930-10-06", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "antoinette michaud (latin)", "dob": "1964-07-03", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "leandro miranda (latin)", "dob": "1986-03-06", "address": "Uganda", "label": "negative", "script": "latin" }, { "name": "ثائر القواسم (arabic)", "dob": "1997-06-08", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "Даниил Борщев (cyrillic)", "dob": "1975-12-4", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "marc morel (latin)", "dob": "1987-03-31", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Nikolai Levine (latin)", "dob": "1985-5-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Yakub Zakriyev (latin)", "dob": "1990-10-16", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 10 variations of {name}, ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (10% Light, 30% Medium, 60% Far). Approximately 41% of the total 10 variations should follow these rule-based transformations: Add a title prefix (Mr., Dr., etc.), and Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 41, "selected_rules": [ "add_random_leading_title", "add_random_trailing_title" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 41 } } }, { "seed_identities_with_labels": [ { "name": "Anna Tausent (latin)", "dob": "1990-1-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "carolina caballero (latin)", "dob": "1963-10-17", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "shelia fisher (latin)", "dob": "1949-03-24", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Kyo'ng-il Kim (latin)", "dob": "1979-8-1", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "mckenzie dean (latin)", "dob": "1974-11-09", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "kayla perry (latin)", "dob": "1971-12-28", "address": "South Georgia and the South Sandwich Islands", "label": "negative", "script": "latin" }, { "name": "بشار الأسد (arabic)", "dob": "1965-9-11", "address": "Syria", "label": "positive", "script": "arabic" }, { "name": "Mukhtar Shah (latin)", "dob": "1939-11-8", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "Jebrail Guzel (latin)", "dob": "1993-7-10", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "destiny garcia (latin)", "dob": "1990-09-14", "address": "Sierra Leone", "label": "negative", "script": "latin" }, { "name": "elizabeth rice (latin)", "dob": "1927-02-15", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "angela torres (latin)", "dob": "1961-05-09", "address": "French Guiana", "label": "negative", "script": "latin" }, { "name": "адалберт келешев (cyrillic)", "dob": "1978-10-30", "address": "Fiji", "label": "negative", "script": "cyrillic" }, { "name": "stacy cook (latin)", "dob": "1966-07-15", "address": "Turkey", "label": "negative", "script": "latin" }, { "name": "божик дачев (cyrillic)", "dob": "1951-10-25", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" } ], "query_template": "Generate 7 variations of {name} ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (100% Medium). Approximately 51% of the total 7 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Replace random vowels with different vowels, and Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 51, "selected_rules": [ "replace_random_vowel_with_random_vowel", "add_random_leading_title" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 51 } } }, { "seed_identities_with_labels": [ { "name": "ema vicente (latin)", "dob": "1945-09-04", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "eva sousa (latin)", "dob": "1940-08-04", "address": "Atlantic Ocean", "label": "negative", "script": "latin" }, { "name": "megan friedman (latin)", "dob": "1993-01-14", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "carolina ferreira (latin)", "dob": "1970-07-26", "address": "Baamas", "label": "negative", "script": "latin" }, { "name": "Abud Kol (latin)", "dob": "1962-2-23", "address": "South Sudan", "label": "positive", "script": "latin" }, { "name": "jaime reis (latin)", "dob": "1966-06-15", "address": "Dinamarca", "label": "negative", "script": "latin" }, { "name": "marta campos (latin)", "dob": "1993-07-26", "address": "Jamaica", "label": "negative", "script": "latin" }, { "name": "Natalia Beglova (latin)", "dob": "1955-11-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "íris esteves (latin)", "dob": "1937-12-19", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Franki Eskeda (latin)", "dob": "1994-10-23", "address": "Mexico", "label": "positive", "script": "latin" }, { "name": "julie vallet (latin)", "dob": "1929-08-25", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "денис чернов (cyrillic)", "dob": "2006-08-04", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "Mohamad Wehbe (latin)", "dob": "1992-4-14", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "Татяна Москалькова (cyrillic)", "dob": "1955-5-30", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "ملهم حنبولي (arabic)", "dob": "1963-03-16", "address": "Zimbabwe", "label": "negative", "script": "arabic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 49% of variations that follow: Additionally, generate variations that perform these transformations: Abbreviate name parts, Reorder name parts, and Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 49, "selected_rules": [ "shorten_name_to_abbreviations", "name_parts_permutations", "remove_random_consonant" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "name_parts_permutations": "Reorder name parts", "remove_random_consonant": "Remove a random consonant" }, "percentage": 49 } } }, { "seed_identities_with_labels": [ { "name": "Aleksandr Kurenkov (latin)", "dob": "1972-6-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "صائب الجعليين (arabic)", "dob": "1978-10-21", "address": "Syria", "label": "High Risk", "script": "arabic" }, { "name": "katherine lee (latin)", "dob": "1963-03-16", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "chelo amat (latin)", "dob": "1948-11-25", "address": "Líbano", "label": "negative", "script": "latin" }, { "name": "jenaro abad (latin)", "dob": "1958-09-13", "address": "Chile", "label": "negative", "script": "latin" }, { "name": "سجا آل علي (arabic)", "dob": "1928-01-11", "address": "Mexico", "label": "negative", "script": "arabic" }, { "name": "Gholamreza Eyni (latin)", "dob": "1975-7-11", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "alfredo piñeiro (latin)", "dob": "1963-07-27", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "georgina amador (latin)", "dob": "1979-01-08", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Дмитрий Пьянов (cyrillic)", "dob": "1977-12-7", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "'Abdul-Wahab AL-HUMAYQANI (latin)", "dob": "1972-8-4", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "griselda valero (latin)", "dob": "1941-07-08", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "manuela puente (latin)", "dob": "1945-03-28", "address": "India", "label": "negative", "script": "latin" }, { "name": "Irina Bubnova (latin)", "dob": "1983-4-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "seve verdejo (latin)", "dob": "1955-09-11", "address": "Albania", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 33% of variations that follow: Additionally, generate variations that perform these transformations: Replace random vowels with different vowels, and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 33, "selected_rules": [ "replace_random_vowel_with_random_vowel", "swap_random_letter" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 33 } } }, { "seed_identities_with_labels": [ { "name": "арсений елисеев (cyrillic)", "dob": "1991-06-26", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "caroline pottier (latin)", "dob": "1986-08-11", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "christelle barre (latin)", "dob": "1927-09-05", "address": "Ouganda", "label": "negative", "script": "latin" }, { "name": "Denis Degtyarenko (latin)", "dob": "1989-10-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Yuriy Soloviev (latin)", "dob": "1970-4-13", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jorge elliott (latin)", "dob": "1971-09-27", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "deanna mason (latin)", "dob": "1952-11-29", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "joséphine gomes (latin)", "dob": "1995-11-28", "address": "Guinée-Bissau", "label": "negative", "script": "latin" }, { "name": "caroline delorme (latin)", "dob": "1953-06-17", "address": "Tadjikistan", "label": "negative", "script": "latin" }, { "name": "桂華 李 (chinese)", "dob": "1964-11-22", "address": "China", "label": "positive", "script": "chinese" }, { "name": "كاظم العليان (arabic)", "dob": "1973-05-16", "address": "Norway", "label": "negative", "script": "arabic" }, { "name": "Mehdi Lashgarian (latin)", "dob": "1989-6-2", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "pauline lemonnier (latin)", "dob": "1964-12-19", "address": "Sénégal", "label": "negative", "script": "latin" }, { "name": "caroline sauvage (latin)", "dob": "1927-09-10", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Maulana Ubaidullah (latin)", "dob": "1985-1-31", "address": "Afghanistan", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 38% of variations that follow: Additionally, generate variations that perform these transformations: Use first name initial with last name, Duplicate a random letter, and Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 38, "selected_rules": [ "initial_only_first_name", "duplicate_random_letter_as_double_letter", "insert_random_letter" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name", "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "insert_random_letter": "Insert a random letter" }, "percentage": 38 } } }, { "seed_identities_with_labels": [ { "name": "simone leblanc (latin)", "dob": "1997-05-30", "address": "Irlande", "label": "negative", "script": "latin" }, { "name": "Valery GABRIEL (latin)", "dob": "1956-1-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ذكي قبيلة هذيل البقوم (arabic)", "dob": "1945-12-09", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "Leontiy Kondrakhin (latin)", "dob": "2001-4-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "daniel roger (latin)", "dob": "2006-03-26", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Hosein Hemsi (latin)", "dob": "1982-10-27", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "brittany henderson (latin)", "dob": "2003-03-14", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "claire jean (latin)", "dob": "1957-01-05", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Anastasiya Kuznetsova (latin)", "dob": "1970-7-20", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Сергей Иванов (cyrillic)", "dob": "1953-1-31", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "michèle allain (latin)", "dob": "1968-11-14", "address": "Saint Vincent et les Grenadines", "label": "negative", "script": "latin" }, { "name": "christopher gross (latin)", "dob": "2007-04-12", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "لقاء هوازن (arabic)", "dob": "1987-12-17", "address": "Kiribati", "label": "negative", "script": "arabic" }, { "name": "valentine mallet (latin)", "dob": "1930-03-12", "address": "Burundi", "label": "negative", "script": "latin" }, { "name": "auguste breton (latin)", "dob": "1974-07-20", "address": "Ghana", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 43% of variations that follow: Additionally, generate variations that perform these transformations: Swap random adjacent letters, Insert a random letter, and Abbreviate name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 43, "selected_rules": [ "swap_random_letter", "insert_random_letter", "shorten_name_to_abbreviations" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "insert_random_letter": "Insert a random letter", "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 43 } } }, { "seed_identities_with_labels": [ { "name": "kathleen adams (latin)", "dob": "1932-09-22", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Artem Lifshits (latin)", "dob": "1992-12-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "tonya rodriguez (latin)", "dob": "1976-01-16", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "hélène lagarde (latin)", "dob": "1947-01-11", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "céline munoz (latin)", "dob": "2006-06-12", "address": "Japon", "label": "negative", "script": "latin" }, { "name": "Aleksey MOZGOVOY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "frédéric aubert (latin)", "dob": "1980-01-28", "address": "Haïti", "label": "negative", "script": "latin" }, { "name": "владлен королева (cyrillic)", "dob": "1985-11-29", "address": "Seychelles", "label": "negative", "script": "cyrillic" }, { "name": "Катерина ТИХОНОВА (cyrillic)", "dob": "1986-8-31", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "оник пищовколева (cyrillic)", "dob": "1944-07-03", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "victoire diaz (latin)", "dob": "1956-09-15", "address": "Bangladesh", "label": "negative", "script": "latin" }, { "name": "xavier laroche (latin)", "dob": "1974-03-19", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Daniel He (latin)", "dob": "1965-7-6", "address": "China", "label": "positive", "script": "latin" }, { "name": "Alexander Vetenevich (latin)", "dob": "1976-6-20", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "marie guibert (latin)", "dob": "1937-08-30", "address": "Ouganda", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 7 variations of the name {name}, ensuring phonetic similarity: 10% Light, 50% Medium, 40% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 44% of variations that follow: Additionally, generate variations that perform these transformations: Replace spaces with special characters, Abbreviate name parts, and Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 44, "selected_rules": [ "replace_spaces_with_random_special_characters", "shorten_name_to_abbreviations", "remove_random_consonant" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "shorten_name_to_abbreviations": "Abbreviate name parts", "remove_random_consonant": "Remove a random consonant" }, "percentage": 44 } } }, { "seed_identities_with_labels": [ { "name": "Imre Laszloczki (latin)", "dob": "1961-9-26", "address": "Hungary", "label": "positive", "script": "latin" }, { "name": "thibaut valentin (latin)", "dob": "1991-04-05", "address": "Gibraltar", "label": "negative", "script": "latin" }, { "name": "nicolas grondin (latin)", "dob": "1940-02-11", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "шенол фенеров (cyrillic)", "dob": "1972-09-12", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "joseph leblanc (latin)", "dob": "1998-11-28", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "audrey langlois (latin)", "dob": "1934-06-22", "address": "Oman", "label": "negative", "script": "latin" }, { "name": "Saad AL-FAKIH (latin)", "dob": "1957-1-31", "address": "United Kingdom", "label": "positive", "script": "latin" }, { "name": "maurice durand (latin)", "dob": "1979-07-01", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "virginie michel (latin)", "dob": "1974-07-20", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "وثّاب بنو أمية (arabic)", "dob": "1965-04-02", "address": "Philippines", "label": "negative", "script": "arabic" }, { "name": "guy morin (latin)", "dob": "1956-07-03", "address": "Papouasie-Nouvelle-Guinée", "label": "negative", "script": "latin" }, { "name": "Nasif Barakat (latin)", "dob": "1970-11-30", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Gholamreza Eyni (latin)", "dob": "1975-7-11", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Сергей Алтухов (cyrillic)", "dob": "1982-2-23", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "élodie gillet (latin)", "dob": "1971-05-30", "address": "Équateur", "label": "negative", "script": "latin" } ], "query_template": "Generate 15 variations of {name} ensuring phonetic similarity (100% Medium) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 16% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap random adjacent letters, and Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 16, "selected_rules": [ "swap_random_letter", "remove_random_vowel" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "remove_random_vowel": "Remove a random vowel" }, "percentage": 16 } } }, { "seed_identities_with_labels": [ { "name": "Mehdi Siari (latin)", "dob": "1959-7-12", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "rosa maría lluch (latin)", "dob": "1977-12-23", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Bair Zhamsuyev (latin)", "dob": "1959-1-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "gilbert rey (latin)", "dob": "1986-10-30", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "william cousin (latin)", "dob": "1986-10-19", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "айдемир сопаджиева (cyrillic)", "dob": "1926-08-25", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "raquel amorim (latin)", "dob": "1995-06-09", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "valentim vaz (latin)", "dob": "1984-04-13", "address": "Jibuti", "label": "negative", "script": "latin" }, { "name": "valentim reis (latin)", "dob": "1956-12-28", "address": "Guiné-Bissau", "label": "negative", "script": "latin" }, { "name": "Myo'ng-chin Kim (latin)", "dob": "1980-2-18", "address": "China", "label": "positive", "script": "latin" }, { "name": "nelson miranda (latin)", "dob": "1985-10-01", "address": "Barbados", "label": "negative", "script": "latin" }, { "name": "Дмитрий СЫТЫЙ (cyrillic)", "dob": "1989-3-23", "address": "Central African Republic", "label": "positive", "script": "cyrillic" }, { "name": "حنفي الجابر (arabic)", "dob": "1993-09-17", "address": "Puerto Rico", "label": "negative", "script": "arabic" }, { "name": "Kamkong WONG (latin)", "dob": "1958-8-8", "address": "China", "label": "positive", "script": "latin" }, { "name": "kelly moura (latin)", "dob": "1985-06-27", "address": "Tanzânia", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 25% of variations that follow: Additionally, generate variations that perform these transformations: Remove all spaces, and Delete a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 25, "selected_rules": [ "remove_all_spaces", "delete_random_letter" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "delete_random_letter": "Delete a random letter" }, "percentage": 25 } } }, { "seed_identities_with_labels": [ { "name": "яків саєнко (cyrillic)", "dob": "1965-08-14", "address": "Sweden", "label": "negative", "script": "cyrillic" }, { "name": "Muhammad Rahmun (latin)", "dob": "1957-4-1", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "marcia fletcher (latin)", "dob": "2005-05-31", "address": "Norfolk Island", "label": "negative", "script": "latin" }, { "name": "sharon colon (latin)", "dob": "1993-07-30", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "مُنذر ترابين (arabic)", "dob": "1930-07-12", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "katrina warner (latin)", "dob": "1928-10-25", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "robert foley (latin)", "dob": "1934-08-19", "address": "Antarctica (the territory South of 60 deg S)", "label": "negative", "script": "latin" }, { "name": "victoria bates (latin)", "dob": "1926-06-10", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "mary hartman (latin)", "dob": "1990-11-26", "address": "Singapore", "label": "negative", "script": "latin" }, { "name": "hélène guyot (latin)", "dob": "1966-06-29", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Aleksander Zhuchkovskiy (latin)", "dob": "1986-9-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jacob stewart (latin)", "dob": "1924-10-10", "address": "Mongolia", "label": "negative", "script": "latin" }, { "name": "Юлия Афанасьева (cyrillic)", "dob": "1988-2-24", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Marianne Houwayek (latin)", "dob": "1980-5-14", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "Amber Sun (latin)", "dob": "1969-3-23", "address": "Taiwan", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 10 execution vectors for each target identity {name}, ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 30% of the total 10 variations should follow these rule-based transformations: Replace spaces with special characters, Duplicate a random letter, and Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 30, "selected_rules": [ "replace_spaces_with_random_special_characters", "duplicate_random_letter_as_double_letter", "name_parts_permutations" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "name_parts_permutations": "Reorder name parts" }, "percentage": 30 } } }, { "seed_identities_with_labels": [ { "name": "diana leal (latin)", "dob": "1968-01-31", "address": "Argélia", "label": "negative", "script": "latin" }, { "name": "ivo macedo (latin)", "dob": "1949-07-29", "address": "Macau", "label": "negative", "script": "latin" }, { "name": "gabriela faria (latin)", "dob": "1995-06-13", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "cesar salgado (latin)", "dob": "2001-01-12", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "matias silva (latin)", "dob": "1961-06-07", "address": "Quirguizistão", "label": "negative", "script": "latin" }, { "name": "Marina Mordashova (latin)", "dob": "1979-5-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "афанасий зыков (cyrillic)", "dob": "1972-07-21", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "خيري مهنا (arabic)", "dob": "1996-01-31", "address": "France", "label": "negative", "script": "arabic" }, { "name": "Вадим Валиахметов (cyrillic)", "dob": "1981-5-7", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "íris mota (latin)", "dob": "1933-10-22", "address": "Sri Lanca", "label": "negative", "script": "latin" }, { "name": "Sergey NEVEROV (latin)", "dob": "1961-12-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "lisa austin (latin)", "dob": "1965-06-02", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Irina Pankina (latin)", "dob": "1986-3-8", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "salud calzada (latin)", "dob": "1961-08-09", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Olimjon Tohtahunov (latin)", "dob": "1949-12-31", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 9 name variations for {name}, ensuring phonetic similarity (100% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 55% of the total 9 variations should follow these rule-based transformations: Replace a random letter with a similar letter.\n[VALIDATION HINTS]: Apply these rule-based transformations: Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 55, "selected_rules": [ "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 55 } } }, { "seed_identities_with_labels": [ { "name": "прохор дементьев (cyrillic)", "dob": "1936-10-28", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "Wei Zhang (latin)", "dob": "1977-6-16", "address": "China", "label": "positive", "script": "latin" }, { "name": "ермил зимин (cyrillic)", "dob": "1993-11-27", "address": "Nauru", "label": "negative", "script": "cyrillic" }, { "name": "luciana faria (latin)", "dob": "1990-11-16", "address": "Granada", "label": "negative", "script": "latin" }, { "name": "Alexander Kurenkov (latin)", "dob": "1972-6-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "océane chevallier (latin)", "dob": "1935-10-03", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "mara araújo (latin)", "dob": "1947-03-30", "address": "Líbano", "label": "negative", "script": "latin" }, { "name": "bobby mcclain (latin)", "dob": "1976-09-09", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Александр Щербаков (cyrillic)", "dob": "1965-5-12", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Oleksandr Basov (latin)", "dob": "1971-10-16", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Tuah Febriwansyah (latin)", "dob": "1968-2-18", "address": "Indonesia", "label": "positive", "script": "latin" }, { "name": "constança borges (latin)", "dob": "1925-08-17", "address": "Mauritânia", "label": "negative", "script": "latin" }, { "name": "henrique freitas (latin)", "dob": "1949-07-22", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "erika mota (latin)", "dob": "1987-09-16", "address": "Sri Lanca", "label": "negative", "script": "latin" }, { "name": "claude gaillard (latin)", "dob": "1942-12-14", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 7 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 18% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random vowel, Remove a random consonant, and Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 18, "selected_rules": [ "remove_random_vowel", "remove_random_consonant", "name_parts_permutations" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "remove_random_consonant": "Remove a random consonant", "name_parts_permutations": "Reorder name parts" }, "percentage": 18 } } }, { "seed_identities_with_labels": [ { "name": "luc faivre (latin)", "dob": "1966-10-18", "address": "Arménie", "label": "negative", "script": "latin" }, { "name": "معتوق الشايع (arabic)", "dob": "1965-11-14", "address": "Colombia", "label": "negative", "script": "arabic" }, { "name": "arnaude deschamps (latin)", "dob": "1978-01-08", "address": "Danemark", "label": "negative", "script": "latin" }, { "name": "Hosein Hemsi (latin)", "dob": "1982-10-27", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "troy maldonado (latin)", "dob": "1988-10-21", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Hamza Akbar (latin)", "dob": "1998-9-6", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "espiridión sales (latin)", "dob": "1966-02-24", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Amir Khamzat (latin)", "dob": "1974-10-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "amélie vallée (latin)", "dob": "1940-02-12", "address": "Jamaïque", "label": "negative", "script": "latin" }, { "name": "brianna jenkins (latin)", "dob": "1982-04-19", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "margaux godard (latin)", "dob": "1953-10-10", "address": "Myanmar", "label": "negative", "script": "latin" }, { "name": "Дмитрий Левин (cyrillic)", "dob": "1965-8-27", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Dmitriy Levin (latin)", "dob": "1965-8-27", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "آيات بجيلة (arabic)", "dob": "1938-12-20", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "georges jean (latin)", "dob": "2003-01-11", "address": "Burkina Faso", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 52% of variations that follow: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), and Convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 52, "selected_rules": [ "add_random_trailing_title", "shorten_name_to_initials" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "shorten_name_to_initials": "Convert name to initials" }, "percentage": 52 } } }, { "seed_identities_with_labels": [ { "name": "shelby mckay (latin)", "dob": "1984-06-14", "address": "Grenada", "label": "negative", "script": "latin" }, { "name": "Yusef Meraj (latin)", "dob": "1978-6-10", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Ibrahim Jathran (latin)", "dob": "1982-10-29", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "bradley reed (latin)", "dob": "1978-03-19", "address": "Jordan", "label": "negative", "script": "latin" }, { "name": "cruz mata (latin)", "dob": "1982-03-05", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "عرفان السكاكيني (arabic)", "dob": "1987-04-24", "address": "Lithuania", "label": "negative", "script": "arabic" }, { "name": "eileen williams (latin)", "dob": "1931-10-03", "address": "Falkland Islands (Malvinas)", "label": "negative", "script": "latin" }, { "name": "اميرحسين علی شاهی (arabic)", "dob": "1926-09-03", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "jason gross (latin)", "dob": "2003-01-23", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "shannon lawson (latin)", "dob": "1973-03-21", "address": "Micronesia", "label": "negative", "script": "latin" }, { "name": "paco cabezas (latin)", "dob": "1958-07-31", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Kia Sadeghi (latin)", "dob": "1986-3-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "hannah bauer (latin)", "dob": "1994-03-20", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Александр Самокутяев (cyrillic)", "dob": "1970-3-13", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Aleksei Gnedovskii (latin)", "dob": "1964-12-31", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 8 variations of {name} ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 57% of the total 8 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Duplicate a random letter, Insert a random letter, and Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 57, "selected_rules": [ "duplicate_random_letter_as_double_letter", "insert_random_letter", "remove_random_consonant" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "insert_random_letter": "Insert a random letter", "remove_random_consonant": "Remove a random consonant" }, "percentage": 57 } } }, { "seed_identities_with_labels": [ { "name": "éléonore clerc (latin)", "dob": "2001-10-21", "address": "Norfolk (Îles)", "label": "negative", "script": "latin" }, { "name": "Михайло Білоусов (cyrillic)", "dob": "1964-11-26", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "samuel abreu (latin)", "dob": "1962-10-12", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "lucy letellier (latin)", "dob": "1991-04-23", "address": "Territoire britannique de l'océan Indien", "label": "negative", "script": "latin" }, { "name": "julia krause (latin)", "dob": "2002-01-21", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Chapito ISIDRO (latin)", "dob": "1982-6-19", "address": "Mexico", "label": "positive", "script": "latin" }, { "name": "Rustam Kalimullin (latin)", "dob": "1958-1-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jacques gomez (latin)", "dob": "1957-09-06", "address": "Autriche", "label": "negative", "script": "latin" }, { "name": "نور الحقّ أولاد زيان (arabic)", "dob": "2007-06-13", "address": "Aruba", "label": "negative", "script": "arabic" }, { "name": "Muhammad al-'Anizi (latin)", "dob": "1986-5-26", "address": "Kuwait", "label": "positive", "script": "latin" }, { "name": "claire boucher (latin)", "dob": "1942-03-08", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "laurent schmitt (latin)", "dob": "1964-09-24", "address": "Japon", "label": "negative", "script": "latin" }, { "name": "Vladimir Pospelov (latin)", "dob": "1954-7-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "карл жукова (cyrillic)", "dob": "1982-12-14", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "nathalie loiseau (latin)", "dob": "1927-06-07", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 17% of variations that follow: Additionally, generate variations that: Convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 17, "selected_rules": [ "shorten_name_to_initials" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials" }, "percentage": 17 } } }, { "seed_identities_with_labels": [ { "name": "gregory williams (latin)", "dob": "1983-12-04", "address": "United States Virgin Islands", "label": "negative", "script": "latin" }, { "name": "Aleksandr Sarkisyan (latin)", "dob": "1946-8-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "james brown (latin)", "dob": "1986-11-11", "address": "Pitcairn Islands", "label": "negative", "script": "latin" }, { "name": "andrea allen (latin)", "dob": "1987-07-12", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "cristiano almeida (latin)", "dob": "1961-02-16", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Muhammad Al-Sunaydar (latin)", "dob": "1987-3-5", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "renée étienne (latin)", "dob": "1988-03-04", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Василий Юрченко (cyrillic)", "dob": "1960-9-26", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Sergei Kudryashov (latin)", "dob": "1967-7-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jack osborne (latin)", "dob": "1939-03-16", "address": "Australia", "label": "negative", "script": "latin" }, { "name": "аверкий суханов (cyrillic)", "dob": "1943-03-24", "address": "Bangladesh", "label": "negative", "script": "cyrillic" }, { "name": "надежда пахомова (cyrillic)", "dob": "1999-03-16", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "joseph brown (latin)", "dob": "2001-08-20", "address": "Uzbekistan", "label": "negative", "script": "latin" }, { "name": "Nikolay Arefyev (latin)", "dob": "1949-3-11", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "joséphine pinto (latin)", "dob": "1965-09-09", "address": "Burkina Faso", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 15 variations of {name}, ensuring phonetic similarity (100% Medium) and orthographic similarity (10% Light, 50% Medium, 40% Far). Approximately 47% of the total 15 variations should follow these rule-based transformations: Insert a random letter, and Swap adjacent syllables. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 47, "selected_rules": [ "insert_random_letter", "swap_adjacent_syllables" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter", "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 47 } } }, { "seed_identities_with_labels": [ { "name": "одарка бабак (cyrillic)", "dob": "1962-06-09", "address": "Nauru", "label": "negative", "script": "cyrillic" }, { "name": "benigno parra (latin)", "dob": "1943-01-22", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "laurent lefèvre (latin)", "dob": "1992-03-28", "address": "Maurice", "label": "negative", "script": "latin" }, { "name": "richard ferrand (latin)", "dob": "2000-03-10", "address": "Japon", "label": "negative", "script": "latin" }, { "name": "christophe faivre (latin)", "dob": "1988-07-10", "address": "Cayman (Îles)", "label": "negative", "script": "latin" }, { "name": "jaime martins (latin)", "dob": "1970-03-07", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "marine boucher (latin)", "dob": "1945-09-27", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Nikita Samoylenko (latin)", "dob": "1992-8-28", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Тимур Каноков (cyrillic)", "dob": "1972-9-24", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "heather mcfarland (latin)", "dob": "1971-08-20", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "ماهر شمران (arabic)", "dob": "2003-06-23", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "Chilli Yuan (latin)", "dob": "1985-5-8", "address": "China", "label": "positive", "script": "latin" }, { "name": "Jebrail Guzel (latin)", "dob": "1993-7-10", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "louis deschamps (latin)", "dob": "1956-03-06", "address": "Belize", "label": "negative", "script": "latin" }, { "name": "Dmitriy Slobodskoy (latin)", "dob": "1988-7-28", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 11 variations of {name}, ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 53% of the total 11 variations should follow these rule-based transformations: \nAbbreviate name parts in 3 variations, Replace spaces with special characters in 4 variations, Swap random adjacent letters in 6 variations. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 53, "selected_rules": [ "shorten_name_to_abbreviations", "replace_spaces_with_random_special_characters", "swap_random_letter" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 53 } } }, { "seed_identities_with_labels": [ { "name": "eugène lenoir (latin)", "dob": "1969-07-11", "address": "Croatie", "label": "negative", "script": "latin" }, { "name": "julie fleury (latin)", "dob": "1956-11-19", "address": "Tokelau", "label": "negative", "script": "latin" }, { "name": "virginie tessier (latin)", "dob": "1987-08-17", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Abu-Ahmad Zakkur (latin)", "dob": "1979-1-5", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "стефан яремків (cyrillic)", "dob": "1947-04-08", "address": "Falkland Islands (Malvinas)", "label": "negative", "script": "cyrillic" }, { "name": "Ciro FERREIRA (latin)", "dob": "1987-8-27", "address": "Brazil", "label": "positive", "script": "latin" }, { "name": "élise henry (latin)", "dob": "1940-11-10", "address": "Liban", "label": "negative", "script": "latin" }, { "name": "steven hudson (latin)", "dob": "1936-08-08", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "rené delaunay (latin)", "dob": "1939-07-30", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Maulana Ubaidullah (latin)", "dob": "1985-1-31", "address": "Afghanistan", "label": "positive", "script": "latin" }, { "name": "Якуб Закриев (cyrillic)", "dob": "1990-10-16", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "jean guibert (latin)", "dob": "1949-05-24", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "gabriel picard (latin)", "dob": "1944-06-12", "address": "Dominique", "label": "negative", "script": "latin" }, { "name": "харлампий веселов (cyrillic)", "dob": "2006-05-12", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "Peter Mbaga (latin)", "dob": "1976-9-25", "address": "South Africa", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 9 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 25% of variations that follow: Additionally, generate variations that perform these transformations: Remove all spaces, Add a title suffix (Jr., PhD, etc.), and Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 25, "selected_rules": [ "remove_all_spaces", "add_random_trailing_title", "remove_random_consonant" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "remove_random_consonant": "Remove a random consonant" }, "percentage": 25 } } }, { "seed_identities_with_labels": [ { "name": "Сергей Аземша (cyrillic)", "dob": "1974-7-17", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "эрнест гришин (cyrillic)", "dob": "1937-08-16", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "andreia esteves (latin)", "dob": "1976-07-26", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "German Gref (latin)", "dob": "1964-2-8", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "carolina vieira (latin)", "dob": "1943-05-05", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "anouk laporte (latin)", "dob": "1956-05-16", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "willie caldwell (latin)", "dob": "1934-12-11", "address": "Austria", "label": "negative", "script": "latin" }, { "name": "Igor STRELKOV (latin)", "dob": "1970-12-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "لبيد سموم (arabic)", "dob": "1968-11-13", "address": "Myanmar", "label": "negative", "script": "arabic" }, { "name": "Rajabi Al-Zahir (latin)", "dob": "1969-7-5", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "allen green (latin)", "dob": "1955-03-04", "address": "Bahamas", "label": "negative", "script": "latin" }, { "name": "gary rich (latin)", "dob": "1973-06-02", "address": "Jersey", "label": "negative", "script": "latin" }, { "name": "sarah walsh (latin)", "dob": "1985-10-30", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "logan gibson (latin)", "dob": "1947-03-11", "address": "Bangladesh", "label": "negative", "script": "latin" }, { "name": "Yuriy Shevchenko (latin)", "dob": "1966-12-30", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 14 variations of {name}, ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 58% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Convert name to initials, Replace random consonants with different consonants, and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 58, "selected_rules": [ "shorten_name_to_initials", "replace_random_consonant_with_random_consonant", "swap_random_letter" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 58 } } }, { "seed_identities_with_labels": [ { "name": "Tigran KHACHATUROV (latin)", "dob": "1979-2-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Dmitriy Smirnov (latin)", "dob": "1987-11-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "émile jacques (latin)", "dob": "1935-12-20", "address": "Tanzanie", "label": "negative", "script": "latin" }, { "name": "Aleksandr Nikolov (latin)", "dob": "1962-2-19", "address": "Bulgaria", "label": "positive", "script": "latin" }, { "name": "Abdelmalek DERDOUKAL (latin)", "dob": "1970-4-20", "address": "Algeria", "label": "positive", "script": "latin" }, { "name": "эмилия карпова (cyrillic)", "dob": "1955-02-14", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "isaac leduc (latin)", "dob": "2004-01-09", "address": "Tokelau", "label": "negative", "script": "latin" }, { "name": "denise bigot (latin)", "dob": "1941-11-21", "address": "Honduras", "label": "negative", "script": "latin" }, { "name": "gabriela villalonga (latin)", "dob": "1939-05-30", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "gilles voisin (latin)", "dob": "2002-04-18", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "yves martineau (latin)", "dob": "1966-08-22", "address": "Libye", "label": "negative", "script": "latin" }, { "name": "شهير آل مقطة (arabic)", "dob": "1990-10-09", "address": "San Marino", "label": "negative", "script": "arabic" }, { "name": "gérard sanchez (latin)", "dob": "1925-09-27", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "suzanne philippe (latin)", "dob": "1963-09-06", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Александр Черковский (cyrillic)", "dob": "1972-3-21", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 100% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 17% of variations that follow: Additionally, generate variations that perform these transformations: Add a title prefix (Mr., Dr., etc.), Insert a random letter, and Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 17, "selected_rules": [ "add_random_leading_title", "insert_random_letter", "remove_random_vowel" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "insert_random_letter": "Insert a random letter", "remove_random_vowel": "Remove a random vowel" }, "percentage": 17 } } }, { "seed_identities_with_labels": [ { "name": "Kan Zaw (latin)", "dob": "1954-10-11", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "sylvia hatfield (latin)", "dob": "1931-02-19", "address": "Cambodia", "label": "negative", "script": "latin" }, { "name": "Dmitry Lelikov (latin)", "dob": "1968-5-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "روفيدا حمامي (arabic)", "dob": "1978-06-14", "address": "Guadeloupe", "label": "negative", "script": "arabic" }, { "name": "alexander shah (latin)", "dob": "1982-02-15", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Дмитрий Джиникашвили (cyrillic)", "dob": "1987-7-17", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "luísa melo (latin)", "dob": "1973-06-15", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "marie campbell (latin)", "dob": "2007-02-27", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Abu LLONGGO (latin)", "dob": "1972-5-15", "address": "Philippines", "label": "positive", "script": "latin" }, { "name": "ryan hardy (latin)", "dob": "1973-01-25", "address": "Bouvet Island (Bouvetoya)", "label": "negative", "script": "latin" }, { "name": "lauren morris (latin)", "dob": "1965-04-16", "address": "Bermuda", "label": "negative", "script": "latin" }, { "name": "angela boyd (latin)", "dob": "1986-08-21", "address": "Hungary", "label": "negative", "script": "latin" }, { "name": "лев макарова (cyrillic)", "dob": "1987-11-04", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "craig harris (latin)", "dob": "1957-06-29", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Evgeni Chernet (latin)", "dob": "1946-11-18", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 6 execution vectors for {name}, ensuring phonetic similarity by providing 3 Light and 3 Medium variations in sound-alike names. Also ensure orthographic similarity by generating 2 Light, 2.4 Medium, and 1.6 Far visually similar spellings. Approximately 23% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that duplicate a random letter in {name}.\n[VALIDATION HINTS]: Phonetic similarity: 50% Light.; Orthographic similarity: 30% Light. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 23, "selected_rules": [ "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 23 } } }, { "seed_identities_with_labels": [ { "name": "jose antonio quesada (latin)", "dob": "1956-05-28", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "cipriano roura (latin)", "dob": "2006-10-05", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "benjamín andres (latin)", "dob": "1961-11-15", "address": "Uruguay", "label": "negative", "script": "latin" }, { "name": "Oleg Romanenko (latin)", "dob": "1963-10-5", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jose luis llabrés (latin)", "dob": "1966-08-16", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Pavel Marinychev (latin)", "dob": "1978-10-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "hortensia ribas (latin)", "dob": "1991-08-29", "address": "Costa Rica", "label": "negative", "script": "latin" }, { "name": "Alexander Zharov (latin)", "dob": "1964-8-11", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Максут Шадаев (cyrillic)", "dob": "1979-11-11", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "tatiana vázquez (latin)", "dob": "1996-08-06", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Kirill SELEZNEV (latin)", "dob": "1974-4-23", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "خلوصي عكاوي (arabic)", "dob": "1959-08-18", "address": "Syria", "label": "High Risk", "script": "arabic" }, { "name": "luna vicens (latin)", "dob": "1997-01-24", "address": "Maldivas", "label": "negative", "script": "latin" }, { "name": "مُعتز مهيار (arabic)", "dob": "1988-08-24", "address": "Cape Verde", "label": "negative", "script": "arabic" }, { "name": "íñigo francisco (latin)", "dob": "1957-10-20", "address": "Sri Lanka", "label": "negative", "script": "latin" } ], "query_template": "Generate 8 variations of {name} ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 52% of the total 8 variations should follow these rule-based transformations: Additionally, generate variations that: Swap adjacent consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 52, "selected_rules": [ "swap_adjacent_consonants" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 52 } } }, { "seed_identities_with_labels": [ { "name": "rosa esteban (latin)", "dob": "1931-12-09", "address": "Nueva Zelandia", "label": "negative", "script": "latin" }, { "name": "julio alegria (latin)", "dob": "1985-10-15", "address": "República Democrática Popular Lao", "label": "negative", "script": "latin" }, { "name": "honoré carlier (latin)", "dob": "1962-11-08", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "margot morvan (latin)", "dob": "2006-04-27", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "marguerite guyon (latin)", "dob": "1957-10-28", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "maría manuela puente (latin)", "dob": "1990-08-25", "address": "Noruega", "label": "negative", "script": "latin" }, { "name": "Pavel Marinychev (latin)", "dob": "1978-10-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "марко якименко (cyrillic)", "dob": "1960-10-17", "address": "Marshall Islands", "label": "negative", "script": "cyrillic" }, { "name": "Behnam SHAHRYARI (latin)", "dob": "1965-9-30", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "арсений шестаков (cyrillic)", "dob": "1962-10-27", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "Hossein Arani (latin)", "dob": "1964-12-22", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "olegario pou (latin)", "dob": "1974-10-28", "address": "Palau", "label": "negative", "script": "latin" }, { "name": "СЕРГЕЙ ЧЕРГЕЙКО (cyrillic)", "dob": "1986-8-27", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Volodymyr Bandura (latin)", "dob": "1990-7-15", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "gloria castelló (latin)", "dob": "1941-08-02", "address": "Bolivia", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 12 execution vectors for {name}, ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (100% Medium). Approximately 28% of the total 12 variations should follow these rule-based transformations: Additionally, generate variations that: Replace vowels with similar sounds, Replace consonants with similar sounds, Append common suffixes, Prepend common prefixes, Swap adjacent syllables, Transpose consecutive letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 28, "selected_rules": [ "swap_adjacent_syllables" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 28 } } }, { "seed_identities_with_labels": [ { "name": "محیا سماوات (arabic)", "dob": "1985-04-17", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "jordán conde (latin)", "dob": "1993-02-04", "address": "España", "label": "negative", "script": "latin" }, { "name": "ashley beck (latin)", "dob": "1964-07-01", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "лаврентий мамонтов (cyrillic)", "dob": "1979-02-17", "address": "Burundi", "label": "negative", "script": "cyrillic" }, { "name": "Sufian QUMU (latin)", "dob": "1959-6-26", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "belen berrocal (latin)", "dob": "1956-10-20", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Hathaiwan WORAWATWICHAI (latin)", "dob": "1956-1-9", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "maribel peñalver (latin)", "dob": "1973-10-11", "address": "España", "label": "negative", "script": "latin" }, { "name": "Abu-'Ubaydah Al-Agha (latin)", "dob": "1964-5-2", "address": "Saudi Arabia", "label": "positive", "script": "latin" }, { "name": "Дмитрий Белик (cyrillic)", "dob": "1969-10-17", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "hermenegildo aliaga (latin)", "dob": "1952-10-31", "address": "República Checa", "label": "negative", "script": "latin" }, { "name": "candelas carrillo (latin)", "dob": "1965-05-03", "address": "Afganistán", "label": "negative", "script": "latin" }, { "name": "joel márquez (latin)", "dob": "1950-10-15", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Vildan Zinnurov (latin)", "dob": "1964-1-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "blake smith (latin)", "dob": "1996-08-31", "address": "South Africa", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 6 variations of {name} ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (50% Light, 50% Medium). Approximately 40% of the total 6 variations should follow these rule-based transformations: \nAdd a title prefix (Mr., Dr., etc.) to {name}, \nInsert a random letter into {name} and replace one instance with an adjacent letter, \nRemove a random vowel from {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 40, "selected_rules": [ "add_random_leading_title", "insert_random_letter", "remove_random_vowel" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "insert_random_letter": "Insert a random letter", "remove_random_vowel": "Remove a random vowel" }, "percentage": 40 } } }, { "seed_identities_with_labels": [ { "name": "emily ewing (latin)", "dob": "1926-05-24", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Igor KORNET (latin)", "dob": "1973-4-29", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "андика точева-клопова (cyrillic)", "dob": "1948-10-05", "address": "Nauru", "label": "negative", "script": "cyrillic" }, { "name": "brandy warner (latin)", "dob": "2005-12-29", "address": "Ecuador", "label": "negative", "script": "latin" }, { "name": "krystal evans (latin)", "dob": "1933-03-16", "address": "Mauritius", "label": "negative", "script": "latin" }, { "name": "henri bernier (latin)", "dob": "1977-02-19", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Mohammad Ansari (latin)", "dob": "1975-11-22", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Alexey Russkikh (latin)", "dob": "1968-7-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jimmy howe (latin)", "dob": "1990-12-22", "address": "Sao Tome and Principe", "label": "negative", "script": "latin" }, { "name": "tanya williams (latin)", "dob": "1991-04-14", "address": "Bermuda", "label": "negative", "script": "latin" }, { "name": "Reza Ebadzadeh (latin)", "dob": "1964-6-13", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "christopher mccormick (latin)", "dob": "1989-03-20", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "евдоким тетерин (cyrillic)", "dob": "1989-05-02", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "جواد غفارحدادی (arabic)", "dob": "1964-9-23", "address": "Iran", "label": "positive", "script": "arabic" }, { "name": "charles holmes (latin)", "dob": "1942-09-09", "address": "Namibia", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 7 variations of {name}, ensuring phonetic similarity (100% Medium) and orthographic similarity (10% Light, 50% Medium, 40% Far). Approximately 58% of the total 7 variations should follow these rule-based transformations: \nReplace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 58, "selected_rules": [ "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 58 } } }, { "seed_identities_with_labels": [ { "name": "نغم الخرافي (arabic)", "dob": "1936-09-12", "address": "Heard Island and McDonald Islands", "label": "negative", "script": "arabic" }, { "name": "douglas lin (latin)", "dob": "1983-10-20", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "alec leach (latin)", "dob": "1964-08-16", "address": "Saint Kitts and Nevis", "label": "negative", "script": "latin" }, { "name": "Viktor Netyksho (latin)", "dob": "1966-9-8", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "théodore joubert (latin)", "dob": "1939-08-28", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "jerry glover (latin)", "dob": "1949-05-30", "address": "Vanuatu", "label": "negative", "script": "latin" }, { "name": "nicholas baker (latin)", "dob": "1965-06-17", "address": "Brazil", "label": "negative", "script": "latin" }, { "name": "святополк пахомов (cyrillic)", "dob": "1946-05-07", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "Sufian QUMU (latin)", "dob": "1959-6-26", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "julie york (latin)", "dob": "1976-05-10", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Malek Ruben (latin)", "dob": "1960-1-1", "address": "South Sudan", "label": "positive", "script": "latin" }, { "name": "richard decker (latin)", "dob": "1966-02-11", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Touraj Zangeneh (latin)", "dob": "1958-8-5", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Андрэй Рыбакоў (cyrillic)", "dob": "1976-7-11", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "hector mccoy (latin)", "dob": "1954-04-29", "address": "Djibouti", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 24% of variations that follow: Additionally, generate variations that perform these transformations: Use first name initial with last name, and Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 24, "selected_rules": [ "initial_only_first_name", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 24 } } }, { "seed_identities_with_labels": [ { "name": "georges perrot (latin)", "dob": "1945-09-14", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Abud Kol (latin)", "dob": "1962-2-23", "address": "South Sudan", "label": "positive", "script": "latin" }, { "name": "tatiana henriques (latin)", "dob": "1993-04-20", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "eutropio tur (latin)", "dob": "1954-04-04", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "rebecca wise (latin)", "dob": "2005-07-02", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Aleksey Budnev (latin)", "dob": "1964-7-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Mohammed Matnee (latin)", "dob": "1983-4-15", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "عفيف الخرافي (arabic)", "dob": "1932-09-04", "address": "Slovakia (Slovak Republic)", "label": "negative", "script": "arabic" }, { "name": "marie lesage (latin)", "dob": "1939-01-06", "address": "Turks et Caïques (Îles)", "label": "negative", "script": "latin" }, { "name": "филарет уварова (cyrillic)", "dob": "1945-01-24", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "Mohammed Saeed (latin)", "dob": "1977-12-4", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "Якуб Закриев (cyrillic)", "dob": "1990-10-16", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "zoé brunet (latin)", "dob": "1990-10-28", "address": "Niger", "label": "negative", "script": "latin" }, { "name": "dominique regnier (latin)", "dob": "1971-09-12", "address": "Uruguay", "label": "negative", "script": "latin" }, { "name": "théodore mendès (latin)", "dob": "1943-08-05", "address": "Marshall (Îles)", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 28% of variations that follow: Additionally, generate variations that: Delete a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 28, "selected_rules": [ "delete_random_letter" ], "rule_descriptions": { "delete_random_letter": "Delete a random letter" }, "percentage": 28 } } }, { "seed_identities_with_labels": [ { "name": "betty paul (latin)", "dob": "2002-10-02", "address": "Tuvalu", "label": "negative", "script": "latin" }, { "name": "melissa morris (latin)", "dob": "1946-10-27", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "luce valentin (latin)", "dob": "1963-10-27", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "christopher stewart (latin)", "dob": "1988-03-15", "address": "Togo", "label": "negative", "script": "latin" }, { "name": "hortensia alcolea (latin)", "dob": "1943-03-10", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "نورس المنتفق (arabic)", "dob": "1926-09-04", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "deborah gill (latin)", "dob": "1996-07-13", "address": "Latvia", "label": "negative", "script": "latin" }, { "name": "Dinar Gilmutdinov (latin)", "dob": "1969-8-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "charlotte brunet (latin)", "dob": "1928-02-18", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Александр Жуков (cyrillic)", "dob": "1956-6-1", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Irina Pankina (latin)", "dob": "1986-3-8", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "shannon todd (latin)", "dob": "1929-11-18", "address": "Oman", "label": "negative", "script": "latin" }, { "name": "Mukhtar Shah (latin)", "dob": "1939-11-8", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "نصوح آل معيض (arabic)", "dob": "1978-05-08", "address": "Romania", "label": "negative", "script": "arabic" }, { "name": "Somboon KRAPOOMPORN (latin)", "dob": "1959-5-6", "address": "Thailand", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 100% Medium, and also include 26% of variations that follow: Additionally, generate variations that perform these transformations: Duplicate a random letter, Replace spaces with special characters, and Replace double letters with a single letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 26, "selected_rules": [ "duplicate_random_letter_as_double_letter", "replace_spaces_with_random_special_characters", "replace_double_letters_with_single_letter" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 26 } } }, { "seed_identities_with_labels": [ { "name": "mariana fuentes (latin)", "dob": "1940-01-30", "address": "Finlandia", "label": "negative", "script": "latin" }, { "name": "Рафаэль Марданшин (cyrillic)", "dob": "1961-12-24", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "carmelo arnal (latin)", "dob": "2001-09-24", "address": "San Vicente y las Granadinas", "label": "negative", "script": "latin" }, { "name": "ратибор русакова (cyrillic)", "dob": "1969-05-01", "address": "Bhutan", "label": "negative", "script": "cyrillic" }, { "name": "gertrudis roma (latin)", "dob": "1975-07-08", "address": "Hungría", "label": "negative", "script": "latin" }, { "name": "Nasser Nesr (latin)", "dob": "1963-4-20", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "purificación pons (latin)", "dob": "1947-12-11", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "'Adnan Yusuf (latin)", "dob": "1956-6-6", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "isaac campoy (latin)", "dob": "1992-02-28", "address": "Samoa", "label": "negative", "script": "latin" }, { "name": "cosme cepeda (latin)", "dob": "1979-02-13", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "anaïs germain (latin)", "dob": "1986-02-04", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Hassan AYACH (latin)", "dob": "1963-5-1", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "ираида шашков (cyrillic)", "dob": "1997-07-26", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "Igor STRELKOV (latin)", "dob": "1970-12-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "todd mcknight (latin)", "dob": "1931-01-13", "address": "South Africa", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 48% of variations that follow: Additionally, generate variations that: Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 48, "selected_rules": [ "add_random_trailing_title" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 48 } } }, { "seed_identities_with_labels": [ { "name": "فاطمه شادروان (arabic)", "dob": "1953-03-21", "address": "Kuwait", "label": "negative", "script": "arabic" }, { "name": "kelsey osborn (latin)", "dob": "1930-06-21", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Хазалбек Атабекаў (cyrillic)", "dob": "1967-3-18", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "renée foucher (latin)", "dob": "1957-05-13", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "andrea lee (latin)", "dob": "1938-01-20", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Ivan Demchenko (latin)", "dob": "1960-9-27", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "théophile clément (latin)", "dob": "1939-11-22", "address": "Vierges britanniques (Îles)", "label": "negative", "script": "latin" }, { "name": "maryse richard (latin)", "dob": "1940-03-04", "address": "Royaume-Uni", "label": "negative", "script": "latin" }, { "name": "simón bellido (latin)", "dob": "1987-01-21", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "juliette dos santos (latin)", "dob": "1960-07-04", "address": "Indonésie", "label": "negative", "script": "latin" }, { "name": "Mykola Vorobei (latin)", "dob": "1963-5-4", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "علاء الموركة (arabic)", "dob": "1969-10-11", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "Sara Manakhimova (latin)", "dob": "1977-10-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Victor Boyarkin (latin)", "dob": "1958-10-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "édouard pasquier (latin)", "dob": "1928-02-25", "address": "Géorgie du Sud et Sandwich du Sud (Îles)", "label": "negative", "script": "latin" } ], "query_template": "Generate exactly 10 execution vectors ({name}) for each target identity. Ensuring phonetic similarity with the following distributions: 1 variation Light, 3 variations Medium, and 6 variations Far. Additionally, ensuring orthographic similarity with the following distributions: 2 variations Light, 6 variations Medium, and 2 variations Far. Approximately 13% of the total 10 variations should follow these rule-based transformations: Generate variations that add a title suffix (Jr., PhD, etc.).\n[VALIDATION HINTS]: Phonetic similarity: 10% Light.; Orthographic similarity: 20% Light. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 13, "selected_rules": [ "add_random_trailing_title" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 13 } } }, { "seed_identities_with_labels": [ { "name": "Oleg Tkach (latin)", "dob": "1967-9-23", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jessica conner (latin)", "dob": "1973-01-05", "address": "Martinique", "label": "negative", "script": "latin" }, { "name": "flor fernandes (latin)", "dob": "1942-11-07", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "федор уваров (cyrillic)", "dob": "1939-01-13", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "jose white (latin)", "dob": "1985-03-26", "address": "Rwanda", "label": "negative", "script": "latin" }, { "name": "jeffrey knox (latin)", "dob": "1965-08-11", "address": "Ireland", "label": "negative", "script": "latin" }, { "name": "mara costa (latin)", "dob": "1974-08-09", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "трифон воронова (cyrillic)", "dob": "1954-05-17", "address": "Saudi Arabia", "label": "negative", "script": "cyrillic" }, { "name": "Rafael Bastardo (latin)", "dob": "1978-9-22", "address": "Venezuela", "label": "positive", "script": "latin" }, { "name": "paca nebot (latin)", "dob": "1973-11-07", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "carol silva (latin)", "dob": "1979-12-28", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "deanna reyes (latin)", "dob": "1987-06-07", "address": "Ecuador", "label": "negative", "script": "latin" }, { "name": "Aleksei Simanovskiy (latin)", "dob": "1955-9-6", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Aleksey Mordashov (latin)", "dob": "1965-9-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Валерий Семёнов (cyrillic)", "dob": "1960-9-16", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate 13 variations of {name} ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 33% of the total 13 variations should follow these rule-based transformations: Use first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 33, "selected_rules": [ "initial_only_first_name" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name" }, "percentage": 33 } } }, { "seed_identities_with_labels": [ { "name": "danielle lagarde (latin)", "dob": "1996-09-16", "address": "Pays-Bas", "label": "negative", "script": "latin" }, { "name": "رامي نهد (arabic)", "dob": "1977-07-03", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "edu mateu (latin)", "dob": "1943-03-28", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "maurice becker (latin)", "dob": "1955-04-19", "address": "Gibraltar", "label": "negative", "script": "latin" }, { "name": "franck blanchard (latin)", "dob": "1954-12-02", "address": "Libéria", "label": "negative", "script": "latin" }, { "name": "aimé gomez (latin)", "dob": "1954-01-09", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "jacqueline vang (latin)", "dob": "1990-11-20", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "شيّق آل مقطة (arabic)", "dob": "1948-11-18", "address": "New Zealand", "label": "negative", "script": "arabic" }, { "name": "donna turner (latin)", "dob": "1962-03-29", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Alexander Zharov (latin)", "dob": "1964-8-11", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Abud Kol (latin)", "dob": "1962-2-23", "address": "South Sudan", "label": "positive", "script": "latin" }, { "name": "Zajar KALASHOV (latin)", "dob": "1953-3-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "بشير منصور (arabic)", "dob": "1980-2-9", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "alexandrie daniel (latin)", "dob": "1955-12-13", "address": "Finlande", "label": "negative", "script": "latin" }, { "name": "Khalil TRINIDAD (latin)", "dob": "1978-3-20", "address": "Philippines", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 34% of variations that follow: Additionally, generate variations that perform these transformations: Replace random vowels with different vowels, Reorder name parts, and Replace double letters with a single letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 34, "selected_rules": [ "replace_random_vowel_with_random_vowel", "name_parts_permutations", "replace_double_letters_with_single_letter" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "name_parts_permutations": "Reorder name parts", "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 34 } } }, { "seed_identities_with_labels": [ { "name": "tyler cole (latin)", "dob": "1990-02-15", "address": "Turks and Caicos Islands", "label": "negative", "script": "latin" }, { "name": "'Ali al-Shufa (latin)", "dob": "1991-7-25", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Денис Майданов (cyrillic)", "dob": "1976-2-17", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Maksim Viktorov (latin)", "dob": "1972-6-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "adrian brown (latin)", "dob": "1997-09-29", "address": "Rwanda", "label": "negative", "script": "latin" }, { "name": "justin johnston (latin)", "dob": "2000-08-22", "address": "Jordan", "label": "negative", "script": "latin" }, { "name": "شافع الدليم (arabic)", "dob": "1924-10-09", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "Tuah Febriwansyah (latin)", "dob": "1968-2-18", "address": "Indonesia", "label": "positive", "script": "latin" }, { "name": "Javier RIVERA (latin)", "dob": "1972-4-20", "address": "Honduras", "label": "positive", "script": "latin" }, { "name": "monique adams (latin)", "dob": "2000-06-29", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "meghan burke (latin)", "dob": "1979-08-26", "address": "Grenada", "label": "negative", "script": "latin" }, { "name": "april curtis (latin)", "dob": "1930-10-19", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "maggie munoz (latin)", "dob": "1925-11-30", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "مريم تنزیلی (arabic)", "dob": "1995-12-26", "address": "Bangladesh", "label": "negative", "script": "arabic" }, { "name": "timothée diallo (latin)", "dob": "1991-09-29", "address": "Haiti", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 9 variations of the name {name}, ensuring phonetic similarity: 10% Light, 50% Medium, 40% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 34% of variations that follow: Additionally, generate variations that: Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 34, "selected_rules": [ "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 34 } } }, { "seed_identities_with_labels": [ { "name": "nathalie dufour (latin)", "dob": "1947-08-15", "address": "Paraguay", "label": "negative", "script": "latin" }, { "name": "laura farley (latin)", "dob": "1974-01-18", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Yelena Yevtyukhova (latin)", "dob": "1970-8-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "heather taylor (latin)", "dob": "1979-12-12", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Мансур Солтаев (cyrillic)", "dob": "1978-6-13", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Arkady Rotenberh (latin)", "dob": "1951-12-15", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "راوي جزار (arabic)", "dob": "1946-09-29", "address": "Christmas Island", "label": "negative", "script": "arabic" }, { "name": "مُتعب قحطان (arabic)", "dob": "2000-12-23", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "nicolas george (latin)", "dob": "1973-07-13", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Maxime Mocom (latin)", "dob": "1978-12-30", "address": "Central African Republic", "label": "positive", "script": "latin" }, { "name": "alex lebon (latin)", "dob": "1953-10-09", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Andrei Troshev (latin)", "dob": "1953-4-5", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "michelle chevallier (latin)", "dob": "1960-12-10", "address": "Guam", "label": "negative", "script": "latin" }, { "name": "paul rousseau (latin)", "dob": "1993-03-30", "address": "République Dominicaine", "label": "negative", "script": "latin" }, { "name": "maggie chevallier (latin)", "dob": "1956-05-05", "address": "Malaisie", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 47% of variations that follow: Additionally, generate variations that perform these transformations: Add a title prefix (Mr., Dr., etc.), and Swap adjacent consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 47, "selected_rules": [ "add_random_leading_title", "swap_adjacent_consonants" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 47 } } }, { "seed_identities_with_labels": [ { "name": "Faouzi CAN'AN (latin)", "dob": "1943-6-7", "address": "Venezuela", "label": "positive", "script": "latin" }, { "name": "nicole fischer (latin)", "dob": "1970-05-07", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "lucas rocher (latin)", "dob": "1957-03-03", "address": "Heard et McDonald (Îles)", "label": "negative", "script": "latin" }, { "name": "Ajmal Rahmani (latin)", "dob": "1982-1-1", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "audrey dubois (latin)", "dob": "2005-08-17", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "verónica ponce (latin)", "dob": "2005-07-06", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "camille thierry (latin)", "dob": "1953-05-08", "address": "République centrafricaine", "label": "negative", "script": "latin" }, { "name": "Михаил Авдеев (cyrillic)", "dob": "1977-3-6", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "thomas guillou (latin)", "dob": "1951-09-15", "address": "Botswana", "label": "negative", "script": "latin" }, { "name": "Nataliya KHORSHEVA (latin)", "dob": "1972-7-14", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "laetitia guyot (latin)", "dob": "2003-05-21", "address": "Géorgie", "label": "negative", "script": "latin" }, { "name": "صدّام بنو الأسمر (arabic)", "dob": "1986-02-02", "address": "Montserrat", "label": "negative", "script": "arabic" }, { "name": "ульян яковлева (cyrillic)", "dob": "1999-05-04", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "Viktor Ignatov (latin)", "dob": "1968-10-15", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "emmanuelle besnard (latin)", "dob": "1977-11-19", "address": "Monaco", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 21% of variations that follow: Additionally, generate variations that: Swap adjacent syllables. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 21, "selected_rules": [ "swap_adjacent_syllables" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 21 } } }, { "seed_identities_with_labels": [ { "name": "juliette foucher (latin)", "dob": "1964-06-03", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "латьо кодуков (cyrillic)", "dob": "1999-08-15", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "leire arnaiz (latin)", "dob": "2006-08-14", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "аникита соловьева (cyrillic)", "dob": "1957-06-18", "address": "United States Virgin Islands", "label": "negative", "script": "cyrillic" }, { "name": "manu tena (latin)", "dob": "1979-04-06", "address": "Afganistán", "label": "negative", "script": "latin" }, { "name": "Andrey Lavrishchev (latin)", "dob": "1959-10-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Артем Кирьянов (cyrillic)", "dob": "1977-1-12", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Alex Schetinin (latin)", "dob": "1987-8-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "graciela gomis (latin)", "dob": "1944-08-26", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "samu galván (latin)", "dob": "1973-09-23", "address": "Chipre", "label": "negative", "script": "latin" }, { "name": "telmo acevedo (latin)", "dob": "1978-04-18", "address": "Saint Kitts y Nevis", "label": "negative", "script": "latin" }, { "name": "heather lowery (latin)", "dob": "1931-01-10", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Muhsin al-Zibin (latin)", "dob": "1973-7-1", "address": "Kuwait", "label": "positive", "script": "latin" }, { "name": "rubén donoso (latin)", "dob": "1926-04-22", "address": "Filipinas", "label": "negative", "script": "latin" }, { "name": "Raja Salame (latin)", "dob": "1960-8-15", "address": "Lebanon", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 13% of variations that follow: Additionally, generate variations that perform these transformations: Abbreviate name parts, Swap adjacent syllables, and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 13, "selected_rules": [ "shorten_name_to_abbreviations", "swap_adjacent_syllables", "swap_random_letter" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "swap_adjacent_syllables": "Swap adjacent syllables", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 13 } } }, { "seed_identities_with_labels": [ { "name": "Hiran CHARI-APHAPHON (latin)", "dob": "1950-1-3", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "joanna richardson (latin)", "dob": "1957-02-04", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Дзмітрый Баскаў (cyrillic)", "dob": "1978-8-25", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "aitor cruz (latin)", "dob": "1986-12-21", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "monica wilson (latin)", "dob": "1930-07-04", "address": "Timor-Leste", "label": "negative", "script": "latin" }, { "name": "الینا رسته (arabic)", "dob": "2001-06-07", "address": "Iceland", "label": "negative", "script": "arabic" }, { "name": "victoria brewer (latin)", "dob": "1986-02-22", "address": "Ukraine", "label": "negative", "script": "latin" }, { "name": "Medny Kadyrova (latin)", "dob": "1978-9-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Anatoly Bifov (latin)", "dob": "1963-1-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "anthony colon (latin)", "dob": "2006-11-29", "address": "Martinique", "label": "negative", "script": "latin" }, { "name": "georges colin (latin)", "dob": "1945-12-18", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Banyar Moe (latin)", "dob": "1947-8-14", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "jack lloyd (latin)", "dob": "1950-03-03", "address": "Saint Helena", "label": "negative", "script": "latin" }, { "name": "ناردين طزيز (arabic)", "dob": "1960-09-24", "address": "Syria", "label": "High Risk", "script": "arabic" }, { "name": "kimberly harris (latin)", "dob": "1954-04-26", "address": "Kenya", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 12% of variations that follow: Additionally, generate variations that perform these transformations: Remove all spaces, Swap adjacent syllables, and Replace random consonants with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 12, "selected_rules": [ "remove_all_spaces", "swap_adjacent_syllables", "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "swap_adjacent_syllables": "Swap adjacent syllables", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 12 } } }, { "seed_identities_with_labels": [ { "name": "marie bonnin (latin)", "dob": "1998-01-09", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Vakhtang Gomelauri (latin)", "dob": "1975-12-24", "address": "Georgia", "label": "positive", "script": "latin" }, { "name": "Андрей Горохов (cyrillic)", "dob": "1960-1-13", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Svetlana Emilianova (latin)", "dob": "1971-10-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Ilyas Umahanov (latin)", "dob": "1957-3-27", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Zaki Ararawi (latin)", "dob": "1961-5-23", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "salomón alemany (latin)", "dob": "1993-02-08", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "claude martineau (latin)", "dob": "1943-11-26", "address": "Corée, Sud", "label": "negative", "script": "latin" }, { "name": "arthur fournier (latin)", "dob": "1971-05-30", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "suzanne reynaud (latin)", "dob": "1991-04-15", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "aurore laroche (latin)", "dob": "1973-07-07", "address": "Andorre", "label": "negative", "script": "latin" }, { "name": "capucine bernier (latin)", "dob": "1963-05-12", "address": "Italie", "label": "negative", "script": "latin" }, { "name": "christophe dupuis (latin)", "dob": "2005-01-01", "address": "Turquie", "label": "negative", "script": "latin" }, { "name": "деспинка иликьов (cyrillic)", "dob": "1978-06-04", "address": "Bahrain", "label": "negative", "script": "cyrillic" }, { "name": "عدوي بارق (arabic)", "dob": "1942-10-20", "address": "Sudan", "label": "High Risk", "script": "arabic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 7 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 50% Light, 50% Medium, and also include 27% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random vowel, and Use first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 27, "selected_rules": [ "remove_random_vowel", "initial_only_first_name" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "initial_only_first_name": "Use first name initial with last name" }, "percentage": 27 } } }, { "seed_identities_with_labels": [ { "name": "helen gonzalez (latin)", "dob": "2001-12-28", "address": "India", "label": "negative", "script": "latin" }, { "name": "Oleg Khorokhordin (latin)", "dob": "1972-4-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "شافع بو مدين (arabic)", "dob": "2007-08-01", "address": "Kazakhstan", "label": "negative", "script": "arabic" }, { "name": "chantal dupuis (latin)", "dob": "1952-02-06", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "richard bridges (latin)", "dob": "1934-10-24", "address": "Morocco", "label": "negative", "script": "latin" }, { "name": "david roger (latin)", "dob": "1999-09-05", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "katie nichols (latin)", "dob": "1926-11-03", "address": "Faroe Islands", "label": "negative", "script": "latin" }, { "name": "Владимир Артяков (cyrillic)", "dob": "1959-7-30", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "travis davis (latin)", "dob": "1986-06-25", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Muhammad Adhiguna (latin)", "dob": "1996-7-30", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "адриан полякова (cyrillic)", "dob": "1992-04-18", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "Anton Cherepennikov (latin)", "dob": "1983-5-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Vinai PICHAYOT (latin)", "dob": "1957-12-1", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "michael johnson (latin)", "dob": "1995-04-06", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "shawn cherry (latin)", "dob": "1929-02-25", "address": "Gambia", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 100% Medium, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 27% of variations that follow: Additionally, generate variations that perform these transformations: Abbreviate name parts, and Replace random consonants with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 27, "selected_rules": [ "shorten_name_to_abbreviations", "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 27 } } }, { "seed_identities_with_labels": [ { "name": "noa rocha (latin)", "dob": "1993-06-19", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "antoine vaillant (latin)", "dob": "1970-02-11", "address": "Wallis et Futuna (Îles)", "label": "negative", "script": "latin" }, { "name": "charlotte lecoq (latin)", "dob": "1949-05-16", "address": "Slovénie", "label": "negative", "script": "latin" }, { "name": "Abualfazl Nazeri (latin)", "dob": "1969-9-14", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "amber graham (latin)", "dob": "1974-09-10", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "william raynaud (latin)", "dob": "1963-12-17", "address": "Gibraltar", "label": "negative", "script": "latin" }, { "name": "Mykola Vorobei (latin)", "dob": "1963-5-4", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "yves bousquet (latin)", "dob": "1959-03-11", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "سلطان اسعد (arabic)", "dob": "1962-10-31", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "Alexei Oleksin (latin)", "dob": "1966-10-29", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "evita coronado (latin)", "dob": "1973-04-17", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "богодар титаренко (cyrillic)", "dob": "1971-12-30", "address": "Marshall Islands", "label": "negative", "script": "cyrillic" }, { "name": "michèle marty (latin)", "dob": "1970-10-16", "address": "Mongolie", "label": "negative", "script": "latin" }, { "name": "سعدي قبيلة هذيل البقوم (arabic)", "dob": "1950-12-13", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "Ahmed Afraah (latin)", "dob": "1985-8-17", "address": "Maldives", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 11% of variations that follow: Additionally, generate variations that perform these transformations: Reorder name parts, Replace random vowels with different vowels, and Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 11, "selected_rules": [ "name_parts_permutations", "replace_random_vowel_with_random_vowel", "remove_random_vowel" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "remove_random_vowel": "Remove a random vowel" }, "percentage": 11 } } }, { "seed_identities_with_labels": [ { "name": "marcus lane (latin)", "dob": "1932-10-25", "address": "Portugal", "label": "negative", "script": "latin" }, { "name": "sandra patton (latin)", "dob": "1992-10-12", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "kimberly aguilar (latin)", "dob": "1982-07-14", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "أناهيد حوسة (arabic)", "dob": "1975-09-07", "address": "Oman", "label": "negative", "script": "arabic" }, { "name": "laura bullock (latin)", "dob": "2001-10-25", "address": "Swaziland", "label": "negative", "script": "latin" }, { "name": "dorita solsona (latin)", "dob": "1937-08-06", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "شهير قضاعة (arabic)", "dob": "1932-11-14", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "Vladimir Pavlenko (latin)", "dob": "1962-4-14", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "alexandrie hardy (latin)", "dob": "1967-12-21", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Кирилл Царёв (cyrillic)", "dob": "1978-9-25", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "scott taylor (latin)", "dob": "1955-02-03", "address": "Macao", "label": "negative", "script": "latin" }, { "name": "Rady Khabirov (latin)", "dob": "1964-3-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "morgan estrada (latin)", "dob": "1939-06-23", "address": "Puerto Rico", "label": "negative", "script": "latin" }, { "name": "Igor Afanasyev (latin)", "dob": "1968-9-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Pye Tayza (latin)", "dob": "1987-1-29", "address": "Burma", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 70% Light, 30% Medium, and orthographic similarity: 100% Medium, and also include 42% of variations that follow: Additionally, generate variations that perform these transformations: Replace random consonants with different consonants, and Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 42, "selected_rules": [ "replace_random_consonant_with_random_consonant", "name_parts_permutations" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "name_parts_permutations": "Reorder name parts" }, "percentage": 42 } } }, { "seed_identities_with_labels": [ { "name": "Mohsen NAFTCHI (latin)", "dob": "1988-2-16", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "abilio revilla (latin)", "dob": "1997-01-06", "address": "India", "label": "negative", "script": "latin" }, { "name": "franck leclercq (latin)", "dob": "1991-11-01", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Ihar Kenjuch (latin)", "dob": "1980-1-21", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "محمود آل معيض (arabic)", "dob": "2002-09-12", "address": "Colombia", "label": "negative", "script": "arabic" }, { "name": "primitivo miró (latin)", "dob": "1971-07-29", "address": "Sri Lanka", "label": "negative", "script": "latin" }, { "name": "custodio ribas (latin)", "dob": "1943-07-05", "address": "Zambia", "label": "negative", "script": "latin" }, { "name": "javier dalton (latin)", "dob": "1941-12-11", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "rufino nicolau (latin)", "dob": "1955-04-21", "address": "Mongolia", "label": "negative", "script": "latin" }, { "name": "Irina Kaverzina (latin)", "dob": "1986-7-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Анна Суровикина (cyrillic)", "dob": "1973-7-4", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "carrie norman (latin)", "dob": "1971-06-08", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "basilio guardia (latin)", "dob": "1991-01-18", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Andrej Grigor'ev (latin)", "dob": "1963-1-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "آتنا کرمانی (arabic)", "dob": "1972-08-28", "address": "Iran", "label": "High Risk", "script": "arabic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 47% of variations that follow: Additionally, generate variations that perform these transformations: Duplicate a random letter, Swap adjacent syllables, and Delete a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 47, "selected_rules": [ "duplicate_random_letter_as_double_letter", "swap_adjacent_syllables", "delete_random_letter" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "swap_adjacent_syllables": "Swap adjacent syllables", "delete_random_letter": "Delete a random letter" }, "percentage": 47 } } }, { "seed_identities_with_labels": [ { "name": "تاج الدّين مزينة (arabic)", "dob": "1935-08-17", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "néstor mayol (latin)", "dob": "1950-03-18", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Zhiguang Ou (latin)", "dob": "1961-8-16", "address": "China", "label": "positive", "script": "latin" }, { "name": "Dirk Troendle (latin)", "dob": "1956-3-11", "address": "Germany", "label": "positive", "script": "latin" }, { "name": "paula summers (latin)", "dob": "1971-12-10", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "mafalda gomes (latin)", "dob": "1968-11-14", "address": "Sudão", "label": "negative", "script": "latin" }, { "name": "Татьяна Егерева (cyrillic)", "dob": "1966-4-20", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "enzo soares (latin)", "dob": "2007-07-10", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "ованес иванов (cyrillic)", "dob": "1984-11-30", "address": "Guinea", "label": "negative", "script": "cyrillic" }, { "name": "Natalia Beglova (latin)", "dob": "1955-11-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Amber Sun (latin)", "dob": "1969-3-23", "address": "Taiwan", "label": "positive", "script": "latin" }, { "name": "arthur pruvost (latin)", "dob": "1950-06-03", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "rita teixeira (latin)", "dob": "2000-11-22", "address": "Samoa Americana", "label": "negative", "script": "latin" }, { "name": "clara correia (latin)", "dob": "1967-10-09", "address": "Bósnia e Herzegovina", "label": "negative", "script": "latin" }, { "name": "francisca assunção (latin)", "dob": "1926-05-25", "address": "Canadá", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 30% of variations that follow: Additionally, generate variations that: Replace random consonants with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 30, "selected_rules": [ "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 30 } } }, { "seed_identities_with_labels": [ { "name": "samantha jackson (latin)", "dob": "1958-11-07", "address": "Congo", "label": "negative", "script": "latin" }, { "name": "david poulain (latin)", "dob": "1935-04-03", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Aleksandr Vetenevich (latin)", "dob": "1976-6-20", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "steven kim (latin)", "dob": "1953-10-03", "address": "Faroe Islands", "label": "negative", "script": "latin" }, { "name": "susan robinson (latin)", "dob": "1990-09-04", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "محمدمهدي جلالی (arabic)", "dob": "1979-07-31", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "émilie renard (latin)", "dob": "1947-10-31", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Egor Mozhaev (latin)", "dob": "1982-5-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Viktor Bondarev (latin)", "dob": "1959-12-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "veronica frazier (latin)", "dob": "1967-06-12", "address": "Cook Islands", "label": "negative", "script": "latin" }, { "name": "Vladimir Polin (latin)", "dob": "1962-8-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "lindsey richardson (latin)", "dob": "1966-02-18", "address": "Antarctica (the territory South of 60 deg S)", "label": "negative", "script": "latin" }, { "name": "joyce hernandez (latin)", "dob": "1950-05-03", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Генадзь Казакевiч (cyrillic)", "dob": "1975-2-14", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "омуртаг клатуров (cyrillic)", "dob": "1956-01-28", "address": "Albania", "label": "negative", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 10% Light, 50% Medium, 40% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 41% of variations that follow: Additionally, generate variations that: Swap adjacent syllables. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 41, "selected_rules": [ "swap_adjacent_syllables" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 41 } } }, { "seed_identities_with_labels": [ { "name": "émile peltier (latin)", "dob": "2001-12-31", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "رشدي ازحيمان (arabic)", "dob": "1970-08-01", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "Min Shi (latin)", "dob": "1979-5-20", "address": "China", "label": "positive", "script": "latin" }, { "name": "Valery Pakhnits (latin)", "dob": "1953-1-22", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "hector bayo (latin)", "dob": "1941-11-13", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "alain hoarau (latin)", "dob": "1964-02-13", "address": "Bahrain", "label": "negative", "script": "latin" }, { "name": "derrick hawkins (latin)", "dob": "1969-10-02", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Jihad Kansou (latin)", "dob": "1966-2-10", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "mireia martín (latin)", "dob": "1989-05-20", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "lucie michaud (latin)", "dob": "1960-12-29", "address": "Togo", "label": "negative", "script": "latin" }, { "name": "Ivan Demchenko (latin)", "dob": "1960-9-27", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Алексей Мордашов (cyrillic)", "dob": "1965-9-26", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "афанасий дементьев (cyrillic)", "dob": "1935-02-25", "address": "French Guiana", "label": "negative", "script": "cyrillic" }, { "name": "auguste jacquot (latin)", "dob": "1935-12-23", "address": "Sri Lanka", "label": "negative", "script": "latin" }, { "name": "véronique lemonnier (latin)", "dob": "1942-06-30", "address": "Singapour", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 11% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random vowel, Delete a random letter, and Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 11, "selected_rules": [ "remove_random_vowel", "delete_random_letter", "remove_random_consonant" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "delete_random_letter": "Delete a random letter", "remove_random_consonant": "Remove a random consonant" }, "percentage": 11 } } }, { "seed_identities_with_labels": [ { "name": "RASOUL JALILI (latin)", "dob": "1961-8-19", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "deanna cummings (latin)", "dob": "1992-03-25", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "kelly robles (latin)", "dob": "1933-12-03", "address": "Croatia", "label": "negative", "script": "latin" }, { "name": "‫فراس‬ ‫احمد‬ (arabic)", "dob": "1991-5-14", "address": "South Africa", "label": "positive", "script": "arabic" }, { "name": "Aysen Nikolayev (latin)", "dob": "1972-1-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "susan jones (latin)", "dob": "1948-09-19", "address": "Congo", "label": "negative", "script": "latin" }, { "name": "joseph miller (latin)", "dob": "1988-11-30", "address": "Saint Kitts and Nevis", "label": "negative", "script": "latin" }, { "name": "آتنا سلطانی (arabic)", "dob": "1924-12-18", "address": "American Samoa", "label": "negative", "script": "arabic" }, { "name": "melinda martin (latin)", "dob": "1990-12-19", "address": "Wallis and Futuna", "label": "negative", "script": "latin" }, { "name": "Viktor Netyksho (latin)", "dob": "1966-9-8", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "camila melo (latin)", "dob": "1990-02-04", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "petra gomes (latin)", "dob": "1984-08-15", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "joseph guillou (latin)", "dob": "1938-10-01", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Hathaiwan WORAWATWICHAI (latin)", "dob": "1956-1-9", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "олимпий козлов (cyrillic)", "dob": "1969-09-30", "address": "Crimea", "label": "High Risk", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 18% of variations that follow: Additionally, generate variations that perform these transformations: Replace random consonants with different consonants, Remove a random consonant, and Abbreviate name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 18, "selected_rules": [ "replace_random_consonant_with_random_consonant", "remove_random_consonant", "shorten_name_to_abbreviations" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "remove_random_consonant": "Remove a random consonant", "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 18 } } }, { "seed_identities_with_labels": [ { "name": "barry avery (latin)", "dob": "1944-03-28", "address": "Niue", "label": "negative", "script": "latin" }, { "name": "Zakhar KALASHOV (latin)", "dob": "1953-3-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Владимир Артяков (cyrillic)", "dob": "1959-7-30", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "coral cuadrado (latin)", "dob": "2006-01-12", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Waseem al-Assad (latin)", "dob": "1980-7-18", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "sherry martin (latin)", "dob": "1924-11-28", "address": "Germany", "label": "negative", "script": "latin" }, { "name": "samuel riggs (latin)", "dob": "1952-11-16", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "wayne horne (latin)", "dob": "1999-04-07", "address": "Norway", "label": "negative", "script": "latin" }, { "name": "allen stanley (latin)", "dob": "1986-04-02", "address": "Turks and Caicos Islands", "label": "negative", "script": "latin" }, { "name": "Olexiy KOSTRUBITSKY (latin)", "dob": "1978-8-24", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "светлана шубин (cyrillic)", "dob": "1977-04-27", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "Waad Qado (latin)", "dob": "1971-1-1", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "پارسا قاضی (arabic)", "dob": "1986-07-19", "address": "Uganda", "label": "negative", "script": "arabic" }, { "name": "cheryl mueller (latin)", "dob": "1959-03-03", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "claudine lacroix (latin)", "dob": "1969-03-22", "address": "Central African Republic", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 54% of variations that follow: Additionally, generate variations that perform these transformations: Replace spaces with special characters, Convert name to initials, and Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 54, "selected_rules": [ "replace_spaces_with_random_special_characters", "shorten_name_to_initials", "remove_all_spaces" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "shorten_name_to_initials": "Convert name to initials", "remove_all_spaces": "Remove all spaces" }, "percentage": 54 } } }, { "seed_identities_with_labels": [ { "name": "Olga PLAKSINA (latin)", "dob": "1974-3-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "severino cuadrado (latin)", "dob": "1965-07-16", "address": "Kuwait", "label": "negative", "script": "latin" }, { "name": "Bernard Mheshe (latin)", "dob": "1974-10-10", "address": "Congo, Democratic Republic of the", "label": "positive", "script": "latin" }, { "name": "gloria palomo (latin)", "dob": "1930-08-05", "address": "Bhután", "label": "negative", "script": "latin" }, { "name": "Григорий Карасин (cyrillic)", "dob": "1949-8-23", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "lilia múñiz (latin)", "dob": "1947-10-15", "address": "Francia", "label": "negative", "script": "latin" }, { "name": "سوگند صارمی (arabic)", "dob": "1952-11-04", "address": "Iran", "label": "High Risk", "script": "arabic" }, { "name": "Anna Tausent (latin)", "dob": "1990-1-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "cécile duhamel (latin)", "dob": "1946-12-24", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "morena gimenez (latin)", "dob": "1965-10-07", "address": "Bhután", "label": "negative", "script": "latin" }, { "name": "габи куртажова (cyrillic)", "dob": "1955-05-12", "address": "Isle of Man", "label": "negative", "script": "cyrillic" }, { "name": "bertrand david (latin)", "dob": "1925-05-20", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Dmitry Ishchenko (latin)", "dob": "1983-1-24", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "célina lesage (latin)", "dob": "1955-12-13", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "genoveva macías (latin)", "dob": "1991-10-03", "address": "Venezuela", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 100% Medium, and also include 49% of variations that follow: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), and Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 49, "selected_rules": [ "add_random_trailing_title", "remove_all_spaces" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "remove_all_spaces": "Remove all spaces" }, "percentage": 49 } } }, { "seed_identities_with_labels": [ { "name": "Ivan Yermakov (latin)", "dob": "1986-4-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Hossein Arani (latin)", "dob": "1964-12-22", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Muhammad Rahmun (latin)", "dob": "1957-4-1", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "santiago cunha (latin)", "dob": "1971-07-18", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "rebecca banks (latin)", "dob": "1983-03-12", "address": "Liberia", "label": "negative", "script": "latin" }, { "name": "فوّاز الألجاوي (arabic)", "dob": "1934-05-20", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "steven stone (latin)", "dob": "1979-10-07", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Sergei Kudryashov (latin)", "dob": "1967-7-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "lori lindsey (latin)", "dob": "1949-01-23", "address": "Jordan", "label": "negative", "script": "latin" }, { "name": "بهار اشتری (arabic)", "dob": "1971-08-18", "address": "Portugal", "label": "negative", "script": "arabic" }, { "name": "juan peterson (latin)", "dob": "1959-08-27", "address": "Honduras", "label": "negative", "script": "latin" }, { "name": "carrie todd (latin)", "dob": "2000-01-26", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "حسن دقو (arabic)", "dob": "1985-2-1", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "bruna henriques (latin)", "dob": "1945-03-24", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "amanda lewis (latin)", "dob": "2003-05-18", "address": "Burundi", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 7 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 70% Light, 30% Medium, and also include 32% of variations that follow: Additionally, generate variations that perform these transformations: Convert name to initials, and Replace double letters with a single letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 32, "selected_rules": [ "shorten_name_to_initials", "replace_double_letters_with_single_letter" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 32 } } }, { "seed_identities_with_labels": [ { "name": "Александap Добрић (cyrillic)", "dob": "1991-7-15", "address": "Bosnia and Herzegovina", "label": "positive", "script": "cyrillic" }, { "name": "bridget campos (latin)", "dob": "1980-10-10", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "segismundo enríquez (latin)", "dob": "1962-06-23", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "hunter adams (latin)", "dob": "1939-07-03", "address": "Martinique", "label": "negative", "script": "latin" }, { "name": "jacqueline johnson (latin)", "dob": "1938-03-10", "address": "Tokelau", "label": "negative", "script": "latin" }, { "name": "Aleksandr Gaevoi (latin)", "dob": "1986-6-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Sergei Yeliseyev (latin)", "dob": "1971-5-5", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "dylan miranda (latin)", "dob": "1988-01-31", "address": "Lesotho", "label": "negative", "script": "latin" }, { "name": "Karina Kadyrova (latin)", "dob": "2000-1-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Aysen Nikolayev (latin)", "dob": "1972-1-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "carlos jones (latin)", "dob": "1959-09-25", "address": "Martinique", "label": "negative", "script": "latin" }, { "name": "орина кащенко (cyrillic)", "dob": "1991-01-29", "address": "Bahamas", "label": "negative", "script": "cyrillic" }, { "name": "monique schroeder (latin)", "dob": "1951-01-10", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "святослав захаров (cyrillic)", "dob": "1972-06-13", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "arturo reguera (latin)", "dob": "1945-04-15", "address": "Bolivia", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 36% of variations that follow: Additionally, generate variations that perform these transformations: Reorder name parts, Replace spaces with special characters, and Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 36, "selected_rules": [ "name_parts_permutations", "replace_spaces_with_random_special_characters", "add_random_leading_title" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts", "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 36 } } }, { "seed_identities_with_labels": [ { "name": "Laith Al-Khazali (latin)", "dob": "1975-10-14", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "cruz franco (latin)", "dob": "2003-05-28", "address": "Italia", "label": "negative", "script": "latin" }, { "name": "مظهر آل سعود (arabic)", "dob": "1991-07-29", "address": "Cambodia", "label": "negative", "script": "arabic" }, { "name": "amy morris (latin)", "dob": "1963-02-21", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Oleg Romanenko (latin)", "dob": "1963-10-5", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "аскольд устинова (cyrillic)", "dob": "1959-03-21", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "florina alvarez (latin)", "dob": "1989-10-18", "address": "Eslovaquia", "label": "negative", "script": "latin" }, { "name": "gilbert colin (latin)", "dob": "1971-12-08", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "gabriel neves (latin)", "dob": "1929-11-16", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "silvio agustín (latin)", "dob": "1942-01-11", "address": "Zimbabwe", "label": "negative", "script": "latin" }, { "name": "áurea camacho (latin)", "dob": "1941-11-18", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Andrei Rybakov (latin)", "dob": "1976-7-11", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "Наталья Орлова (cyrillic)", "dob": "1969-8-29", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "matías domínguez (latin)", "dob": "1974-08-15", "address": "República Popular Democrática de Corea", "label": "negative", "script": "latin" }, { "name": "Iyad Makhlouf (latin)", "dob": "1973-1-21", "address": "Syria", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 22% of variations that follow: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), Delete a random letter, and Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 22, "selected_rules": [ "add_random_trailing_title", "delete_random_letter", "remove_random_consonant" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "delete_random_letter": "Delete a random letter", "remove_random_consonant": "Remove a random consonant" }, "percentage": 22 } } }, { "seed_identities_with_labels": [ { "name": "Николай Воробей (cyrillic)", "dob": "1963-5-4", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "alice albert (latin)", "dob": "1983-02-08", "address": "Lettonie", "label": "negative", "script": "latin" }, { "name": "لورين النعنيش (arabic)", "dob": "1938-11-09", "address": "United Kingdom", "label": "negative", "script": "arabic" }, { "name": "chantal gérard (latin)", "dob": "1981-03-10", "address": "Ethiopie", "label": "negative", "script": "latin" }, { "name": "adrien allain (latin)", "dob": "1978-05-29", "address": "Chypre", "label": "negative", "script": "latin" }, { "name": "alexandria guillaume (latin)", "dob": "1989-04-08", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "امیرمهدی صارمی (arabic)", "dob": "1977-11-09", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "Tun Naing (latin)", "dob": "1963-4-30", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "Yunnian GUO (latin)", "dob": "1961-12-5", "address": "China", "label": "positive", "script": "latin" }, { "name": "Alireza Chegha-Marani (latin)", "dob": "1962-8-11", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "marthe brunel (latin)", "dob": "1973-02-21", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "christopher allen (latin)", "dob": "1948-10-30", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Chapo ISIDRO (latin)", "dob": "1982-6-19", "address": "Mexico", "label": "positive", "script": "latin" }, { "name": "susanne aubry (latin)", "dob": "1995-01-15", "address": "Australie", "label": "negative", "script": "latin" }, { "name": "erasmo silva (latin)", "dob": "1952-09-06", "address": "Bolivia", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 22% of variations that follow: Additionally, generate variations that perform these transformations: Replace double letters with a single letter, and Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 22, "selected_rules": [ "replace_double_letters_with_single_letter", "remove_random_consonant" ], "rule_descriptions": { "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "remove_random_consonant": "Remove a random consonant" }, "percentage": 22 } } }, { "seed_identities_with_labels": [ { "name": "margaud lopez (latin)", "dob": "1976-06-03", "address": "Zaïre", "label": "negative", "script": "latin" }, { "name": "Alexander Beglov (latin)", "dob": "1956-5-19", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Chapito ISIDRO (latin)", "dob": "1982-6-19", "address": "Mexico", "label": "positive", "script": "latin" }, { "name": "alfred bouvet (latin)", "dob": "1979-09-08", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "jacques brunel (latin)", "dob": "1986-12-29", "address": "Yougoslavie", "label": "negative", "script": "latin" }, { "name": "luce bodin (latin)", "dob": "2007-09-29", "address": "Lesotho", "label": "negative", "script": "latin" }, { "name": "ірина таран (cyrillic)", "dob": "1981-06-13", "address": "Brunei Darussalam", "label": "negative", "script": "cyrillic" }, { "name": "Iurii Hotsaniuk (latin)", "dob": "1966-7-18", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "давыд зыков (cyrillic)", "dob": "1987-04-17", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "michèle renard (latin)", "dob": "1973-11-14", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "cleto rebollo (latin)", "dob": "1984-10-08", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "adrienne pereira (latin)", "dob": "1934-05-24", "address": "Svalbard et Jan Mayen (Îles)", "label": "negative", "script": "latin" }, { "name": "aurélie hamel (latin)", "dob": "1958-12-19", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Алексей СИМАНОВСКИЙ (cyrillic)", "dob": "1955-9-6", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Aleksey MOZGOVOY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 100% Medium, and also include 14% of variations that follow: Additionally, generate variations that perform these transformations: Reorder name parts, Convert name to initials, and Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 14, "selected_rules": [ "name_parts_permutations", "shorten_name_to_initials", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts", "shorten_name_to_initials": "Convert name to initials", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 14 } } }, { "seed_identities_with_labels": [ { "name": "joaquim assunção (latin)", "dob": "2003-08-14", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "marine morvan (latin)", "dob": "1974-01-30", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Maria Faassen (latin)", "dob": "1985-4-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "émilie faivre (latin)", "dob": "1987-02-03", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Naser Neser (latin)", "dob": "1963-4-20", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "حميد شرف (arabic)", "dob": "1992-07-19", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "Andrei Trofimov (latin)", "dob": "1972-8-14", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "michelle launay (latin)", "dob": "1957-01-25", "address": "Afrique du sud", "label": "negative", "script": "latin" }, { "name": "nathalie buisson (latin)", "dob": "2000-02-13", "address": "Zimbabwe", "label": "negative", "script": "latin" }, { "name": "filipa carvalho (latin)", "dob": "1955-06-19", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "حلما جلیلی (arabic)", "dob": "1958-04-20", "address": "Hungary", "label": "negative", "script": "arabic" }, { "name": "marcel turpin (latin)", "dob": "1995-10-17", "address": "Érythrée", "label": "negative", "script": "latin" }, { "name": "Umm Layth (latin)", "dob": "1994-5-11", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Сергей Митин (cyrillic)", "dob": "1951-6-14", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "renée traore (latin)", "dob": "1959-07-04", "address": "Thailande", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 9 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 14% of variations that follow: Additionally, generate variations that perform these transformations: Replace random vowels with different vowels, Swap adjacent consonants, and Delete a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 14, "selected_rules": [ "replace_random_vowel_with_random_vowel", "swap_adjacent_consonants", "delete_random_letter" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "swap_adjacent_consonants": "Swap adjacent consonants", "delete_random_letter": "Delete a random letter" }, "percentage": 14 } } }, { "seed_identities_with_labels": [ { "name": "paulette renaud (latin)", "dob": "1927-06-05", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "kevin wilson (latin)", "dob": "1967-03-13", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Aleksander Zhuchkovskiy (latin)", "dob": "1986-9-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Ruslan Sarkisov (latin)", "dob": "1978-12-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "وفيق هاشم (arabic)", "dob": "1928-06-07", "address": "Australia", "label": "negative", "script": "arabic" }, { "name": "virginie gauthier (latin)", "dob": "2001-02-26", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "élisabeth fleury (latin)", "dob": "1971-08-03", "address": "Cap Vert", "label": "negative", "script": "latin" }, { "name": "Николай Коломейцев (cyrillic)", "dob": "1956-9-1", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "pauline meunier (latin)", "dob": "1950-03-11", "address": "Saint-Kitts et Nevis", "label": "negative", "script": "latin" }, { "name": "Chapito ISIDRO (latin)", "dob": "1982-6-19", "address": "Mexico", "label": "positive", "script": "latin" }, { "name": "dorothée coste (latin)", "dob": "1992-12-01", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "claude laine (latin)", "dob": "2006-02-01", "address": "Vierges britanniques (Îles)", "label": "negative", "script": "latin" }, { "name": "marcel laroche (latin)", "dob": "1968-06-19", "address": "Macau", "label": "negative", "script": "latin" }, { "name": "'Adnan Yusuf (latin)", "dob": "1956-6-6", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "مؤيّد ابو السعود (arabic)", "dob": "1988-01-29", "address": "Iraq", "label": "High Risk", "script": "arabic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 42% of variations that follow: Additionally, generate variations that perform these transformations: Insert a random letter, and Replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 42, "selected_rules": [ "insert_random_letter", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 42 } } }, { "seed_identities_with_labels": [ { "name": "dora olivares (latin)", "dob": "1964-05-05", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "علی اصغر کرمانی (arabic)", "dob": "1973-06-16", "address": "Bermuda", "label": "negative", "script": "arabic" }, { "name": "marino parejo (latin)", "dob": "1997-02-03", "address": "India", "label": "negative", "script": "latin" }, { "name": "Kifah Milhem (latin)", "dob": "1961-11-28", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "eduardo batalla (latin)", "dob": "1951-12-23", "address": "Países Bajos", "label": "negative", "script": "latin" }, { "name": "robert bradford (latin)", "dob": "1939-09-18", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "pascual calvo (latin)", "dob": "1989-10-17", "address": "República Árabe Siria", "label": "negative", "script": "latin" }, { "name": "Юлия Афанасьева (cyrillic)", "dob": "1988-2-24", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "marcelo egea (latin)", "dob": "1998-05-12", "address": "España", "label": "negative", "script": "latin" }, { "name": "варлаам лобанова (cyrillic)", "dob": "1932-10-15", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "Franki Eskeda (latin)", "dob": "1994-10-23", "address": "Mexico", "label": "positive", "script": "latin" }, { "name": "grégoire moreau (latin)", "dob": "1969-10-27", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Abbass Asadrouz (latin)", "dob": "1971-3-23", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "malena mariscal (latin)", "dob": "1943-01-22", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Zelimkhan Mutsoev (latin)", "dob": "1959-10-13", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 23% of variations that follow: Additionally, generate variations that: Replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 23, "selected_rules": [ "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 23 } } }, { "seed_identities_with_labels": [ { "name": "michael salinas (latin)", "dob": "1988-04-29", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "donald austin (latin)", "dob": "1931-08-06", "address": "El Salvador", "label": "negative", "script": "latin" }, { "name": "eduarda barbosa (latin)", "dob": "1980-09-04", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Imre Laszloczki (latin)", "dob": "1961-9-26", "address": "Hungary", "label": "positive", "script": "latin" }, { "name": "barbara fernandez (latin)", "dob": "1954-08-14", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Дмитрий КАРАСАВИДИ (cyrillic)", "dob": "1985-7-9", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Aleksandra Oksenchuk (latin)", "dob": "1992-10-16", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "آیناز جهانی (arabic)", "dob": "1930-02-14", "address": "Iran", "label": "High Risk", "script": "arabic" }, { "name": "محيا کابلی (arabic)", "dob": "1953-08-28", "address": "Cape Verde", "label": "negative", "script": "arabic" }, { "name": "Boris Komotsky (latin)", "dob": "1956-1-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "daniel webb (latin)", "dob": "1992-04-13", "address": "Liechtenstein", "label": "negative", "script": "latin" }, { "name": "Anton Cherepennikov (latin)", "dob": "1983-5-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "dawn david (latin)", "dob": "1935-05-30", "address": "Holy See (Vatican City State)", "label": "negative", "script": "latin" }, { "name": "ignacia vizcaíno (latin)", "dob": "1993-10-19", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "michelle miller (latin)", "dob": "1952-03-28", "address": "Falkland Islands (Malvinas)", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 15% of variations that follow: Additionally, generate variations that perform these transformations: Convert name to initials, Insert a random letter, and Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 15, "selected_rules": [ "shorten_name_to_initials", "insert_random_letter", "add_random_trailing_title" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "insert_random_letter": "Insert a random letter", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 15 } } }, { "seed_identities_with_labels": [ { "name": "barry edwards (latin)", "dob": "1985-02-11", "address": "Isle of Man", "label": "negative", "script": "latin" }, { "name": "astrid simon (latin)", "dob": "1998-02-23", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "víctor tirado (latin)", "dob": "1945-03-03", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "зиновий носкова (cyrillic)", "dob": "1928-03-16", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "Дзмітрый Баскаў (cyrillic)", "dob": "1978-8-25", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Mohammed Saeed (latin)", "dob": "1977-12-4", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "Vadim Shuvalov (latin)", "dob": "1958-2-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ian dunn (latin)", "dob": "2003-05-19", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "amanda tucker (latin)", "dob": "1979-04-05", "address": "San Marino", "label": "negative", "script": "latin" }, { "name": "Abdul al-Mohammedawi (latin)", "dob": "1968-1-20", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "sierra dickerson (latin)", "dob": "1995-03-04", "address": "Serbia", "label": "negative", "script": "latin" }, { "name": "joseph clark (latin)", "dob": "1980-02-08", "address": "Guyana", "label": "negative", "script": "latin" }, { "name": "رضا لاچینی (arabic)", "dob": "1929-11-01", "address": "Congo", "label": "negative", "script": "arabic" }, { "name": "Vitaly Likhachev (latin)", "dob": "1964-2-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "yasmin rocha (latin)", "dob": "1936-03-28", "address": "Angola", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 38% of variations that follow: Additionally, generate variations that: Convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 38, "selected_rules": [ "shorten_name_to_initials" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials" }, "percentage": 38 } } }, { "seed_identities_with_labels": [ { "name": "станимир носков (cyrillic)", "dob": "1926-05-21", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "Khalil TRINIDAD (latin)", "dob": "1978-3-20", "address": "Philippines", "label": "positive", "script": "latin" }, { "name": "مُنجد ترابين (arabic)", "dob": "1988-03-01", "address": "Kazakhstan", "label": "negative", "script": "arabic" }, { "name": "aimée laroche (latin)", "dob": "1983-12-29", "address": "Ghana", "label": "negative", "script": "latin" }, { "name": "jeannine ledoux (latin)", "dob": "1947-09-14", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Александр Ветеневич (cyrillic)", "dob": "1976-6-20", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "pénélope gonzalez (latin)", "dob": "1925-04-02", "address": "Oman", "label": "negative", "script": "latin" }, { "name": "Qari Amjad (latin)", "dob": "1979-4-17", "address": "Afghanistan", "label": "positive", "script": "latin" }, { "name": "ileana casanova (latin)", "dob": "1978-07-08", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "clémence denis (latin)", "dob": "1996-04-27", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Vadim Shuvalov (latin)", "dob": "1958-2-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "adrien gauthier (latin)", "dob": "1961-06-04", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "amélie brunel (latin)", "dob": "1949-12-07", "address": "Tokelau", "label": "negative", "script": "latin" }, { "name": "luc maréchal (latin)", "dob": "1938-05-23", "address": "Réunion (La)", "label": "negative", "script": "latin" }, { "name": "Myo'ng-hun Ri (latin)", "dob": "1969-3-14", "address": "Korea, North", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 9 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 35% of variations that follow: Additionally, generate variations that perform these transformations: Convert name to initials, Replace spaces with special characters, and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 35, "selected_rules": [ "shorten_name_to_initials", "replace_spaces_with_random_special_characters", "swap_random_letter" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 35 } } }, { "seed_identities_with_labels": [ { "name": "Aleksei Chekunkov (latin)", "dob": "1980-10-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Igor STRELKOV (latin)", "dob": "1970-12-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "catherine duval (latin)", "dob": "1984-01-28", "address": "Cap Vert", "label": "negative", "script": "latin" }, { "name": "雅琴 吴 (chinese)", "dob": "1992-11-13", "address": "China", "label": "positive", "script": "chinese" }, { "name": "isabelle prévost (latin)", "dob": "1969-10-23", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Olga PLAKSINA (latin)", "dob": "1974-3-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "véronique carre (latin)", "dob": "1981-04-04", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "étienne denis (latin)", "dob": "1953-08-11", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "marcelle millet (latin)", "dob": "1940-03-10", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "чеслав ильин (cyrillic)", "dob": "1986-07-25", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "eugène guichard (latin)", "dob": "1950-01-12", "address": "Suisse", "label": "negative", "script": "latin" }, { "name": "frédérique masson (latin)", "dob": "1992-09-30", "address": "Guatemala", "label": "negative", "script": "latin" }, { "name": "alexandre delahaye (latin)", "dob": "2005-07-18", "address": "Chypre", "label": "negative", "script": "latin" }, { "name": "Neil Tsang (latin)", "dob": "1957-10-20", "address": "Taiwan", "label": "positive", "script": "latin" }, { "name": "спиридон горшкова (cyrillic)", "dob": "1980-07-09", "address": "Dominica", "label": "negative", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 55% of variations that follow: Additionally, generate variations that perform these transformations: Delete a random letter, and Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 55, "selected_rules": [ "delete_random_letter", "remove_random_vowel" ], "rule_descriptions": { "delete_random_letter": "Delete a random letter", "remove_random_vowel": "Remove a random vowel" }, "percentage": 55 } } }, { "seed_identities_with_labels": [ { "name": "Aleksei MOZHOVYY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "margot lefèvre (latin)", "dob": "1940-11-12", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "antoinette thierry (latin)", "dob": "1954-05-26", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "blanca echevarría (latin)", "dob": "1983-12-20", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "cheryl powell (latin)", "dob": "1992-10-20", "address": "Saint Lucia", "label": "negative", "script": "latin" }, { "name": "matthew hawkins (latin)", "dob": "1977-05-28", "address": "Cook Islands", "label": "negative", "script": "latin" }, { "name": "първолетка чуков (cyrillic)", "dob": "1990-09-22", "address": "United Kingdom", "label": "negative", "script": "cyrillic" }, { "name": "يونس البلوطي (arabic)", "dob": "1995-1-4", "address": "United Arab Emirates", "label": "positive", "script": "arabic" }, { "name": "Mukhtar Shah (latin)", "dob": "1939-11-8", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "лукьян сергеев (cyrillic)", "dob": "1967-04-04", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "Alexei Rakhmanov (latin)", "dob": "1964-7-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "'Ali Sharara (latin)", "dob": "1968-9-25", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "jennifer johnson (latin)", "dob": "1996-04-01", "address": "Colombia", "label": "negative", "script": "latin" }, { "name": "gordon smith (latin)", "dob": "1955-06-25", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "kelsey henry (latin)", "dob": "1965-10-20", "address": "Tanzania", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 100% Far, and orthographic similarity: 100% Medium, and also include 32% of variations that follow: Additionally, generate variations that perform these transformations: Remove all spaces, and Convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 32, "selected_rules": [ "remove_all_spaces", "shorten_name_to_initials" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "shorten_name_to_initials": "Convert name to initials" }, "percentage": 32 } } }, { "seed_identities_with_labels": [ { "name": "adrienne jean (latin)", "dob": "1951-05-15", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "осип степанов (cyrillic)", "dob": "1975-01-13", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "ariana ribeiro (latin)", "dob": "1947-02-10", "address": "Estónia", "label": "negative", "script": "latin" }, { "name": "Denis Degtyarenko (latin)", "dob": "1989-10-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "luís moura (latin)", "dob": "1958-03-08", "address": "Paraguai", "label": "negative", "script": "latin" }, { "name": "gaspar valente (latin)", "dob": "1991-02-07", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Bushra Shawkat (latin)", "dob": "1960-10-24", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "Yevgeniya Podgornova (latin)", "dob": "1980-7-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "冬冬 赵 (chinese)", "dob": "1990-2-4", "address": "China", "label": "positive", "script": "chinese" }, { "name": "diogo faria (latin)", "dob": "1993-09-29", "address": "Timor Leste", "label": "negative", "script": "latin" }, { "name": "mariana cruz (latin)", "dob": "1953-02-09", "address": "Cabo Verde", "label": "negative", "script": "latin" }, { "name": "محمد فرجی (arabic)", "dob": "1969-02-20", "address": "Heard Island and McDonald Islands", "label": "negative", "script": "arabic" }, { "name": "marc potier (latin)", "dob": "1935-12-21", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "claudine letellier (latin)", "dob": "1994-11-18", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Anton Cherepennikov (latin)", "dob": "1983-5-7", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 100% Far, and also include 12% of variations that follow: Additionally, generate variations that perform these transformations: Swap adjacent syllables, and Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Far": 1.0 }, "rule_based": { "rule_percentage": 12, "selected_rules": [ "swap_adjacent_syllables", "remove_all_spaces" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables", "remove_all_spaces": "Remove all spaces" }, "percentage": 12 } } }, { "seed_identities_with_labels": [ { "name": "Leonid Kalashnikov (latin)", "dob": "1960-8-6", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ناصر العلي (arabic)", "dob": "1961-2-1", "address": "Syria", "label": "positive", "script": "arabic" }, { "name": "tiburcio morán (latin)", "dob": "1993-10-05", "address": "Brasil", "label": "negative", "script": "latin" }, { "name": "aimée perrot (latin)", "dob": "1957-03-09", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "مزهر عويضة (arabic)", "dob": "1969-08-29", "address": "Wallis and Futuna", "label": "negative", "script": "arabic" }, { "name": "оксана галкин (cyrillic)", "dob": "2004-01-07", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "anabel hernandez (latin)", "dob": "1925-12-03", "address": "Ecuador", "label": "negative", "script": "latin" }, { "name": "Asadollah Seify (latin)", "dob": "1965-4-4", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "kristin mckenzie (latin)", "dob": "1995-03-22", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Sergei Arenin (latin)", "dob": "1958-8-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "adriana barriga (latin)", "dob": "1998-07-19", "address": "Uzbekistán", "label": "negative", "script": "latin" }, { "name": "nádia carvalho (latin)", "dob": "1942-12-04", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "consuela méndez (latin)", "dob": "1998-09-10", "address": "Egipto", "label": "negative", "script": "latin" }, { "name": "haydée hernández (latin)", "dob": "1967-03-03", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Bair Zhamsuyev (latin)", "dob": "1959-1-29", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 7 variations of the name {name}, ensuring phonetic similarity: 70% Light, 30% Medium, and orthographic similarity: 100% Medium, and also include 58% of variations that follow: Additionally, generate variations that: Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 58, "selected_rules": [ "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 58 } } }, { "seed_identities_with_labels": [ { "name": "Maxime Mocom (latin)", "dob": "1978-12-30", "address": "Central African Republic", "label": "positive", "script": "latin" }, { "name": "Hataiwan WORAWATVICHAI (latin)", "dob": "1956-1-9", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "antoine lemaire (latin)", "dob": "1984-03-15", "address": "Polynésie française", "label": "negative", "script": "latin" }, { "name": "gregory james (latin)", "dob": "1935-03-27", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "тарас жарко (cyrillic)", "dob": "1955-03-11", "address": "Zambia", "label": "negative", "script": "cyrillic" }, { "name": "Olimxon Ismailov (latin)", "dob": "1996-10-4", "address": "Uzbekistan", "label": "positive", "script": "latin" }, { "name": "makayla richards (latin)", "dob": "1937-05-19", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "valérie robert (latin)", "dob": "1979-01-25", "address": "Mongolie", "label": "negative", "script": "latin" }, { "name": "marcelle meyer (latin)", "dob": "1952-01-17", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "margot léger (latin)", "dob": "2006-07-13", "address": "Îles Mineures Éloignées des États-Unis", "label": "negative", "script": "latin" }, { "name": "adélaïde blin (latin)", "dob": "1936-05-24", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Мария Прусакова (cyrillic)", "dob": "1983-9-4", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "آرتین جهانی (arabic)", "dob": "1927-10-12", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "Daniel He (latin)", "dob": "1965-7-6", "address": "China", "label": "positive", "script": "latin" }, { "name": "luce parent (latin)", "dob": "1998-01-06", "address": "Portugal", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 27% of variations that follow: Additionally, generate variations that perform these transformations: Reorder name parts, Add a title suffix (Jr., PhD, etc.), and Use first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 27, "selected_rules": [ "name_parts_permutations", "add_random_trailing_title", "initial_only_first_name" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "initial_only_first_name": "Use first name initial with last name" }, "percentage": 27 } } }, { "seed_identities_with_labels": [ { "name": "lucas rousset (latin)", "dob": "1948-11-19", "address": "Suriname", "label": "negative", "script": "latin" }, { "name": "gabriel avilés (latin)", "dob": "1938-07-04", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "isabelle michel (latin)", "dob": "1952-12-14", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Hamid Boord (latin)", "dob": "1964-3-27", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "аскольд иванов (cyrillic)", "dob": "1970-08-21", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "Alexey Sukhodolov (latin)", "dob": "1974-4-19", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "سوراجو محمد (arabic)", "dob": "1979-7-3", "address": "United Arab Emirates", "label": "positive", "script": "arabic" }, { "name": "Viktor Zolotov (latin)", "dob": "1954-1-27", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "dominique lebreton (latin)", "dob": "1938-07-23", "address": "Swaziland", "label": "negative", "script": "latin" }, { "name": "شبلي حجار (arabic)", "dob": "1932-04-26", "address": "Oman", "label": "negative", "script": "arabic" }, { "name": "hugues coulon (latin)", "dob": "1946-09-15", "address": "Pitcairn (Îles)", "label": "negative", "script": "latin" }, { "name": "rémy sanchez (latin)", "dob": "1980-10-27", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "valérie jourdan (latin)", "dob": "1985-10-04", "address": "Koweit", "label": "negative", "script": "latin" }, { "name": "laetitia grondin (latin)", "dob": "1958-07-16", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Svetlana Zamlelova (latin)", "dob": "1973-8-22", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 14 variations of {name}, ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (70% Light, 30% Medium). Approximately 11% of the total 14 variations should follow these rule-based transformations: Reorder name parts, Add a title prefix (Mr., Dr., etc.), and Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 11, "selected_rules": [ "name_parts_permutations", "add_random_leading_title", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 11 } } }, { "seed_identities_with_labels": [ { "name": "cody pruitt (latin)", "dob": "1967-11-17", "address": "Comoros", "label": "negative", "script": "latin" }, { "name": "kimberly rodriguez (latin)", "dob": "1982-01-16", "address": "Saudi Arabia", "label": "negative", "script": "latin" }, { "name": "Zelimir Petrovic (latin)", "dob": "1981-9-1", "address": "Serbia", "label": "positive", "script": "latin" }, { "name": "donna baldwin (latin)", "dob": "1968-06-18", "address": "Cook Islands", "label": "negative", "script": "latin" }, { "name": "miguel vieira (latin)", "dob": "1976-12-03", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "ابتسام أبو اسنينة (arabic)", "dob": "1965-11-24", "address": "Korea", "label": "negative", "script": "arabic" }, { "name": "Jamal Alshutti (latin)", "dob": "1964-8-20", "address": "Kuwait", "label": "positive", "script": "latin" }, { "name": "michael nelson (latin)", "dob": "1951-10-24", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "richard johnson (latin)", "dob": "1983-08-31", "address": "Singapore", "label": "negative", "script": "latin" }, { "name": "Александр Ершов (cyrillic)", "dob": "1985-9-28", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Hussam Al-Qatirji (latin)", "dob": "1982-1-11", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "кузьма сорокин (cyrillic)", "dob": "1959-03-22", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "adèle lagarde (latin)", "dob": "1949-10-09", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "michael young (latin)", "dob": "1945-10-27", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Alexander Beglov (latin)", "dob": "1956-5-19", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 13 variations of {name} ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (50% Light, 50% Medium). Approximately 50% of the total 13 variations should follow these rule-based transformations: Use first name initial with last name, Remove a random vowel, and Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 50, "selected_rules": [ "initial_only_first_name", "remove_random_vowel", "add_random_trailing_title" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name", "remove_random_vowel": "Remove a random vowel", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 50 } } }, { "seed_identities_with_labels": [ { "name": "jennifer jordan (latin)", "dob": "1948-01-10", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "joaquín heras (latin)", "dob": "1991-08-24", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "炳強 鄧 (chinese)", "dob": "1965-7-4", "address": "Hong Kong", "label": "positive", "script": "chinese" }, { "name": "Leonid Simanovskiy (latin)", "dob": "1949-7-19", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "حسین رضا زاده (arabic)", "dob": "1950-02-17", "address": "Iran", "label": "High Risk", "script": "arabic" }, { "name": "hélène lacombe (latin)", "dob": "1979-05-11", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "pablo ballesteros (latin)", "dob": "1941-05-29", "address": "Montenegro", "label": "negative", "script": "latin" }, { "name": "Ciro FERREIRA (latin)", "dob": "1987-8-27", "address": "Brazil", "label": "positive", "script": "latin" }, { "name": "nicolás carretero (latin)", "dob": "1958-01-05", "address": "República Dominicana", "label": "negative", "script": "latin" }, { "name": "милован андреев (cyrillic)", "dob": "1936-09-15", "address": "Norfolk Island", "label": "negative", "script": "cyrillic" }, { "name": "ruth calderon (latin)", "dob": "1949-11-12", "address": "Fiji", "label": "negative", "script": "latin" }, { "name": "thierry lelièvre (latin)", "dob": "1926-04-18", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "bienvenida valverde (latin)", "dob": "1995-10-19", "address": "República Unida de Tanzanía", "label": "negative", "script": "latin" }, { "name": "Alexei Rakhmanov (latin)", "dob": "1964-7-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Aleksey Russkikh (latin)", "dob": "1968-7-17", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 7 variations of the name {name}, ensuring phonetic similarity: 100% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 44% of variations that follow: Additionally, generate variations that perform these transformations: Remove all spaces, Swap random adjacent letters, and Swap adjacent syllables. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 44, "selected_rules": [ "remove_all_spaces", "swap_random_letter", "swap_adjacent_syllables" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "swap_random_letter": "Swap random adjacent letters", "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 44 } } }, { "seed_identities_with_labels": [ { "name": "calvin brandt (latin)", "dob": "2002-02-15", "address": "Kiribati", "label": "negative", "script": "latin" }, { "name": "Тина Канделаки (cyrillic)", "dob": "1975-11-10", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "جاد الكلغاصي (arabic)", "dob": "1940-03-04", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "nancy edwards (latin)", "dob": "1989-05-30", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Aleksey Budnev (latin)", "dob": "1964-7-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "игорь григорьева (cyrillic)", "dob": "1954-04-07", "address": "Guam", "label": "negative", "script": "cyrillic" }, { "name": "steven gray (latin)", "dob": "2003-05-06", "address": "Moldova", "label": "negative", "script": "latin" }, { "name": "ricky clayton (latin)", "dob": "1977-04-29", "address": "Netherlands", "label": "negative", "script": "latin" }, { "name": "Mohamed Ibrahim (latin)", "dob": "1977-4-5", "address": "Brazil", "label": "positive", "script": "latin" }, { "name": "Andrei Trofimov (latin)", "dob": "1972-8-14", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "peter steele (latin)", "dob": "1926-05-09", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Viktoriya Savruk (latin)", "dob": "1980-2-12", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "valentine de oliveira (latin)", "dob": "1982-11-12", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "christelle guillet (latin)", "dob": "1953-08-27", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "jeremy johnson (latin)", "dob": "1998-01-18", "address": "Botswana", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 14% of variations that follow: Additionally, generate variations that perform these transformations: Add a title prefix (Mr., Dr., etc.), and Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 14, "selected_rules": [ "add_random_leading_title", "remove_random_vowel" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "remove_random_vowel": "Remove a random vowel" }, "percentage": 14 } } }, { "seed_identities_with_labels": [ { "name": "Anton Kuzmin (latin)", "dob": "1983-3-8", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "renée blot (latin)", "dob": "2007-02-10", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Рајко Кузмановић (cyrillic)", "dob": "1931-12-1", "address": "Bosnia and Herzegovina", "label": "positive", "script": "cyrillic" }, { "name": "طريف خثعم (arabic)", "dob": "1973-07-24", "address": "Syria", "label": "High Risk", "script": "arabic" }, { "name": "آتنا زمانی (arabic)", "dob": "1994-01-11", "address": "Bouvet Island (Bouvetoya)", "label": "negative", "script": "arabic" }, { "name": "susanne monnier (latin)", "dob": "1959-01-04", "address": "Tokelau", "label": "negative", "script": "latin" }, { "name": "Nelli Parutenko (latin)", "dob": "1962-3-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "noël perez (latin)", "dob": "1986-05-18", "address": "Guyane française", "label": "negative", "script": "latin" }, { "name": "hugues leleu (latin)", "dob": "1956-06-19", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "bernard aubert (latin)", "dob": "1946-05-08", "address": "Lithuanie", "label": "negative", "script": "latin" }, { "name": "Irina Shoygu (latin)", "dob": "1955-5-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Oleg Nikolayev (latin)", "dob": "1969-12-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "catherine morales (latin)", "dob": "1937-09-25", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "lucas maillard (latin)", "dob": "1930-09-12", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "véronique pelletier (latin)", "dob": "1932-03-16", "address": "Macau", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 51% of variations that follow: Additionally, generate variations that perform these transformations: Duplicate a random letter, Convert name to initials, and Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 51, "selected_rules": [ "duplicate_random_letter_as_double_letter", "shorten_name_to_initials", "insert_random_letter" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "shorten_name_to_initials": "Convert name to initials", "insert_random_letter": "Insert a random letter" }, "percentage": 51 } } }, { "seed_identities_with_labels": [ { "name": "dominique robert (latin)", "dob": "1927-07-21", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Виктор Игнатов (cyrillic)", "dob": "1968-10-15", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "charlotte loiseau (latin)", "dob": "1993-10-23", "address": "Montserrat", "label": "negative", "script": "latin" }, { "name": "Peter Mbaga (latin)", "dob": "1976-9-25", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "thomas durand (latin)", "dob": "1998-01-31", "address": "Sainte Hélène", "label": "negative", "script": "latin" }, { "name": "walter carter (latin)", "dob": "1950-09-14", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "lorraine payet (latin)", "dob": "1952-07-03", "address": "Suriname", "label": "negative", "script": "latin" }, { "name": "emmanuelle samson (latin)", "dob": "1983-12-14", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "carlota paiva (latin)", "dob": "1940-08-04", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "أسرار قرادة (arabic)", "dob": "1942-11-18", "address": "Ukraine", "label": "negative", "script": "arabic" }, { "name": "alfred maillot (latin)", "dob": "1930-03-08", "address": "Palau", "label": "negative", "script": "latin" }, { "name": "Viktor Mozhelyansky (latin)", "dob": "1964-5-10", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Marianne Houwayek (latin)", "dob": "1980-5-14", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "غسّان عويضة (arabic)", "dob": "1930-07-14", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "Aleksandr Sokolov (latin)", "dob": "1970-8-4", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 14 variations of {name}. Ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 18% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that: Replace double letters with a single letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 18, "selected_rules": [ "replace_double_letters_with_single_letter" ], "rule_descriptions": { "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 18 } } }, { "seed_identities_with_labels": [ { "name": "alexandre neto (latin)", "dob": "1997-12-05", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "gonçalo pinto (latin)", "dob": "1945-07-08", "address": "Malávi", "label": "negative", "script": "latin" }, { "name": "Kirill SELEZNEV (latin)", "dob": "1974-4-23", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "victória freitas (latin)", "dob": "1999-03-01", "address": "Laos", "label": "negative", "script": "latin" }, { "name": "sarah mcbride (latin)", "dob": "1997-12-20", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "remedios lara (latin)", "dob": "1985-12-20", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "lorena simões (latin)", "dob": "1971-08-09", "address": "Nauru", "label": "negative", "script": "latin" }, { "name": "valentina barros (latin)", "dob": "1965-07-07", "address": "Arctic Ocean", "label": "negative", "script": "latin" }, { "name": "Rafi Udin (latin)", "dob": "1966-6-3", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "康男 高木 (chinese)", "dob": "1948-1-30", "address": "Japan", "label": "positive", "script": "chinese" }, { "name": "brianna ibarra (latin)", "dob": "1960-10-01", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Mohammed Matnee (latin)", "dob": "1983-4-15", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "طريف النشاشيبي (arabic)", "dob": "2007-05-19", "address": "Grenada", "label": "negative", "script": "arabic" }, { "name": "Valeriy Chekalov (latin)", "dob": "1976-1-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "يارا صيام (arabic)", "dob": "1945-07-17", "address": "Lebanon", "label": "High Risk", "script": "arabic" } ], "query_template": "Generate 13 variations of {name}, ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (10% Light, 30% Medium, 60% Far). Approximately 43% of the total 13 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations on {name}: \n- Swap adjacent syllables\n- Insert a random letter\n- Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 43, "selected_rules": [ "swap_adjacent_syllables", "insert_random_letter", "remove_random_vowel" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables", "insert_random_letter": "Insert a random letter", "remove_random_vowel": "Remove a random vowel" }, "percentage": 43 } } }, { "seed_identities_with_labels": [ { "name": "roger gillet (latin)", "dob": "1950-10-01", "address": "Vierges britanniques (Îles)", "label": "negative", "script": "latin" }, { "name": "雅琴 吴 (chinese)", "dob": "1992-11-13", "address": "China", "label": "positive", "script": "chinese" }, { "name": "Stanislav Voskresenskiy (latin)", "dob": "1976-9-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "دؤوب طقش (arabic)", "dob": "1940-08-09", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "marc renault (latin)", "dob": "1932-08-01", "address": "Norfolk (Îles)", "label": "negative", "script": "latin" }, { "name": "antoine fouquet (latin)", "dob": "1982-03-05", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "ani mármol (latin)", "dob": "2006-05-12", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "élise jacquet (latin)", "dob": "1976-08-14", "address": "Afrique du sud", "label": "negative", "script": "latin" }, { "name": "daniel guillou (latin)", "dob": "1976-12-19", "address": "Palau", "label": "negative", "script": "latin" }, { "name": "Wei Zhang (latin)", "dob": "1977-6-16", "address": "China", "label": "positive", "script": "latin" }, { "name": "linda bolton (latin)", "dob": "1952-02-22", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "graciana viña (latin)", "dob": "1977-07-25", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "Gennady Plaksin (latin)", "dob": "1961-8-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Mariam Barreh (latin)", "dob": "1971-4-10", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "رضي الصالحي (arabic)", "dob": "1975-08-18", "address": "Korea", "label": "negative", "script": "arabic" } ], "query_template": "Generate 10 variations of {name} ensuring phonetic similarity (100% Light) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 48% of the total 10 variations should follow these rule-based transformations: Reorder name parts. The remaining 52% should be distributed among phonetic and orthographic similarities according to their specified percentages. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 1.0 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 48, "selected_rules": [ "name_parts_permutations" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts" }, "percentage": 48 } } }, { "seed_identities_with_labels": [ { "name": "Ch'o'l-man Han (latin)", "dob": "1978-5-6", "address": "China", "label": "positive", "script": "latin" }, { "name": "carminho maia (latin)", "dob": "1961-01-22", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "adrienne parent (latin)", "dob": "1965-12-04", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "valentina amorim (latin)", "dob": "1984-07-12", "address": "Cazaquistão", "label": "negative", "script": "latin" }, { "name": "vítor cruz (latin)", "dob": "1994-02-03", "address": "Ilhas Caimão", "label": "negative", "script": "latin" }, { "name": "Aleksandr Ganov (latin)", "dob": "1974-10-24", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "emily hatfield (latin)", "dob": "1945-10-04", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "fábio gaspar (latin)", "dob": "1935-05-06", "address": "Costa do Marfim", "label": "negative", "script": "latin" }, { "name": "laura thomas (latin)", "dob": "1944-02-04", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Ratka Kamceva (latin)", "dob": "1945-10-16", "address": "North Macedonia, The Republic of", "label": "positive", "script": "latin" }, { "name": "Yuri Nabokov (latin)", "dob": "1981-1-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "mateus loureiro (latin)", "dob": "1970-12-23", "address": "Ilhas dos Cocos", "label": "negative", "script": "latin" }, { "name": "лев бурова (cyrillic)", "dob": "1955-01-27", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "قدري الخياط (arabic)", "dob": "1967-03-06", "address": "Jordan", "label": "negative", "script": "arabic" }, { "name": "Дмитрий СЫТЫЙ (cyrillic)", "dob": "1989-3-23", "address": "Central African Republic", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate 15 execution vectors for {name} with phonetic similarity (100% Far) to detect sound-alike names. For orthographic similarity, generate variations that are 20% Light, 60% Medium, and 20% Far in visually similar spellings.\n\nApproximately 42% of the total 15 variations should follow these rule-based transformations: Remove a random vowel from {name}, Insert a random letter into {name}, and Reorder name parts of {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 42, "selected_rules": [ "remove_random_vowel", "insert_random_letter", "name_parts_permutations" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "insert_random_letter": "Insert a random letter", "name_parts_permutations": "Reorder name parts" }, "percentage": 42 } } }, { "seed_identities_with_labels": [ { "name": "alex dijoux (latin)", "dob": "1976-06-04", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "luís brito (latin)", "dob": "1959-06-18", "address": "Território Britânico do Oceano Índico", "label": "negative", "script": "latin" }, { "name": "ângelo pinho (latin)", "dob": "2002-05-20", "address": "Malávi", "label": "negative", "script": "latin" }, { "name": "madalena assunção (latin)", "dob": "1975-06-04", "address": "Burquina Faso", "label": "negative", "script": "latin" }, { "name": "طموح اسطمبولي (arabic)", "dob": "1976-04-27", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "Виктор Кидяев (cyrillic)", "dob": "1956-7-9", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "fábio fernandes (latin)", "dob": "1942-07-27", "address": "Macedónia do Norte", "label": "negative", "script": "latin" }, { "name": "German Belous (latin)", "dob": "1977-11-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Mohammad Khademi (latin)", "dob": "1966-4-5", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "Mansur Soltajev (latin)", "dob": "1978-6-13", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "renato valente (latin)", "dob": "1942-04-27", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "شافع نسيبة (arabic)", "dob": "1953-05-30", "address": "Paraguay", "label": "negative", "script": "arabic" }, { "name": "Aleksandr Ganov (latin)", "dob": "1974-10-24", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "gonçalo loureiro (latin)", "dob": "1948-08-26", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "clementina pedrosa (latin)", "dob": "1992-06-01", "address": "Bolivia", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 6 execution vectors for {name}, ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 10% of the total 6 variations should follow these rule-based transformations: \nAdd a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 10, "selected_rules": [ "add_random_trailing_title" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 10 } } }, { "seed_identities_with_labels": [ { "name": "Oleg Smolin (latin)", "dob": "1952-2-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ناهد آل علي (arabic)", "dob": "1961-09-02", "address": "Denmark", "label": "negative", "script": "arabic" }, { "name": "Андрей Макаревич (cyrillic)", "dob": "1984-5-9", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Artyom Verkhov (latin)", "dob": "1986-8-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Osama Ibrahim (latin)", "dob": "1976-4-2", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "модест абрамов (cyrillic)", "dob": "1978-10-23", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "michael lucero (latin)", "dob": "1955-02-28", "address": "Turkmenistan", "label": "negative", "script": "latin" }, { "name": "kayla parrish (latin)", "dob": "1973-05-17", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "robin roberts (latin)", "dob": "1982-08-20", "address": "Estonia", "label": "negative", "script": "latin" }, { "name": "corey byrd (latin)", "dob": "1935-10-27", "address": "Cayman Islands", "label": "negative", "script": "latin" }, { "name": "sean sanders (latin)", "dob": "1937-11-01", "address": "Albania", "label": "negative", "script": "latin" }, { "name": "diana merritt (latin)", "dob": "1983-07-05", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Laith Al-Khazali (latin)", "dob": "1975-10-14", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "george curtis (latin)", "dob": "1968-03-23", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "angela stephens (latin)", "dob": "1987-04-18", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 8 execution vectors for {name}, ensuring phonetic similarity (30% Light, e.g. {name}, {name}y, etc., 40% Medium, e.g. {name}e, {name}i, etc., and 30% Far, e.g. {name}son, {name}tan, etc.) and orthographic similarity (20% Light, e.g. {name}, 60% Medium, e.g. {name}s, {name}es, etc., and 20% Far, e.g. {name}sson, {name}stan, etc.). Approximately 46% of the total variations should follow these rule-based transformations: Additionally, generate variations that: Remove all spaces from {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 46, "selected_rules": [ "remove_all_spaces" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces" }, "percentage": 46 } } }, { "seed_identities_with_labels": [ { "name": "فاطمه زهرا سغیری (arabic)", "dob": "1955-11-23", "address": "Grenada", "label": "negative", "script": "arabic" }, { "name": "Apinya CHANTARAPRAPAPORN (latin)", "dob": "1955-1-14", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "Kia Sadeghi (latin)", "dob": "1986-3-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Myo'ng-hun Ri (latin)", "dob": "1969-3-14", "address": "Korea, North", "label": "positive", "script": "latin" }, { "name": "Hyon Jang (latin)", "dob": "1958-2-22", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "пимен харитонов (cyrillic)", "dob": "2004-07-01", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "julie verdier (latin)", "dob": "1929-09-01", "address": "Suriname", "label": "negative", "script": "latin" }, { "name": "constance leduc (latin)", "dob": "2004-06-10", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "mathilde boutin (latin)", "dob": "1968-07-18", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "rémy hebert (latin)", "dob": "1933-08-18", "address": "La Barbad", "label": "negative", "script": "latin" }, { "name": "valentine paul (latin)", "dob": "2001-07-29", "address": "Oman", "label": "negative", "script": "latin" }, { "name": "dylan armstrong (latin)", "dob": "1980-06-03", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "اسماء الاسد (arabic)", "dob": "1975-8-11", "address": "Syria", "label": "positive", "script": "arabic" }, { "name": "philippe lombard (latin)", "dob": "1981-05-02", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "océane roux (latin)", "dob": "1997-09-15", "address": "Antigua et Barbuda", "label": "negative", "script": "latin" } ], "query_template": "Generate exactly 10 variations of {name}, ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (10% Light, 30% Medium, 60% Far). Approximately 53% of the total 10 variations should follow these rule-based transformations: Replace double letters with a single letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 53, "selected_rules": [ "replace_double_letters_with_single_letter" ], "rule_descriptions": { "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 53 } } }, { "seed_identities_with_labels": [ { "name": "camille berthelot (latin)", "dob": "1944-04-25", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "alexandria faure (latin)", "dob": "1926-10-20", "address": "Vierges britanniques (Îles)", "label": "negative", "script": "latin" }, { "name": "Андрей Скоч (cyrillic)", "dob": "1966-1-30", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Jianming Cao (latin)", "dob": "1955-9-24", "address": "China", "label": "positive", "script": "latin" }, { "name": "захар комиссаров (cyrillic)", "dob": "1982-12-08", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "jeffrey sharp (latin)", "dob": "1957-10-15", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "jordana adadia (latin)", "dob": "1958-03-30", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "alexandria pineau (latin)", "dob": "1968-10-19", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Bernard Mheshe (latin)", "dob": "1974-10-10", "address": "Congo, Democratic Republic of the", "label": "positive", "script": "latin" }, { "name": "bernadette bouvet (latin)", "dob": "1948-03-15", "address": "Lesotho", "label": "negative", "script": "latin" }, { "name": "Jay PEREZ (latin)", "dob": "1973-9-15", "address": "Philippines", "label": "positive", "script": "latin" }, { "name": "راضي الجابر (arabic)", "dob": "1999-05-24", "address": "Chile", "label": "negative", "script": "arabic" }, { "name": "Shahab Javanmardy (latin)", "dob": "1974-3-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "océane roux (latin)", "dob": "1993-05-10", "address": "Bhoutan", "label": "negative", "script": "latin" }, { "name": "frédéric bazin (latin)", "dob": "1951-02-13", "address": "Oman", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 50% Light, 50% Medium, and also include 16% of variations that follow: Additionally, generate variations that perform these transformations: Swap adjacent consonants, and Convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 16, "selected_rules": [ "swap_adjacent_consonants", "shorten_name_to_initials" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "shorten_name_to_initials": "Convert name to initials" }, "percentage": 16 } } }, { "seed_identities_with_labels": [ { "name": "Vitaly Likhachev (latin)", "dob": "1964-2-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Ибрагим Закриев (cyrillic)", "dob": "1990-10-16", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Reza Ebadzadeh (latin)", "dob": "1964-6-13", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "juanita sebastián (latin)", "dob": "1928-10-04", "address": "Trinidad y Tabago", "label": "negative", "script": "latin" }, { "name": "ivan andrade (latin)", "dob": "2001-10-29", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "كبير كنانة (arabic)", "dob": "2003-08-16", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "marina villalobos (latin)", "dob": "1967-10-30", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "plinio martin (latin)", "dob": "1996-04-29", "address": "Honduras", "label": "negative", "script": "latin" }, { "name": "lino salamanca (latin)", "dob": "1942-01-16", "address": "Malawi", "label": "negative", "script": "latin" }, { "name": "сеслав джогов (cyrillic)", "dob": "1990-09-23", "address": "Kazakhstan", "label": "negative", "script": "cyrillic" }, { "name": "Ekaterina Krivoruchko (latin)", "dob": "1986-3-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Zaki Ararawi (latin)", "dob": "1961-5-23", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "jules léger (latin)", "dob": "1966-05-25", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "antonia becerra (latin)", "dob": "1962-07-29", "address": "Costa Rica", "label": "negative", "script": "latin" }, { "name": "judith ripoll (latin)", "dob": "1962-01-21", "address": "Bolivia", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 51% of variations that follow: Additionally, generate variations that perform these transformations: Replace spaces with special characters, Reorder name parts, and Use first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 51, "selected_rules": [ "replace_spaces_with_random_special_characters", "name_parts_permutations", "initial_only_first_name" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "name_parts_permutations": "Reorder name parts", "initial_only_first_name": "Use first name initial with last name" }, "percentage": 51 } } }, { "seed_identities_with_labels": [ { "name": "Элеонора Федоренко (cyrillic)", "dob": "1972-10-28", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "jeffrey thomas (latin)", "dob": "1945-02-16", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "German Belous (latin)", "dob": "1977-11-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Qari Amjad (latin)", "dob": "1979-4-17", "address": "Afghanistan", "label": "positive", "script": "latin" }, { "name": "suzanne lefebvre (latin)", "dob": "1987-12-19", "address": "Équateur", "label": "negative", "script": "latin" }, { "name": "eugène voisin (latin)", "dob": "1951-03-30", "address": "Seychelles", "label": "negative", "script": "latin" }, { "name": "панкрат тимофеева (cyrillic)", "dob": "1944-07-03", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "Andrii Sushko (latin)", "dob": "1976-1-23", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "قانت قرادة (arabic)", "dob": "1934-09-07", "address": "Ukraine", "label": "negative", "script": "arabic" }, { "name": "gilles leleu (latin)", "dob": "1941-04-08", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "franck paris (latin)", "dob": "1940-08-30", "address": "Ouganda", "label": "negative", "script": "latin" }, { "name": "Yuriy Zaitsev (latin)", "dob": "1970-12-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "whitney cervantes (latin)", "dob": "1962-07-26", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "marianne schmitt (latin)", "dob": "1973-04-29", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "grégoire louis (latin)", "dob": "1999-06-25", "address": "Gambie", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 100% Medium, and also include 21% of variations that follow: Additionally, generate variations that perform these transformations: Swap adjacent consonants, Duplicate a random letter, and Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 21, "selected_rules": [ "swap_adjacent_consonants", "duplicate_random_letter_as_double_letter", "add_random_leading_title" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 21 } } }, { "seed_identities_with_labels": [ { "name": "Song Jong (latin)", "dob": "1972-11-15", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Игорь Карпенко (cyrillic)", "dob": "1964-4-28", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "عارف الحلاق (arabic)", "dob": "1941-01-28", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "eva antunes (latin)", "dob": "1943-02-18", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Peter Tultaev (latin)", "dob": "1961-1-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "sara neves (latin)", "dob": "1950-12-06", "address": "Ilhas Salomão", "label": "negative", "script": "latin" }, { "name": "emma macedo (latin)", "dob": "1952-01-22", "address": "Território Britânico do Oceano Índico", "label": "negative", "script": "latin" }, { "name": "دهمان المهنا (arabic)", "dob": "1973-08-30", "address": "Libyan Arab Jamahiriya", "label": "negative", "script": "arabic" }, { "name": "vera leal (latin)", "dob": "1986-09-03", "address": "Lesoto", "label": "negative", "script": "latin" }, { "name": "Iurii Hotsaniuk (latin)", "dob": "1966-7-18", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "laurent collin (latin)", "dob": "2005-11-01", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "filipa coelho (latin)", "dob": "1929-05-16", "address": "França", "label": "negative", "script": "latin" }, { "name": "michelle breton (latin)", "dob": "1935-12-07", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "thérèse dupuis (latin)", "dob": "1963-11-02", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Muhammad Rahmun (latin)", "dob": "1957-4-1", "address": "Syria", "label": "positive", "script": "latin" } ], "query_template": "Generate 6 execution vectors for {name}, ensuring:\nPhonetic similarity with 100% Medium, generating names that sound alike.\nOrthographic similarity with 30% Light, 40% Medium, and 30% Far, generating visually similar spellings.\n\nApproximately 29% of the total variations should follow these rule-based transformations:\nDelete a random letter from {name}.\nDuplicate a random letter in {name}.\nAbbreviate name parts in {name}, for example, \"John Smith\" becomes \"J.S.\" or \"JS\". The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 29, "selected_rules": [ "delete_random_letter", "duplicate_random_letter_as_double_letter", "shorten_name_to_abbreviations" ], "rule_descriptions": { "delete_random_letter": "Delete a random letter", "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 29 } } }, { "seed_identities_with_labels": [ { "name": "ملهم بنو ضمرة (arabic)", "dob": "1970-06-08", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "inés guitart (latin)", "dob": "1961-05-11", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Iyad Makhlouf (latin)", "dob": "1973-1-21", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "جهاد بنو النجار (arabic)", "dob": "1984-09-13", "address": "Rwanda", "label": "negative", "script": "arabic" }, { "name": "Sergey Topor-Gilka (latin)", "dob": "1970-2-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jose galiano (latin)", "dob": "1962-11-18", "address": "Alemania", "label": "negative", "script": "latin" }, { "name": "Amin Shary (latin)", "dob": "1957-8-2", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "miriam maldonado (latin)", "dob": "1965-07-25", "address": "Liberia", "label": "negative", "script": "latin" }, { "name": "patricia santiago (latin)", "dob": "1993-11-25", "address": "Jamaica", "label": "negative", "script": "latin" }, { "name": "maryse muller (latin)", "dob": "1953-08-07", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "chelo blanch (latin)", "dob": "1993-04-04", "address": "Kiribati", "label": "negative", "script": "latin" }, { "name": "adán sánchez (latin)", "dob": "1960-01-09", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "joséphine gonzalez (latin)", "dob": "1933-11-06", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Nikita Samoylenko (latin)", "dob": "1992-8-28", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "燕妮 郑 (chinese)", "dob": "1973-7-2", "address": "Thailand", "label": "positive", "script": "chinese" } ], "query_template": "Generate exactly 12 execution vectors for each target identity {name}, ensuring both phonetic similarity and orthographic similarity. For phonetic similarity: 30% of variations should be Light sound-alike names, 40% Medium sound-alike names, and 30% Far sound-alike names. For orthographic similarity: 30% of variations should be Light visually similar spellings, 40% Medium visually similar spellings, and 30% Far visually similar spellings. Approximately 33% of the total 12 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap adjacent consonants, Add a title suffix (Jr., PhD, etc.), and Use first name initial with last name for each target identity {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 33, "selected_rules": [ "swap_adjacent_consonants", "add_random_trailing_title", "initial_only_first_name" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "initial_only_first_name": "Use first name initial with last name" }, "percentage": 33 } } }, { "seed_identities_with_labels": [ { "name": "رواء الداودي (arabic)", "dob": "1989-01-20", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "Ilyas SA'B (latin)", "dob": "1961-4-6", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "ivan silva (latin)", "dob": "2001-11-30", "address": "Baamas", "label": "negative", "script": "latin" }, { "name": "miriam assunção (latin)", "dob": "1959-08-24", "address": "Moldávia", "label": "negative", "script": "latin" }, { "name": "andré silva (latin)", "dob": "1936-11-10", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "rodrigo diéguez (latin)", "dob": "1961-04-26", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "márcio borges (latin)", "dob": "1996-07-03", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "كاملة شاهين (arabic)", "dob": "2007-07-05", "address": "Guernsey", "label": "negative", "script": "arabic" }, { "name": "Денис Чемоданов (cyrillic)", "dob": "1977-9-11", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Khalil TRINIDAD (latin)", "dob": "1978-3-20", "address": "Philippines", "label": "positive", "script": "latin" }, { "name": "vasco andrade (latin)", "dob": "1954-06-17", "address": "Países Baixos", "label": "negative", "script": "latin" }, { "name": "adriana domingues (latin)", "dob": "1944-11-20", "address": "Man, Isle of", "label": "negative", "script": "latin" }, { "name": "Artyom Verkhov (latin)", "dob": "1986-8-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Irina Shoygu (latin)", "dob": "1955-5-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "philip dunn (latin)", "dob": "1938-08-27", "address": "South Sudan", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 7 execution vectors ({name} variations) for each target identity. Ensure phonetic similarity (70% Light, 30% Medium) and orthographic similarity (50% Light, 50% Medium). Approximately 41% of the total 7 variations should follow these rule-based transformations: Additionally, generate variations that replace spaces with special characters in {name} to create new names, and also generate other name variations where {name} is used as-is. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 41, "selected_rules": [ "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 41 } } }, { "seed_identities_with_labels": [ { "name": "corinne renaud (latin)", "dob": "1999-07-15", "address": "Afrique du sud", "label": "negative", "script": "latin" }, { "name": "inès laurent (latin)", "dob": "1978-04-26", "address": "Sainte Lucie", "label": "negative", "script": "latin" }, { "name": "marcel bailly (latin)", "dob": "1958-08-12", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Vladimir Pavlenko (latin)", "dob": "1962-4-14", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Svetlana Yemilianova (latin)", "dob": "1971-10-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Alexey Shkadarevich (latin)", "dob": "1947-10-27", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "bertrand traore (latin)", "dob": "1968-06-22", "address": "République tchèque", "label": "negative", "script": "latin" }, { "name": "Delcy Rodriguez (latin)", "dob": "1969-5-18", "address": "Venezuela", "label": "positive", "script": "latin" }, { "name": "حدیث اشرفی (arabic)", "dob": "1948-05-05", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "émilie godard (latin)", "dob": "1993-11-21", "address": "São Tomé et Príncipe (Rép.)", "label": "negative", "script": "latin" }, { "name": "هاجر القضماني (arabic)", "dob": "1932-05-29", "address": "Andorra", "label": "negative", "script": "arabic" }, { "name": "Александр Самокутяев (cyrillic)", "dob": "1970-3-13", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "marie picard (latin)", "dob": "1936-06-11", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "alexandre perret (latin)", "dob": "2003-01-02", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "julie salmon (latin)", "dob": "1932-10-01", "address": "Haiti", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 7 variations of {name}. Ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 17% of the total 7 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Use first name initial with last name, Remove a random vowel, and Delete a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 17, "selected_rules": [ "initial_only_first_name", "remove_random_vowel", "delete_random_letter" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name", "remove_random_vowel": "Remove a random vowel", "delete_random_letter": "Delete a random letter" }, "percentage": 17 } } }, { "seed_identities_with_labels": [ { "name": "filipe soares (latin)", "dob": "1942-03-03", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "бронислав евдокимова (cyrillic)", "dob": "1937-10-27", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "cláudio anjos (latin)", "dob": "1961-11-21", "address": "Niue", "label": "negative", "script": "latin" }, { "name": "Mohammad Raad (latin)", "dob": "1955-8-28", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "helena domingues (latin)", "dob": "1928-05-06", "address": "Líbano", "label": "negative", "script": "latin" }, { "name": "ضيائي الصالحي (arabic)", "dob": "1930-04-21", "address": "Gambia", "label": "negative", "script": "arabic" }, { "name": "حسین شتابان (arabic)", "dob": "1992-7-20", "address": "Iran", "label": "positive", "script": "arabic" }, { "name": "Amin Shary (latin)", "dob": "1957-8-2", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "Zaini ZAKARIA (latin)", "dob": "1967-5-16", "address": "Malaysia", "label": "positive", "script": "latin" }, { "name": "ivan andrade (latin)", "dob": "1965-08-12", "address": "Camarões", "label": "negative", "script": "latin" }, { "name": "valérie marion (latin)", "dob": "1944-06-04", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "jules nicolas (latin)", "dob": "1992-08-04", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Vyacheslav Rossolay (latin)", "dob": "1981-10-17", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "diana borges (latin)", "dob": "1963-06-28", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "afonso henriques (latin)", "dob": "1996-03-19", "address": "Gibraltar", "label": "negative", "script": "latin" } ], "query_template": "Generate 6 variations of {name} ensuring phonetic similarity (100% Medium) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 50% of the total 6 variations should follow these rule-based transformations: Reorder name parts, Remove a random consonant, and Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 50, "selected_rules": [ "name_parts_permutations", "remove_random_consonant", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts", "remove_random_consonant": "Remove a random consonant", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 50 } } }, { "seed_identities_with_labels": [ { "name": "édouard nicolas (latin)", "dob": "2005-12-12", "address": "Israël", "label": "negative", "script": "latin" }, { "name": "عرفان شمشیری (arabic)", "dob": "1962-11-24", "address": "Iran", "label": "High Risk", "script": "arabic" }, { "name": "Alireza Fatahinojokambari (latin)", "dob": "1980-9-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Laith Al-Khazali (latin)", "dob": "1975-10-14", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "thérèse noël (latin)", "dob": "1948-07-15", "address": "Andorre", "label": "negative", "script": "latin" }, { "name": "anouk bernard (latin)", "dob": "1991-02-25", "address": "Lithuanie", "label": "negative", "script": "latin" }, { "name": "Александр Бортников (cyrillic)", "dob": "1951-11-15", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Harun Nasir-Al-Din (latin)", "dob": "1970-6-5", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "الينا گلپایگانی (arabic)", "dob": "1974-05-23", "address": "Guadeloupe", "label": "negative", "script": "arabic" }, { "name": "xavier royer (latin)", "dob": "1980-03-23", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Sergey Kravtsov (latin)", "dob": "1974-3-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "diane peltier (latin)", "dob": "1979-07-11", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "christine huet (latin)", "dob": "1958-06-14", "address": "Djibouti", "label": "negative", "script": "latin" }, { "name": "scott miller (latin)", "dob": "1954-02-26", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "susanita gracia (latin)", "dob": "1947-09-13", "address": "Venezuela", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 12 variations of {name}, ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (100% Medium). Approximately 51% of the total 12 variations should follow these rule-based transformations: \nAdd a prefix to {name}.\nRemove a suffix from {name}.\nReplace a character in {name} with a similar-sounding one.\nSwap two adjacent characters in {name}.\nAdd or remove an apostrophe in {name}.\n[VALIDATION HINTS]: Apply these rule-based transformations: Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 51, "selected_rules": [ "name_parts_permutations" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts" }, "percentage": 51 } } }, { "seed_identities_with_labels": [ { "name": "marc mace (latin)", "dob": "1965-09-14", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "خليل يافع (arabic)", "dob": "1977-05-31", "address": "Equatorial Guinea", "label": "negative", "script": "arabic" }, { "name": "sergio fox (latin)", "dob": "1936-10-26", "address": "Mauritania", "label": "negative", "script": "latin" }, { "name": "Ivan Kondratyev (latin)", "dob": "1996-4-8", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "pía torrijos (latin)", "dob": "1945-12-27", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "emily bridges (latin)", "dob": "1952-12-29", "address": "Heard Island and McDonald Islands", "label": "negative", "script": "latin" }, { "name": "Виталий Савельев (cyrillic)", "dob": "1954-1-18", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Valery Pakhnits (latin)", "dob": "1953-1-22", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "тамара сергеева (cyrillic)", "dob": "2007-08-29", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "Ping-keung Tang (latin)", "dob": "1965-7-4", "address": "Hong Kong", "label": "positive", "script": "latin" }, { "name": "kenneth howard (latin)", "dob": "1964-11-23", "address": "Cambodia", "label": "negative", "script": "latin" }, { "name": "jessica kerr (latin)", "dob": "1935-11-08", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "martin torres (latin)", "dob": "1932-05-13", "address": "Timor-Leste", "label": "negative", "script": "latin" }, { "name": "Taher Kayali (latin)", "dob": "1960-7-11", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "scott mitchell (latin)", "dob": "1947-03-07", "address": "Namibia", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 14 name variations for {name}, ensuring phonetic similarity with 1.4 Light names sounding like \"{name}\" but with slight deviations in pronunciation, 7 Medium names sounding similar to \"{name}\" but slightly altered, and 6 Far names sounding very different from \"{name}\", as well as orthographic similarity with 2.8 Light names having minor visual spelling changes, 8.4 Medium names with moderate visual changes, and 3 Far names with significant visual differences. Approximately 3 of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that: Swap adjacent syllables, resulting in names like \"{name}a\" or \"{name}i\".\n[VALIDATION HINTS]: Phonetic similarity: 10% Light.; Orthographic similarity: 20% Light.; Approximately 21% of the variations should follow rule-based transformations. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 21, "selected_rules": [ "swap_adjacent_syllables" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 21 } } }, { "seed_identities_with_labels": [ { "name": "isabelle letellier (latin)", "dob": "1954-01-17", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "yves payet (latin)", "dob": "1997-01-02", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Irina Bubnova (latin)", "dob": "1983-4-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Laith Al-Khazali (latin)", "dob": "1975-10-14", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "ванухи катъров (cyrillic)", "dob": "1957-10-19", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "éric lefebvre (latin)", "dob": "1988-05-03", "address": "Zaïre", "label": "negative", "script": "latin" }, { "name": "قائد المشاولة (arabic)", "dob": "1929-12-28", "address": "Spain", "label": "negative", "script": "arabic" }, { "name": "juliette georges (latin)", "dob": "1951-01-01", "address": "Ouzbékistan", "label": "negative", "script": "latin" }, { "name": "alain costa (latin)", "dob": "1948-04-02", "address": "Moldavie", "label": "negative", "script": "latin" }, { "name": "Nikolay Levichev (latin)", "dob": "1953-5-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "dawn clark (latin)", "dob": "1990-02-22", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "marine monnier (latin)", "dob": "1950-12-17", "address": "Hongrie", "label": "negative", "script": "latin" }, { "name": "Илья Вольфсон (cyrillic)", "dob": "1981-6-8", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "frédéric lefèvre (latin)", "dob": "1956-04-03", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Ratka Kamceva (latin)", "dob": "1945-10-16", "address": "North Macedonia, The Republic of", "label": "positive", "script": "latin" } ], "query_template": "Generate 11 execution vectors for {name}, ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (50% Light, 50% Medium). Approximately 15% of the total 11 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Convert name to initials, and Replace double letters with a single letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 15, "selected_rules": [ "shorten_name_to_initials", "replace_double_letters_with_single_letter" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 15 } } }, { "seed_identities_with_labels": [ { "name": "laurent blanchet (latin)", "dob": "1993-09-26", "address": "Canada", "label": "negative", "script": "latin" }, { "name": "anne gonzalez (latin)", "dob": "1959-04-04", "address": "Russie", "label": "negative", "script": "latin" }, { "name": "николай медведев (cyrillic)", "dob": "1951-03-25", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "carrie beck (latin)", "dob": "2002-03-09", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "anouk lesage (latin)", "dob": "1963-07-24", "address": "Pays-Bas", "label": "negative", "script": "latin" }, { "name": "Vladimir Uyba (latin)", "dob": "1958-10-4", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Svetlana Zamlelova (latin)", "dob": "1973-8-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "hannah oneal (latin)", "dob": "1969-03-07", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "denis faure (latin)", "dob": "1970-12-27", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Sergey Kozlov (latin)", "dob": "1960-5-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "владлен чернова (cyrillic)", "dob": "1948-09-14", "address": "Colombia", "label": "negative", "script": "cyrillic" }, { "name": "rémy valette (latin)", "dob": "1987-10-10", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Mikhail Afanasov (latin)", "dob": "1953-6-15", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "cécile ribeiro (latin)", "dob": "1975-10-19", "address": "Albanie", "label": "negative", "script": "latin" }, { "name": "Сергей Алтухов (cyrillic)", "dob": "1982-2-23", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate exactly 7 execution vectors for each target identity {name}, ensuring phonetic similarity (30% Light variations with suffixes such as \"jr\", \"sr\", \"iii\" or similar, 40% Medium variations with suffixes like \"son\", \"da\", etc., and 30% Far variations with completely different sounds) and orthographic similarity (50% Light variations with visual changes like adding/removing diacritics, 50% Medium variations with visual changes like swapping letters or numbers). Approximately 27% of the total 7 variations should follow these rule-based transformations: Duplicate a random letter, Replace double letters with a single letter, and Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 27, "selected_rules": [ "duplicate_random_letter_as_double_letter", "replace_double_letters_with_single_letter", "add_random_leading_title" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 27 } } }, { "seed_identities_with_labels": [ { "name": "وليد الحجار (arabic)", "dob": "1943-10-14", "address": "Saudi Arabia", "label": "negative", "script": "arabic" }, { "name": "bertrand jacques (latin)", "dob": "1979-03-18", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Yuri Shvytkin (latin)", "dob": "1965-5-24", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Дмитрий Песков (cyrillic)", "dob": "1967-10-17", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "capucine rodriguez (latin)", "dob": "2006-05-22", "address": "Maurice", "label": "negative", "script": "latin" }, { "name": "christelle traore (latin)", "dob": "1955-01-17", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Dzmitryri Braim (latin)", "dob": "1976-4-18", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "gabriel labbé (latin)", "dob": "1936-12-07", "address": "Pays-Bas", "label": "negative", "script": "latin" }, { "name": "nicolas roger (latin)", "dob": "1968-04-07", "address": "Îles Mineures Éloignées des États-Unis", "label": "negative", "script": "latin" }, { "name": "беро келешев (cyrillic)", "dob": "1979-10-01", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "Aleksey MOZGOVOY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "philippe boutin (latin)", "dob": "1983-05-11", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "madeleine lejeune (latin)", "dob": "1928-04-16", "address": "Guyane", "label": "negative", "script": "latin" }, { "name": "azeneth rico (latin)", "dob": "1982-05-11", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "Jury Gotsanyuk (latin)", "dob": "1966-7-18", "address": "Ukraine", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 70% Light, 30% Medium, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 25% of variations that follow: Additionally, generate variations that perform these transformations: Delete a random letter, Use first name initial with last name, and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 25, "selected_rules": [ "delete_random_letter", "initial_only_first_name", "swap_random_letter" ], "rule_descriptions": { "delete_random_letter": "Delete a random letter", "initial_only_first_name": "Use first name initial with last name", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 25 } } }, { "seed_identities_with_labels": [ { "name": "Andrey Gurulev (latin)", "dob": "1967-10-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "christine rocha (latin)", "dob": "1933-04-13", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "гико овнарски (cyrillic)", "dob": "1938-01-23", "address": "Falkland Islands (Malvinas)", "label": "negative", "script": "cyrillic" }, { "name": "eric gilbert (latin)", "dob": "2003-02-18", "address": "Gabon", "label": "negative", "script": "latin" }, { "name": "elijah lee (latin)", "dob": "1997-05-17", "address": "Bangladesh", "label": "negative", "script": "latin" }, { "name": "joseph bender (latin)", "dob": "1939-07-30", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Yuri Nabokov (latin)", "dob": "1981-1-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Aleksei Chekunkov (latin)", "dob": "1980-10-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ariel ribera (latin)", "dob": "1954-04-11", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "ярополк веселов (cyrillic)", "dob": "1930-02-23", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "lauren holden (latin)", "dob": "1942-06-27", "address": "Myanmar", "label": "negative", "script": "latin" }, { "name": "Yuri Karayev (latin)", "dob": "1966-6-21", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "sarah robinson (latin)", "dob": "1932-01-23", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "heidi sweeney (latin)", "dob": "1980-12-17", "address": "Christmas Island", "label": "negative", "script": "latin" }, { "name": "Иван Заворотный (cyrillic)", "dob": "1979-10-22", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 10% Light, 50% Medium, 40% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 41% of variations that follow: Additionally, generate variations that perform these transformations: Swap adjacent syllables, Remove all spaces, and Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 41, "selected_rules": [ "swap_adjacent_syllables", "remove_all_spaces", "name_parts_permutations" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables", "remove_all_spaces": "Remove all spaces", "name_parts_permutations": "Reorder name parts" }, "percentage": 41 } } }, { "seed_identities_with_labels": [ { "name": "مُتيّم مهنا (arabic)", "dob": "1992-04-17", "address": "Albania", "label": "negative", "script": "arabic" }, { "name": "jacques tessier (latin)", "dob": "1970-12-17", "address": "Guatemala", "label": "negative", "script": "latin" }, { "name": "Imre Laszloczki (latin)", "dob": "1961-9-26", "address": "Hungary", "label": "positive", "script": "latin" }, { "name": "Maulana Ubaidullah (latin)", "dob": "1985-1-31", "address": "Afghanistan", "label": "positive", "script": "latin" }, { "name": "Алексей СИМАНОВСКИЙ (cyrillic)", "dob": "1955-9-6", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "juliette dubois (latin)", "dob": "2005-12-09", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Boris Kovalchuk (latin)", "dob": "1977-12-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "امیررضا مجتهدی (arabic)", "dob": "1925-05-16", "address": "Iran", "label": "High Risk", "script": "arabic" }, { "name": "laurent perret (latin)", "dob": "1953-03-08", "address": "Rép. Dém. du Congo", "label": "negative", "script": "latin" }, { "name": "sara vasquez (latin)", "dob": "1982-07-15", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "cécile bousquet (latin)", "dob": "2002-03-06", "address": "République centrafricaine", "label": "negative", "script": "latin" }, { "name": "zacharie cousin (latin)", "dob": "1967-07-25", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Il Cho (latin)", "dob": "1945-5-10", "address": "Korea, North", "label": "positive", "script": "latin" }, { "name": "márcio moreira (latin)", "dob": "1956-08-31", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "guillaume perrier (latin)", "dob": "1950-07-02", "address": "Bosnie-Herzégovine", "label": "negative", "script": "latin" } ], "query_template": "Generate 13 variations of {name} ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (10% Light, 30% Medium, 60% Far). Approximately 47% of the total 13 variations should follow these rule-based transformations: Additionally, generate variations that: Insert a random letter in each name variation, specifically after every character with a 20% chance, before every character with a 25% chance, and at the beginning and end of the name with equal probability. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 47, "selected_rules": [ "insert_random_letter" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter" }, "percentage": 47 } } }, { "seed_identities_with_labels": [ { "name": "eric potter (latin)", "dob": "1996-03-12", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "lúcia fonseca (latin)", "dob": "1971-05-19", "address": "Emiratos Árabes Unidos", "label": "negative", "script": "latin" }, { "name": "Steven Liu (latin)", "dob": "1984-11-13", "address": "China", "label": "positive", "script": "latin" }, { "name": "hugo anjos (latin)", "dob": "1991-05-12", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Анна Суровикина (cyrillic)", "dob": "1973-7-4", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "lia carvalho (latin)", "dob": "1934-04-24", "address": "Spratly Islands", "label": "negative", "script": "latin" }, { "name": "Aleksei Gnedovskii (latin)", "dob": "1964-12-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Boris Vologdin (latin)", "dob": "1955-7-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "renata ribeiro (latin)", "dob": "1957-09-08", "address": "Vaticano", "label": "negative", "script": "latin" }, { "name": "evan campbell (latin)", "dob": "1963-09-20", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "'Abdallah AL-JAMAL (latin)", "dob": "1997-2-2", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "mário simões (latin)", "dob": "1993-03-29", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "jéssica lima (latin)", "dob": "1951-07-18", "address": "Bolívia", "label": "negative", "script": "latin" }, { "name": "رجاء العوالق (arabic)", "dob": "1926-10-19", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "октябрина ильина (cyrillic)", "dob": "1956-02-26", "address": "United States of America", "label": "negative", "script": "cyrillic" } ], "query_template": "Generate 13 variations of {name} ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 43% of the total 13 variations should follow these rule-based transformations: Remove all spaces from {name}, Replace one or more spaces in {name} with a hyphen, Replace one or more spaces in {name} with an underscore, Replace one or more spaces in {name} with a period. Additionally, generate variations that perform these transformations: Remove all spaces from {name}, and Replace spaces with special characters such as @, #, $, etc. in {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 43, "selected_rules": [ "remove_all_spaces", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 43 } } }, { "seed_identities_with_labels": [ { "name": "Abualfazl Nazeri (latin)", "dob": "1969-9-14", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "morgan lewis (latin)", "dob": "1968-02-04", "address": "Swaziland", "label": "negative", "script": "latin" }, { "name": "Vladimir Turin (latin)", "dob": "1958-12-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "michelle payne (latin)", "dob": "1935-12-22", "address": "Burundi", "label": "negative", "script": "latin" }, { "name": "sherri barnes (latin)", "dob": "1938-04-02", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "بشير منصور (arabic)", "dob": "1980-2-9", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "трофим самсонов (cyrillic)", "dob": "1988-07-06", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "ратмир кудряшов (cyrillic)", "dob": "2003-01-14", "address": "Jamaica", "label": "negative", "script": "cyrillic" }, { "name": "Lilia Rotenberg (latin)", "dob": "1978-4-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "william jenkins (latin)", "dob": "1988-07-20", "address": "Aruba", "label": "negative", "script": "latin" }, { "name": "marine nguyen (latin)", "dob": "1931-04-17", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Valery GABRIEL (latin)", "dob": "1956-1-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "angela miller (latin)", "dob": "1998-11-10", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "katie mclaughlin (latin)", "dob": "1941-02-08", "address": "Denmark", "label": "negative", "script": "latin" }, { "name": "michel paris (latin)", "dob": "1943-08-26", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 14 variations of {name}, ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (100% Far). Approximately 32% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that: Replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Far": 1.0 }, "rule_based": { "rule_percentage": 32, "selected_rules": [ "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 32 } } }, { "seed_identities_with_labels": [ { "name": "petrona cabrera (latin)", "dob": "1937-08-21", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "ashley jones (latin)", "dob": "1977-06-23", "address": "Australia", "label": "negative", "script": "latin" }, { "name": "kyle lewis (latin)", "dob": "1965-07-14", "address": "Botswana", "label": "negative", "script": "latin" }, { "name": "أحلام الخرافي (arabic)", "dob": "1990-03-01", "address": "Guinea", "label": "negative", "script": "arabic" }, { "name": "dorothy boyd (latin)", "dob": "1993-04-29", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "andré laroche (latin)", "dob": "2006-01-02", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "olivier pottier (latin)", "dob": "1930-02-21", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Nikolay Kosov (latin)", "dob": "1955-6-30", "address": "Hungary", "label": "positive", "script": "latin" }, { "name": "مشرق بنو مهدي (arabic)", "dob": "1945-05-09", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "Song Jong (latin)", "dob": "1972-11-15", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "christopher garcia (latin)", "dob": "1992-07-28", "address": "Malaysia", "label": "negative", "script": "latin" }, { "name": "Павло Крилло (cyrillic)", "dob": "1981-12-1", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "zachary stafford (latin)", "dob": "1957-02-18", "address": "Antarctica (the territory South of 60 deg S)", "label": "negative", "script": "latin" }, { "name": "Nelli Parutenko (latin)", "dob": "1962-3-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Andrey Ivanov (latin)", "dob": "1983-4-13", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 14 variations of {name}. ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (100% Medium). Approximately 58% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Convert name to initials, Abbreviate name parts, and Replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 58, "selected_rules": [ "shorten_name_to_initials", "shorten_name_to_abbreviations", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "shorten_name_to_abbreviations": "Abbreviate name parts", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 58 } } }, { "seed_identities_with_labels": [ { "name": "salomé morais (latin)", "dob": "1953-04-14", "address": "Santa Helena", "label": "negative", "script": "latin" }, { "name": "Sergey Omelnitskii (latin)", "dob": "1980-6-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "michelle delannoy (latin)", "dob": "2004-11-10", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "evaristo arranz (latin)", "dob": "1956-11-20", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "قحطان راجح (arabic)", "dob": "1951-02-03", "address": "Christmas Island", "label": "negative", "script": "arabic" }, { "name": "melissa jarvis (latin)", "dob": "1930-04-04", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Alexander Kurenkov (latin)", "dob": "1972-6-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Виктор Николаев (cyrillic)", "dob": "1982-4-16", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "césar moura (latin)", "dob": "1939-09-04", "address": "Antilhas Neerlandesas", "label": "negative", "script": "latin" }, { "name": "Elena Drapeko (latin)", "dob": "1948-10-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "mia andrade (latin)", "dob": "1985-02-28", "address": "Omã", "label": "negative", "script": "latin" }, { "name": "Nufail Akbar (latin)", "dob": "1972-3-26", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "emma simões (latin)", "dob": "1927-08-30", "address": "Micronésia", "label": "negative", "script": "latin" }, { "name": "nádia cruz (latin)", "dob": "1978-06-07", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "георгий субботин (cyrillic)", "dob": "1938-02-02", "address": "Crimea", "label": "High Risk", "script": "cyrillic" } ], "query_template": "Generate 13 variations of {name}, ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (70% Light, 30% Medium). Approximately 12% of the total 13 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Convert name to initials, and Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 12, "selected_rules": [ "shorten_name_to_initials", "remove_all_spaces" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "remove_all_spaces": "Remove all spaces" }, "percentage": 12 } } }, { "seed_identities_with_labels": [ { "name": "baldomero blasco (latin)", "dob": "1942-01-04", "address": "Guinea Bissau", "label": "negative", "script": "latin" }, { "name": "ruperto carmona (latin)", "dob": "1931-11-16", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "alfred baron (latin)", "dob": "1971-10-20", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "victoria priego (latin)", "dob": "1962-02-16", "address": "Camboya", "label": "negative", "script": "latin" }, { "name": "ДМИТРИЙ МИХАЛЬЦОВ (cyrillic)", "dob": "1979-11-27", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Mehdi Lashgarian (latin)", "dob": "1989-6-2", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Harun Nasir-Al-Din (latin)", "dob": "1970-6-5", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "مَسعد شاهين (arabic)", "dob": "1933-11-09", "address": "United States Virgin Islands", "label": "negative", "script": "arabic" }, { "name": "vilma bermúdez (latin)", "dob": "1950-10-30", "address": "Swazilandia", "label": "negative", "script": "latin" }, { "name": "jamie cruz (latin)", "dob": "1998-06-26", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "tamara rozas (latin)", "dob": "1957-03-17", "address": "República de Macedonia del Norte", "label": "negative", "script": "latin" }, { "name": "مائده روحانی (arabic)", "dob": "1991-08-03", "address": "Iran", "label": "High Risk", "script": "arabic" }, { "name": "Ahmet DURI (latin)", "dob": "1987-1-12", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "Andrey Doukhvalov (latin)", "dob": "1957-12-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "kaitlyn stevenson (latin)", "dob": "1938-10-25", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 12 variations of {name} ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (50% Light, 50% Medium). Approximately 51% of the total 12 variations should follow these rule-based transformations: Delete a random letter from {name}, Add a title suffix (Jr., PhD, etc.) to {name} and Abbreviate name parts from {name}. The remaining 49% of variations should be randomly generated phonetic and orthographic similar names. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 51, "selected_rules": [ "delete_random_letter", "add_random_trailing_title", "shorten_name_to_abbreviations" ], "rule_descriptions": { "delete_random_letter": "Delete a random letter", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 51 } } }, { "seed_identities_with_labels": [ { "name": "alexandria gautier (latin)", "dob": "1931-03-06", "address": "Espagne", "label": "negative", "script": "latin" }, { "name": "عبد العليم جزار (arabic)", "dob": "1999-10-17", "address": "Libyan Arab Jamahiriya", "label": "negative", "script": "arabic" }, { "name": "флорентин давыдова (cyrillic)", "dob": "1993-03-19", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "Oleg Krinitsyn (latin)", "dob": "1971-5-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Andrey Ivanov (latin)", "dob": "1983-4-13", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "camille lopez (latin)", "dob": "1928-07-06", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "christophe carpentier (latin)", "dob": "1966-01-06", "address": "Ouganda", "label": "negative", "script": "latin" }, { "name": "alice renaud (latin)", "dob": "1982-06-09", "address": "Malte", "label": "negative", "script": "latin" }, { "name": "michelle weiss (latin)", "dob": "1964-10-31", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "raymond michael (latin)", "dob": "1939-09-14", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "marcelle sauvage (latin)", "dob": "1966-10-13", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "gilles wagner (latin)", "dob": "1941-07-27", "address": "Sénégal", "label": "negative", "script": "latin" }, { "name": "可成 尹 (chinese)", "dob": "1986-12-8", "address": "China", "label": "positive", "script": "chinese" }, { "name": "Jay PEREZ (latin)", "dob": "1973-9-15", "address": "Philippines", "label": "positive", "script": "latin" }, { "name": "Kia Sadeghi (latin)", "dob": "1986-3-21", "address": "Iran", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 28% of variations that follow: Additionally, generate variations that perform these transformations: Remove all spaces, Add a title suffix (Jr., PhD, etc.), and Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 28, "selected_rules": [ "remove_all_spaces", "add_random_trailing_title", "insert_random_letter" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "insert_random_letter": "Insert a random letter" }, "percentage": 28 } } }, { "seed_identities_with_labels": [ { "name": "Nikolay Kosov (latin)", "dob": "1955-6-30", "address": "Hungary", "label": "positive", "script": "latin" }, { "name": "michelle barbier (latin)", "dob": "1964-04-04", "address": "Japon", "label": "negative", "script": "latin" }, { "name": "melissa ribeiro (latin)", "dob": "1947-01-06", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "жанна кононова (cyrillic)", "dob": "1935-09-20", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "Anatoly Bifov (latin)", "dob": "1963-1-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "زينب سمسار (arabic)", "dob": "1972-12-31", "address": "Korea", "label": "negative", "script": "arabic" }, { "name": "Oleksiy MOZGOVY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "margot letellier (latin)", "dob": "1996-10-21", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Zaini ZAKARIA (latin)", "dob": "1967-5-16", "address": "Malaysia", "label": "positive", "script": "latin" }, { "name": "maggie bodin (latin)", "dob": "1981-06-07", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Геннадий Казакевич (cyrillic)", "dob": "1975-2-14", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "reina marqués (latin)", "dob": "1977-10-08", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "louis alves (latin)", "dob": "2005-04-27", "address": "Paraguay", "label": "negative", "script": "latin" }, { "name": "rémy paul (latin)", "dob": "2000-07-14", "address": "Canada", "label": "negative", "script": "latin" }, { "name": "nicolas chrétien (latin)", "dob": "2005-08-05", "address": "Madagascar", "label": "negative", "script": "latin" } ], "query_template": "Generate 6 execution vectors for {name}, ensuring phonetic similarity with 30% Light, 40% Medium, and 30% Far variations, as well as orthographic similarity with 10% Light, 50% Medium, and 40% Far variations. Approximately 15% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap adjacent consonants in {name}, Abbreviate {first name} or {last name}, Remove a random vowel from {name}.\n[VALIDATION HINTS]: Apply these rule-based transformations: Abbreviate name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 15, "selected_rules": [ "swap_adjacent_consonants", "shorten_name_to_abbreviations", "remove_random_vowel" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "shorten_name_to_abbreviations": "Abbreviate name parts", "remove_random_vowel": "Remove a random vowel" }, "percentage": 15 } } }, { "seed_identities_with_labels": [ { "name": "alexandre masson (latin)", "dob": "1958-03-06", "address": "Djibouti", "label": "negative", "script": "latin" }, { "name": "عتيق أستيتية (arabic)", "dob": "2006-03-06", "address": "Moldova", "label": "negative", "script": "arabic" }, { "name": "Svetlana Emilianova (latin)", "dob": "1971-10-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "michel petit (latin)", "dob": "1950-07-29", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Антон Котяков (cyrillic)", "dob": "1980-8-15", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "marc mendès (latin)", "dob": "1940-05-26", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "élise gaudin (latin)", "dob": "1928-02-09", "address": "Somalie", "label": "negative", "script": "latin" }, { "name": "luc garcia (latin)", "dob": "1976-10-06", "address": "Inde", "label": "negative", "script": "latin" }, { "name": "Ezzatullah Sorbani (latin)", "dob": "1967-2-1", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Aleksei Budnev (latin)", "dob": "1964-7-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Yong Jon (latin)", "dob": "1976-8-25", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "william greene (latin)", "dob": "1927-08-22", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "philippine pelletier (latin)", "dob": "1958-11-23", "address": "Géorgie du Sud et Sandwich du Sud (Îles)", "label": "negative", "script": "latin" }, { "name": "nancy harper (latin)", "dob": "1936-08-19", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "милий никонова (cyrillic)", "dob": "1949-07-25", "address": "Russia", "label": "High Risk", "script": "cyrillic" } ], "query_template": "Generate 13 variations of {name}. Ensuring phonetic similarity (70% Light, 30% Medium) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 41% of the total 13 variations should follow these rule-based transformations: Additionally, generate variations that: Convert name to initials, Replace 'John' with 'Jon', Replace 'Doe' with 'Smith', Remove all vowels from {name}, Swap first and last letters of {name}, Reverse order of {name} words, Keep only first letter of each word in {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 41, "selected_rules": [ "shorten_name_to_initials" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials" }, "percentage": 41 } } }, { "seed_identities_with_labels": [ { "name": "matias figueiredo (latin)", "dob": "1983-01-24", "address": "República Centro-Africana", "label": "negative", "script": "latin" }, { "name": "Hossein VAZIRI (latin)", "dob": "1961-3-21", "address": "Malaysia", "label": "positive", "script": "latin" }, { "name": "سليمان مهنا (arabic)", "dob": "1972-11-21", "address": "Germany", "label": "negative", "script": "arabic" }, { "name": "frédérique lacombe (latin)", "dob": "1968-04-30", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Yuri Afonin (latin)", "dob": "1977-3-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "федот анисимова (cyrillic)", "dob": "1929-05-17", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "Samer Ismail (latin)", "dob": "1980-10-25", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Aleksandr Mishustin (latin)", "dob": "2000-12-13", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ivo faria (latin)", "dob": "1978-03-17", "address": "Brunei", "label": "negative", "script": "latin" }, { "name": "joão branco (latin)", "dob": "1982-11-10", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Мансур Солтаев (cyrillic)", "dob": "1978-6-13", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "kyara fonseca (latin)", "dob": "1932-11-20", "address": "Síria", "label": "negative", "script": "latin" }, { "name": "alexandra gaspar (latin)", "dob": "1949-07-25", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "donato mate (latin)", "dob": "1992-08-29", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "sérgio baptista (latin)", "dob": "2003-08-16", "address": "Hong Kong", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 34% of variations that follow: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), Swap adjacent syllables, and Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 34, "selected_rules": [ "add_random_trailing_title", "swap_adjacent_syllables", "name_parts_permutations" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "swap_adjacent_syllables": "Swap adjacent syllables", "name_parts_permutations": "Reorder name parts" }, "percentage": 34 } } }, { "seed_identities_with_labels": [ { "name": "Fatemeh Sadeghi (latin)", "dob": "1995-11-28", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Aleksandr Nikolov (latin)", "dob": "1962-2-19", "address": "Bulgaria", "label": "positive", "script": "latin" }, { "name": "kenneth robbins (latin)", "dob": "1925-04-01", "address": "Lao People's Democratic Republic", "label": "negative", "script": "latin" }, { "name": "شهير الصالحي (arabic)", "dob": "1946-01-28", "address": "Norfolk Island", "label": "negative", "script": "arabic" }, { "name": "Алексей Суходолов (cyrillic)", "dob": "1974-4-19", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Oleg Krinitsyn (latin)", "dob": "1971-5-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "michael jones (latin)", "dob": "1954-12-30", "address": "El Salvador", "label": "negative", "script": "latin" }, { "name": "catherine smith (latin)", "dob": "1972-01-21", "address": "Micronesia", "label": "negative", "script": "latin" }, { "name": "richard cortez (latin)", "dob": "1987-03-15", "address": "Serbia", "label": "negative", "script": "latin" }, { "name": "Yuriy Shevchenko (latin)", "dob": "1966-12-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "carminho azevedo (latin)", "dob": "1926-05-02", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "مهدیس سماوات (arabic)", "dob": "1949-09-10", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "julián barreda (latin)", "dob": "1950-01-25", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "larry wilcox (latin)", "dob": "1934-12-24", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "lisa ross (latin)", "dob": "1935-02-06", "address": "South Africa", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 7 variations of {name}. Ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (100% Light). Approximately 21% of the total 7 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Insert a random letter, Swap adjacent consonants, and Delete a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 1.0 }, "rule_based": { "rule_percentage": 21, "selected_rules": [ "insert_random_letter", "swap_adjacent_consonants", "delete_random_letter" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter", "swap_adjacent_consonants": "Swap adjacent consonants", "delete_random_letter": "Delete a random letter" }, "percentage": 21 } } }, { "seed_identities_with_labels": [ { "name": "frédérique poulain (latin)", "dob": "1996-09-26", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "anaïs mace (latin)", "dob": "1931-08-31", "address": "Tchad", "label": "negative", "script": "latin" }, { "name": "amador parra (latin)", "dob": "1927-09-27", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Leonid Kalashnikov (latin)", "dob": "1960-8-6", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "vincent lecoq (latin)", "dob": "1985-09-14", "address": "Territoires français du sud", "label": "negative", "script": "latin" }, { "name": "Marianne Houwayek (latin)", "dob": "1980-5-14", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "pauline roussel (latin)", "dob": "1934-12-15", "address": "Dominique", "label": "negative", "script": "latin" }, { "name": "océane henry (latin)", "dob": "1987-07-08", "address": "Équateur", "label": "negative", "script": "latin" }, { "name": "Svetlana Emilianova (latin)", "dob": "1971-10-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "محمدیاسین مجتبوی (arabic)", "dob": "1959-09-05", "address": "Iran", "label": "High Risk", "script": "arabic" }, { "name": "Виктор Кидяев (cyrillic)", "dob": "1956-7-9", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Omid Noori (latin)", "dob": "1976-3-12", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "véronique fontaine (latin)", "dob": "1965-04-10", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "کيانا شبیری (arabic)", "dob": "1943-03-24", "address": "Tanzania", "label": "negative", "script": "arabic" }, { "name": "james tran (latin)", "dob": "1962-04-02", "address": "South Sudan", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 8 execution vectors for each target identity named {name}, ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 60% of the total 8 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations for {name}: Reorder first name and last name parts, Abbreviate first and middle names to initials, Reorder middle name and last name parts, and Abbreviate last name to acronym. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 60, "selected_rules": [ "name_parts_permutations", "shorten_name_to_abbreviations" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts", "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 60 } } }, { "seed_identities_with_labels": [ { "name": "ياسين رسولی (arabic)", "dob": "1984-03-06", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "julien ferreira (latin)", "dob": "2006-12-22", "address": "Géorgie du Sud et Sandwich du Sud (Îles)", "label": "negative", "script": "latin" }, { "name": "'Ali al-Shufa (latin)", "dob": "1991-7-25", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Oleg Smolin (latin)", "dob": "1952-2-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Игорь Гутник (cyrillic)", "dob": "1974-12-17", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "bertrand faivre (latin)", "dob": "2001-09-18", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "янина вампирска (cyrillic)", "dob": "1944-07-29", "address": "French Polynesia", "label": "negative", "script": "cyrillic" }, { "name": "valérie leclerc (latin)", "dob": "2003-04-06", "address": "Saint-Marin (Rép. de)", "label": "negative", "script": "latin" }, { "name": "claude lefebvre (latin)", "dob": "2007-06-02", "address": "Wallis et Futuna (Îles)", "label": "negative", "script": "latin" }, { "name": "noël perrot (latin)", "dob": "1937-04-09", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Muhammad Adhiguna (latin)", "dob": "1996-7-30", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "juan pablo acero (latin)", "dob": "1965-03-11", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "margot martins (latin)", "dob": "1951-10-01", "address": "Aruba", "label": "negative", "script": "latin" }, { "name": "lorraine imbert (latin)", "dob": "1994-03-24", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Zajar KALASHOV (latin)", "dob": "1953-3-20", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 8 variations of {name} ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (10% Light, 50% Medium, 40% Far). Approximately 13% of the total 8 variations should follow these rule-based transformations: Additionally, generate variations that reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 13, "selected_rules": [ "name_parts_permutations" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts" }, "percentage": 13 } } }, { "seed_identities_with_labels": [ { "name": "Amin Shary (latin)", "dob": "1957-8-2", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "Роман Старовойт (cyrillic)", "dob": "1972-1-20", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "kaitlyn mcneil (latin)", "dob": "1990-08-19", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "laurence boulanger (latin)", "dob": "1954-05-09", "address": "Kiribati", "label": "negative", "script": "latin" }, { "name": "samuel james (latin)", "dob": "2005-12-18", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Abdullah Faisal (latin)", "dob": "1963-9-10", "address": "Jamaica", "label": "positive", "script": "latin" }, { "name": "дакота щърбов (cyrillic)", "dob": "1929-04-22", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "Aleksey MOZGOVOY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "nicole lambert (latin)", "dob": "1975-03-31", "address": "Mongolie", "label": "negative", "script": "latin" }, { "name": "zoé cousin (latin)", "dob": "2006-06-26", "address": "Seychelles", "label": "negative", "script": "latin" }, { "name": "pierre daniel (latin)", "dob": "1932-02-26", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "اميرمحمد ظفری (arabic)", "dob": "1950-08-18", "address": "Burundi", "label": "negative", "script": "arabic" }, { "name": "mathilde ruiz (latin)", "dob": "1963-09-28", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "guillaume cohen (latin)", "dob": "1983-02-25", "address": "Érythrée", "label": "negative", "script": "latin" }, { "name": "Abu Salih (latin)", "dob": "1983-12-13", "address": "Syria", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 70% Light, 30% Medium, and also include 34% of variations that follow: Additionally, generate variations that perform these transformations: Use first name initial with last name, Swap adjacent syllables, and Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 34, "selected_rules": [ "initial_only_first_name", "swap_adjacent_syllables", "name_parts_permutations" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name", "swap_adjacent_syllables": "Swap adjacent syllables", "name_parts_permutations": "Reorder name parts" }, "percentage": 34 } } }, { "seed_identities_with_labels": [ { "name": "Bushra Shawkat (latin)", "dob": "1960-10-24", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "steven smith (latin)", "dob": "1993-03-04", "address": "Kuwait", "label": "negative", "script": "latin" }, { "name": "ariel zavala (latin)", "dob": "1942-08-14", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "justin lane (latin)", "dob": "1972-10-22", "address": "Saint Helena", "label": "negative", "script": "latin" }, { "name": "Reuben LAVILLA (latin)", "dob": "1972-10-4", "address": "Philippines", "label": "positive", "script": "latin" }, { "name": "anita vicente (latin)", "dob": "1980-12-28", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "едвин муев (cyrillic)", "dob": "1968-02-26", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "shawn rowe (latin)", "dob": "1997-10-19", "address": "Dominican Republic", "label": "negative", "script": "latin" }, { "name": "Zelimir Petrovic (latin)", "dob": "1981-9-1", "address": "Serbia", "label": "positive", "script": "latin" }, { "name": "marissa stevenson (latin)", "dob": "1971-12-11", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Wei Zhang (latin)", "dob": "1977-6-16", "address": "China", "label": "positive", "script": "latin" }, { "name": "lucas rojas (latin)", "dob": "1927-05-14", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "sherry johnson (latin)", "dob": "1937-07-14", "address": "Canada", "label": "negative", "script": "latin" }, { "name": "قانت بنو معقل (arabic)", "dob": "1999-02-06", "address": "Uganda", "label": "negative", "script": "arabic" }, { "name": "Виктор Игнатов (cyrillic)", "dob": "1968-10-15", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate exactly 10 execution vectors for each target identity {name}, ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (70% Light, 30% Medium). Approximately 25% of the total 10 variations should follow these rule-based transformations: Replace double letters with a single letter, and Swap adjacent syllables. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 25, "selected_rules": [ "replace_double_letters_with_single_letter", "swap_adjacent_syllables" ], "rule_descriptions": { "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 25 } } }, { "seed_identities_with_labels": [ { "name": "nico olmo (latin)", "dob": "1931-03-03", "address": "Costa Rica", "label": "negative", "script": "latin" }, { "name": "mary knight (latin)", "dob": "1976-01-04", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "прокл максимова (cyrillic)", "dob": "1941-11-27", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "Myint Swe (latin)", "dob": "1951-5-24", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "graciela losada (latin)", "dob": "1934-07-31", "address": "Mónaco", "label": "negative", "script": "latin" }, { "name": "victor cousin (latin)", "dob": "1987-12-01", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Valentina Tereshkova (latin)", "dob": "1937-3-6", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "زاخر الخرافي (arabic)", "dob": "1947-04-22", "address": "Australia", "label": "negative", "script": "arabic" }, { "name": "Григорий Карасин (cyrillic)", "dob": "1949-8-23", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Sergei Kudryashov (latin)", "dob": "1967-7-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "emilia montaña (latin)", "dob": "1953-06-12", "address": "Bangladesh", "label": "negative", "script": "latin" }, { "name": "mariana godoy (latin)", "dob": "1965-01-04", "address": "República de Corea", "label": "negative", "script": "latin" }, { "name": "Volodymyr Bandura (latin)", "dob": "1990-7-15", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "tyler leon (latin)", "dob": "1952-03-04", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "alma montaña (latin)", "dob": "1975-03-18", "address": "Venezuela", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 11 name variations for {name}, ensuring phonetic similarity by applying 50% Light sound-alike names and 50% Medium sound-alike names. Additionally, generate name variations that exhibit orthographic similarity by applying 50% Light visually similar spellings and 50% Medium visually similar spellings. Approximately 40% of the total 11 name variations should follow these rule-based transformations: \nAdditionally, generate variations that perform these transformations: Swap adjacent syllables, and Remove a random consonant.. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 40, "selected_rules": [ "swap_adjacent_syllables", "remove_random_consonant" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables", "remove_random_consonant": "Remove a random consonant" }, "percentage": 40 } } }, { "seed_identities_with_labels": [ { "name": "valentine benard (latin)", "dob": "1974-12-28", "address": "Chypre", "label": "negative", "script": "latin" }, { "name": "Olimjon Tohtahunov (latin)", "dob": "1949-12-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "anne perrot (latin)", "dob": "1940-01-18", "address": "Falkland (Île)", "label": "negative", "script": "latin" }, { "name": "امیرعباس سمسار (arabic)", "dob": "1926-07-12", "address": "Colombia", "label": "negative", "script": "arabic" }, { "name": "loida vallejo (latin)", "dob": "1943-10-31", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "nath chauvin (latin)", "dob": "1981-03-15", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "diane menard (latin)", "dob": "1937-10-23", "address": "Guatemala", "label": "negative", "script": "latin" }, { "name": "Иван Демченко (cyrillic)", "dob": "1960-9-27", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "adriana tena (latin)", "dob": "1981-08-16", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "синклитикия якушев (cyrillic)", "dob": "1936-06-08", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "Alexey Russkikh (latin)", "dob": "1968-7-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "antoine gallet (latin)", "dob": "1941-05-07", "address": "République centrafricaine", "label": "negative", "script": "latin" }, { "name": "Aleksandra Oksenchuk (latin)", "dob": "1992-10-16", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "noémi loiseau (latin)", "dob": "1955-07-31", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Muhsin al-Zibin (latin)", "dob": "1973-7-1", "address": "Kuwait", "label": "positive", "script": "latin" } ], "query_template": "Generate 8 variations of {name}, ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 55% of the total 8 variations should follow these rule-based transformations: Replace random consonants with different consonants in {name}, Add a title prefix (Mr., Dr., etc.) to {name}, and Reorder name parts in {name}. The remaining 45% of the total 8 variations should be generated using only phonetic and orthographic similarity transformations. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 55, "selected_rules": [ "replace_random_consonant_with_random_consonant", "add_random_leading_title", "name_parts_permutations" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "name_parts_permutations": "Reorder name parts" }, "percentage": 55 } } }, { "seed_identities_with_labels": [ { "name": "rafael lorenzo (latin)", "dob": "1960-09-29", "address": "Rwanda", "label": "negative", "script": "latin" }, { "name": "капитон новиков (cyrillic)", "dob": "2005-04-23", "address": "Falkland Islands (Malvinas)", "label": "negative", "script": "cyrillic" }, { "name": "фома уваров (cyrillic)", "dob": "1954-07-06", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "elvira donaire (latin)", "dob": "1943-04-18", "address": "Islandia", "label": "negative", "script": "latin" }, { "name": "Rafi Udin (latin)", "dob": "1966-6-3", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Sergei Serdiukov (latin)", "dob": "1987-6-23", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "agustín espinosa (latin)", "dob": "1932-12-10", "address": "Alemania", "label": "negative", "script": "latin" }, { "name": "larry dean (latin)", "dob": "1945-10-19", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "ruperto miranda (latin)", "dob": "1957-12-27", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "amada duran (latin)", "dob": "2005-11-07", "address": "República Dominicana", "label": "negative", "script": "latin" }, { "name": "Александap Добрић (cyrillic)", "dob": "1991-7-15", "address": "Bosnia and Herzegovina", "label": "positive", "script": "cyrillic" }, { "name": "adélaïde moreau (latin)", "dob": "1937-09-28", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Edgar Sarrias (latin)", "dob": "1976-1-24", "address": "Venezuela", "label": "positive", "script": "latin" }, { "name": "charles marchand (latin)", "dob": "1965-05-12", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Anton Anisimov (latin)", "dob": "1985-2-19", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 7 variations of {name} ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 57% of the total 7 variations should follow these rule-based transformations: \nReorder name parts (e.g. First name, Last name -> Last name, First name), \nSwap adjacent consonants (e.g. Smith -> Sthim), \nand Insert a random letter (e.g. John -> Johan). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 57, "selected_rules": [ "name_parts_permutations", "swap_adjacent_consonants", "insert_random_letter" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts", "swap_adjacent_consonants": "Swap adjacent consonants", "insert_random_letter": "Insert a random letter" }, "percentage": 57 } } }, { "seed_identities_with_labels": [ { "name": "Kamlesh Pattni (latin)", "dob": "1965-3-12", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "Сайгидпаша Умаханов (cyrillic)", "dob": "1962-4-3", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "christopher charles (latin)", "dob": "1974-05-17", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "jenaro ruiz (latin)", "dob": "1975-11-09", "address": "Tayikistán", "label": "negative", "script": "latin" }, { "name": "rufina grau (latin)", "dob": "1995-06-21", "address": "Pakistán", "label": "negative", "script": "latin" }, { "name": "نازنين شمشیری (arabic)", "dob": "1959-12-09", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "miriam valenciano (latin)", "dob": "1992-02-17", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "Tigran KHACHATUROV (latin)", "dob": "1979-2-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ضياء الدّين أبو اسنينة (arabic)", "dob": "1960-07-21", "address": "Eritrea", "label": "negative", "script": "arabic" }, { "name": "Yuriy Soloviev (latin)", "dob": "1970-4-13", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "constança pacheco (latin)", "dob": "1959-09-09", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Rajabi Al-Zahir (latin)", "dob": "1969-7-5", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "salud aramburu (latin)", "dob": "1928-07-29", "address": "Estonia", "label": "negative", "script": "latin" }, { "name": "nicole vasseur (latin)", "dob": "1944-03-10", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "sebastián busquets (latin)", "dob": "1952-07-20", "address": "República de Corea", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 10% Light, 50% Medium, 40% Far, and orthographic similarity: 50% Light, 50% Medium, and also include 49% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random consonant, and Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 49, "selected_rules": [ "remove_random_consonant", "remove_random_vowel" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "remove_random_vowel": "Remove a random vowel" }, "percentage": 49 } } }, { "seed_identities_with_labels": [ { "name": "Mahdy Helbawi (latin)", "dob": "1987-10-30", "address": "Colombia", "label": "positive", "script": "latin" }, { "name": "anabel jover (latin)", "dob": "1935-07-24", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "'Ali Sharara (latin)", "dob": "1968-9-25", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "véronique durand (latin)", "dob": "1932-08-30", "address": "Samoa", "label": "negative", "script": "latin" }, { "name": "Bushra Shawkat (latin)", "dob": "1960-10-24", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "amélie lemonnier (latin)", "dob": "1962-10-23", "address": "Botswana", "label": "negative", "script": "latin" }, { "name": "céline bonneau (latin)", "dob": "1951-01-08", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "lorraine gallet (latin)", "dob": "2000-04-24", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "حسین تناور (arabic)", "dob": "1981-8-30", "address": "Iran", "label": "positive", "script": "arabic" }, { "name": "Abdelbasit Khair (latin)", "dob": "1955-8-28", "address": "Sudan", "label": "positive", "script": "latin" }, { "name": "danielle fabre (latin)", "dob": "1943-05-17", "address": "Madagascar", "label": "negative", "script": "latin" }, { "name": "michael graham (latin)", "dob": "1934-09-12", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "julien perret (latin)", "dob": "1982-10-03", "address": "Bulgarie", "label": "negative", "script": "latin" }, { "name": "پریا صارمی (arabic)", "dob": "1931-02-05", "address": "Puerto Rico", "label": "negative", "script": "arabic" }, { "name": "كريم أشجع (arabic)", "dob": "2003-09-27", "address": "Algeria", "label": "High Risk", "script": "arabic" } ], "query_template": "Generate 6 name variations for {name}, ensuring phonetic similarity (10% Light, e.g. \"{name}i\", 30% Medium, e.g. \"Mc{name}\", 60% Far, e.g. \"{name}ovich\") and orthographic similarity (70% Light, e.g. \"{name}e\", 30% Medium, e.g. \"{name}y\"). Approximately 50% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Replace random vowels with different vowels, e.g. \"A{name}a\"; Add a title prefix (Mr., Dr., etc.), e.g. \"Mr.{name}\"; and Swap adjacent syllables, e.g. \"{naMe}\". The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 50, "selected_rules": [ "replace_random_vowel_with_random_vowel", "add_random_leading_title", "swap_adjacent_syllables" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 50 } } }, { "seed_identities_with_labels": [ { "name": "david riou (latin)", "dob": "1931-04-23", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "tony wang (latin)", "dob": "1988-11-26", "address": "Saint Lucia", "label": "negative", "script": "latin" }, { "name": "susan carter (latin)", "dob": "1959-09-27", "address": "Guyana", "label": "negative", "script": "latin" }, { "name": "albino bermejo (latin)", "dob": "1975-01-29", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "christopher williams (latin)", "dob": "1952-01-09", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Olga PLAKSINA (latin)", "dob": "1974-3-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "латко лимонадов (cyrillic)", "dob": "1946-09-07", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "héctor pozo (latin)", "dob": "1960-01-16", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "سمانه دمیرچی‌لو (arabic)", "dob": "1990-8-26", "address": "Iran", "label": "positive", "script": "arabic" }, { "name": "سرحان أستيتية (arabic)", "dob": "1980-12-10", "address": "Belgium", "label": "negative", "script": "arabic" }, { "name": "benjamin soto (latin)", "dob": "1986-06-02", "address": "Guyana", "label": "negative", "script": "latin" }, { "name": "Valery GABRIEL (latin)", "dob": "1956-1-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ryan miller (latin)", "dob": "1947-04-17", "address": "Korea", "label": "negative", "script": "latin" }, { "name": "Firuza Kerimova (latin)", "dob": "1967-10-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "German Gref (latin)", "dob": "1964-2-8", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 8 execution vectors for {name}, ensuring phonetic similarity with 70% Light variations (e.g., Soundex, Metaphone) and 30% Medium variations (e.g., Levenshtein distance), as well as orthographic similarity with 10% Light variations (e.g., misspellings), 50% Medium variations (e.g., similar spellings), and 40% Far variations (e.g., significantly different spellings). Approximately 31% of the total 8 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Replace spaces with special characters (e.g., _, ~, !), and Replace double letters with a single letter (e.g., \"ee\" becomes \"e\"). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 31, "selected_rules": [ "replace_spaces_with_random_special_characters", "replace_double_letters_with_single_letter" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 31 } } }, { "seed_identities_with_labels": [ { "name": "Андрей Макаров (cyrillic)", "dob": "1954-7-22", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "ângelo torres (latin)", "dob": "1925-10-23", "address": "Sri Lanca", "label": "negative", "script": "latin" }, { "name": "Amir Khamzat (latin)", "dob": "1974-10-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Saad AL-FAKIH (latin)", "dob": "1957-1-31", "address": "United Kingdom", "label": "positive", "script": "latin" }, { "name": "mia baptista (latin)", "dob": "1968-11-06", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "benjamim valente (latin)", "dob": "2003-09-10", "address": "Santa Lúcia", "label": "negative", "script": "latin" }, { "name": "candelaria piñol (latin)", "dob": "1995-04-02", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "مُعتز لخم (arabic)", "dob": "1986-11-29", "address": "Wallis and Futuna", "label": "negative", "script": "arabic" }, { "name": "Airat Gibatdinov (latin)", "dob": "1986-1-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "tatiana carvalho (latin)", "dob": "1978-03-02", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "matilde moreira (latin)", "dob": "1986-01-01", "address": "Arctic Ocean", "label": "negative", "script": "latin" }, { "name": "lucas lima (latin)", "dob": "1993-08-20", "address": "São Vicente e Granadinas", "label": "negative", "script": "latin" }, { "name": "حمدان ازحيمان (arabic)", "dob": "1938-10-03", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "Laith Al-Khazali (latin)", "dob": "1975-10-14", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "gerald nelson (latin)", "dob": "1966-11-14", "address": "Namibia", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 10 name variations for {name}, ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (100% Light). Approximately 52% of the total 10 variations should follow these rule-based transformations: Additionally, generate variations that Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 1.0 }, "rule_based": { "rule_percentage": 52, "selected_rules": [ "remove_all_spaces" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces" }, "percentage": 52 } } }, { "seed_identities_with_labels": [ { "name": "Виктор Дзюба (cyrillic)", "dob": "1977-8-10", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "henrique teixeira (latin)", "dob": "1957-12-25", "address": "Quiribáti", "label": "negative", "script": "latin" }, { "name": "marta coelho (latin)", "dob": "1970-10-13", "address": "Bielorrússia", "label": "negative", "script": "latin" }, { "name": "Kazbek Kokov (latin)", "dob": "1973-7-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "miriam lima (latin)", "dob": "1930-11-27", "address": "Suriname", "label": "negative", "script": "latin" }, { "name": "antónio cruz (latin)", "dob": "1979-07-12", "address": "Sérvia", "label": "negative", "script": "latin" }, { "name": "مصباح قيس عيلان (arabic)", "dob": "1946-09-08", "address": "Timor-Leste", "label": "negative", "script": "arabic" }, { "name": "Chilli Yuan (latin)", "dob": "1985-5-8", "address": "China", "label": "positive", "script": "latin" }, { "name": "Chris Tang (latin)", "dob": "1965-7-4", "address": "Hong Kong", "label": "positive", "script": "latin" }, { "name": "Aleksey Budnev (latin)", "dob": "1964-7-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "scott cook (latin)", "dob": "2005-12-18", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "luísa tavares (latin)", "dob": "1985-01-08", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "maría teresa gracia (latin)", "dob": "1961-07-06", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "рубен ильин (cyrillic)", "dob": "1991-08-26", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "césar baptista (latin)", "dob": "1968-10-10", "address": "Angola", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 9 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 39% of variations that follow: Additionally, generate variations that perform these transformations: Swap random adjacent letters, Remove all spaces, and Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 39, "selected_rules": [ "swap_random_letter", "remove_all_spaces", "remove_random_vowel" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "remove_all_spaces": "Remove all spaces", "remove_random_vowel": "Remove a random vowel" }, "percentage": 39 } } }, { "seed_identities_with_labels": [ { "name": "Svetlana Emilianova (latin)", "dob": "1971-10-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "соломон лубенець (cyrillic)", "dob": "1995-08-25", "address": "Honduras", "label": "negative", "script": "cyrillic" }, { "name": "arnaude blanchet (latin)", "dob": "1941-02-09", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Aleksandr Zhivlyuk (latin)", "dob": "1981-1-13", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "matthew perez (latin)", "dob": "1951-04-07", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "agnès germain (latin)", "dob": "1979-10-08", "address": "Norfolk (Îles)", "label": "negative", "script": "latin" }, { "name": "suzanne delannoy (latin)", "dob": "1934-06-16", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "مائده قاضی (arabic)", "dob": "1984-07-25", "address": "Iran", "label": "High Risk", "script": "arabic" }, { "name": "Timur Badr (latin)", "dob": "1984-10-18", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "olivie monnier (latin)", "dob": "1992-01-22", "address": "Wallis et Futuna (Îles)", "label": "negative", "script": "latin" }, { "name": "Wendy Morales (latin)", "dob": "1980-5-28", "address": "Nicaragua", "label": "positive", "script": "latin" }, { "name": "brigitte dufour (latin)", "dob": "1992-02-11", "address": "Saint-Kitts et Nevis", "label": "negative", "script": "latin" }, { "name": "Олексій Дікій (cyrillic)", "dob": "1974-7-5", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "ronald jefferson (latin)", "dob": "1946-12-05", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "arthur leconte (latin)", "dob": "1929-07-28", "address": "Bermudes (Les)", "label": "negative", "script": "latin" } ], "query_template": "Generate 13 variations of {name} for phonetic similarity, with exactly 6.5 variations being Light and 6.5 variations being Medium, and for orthographic similarity, with 3.9 variations being Light, 5.2 variations being Medium, and 3.9 variations being Far. Approximately 46% of the total 13 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations on {name}: Swap random adjacent letters to get a variation like \"{name} with letters swapped\", Replace random vowels with different vowels to get a variation like \"{name} with vowels replaced\".\n[VALIDATION HINTS]: Phonetic similarity: 50% Light.; Orthographic similarity: 30% Light. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 46, "selected_rules": [ "swap_random_letter", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 46 } } }, { "seed_identities_with_labels": [ { "name": "valentine pierre (latin)", "dob": "2004-02-16", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "charles marchal (latin)", "dob": "2007-01-11", "address": "Suède", "label": "negative", "script": "latin" }, { "name": "nicolas bigot (latin)", "dob": "1943-06-09", "address": "Aruba", "label": "negative", "script": "latin" }, { "name": "antoinette becker (latin)", "dob": "1927-04-05", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "émilie dias (latin)", "dob": "1947-03-15", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Yuriy Gudilin (latin)", "dob": "1983-6-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jessica stein (latin)", "dob": "1994-10-01", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Alexander Demin (latin)", "dob": "1988-9-23", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "нифонт силина (cyrillic)", "dob": "1988-06-29", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "Олег Иванинский (cyrillic)", "dob": "1966-6-5", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "michèle samson (latin)", "dob": "1979-02-27", "address": "Pitcairn (Îles)", "label": "negative", "script": "latin" }, { "name": "sabine langlois (latin)", "dob": "1979-02-11", "address": "Corée du Nord", "label": "negative", "script": "latin" }, { "name": "Yury Nabokov (latin)", "dob": "1981-1-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Aleksei Budnev (latin)", "dob": "1964-7-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "زهرة أبا الخيل (arabic)", "dob": "1990-07-22", "address": "Pitcairn Islands", "label": "negative", "script": "arabic" } ], "query_template": "Generate 9 variations of {name} ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (50% Light, 50% Medium). Approximately 40% of the total 9 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), Duplicate a random letter, and Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 40, "selected_rules": [ "add_random_trailing_title", "duplicate_random_letter_as_double_letter", "remove_random_consonant" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "remove_random_consonant": "Remove a random consonant" }, "percentage": 40 } } }, { "seed_identities_with_labels": [ { "name": "jeanne lenoir (latin)", "dob": "1981-04-10", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Yuri Afonin (latin)", "dob": "1977-3-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "auguste rousseau (latin)", "dob": "1979-04-08", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Muhammad YUNUS (latin)", "dob": "1979-3-3", "address": "Indonesia", "label": "positive", "script": "latin" }, { "name": "'Adnan Yusuf (latin)", "dob": "1956-6-6", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "Elena Drapeko (latin)", "dob": "1948-10-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "geneviève roussel (latin)", "dob": "1953-05-18", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "marguerite mace (latin)", "dob": "1949-01-11", "address": "Honduras", "label": "negative", "script": "latin" }, { "name": "aurore rousseau (latin)", "dob": "1926-04-01", "address": "Vierges britanniques (Îles)", "label": "negative", "script": "latin" }, { "name": "مُصلح المظفر (arabic)", "dob": "2003-09-06", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "هدى بقشان (arabic)", "dob": "1972-03-01", "address": "Jordan", "label": "negative", "script": "arabic" }, { "name": "philippe fabre (latin)", "dob": "1939-08-20", "address": "Dominique", "label": "negative", "script": "latin" }, { "name": "Кирилл Царёв (cyrillic)", "dob": "1978-9-25", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "jules delahaye (latin)", "dob": "1927-04-02", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "victoire poulain (latin)", "dob": "1961-03-11", "address": "Botswana", "label": "negative", "script": "latin" } ], "query_template": "Generate 10 name variations for {name}, ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 21% of the total 10 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), and Replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 21, "selected_rules": [ "add_random_trailing_title", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 21 } } }, { "seed_identities_with_labels": [ { "name": "christina rodriguez (latin)", "dob": "1987-02-20", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "eva maría catalá (latin)", "dob": "1998-05-04", "address": "Sudáfrica", "label": "negative", "script": "latin" }, { "name": "andrea morcillo (latin)", "dob": "1945-08-06", "address": "República Dominicana", "label": "negative", "script": "latin" }, { "name": "henri duval (latin)", "dob": "2004-07-07", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "тарас уманець (cyrillic)", "dob": "1978-11-05", "address": "India", "label": "negative", "script": "cyrillic" }, { "name": "Boris Vologdin (latin)", "dob": "1955-7-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Pavel Akifyev (latin)", "dob": "1985-12-19", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "adriana baró (latin)", "dob": "1951-12-21", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "jimena macias (latin)", "dob": "2002-06-17", "address": "Suriname", "label": "negative", "script": "latin" }, { "name": "kevin blanchard (latin)", "dob": "1982-10-01", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "алла логинова (cyrillic)", "dob": "1998-11-15", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "Kambiz Rostamian (latin)", "dob": "1960-8-27", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "Eduard Benderski (latin)", "dob": "1970-6-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Евгений Гладких (cyrillic)", "dob": "1985-9-5", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "valentín calvo (latin)", "dob": "1973-05-17", "address": "Turquía", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 50% Light, 50% Medium, and also include 45% of variations that follow: Additionally, generate variations that perform these transformations: Remove all spaces, and Delete a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 45, "selected_rules": [ "remove_all_spaces", "delete_random_letter" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "delete_random_letter": "Delete a random letter" }, "percentage": 45 } } }, { "seed_identities_with_labels": [ { "name": "eugène renault (latin)", "dob": "1963-02-01", "address": "Soudan", "label": "negative", "script": "latin" }, { "name": "nathalie rey (latin)", "dob": "1932-09-21", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "بثينة بنو ياس (arabic)", "dob": "2002-08-28", "address": "Heard Island and McDonald Islands", "label": "negative", "script": "arabic" }, { "name": "edward mills (latin)", "dob": "1998-12-24", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Valery Pakhnits (latin)", "dob": "1953-1-22", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "raimundo sanmiguel (latin)", "dob": "1928-09-27", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "zoé turpin (latin)", "dob": "1952-08-16", "address": "Tchad", "label": "negative", "script": "latin" }, { "name": "васенка куртакова (cyrillic)", "dob": "1950-10-27", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "Аркадий Дзименштейн (cyrillic)", "dob": "1956-1-13", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Vitaly Likhachev (latin)", "dob": "1964-2-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "nath marchand (latin)", "dob": "1980-08-25", "address": "Guinée-Bissau", "label": "negative", "script": "latin" }, { "name": "Vinai PICHAYOT (latin)", "dob": "1957-12-1", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "marcelle de sousa (latin)", "dob": "1936-01-18", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "antoine legros (latin)", "dob": "1973-11-12", "address": "Pitcairn (Îles)", "label": "negative", "script": "latin" }, { "name": "Vladimir Turin (latin)", "dob": "1958-12-20", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 15 variations of {name} ensuring phonetic similarity where 50% are Light names such as {name}-ski and 50% are Medium names such as {name}-ovich. \nApproximately 53% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Abbreviate name parts to {name}J, Insert a random letter to become {name}A{l or r}, and Replace random vowels with different vowels to become {name}e instead of {name}.\n[VALIDATION HINTS]: Orthographic similarity: 10% Light. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 53, "selected_rules": [ "shorten_name_to_abbreviations", "insert_random_letter", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "insert_random_letter": "Insert a random letter", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 53 } } }, { "seed_identities_with_labels": [ { "name": "Vladimir Yakushev (latin)", "dob": "1968-6-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Yaroslav Bulygin (latin)", "dob": "1973-8-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Ivan KUSOV (latin)", "dob": "1987-1-24", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "victoire allain (latin)", "dob": "1941-04-27", "address": "Guinée-Bissau", "label": "negative", "script": "latin" }, { "name": "andrée hervé (latin)", "dob": "1951-11-03", "address": "Vatican (Etat du)", "label": "negative", "script": "latin" }, { "name": "lucas vaillant (latin)", "dob": "1941-09-06", "address": "Guatemala", "label": "negative", "script": "latin" }, { "name": "Кирилл Селезнёв (cyrillic)", "dob": "1974-4-23", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Sergey NEVEROV (latin)", "dob": "1961-12-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "maurice courtois (latin)", "dob": "1934-05-30", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "творимир гурьева (cyrillic)", "dob": "1994-07-15", "address": "Turks and Caicos Islands", "label": "negative", "script": "cyrillic" }, { "name": "thibault maurice (latin)", "dob": "1928-03-09", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "amélie goncalves (latin)", "dob": "1940-10-06", "address": "Zimbabwe", "label": "negative", "script": "latin" }, { "name": "زاهر بنو حنيفة (arabic)", "dob": "1962-06-30", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "patrick bonneau (latin)", "dob": "2000-07-17", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "cayetana prats (latin)", "dob": "1970-08-15", "address": "Venezuela", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 10% Light, 50% Medium, 40% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 21% of variations that follow: Additionally, generate variations that perform these transformations: Swap random adjacent letters, Add a title suffix (Jr., PhD, etc.), and Abbreviate name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 21, "selected_rules": [ "swap_random_letter", "add_random_trailing_title", "shorten_name_to_abbreviations" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 21 } } }, { "seed_identities_with_labels": [ { "name": "антонин носов (cyrillic)", "dob": "1992-09-09", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "christelle bouchet (latin)", "dob": "1947-07-02", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Yusuf al-Hatum (latin)", "dob": "1966-1-5", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Николай Бурляев (cyrillic)", "dob": "1946-8-3", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "margot simon (latin)", "dob": "1964-01-14", "address": "Émirats arabes unis", "label": "negative", "script": "latin" }, { "name": "victor jean (latin)", "dob": "1989-09-20", "address": "Cambodge", "label": "negative", "script": "latin" }, { "name": "cesar jimenez (latin)", "dob": "1948-06-07", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "zacharie perrin (latin)", "dob": "1974-04-25", "address": "Fidji (République des)", "label": "negative", "script": "latin" }, { "name": "Sergei Serdiukov (latin)", "dob": "1987-6-23", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "евграф блинова (cyrillic)", "dob": "1942-09-21", "address": "Belize", "label": "negative", "script": "cyrillic" }, { "name": "thomas carpentier (latin)", "dob": "1993-01-16", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Kwo'n-u Han (latin)", "dob": "1962-8-21", "address": "China", "label": "positive", "script": "latin" }, { "name": "charles renault (latin)", "dob": "1928-03-19", "address": "Pakistan", "label": "negative", "script": "latin" }, { "name": "alex marsh (latin)", "dob": "1938-11-11", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Ch'un-hwan RI (latin)", "dob": "1957-8-21", "address": "China", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 9 execution vectors (name variations) for each target identity {name}, ensuring phonetic similarity with 70% Light and 30% Medium variations, and orthographic similarity with 10% Light, 50% Medium, and 40% Far variations. Approximately 51% of the total 9 variations should follow these rule-based transformations: Add a title suffix (Jr., PhD, etc.), Remove all spaces, and Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 51, "selected_rules": [ "add_random_trailing_title", "remove_all_spaces", "insert_random_letter" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "remove_all_spaces": "Remove all spaces", "insert_random_letter": "Insert a random letter" }, "percentage": 51 } } }, { "seed_identities_with_labels": [ { "name": "Dzheykhun Aslanov (latin)", "dob": "1990-1-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "eduarda ribeiro (latin)", "dob": "1948-05-18", "address": "Fiji", "label": "negative", "script": "latin" }, { "name": "leonor branco (latin)", "dob": "1971-08-04", "address": "Clipperton Island", "label": "negative", "script": "latin" }, { "name": "marcos cruz (latin)", "dob": "1979-07-19", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "arnaude leconte (latin)", "dob": "1946-07-23", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Apinya CHANTARAPRAPAPORN (latin)", "dob": "1955-1-14", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "céline lemaître (latin)", "dob": "2000-06-01", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "فضل الأدغم (arabic)", "dob": "1999-01-24", "address": "Bangladesh", "label": "negative", "script": "arabic" }, { "name": "ismael carvalho (latin)", "dob": "1988-03-18", "address": "Belize", "label": "negative", "script": "latin" }, { "name": "Xutong Qin (latin)", "dob": "1994-4-29", "address": "China", "label": "positive", "script": "latin" }, { "name": "Aegli Tamani-Phella (latin)", "dob": "1978-6-21", "address": "Cyprus", "label": "positive", "script": "latin" }, { "name": "roxana gonzález (latin)", "dob": "1963-04-02", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "daniela batista (latin)", "dob": "1933-05-04", "address": "Antígua e Barbuda", "label": "negative", "script": "latin" }, { "name": "любомир константинов (cyrillic)", "dob": "1972-12-19", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "‫بلال‬ ‫حدرج‬ (arabic)", "dob": "1968-7-10", "address": "Lebanon", "label": "positive", "script": "arabic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 50% Light, 50% Medium, and also include 42% of variations that follow: Additionally, generate variations that perform these transformations: Use first name initial with last name, and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 42, "selected_rules": [ "initial_only_first_name", "swap_random_letter" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 42 } } }, { "seed_identities_with_labels": [ { "name": "Ochur-Suge Mongush (latin)", "dob": "1993-3-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Muhammad Al-Sunaydar (latin)", "dob": "1987-3-5", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "елена туров (cyrillic)", "dob": "1949-02-09", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "luana mota (latin)", "dob": "1986-10-24", "address": "Pacific Ocean", "label": "negative", "script": "latin" }, { "name": "Aleksandr Gaevoi (latin)", "dob": "1986-6-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "maria lee (latin)", "dob": "1949-12-26", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "andreia assunção (latin)", "dob": "1934-07-19", "address": "Pitcairn", "label": "negative", "script": "latin" }, { "name": "simão morais (latin)", "dob": "1988-03-29", "address": "Niue", "label": "negative", "script": "latin" }, { "name": "danielle blondel (latin)", "dob": "1981-10-06", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "diogo cardoso (latin)", "dob": "1930-09-29", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Sergei Serdiukov (latin)", "dob": "1987-6-23", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "antónio freitas (latin)", "dob": "2001-03-25", "address": "Seicheles", "label": "negative", "script": "latin" }, { "name": "joséphine millet (latin)", "dob": "1965-07-25", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "богданна засенко (cyrillic)", "dob": "1956-06-24", "address": "Guinea", "label": "negative", "script": "cyrillic" }, { "name": "Дзмітрьій Замулевіч (cyrillic)", "dob": "1974-5-7", "address": "Belarus", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate 14 variations of {name} ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (70% Light, 30% Medium). Approximately 31% of the total 14 variations should follow these rule-based transformations: Replace first consonant with different consonant, replace last consonant with different consonant, replace second consonant with different consonant, replace third consonant with different consonant, replace fourth consonant with different consonant, replace fifth consonant with different consonant, replace sixth consonant with different consonant, and replace random consonants with different consonants.. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 31, "selected_rules": [ "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 31 } } }, { "seed_identities_with_labels": [ { "name": "patricia marques (latin)", "dob": "1950-01-12", "address": "Corée du Nord", "label": "negative", "script": "latin" }, { "name": "emmanuel hoarau (latin)", "dob": "1970-07-14", "address": "Norfolk (Îles)", "label": "negative", "script": "latin" }, { "name": "richard girard (latin)", "dob": "1985-04-11", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "jamie larsen (latin)", "dob": "1958-01-01", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Vasiliy Yurchenko (latin)", "dob": "1960-9-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "michelle olson (latin)", "dob": "1944-12-21", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "галина чаленко (cyrillic)", "dob": "1985-03-21", "address": "Bermuda", "label": "negative", "script": "cyrillic" }, { "name": "margot godard (latin)", "dob": "1992-05-13", "address": "Vierges britanniques (Îles)", "label": "negative", "script": "latin" }, { "name": "gabriel lopez (latin)", "dob": "1960-04-24", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Vladimir Turin (latin)", "dob": "1958-12-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "عالم كمال (arabic)", "dob": "1988-05-27", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "audrey rodrigues (latin)", "dob": "2007-08-11", "address": "Soudan", "label": "negative", "script": "latin" }, { "name": "Sergei Arenin (latin)", "dob": "1958-8-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Faysal al-Wadi (latin)", "dob": "1976-12-15", "address": "Malta", "label": "positive", "script": "latin" }, { "name": "Віктор Емеляненко (cyrillic)", "dob": "1953-10-11", "address": "Ukraine", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate exactly 13 execution vectors for {name}, ensuring phonetic similarity by producing 30% Light variations like \"L{name}\", 40% Medium variations such as \"{name}y\", and 30% Far variations similar to \"F{name}r\"; and orthographic similarity by generating 50% Light variations like \"{n}{ame}\" and 50% Medium variations similar to \"{nam}e\". Approximately 22% of the total 13 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Replace double letters with a single letter ({name} becomes {nmae}), Convert name to initials (J.Smith becomes \"JS\"), and Remove a random vowel ({name} becomes \" nm{t}\"). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 22, "selected_rules": [ "replace_double_letters_with_single_letter", "shorten_name_to_initials", "remove_random_vowel" ], "rule_descriptions": { "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "shorten_name_to_initials": "Convert name to initials", "remove_random_vowel": "Remove a random vowel" }, "percentage": 22 } } }, { "seed_identities_with_labels": [ { "name": "Роман Старовойт (cyrillic)", "dob": "1972-1-20", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "stéphane lecoq (latin)", "dob": "1964-09-28", "address": "États-Unis", "label": "negative", "script": "latin" }, { "name": "pénélope weber (latin)", "dob": "1966-05-21", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "наина михайлова (cyrillic)", "dob": "1979-02-08", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "Muhammad Fachry (latin)", "dob": "1968-2-18", "address": "Indonesia", "label": "positive", "script": "latin" }, { "name": "Mohamed Ibrahim (latin)", "dob": "1977-4-5", "address": "Brazil", "label": "positive", "script": "latin" }, { "name": "sophie baron (latin)", "dob": "1956-12-02", "address": "Tokelau", "label": "negative", "script": "latin" }, { "name": "anastasie marques (latin)", "dob": "1931-03-23", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "che ramírez (latin)", "dob": "1972-07-09", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "Vladimir Turin (latin)", "dob": "1958-12-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "david bonneau (latin)", "dob": "1975-09-01", "address": "Seychelles", "label": "negative", "script": "latin" }, { "name": "sarah holder (latin)", "dob": "1957-11-26", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "нифонт кабанова (cyrillic)", "dob": "1975-01-22", "address": "Saint Martin", "label": "negative", "script": "cyrillic" }, { "name": "eugène étienne (latin)", "dob": "1930-06-02", "address": "Hongrie", "label": "negative", "script": "latin" }, { "name": "Kakha SHUSHANASHVILI (latin)", "dob": "1972-2-8", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 44% of variations that follow: Additionally, generate variations that perform these transformations: Swap adjacent consonants, Use first name initial with last name, and Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 44, "selected_rules": [ "swap_adjacent_consonants", "initial_only_first_name", "name_parts_permutations" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "initial_only_first_name": "Use first name initial with last name", "name_parts_permutations": "Reorder name parts" }, "percentage": 44 } } }, { "seed_identities_with_labels": [ { "name": "mark green (latin)", "dob": "1962-10-16", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "david hill (latin)", "dob": "1927-11-02", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Евгения Васильева (cyrillic)", "dob": "1979-2-20", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "marcel hernandez (latin)", "dob": "1941-11-19", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "caleb young (latin)", "dob": "1952-01-28", "address": "Saudi Arabia", "label": "negative", "script": "latin" }, { "name": "Zelimir Petrovic (latin)", "dob": "1981-9-1", "address": "Serbia", "label": "positive", "script": "latin" }, { "name": "sherry jackson (latin)", "dob": "2002-04-24", "address": "Seychelles", "label": "negative", "script": "latin" }, { "name": "ابتهال الأوس (arabic)", "dob": "1963-07-23", "address": "Guatemala", "label": "negative", "script": "arabic" }, { "name": "Oleksiy MOZHOVY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Malek Ruben (latin)", "dob": "1960-1-1", "address": "South Sudan", "label": "positive", "script": "latin" }, { "name": "Myo'ng-hun Ri (latin)", "dob": "1969-3-14", "address": "Korea, North", "label": "positive", "script": "latin" }, { "name": "amy mccoy (latin)", "dob": "1995-02-14", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "alexis hunter (latin)", "dob": "1927-08-15", "address": "Malaysia", "label": "negative", "script": "latin" }, { "name": "john adkins (latin)", "dob": "1945-12-31", "address": "Saint Martin", "label": "negative", "script": "latin" }, { "name": "милован соболев (cyrillic)", "dob": "1947-11-10", "address": "Russia", "label": "High Risk", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 7 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 45% of variations that follow: Additionally, generate variations that perform these transformations: Remove all spaces, Use first name initial with last name, and Swap adjacent consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 45, "selected_rules": [ "remove_all_spaces", "initial_only_first_name", "swap_adjacent_consonants" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "initial_only_first_name": "Use first name initial with last name", "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 45 } } }, { "seed_identities_with_labels": [ { "name": "tito atienza (latin)", "dob": "1942-04-29", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "julien ribeiro (latin)", "dob": "1946-02-26", "address": "Macédoine du Nord", "label": "negative", "script": "latin" }, { "name": "Galina Danchikova (latin)", "dob": "1954-8-13", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "caroline collet (latin)", "dob": "1952-07-29", "address": "Cayman (Îles)", "label": "negative", "script": "latin" }, { "name": "anouk samson (latin)", "dob": "1939-04-20", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "denis delattre (latin)", "dob": "1981-12-26", "address": "Niger", "label": "negative", "script": "latin" }, { "name": "michelle philippe (latin)", "dob": "1936-01-19", "address": "Tonga", "label": "negative", "script": "latin" }, { "name": "stéphane daniel (latin)", "dob": "1980-06-26", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "агатопод мераков (cyrillic)", "dob": "2005-06-03", "address": "Montserrat", "label": "negative", "script": "cyrillic" }, { "name": "patricia palomares (latin)", "dob": "1939-10-24", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "سوگند ترکاشوند (arabic)", "dob": "1953-07-09", "address": "Iran", "label": "High Risk", "script": "arabic" }, { "name": "Muhammad Dayri (latin)", "dob": "1961-6-6", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Vildan Zinnurov (latin)", "dob": "1964-1-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Сяргей Зубкоў (cyrillic)", "dob": "1975-8-21", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Osman Hamid (latin)", "dob": "1966-1-1", "address": "Sudan", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 70% Light, 30% Medium, and also include 12% of variations that follow: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), and Abbreviate name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 12, "selected_rules": [ "add_random_trailing_title", "shorten_name_to_abbreviations" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 12 } } }, { "seed_identities_with_labels": [ { "name": "Mukharbek Barakhoyev (latin)", "dob": "1971-1-4", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "乾培 施 (chinese)", "dob": "1989-7-9", "address": "China", "label": "positive", "script": "chinese" }, { "name": "josé antonio prats (latin)", "dob": "1975-07-21", "address": "Haití", "label": "negative", "script": "latin" }, { "name": "andrés amores (latin)", "dob": "1975-11-07", "address": "Jordania", "label": "negative", "script": "latin" }, { "name": "Malek Ruben (latin)", "dob": "1960-1-1", "address": "South Sudan", "label": "positive", "script": "latin" }, { "name": "severo mármol (latin)", "dob": "1992-11-06", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "Ch'o'l-man Han (latin)", "dob": "1978-5-6", "address": "China", "label": "positive", "script": "latin" }, { "name": "eusebia ferrer (latin)", "dob": "1971-08-24", "address": "Granada", "label": "negative", "script": "latin" }, { "name": "guy marty (latin)", "dob": "1988-11-24", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "شافع أبا الخيل (arabic)", "dob": "1973-04-08", "address": "Zimbabwe", "label": "negative", "script": "arabic" }, { "name": "Ihar Kenjuch (latin)", "dob": "1980-1-21", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "lucy ramos (latin)", "dob": "1941-11-24", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "roger grondin (latin)", "dob": "1956-08-14", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "محمدیاسین موسوی (arabic)", "dob": "2000-03-07", "address": "Iran", "label": "High Risk", "script": "arabic" }, { "name": "dafne urrutia (latin)", "dob": "1951-06-06", "address": "República Democrática Popular Lao", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 24% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random vowel, and Replace random consonants with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 24, "selected_rules": [ "remove_random_vowel", "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 24 } } }, { "seed_identities_with_labels": [ { "name": "benoît poirier (latin)", "dob": "1951-10-28", "address": "Botswana", "label": "negative", "script": "latin" }, { "name": "timothée de oliveira (latin)", "dob": "1934-05-12", "address": "Vanuatu", "label": "negative", "script": "latin" }, { "name": "سيّد الخماش (arabic)", "dob": "1947-12-06", "address": "Congo", "label": "negative", "script": "arabic" }, { "name": "amélie monnier (latin)", "dob": "1930-04-08", "address": "Lettonie", "label": "negative", "script": "latin" }, { "name": "astrid ledoux (latin)", "dob": "1970-02-01", "address": "Guinée Equatoriale", "label": "negative", "script": "latin" }, { "name": "tammy nelson (latin)", "dob": "1943-09-29", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "matthew prince (latin)", "dob": "1987-07-03", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Rayimbek Matraimov (latin)", "dob": "1971-5-3", "address": "Kyrgyzstan", "label": "positive", "script": "latin" }, { "name": "модест лебедева (cyrillic)", "dob": "1978-03-21", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "Віктор Емеляненко (cyrillic)", "dob": "1953-10-11", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "salomé garcia (latin)", "dob": "1930-11-21", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Alexey Sukhodolov (latin)", "dob": "1974-4-19", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Aleksandr Zhivlyuk (latin)", "dob": "1981-1-13", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "jeanne olivier (latin)", "dob": "1924-10-09", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Walid Al-Rawi (latin)", "dob": "1988-11-11", "address": "Iraq", "label": "positive", "script": "latin" } ], "query_template": "Generate 15 name variations of {name} ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 47% of the total 15 variations should follow these rule-based transformations: \n\nAdditionally, generate variations that: \n- Substitute each \"a\" with \"e\", except for words containing \"at\"\n- Replace all instances of \"tion\" with \"sion\"\n- Swap adjacent consonants\n- Remove one random letter from the middle of the name (no duplicates)\n- Transpose two adjacent syllables in multi-syllable names The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 47, "selected_rules": [ "swap_adjacent_consonants" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 47 } } }, { "seed_identities_with_labels": [ { "name": "لؤي الحواش (arabic)", "dob": "1948-03-08", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "camila pinho (latin)", "dob": "1941-08-05", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "jaime williams (latin)", "dob": "1978-05-31", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "lúcia barbosa (latin)", "dob": "1992-06-30", "address": "Singapura", "label": "negative", "script": "latin" }, { "name": "aaron marquez (latin)", "dob": "1974-01-26", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Sergey Omelnitskii (latin)", "dob": "1980-6-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Rafael Bastardo (latin)", "dob": "1978-9-22", "address": "Venezuela", "label": "positive", "script": "latin" }, { "name": "vítor nogueira (latin)", "dob": "1994-07-26", "address": "Peru", "label": "negative", "script": "latin" }, { "name": "Moalim Ibrahim (latin)", "dob": "1985-3-13", "address": "Somalia", "label": "positive", "script": "latin" }, { "name": "francisco rodrigues (latin)", "dob": "1976-03-30", "address": "Irlanda", "label": "negative", "script": "latin" }, { "name": "claire carpentier (latin)", "dob": "1968-07-09", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Rida Gull (latin)", "dob": "1981-12-25", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "якоб кривошапкова (cyrillic)", "dob": "1943-08-21", "address": "Grenada", "label": "negative", "script": "cyrillic" }, { "name": "Ирек Файзуллин (cyrillic)", "dob": "1962-12-8", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "vitória fernandes (latin)", "dob": "1932-01-16", "address": "Estados Unidos", "label": "negative", "script": "latin" } ], "query_template": "Generate 12 variations of {name}, ensuring phonetic similarity by creating 1.2 variations with a Light transformation, 6 variations with a Medium transformation, and 4.8 variations with a Far transformation, and orthographic similarity with 100% of the 12 variations being visually similar spellings. Approximately 48% of the total 12 variations should follow these rule-based transformations: Additionally, generate variations that abbreviate name parts.\n[VALIDATION HINTS]: Phonetic similarity: 10% Light. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 1.0 }, "rule_based": { "rule_percentage": 48, "selected_rules": [ "shorten_name_to_abbreviations" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 48 } } }, { "seed_identities_with_labels": [ { "name": "joseph walters (latin)", "dob": "1973-10-09", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "دانة سبيع (arabic)", "dob": "1943-04-13", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "james fletcher (latin)", "dob": "1973-12-12", "address": "Cayman Islands", "label": "negative", "script": "latin" }, { "name": "Галина Лукашенко (cyrillic)", "dob": "1955-1-1", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "ساجد حمامي (arabic)", "dob": "1999-09-01", "address": "Malta", "label": "negative", "script": "arabic" }, { "name": "Abu-'Ubaydah Al-Agha (latin)", "dob": "1964-5-2", "address": "Saudi Arabia", "label": "positive", "script": "latin" }, { "name": "Aleksander Lukashenko (latin)", "dob": "1954-8-31", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "lindsay flores (latin)", "dob": "2005-03-12", "address": "Falkland Islands (Malvinas)", "label": "negative", "script": "latin" }, { "name": "courtney ingram (latin)", "dob": "1997-01-22", "address": "Korea", "label": "negative", "script": "latin" }, { "name": "joel hughes (latin)", "dob": "2005-01-16", "address": "Lesotho", "label": "negative", "script": "latin" }, { "name": "céline pineau (latin)", "dob": "2001-07-20", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "raúl carlos (latin)", "dob": "1960-07-20", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Bella Chen (latin)", "dob": "1992-11-15", "address": "China", "label": "positive", "script": "latin" }, { "name": "marcel joseph (latin)", "dob": "1977-11-13", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Shahab Javanmardy (latin)", "dob": "1974-3-21", "address": "Iran", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 34% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random vowel, Use first name initial with last name, and Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 34, "selected_rules": [ "remove_random_vowel", "initial_only_first_name", "add_random_leading_title" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "initial_only_first_name": "Use first name initial with last name", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 34 } } }, { "seed_identities_with_labels": [ { "name": "amanda patel (latin)", "dob": "1938-02-19", "address": "Bahamas", "label": "negative", "script": "latin" }, { "name": "suzanne chrétien (latin)", "dob": "1981-09-10", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Tigran KHACHATUROV (latin)", "dob": "1979-2-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Олег Ларин (cyrillic)", "dob": "1973-10-9", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "عبد اللطيف الديسي (arabic)", "dob": "1965-12-29", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "Bair Zhamsuyev (latin)", "dob": "1959-1-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jose jackson (latin)", "dob": "1966-06-22", "address": "Tunisia", "label": "negative", "script": "latin" }, { "name": "lisa ware (latin)", "dob": "1998-02-16", "address": "Saint Helena", "label": "negative", "script": "latin" }, { "name": "Taher Kayali (latin)", "dob": "1960-7-11", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "david ballard (latin)", "dob": "1951-06-28", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "مايا خولان (arabic)", "dob": "1973-04-11", "address": "Maldives", "label": "negative", "script": "arabic" }, { "name": "erika wilkins (latin)", "dob": "1969-10-27", "address": "Papua New Guinea", "label": "negative", "script": "latin" }, { "name": "Mustafa Fawaz (latin)", "dob": "1964-9-10", "address": "Nigeria", "label": "positive", "script": "latin" }, { "name": "natalie morris (latin)", "dob": "1967-07-07", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "sandra burns (latin)", "dob": "1980-05-02", "address": "Nigeria", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 6 variations of {name} ensuring phonetic similarity (30% Light, e.g. {name} spelled backwards; 40% Medium, e.g. {name} with similar-sounding suffixes added or removed; 30% Far, e.g. {name} with significantly different pronunciation) and orthographic similarity (100% Light, e.g. {name} with single-character variations). Approximately 17% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Insert a random letter into the middle or end of the name; Swap adjacent syllables in the name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 1.0 }, "rule_based": { "rule_percentage": 17, "selected_rules": [ "insert_random_letter", "swap_adjacent_syllables" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter", "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 17 } } }, { "seed_identities_with_labels": [ { "name": "Татьяна Томилина (cyrillic)", "dob": "1966-4-18", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "anne costa (latin)", "dob": "1972-01-18", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "donna crawford (latin)", "dob": "1957-05-09", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Zajar KALASHOV (latin)", "dob": "1953-3-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Rafi Udin (latin)", "dob": "1966-6-3", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "جالا زبيد (arabic)", "dob": "1985-10-10", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "vasco barros (latin)", "dob": "1978-12-26", "address": "Brunei", "label": "negative", "script": "latin" }, { "name": "bernard marin (latin)", "dob": "1951-08-06", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "аверкий селиверстова (cyrillic)", "dob": "2001-06-23", "address": "Azerbaijan", "label": "negative", "script": "cyrillic" }, { "name": "ismael gomes (latin)", "dob": "1997-01-29", "address": "Santa Helena", "label": "negative", "script": "latin" }, { "name": "ismael silva (latin)", "dob": "1995-04-26", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Kiya Sadeghi (latin)", "dob": "1986-3-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "carlota correia (latin)", "dob": "1981-08-25", "address": "Tunísia", "label": "negative", "script": "latin" }, { "name": "teresa cunha (latin)", "dob": "1967-02-20", "address": "Geórgia", "label": "negative", "script": "latin" }, { "name": "Svetlana Emilianova (latin)", "dob": "1971-10-7", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 15 variations of {name} ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (100% Medium). Approximately 40% of the total 15 variations should follow these rule-based transformations: Reorder name parts, and Use first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 40, "selected_rules": [ "name_parts_permutations", "initial_only_first_name" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts", "initial_only_first_name": "Use first name initial with last name" }, "percentage": 40 } } }, { "seed_identities_with_labels": [ { "name": "Andrei Tikhonov (latin)", "dob": "1966-11-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "david sauvage (latin)", "dob": "1975-03-16", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "isidro donoso (latin)", "dob": "1990-08-25", "address": "Seychelles", "label": "negative", "script": "latin" }, { "name": "Любовь Глебова (cyrillic)", "dob": "1960-3-7", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "inés larrea (latin)", "dob": "1936-03-18", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "tamara urrutia (latin)", "dob": "1941-01-01", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "قائد بنو النجار (arabic)", "dob": "1935-04-29", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "roxana raya (latin)", "dob": "1958-04-11", "address": "Tayikistán", "label": "negative", "script": "latin" }, { "name": "Hassan AYACH (latin)", "dob": "1963-5-1", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "bautista montoya (latin)", "dob": "1955-01-21", "address": "República de Corea", "label": "negative", "script": "latin" }, { "name": "marie bigot (latin)", "dob": "1969-01-05", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Abdul RAHIM (latin)", "dob": "1961-12-8", "address": "Saudi Arabia", "label": "positive", "script": "latin" }, { "name": "ابتهاج يافع (arabic)", "dob": "1971-12-26", "address": "Tanzania", "label": "negative", "script": "arabic" }, { "name": "che villalonga (latin)", "dob": "1962-12-18", "address": "Uganda", "label": "negative", "script": "latin" }, { "name": "Mustafa Fawaz (latin)", "dob": "1964-9-10", "address": "Nigeria", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 8 execution vectors (name variations) for each target identity {name}, ensuring phonetic similarity (30% Light variations that are identical in pronunciation but with different spelling, 40% Medium variations that are similar in pronunciation but not identical, and 30% Far variations that have a significantly different pronunciation), and orthographic similarity (70% Light variations that are visually similar to the original name with minor modifications such as added or removed diacritical marks, and 30% Medium variations that are slightly more altered). Approximately 35% of the total 8 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Remove all spaces from {name}, and Replace a random consonant in {name} with a different consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 35, "selected_rules": [ "remove_all_spaces", "remove_random_consonant" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "remove_random_consonant": "Remove a random consonant" }, "percentage": 35 } } }, { "seed_identities_with_labels": [ { "name": "くみ子 佐々木 (chinese)", "dob": "2004-05-15", "address": "Oman", "label": "negative", "script": "chinese" }, { "name": "Aleksei Gnedovskii (latin)", "dob": "1964-12-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ann smith (latin)", "dob": "1971-10-07", "address": "Ethiopia", "label": "negative", "script": "latin" }, { "name": "maria barton (latin)", "dob": "2004-03-08", "address": "Indonesia", "label": "negative", "script": "latin" }, { "name": "raymond buisson (latin)", "dob": "1936-05-02", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "bryan morris (latin)", "dob": "1934-05-09", "address": "Christmas Island", "label": "negative", "script": "latin" }, { "name": "Dmitry Khotimskiy (latin)", "dob": "1973-6-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "abigail price (latin)", "dob": "1984-06-14", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Rao Anwar (latin)", "dob": "1959-1-1", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "сократ жукова (cyrillic)", "dob": "1938-08-16", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "kelly morales (latin)", "dob": "1924-11-15", "address": "Saint Martin", "label": "negative", "script": "latin" }, { "name": "brooke joseph (latin)", "dob": "1950-10-17", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Jaden Ayad (latin)", "dob": "1988-11-26", "address": "Zambia", "label": "positive", "script": "latin" }, { "name": "Валерий Семёнов (cyrillic)", "dob": "1960-9-16", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "yara azevedo (latin)", "dob": "1954-12-26", "address": "Mozambique", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 6 variations of {name}. Ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (10% Light, 50% Medium, 40% Far). Approximately 32% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that Convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 32, "selected_rules": [ "shorten_name_to_initials" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials" }, "percentage": 32 } } }, { "seed_identities_with_labels": [ { "name": "Abdullah Al-Faisal (latin)", "dob": "1963-9-10", "address": "Jamaica", "label": "positive", "script": "latin" }, { "name": "Omid Noori (latin)", "dob": "1976-3-12", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "arthur lemoine (latin)", "dob": "1933-07-02", "address": "Tunisie", "label": "negative", "script": "latin" }, { "name": "Nufail Akbar (latin)", "dob": "1972-3-26", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "victoire lombard (latin)", "dob": "1989-01-11", "address": "Malaisie", "label": "negative", "script": "latin" }, { "name": "ананий лаврентьева (cyrillic)", "dob": "1988-11-07", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "élise collet (latin)", "dob": "2003-05-24", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "hugues langlois (latin)", "dob": "1955-02-21", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Saladin Badi (latin)", "dob": "1957-5-23", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "élise lévêque (latin)", "dob": "1972-12-22", "address": "Fidji (République des)", "label": "negative", "script": "latin" }, { "name": "хранимир бобев (cyrillic)", "dob": "1997-04-23", "address": "Canada", "label": "negative", "script": "cyrillic" }, { "name": "susanne fournier (latin)", "dob": "1944-11-05", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "caroline turpin (latin)", "dob": "1994-04-30", "address": "Lithuanie", "label": "negative", "script": "latin" }, { "name": "paca guzman (latin)", "dob": "1991-06-18", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Ігар Маршалаў (cyrillic)", "dob": "1972-1-12", "address": "Belarus", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate 15 variations of {name}, ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (70% Light, 30% Medium). Approximately 19% of the total 15 variations should follow these rule-based transformations: \nReplace random consonants with different consonants; Replace random vowels with different vowels; and Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 19, "selected_rules": [ "replace_random_consonant_with_random_consonant", "replace_random_vowel_with_random_vowel", "remove_all_spaces" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "remove_all_spaces": "Remove all spaces" }, "percentage": 19 } } }, { "seed_identities_with_labels": [ { "name": "amanda canals (latin)", "dob": "1958-11-12", "address": "República de Corea", "label": "negative", "script": "latin" }, { "name": "Osama Ibrahim (latin)", "dob": "1976-4-2", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "ريمان المهنا (arabic)", "dob": "1954-08-22", "address": "Bermuda", "label": "negative", "script": "arabic" }, { "name": "román gordillo (latin)", "dob": "1941-03-23", "address": "República Árabe Siria", "label": "negative", "script": "latin" }, { "name": "tomasa capdevila (latin)", "dob": "1926-09-26", "address": "República Checa", "label": "negative", "script": "latin" }, { "name": "نجم الدّين قليبو (arabic)", "dob": "1959-01-20", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "victoria pinilla (latin)", "dob": "1984-04-18", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "corona valle (latin)", "dob": "1955-07-13", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "larry reyes (latin)", "dob": "1970-07-23", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Mukharby Ulbashev (latin)", "dob": "1960-5-15", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "cornelio peral (latin)", "dob": "1940-04-20", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "yolanda vega (latin)", "dob": "1981-08-07", "address": "Fiji", "label": "negative", "script": "latin" }, { "name": "Gennadii NIKULOV (latin)", "dob": "1967-2-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Андрей Шевченко (cyrillic)", "dob": "1965-5-29", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Viktor Boyarkin (latin)", "dob": "1958-10-12", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 8 variations of {name}, ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (100% Medium). Approximately 42% of the total 8 variations should follow these rule-based transformations: Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 42, "selected_rules": [ "remove_all_spaces" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces" }, "percentage": 42 } } }, { "seed_identities_with_labels": [ { "name": "أسرار بنو شيبان (arabic)", "dob": "1933-08-10", "address": "Syrian Arab Republic", "label": "negative", "script": "arabic" }, { "name": "Mustafa Fawaz (latin)", "dob": "1964-9-10", "address": "Nigeria", "label": "positive", "script": "latin" }, { "name": "ольга михеева (cyrillic)", "dob": "1969-04-04", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "Irina Bubnova (latin)", "dob": "1983-4-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Igor Zubarev (latin)", "dob": "1966-6-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "mary gonzalez (latin)", "dob": "1960-07-21", "address": "Poland", "label": "negative", "script": "latin" }, { "name": "samantha krueger (latin)", "dob": "1971-01-04", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "jasmine walker (latin)", "dob": "1954-08-18", "address": "Syrian Arab Republic", "label": "negative", "script": "latin" }, { "name": "josette brun (latin)", "dob": "1927-06-07", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Aliasghar Norouzi (latin)", "dob": "1962-11-11", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "daniel davis (latin)", "dob": "1928-07-09", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "ДМИТРИЙ МИХАЛЬЦОВ (cyrillic)", "dob": "1979-11-27", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "christine simpson (latin)", "dob": "1977-12-30", "address": "Jordan", "label": "negative", "script": "latin" }, { "name": "heather walls (latin)", "dob": "1943-11-09", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "kristin hendricks (latin)", "dob": "2000-12-13", "address": "Luxembourg", "label": "negative", "script": "latin" } ], "query_template": "Generate 9 variations of {name}. Ensuring phonetic similarity (30% Light, e.g. \"Thom*ss\", 40% Medium, e.g. \"Thomasen\", 30% Far, e.g. \"Tommasi\") and orthographic similarity (10% Light, e.g. \"Th0mas\", 30% Medium, e.g. \"Thomassen\", 60% Far, e.g. \"Tomassini\"). Approximately 48% of the total 9 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), and Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 48, "selected_rules": [ "add_random_trailing_title", "remove_random_consonant" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "remove_random_consonant": "Remove a random consonant" }, "percentage": 48 } } }, { "seed_identities_with_labels": [ { "name": "brigitte peron (latin)", "dob": "1929-06-19", "address": "Émirats arabes unis", "label": "negative", "script": "latin" }, { "name": "طالب بقشان (arabic)", "dob": "1925-07-05", "address": "Philippines", "label": "negative", "script": "arabic" }, { "name": "susanne bouvet (latin)", "dob": "1935-11-03", "address": "Rép. Dém. du Congo", "label": "negative", "script": "latin" }, { "name": "Олег Михайлов (cyrillic)", "dob": "1987-1-6", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "marc lejeune (latin)", "dob": "1975-05-14", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "aimé bernard (latin)", "dob": "1960-11-18", "address": "Saint-Kitts et Nevis", "label": "negative", "script": "latin" }, { "name": "michèle muller (latin)", "dob": "1951-06-03", "address": "Ouzbékistan", "label": "negative", "script": "latin" }, { "name": "mar pinto (latin)", "dob": "1938-08-31", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Myo'ng-chin Kim (latin)", "dob": "1980-2-18", "address": "China", "label": "positive", "script": "latin" }, { "name": "Alexander Malkevich (latin)", "dob": "1975-6-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Anastasiya BONDARENKO (latin)", "dob": "1978-4-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ферапонт самойлов (cyrillic)", "dob": "1970-09-07", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "Irina Bubnova (latin)", "dob": "1983-4-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "inès étienne (latin)", "dob": "1977-06-16", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "godofredo cobos (latin)", "dob": "1962-07-13", "address": "Nicaragua", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 17% of variations that follow: Additionally, generate variations that perform these transformations: Abbreviate name parts, Convert name to initials, and Replace double letters with a single letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 17, "selected_rules": [ "shorten_name_to_abbreviations", "shorten_name_to_initials", "replace_double_letters_with_single_letter" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "shorten_name_to_initials": "Convert name to initials", "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 17 } } }, { "seed_identities_with_labels": [ { "name": "валентина давыдов (cyrillic)", "dob": "1949-10-28", "address": "Swaziland", "label": "negative", "script": "cyrillic" }, { "name": "Aleksey MOZGOVOY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "lorenza aliaga (latin)", "dob": "1933-07-02", "address": "Indonesia", "label": "negative", "script": "latin" }, { "name": "Alexander Kurenkov (latin)", "dob": "1972-6-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "sergio checa (latin)", "dob": "1978-04-23", "address": "Bahamas", "label": "negative", "script": "latin" }, { "name": "sherry chan (latin)", "dob": "1929-11-08", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "исай крюкова (cyrillic)", "dob": "2006-10-29", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "Mariam Barreh (latin)", "dob": "1971-4-10", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "Катерина Губарева (cyrillic)", "dob": "1983-7-5", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "ángela clemente (latin)", "dob": "1933-04-30", "address": "República de Moldova", "label": "negative", "script": "latin" }, { "name": "Abulghasem Valagohar (latin)", "dob": "1969-8-15", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "florina vilalta (latin)", "dob": "1958-06-21", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "nicodemo escrivá (latin)", "dob": "1984-09-14", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "gabriel vieira (latin)", "dob": "1947-01-09", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "clotilde buendía (latin)", "dob": "1989-03-04", "address": "República Unida de Tanzanía", "label": "negative", "script": "latin" } ], "query_template": "Generate exactly 10 execution vectors (name variations) for each target identity {name}, ensuring phonetic similarity (100% Medium) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 38% of the total 10 variations should follow these rule-based transformations: Additionally, generate variations that: Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 38, "selected_rules": [ "remove_random_consonant" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant" }, "percentage": 38 } } }, { "seed_identities_with_labels": [ { "name": "eufemia palmer (latin)", "dob": "1980-03-05", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "zoé hardy (latin)", "dob": "1998-01-28", "address": "Territoire britannique de l'océan Indien", "label": "negative", "script": "latin" }, { "name": "пеко пондьов (cyrillic)", "dob": "1997-12-31", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "Ratka Kamceva (latin)", "dob": "1945-10-16", "address": "North Macedonia, The Republic of", "label": "positive", "script": "latin" }, { "name": "Reza Ebadzadeh (latin)", "dob": "1964-6-13", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "daniel ferrand (latin)", "dob": "1959-02-06", "address": "États-Unis", "label": "negative", "script": "latin" }, { "name": "曉明 張 (chinese)", "dob": "1963-9-3", "address": "China", "label": "positive", "script": "chinese" }, { "name": "hugo gonçalves (latin)", "dob": "1988-05-21", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "георги овнарски (cyrillic)", "dob": "1995-11-14", "address": "Sri Lanka", "label": "negative", "script": "cyrillic" }, { "name": "rémy fernandez (latin)", "dob": "1943-03-02", "address": "Bermudes (Les)", "label": "negative", "script": "latin" }, { "name": "Andrei Tikhonov (latin)", "dob": "1966-11-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Felix Okpoh (latin)", "dob": "1989-3-9", "address": "Nigeria", "label": "positive", "script": "latin" }, { "name": "michel blanchet (latin)", "dob": "1988-12-26", "address": "Inde", "label": "negative", "script": "latin" }, { "name": "catherine barry (latin)", "dob": "1980-09-05", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "éric gay (latin)", "dob": "1971-10-04", "address": "Mali", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 11 execution vectors for each target identity {name}, ensuring phonetic similarity (100% Medium) by applying medium-level sound-alike transformations. \n\nAdditionally, generate variations that perform these rule-based transformations: approximately 35% of the total 11 variations should follow these rules across the entire set:\n- Convert name to initials\n- Replace random vowels with different vowels\n- Duplicate a random letter\n\nFor orthographic similarity (approximating visually similar spellings), ensure that the following percentages of the 11 execution vectors are Medium and Far transformations respectively:\n- Orthographic Similarity: 30% (Medium)\n- Orthographic Similarity: 60% (Far)\n\nInclude Light-level (10%) orthographic variations as well for a total of 11 variations per identity. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 35, "selected_rules": [ "shorten_name_to_initials", "replace_random_vowel_with_random_vowel", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 35 } } }, { "seed_identities_with_labels": [ { "name": "denise marchal (latin)", "dob": "1925-08-10", "address": "Norfolk (Îles)", "label": "negative", "script": "latin" }, { "name": "Andrej Grigor'ev (latin)", "dob": "1963-1-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Jay PEREZ (latin)", "dob": "1973-9-15", "address": "Philippines", "label": "positive", "script": "latin" }, { "name": "luce richard (latin)", "dob": "2001-02-28", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "alexandrie morin (latin)", "dob": "1929-01-06", "address": "République centrafricaine", "label": "negative", "script": "latin" }, { "name": "catherine guillaume (latin)", "dob": "1949-12-02", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "victoire gilbert (latin)", "dob": "1965-10-24", "address": "Ukraine", "label": "negative", "script": "latin" }, { "name": "Zajar KALASHOV (latin)", "dob": "1953-3-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Vitaly Likhachev (latin)", "dob": "1964-2-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "benjamin diaz (latin)", "dob": "1973-12-09", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "andrée étienne (latin)", "dob": "1946-02-13", "address": "Namibie", "label": "negative", "script": "latin" }, { "name": "ольга харитонова (cyrillic)", "dob": "1942-07-02", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "tristan bonneau (latin)", "dob": "1974-11-30", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "وائل آل رفيع (arabic)", "dob": "1971-05-24", "address": "Bermuda", "label": "negative", "script": "arabic" }, { "name": "Иван Заворотный (cyrillic)", "dob": "1979-10-22", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 7 variations of the name {name}, ensuring phonetic similarity: 10% Light, 50% Medium, 40% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 36% of variations that follow: Additionally, generate variations that perform these transformations: Duplicate a random letter, Delete a random letter, and Replace random consonants with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 36, "selected_rules": [ "duplicate_random_letter_as_double_letter", "delete_random_letter", "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "delete_random_letter": "Delete a random letter", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 36 } } }, { "seed_identities_with_labels": [ { "name": "joseph maillet (latin)", "dob": "1960-10-10", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Дмитрий Артюхов (cyrillic)", "dob": "1988-2-7", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "lonnie young (latin)", "dob": "1942-12-08", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "robert bonilla (latin)", "dob": "1927-05-21", "address": "Lao People's Democratic Republic", "label": "negative", "script": "latin" }, { "name": "ronald davenport (latin)", "dob": "1963-10-06", "address": "Bouvet Island (Bouvetoya)", "label": "negative", "script": "latin" }, { "name": "Gibran Bassil (latin)", "dob": "1970-6-21", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "gabriela morais (latin)", "dob": "1972-04-12", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Igor Afanasyev (latin)", "dob": "1968-9-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Dmitry Pirog (latin)", "dob": "1980-6-27", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Mohsen NAFTCHI (latin)", "dob": "1988-2-16", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "kyle cohen (latin)", "dob": "1982-12-22", "address": "Tanzania", "label": "negative", "script": "latin" }, { "name": "святополк воронова (cyrillic)", "dob": "1966-07-03", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "lori wilson (latin)", "dob": "1995-09-17", "address": "Cambodia", "label": "negative", "script": "latin" }, { "name": "راجي الحكير (arabic)", "dob": "1940-06-08", "address": "Tanzania", "label": "negative", "script": "arabic" }, { "name": "alexandra assunção (latin)", "dob": "1978-09-26", "address": "Mozambique", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 33% of variations that follow: Additionally, generate variations that perform these transformations: Add a title prefix (Mr., Dr., etc.), and Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 33, "selected_rules": [ "add_random_leading_title", "remove_random_consonant" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "remove_random_consonant": "Remove a random consonant" }, "percentage": 33 } } }, { "seed_identities_with_labels": [ { "name": "Aleksey Mordashov (latin)", "dob": "1965-9-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "claudine collin (latin)", "dob": "1937-07-06", "address": "Géorgie", "label": "negative", "script": "latin" }, { "name": "christine gaillard (latin)", "dob": "1959-12-17", "address": "Féroé (Îles)", "label": "negative", "script": "latin" }, { "name": "зинови тодоров (cyrillic)", "dob": "2004-04-18", "address": "Martinique", "label": "negative", "script": "cyrillic" }, { "name": "armando galván (latin)", "dob": "1993-04-02", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Александр Волфович (cyrillic)", "dob": "1961-10-26", "address": "Cyprus", "label": "positive", "script": "cyrillic" }, { "name": "Sergey Topor-Gilka (latin)", "dob": "1970-2-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Muhammad Rahmun (latin)", "dob": "1957-4-1", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "victoria alfaro (latin)", "dob": "1988-12-01", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "josette thomas (latin)", "dob": "1930-06-11", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "پارسا احمدی (arabic)", "dob": "1974-09-17", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "Jay PEREZ (latin)", "dob": "1973-9-15", "address": "Philippines", "label": "positive", "script": "latin" }, { "name": "sylvie toussaint (latin)", "dob": "1968-04-09", "address": "São Tomé et Príncipe (Rép.)", "label": "negative", "script": "latin" }, { "name": "mia branco (latin)", "dob": "1944-01-21", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "patricia salmon (latin)", "dob": "1983-02-21", "address": "Chine (Rép. pop.)", "label": "negative", "script": "latin" } ], "query_template": "Generate 13 variations of {name} ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (100% Medium). Approximately 10% of the total 13 variations should follow these rule-based transformations: Add a title prefix (Mr., Dr., etc.) The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 10, "selected_rules": [ "add_random_leading_title" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 10 } } }, { "seed_identities_with_labels": [ { "name": "anaïs grenier (latin)", "dob": "1969-01-20", "address": "Hong Kong", "label": "negative", "script": "latin" }, { "name": "Михаил Шеремет (cyrillic)", "dob": "1971-5-23", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Artem Malyshev (latin)", "dob": "1988-2-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "aimée pons (latin)", "dob": "1924-12-21", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "زهدي البرغوثي (arabic)", "dob": "1949-09-01", "address": "Croatia", "label": "negative", "script": "arabic" }, { "name": "jeannine fischer (latin)", "dob": "1961-03-17", "address": "Trinité et Tobago", "label": "negative", "script": "latin" }, { "name": "jeanne picard (latin)", "dob": "1990-11-22", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Abu Aisyah (latin)", "dob": "1983-9-6", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "aimé duval (latin)", "dob": "1935-03-28", "address": "Saint-Kitts et Nevis", "label": "negative", "script": "latin" }, { "name": "agathe baron (latin)", "dob": "1961-10-21", "address": "Guadeloupe", "label": "negative", "script": "latin" }, { "name": "Tatyana Dyakonova (latin)", "dob": "1970-4-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "alexandrie girard (latin)", "dob": "1925-09-27", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "lara padilla (latin)", "dob": "1973-04-15", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Ko Oo (latin)", "dob": "1972-12-2", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "طامح سموم (arabic)", "dob": "1967-10-06", "address": "Syria", "label": "High Risk", "script": "arabic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 47% of variations that follow: Additionally, generate variations that perform these transformations: Replace random consonants with different consonants, Duplicate a random letter, and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 47, "selected_rules": [ "replace_random_consonant_with_random_consonant", "duplicate_random_letter_as_double_letter", "swap_random_letter" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 47 } } }, { "seed_identities_with_labels": [ { "name": "Artem Malyshev (latin)", "dob": "1988-2-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "isidora jurado (latin)", "dob": "1936-09-14", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "pepita romeu (latin)", "dob": "1983-08-12", "address": "Mauritania", "label": "negative", "script": "latin" }, { "name": "William ZHOU (latin)", "dob": "1977-3-25", "address": "China", "label": "positive", "script": "latin" }, { "name": "ricardo mendizábal (latin)", "dob": "1957-10-06", "address": "Rumania", "label": "negative", "script": "latin" }, { "name": "Mehdi Lashgarian (latin)", "dob": "1989-6-2", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "силаги кокошкова (cyrillic)", "dob": "1974-08-28", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "نبيل الشاويش (arabic)", "dob": "1953-04-08", "address": "Anguilla", "label": "negative", "script": "arabic" }, { "name": "tere ariza (latin)", "dob": "1942-11-02", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "ani godoy (latin)", "dob": "2002-10-15", "address": "Sierra Leona", "label": "negative", "script": "latin" }, { "name": "joann davis (latin)", "dob": "2001-06-24", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Mayya Bolotova (latin)", "dob": "1975-1-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Ольга Забралова (cyrillic)", "dob": "1980-3-30", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "adelardo herrero (latin)", "dob": "1955-04-08", "address": "Noruega", "label": "negative", "script": "latin" }, { "name": "victor vilalta (latin)", "dob": "1941-04-30", "address": "Venezuela", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 14 variations of {name} ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (50% Light, 50% Medium). Approximately 23% of the total 14 variations should follow these rule-based transformations: \n\n* Use first name initial with last name as \"FN{first letter of first name}L{last name}\"\n* Replace spaces with special characters by replacing all spaces in {name} with a mix of underscores, hyphens, and asterisks.\n\nThe remaining 77% of the variations should be generated randomly from these two similarity types. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 23, "selected_rules": [ "initial_only_first_name", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 23 } } }, { "seed_identities_with_labels": [ { "name": "nydia sebastián (latin)", "dob": "2001-07-15", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "james fernandez (latin)", "dob": "1936-01-05", "address": "Trinidad and Tobago", "label": "negative", "script": "latin" }, { "name": "ronald miller (latin)", "dob": "2005-08-10", "address": "Marshall Islands", "label": "negative", "script": "latin" }, { "name": "rachel clark (latin)", "dob": "1931-05-20", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Andrei Troshev (latin)", "dob": "1953-4-5", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Javier RIVERA (latin)", "dob": "1972-4-20", "address": "Honduras", "label": "positive", "script": "latin" }, { "name": "Dinar Gilmutdinov (latin)", "dob": "1969-8-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "gregory berger (latin)", "dob": "1972-11-18", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "تاج بارق (arabic)", "dob": "1976-06-19", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "feliciano vega (latin)", "dob": "2006-03-24", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "Миша ВАЦИЋ (cyrillic)", "dob": "1985-6-5", "address": "Serbia", "label": "positive", "script": "cyrillic" }, { "name": "Seyed Ghoreishi (latin)", "dob": "1964-9-27", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "sandra ramirez (latin)", "dob": "1978-03-24", "address": "Maldives", "label": "negative", "script": "latin" }, { "name": "борислав черінько (cyrillic)", "dob": "1925-04-04", "address": "Romania", "label": "negative", "script": "cyrillic" }, { "name": "louis moore (latin)", "dob": "1988-04-27", "address": "Saint Vincent and the Grenadines", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 25% of variations that follow: Additionally, generate variations that perform these transformations: Swap adjacent syllables, and Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 25, "selected_rules": [ "swap_adjacent_syllables", "add_random_trailing_title" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 25 } } }, { "seed_identities_with_labels": [ { "name": "Kamlesh Pattni (latin)", "dob": "1965-3-12", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "josette roger (latin)", "dob": "1955-10-26", "address": "Polynésie française", "label": "negative", "script": "latin" }, { "name": "william mace (latin)", "dob": "1930-07-18", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "timothée morel (latin)", "dob": "1955-05-07", "address": "Falkland (Île)", "label": "negative", "script": "latin" }, { "name": "امیرمهدی شادروان (arabic)", "dob": "1926-06-24", "address": "Panama", "label": "negative", "script": "arabic" }, { "name": "Ch'un-hwan RI (latin)", "dob": "1957-8-21", "address": "China", "label": "positive", "script": "latin" }, { "name": "denis clerc (latin)", "dob": "1941-09-21", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "иосиф иванов (cyrillic)", "dob": "1963-08-13", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "louis legendre (latin)", "dob": "1969-02-10", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "margaret petit (latin)", "dob": "2007-06-19", "address": "Suède", "label": "negative", "script": "latin" }, { "name": "Aleksandr Zhivlyuk (latin)", "dob": "1981-1-13", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "Reza Ebadzadeh (latin)", "dob": "1964-6-13", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "jeannine gosselin (latin)", "dob": "2006-10-28", "address": "Bulgarie", "label": "negative", "script": "latin" }, { "name": "grégoire boucher (latin)", "dob": "1996-05-05", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Клим Комаров (cyrillic)", "dob": "1996-6-21", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 50% Light, 50% Medium, and also include 26% of variations that follow: Additionally, generate variations that perform these transformations: Swap random adjacent letters, Abbreviate name parts, and Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 26, "selected_rules": [ "swap_random_letter", "shorten_name_to_abbreviations", "add_random_leading_title" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "shorten_name_to_abbreviations": "Abbreviate name parts", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 26 } } }, { "seed_identities_with_labels": [ { "name": "maría dolores domínguez (latin)", "dob": "1939-07-11", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "сократ шарова (cyrillic)", "dob": "1977-12-27", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "Fadlallah Taher (latin)", "dob": "1963-11-10", "address": "Guinea", "label": "positive", "script": "latin" }, { "name": "omar gonzález (latin)", "dob": "1960-01-13", "address": "Bahamas", "label": "negative", "script": "latin" }, { "name": "florencio sastre (latin)", "dob": "1939-01-27", "address": "Rwanda", "label": "negative", "script": "latin" }, { "name": "Александр Ветеневич (cyrillic)", "dob": "1976-6-20", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "plácido dalmau (latin)", "dob": "1996-01-28", "address": "Montenegro", "label": "negative", "script": "latin" }, { "name": "román calderón (latin)", "dob": "1950-11-15", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Andrei Shved (latin)", "dob": "1973-4-21", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "Evgeni Chernet (latin)", "dob": "1946-11-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jean marchal (latin)", "dob": "1936-06-11", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "marco santos (latin)", "dob": "1962-08-18", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Jamal Alshutti (latin)", "dob": "1964-8-20", "address": "Kuwait", "label": "positive", "script": "latin" }, { "name": "irene garcía (latin)", "dob": "1999-03-08", "address": "Qatar", "label": "negative", "script": "latin" }, { "name": "филип луланков (cyrillic)", "dob": "1944-06-06", "address": "Reunion", "label": "negative", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 50% Light, 50% Medium, and also include 47% of variations that follow: Additionally, generate variations that perform these transformations: Replace random consonants with different consonants, and Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 47, "selected_rules": [ "replace_random_consonant_with_random_consonant", "add_random_trailing_title" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 47 } } }, { "seed_identities_with_labels": [ { "name": "claire lambert (latin)", "dob": "1952-04-28", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "jennifer moore (latin)", "dob": "1976-08-21", "address": "Cote d'Ivoire", "label": "negative", "script": "latin" }, { "name": "mark hall (latin)", "dob": "1929-04-30", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Iryna Cherkasova (latin)", "dob": "1963-5-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "сократ калашников (cyrillic)", "dob": "1963-06-09", "address": "New Caledonia", "label": "negative", "script": "cyrillic" }, { "name": "Yuriy Shevchenko (latin)", "dob": "1966-12-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "tina johnson (latin)", "dob": "1955-07-16", "address": "United Arab Emirates", "label": "negative", "script": "latin" }, { "name": "ashley stewart (latin)", "dob": "1927-11-24", "address": "Seychelles", "label": "negative", "script": "latin" }, { "name": "'Abdul-Wahab AL-HUMAYQANI (latin)", "dob": "1972-8-4", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "Steven Liu (latin)", "dob": "1984-11-13", "address": "China", "label": "positive", "script": "latin" }, { "name": "август иванова (cyrillic)", "dob": "1929-01-24", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "ernest miller (latin)", "dob": "1949-11-13", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "joel gomes (latin)", "dob": "1986-03-20", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "amber williams (latin)", "dob": "1989-03-05", "address": "Tajikistan", "label": "negative", "script": "latin" }, { "name": "Дмитро Джинiкашвiлi (cyrillic)", "dob": "1987-7-17", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 100% Medium, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 55% of variations that follow: Additionally, generate variations that: Abbreviate name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 55, "selected_rules": [ "shorten_name_to_abbreviations" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 55 } } }, { "seed_identities_with_labels": [ { "name": "dorotea guijarro (latin)", "dob": "1951-08-23", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "moreno esteve (latin)", "dob": "1971-02-25", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "teófila oliver (latin)", "dob": "1940-12-06", "address": "Benin", "label": "negative", "script": "latin" }, { "name": "Дзмітрьій Замулевіч (cyrillic)", "dob": "1974-5-7", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Eduard Benderski (latin)", "dob": "1970-6-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "flor mate (latin)", "dob": "1964-12-17", "address": "Alemania", "label": "negative", "script": "latin" }, { "name": "Rao Anwar (latin)", "dob": "1959-1-1", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "aaron norton (latin)", "dob": "1973-05-19", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "thérèse blin (latin)", "dob": "1977-02-03", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Gennady Plaksin (latin)", "dob": "1961-8-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Touraj Zangeneh (latin)", "dob": "1958-8-5", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "donato ayala (latin)", "dob": "1970-09-12", "address": "San Marino", "label": "negative", "script": "latin" }, { "name": "тимофей горшков (cyrillic)", "dob": "1987-06-10", "address": "Eritrea", "label": "negative", "script": "cyrillic" }, { "name": "نبيه مطير (arabic)", "dob": "1970-11-25", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "ricarda mateo (latin)", "dob": "1974-12-06", "address": "Federación de Rusia", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 31% of variations that follow: Additionally, generate variations that: Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 31, "selected_rules": [ "remove_all_spaces" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces" }, "percentage": 31 } } }, { "seed_identities_with_labels": [ { "name": "joel waters (latin)", "dob": "1994-03-25", "address": "Bouvet Island (Bouvetoya)", "label": "negative", "script": "latin" }, { "name": "Геннадий Емельянов (cyrillic)", "dob": "1957-1-1", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "james smith (latin)", "dob": "1992-01-05", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Svetlana Emilianova (latin)", "dob": "1971-10-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "noa magalhães (latin)", "dob": "1962-10-28", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "sean hernandez (latin)", "dob": "1974-06-06", "address": "Mongolia", "label": "negative", "script": "latin" }, { "name": "phillip lyons (latin)", "dob": "1970-05-19", "address": "Cyprus", "label": "negative", "script": "latin" }, { "name": "robert hurley (latin)", "dob": "1939-10-24", "address": "Saint Barthelemy", "label": "negative", "script": "latin" }, { "name": "رئيس جرار (arabic)", "dob": "1977-10-03", "address": "Lithuania", "label": "negative", "script": "arabic" }, { "name": "valentina maia (latin)", "dob": "1988-09-15", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "telmo somoza (latin)", "dob": "1944-02-25", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "قحطان مطير (arabic)", "dob": "1955-11-09", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "Rajabi Al-Zahir (latin)", "dob": "1969-7-5", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "Ramon Carrizales (latin)", "dob": "1952-11-8", "address": "Venezuela", "label": "positive", "script": "latin" }, { "name": "Irina Bubnova (latin)", "dob": "1983-4-1", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 11 execution vectors (name variations) for each target identity \"{name}\". Ensure phonetic similarity with 100% Medium and orthographic similarity with 70% Light and 30% Medium. Approximately 20% of the total 11 variations should follow these rule-based transformations: Additionally, generate variations that: Replace random consonants with different consonants in {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 20, "selected_rules": [ "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 20 } } }, { "seed_identities_with_labels": [ { "name": "Irina Bubnova (latin)", "dob": "1983-4-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "رجا سلامة (arabic)", "dob": "1960-8-15", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "Pierre Obeid (latin)", "dob": "1959-12-14", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "élodie lesage (latin)", "dob": "1976-05-01", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "maria andrade (latin)", "dob": "1987-01-08", "address": "Suriname", "label": "negative", "script": "latin" }, { "name": "Iakov Antonov (latin)", "dob": "1972-11-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "lucie berger (latin)", "dob": "1983-03-20", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "michelle pierce (latin)", "dob": "1987-07-21", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "christopher daniels (latin)", "dob": "1946-05-28", "address": "Syrian Arab Republic", "label": "negative", "script": "latin" }, { "name": "elizabeth oconnor (latin)", "dob": "2003-10-29", "address": "Mexico", "label": "negative", "script": "latin" }, { "name": "Elena Milskaya (latin)", "dob": "1980-12-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "парфен беспалова (cyrillic)", "dob": "1944-11-12", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "felix garcía (latin)", "dob": "1993-03-06", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "عائد الوعري (arabic)", "dob": "1948-06-03", "address": "Sierra Leone", "label": "negative", "script": "arabic" }, { "name": "mark goodman (latin)", "dob": "1946-12-07", "address": "Lao People's Democratic Republic", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 37% of variations that follow: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), and Swap adjacent syllables. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 37, "selected_rules": [ "add_random_trailing_title", "swap_adjacent_syllables" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 37 } } }, { "seed_identities_with_labels": [ { "name": "joel middleton (latin)", "dob": "1987-11-16", "address": "Seychelles", "label": "negative", "script": "latin" }, { "name": "زهدي مرمش (arabic)", "dob": "1933-12-26", "address": "Antigua and Barbuda", "label": "negative", "script": "arabic" }, { "name": "jose hart (latin)", "dob": "1953-03-20", "address": "French Southern Territories", "label": "negative", "script": "latin" }, { "name": "Mohammad Alenezi (latin)", "dob": "1986-5-26", "address": "Kuwait", "label": "positive", "script": "latin" }, { "name": "peter malone (latin)", "dob": "1930-08-26", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "ربى تغلب بن وائل (arabic)", "dob": "1953-01-30", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "Elvis KELJMENDI (latin)", "dob": "1978-5-3", "address": "Kosovo", "label": "positive", "script": "latin" }, { "name": "paul lane (latin)", "dob": "1961-10-23", "address": "Ecuador", "label": "negative", "script": "latin" }, { "name": "Maria Faassen (latin)", "dob": "1985-4-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Dmitry Pleshevskiy (latin)", "dob": "1992-7-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Бату Хасиков (cyrillic)", "dob": "1980-6-28", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "timothy barnes (latin)", "dob": "1931-04-09", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "kathryn wright (latin)", "dob": "1974-12-01", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "michel rocher (latin)", "dob": "1964-11-12", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "lori davenport (latin)", "dob": "1935-04-08", "address": "Micronesia", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 10% Light, 50% Medium, 40% Far, and orthographic similarity: 50% Light, 50% Medium, and also include 24% of variations that follow: Additionally, generate variations that perform these transformations: Convert name to initials, Abbreviate name parts, and Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 24, "selected_rules": [ "shorten_name_to_initials", "shorten_name_to_abbreviations", "insert_random_letter" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "shorten_name_to_abbreviations": "Abbreviate name parts", "insert_random_letter": "Insert a random letter" }, "percentage": 24 } } }, { "seed_identities_with_labels": [ { "name": "lawrence livingston (latin)", "dob": "1971-01-23", "address": "Guernsey", "label": "negative", "script": "latin" }, { "name": "jerry guzman (latin)", "dob": "1967-02-03", "address": "France", "label": "negative", "script": "latin" }, { "name": "ممدوح الحداء (arabic)", "dob": "2000-11-17", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "victor kerr (latin)", "dob": "1961-08-23", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Дмитрий Киселев (cyrillic)", "dob": "1954-4-26", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "kevin preston (latin)", "dob": "1981-05-30", "address": "Wallis and Futuna", "label": "negative", "script": "latin" }, { "name": "camille hervé (latin)", "dob": "1951-09-17", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "nicole david (latin)", "dob": "1940-05-23", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "michael shannon (latin)", "dob": "1944-10-23", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "делян иликьов (cyrillic)", "dob": "1991-12-30", "address": "Cote d'Ivoire", "label": "negative", "script": "cyrillic" }, { "name": "Natalia Beglova (latin)", "dob": "1955-11-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "scott allen (latin)", "dob": "1970-05-31", "address": "Syrian Arab Republic", "label": "negative", "script": "latin" }, { "name": "Tetyana Tumilina (latin)", "dob": "1966-4-18", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Marcelin Makolo (latin)", "dob": "1985-11-30", "address": "Congo, Democratic Republic of the", "label": "positive", "script": "latin" }, { "name": "'Ali al-Shufa (latin)", "dob": "1991-7-25", "address": "Iran", "label": "positive", "script": "latin" } ], "query_template": "Generate 8 variations of {name}, ensuring phonetic similarity (100% Medium) and orthographic similarity (70% Light, 30% Medium). Approximately 56% of the total 8 variations should follow these rule-based transformations: \nAdditionally, generate variations that: Convert name to initials, Add suffixes (- Jr., - Sr., -I), Replace characters with similar sounding ones (0, O, Q), Remove or rearrange characters in the middle or at the end, Replace vowels with consonants and vice versa, Change word order or word separation. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 56, "selected_rules": [ "shorten_name_to_initials" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials" }, "percentage": 56 } } }, { "seed_identities_with_labels": [ { "name": "Sholban Kara-ool (latin)", "dob": "1966-7-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "cody jackson (latin)", "dob": "1997-09-20", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "хома парасюк (cyrillic)", "dob": "1947-08-23", "address": "Cayman Islands", "label": "negative", "script": "cyrillic" }, { "name": "Aleksandr Sarkisyan (latin)", "dob": "1946-8-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "حسين عطية (arabic)", "dob": "1965-12-19", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "роман соболева (cyrillic)", "dob": "1946-11-10", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "'Ali Sharara (latin)", "dob": "1968-9-25", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "ashley rowland (latin)", "dob": "2003-08-22", "address": "United States Minor Outlying Islands", "label": "negative", "script": "latin" }, { "name": "joshua cox (latin)", "dob": "1971-08-28", "address": "Greenland", "label": "negative", "script": "latin" }, { "name": "mary george (latin)", "dob": "1949-11-01", "address": "Malaysia", "label": "negative", "script": "latin" }, { "name": "cindy reynolds (latin)", "dob": "1956-06-22", "address": "Argentina", "label": "negative", "script": "latin" }, { "name": "lorraine vidal (latin)", "dob": "1937-03-15", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Abdul RAHIM (latin)", "dob": "1961-12-8", "address": "Saudi Arabia", "label": "positive", "script": "latin" }, { "name": "lucas pascal (latin)", "dob": "1947-05-12", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "philippe leclerc (latin)", "dob": "1963-10-28", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 7 execution vectors for each target identity {name}. Ensure phonetic similarity (sound-alike names) is represented by 20% Light, 60% Medium, and 20% Far. Also ensure orthographic similarity (visually similar spellings) is represented by 50% Light and 50% Medium variations. Approximately 15% of the total 7 variations should follow these rule-based transformations: Additionally, generate variations that: Delete a random letter from {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 15, "selected_rules": [ "delete_random_letter" ], "rule_descriptions": { "delete_random_letter": "Delete a random letter" }, "percentage": 15 } } }, { "seed_identities_with_labels": [ { "name": "Дмитрий Кузнецов (cyrillic)", "dob": "1975-3-5", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Nikolai Levine (latin)", "dob": "1985-5-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "megan mathis (latin)", "dob": "1967-01-06", "address": "Panama", "label": "negative", "script": "latin" }, { "name": "michelle davis (latin)", "dob": "1981-01-26", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Andrey Parshin (latin)", "dob": "1974-2-19", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "Hosseyn Akbari (latin)", "dob": "1961-6-17", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "ірена яремків (cyrillic)", "dob": "2003-02-02", "address": "American Samoa", "label": "negative", "script": "cyrillic" }, { "name": "andrea guerrero (latin)", "dob": "1938-08-16", "address": "San Marino", "label": "negative", "script": "latin" }, { "name": "caroline techer (latin)", "dob": "1959-08-29", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "matthew jones (latin)", "dob": "1975-10-02", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "поликарп красильников (cyrillic)", "dob": "1955-06-09", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "Mohammad Alenezi (latin)", "dob": "1986-5-26", "address": "Kuwait", "label": "positive", "script": "latin" }, { "name": "brittany kelley (latin)", "dob": "1929-01-26", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "deborah gomez (latin)", "dob": "1958-10-20", "address": "Wallis and Futuna", "label": "negative", "script": "latin" }, { "name": "david goodman (latin)", "dob": "1962-08-24", "address": "Turkey", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 38% of variations that follow: Additionally, generate variations that perform these transformations: Use first name initial with last name, Convert name to initials, and Replace random consonants with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 38, "selected_rules": [ "initial_only_first_name", "shorten_name_to_initials", "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name", "shorten_name_to_initials": "Convert name to initials", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 38 } } }, { "seed_identities_with_labels": [ { "name": "tomás vieira (latin)", "dob": "1942-12-08", "address": "Gabão", "label": "negative", "script": "latin" }, { "name": "filipe teixeira (latin)", "dob": "1966-06-18", "address": "Usbequistão", "label": "negative", "script": "latin" }, { "name": "mariana neves (latin)", "dob": "1971-03-17", "address": "Sudão", "label": "negative", "script": "latin" }, { "name": "Aleksandr Nezhentsev (latin)", "dob": "1992-11-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ермил афанасьев (cyrillic)", "dob": "1987-01-19", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "давид єрьоменко (cyrillic)", "dob": "1927-11-13", "address": "Brazil", "label": "negative", "script": "cyrillic" }, { "name": "Ibragim Zakriev (latin)", "dob": "1990-10-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Abualfazl Nazeri (latin)", "dob": "1969-9-14", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "sharon harris (latin)", "dob": "1967-07-02", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Gholamreza Eyni (latin)", "dob": "1975-7-11", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "德建 韩 (chinese)", "dob": "1989-12-18", "address": "China", "label": "positive", "script": "chinese" }, { "name": "beatriz garcia (latin)", "dob": "1959-10-25", "address": "Benim", "label": "negative", "script": "latin" }, { "name": "henrique matos (latin)", "dob": "1991-06-26", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "élise colin (latin)", "dob": "1996-06-09", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "peter martinez (latin)", "dob": "1950-12-17", "address": "Nigeria", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 13 variations of {name}, ensuring phonetic similarity (10% Light: replace consonants with similar ones, 30% Medium: add or remove one syllable, 60% Far: switch vowels) and orthographic similarity (30% Light: swap adjacent letters, 40% Medium: insert or delete a letter, 30% Far: change the case of every other letter). Approximately 35% of the total 13 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Remove a random consonant, Replace random consonants with different consonants, and Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 35, "selected_rules": [ "remove_random_consonant", "replace_random_consonant_with_random_consonant", "insert_random_letter" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "insert_random_letter": "Insert a random letter" }, "percentage": 35 } } }, { "seed_identities_with_labels": [ { "name": "joseph harris (latin)", "dob": "1972-04-18", "address": "Kazakhstan", "label": "negative", "script": "latin" }, { "name": "margot lopes (latin)", "dob": "1945-01-14", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "jesse washington (latin)", "dob": "1973-05-06", "address": "Bermuda", "label": "negative", "script": "latin" }, { "name": "بلسم بنو خالد (arabic)", "dob": "1925-02-13", "address": "Syria", "label": "High Risk", "script": "arabic" }, { "name": "Myint Swe (latin)", "dob": "1951-5-24", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "Abdullah Faisal (latin)", "dob": "1963-9-10", "address": "Jamaica", "label": "positive", "script": "latin" }, { "name": "Дмитрий Левин (cyrillic)", "dob": "1965-8-27", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "benjamin regnier (latin)", "dob": "1977-03-04", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "amber davis (latin)", "dob": "1928-08-15", "address": "Lao People's Democratic Republic", "label": "negative", "script": "latin" }, { "name": "Hamza Akbar (latin)", "dob": "1998-9-6", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "michael moore (latin)", "dob": "1992-12-24", "address": "Greenland", "label": "negative", "script": "latin" }, { "name": "Muhammad Al-Nabi (latin)", "dob": "1952-1-1", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "samuel heath (latin)", "dob": "1963-02-02", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "правда кесьов (cyrillic)", "dob": "1973-03-28", "address": "Ghana", "label": "negative", "script": "cyrillic" }, { "name": "luc colin (latin)", "dob": "1944-11-09", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 8 execution vectors for each target identity {name}, ensuring phonetic similarity with 50% Light and 50% Medium variations. Ensure orthographic similarity with 70% Light and 30% Medium variations. Approximately 29% of the total 8 variations should follow these rule-based transformations: \nAdditionally, generate variations that: \nDelete a random letter from {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 29, "selected_rules": [ "delete_random_letter" ], "rule_descriptions": { "delete_random_letter": "Delete a random letter" }, "percentage": 29 } } }, { "seed_identities_with_labels": [ { "name": "hugues seguin (latin)", "dob": "1941-06-18", "address": "Vatican (Etat du)", "label": "negative", "script": "latin" }, { "name": "Sergey NEVEROV (latin)", "dob": "1961-12-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Сергей Аземша (cyrillic)", "dob": "1974-7-17", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Tun Latt (latin)", "dob": "1969-2-6", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "david frazier (latin)", "dob": "1928-04-04", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "عتاب أبو شلبك (arabic)", "dob": "1962-12-07", "address": "North Macedonia", "label": "negative", "script": "arabic" }, { "name": "michèle mary (latin)", "dob": "1965-03-30", "address": "Ukraine", "label": "negative", "script": "latin" }, { "name": "Elena Yampolskaya (latin)", "dob": "1971-6-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "danielle hoareau (latin)", "dob": "1970-06-19", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "luce auger (latin)", "dob": "1972-08-24", "address": "Irlande", "label": "negative", "script": "latin" }, { "name": "أفراح بنو شعبة (arabic)", "dob": "1988-12-02", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "emmanuel pascal (latin)", "dob": "1925-08-01", "address": "Lesotho", "label": "negative", "script": "latin" }, { "name": "Antonis Vakanas (latin)", "dob": "1977-6-20", "address": "Cyprus", "label": "positive", "script": "latin" }, { "name": "chantal delahaye (latin)", "dob": "1936-12-06", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "alex delahaye (latin)", "dob": "1929-03-26", "address": "Central African Republic", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 10 execution vectors for {name}, ensuring phonetic similarity (100% Far) and orthographic similarity (10% Light, 30% Medium, 60% Far). Approximately 24% of the total 10 variations should follow these rule-based transformations: Replace spaces with special characters, and Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 24, "selected_rules": [ "replace_spaces_with_random_special_characters", "insert_random_letter" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "insert_random_letter": "Insert a random letter" }, "percentage": 24 } } }, { "seed_identities_with_labels": [ { "name": "simón becerra (latin)", "dob": "1958-02-17", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "guy faivre (latin)", "dob": "1942-01-01", "address": "Guam", "label": "negative", "script": "latin" }, { "name": "متين محجوب (arabic)", "dob": "1946-04-11", "address": "Azerbaijan", "label": "negative", "script": "arabic" }, { "name": "Sufian QUMU (latin)", "dob": "1959-6-26", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "Alexander Tanchev (latin)", "dob": "1985-8-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "hugo pinho (latin)", "dob": "1975-08-28", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "camille grégoire (latin)", "dob": "2005-01-17", "address": "Roumanie", "label": "negative", "script": "latin" }, { "name": "josette merle (latin)", "dob": "1941-10-06", "address": "Swaziland", "label": "negative", "script": "latin" }, { "name": "Сергей Иванов (cyrillic)", "dob": "1953-1-31", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "thomas prévost (latin)", "dob": "1950-05-21", "address": "Philippines", "label": "negative", "script": "latin" }, { "name": "Slimane Chabani (latin)", "dob": "1965-12-5", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Mibrak Yazid (latin)", "dob": "1969-1-1", "address": "Algeria", "label": "positive", "script": "latin" }, { "name": "guiomar dávila (latin)", "dob": "1926-06-07", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "велиянка яназов (cyrillic)", "dob": "1949-09-11", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "bernard marin (latin)", "dob": "2002-05-31", "address": "Mali", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 15 variations of {name}. Ensuring phonetic similarity, with approximately 20% being Light ({name} with slight pronunciation variation), 60% being Medium ({name} with moderate pronunciation variation), and 20% being Far ({name} with significant pronunciation variation). Additionally, ensure orthographic similarity with approximately 50% being Light ({name} with minimal spelling changes) and 50% being Medium ({name} with moderate spelling changes). Approximately 38% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that: Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 38, "selected_rules": [ "remove_random_vowel" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel" }, "percentage": 38 } } }, { "seed_identities_with_labels": [ { "name": "Николай Волобуев (cyrillic)", "dob": "1952-2-24", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "ceferino chaves (latin)", "dob": "1956-03-01", "address": "Tonga", "label": "negative", "script": "latin" }, { "name": "alan lee (latin)", "dob": "1980-06-24", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "савва белоусов (cyrillic)", "dob": "1934-07-25", "address": "Bangladesh", "label": "negative", "script": "cyrillic" }, { "name": "angelina manuel (latin)", "dob": "1929-06-19", "address": "Ecuador", "label": "negative", "script": "latin" }, { "name": "rené chevalier (latin)", "dob": "1997-08-05", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "lázaro valera (latin)", "dob": "2001-11-21", "address": "Bahrein", "label": "negative", "script": "latin" }, { "name": "jordán cerdá (latin)", "dob": "1934-04-16", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Chol Kim (latin)", "dob": "1964-9-27", "address": "China", "label": "positive", "script": "latin" }, { "name": "pilar gárate (latin)", "dob": "1935-09-25", "address": "República de Corea", "label": "negative", "script": "latin" }, { "name": "andré menard (latin)", "dob": "1970-02-18", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Hafiz SAEED (latin)", "dob": "1950-6-5", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "наина колобова (cyrillic)", "dob": "1966-07-16", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "Ibrahim Jathran (latin)", "dob": "1982-10-29", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "Omid Noori (latin)", "dob": "1976-3-12", "address": "Iran", "label": "positive", "script": "latin" } ], "query_template": "Generate 11 variations of {name}. Ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 20% of the total 11 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap random adjacent letters, and Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 20, "selected_rules": [ "swap_random_letter", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 20 } } }, { "seed_identities_with_labels": [ { "name": "richard le gall (latin)", "dob": "1985-01-15", "address": "Cap Vert", "label": "negative", "script": "latin" }, { "name": "Юрый Назаранка (cyrillic)", "dob": "1976-4-17", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "élodie étienne (latin)", "dob": "1941-11-06", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "benoît peltier (latin)", "dob": "1929-07-11", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Dzmitryri Braim (latin)", "dob": "1976-4-18", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "laetitia letellier (latin)", "dob": "1976-02-15", "address": "Wallis et Futuna (Îles)", "label": "negative", "script": "latin" }, { "name": "émile lefebvre (latin)", "dob": "1983-01-10", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Ahmad Shafa'i (latin)", "dob": "1968-5-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "martin andre (latin)", "dob": "1986-04-26", "address": "Îles Mineures Éloignées des États-Unis", "label": "negative", "script": "latin" }, { "name": "Alexander Kurenkov (latin)", "dob": "1972-6-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Wendy Morales (latin)", "dob": "1980-5-28", "address": "Nicaragua", "label": "positive", "script": "latin" }, { "name": "louis lemaître (latin)", "dob": "1996-02-08", "address": "Algérie", "label": "negative", "script": "latin" }, { "name": "كليم بجيلة (arabic)", "dob": "1961-10-16", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "amy alvarez (latin)", "dob": "1953-04-03", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "феоктист коновалова (cyrillic)", "dob": "1993-05-06", "address": "Paraguay", "label": "negative", "script": "cyrillic" } ], "query_template": "Generate 10 execution vectors ({name}), ensuring phonetic similarity with 30% Light, 40% Medium, and 30% Far sound-alike names. Also ensure orthographic similarity with 70% Light and 30% Medium visually similar spellings. Approximately 23% of the total 10 variations should follow these rule-based transformations: Additionally, generate variations that convert {name} to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 23, "selected_rules": [ "shorten_name_to_initials" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials" }, "percentage": 23 } } }, { "seed_identities_with_labels": [ { "name": "megan gibson (latin)", "dob": "1980-04-05", "address": "Turkmenistan", "label": "negative", "script": "latin" }, { "name": "Anatoliy Lappo (latin)", "dob": "1963-5-24", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "Artem Lifshits (latin)", "dob": "1992-12-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Krym Kazanokov (latin)", "dob": "1962-7-19", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Aysen Nikolayev (latin)", "dob": "1972-1-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "демид селезнев (cyrillic)", "dob": "1955-07-04", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "brian mccarthy (latin)", "dob": "1968-11-27", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Павел Андреев (cyrillic)", "dob": "1980-2-6", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "gloria davies (latin)", "dob": "1989-08-29", "address": "Korea", "label": "negative", "script": "latin" }, { "name": "catherine leconte (latin)", "dob": "1977-01-08", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "melissa hayes (latin)", "dob": "1946-11-08", "address": "Western Sahara", "label": "negative", "script": "latin" }, { "name": "amber vasquez (latin)", "dob": "1955-12-30", "address": "Bouvet Island (Bouvetoya)", "label": "negative", "script": "latin" }, { "name": "tommy rodriguez (latin)", "dob": "1945-05-12", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "pastor terrón (latin)", "dob": "1937-11-02", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "анжела бандура (cyrillic)", "dob": "1996-05-07", "address": "Honduras", "label": "negative", "script": "cyrillic" } ], "query_template": "Generate 12 variations of {name} ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (70% Light, 30% Medium). Approximately 46% of the total 12 variations should follow these rule-based transformations: Additionally, generate variations that: Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 46, "selected_rules": [ "add_random_leading_title" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 46 } } }, { "seed_identities_with_labels": [ { "name": "Александр Бортников (cyrillic)", "dob": "1951-11-15", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "gabrielle turpin (latin)", "dob": "1941-11-11", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Zaini ZAKARIA (latin)", "dob": "1967-5-16", "address": "Malaysia", "label": "positive", "script": "latin" }, { "name": "Dmitry Lelikov (latin)", "dob": "1968-5-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "willie garrett (latin)", "dob": "1942-03-04", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Alexander Demin (latin)", "dob": "1988-9-23", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "bryan johnson (latin)", "dob": "1988-12-04", "address": "Slovenia", "label": "negative", "script": "latin" }, { "name": "natalie olson (latin)", "dob": "1935-06-20", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Fadlallah Taher (latin)", "dob": "1963-11-10", "address": "Guinea", "label": "positive", "script": "latin" }, { "name": "timothy solomon (latin)", "dob": "1933-07-16", "address": "Mayotte", "label": "negative", "script": "latin" }, { "name": "james watson (latin)", "dob": "2001-09-20", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "rebecca holmes (latin)", "dob": "1948-03-20", "address": "Reunion", "label": "negative", "script": "latin" }, { "name": "عبد الرّحيم بني هاجر (arabic)", "dob": "1963-08-27", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "brian mcgrath (latin)", "dob": "1981-11-13", "address": "United States Minor Outlying Islands", "label": "negative", "script": "latin" }, { "name": "سعدون آل بن لافي (arabic)", "dob": "1927-12-23", "address": "Cayman Islands", "label": "negative", "script": "arabic" } ], "query_template": "Generate 14 variations of {name} ensuring phonetic similarity (30% Light, e.g. {name}e, {name}, {name}y; 40% Medium, e.g. {name}in, {name}an, {name}en; 30% Far, e.g. {name}on, {name}yn) and orthographic similarity (10% Light, e.g. {naym}; 30% Medium, e.g. {neim}, {nem}; 60% Far, e.g. {nemn}, {neem}). Approximately 25% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that: Replace double letters with a single letter (e.g. {namme} -> {name}). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 25, "selected_rules": [ "replace_double_letters_with_single_letter" ], "rule_descriptions": { "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 25 } } }, { "seed_identities_with_labels": [ { "name": "حمود الخماش (arabic)", "dob": "1994-06-30", "address": "Slovenia", "label": "negative", "script": "arabic" }, { "name": "céline boucher (latin)", "dob": "1929-06-29", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "carlota abril (latin)", "dob": "2002-02-24", "address": "Turquía", "label": "negative", "script": "latin" }, { "name": "Dinar Gilmutdinov (latin)", "dob": "1969-8-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Gennady Plaksin (latin)", "dob": "1961-8-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Sergei Kiriyenko (latin)", "dob": "1962-7-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "patricia briones (latin)", "dob": "1981-05-11", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "ruperta garriga (latin)", "dob": "1942-07-26", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "marc bertin (latin)", "dob": "1986-11-06", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Виктор Емельяненко (cyrillic)", "dob": "1953-10-11", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "genoveva iglesia (latin)", "dob": "1983-09-03", "address": "Maldivas", "label": "negative", "script": "latin" }, { "name": "Andrey Klishas (latin)", "dob": "1972-11-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "rubén borrego (latin)", "dob": "1975-09-06", "address": "San Marino", "label": "negative", "script": "latin" }, { "name": "اصيل جهينة (arabic)", "dob": "1955-11-06", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "claudia ayllón (latin)", "dob": "1925-05-01", "address": "Grecia", "label": "negative", "script": "latin" } ], "query_template": "Here is the query template:\n\nGenerate 15 variations of {name}. Ensuring phonetic similarity (100% Medium) and orthographic similarity (20% Light: {name} spelled slightly differently, 60% Medium: {name} with slight modifications, 20% Far: {name} changed significantly). Approximately 44% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Replace double letters with a single letter ({name} becomes {name} without double letters), Add a title prefix (Mr., Dr., etc.) ({name} becomes Mr. {name}), and Replace random vowels with different vowels ({name} becomes {name} with altered vowels). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 44, "selected_rules": [ "replace_double_letters_with_single_letter", "add_random_leading_title", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 44 } } }, { "seed_identities_with_labels": [ { "name": "joão amaral (latin)", "dob": "2002-10-30", "address": "Eslovénia", "label": "negative", "script": "latin" }, { "name": "михей тарасов (cyrillic)", "dob": "1940-06-04", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "Cholung Choe (latin)", "dob": "1973-5-16", "address": "Korea, North", "label": "positive", "script": "latin" }, { "name": "Serhiy Melnychuk (latin)", "dob": "1976-9-30", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "tomás gomes (latin)", "dob": "1972-01-06", "address": "Ilhas Virgens Americanas", "label": "negative", "script": "latin" }, { "name": "simone marchal (latin)", "dob": "1940-03-15", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Михаил Берулава (cyrillic)", "dob": "1950-8-3", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "josé mota (latin)", "dob": "1969-10-13", "address": "Vaticano", "label": "negative", "script": "latin" }, { "name": "Andre Nyamvumba (latin)", "dob": "1973-1-1", "address": "Rwanda", "label": "positive", "script": "latin" }, { "name": "Esteban VALENCIA (latin)", "dob": "1964-12-19", "address": "Mexico", "label": "positive", "script": "latin" }, { "name": "paulette pichon (latin)", "dob": "2007-04-06", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "benjamim ramos (latin)", "dob": "1960-12-28", "address": "Costa Rica", "label": "negative", "script": "latin" }, { "name": "andreia correia (latin)", "dob": "1944-03-06", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "leoncio gil (latin)", "dob": "1929-12-17", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "احسان رستمی (arabic)", "dob": "1930-05-16", "address": "Netherlands", "label": "negative", "script": "arabic" } ], "query_template": "Generate 10 variations of {name} ensuring phonetic similarity with the following distribution: \n30% Light variations generated using Soundex algorithm, \n40% Medium variations generated using Metaphone algorithm, \n30% Far variations generated using Levenshtein distance algorithm. \nApproximately 37% of the total 10 variations should follow these rule-based transformations: \nAdditionally, generate variations that perform these transformations: Remove a random consonant, Reorder name parts, and Swap adjacent syllables.\n[VALIDATION HINTS]: Orthographic similarity: 20% Light. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 37, "selected_rules": [ "remove_random_consonant", "name_parts_permutations", "swap_adjacent_syllables" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "name_parts_permutations": "Reorder name parts", "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 37 } } }, { "seed_identities_with_labels": [ { "name": "Oleksandr Melnychuk (latin)", "dob": "1965-1-17", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "دؤوب الكثيري (arabic)", "dob": "1958-04-30", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "lindsay guerra (latin)", "dob": "1981-01-25", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "david jones (latin)", "dob": "1928-02-15", "address": "Canada", "label": "negative", "script": "latin" }, { "name": "Sergey NEVEROV (latin)", "dob": "1961-12-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "teresa roy (latin)", "dob": "1970-08-25", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "nicole bertrand (latin)", "dob": "1951-05-28", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "эрнест воронцова (cyrillic)", "dob": "1940-03-24", "address": "Marshall Islands", "label": "negative", "script": "cyrillic" }, { "name": "áurea acuña (latin)", "dob": "1999-10-22", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Pavel Marinychev (latin)", "dob": "1978-10-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "amy stewart (latin)", "dob": "1937-01-10", "address": "Kyrgyz Republic", "label": "negative", "script": "latin" }, { "name": "Олег Кожемяко (cyrillic)", "dob": "1962-3-17", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Andrey Shishkin (latin)", "dob": "1959-3-13", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "gary hampton (latin)", "dob": "1932-07-21", "address": "Togo", "label": "negative", "script": "latin" }, { "name": "christopher mora (latin)", "dob": "1939-10-04", "address": "Sri Lanka", "label": "negative", "script": "latin" } ], "query_template": "Generate 14 name variations for {name}, ensuring phonetic similarity (100% Light) to detect sound-alike names. Additionally, generate variations that perform the following rule-based transformations approximately 31% of the time: Replace random vowels with different vowels, Swap random adjacent letters, and Remove a random vowel. Also, generate variations for orthographic similarity, consisting of 30% Light, 40% Medium, and 30% Far visually similar spellings.\n\nNote: The above response meets all formatting requirements specified in the prompt. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 1.0 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 31, "selected_rules": [ "replace_random_vowel_with_random_vowel", "swap_random_letter", "remove_random_vowel" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "swap_random_letter": "Swap random adjacent letters", "remove_random_vowel": "Remove a random vowel" }, "percentage": 31 } } }, { "seed_identities_with_labels": [ { "name": "melissa carneiro (latin)", "dob": "1984-01-09", "address": "Eslováquia", "label": "negative", "script": "latin" }, { "name": "lauren lee (latin)", "dob": "1930-12-30", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Ihor Rotenberg (latin)", "dob": "1973-5-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Muhammad Al-Nabi (latin)", "dob": "1952-1-1", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "artur miranda (latin)", "dob": "1990-08-22", "address": "Itália", "label": "negative", "script": "latin" }, { "name": "сергей попова (cyrillic)", "dob": "1961-06-12", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "Павел Тараканов (cyrillic)", "dob": "1982-6-21", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "vasco barbosa (latin)", "dob": "1925-11-15", "address": "Guiné-Bissau", "label": "negative", "script": "latin" }, { "name": "soraia ferreira (latin)", "dob": "1974-05-25", "address": "Guiana", "label": "negative", "script": "latin" }, { "name": "salvador vaz (latin)", "dob": "1975-09-12", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "نوّاف حمامي (arabic)", "dob": "1994-03-17", "address": "Estonia", "label": "negative", "script": "arabic" }, { "name": "Abu LLONGGO (latin)", "dob": "1972-5-15", "address": "Philippines", "label": "positive", "script": "latin" }, { "name": "adélaïde ferreira (latin)", "dob": "1990-02-11", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Mu Xiaolu (latin)", "dob": "1990-3-8", "address": "China", "label": "positive", "script": "latin" }, { "name": "candice smith (latin)", "dob": "1924-10-20", "address": "South Africa", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 15 variations of {name} ensuring phonetic similarity with 30% Light, 40% Medium, and 30% Far; and orthographic similarity with 20% Light, 60% Medium, and 20% Far. Approximately 33% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Replace {name} with {name}-modified; Remove a random vowel from {name}; and Swap adjacent letters in {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 33, "selected_rules": [ "remove_random_vowel", "swap_random_letter" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 33 } } }, { "seed_identities_with_labels": [ { "name": "thérèse louis (latin)", "dob": "1992-04-10", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "عمرو حمشو (arabic)", "dob": "1995-3-7", "address": "Syria", "label": "positive", "script": "arabic" }, { "name": "مُرضي آل معيض (arabic)", "dob": "1935-04-17", "address": "North Macedonia", "label": "negative", "script": "arabic" }, { "name": "édith marin (latin)", "dob": "1949-03-29", "address": "Guadeloupe", "label": "negative", "script": "latin" }, { "name": "marine michel (latin)", "dob": "1968-01-08", "address": "Bulgarie", "label": "negative", "script": "latin" }, { "name": "zoraida pons (latin)", "dob": "1993-11-20", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "Andrey Doukhvalov (latin)", "dob": "1957-12-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "antoinette gautier (latin)", "dob": "1958-11-21", "address": "Cook (Îles)", "label": "negative", "script": "latin" }, { "name": "brandon andrews (latin)", "dob": "1961-10-21", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "нифонт орлов (cyrillic)", "dob": "1955-11-15", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "jules guilbert (latin)", "dob": "2001-01-01", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Gulbuddin HEKHMARTYAR (latin)", "dob": "1949-8-1", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Iakov Antonov (latin)", "dob": "1972-11-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Hussam Al-Qatirji (latin)", "dob": "1982-1-11", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "noël barbier (latin)", "dob": "1995-06-18", "address": "Macédoine du Nord", "label": "negative", "script": "latin" } ], "query_template": "Generate exactly 15 execution vectors for each target identity {name}, ensuring phonetic similarity (sound-alike names) with 20% Light, 60% Medium, and 20% Far variations. Also ensure orthographic similarity (visually similar spellings) with 20% Light, 60% Medium, and 20% Far variations. Approximately 49% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that: Replace {name} with a new name having one letter changed, Replace {name} with a new name having two letters changed, Replace {name} with a new name having three or more letters changed, Replace some or all vowels in {name}, Replace some or all consonants in {name}, Insert a random letter before the first letter of {name}, Insert a random letter after the last letter of {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 49, "selected_rules": [ "insert_random_letter" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter" }, "percentage": 49 } } }, { "seed_identities_with_labels": [ { "name": "jason dixon (latin)", "dob": "1970-10-14", "address": "Chad", "label": "negative", "script": "latin" }, { "name": "robert gordon (latin)", "dob": "1997-04-08", "address": "Guernsey", "label": "negative", "script": "latin" }, { "name": "benoît auger (latin)", "dob": "1975-07-04", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "angela wang (latin)", "dob": "1931-12-03", "address": "Guyana", "label": "negative", "script": "latin" }, { "name": "édgar piñeiro (latin)", "dob": "1987-07-26", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "demetrio pereira (latin)", "dob": "1984-02-01", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "Naser Neser (latin)", "dob": "1963-4-20", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "melissa johnson (latin)", "dob": "1962-06-24", "address": "Rwanda", "label": "negative", "script": "latin" }, { "name": "عبد الرّؤوف بلغازي (arabic)", "dob": "1982-06-01", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "Yuri Nabokov (latin)", "dob": "1981-1-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "douglas winters (latin)", "dob": "1955-11-22", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "пейо цоцов (cyrillic)", "dob": "1994-08-13", "address": "Netherlands Antilles", "label": "negative", "script": "cyrillic" }, { "name": "Евгения Васильева (cyrillic)", "dob": "1979-2-20", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Vasiliy Golubev (latin)", "dob": "1957-1-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Aping JUNTARAPRAPORN (latin)", "dob": "1955-1-14", "address": "Thailand", "label": "positive", "script": "latin" } ], "query_template": "Generate 11 execution vectors for {name}, ensuring phonetic similarity with 50% Light variations using Levenshtein distance (e.g. \"Jhon\" becomes \"Jon\") and 50% Medium variations using Soundex (e.g. \"Robert\" becomes \"R163T\") and orthographic similarity with 50% Light variations using Metro distance (e.g. \"Michael\" becomes \"Micheal\") and 50% Medium variations using Jaro-Winkler distance (e.g. \"David\" becomes \"Daveid\"). Approximately 19% of the total 11 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations on {name}: Remove all spaces, Duplicate a random letter, and Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 19, "selected_rules": [ "remove_all_spaces", "duplicate_random_letter_as_double_letter", "insert_random_letter" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "insert_random_letter": "Insert a random letter" }, "percentage": 19 } } }, { "seed_identities_with_labels": [ { "name": "teresa stokes (latin)", "dob": "1996-05-04", "address": "Antarctica (the territory South of 60 deg S)", "label": "negative", "script": "latin" }, { "name": "salvador ramos (latin)", "dob": "1992-04-12", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "teresa rogers (latin)", "dob": "1975-02-22", "address": "Indonesia", "label": "negative", "script": "latin" }, { "name": "Peter Mbaga (latin)", "dob": "1976-9-25", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "Andrey Gurulev (latin)", "dob": "1967-10-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "sarah ortega (latin)", "dob": "1991-09-18", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "catherine martel (latin)", "dob": "1961-01-16", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "أسيل التركمان (arabic)", "dob": "1937-01-30", "address": "Uzbekistan", "label": "negative", "script": "arabic" }, { "name": "Иван Нарейко (cyrillic)", "dob": "1975-5-12", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "وجيه غطفان (arabic)", "dob": "1944-11-24", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "elizabeth skinner (latin)", "dob": "1979-08-07", "address": "Spain", "label": "negative", "script": "latin" }, { "name": "Aleksei MOZHOVYY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "veronica rodriguez (latin)", "dob": "1997-11-21", "address": "Turkey", "label": "negative", "script": "latin" }, { "name": "Ch'o'l-man Han (latin)", "dob": "1978-5-6", "address": "China", "label": "positive", "script": "latin" }, { "name": "marine perret (latin)", "dob": "2004-05-14", "address": "Cameroon", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 10% Light, 50% Medium, 40% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 34% of variations that follow: Additionally, generate variations that perform these transformations: Swap adjacent syllables, Add a title prefix (Mr., Dr., etc.), and Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 34, "selected_rules": [ "swap_adjacent_syllables", "add_random_leading_title", "remove_all_spaces" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "remove_all_spaces": "Remove all spaces" }, "percentage": 34 } } }, { "seed_identities_with_labels": [ { "name": "СЕРГЕЙ ЧЕРГЕЙКО (cyrillic)", "dob": "1986-8-27", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "diane johnson (latin)", "dob": "1985-12-27", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "alexandre bertrand (latin)", "dob": "1963-07-16", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "clémence bouvet (latin)", "dob": "1983-11-03", "address": "Pérou", "label": "negative", "script": "latin" }, { "name": "lucas schneider (latin)", "dob": "1975-02-25", "address": "Irlande", "label": "negative", "script": "latin" }, { "name": "Parviz Soltanizadeh (latin)", "dob": "1960-7-13", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Hussam Al-Qatirji (latin)", "dob": "1982-1-11", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Sveta Boyko (latin)", "dob": "1990-4-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Atul Gupta (latin)", "dob": "1968-6-14", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "محمدياسين مجتبوی (arabic)", "dob": "1985-06-03", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "teófilo amigó (latin)", "dob": "2001-12-03", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "gilles laine (latin)", "dob": "1930-03-13", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "pierre de oliveira (latin)", "dob": "1976-09-11", "address": "Mauritanie", "label": "negative", "script": "latin" }, { "name": "اعتماد آل الشيخ (arabic)", "dob": "1952-12-23", "address": "Saudi Arabia", "label": "negative", "script": "arabic" }, { "name": "éléonore delmas (latin)", "dob": "1942-11-10", "address": "Papouasie-Nouvelle-Guinée", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 54% of variations that follow: Additionally, generate variations that perform these transformations: Duplicate a random letter, Replace spaces with special characters, and Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 54, "selected_rules": [ "duplicate_random_letter_as_double_letter", "replace_spaces_with_random_special_characters", "add_random_trailing_title" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 54 } } }, { "seed_identities_with_labels": [ { "name": "адам куликов (cyrillic)", "dob": "1938-02-18", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "Rimma Utyasheva (latin)", "dob": "1952-1-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "adrienne albert (latin)", "dob": "1939-01-23", "address": "Australie", "label": "negative", "script": "latin" }, { "name": "Igor Zavyalov (latin)", "dob": "1960-1-19", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Андрей Коринец (cyrillic)", "dob": "1987-5-18", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "agnès sanchez (latin)", "dob": "1951-09-10", "address": "Guadeloupe", "label": "negative", "script": "latin" }, { "name": "marc gros (latin)", "dob": "1978-07-25", "address": "Haïti", "label": "negative", "script": "latin" }, { "name": "édouard langlois (latin)", "dob": "1982-03-16", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "jacqueline jacob (latin)", "dob": "1987-10-30", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Yevgeny KHODOTOV (latin)", "dob": "1964-3-21", "address": "Central African Republic", "label": "positive", "script": "latin" }, { "name": "marc perrier (latin)", "dob": "1982-06-21", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "émilie ledoux (latin)", "dob": "1971-11-16", "address": "Nouvelle-Zélande", "label": "negative", "script": "latin" }, { "name": "Dmitry Pirog (latin)", "dob": "1980-6-27", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "آريا جعفر پور (arabic)", "dob": "2002-08-19", "address": "Aruba", "label": "negative", "script": "arabic" }, { "name": "olivie labbé (latin)", "dob": "1943-11-23", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 8 variations of {name} ensuring phonetic similarity (10% Light, e.g. {name}, 50% Medium, e.g. {name2}, 40% Far) and orthographic similarity (20% Light, e.g. {name3}, 60% Medium, e.g. {name4}, 20% Far). Approximately 22% of the total 8 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Reorder name parts, Insert a random letter, and Replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 22, "selected_rules": [ "name_parts_permutations", "insert_random_letter", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts", "insert_random_letter": "Insert a random letter", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 22 } } }, { "seed_identities_with_labels": [ { "name": "bernadette masse (latin)", "dob": "2005-05-06", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "عبد الرّزاق العجلان (arabic)", "dob": "1981-08-29", "address": "Papua New Guinea", "label": "negative", "script": "arabic" }, { "name": "Ибрагим Закриев (cyrillic)", "dob": "1990-10-16", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Hamid Zareikajosangi (latin)", "dob": "1987-5-20", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "miriam arnaiz (latin)", "dob": "1939-10-19", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "Ruslan Lechkhadzhiev (latin)", "dob": "1965-7-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "marguerite masse (latin)", "dob": "1946-04-05", "address": "Gibraltar", "label": "negative", "script": "latin" }, { "name": "شافع زحلاوي (arabic)", "dob": "2004-10-15", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "kevin williams (latin)", "dob": "1927-10-02", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Dinar Gilmutdinov (latin)", "dob": "1969-8-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "serafina dávila (latin)", "dob": "1970-05-03", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "émilie aubry (latin)", "dob": "1962-05-12", "address": "Territoire britannique de l'océan Indien", "label": "negative", "script": "latin" }, { "name": "Yuri Shvytkin (latin)", "dob": "1965-5-24", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "colette paris (latin)", "dob": "1992-09-11", "address": "Cap Vert", "label": "negative", "script": "latin" }, { "name": "gérard tessier (latin)", "dob": "1945-08-22", "address": "Syrie", "label": "negative", "script": "latin" } ], "query_template": "Generate 15 variations of {name}, ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 52% of the total 15 variations should follow these rule-based transformations: Convert name to title case, convert name to upper case, add a space before each letter, remove all vowels, replace 'a' with 'e', replace 'i' with '1'. Additionally, generate variations that: Convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 52, "selected_rules": [ "shorten_name_to_initials" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials" }, "percentage": 52 } } }, { "seed_identities_with_labels": [ { "name": "marcus johnson (latin)", "dob": "1978-11-09", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Farhaad DOCKRAT (latin)", "dob": "1959-2-28", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "abilio tormo (latin)", "dob": "1957-08-28", "address": "Belice", "label": "negative", "script": "latin" }, { "name": "валентин котова (cyrillic)", "dob": "2005-01-07", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "Mahmoud Baghlani (latin)", "dob": "1978-3-20", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "alberto tello (latin)", "dob": "1990-11-21", "address": "Liechtenstein", "label": "negative", "script": "latin" }, { "name": "Белан Хамчиев (cyrillic)", "dob": "1960-12-7", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "نسيب آل صفوان (arabic)", "dob": "1953-12-21", "address": "Samoa", "label": "negative", "script": "arabic" }, { "name": "jordi moll (latin)", "dob": "1987-08-11", "address": "República Centroafricana", "label": "negative", "script": "latin" }, { "name": "Aliasghar Norouzi (latin)", "dob": "1962-11-11", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "kara sanchez (latin)", "dob": "1977-02-27", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "edu carrión (latin)", "dob": "1966-12-31", "address": "Suecia", "label": "negative", "script": "latin" }, { "name": "Touraj Zangeneh (latin)", "dob": "1958-8-5", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "estrella bueno (latin)", "dob": "2000-06-07", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "marta boada (latin)", "dob": "1958-08-09", "address": "Cuba", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 12 variations of {name}, ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 34% of the total 12 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap adjacent syllables, Convert name to initials, and Use first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 34, "selected_rules": [ "swap_adjacent_syllables", "shorten_name_to_initials", "initial_only_first_name" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables", "shorten_name_to_initials": "Convert name to initials", "initial_only_first_name": "Use first name initial with last name" }, "percentage": 34 } } }, { "seed_identities_with_labels": [ { "name": "Gibran Bassil (latin)", "dob": "1970-6-21", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "полина фомичева (cyrillic)", "dob": "2002-10-16", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "dorothée dijoux (latin)", "dob": "1957-03-25", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "лалко пъков (cyrillic)", "dob": "1945-10-19", "address": "United Kingdom", "label": "negative", "script": "cyrillic" }, { "name": "mara melo (latin)", "dob": "1929-07-24", "address": "Filipinas", "label": "negative", "script": "latin" }, { "name": "Yuri Valyaev (latin)", "dob": "1959-4-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "mateus fernandes (latin)", "dob": "1998-06-02", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "soraia andrade (latin)", "dob": "1963-07-15", "address": "Somália", "label": "negative", "script": "latin" }, { "name": "Павел Сорокин (cyrillic)", "dob": "1985-8-1", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "isabela branco (latin)", "dob": "1951-11-22", "address": "Quiribáti", "label": "negative", "script": "latin" }, { "name": "Arkady Ponomarev (latin)", "dob": "1956-5-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "joseph parker (latin)", "dob": "1990-04-15", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Omid ABEDSHAHI (latin)", "dob": "1983-1-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "jill farmer (latin)", "dob": "1992-06-08", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "kevin carvalho (latin)", "dob": "1930-04-22", "address": "Vanuatu", "label": "negative", "script": "latin" } ], "query_template": "Generate 12 name variations for {name}, ensuring phonetic similarity (20% Light, e.g. {name}y, 60% Medium, e.g. {name}ee, 20% Far, e.g. {name}ey) and orthographic similarity (30% Light, e.g. {nam}e, 40% Medium, e.g. {naye}, 30% Far, e.g. {naie}). Approximately 45% of the total 12 variations should follow these rule-based transformations: Replace spaces with special characters (e.g. {name}s becomes {names}). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 45, "selected_rules": [ "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 45 } } }, { "seed_identities_with_labels": [ { "name": "Nikolai Levine (latin)", "dob": "1985-5-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "lucas bailly (latin)", "dob": "1931-11-06", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "nathalie gros (latin)", "dob": "1960-09-15", "address": "Singapour", "label": "negative", "script": "latin" }, { "name": "Jihad Kansou (latin)", "dob": "1966-2-10", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "خليفة الخالدي (arabic)", "dob": "1954-11-02", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "альберт опанасенко (cyrillic)", "dob": "1924-12-09", "address": "Equatorial Guinea", "label": "negative", "script": "cyrillic" }, { "name": "Alexander Kurenkov (latin)", "dob": "1972-6-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "heather williams (latin)", "dob": "1981-09-26", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "felix tena (latin)", "dob": "1941-03-19", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "nath gaillard (latin)", "dob": "2002-03-16", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "madeleine benoit (latin)", "dob": "1970-05-09", "address": "Zambie", "label": "negative", "script": "latin" }, { "name": "odette blot (latin)", "dob": "1951-06-18", "address": "Seychelles", "label": "negative", "script": "latin" }, { "name": "Олег Герасим (cyrillic)", "dob": "1962-3-3", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "émile huet (latin)", "dob": "1983-09-28", "address": "Liechtenstein", "label": "negative", "script": "latin" }, { "name": "Abdul AL-MAGHREBI (latin)", "dob": "1970-7-1", "address": "Iran", "label": "positive", "script": "latin" } ], "query_template": "Generate 6 name variations for {name}, ensuring phonetic similarity (20% Light, e.g. \"Ameri\" from \"America\", 60% Medium, e.g. \"Amirik\" from \"America\", 20% Far) and orthographic similarity (20% Light, e.g. \"Amrika\" from \"America\", 60% Medium, e.g. \"Americia\", 20% Far). Approximately 47% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that: Substitute letters with similar sounds, e.g. \"p\" for \"b\", \"c\" for \"s\"; Transpose letters, e.g. \"americA\" from \"America\"; Swap adjacent letters, e.g. \"ameircA\" from \"America\".\n[VALIDATION HINTS]: Apply these rule-based transformations: Swap adjacent syllables. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 47, "selected_rules": [ "swap_adjacent_syllables" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 47 } } }, { "seed_identities_with_labels": [ { "name": "Hossein Akbari (latin)", "dob": "1961-6-17", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "lucie moreno (latin)", "dob": "1942-03-31", "address": "Samoa", "label": "negative", "script": "latin" }, { "name": "marianne david (latin)", "dob": "1950-08-26", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "сильвестр власов (cyrillic)", "dob": "1961-04-26", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "Yuriy Gudilin (latin)", "dob": "1983-6-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Рајко Кузмановић (cyrillic)", "dob": "1931-12-1", "address": "Bosnia and Herzegovina", "label": "positive", "script": "cyrillic" }, { "name": "margot bonnin (latin)", "dob": "1938-08-27", "address": "Gibraltar", "label": "negative", "script": "latin" }, { "name": "philippine laporte (latin)", "dob": "1971-01-10", "address": "Honduras", "label": "negative", "script": "latin" }, { "name": "kelly arroyo (latin)", "dob": "1965-09-11", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "michel robin (latin)", "dob": "1995-04-16", "address": "Brésil", "label": "negative", "script": "latin" }, { "name": "юстим шиян (cyrillic)", "dob": "1927-10-18", "address": "Botswana", "label": "negative", "script": "cyrillic" }, { "name": "Alexander Moor (latin)", "dob": "1974-1-6", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "apolinar cordero (latin)", "dob": "1954-12-04", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "juan josé enríquez (latin)", "dob": "1961-02-10", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Airat Gibatdinov (latin)", "dob": "1986-1-16", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 7 execution vectors for each target identity {name}, ensuring phonetic similarity with 20% Light, 60% Medium, and 20% Far transformations, as well as orthographic similarity with 20% Light, 60% Medium, and 20% Far transformations. Approximately 25% of the total 7 variations should follow these rule-based transformations: \nAdditionally, generate variations that Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 25, "selected_rules": [ "add_random_trailing_title" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 25 } } }, { "seed_identities_with_labels": [ { "name": "nicole gilbert (latin)", "dob": "1937-02-22", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Alexander Malkevich (latin)", "dob": "1975-6-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "любосмысл константинов (cyrillic)", "dob": "1944-10-03", "address": "Austria", "label": "negative", "script": "cyrillic" }, { "name": "سوراجو محمد (arabic)", "dob": "1979-7-3", "address": "United Arab Emirates", "label": "positive", "script": "arabic" }, { "name": "Omid Noori (latin)", "dob": "1976-3-12", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "susan launay (latin)", "dob": "1984-02-20", "address": "Sainte Lucie", "label": "negative", "script": "latin" }, { "name": "denis da silva (latin)", "dob": "1991-05-07", "address": "Ouzbékistan", "label": "negative", "script": "latin" }, { "name": "Krym Kazanokov (latin)", "dob": "1962-7-19", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "christophe labbé (latin)", "dob": "1940-05-30", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "andré chrétien (latin)", "dob": "1980-10-16", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Mikhail Afanasov (latin)", "dob": "1953-6-15", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "لوليا بنو الأحمر (arabic)", "dob": "1960-01-23", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "isaac royer (latin)", "dob": "1950-11-10", "address": "Belize", "label": "negative", "script": "latin" }, { "name": "charles lacombe (latin)", "dob": "2002-04-15", "address": "Belize", "label": "negative", "script": "latin" }, { "name": "brittany huber (latin)", "dob": "1971-01-30", "address": "South Sudan", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 14 variations of {name}. Ensuring phonetic similarity, create name variations that are Light (30%), Medium (40%), and Far (30) from the actual name. For orthographic similarity, generate variations that are Light (30%), Medium (40%), and Far (30) from the actual spelling.\n\nApproximately 27% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations on {name}: \n- Swap a random adjacent pair of letters\n- Insert a single random letter into the name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 27, "selected_rules": [ "swap_random_letter", "insert_random_letter" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "insert_random_letter": "Insert a random letter" }, "percentage": 27 } } }, { "seed_identities_with_labels": [ { "name": "Boris Kovalchuk (latin)", "dob": "1977-12-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "бронислав муравьев (cyrillic)", "dob": "1976-10-16", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "borja bonet (latin)", "dob": "1970-11-24", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "teodoro salamanca (latin)", "dob": "1993-08-06", "address": "Bahamas", "label": "negative", "script": "latin" }, { "name": "حسین حمصی (arabic)", "dob": "1982-10-27", "address": "Iran", "label": "positive", "script": "arabic" }, { "name": "Zachari KALASHOV (latin)", "dob": "1953-3-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Esteban VALENCIA (latin)", "dob": "1964-12-19", "address": "Mexico", "label": "positive", "script": "latin" }, { "name": "الينا طلوعی (arabic)", "dob": "1992-04-25", "address": "Kuwait", "label": "negative", "script": "arabic" }, { "name": "brunilda carreras (latin)", "dob": "1992-11-13", "address": "Austria", "label": "negative", "script": "latin" }, { "name": "aarón benet (latin)", "dob": "1966-03-03", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Mohammad Raad (latin)", "dob": "1955-8-28", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "margot gonzalez (latin)", "dob": "1967-07-02", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "cheryl rubio (latin)", "dob": "1960-10-01", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "teobaldo cordero (latin)", "dob": "1948-05-02", "address": "Uzbekistán", "label": "negative", "script": "latin" }, { "name": "candelaria cases (latin)", "dob": "1972-03-03", "address": "Comoras", "label": "negative", "script": "latin" } ], "query_template": "Generate exactly 7 execution vectors (name variations) for each target identity {name}. Ensure phonetic similarity by generating 30% of the variations as Light, 40% as Medium, and 30% as Far sound-alike names. For orthographic similarity, generate 50% of the variations with visually similar spellings that are Light and 50% with those that are Medium.\n\nApproximately 35% of the total 7 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Add a title prefix (Mr., Dr., etc.), and Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 35, "selected_rules": [ "add_random_leading_title", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 35 } } }, { "seed_identities_with_labels": [ { "name": "abilio burgos (latin)", "dob": "1983-07-26", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Александр Богомаз (cyrillic)", "dob": "1961-2-23", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "steven schmidt (latin)", "dob": "1977-11-08", "address": "Mongolia", "label": "negative", "script": "latin" }, { "name": "Yusuf al-Hatum (latin)", "dob": "1966-1-5", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "عاكف اميوني (arabic)", "dob": "1927-09-30", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "Vyacheslav Rossolay (latin)", "dob": "1981-10-17", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "Gibran Bassil (latin)", "dob": "1970-6-21", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "marcel durand (latin)", "dob": "1950-10-06", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "brian boyd (latin)", "dob": "1993-07-21", "address": "Cambodia", "label": "negative", "script": "latin" }, { "name": "alexis ramirez (latin)", "dob": "1999-12-10", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "andrée salmon (latin)", "dob": "2007-08-25", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "adam kent (latin)", "dob": "1950-09-28", "address": "Maldives", "label": "negative", "script": "latin" }, { "name": "Ismatullah Khalozai (latin)", "dob": "1995-1-1", "address": "Afghanistan", "label": "positive", "script": "latin" }, { "name": "тарас ґоляш (cyrillic)", "dob": "1942-12-04", "address": "Trinidad and Tobago", "label": "negative", "script": "cyrillic" }, { "name": "steven guerrero (latin)", "dob": "2003-04-10", "address": "Wallis and Futuna", "label": "negative", "script": "latin" } ], "query_template": "Generate 30 variations of {name}. Ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (100% Medium). Approximately 24% of the total 30 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Replace double letters with a single letter, Remove a random consonant, and Duplicate a random letter.\n[VALIDATION HINTS]: Exact number of variations: 15. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 24, "selected_rules": [ "replace_double_letters_with_single_letter", "remove_random_consonant", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "remove_random_consonant": "Remove a random consonant", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 24 } } }, { "seed_identities_with_labels": [ { "name": "Sergey Kozlov (latin)", "dob": "1960-5-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "édouard lefèvre (latin)", "dob": "1949-10-16", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "борис нестерова (cyrillic)", "dob": "1953-01-02", "address": "Equatorial Guinea", "label": "negative", "script": "cyrillic" }, { "name": "thierry bouchet (latin)", "dob": "1951-11-15", "address": "Pérou", "label": "negative", "script": "latin" }, { "name": "marguerite maillard (latin)", "dob": "2006-09-29", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "stéphanie gallet (latin)", "dob": "1971-04-29", "address": "Papouasie-Nouvelle-Guinée", "label": "negative", "script": "latin" }, { "name": "stéphane lopez (latin)", "dob": "1994-10-16", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Sergei Kudryashov (latin)", "dob": "1967-7-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "مقداد جديس (arabic)", "dob": "1955-04-26", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "Abu-'Ubaydah Al-Agha (latin)", "dob": "1964-5-2", "address": "Saudi Arabia", "label": "positive", "script": "latin" }, { "name": "Vladimir Resin (latin)", "dob": "1936-2-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Мария Прусакова (cyrillic)", "dob": "1983-9-4", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "rené valette (latin)", "dob": "1957-03-19", "address": "États-Unis", "label": "negative", "script": "latin" }, { "name": "théodore weber (latin)", "dob": "1990-07-07", "address": "Israël", "label": "negative", "script": "latin" }, { "name": "michael garcia (latin)", "dob": "1947-04-09", "address": "Nigeria", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 10 variations of {name}, ensuring phonetic similarity (20% Light, e.g. {name}y, {name}ie) and orthographic similarity (60% Medium, e.g. {name}a, {name}o) and (20% Far, e.g. {name}z, {name}x). Approximately 43% of the total 10 variations should follow these rule-based transformations: Additionally, generate variations that: Abbreviate name parts, e.g. (John Smith -> J.S., J-S).\n[VALIDATION HINTS]: Orthographic similarity: 100% Light. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 1.0 }, "rule_based": { "rule_percentage": 43, "selected_rules": [ "shorten_name_to_abbreviations" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 43 } } }, { "seed_identities_with_labels": [ { "name": "kelly hernandez (latin)", "dob": "1943-09-06", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "prudencia cuadrado (latin)", "dob": "1944-12-23", "address": "Sudán", "label": "negative", "script": "latin" }, { "name": "Николай Левичев (cyrillic)", "dob": "1953-5-28", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "tania valero (latin)", "dob": "1929-08-01", "address": "Guyana", "label": "negative", "script": "latin" }, { "name": "Oleksandr Melnychuk (latin)", "dob": "1965-1-17", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "atilio zamora (latin)", "dob": "1992-02-10", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "епифан гаврилов (cyrillic)", "dob": "1989-02-09", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "christophe masse (latin)", "dob": "1973-01-11", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "عبد القهّار الصالحي (arabic)", "dob": "1964-01-31", "address": "Northern Mariana Islands", "label": "negative", "script": "arabic" }, { "name": "Oleg Tkach (latin)", "dob": "1967-9-23", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "nilda santana (latin)", "dob": "1995-01-20", "address": "República Democrática del Congo", "label": "negative", "script": "latin" }, { "name": "dorothée sauvage (latin)", "dob": "1977-06-07", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Yusuf al-Hatum (latin)", "dob": "1966-1-5", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Oleg Nikolayev (latin)", "dob": "1969-12-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "lucía farré (latin)", "dob": "2005-11-22", "address": "Croacia", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 20% of variations that follow: Additionally, generate variations that perform these transformations: Replace spaces with special characters, Use first name initial with last name, and Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 20, "selected_rules": [ "replace_spaces_with_random_special_characters", "initial_only_first_name", "add_random_trailing_title" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "initial_only_first_name": "Use first name initial with last name", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 20 } } }, { "seed_identities_with_labels": [ { "name": "jeffrey stevens (latin)", "dob": "1968-02-08", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "kimberly harris (latin)", "dob": "1946-10-26", "address": "Russian Federation", "label": "negative", "script": "latin" }, { "name": "Kiya Sadeghi (latin)", "dob": "1986-3-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Firuza Kerimova (latin)", "dob": "1967-10-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "susan soto (latin)", "dob": "1988-05-06", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Михаило Развожаєв (cyrillic)", "dob": "1980-12-30", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "amanda sullivan (latin)", "dob": "1935-02-03", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "rebecca logan (latin)", "dob": "1994-08-28", "address": "Barbados", "label": "negative", "script": "latin" }, { "name": "Aleksei Gnedovskii (latin)", "dob": "1964-12-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "نشأت بلغازي (arabic)", "dob": "1959-09-28", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "jessica johnson (latin)", "dob": "2006-09-01", "address": "Lesotho", "label": "negative", "script": "latin" }, { "name": "Abu Arif (latin)", "dob": "1957-10-22", "address": "Saudi Arabia", "label": "positive", "script": "latin" }, { "name": "габи плюцова (cyrillic)", "dob": "2001-04-25", "address": "Belgium", "label": "negative", "script": "cyrillic" }, { "name": "megan smith (latin)", "dob": "1977-07-26", "address": "Saint Helena", "label": "negative", "script": "latin" }, { "name": "benedita torres (latin)", "dob": "2000-06-10", "address": "Angola", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 14 variations of {name}, ensuring phonetic similarity (20% Light, e.g. {name}ly, 60% Medium, e.g. {name}ian, 20% Far, e.g. {name}n) and orthographic similarity (30% Light, e.g. {name}ey, 40% Medium, e.g. {name}in, 30% Far, e.g. {name}on). Approximately 31% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap adjacent consonants, and Use first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 31, "selected_rules": [ "swap_adjacent_consonants", "initial_only_first_name" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "initial_only_first_name": "Use first name initial with last name" }, "percentage": 31 } } }, { "seed_identities_with_labels": [ { "name": "martin dufour (latin)", "dob": "1951-07-23", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "gérard olivier (latin)", "dob": "1941-12-20", "address": "Gibraltar", "label": "negative", "script": "latin" }, { "name": "Andrei Tikhonov (latin)", "dob": "1966-11-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "marine lefèvre (latin)", "dob": "1945-11-24", "address": "Vierges (Îles)", "label": "negative", "script": "latin" }, { "name": "chris conrad (latin)", "dob": "1937-02-01", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "АЛЯКСАНДР ЖАРСКІ (cyrillic)", "dob": "1971-12-14", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Dmitry Perminov (latin)", "dob": "1979-4-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Evgeni Chernet (latin)", "dob": "1946-11-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Oleksiy MOZHOVY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "diane imbert (latin)", "dob": "1957-03-19", "address": "Croatie", "label": "negative", "script": "latin" }, { "name": "natalie bullock (latin)", "dob": "1927-11-11", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "виктор лапина (cyrillic)", "dob": "1981-09-21", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "aimée chevallier (latin)", "dob": "1984-01-20", "address": "Portugal", "label": "negative", "script": "latin" }, { "name": "thomas wilson (latin)", "dob": "1951-09-28", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "пейко куртажова (cyrillic)", "dob": "1975-02-06", "address": "Luxembourg", "label": "negative", "script": "cyrillic" } ], "query_template": "Generate 15 variations of {name}, ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 14% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Abbreviate name parts, Insert a random letter, and Swap adjacent syllables. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 14, "selected_rules": [ "shorten_name_to_abbreviations", "insert_random_letter", "swap_adjacent_syllables" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "insert_random_letter": "Insert a random letter", "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 14 } } }, { "seed_identities_with_labels": [ { "name": "Виталий Савельев (cyrillic)", "dob": "1954-1-18", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Yuri Nabokov (latin)", "dob": "1981-1-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "joseph lambert (latin)", "dob": "1929-05-02", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "benoît bouvier (latin)", "dob": "1943-05-12", "address": "Falkland (Île)", "label": "negative", "script": "latin" }, { "name": "stephanie thornton (latin)", "dob": "1963-07-24", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "timothée tanguy (latin)", "dob": "1988-07-29", "address": "Japon", "label": "negative", "script": "latin" }, { "name": "alice blanc (latin)", "dob": "1985-04-22", "address": "Azerbaïdjan", "label": "negative", "script": "latin" }, { "name": "alex masse (latin)", "dob": "1991-08-06", "address": "Grenade", "label": "negative", "script": "latin" }, { "name": "Kambiz Rostamian (latin)", "dob": "1960-8-27", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "valérie devaux (latin)", "dob": "1995-02-08", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "елизавета мухин (cyrillic)", "dob": "1932-08-08", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "nilo cárdenas (latin)", "dob": "1994-07-13", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Qari Amjad (latin)", "dob": "1979-4-17", "address": "Afghanistan", "label": "positive", "script": "latin" }, { "name": "Yuri Karayev (latin)", "dob": "1966-6-21", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "نشوان صليبا (arabic)", "dob": "1950-05-30", "address": "Congo", "label": "negative", "script": "arabic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 10% Light, 50% Medium, 40% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 43% of variations that follow: Additionally, generate variations that perform these transformations: Replace spaces with special characters, and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 43, "selected_rules": [ "replace_spaces_with_random_special_characters", "swap_random_letter" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 43 } } }, { "seed_identities_with_labels": [ { "name": "Cholung Choe (latin)", "dob": "1973-5-16", "address": "Korea, North", "label": "positive", "script": "latin" }, { "name": "Сергей Тен (cyrillic)", "dob": "1976-8-25", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "benigno navas (latin)", "dob": "1966-12-20", "address": "Palau", "label": "negative", "script": "latin" }, { "name": "леонтий белов (cyrillic)", "dob": "1952-10-03", "address": "Spain", "label": "negative", "script": "cyrillic" }, { "name": "javi segura (latin)", "dob": "1991-03-09", "address": "Bahamas", "label": "negative", "script": "latin" }, { "name": "Olimxon Ismailov (latin)", "dob": "1996-10-4", "address": "Uzbekistan", "label": "positive", "script": "latin" }, { "name": "Katerina Tikhonova (latin)", "dob": "1986-8-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "gabrielle torres (latin)", "dob": "1931-05-13", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "dominique delorme (latin)", "dob": "1965-08-10", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Yuri Karayev (latin)", "dob": "1966-6-21", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "celia barreda (latin)", "dob": "1927-11-19", "address": "Djibouti", "label": "negative", "script": "latin" }, { "name": "luce bouchet (latin)", "dob": "1952-11-20", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "azucena acedo (latin)", "dob": "1945-06-30", "address": "Sierra Leona", "label": "negative", "script": "latin" }, { "name": "angélica novoa (latin)", "dob": "1929-05-12", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "олимпиада носов (cyrillic)", "dob": "1963-05-21", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" } ], "query_template": "Generate 12 variations of {name}. Ensuring phonetic similarity (10% Light: {name} with light phonetic alteration, 30% Medium: {name} with medium phonetic alteration, 60% Far: {name} with far phonetic alteration) and orthographic similarity (10% Light: {name} with light orthographic alteration, 50% Medium: {name} with medium orthographic alteration, 40% Far: {name} with far orthographic alteration). Approximately 20% of the total 12 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Remove a random vowel from {name}, Swap two adjacent letters in {name}, and Replace one or more vowels in {name} with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 20, "selected_rules": [ "remove_random_vowel", "swap_random_letter", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "swap_random_letter": "Swap random adjacent letters", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 20 } } }, { "seed_identities_with_labels": [ { "name": "vicenta ferrán (latin)", "dob": "1962-06-20", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "susan lemonnier (latin)", "dob": "2007-09-28", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "صخر آل سلطان (arabic)", "dob": "1962-01-22", "address": "Benin", "label": "negative", "script": "arabic" }, { "name": "Артем Кирьянов (cyrillic)", "dob": "1977-1-12", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "théophile merle (latin)", "dob": "2002-09-22", "address": "Gabon", "label": "negative", "script": "latin" }, { "name": "نشأت ازحيمان (arabic)", "dob": "1952-04-09", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "Gennadii NIKULOV (latin)", "dob": "1967-2-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Vladimir Yakushev (latin)", "dob": "1968-6-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Aleksey Budnev (latin)", "dob": "1964-7-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "patrick paul (latin)", "dob": "1958-04-20", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "caroline charrier (latin)", "dob": "1941-11-15", "address": "Australie", "label": "negative", "script": "latin" }, { "name": "gilles michel (latin)", "dob": "1929-04-19", "address": "Chili", "label": "negative", "script": "latin" }, { "name": "constance blot (latin)", "dob": "1938-11-20", "address": "Russie", "label": "negative", "script": "latin" }, { "name": "'Abdullah al-'Anizi (latin)", "dob": "1984-8-2", "address": "Kuwait", "label": "positive", "script": "latin" }, { "name": "simone baron (latin)", "dob": "1971-01-09", "address": "Monaco", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 23% of variations that follow: Additionally, generate variations that: Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 23, "selected_rules": [ "add_random_trailing_title" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 23 } } }, { "seed_identities_with_labels": [ { "name": "madeleine duhamel (latin)", "dob": "1969-05-25", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Hyon Jang (latin)", "dob": "1958-2-22", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Hassan AYACH (latin)", "dob": "1963-5-1", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "marta carneiro (latin)", "dob": "1968-08-22", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Олексій Дікій (cyrillic)", "dob": "1974-7-5", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "Sergei Kiriyenko (latin)", "dob": "1962-7-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "nicole macedo (latin)", "dob": "1926-06-20", "address": "Ruanda", "label": "negative", "script": "latin" }, { "name": "محمدحسين بهمنی (arabic)", "dob": "1964-11-08", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "مهدي دغمش (arabic)", "dob": "1960-02-01", "address": "Panama", "label": "negative", "script": "arabic" }, { "name": "Sergey Lavrov (latin)", "dob": "1950-3-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "genoveva vallés (latin)", "dob": "1944-09-28", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "leandro simões (latin)", "dob": "1967-11-16", "address": "Salvador", "label": "negative", "script": "latin" }, { "name": "alexandrie garnier (latin)", "dob": "2005-01-27", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "bryan paiva (latin)", "dob": "1984-11-25", "address": "Camarões", "label": "negative", "script": "latin" }, { "name": "diego batista (latin)", "dob": "1966-03-23", "address": "Polónia", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 9 variations of the name {name}, ensuring phonetic similarity: 10% Light, 50% Medium, 40% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 12% of variations that follow: Additionally, generate variations that perform these transformations: Insert a random letter, Duplicate a random letter, and Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 12, "selected_rules": [ "insert_random_letter", "duplicate_random_letter_as_double_letter", "remove_random_consonant" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter", "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "remove_random_consonant": "Remove a random consonant" }, "percentage": 12 } } }, { "seed_identities_with_labels": [ { "name": "cécile pires (latin)", "dob": "1932-02-20", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Mibrak Yazid (latin)", "dob": "1969-1-1", "address": "Algeria", "label": "positive", "script": "latin" }, { "name": "فتوح صليبا (arabic)", "dob": "1954-05-18", "address": "Austria", "label": "negative", "script": "arabic" }, { "name": "Vadim Shuvalov (latin)", "dob": "1958-2-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "claire rivière (latin)", "dob": "1994-11-24", "address": "Malte", "label": "negative", "script": "latin" }, { "name": "Hakid AKMAL (latin)", "dob": "1972-5-15", "address": "Philippines", "label": "positive", "script": "latin" }, { "name": "christelle chartier (latin)", "dob": "1941-10-28", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "joséphine cordier (latin)", "dob": "1993-01-02", "address": "Andorre", "label": "negative", "script": "latin" }, { "name": "georges martel (latin)", "dob": "1976-03-18", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "jules schmitt (latin)", "dob": "1936-04-12", "address": "Gabon", "label": "negative", "script": "latin" }, { "name": "родион терентьев (cyrillic)", "dob": "2006-10-27", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "denise charrier (latin)", "dob": "1951-01-21", "address": "Allemagne", "label": "negative", "script": "latin" }, { "name": "margaud royer (latin)", "dob": "1966-08-17", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Андрей Скоч (cyrillic)", "dob": "1966-1-30", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Nikolai GAICHUK (latin)", "dob": "1973-5-31", "address": "Belarus", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 6 variations of {name} for phonetic similarity, with 50% Light and 50% Medium variants. Additionally, generate variations that perform these transformations: Replace spaces with special characters, Abbreviate name parts, and Insert a random letter. Approximately 22% of the total 6 variations should follow these rule-based transformations. \n\nFor orthographic similarity (visually similar spellings), generate exactly 6 variations for each target identity with 30% Light, 40% Medium, and 30% Far variants.\n\nEnsure that all listed transformations are represented across the set of rule-based variations. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 22, "selected_rules": [ "replace_spaces_with_random_special_characters", "shorten_name_to_abbreviations", "insert_random_letter" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "shorten_name_to_abbreviations": "Abbreviate name parts", "insert_random_letter": "Insert a random letter" }, "percentage": 22 } } }, { "seed_identities_with_labels": [ { "name": "marta nunes (latin)", "dob": "1985-09-13", "address": "México", "label": "negative", "script": "latin" }, { "name": "герга пачаръзка (cyrillic)", "dob": "1997-06-25", "address": "Ghana", "label": "negative", "script": "cyrillic" }, { "name": "Kwo'n-u Han (latin)", "dob": "1962-8-21", "address": "China", "label": "positive", "script": "latin" }, { "name": "santiago assunção (latin)", "dob": "1973-12-27", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "ismael fernandes (latin)", "dob": "2000-07-15", "address": "Ruanda", "label": "negative", "script": "latin" }, { "name": "بشير منصور (arabic)", "dob": "1980-2-9", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "роман мамонтова (cyrillic)", "dob": "1978-06-19", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "Delcy Rodriguez (latin)", "dob": "1969-5-18", "address": "Venezuela", "label": "positive", "script": "latin" }, { "name": "beatriz brito (latin)", "dob": "1929-05-04", "address": "República Centro-Africana", "label": "negative", "script": "latin" }, { "name": "melissa neto (latin)", "dob": "1944-11-08", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "danielle valette (latin)", "dob": "1983-02-19", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Hossein VAZIRI (latin)", "dob": "1961-3-21", "address": "Malaysia", "label": "positive", "script": "latin" }, { "name": "laura rodriguez (latin)", "dob": "1976-05-28", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Omid Noori (latin)", "dob": "1976-3-12", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "lourenço vieira (latin)", "dob": "1961-10-27", "address": "Antígua e Barbuda", "label": "negative", "script": "latin" } ], "query_template": "Generate 7 variations of {name}, ensuring phonetic similarity (20% Light, e.g. {name}ee, {name}y, {name}ie; 60% Medium, e.g. {name}on, {name}en, {name}in; 20% Far, e.g. {name}son, {name}zen) and orthographic similarity (100% Light, e.g. {namE}, {NAme}). Approximately 25% of the total 7 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Abbreviate name parts (e.g. John Smith -> J.S.), and Replace spaces with special characters (e.g. Jane Doe -> Jane_Doe). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 1.0 }, "rule_based": { "rule_percentage": 25, "selected_rules": [ "shorten_name_to_abbreviations", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 25 } } }, { "seed_identities_with_labels": [ { "name": "olivie delaunay (latin)", "dob": "1967-11-27", "address": "Samoa", "label": "negative", "script": "latin" }, { "name": "Алексей Русских (cyrillic)", "dob": "1968-7-17", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "jacqueline merle (latin)", "dob": "1927-06-02", "address": "Trinité et Tobago", "label": "negative", "script": "latin" }, { "name": "Andrey Parshin (latin)", "dob": "1974-2-19", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "Muhammad Dayri (latin)", "dob": "1961-6-6", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "رزين دغمش (arabic)", "dob": "1958-11-06", "address": "Croatia", "label": "negative", "script": "arabic" }, { "name": "екатерина ефремова (cyrillic)", "dob": "1958-10-06", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "chad wiley (latin)", "dob": "1978-09-05", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "vincent munoz (latin)", "dob": "2005-10-14", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Mukhtar Shah (latin)", "dob": "1939-11-8", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "laetitia schneider (latin)", "dob": "1995-04-19", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "guillaume gilles (latin)", "dob": "1997-12-26", "address": "Libye", "label": "negative", "script": "latin" }, { "name": "Medny Kadyrova (latin)", "dob": "1978-9-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "marthe joseph (latin)", "dob": "1947-09-30", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "capucine hamon (latin)", "dob": "1994-08-21", "address": "Polynésie française", "label": "negative", "script": "latin" } ], "query_template": "Generate 10 execution vectors for {name}, ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (70% Light, 30% Medium). Approximately 10% of the total 10 variations should follow these rule-based transformations: \nReplace random vowels with different vowels, and Convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 10, "selected_rules": [ "replace_random_vowel_with_random_vowel", "shorten_name_to_initials" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "shorten_name_to_initials": "Convert name to initials" }, "percentage": 10 } } }, { "seed_identities_with_labels": [ { "name": "sophie sauvage (latin)", "dob": "1932-02-12", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "léon wagner (latin)", "dob": "1986-09-18", "address": "Costa Rica", "label": "negative", "script": "latin" }, { "name": "Aleksander Zhuchkovskiy (latin)", "dob": "1986-9-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Марина Оргеева (cyrillic)", "dob": "1959-9-21", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "julie dalton (latin)", "dob": "1955-05-05", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "hortense marty (latin)", "dob": "1996-10-14", "address": "Namibie", "label": "negative", "script": "latin" }, { "name": "мария константинова (cyrillic)", "dob": "1945-07-30", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "Saad AL-FAGIH (latin)", "dob": "1957-1-31", "address": "United Kingdom", "label": "positive", "script": "latin" }, { "name": "jean becker (latin)", "dob": "1953-08-18", "address": "Zimbabwe", "label": "negative", "script": "latin" }, { "name": "paul andrews (latin)", "dob": "1962-12-13", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Anna Tausent (latin)", "dob": "1990-1-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "مبينا کریمی (arabic)", "dob": "2006-12-24", "address": "Fiji", "label": "negative", "script": "arabic" }, { "name": "éléonore pascal (latin)", "dob": "1935-02-05", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Mikhail Shchapov (latin)", "dob": "1975-9-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "maryse schneider (latin)", "dob": "1948-05-30", "address": "Indonésie", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 100% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 48% of variations that follow: Additionally, generate variations that perform these transformations: Replace spaces with special characters, Replace random vowels with different vowels, and Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 48, "selected_rules": [ "replace_spaces_with_random_special_characters", "replace_random_vowel_with_random_vowel", "add_random_trailing_title" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 48 } } }, { "seed_identities_with_labels": [ { "name": "erica lima (latin)", "dob": "1988-11-22", "address": "Singapura", "label": "negative", "script": "latin" }, { "name": "Hamid Ahmar (latin)", "dob": "1967-1-1", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "Seyed Ghoreishi (latin)", "dob": "1964-9-27", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Somboon KRAPOOMPORN (latin)", "dob": "1959-5-6", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "eva martins (latin)", "dob": "1963-11-05", "address": "Porto Rico", "label": "negative", "script": "latin" }, { "name": "leandro loureiro (latin)", "dob": "1930-06-20", "address": "Turquemenistão", "label": "negative", "script": "latin" }, { "name": "منتصر آل العسكري (arabic)", "dob": "1973-07-06", "address": "American Samoa", "label": "negative", "script": "arabic" }, { "name": "noah barbosa (latin)", "dob": "1988-05-18", "address": "Bósnia e Herzegovina", "label": "negative", "script": "latin" }, { "name": "明明 王 (chinese)", "dob": "1989-3-12", "address": "China", "label": "positive", "script": "chinese" }, { "name": "Saad AL-FAKIH (latin)", "dob": "1957-1-31", "address": "United Kingdom", "label": "positive", "script": "latin" }, { "name": "agathe paris (latin)", "dob": "1958-04-05", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "carminho pinho (latin)", "dob": "1943-04-20", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "colette evrard (latin)", "dob": "1941-09-15", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "аселина овнарски (cyrillic)", "dob": "1936-09-11", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "isabelle michel (latin)", "dob": "1933-02-05", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 9 execution vectors ({name} variations) for each target identity. Ensure phonetic similarity (100% Medium). For orthographic similarity, implement 20% Light: {name} + slight spelling variation, 60% Medium: {name} with vowel changes, and 20% Far: {name} with significant spelling alteration. Approximately 21% of the total variations should follow these rule-based transformations: Replace random consonants with different consonants: {name} with [random consonant change], Insert a random letter: {name} + [inserted letter], and Replace random vowels with different vowels: {name} with [vowel substitution]. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 21, "selected_rules": [ "replace_random_consonant_with_random_consonant", "insert_random_letter", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "insert_random_letter": "Insert a random letter", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 21 } } }, { "seed_identities_with_labels": [ { "name": "gerónimo aparicio (latin)", "dob": "1935-03-17", "address": "Australia", "label": "negative", "script": "latin" }, { "name": "marina soler (latin)", "dob": "1948-11-04", "address": "Emiratos Árabes Unidos", "label": "negative", "script": "latin" }, { "name": "رجائي الدباغ (arabic)", "dob": "1940-02-27", "address": "Malta", "label": "negative", "script": "arabic" }, { "name": "Lilia Rotenberg (latin)", "dob": "1978-4-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "edward huber (latin)", "dob": "2001-01-25", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "calixto hervia (latin)", "dob": "1940-08-27", "address": "Tonga", "label": "negative", "script": "latin" }, { "name": "كسّاب السادة الراويون (arabic)", "dob": "1963-03-24", "address": "Syria", "label": "High Risk", "script": "arabic" }, { "name": "Kwo'n-u Han (latin)", "dob": "1962-8-21", "address": "China", "label": "positive", "script": "latin" }, { "name": "Геннадий Орденов (cyrillic)", "dob": "1957-9-4", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Evgeny NOVITSKIY (latin)", "dob": "1957-11-19", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "joel prieto (latin)", "dob": "1928-07-24", "address": "Dominicana", "label": "negative", "script": "latin" }, { "name": "adora segovia (latin)", "dob": "2007-04-26", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "marcelo valentín (latin)", "dob": "1989-04-01", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "mark taylor (latin)", "dob": "1976-03-11", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Krym Kazanokov (latin)", "dob": "1962-7-19", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 6 execution vectors for each target identity {name}, ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (50% Light, 50% Medium). Approximately 42% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap random adjacent letters, Use first name initial with last name, and Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 42, "selected_rules": [ "swap_random_letter", "initial_only_first_name", "remove_random_consonant" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "initial_only_first_name": "Use first name initial with last name", "remove_random_consonant": "Remove a random consonant" }, "percentage": 42 } } }, { "seed_identities_with_labels": [ { "name": "ربى صليبا (arabic)", "dob": "1928-05-09", "address": "Netherlands", "label": "negative", "script": "arabic" }, { "name": "Andrey Lavrishchev (latin)", "dob": "1959-10-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Мария Прусакова (cyrillic)", "dob": "1983-9-4", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Ilyas SA'B (latin)", "dob": "1961-4-6", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "julien da costa (latin)", "dob": "1977-12-24", "address": "Grèce", "label": "negative", "script": "latin" }, { "name": "valerio tapia (latin)", "dob": "1989-05-24", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Valentina Matvienko (latin)", "dob": "1949-4-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "حمدان بنو عمرو (arabic)", "dob": "1959-01-14", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "sophie rodriguez (latin)", "dob": "1974-10-31", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "arthur petitjean (latin)", "dob": "1935-05-07", "address": "Géorgie", "label": "negative", "script": "latin" }, { "name": "louise benard (latin)", "dob": "1962-05-14", "address": "Liechtenstein", "label": "negative", "script": "latin" }, { "name": "susanne le goff (latin)", "dob": "1976-08-18", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "dominique voisin (latin)", "dob": "1944-05-29", "address": "Azerbaïdjan", "label": "negative", "script": "latin" }, { "name": "renée bousquet (latin)", "dob": "1967-07-25", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Victor Vekselberg (latin)", "dob": "1957-4-14", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 11 variations of {name} ensuring phonetic similarity (20% Light, e.g. Ligh{t}, 60% Medium, e.g. Lieht, 20% Far) and orthographic similarity (10% Light, e.g. Nai{m}e, 30% Medium, e.g. Nayme, 60% Far). Approximately 54% of the total 11 variations should follow these rule-based transformations: Additionally, generate variations that: Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 54, "selected_rules": [ "add_random_leading_title" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 54 } } }, { "seed_identities_with_labels": [ { "name": "معصومه هومن (arabic)", "dob": "1965-08-09", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "أحمد أبو شلبك (arabic)", "dob": "1951-12-31", "address": "Syrian Arab Republic", "label": "negative", "script": "arabic" }, { "name": "Olga PLAKSINA (latin)", "dob": "1974-3-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Ramon Carrizales (latin)", "dob": "1952-11-8", "address": "Venezuela", "label": "positive", "script": "latin" }, { "name": "joseph marion (latin)", "dob": "1946-02-14", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "paca roselló (latin)", "dob": "1979-11-22", "address": "Indonesia", "label": "negative", "script": "latin" }, { "name": "Yuri Valyaev (latin)", "dob": "1959-4-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "stephanie harrington (latin)", "dob": "1928-07-16", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Кирилл Селезнёв (cyrillic)", "dob": "1974-4-23", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "sarah ross (latin)", "dob": "1953-07-10", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Laith Al-Khazali (latin)", "dob": "1975-10-14", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "nicolás rubio (latin)", "dob": "1976-01-30", "address": "Noruega", "label": "negative", "script": "latin" }, { "name": "isidora tomé (latin)", "dob": "2002-09-01", "address": "Omán", "label": "negative", "script": "latin" }, { "name": "concha ramón (latin)", "dob": "1999-01-13", "address": "Bélgica", "label": "negative", "script": "latin" }, { "name": "édgar blazquez (latin)", "dob": "1982-03-12", "address": "Venezuela", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 15 variations of {name}, ensuring phonetic similarity (30% Light, e.g. \"Jhon\", 40% Medium, e.g. \"Jon\", 30% Far) and orthographic similarity (10% Light, e.g. \"{name}y\", 30% Medium, e.g. \"{name}ney\", 60% Far). Approximately 58% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Insert a random letter, and Abbreviate name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 58, "selected_rules": [ "insert_random_letter", "shorten_name_to_abbreviations" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter", "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 58 } } }, { "seed_identities_with_labels": [ { "name": "pénélope breton (latin)", "dob": "1991-10-25", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "thibaut maillard (latin)", "dob": "1969-12-20", "address": "Canada", "label": "negative", "script": "latin" }, { "name": "hortense valentin (latin)", "dob": "1928-01-26", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "martine lagarde (latin)", "dob": "1937-07-17", "address": "Panama", "label": "negative", "script": "latin" }, { "name": "роман куликова (cyrillic)", "dob": "1986-04-20", "address": "Costa Rica", "label": "negative", "script": "cyrillic" }, { "name": "АЛЯКСАНДР ЖАРСКІ (cyrillic)", "dob": "1971-12-14", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "здравчо бобев (cyrillic)", "dob": "1929-09-14", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "tammy abbott (latin)", "dob": "1940-12-11", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Dmitry Pleshevskiy (latin)", "dob": "1992-7-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "william benoit (latin)", "dob": "1996-04-27", "address": "Saint Pierre et Miquelon", "label": "negative", "script": "latin" }, { "name": "Mehdi Lashgarian (latin)", "dob": "1989-6-2", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "richard jacquet (latin)", "dob": "1985-04-18", "address": "Liban", "label": "negative", "script": "latin" }, { "name": "Vitaly Markelov (latin)", "dob": "1963-8-5", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Dmitry Ishchenko (latin)", "dob": "1983-1-24", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "susan guillet (latin)", "dob": "1955-11-28", "address": "Haiti", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 8 execution vectors for {name}, ensuring phonetic similarity with 100% Far for sound-alike names. For orthographic similarity, include 20% Light, 60% Medium, and 20% Far visually similar spellings. Approximately 53% of the total variations should follow these rule-based transformations: Insert a random letter into {name} and Swap adjacent syllables within {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 53, "selected_rules": [ "insert_random_letter", "swap_adjacent_syllables" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter", "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 53 } } }, { "seed_identities_with_labels": [ { "name": "elías franch (latin)", "dob": "2006-06-19", "address": "Singapur", "label": "negative", "script": "latin" }, { "name": "عبد الرّزاق كانو (arabic)", "dob": "1994-08-16", "address": "Chile", "label": "negative", "script": "arabic" }, { "name": "عارف الطحان (arabic)", "dob": "1946-05-28", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "amador leiva (latin)", "dob": "2004-09-07", "address": "Kiribati", "label": "negative", "script": "latin" }, { "name": "patrick johnson (latin)", "dob": "1986-01-13", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "manon françois (latin)", "dob": "2003-05-24", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "françois schneider (latin)", "dob": "1942-06-13", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "damián campoy (latin)", "dob": "1997-02-08", "address": "Madagascar", "label": "negative", "script": "latin" }, { "name": "Kamkong WONG (latin)", "dob": "1958-8-8", "address": "China", "label": "positive", "script": "latin" }, { "name": "سلطان اسعد (arabic)", "dob": "1962-10-31", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "encarnación avilés (latin)", "dob": "1999-12-27", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Mohammad Raad (latin)", "dob": "1955-8-28", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "Dmitry Lelikov (latin)", "dob": "1968-5-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Muhammad Dayri (latin)", "dob": "1961-6-6", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "julia tirado (latin)", "dob": "1944-10-27", "address": "Uzbekistán", "label": "negative", "script": "latin" } ], "query_template": "Generate 11 name variations for {name} ensuring phonetic similarity with a far distance of 100% and orthographic similarity with a distribution of 10% Light, 30% Medium, and 60% Far. Approximately 51% of the total 11 variations should follow these rule-based transformations: Remove all spaces from {name}, Swap adjacent syllables in {name}, and Remove a random consonant from {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 51, "selected_rules": [ "remove_all_spaces", "swap_adjacent_syllables", "remove_random_consonant" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "swap_adjacent_syllables": "Swap adjacent syllables", "remove_random_consonant": "Remove a random consonant" }, "percentage": 51 } } }, { "seed_identities_with_labels": [ { "name": "Yongbok Kim (latin)", "dob": "1957-7-27", "address": "Korea, North", "label": "positive", "script": "latin" }, { "name": "michelle rey (latin)", "dob": "1943-04-29", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "dorothée diaz (latin)", "dob": "1974-08-02", "address": "Danemark", "label": "negative", "script": "latin" }, { "name": "heather hahn (latin)", "dob": "2000-11-19", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Ibrahim Jathran (latin)", "dob": "1982-10-29", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "anaïs launay (latin)", "dob": "1941-06-19", "address": "Moldavie", "label": "negative", "script": "latin" }, { "name": "aurore rousseau (latin)", "dob": "1994-07-07", "address": "Sainte Hélène", "label": "negative", "script": "latin" }, { "name": "Anna Tausent (latin)", "dob": "1990-1-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "فلاح الكبابيش (arabic)", "dob": "1943-07-07", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "Mohammed Saeed (latin)", "dob": "1977-12-4", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "هناء صليبا (arabic)", "dob": "1989-01-01", "address": "Lithuania", "label": "negative", "script": "arabic" }, { "name": "claude grenier (latin)", "dob": "1930-09-26", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Дзмітрьій Замулевіч (cyrillic)", "dob": "1974-5-7", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "esperanza pujadas (latin)", "dob": "1954-09-11", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "georges tessier (latin)", "dob": "1974-07-19", "address": "République centrafricaine", "label": "negative", "script": "latin" } ], "query_template": "Generate 14 variations of {name} for phonetic similarity (50% Light, 50% Medium) and orthographic similarity (100% Medium). Approximately 40% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap random adjacent letters with each other, resulting in names such as {name}e, {name}i, or {name}a; Replace all instances of 'A' with 'E', 'I', or 'O'; Replace all instances of 'E' with 'A', 'I', or 'O'; Replace all instances of 'I' with 'A', 'E', or 'O'.\n[VALIDATION HINTS]: Apply these rule-based transformations: Replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 40, "selected_rules": [ "swap_random_letter", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 40 } } }, { "seed_identities_with_labels": [ { "name": "caroline berger (latin)", "dob": "1952-04-15", "address": "Panama", "label": "negative", "script": "latin" }, { "name": "Yuri Shvytkin (latin)", "dob": "1965-5-24", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "соломон субботина (cyrillic)", "dob": "1953-10-31", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "christopher hoffman (latin)", "dob": "1969-11-24", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Тина Канделаки (cyrillic)", "dob": "1975-11-10", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "jennifer smith (latin)", "dob": "1986-10-08", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "jacqueline maréchal (latin)", "dob": "1951-07-06", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Nasif Barakat (latin)", "dob": "1970-11-30", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "jérôme marie (latin)", "dob": "2007-01-29", "address": "Colombie", "label": "negative", "script": "latin" }, { "name": "ديمه الجفالي (arabic)", "dob": "2004-09-13", "address": "Syrian Arab Republic", "label": "negative", "script": "arabic" }, { "name": "paul voisin (latin)", "dob": "1956-02-29", "address": "Comores", "label": "negative", "script": "latin" }, { "name": "Ahmad Seyedoshohada (latin)", "dob": "1959-4-27", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "dan villanueva (latin)", "dob": "2001-09-13", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Andrei Siguta (latin)", "dob": "1979-5-5", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "louis gomez (latin)", "dob": "1966-01-02", "address": "Suisse", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 58% of variations that follow: Additionally, generate variations that perform these transformations: Swap random adjacent letters, and Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 58, "selected_rules": [ "swap_random_letter", "name_parts_permutations" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "name_parts_permutations": "Reorder name parts" }, "percentage": 58 } } }, { "seed_identities_with_labels": [ { "name": "robin smith (latin)", "dob": "1957-02-14", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Gibran Bassil (latin)", "dob": "1970-6-21", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "daniel shepherd (latin)", "dob": "1966-11-26", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "sergio castañeda (latin)", "dob": "1977-06-23", "address": "Omán", "label": "negative", "script": "latin" }, { "name": "gabriel nogués (latin)", "dob": "1968-01-03", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "jesusa díez (latin)", "dob": "1947-12-16", "address": "Brunei Darussalam", "label": "negative", "script": "latin" }, { "name": "Vladimir Resin (latin)", "dob": "1936-2-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Дмитрий Путилин (cyrillic)", "dob": "1993-4-24", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "joseph allard (latin)", "dob": "1993-07-24", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "карчо куртажова (cyrillic)", "dob": "1971-07-11", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "майя комарова (cyrillic)", "dob": "1932-07-18", "address": "Guinea", "label": "negative", "script": "cyrillic" }, { "name": "Ho'-kyu Kim (latin)", "dob": "1970-9-15", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "gregorio echeverría (latin)", "dob": "1951-05-19", "address": "Bélgica", "label": "negative", "script": "latin" }, { "name": "ovidio osorio (latin)", "dob": "1988-03-19", "address": "Sudán", "label": "negative", "script": "latin" }, { "name": "Viktor Boyarkin (latin)", "dob": "1958-10-12", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 14 name variations for {name}, ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (10% Light, 30% Medium, 60% Far). Approximately 37% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that: Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 37, "selected_rules": [ "remove_all_spaces" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces" }, "percentage": 37 } } }, { "seed_identities_with_labels": [ { "name": "luísa santos (latin)", "dob": "1988-02-12", "address": "Irlanda", "label": "negative", "script": "latin" }, { "name": "نازنین زهرا ضابطی (arabic)", "dob": "1961-10-22", "address": "Nauru", "label": "negative", "script": "arabic" }, { "name": "roldán torrijos (latin)", "dob": "1933-04-17", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "brendan harris (latin)", "dob": "1926-02-15", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Viktor Ignatov (latin)", "dob": "1968-10-15", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ema barros (latin)", "dob": "1926-12-13", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Алексей Кузьмичёв (cyrillic)", "dob": "1962-10-15", "address": "France", "label": "positive", "script": "cyrillic" }, { "name": "Valentina Matvienko (latin)", "dob": "1949-4-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "márcio brito (latin)", "dob": "2001-07-22", "address": "Catar", "label": "negative", "script": "latin" }, { "name": "Ahmad Kaza'i (latin)", "dob": "1960-3-30", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "david clark (latin)", "dob": "1933-12-19", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "ثنا رفیعی (arabic)", "dob": "1956-12-10", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "Rafi Udin (latin)", "dob": "1966-6-3", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "violeta azevedo (latin)", "dob": "1948-07-22", "address": "Argélia", "label": "negative", "script": "latin" }, { "name": "ângelo anjos (latin)", "dob": "2002-03-10", "address": "Lesoto", "label": "negative", "script": "latin" } ], "query_template": "Generate 14 variations of {name}, ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 52% of the total 14 variations should follow these rule-based transformations: Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 52, "selected_rules": [ "remove_random_vowel" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel" }, "percentage": 52 } } }, { "seed_identities_with_labels": [ { "name": "Rayimbek Matraimov (latin)", "dob": "1971-5-3", "address": "Kyrgyzstan", "label": "positive", "script": "latin" }, { "name": "Andrei Khokhlun (latin)", "dob": "1966-12-4", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "lonnie james (latin)", "dob": "1963-03-13", "address": "Jordan", "label": "negative", "script": "latin" }, { "name": "gabriel amaral (latin)", "dob": "1973-07-07", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "eric collins (latin)", "dob": "1991-11-20", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "micael faria (latin)", "dob": "1989-04-02", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Vladimir Uyba (latin)", "dob": "1958-10-4", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Andrei DUBEN (latin)", "dob": "1970-12-12", "address": "Chile", "label": "positive", "script": "latin" }, { "name": "manon schneider (latin)", "dob": "1953-07-03", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "евпраксия семенов (cyrillic)", "dob": "1929-07-14", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "michael rivas (latin)", "dob": "1970-12-12", "address": "Turks and Caicos Islands", "label": "negative", "script": "latin" }, { "name": "Уладзімір Лапыр (cyrillic)", "dob": "1977-8-21", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "steven miller (latin)", "dob": "1971-08-26", "address": "Brunei Darussalam", "label": "negative", "script": "latin" }, { "name": "василий ильин (cyrillic)", "dob": "1999-07-22", "address": "Kazakhstan", "label": "negative", "script": "cyrillic" }, { "name": "david rich (latin)", "dob": "1937-11-13", "address": "Congo", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 46% of variations that follow: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), and Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 46, "selected_rules": [ "add_random_trailing_title", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 46 } } }, { "seed_identities_with_labels": [ { "name": "يسري البغدادي (arabic)", "dob": "1962-10-09", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "Kamkong WONG (latin)", "dob": "1958-8-8", "address": "China", "label": "positive", "script": "latin" }, { "name": "Pye Tayza (latin)", "dob": "1987-1-29", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "marcelino cañizares (latin)", "dob": "1993-04-07", "address": "República Unida de Tanzanía", "label": "negative", "script": "latin" }, { "name": "rosendo taboada (latin)", "dob": "1994-02-12", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "家超 李 (chinese)", "dob": "1957-12-7", "address": "Hong Kong", "label": "positive", "script": "chinese" }, { "name": "bienvenida lloret (latin)", "dob": "1964-10-27", "address": "Francia", "label": "negative", "script": "latin" }, { "name": "Viktor Mozhelyansky (latin)", "dob": "1964-5-10", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "bernard robin (latin)", "dob": "1984-06-25", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "brian fernandez (latin)", "dob": "1967-07-09", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Kambiz Rostamian (latin)", "dob": "1960-8-27", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "نصر الدّين الجفالي (arabic)", "dob": "1990-03-04", "address": "Niger", "label": "negative", "script": "arabic" }, { "name": "adriana mas (latin)", "dob": "1947-07-05", "address": "Singapur", "label": "negative", "script": "latin" }, { "name": "álvaro luna (latin)", "dob": "2005-08-23", "address": "Bahrein", "label": "negative", "script": "latin" }, { "name": "celestina marqués (latin)", "dob": "1968-07-09", "address": "Cuba", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 11 name variations for {name}, ensuring phonetic similarity (30% Light, e.g. {name}e, {name}y, etc., 40% Medium, e.g. {name}en, {name}in, etc., 30% Far, e.g. {name}kn, {name}pn) and orthographic similarity (10% Light, e.g. {name}, 30% Medium, e.g. {name}a, {name}e, etc., 60% Far, e.g. {name}x, {name}z). Approximately 56% of the total 11 variations should follow these rule-based transformations: Additionally, generate variations that: Remove a random vowel from {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 56, "selected_rules": [ "remove_random_vowel" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel" }, "percentage": 56 } } }, { "seed_identities_with_labels": [ { "name": "Дмитрий Кузьмин (cyrillic)", "dob": "1975-6-28", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "christopher harris (latin)", "dob": "1927-12-23", "address": "Saint Vincent and the Grenadines", "label": "negative", "script": "latin" }, { "name": "sofía maldonado (latin)", "dob": "1961-08-18", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "Zelimkhan Mutsoev (latin)", "dob": "1959-10-13", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "christophe le roux (latin)", "dob": "1933-09-09", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "susan mueller (latin)", "dob": "1983-07-26", "address": "Congo", "label": "negative", "script": "latin" }, { "name": "Mukharbek Barakhoyev (latin)", "dob": "1971-1-4", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "michael castro (latin)", "dob": "1997-10-26", "address": "Bosnia and Herzegovina", "label": "negative", "script": "latin" }, { "name": "Myo Oo (latin)", "dob": "1960-6-23", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "Egor Mozhaev (latin)", "dob": "1982-5-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "راجح آل عايض (arabic)", "dob": "1968-07-26", "address": "Slovakia (Slovak Republic)", "label": "negative", "script": "arabic" }, { "name": "destiny mcdonald (latin)", "dob": "1965-04-28", "address": "Brazil", "label": "negative", "script": "latin" }, { "name": "anouk schmitt (latin)", "dob": "1992-03-31", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "айрен плюцова (cyrillic)", "dob": "1999-07-20", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "donna dickerson (latin)", "dob": "1962-05-23", "address": "South Sudan", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 11 name variations for {name} ensuring phonetic similarity (100% Medium) and orthographic similarity (100% Medium). Approximately 58% of the total 11 name variations should follow these rule-based transformations: Additionally, generate variations that remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 58, "selected_rules": [ "remove_all_spaces" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces" }, "percentage": 58 } } }, { "seed_identities_with_labels": [ { "name": "reinaldo alcántara (latin)", "dob": "1972-03-06", "address": "Afganistán", "label": "negative", "script": "latin" }, { "name": "Tetyana Tumilina (latin)", "dob": "1966-4-18", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "rosario noriega (latin)", "dob": "1974-08-24", "address": "Djibouti", "label": "negative", "script": "latin" }, { "name": "russell hoover (latin)", "dob": "1973-01-21", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "مرعي الكبابيش (arabic)", "dob": "1963-03-16", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "Roman Semenov (latin)", "dob": "1987-11-8", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "мадлен пъков (cyrillic)", "dob": "1976-11-14", "address": "Myanmar", "label": "negative", "script": "cyrillic" }, { "name": "Андрей Никипелов (cyrillic)", "dob": "1968-3-7", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Pavel Akifyev (latin)", "dob": "1985-12-19", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "coral aliaga (latin)", "dob": "1962-04-23", "address": "Turquía", "label": "negative", "script": "latin" }, { "name": "simone martel (latin)", "dob": "1939-05-16", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "agnès couturier (latin)", "dob": "1932-08-14", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "arcelia esparza (latin)", "dob": "1946-02-11", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Qari Amjad (latin)", "dob": "1979-4-17", "address": "Afghanistan", "label": "positive", "script": "latin" }, { "name": "cruz mendizábal (latin)", "dob": "1964-01-08", "address": "Congo", "label": "negative", "script": "latin" } ], "query_template": "Generate 6 execution vectors ({name} variations) for each target identity. Ensure phonetic similarity by implementing 100% Light rules. For orthographic similarity, implement 70% of the variations as 70% Light and 30% as Medium. Approximately 30% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Remove a random consonant from {name}, Abbreviate name parts in {name}, and Replace random vowels with different vowels in {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 1.0 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 30, "selected_rules": [ "remove_random_consonant", "shorten_name_to_abbreviations", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "shorten_name_to_abbreviations": "Abbreviate name parts", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 30 } } }, { "seed_identities_with_labels": [ { "name": "barry campbell (latin)", "dob": "1986-07-23", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Mochtar AKWAN (latin)", "dob": "1946-5-4", "address": "Indonesia", "label": "positive", "script": "latin" }, { "name": "bradley randall (latin)", "dob": "1941-05-04", "address": "Madagascar", "label": "negative", "script": "latin" }, { "name": "mary elliott (latin)", "dob": "1945-03-09", "address": "Papua New Guinea", "label": "negative", "script": "latin" }, { "name": "james cox (latin)", "dob": "1953-05-06", "address": "United Kingdom", "label": "negative", "script": "latin" }, { "name": "timothée regnier (latin)", "dob": "1973-05-29", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "heather brooks (latin)", "dob": "1933-09-05", "address": "Jordan", "label": "negative", "script": "latin" }, { "name": "Андрей Скоч (cyrillic)", "dob": "1966-1-30", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Hamza Akbar (latin)", "dob": "1998-9-6", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "викентий родионова (cyrillic)", "dob": "1980-03-16", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "Oleksiy MOZHOVY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Volodymyr Bandura (latin)", "dob": "1990-7-15", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "william barbier (latin)", "dob": "2004-07-12", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "ruy rubio (latin)", "dob": "1966-06-24", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "ودود الإغباري (arabic)", "dob": "1973-02-05", "address": "Turks and Caicos Islands", "label": "negative", "script": "arabic" } ], "query_template": "Generate 12 variations of {name}, ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 18% of the total 12 variations should follow these rule-based transformations: Replace random vowels with different vowels, Remove a random vowel, and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 18, "selected_rules": [ "replace_random_vowel_with_random_vowel", "remove_random_vowel", "swap_random_letter" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "remove_random_vowel": "Remove a random vowel", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 18 } } }, { "seed_identities_with_labels": [ { "name": "Amer ALshawa (latin)", "dob": "1964-4-29", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "pelayo batlle (latin)", "dob": "1949-05-07", "address": "Paraguay", "label": "negative", "script": "latin" }, { "name": "anna joyce (latin)", "dob": "1977-12-24", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "dorita falcó (latin)", "dob": "1943-10-23", "address": "Grecia", "label": "negative", "script": "latin" }, { "name": "luc andre (latin)", "dob": "1930-02-01", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "vito gomis (latin)", "dob": "1969-06-15", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "مُخيمر نجم (arabic)", "dob": "1943-03-28", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "Olimjon Tohtahunov (latin)", "dob": "1949-12-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Oleg Smolin (latin)", "dob": "1952-2-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Клим Комаров (cyrillic)", "dob": "1996-6-21", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "cándida valenzuela (latin)", "dob": "1968-05-28", "address": "México", "label": "negative", "script": "latin" }, { "name": "manon pierre (latin)", "dob": "1956-07-29", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "بارعة شاهين (arabic)", "dob": "1927-08-27", "address": "Morocco", "label": "negative", "script": "arabic" }, { "name": "Malek Reuben (latin)", "dob": "1960-1-1", "address": "South Sudan", "label": "positive", "script": "latin" }, { "name": "aurelio peiró (latin)", "dob": "1950-09-18", "address": "Estados Unidos de América", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 7 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 29% of variations that follow: Additionally, generate variations that perform these transformations: Swap adjacent consonants, and Replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 29, "selected_rules": [ "swap_adjacent_consonants", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 29 } } }, { "seed_identities_with_labels": [ { "name": "yves langlois (latin)", "dob": "1992-01-29", "address": "Zaïre", "label": "negative", "script": "latin" }, { "name": "مهنّد بكيل (arabic)", "dob": "1955-03-23", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "joan chaves (latin)", "dob": "2003-10-07", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Kiya Sadeghi (latin)", "dob": "1986-3-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Дзмітрый Баскаў (cyrillic)", "dob": "1978-8-25", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Katerina Pawlowska (latin)", "dob": "1977-3-28", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "عصمت أنمار (arabic)", "dob": "1998-03-15", "address": "Moldova", "label": "negative", "script": "arabic" }, { "name": "Lyudmila Zaitseva (latin)", "dob": "1979-7-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "bernard gaudin (latin)", "dob": "1953-04-05", "address": "Corée, Sud", "label": "negative", "script": "latin" }, { "name": "jacques rodriguez (latin)", "dob": "1947-06-07", "address": "Jordanie", "label": "negative", "script": "latin" }, { "name": "christine evrard (latin)", "dob": "1974-06-11", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "michelle blin (latin)", "dob": "1967-05-10", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "michelle white (latin)", "dob": "1931-03-09", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "lucy germain (latin)", "dob": "2007-10-02", "address": "Yougoslavie", "label": "negative", "script": "latin" }, { "name": "Yves Demasure (latin)", "dob": "1975-9-11", "address": "United Arab Emirates", "label": "positive", "script": "latin" } ], "query_template": "Generate 8 execution vectors for {name}, ensuring phonetic similarity (100% Light) by applying the following rules: Metaphone Soundex NYSIIS Double Metaphone. Additionally, generate variations that perform these orthographic similarities: \n10% Light: Replace one letter with similar-looking character\n50% Medium: Swap two adjacent letters or swap two non-adjacent letters\n40% Far: Reverse a segment of the string\n\nApproximately 38% of the total variations should follow the rule-based transformations below. These transformations must be represented across the set of rule-based variations:\nRemove all spaces, Duplicate a random letter, and Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 1.0 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 38, "selected_rules": [ "remove_all_spaces", "duplicate_random_letter_as_double_letter", "remove_random_vowel" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "remove_random_vowel": "Remove a random vowel" }, "percentage": 38 } } }, { "seed_identities_with_labels": [ { "name": "Maxime Mocom (latin)", "dob": "1978-12-30", "address": "Central African Republic", "label": "positive", "script": "latin" }, { "name": "franck pineau (latin)", "dob": "1982-12-26", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "anaïs moulin (latin)", "dob": "1939-10-02", "address": "Laos", "label": "negative", "script": "latin" }, { "name": "andré fournier (latin)", "dob": "1992-04-26", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "philippe michel (latin)", "dob": "1938-11-27", "address": "Paraguay", "label": "negative", "script": "latin" }, { "name": "emmanuelle jacquet (latin)", "dob": "1966-10-15", "address": "Brésil", "label": "negative", "script": "latin" }, { "name": "ظريف الراشد (arabic)", "dob": "1957-09-16", "address": "Moldova", "label": "negative", "script": "arabic" }, { "name": "Yakiv Antonov (latin)", "dob": "1972-11-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Nufail Akbar (latin)", "dob": "1972-3-26", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "jacques pineau (latin)", "dob": "1945-10-21", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "édouard marie (latin)", "dob": "1965-07-29", "address": "Niue", "label": "negative", "script": "latin" }, { "name": "Владимир Потанин (cyrillic)", "dob": "1961-1-3", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "благой канчин (cyrillic)", "dob": "1959-02-27", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "Javier RIVERA (latin)", "dob": "1972-4-20", "address": "Honduras", "label": "positive", "script": "latin" }, { "name": "colette delannoy (latin)", "dob": "1953-11-29", "address": "Haiti", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 37% of variations that follow: Additionally, generate variations that perform these transformations: Swap adjacent consonants, Swap random adjacent letters, and Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 37, "selected_rules": [ "swap_adjacent_consonants", "swap_random_letter", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "swap_random_letter": "Swap random adjacent letters", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 37 } } }, { "seed_identities_with_labels": [ { "name": "humberto pou (latin)", "dob": "1967-01-07", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "себахтин дришльов (cyrillic)", "dob": "1941-05-05", "address": "Saint Pierre and Miquelon", "label": "negative", "script": "cyrillic" }, { "name": "Ho'-kyu Kim (latin)", "dob": "1970-9-15", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "megan brown (latin)", "dob": "1937-12-06", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Abualfazl Nazeri (latin)", "dob": "1969-9-14", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "brian white (latin)", "dob": "1974-02-10", "address": "Indonesia", "label": "negative", "script": "latin" }, { "name": "vincent carroll (latin)", "dob": "1947-09-30", "address": "Paraguay", "label": "negative", "script": "latin" }, { "name": "Адальби Шхагошев (cyrillic)", "dob": "1967-6-6", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "che rosselló (latin)", "dob": "1952-07-01", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Amjad Sazgar (latin)", "dob": "1979-4-16", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Vildan Zinnurov (latin)", "dob": "1964-1-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "élise gérard (latin)", "dob": "1990-03-16", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "james kelly (latin)", "dob": "1960-05-12", "address": "Belgium", "label": "negative", "script": "latin" }, { "name": "lauren palmer (latin)", "dob": "1927-03-11", "address": "Sweden", "label": "negative", "script": "latin" }, { "name": "мина куликова (cyrillic)", "dob": "1928-06-18", "address": "Belarus", "label": "High Risk", "script": "cyrillic" } ], "query_template": "Generate 11 variations of {name}, ensuring phonetic similarity (20% Light, e.g. {name}L, 60% Medium, e.g. {name}M, 20% Far) and orthographic similarity (50% Light, e.g. {name}a, {name}i, 50% Medium, e.g. {name}e, {name}o). Approximately 5 of the total 11 variations should follow these rule-based transformations: Swap adjacent consonants, e.g. {name}s becomes {name}p; Remove a random consonant, e.g. {name}t -> {name}; and Replace random vowels with different vowels, e.g. {name}a becomes {name}e. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 50, "selected_rules": [ "swap_adjacent_consonants", "remove_random_consonant", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "remove_random_consonant": "Remove a random consonant", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 50 } } }, { "seed_identities_with_labels": [ { "name": "Oleg Smolin (latin)", "dob": "1952-2-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "gonçalo gaspar (latin)", "dob": "1990-09-04", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "ana ramos (latin)", "dob": "1951-08-08", "address": "Estados Unidos", "label": "negative", "script": "latin" }, { "name": "júlia jesus (latin)", "dob": "1945-11-25", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "thibault boutin (latin)", "dob": "1963-08-11", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Olexiy KOSTRUBITSKY (latin)", "dob": "1978-8-24", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "antónio reis (latin)", "dob": "1962-08-25", "address": "Islândia", "label": "negative", "script": "latin" }, { "name": "حنبل هوازن (arabic)", "dob": "1936-03-25", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "lucie gallet (latin)", "dob": "1980-02-13", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Андрей Макаров (cyrillic)", "dob": "1954-7-22", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Myo'ng-chin Kim (latin)", "dob": "1980-2-18", "address": "China", "label": "positive", "script": "latin" }, { "name": "nuno costa (latin)", "dob": "1999-07-10", "address": "Eritreia", "label": "negative", "script": "latin" }, { "name": "júlia pinto (latin)", "dob": "2002-02-09", "address": "Tunísia", "label": "negative", "script": "latin" }, { "name": "Mahmoud Kzemabad (latin)", "dob": "1965-6-26", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "دانیال هاشمی (arabic)", "dob": "1951-05-11", "address": "United States Virgin Islands", "label": "negative", "script": "arabic" } ], "query_template": "Generate 9 variations of {name}, ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 48% of the total 9 variations should follow these rule-based transformations: Additionally, generate variations that: Insert a random letter in position 1, insert a random letter in position 2 through 6, replace a single character with a similar-looking one (e.g. 'o' -> '0'), add a commonly used prefix or suffix, and swap two adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 48, "selected_rules": [ "insert_random_letter" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter" }, "percentage": 48 } } }, { "seed_identities_with_labels": [ { "name": "kyle parker (latin)", "dob": "1989-01-20", "address": "Czech Republic", "label": "negative", "script": "latin" }, { "name": "lori daugherty (latin)", "dob": "2005-07-24", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "jafet mata (latin)", "dob": "1957-12-30", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "jacques riou (latin)", "dob": "1996-07-07", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "james davis (latin)", "dob": "1993-04-28", "address": "Guinea", "label": "negative", "script": "latin" }, { "name": "marcelle garnier (latin)", "dob": "1936-10-05", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "هستي زارعی (arabic)", "dob": "1957-03-13", "address": "Jersey", "label": "negative", "script": "arabic" }, { "name": "صدّام أنمار (arabic)", "dob": "1952-09-13", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "Павел Сорокин (cyrillic)", "dob": "1985-8-1", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Amir Khamzat (latin)", "dob": "1974-10-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Peter Mbaga (latin)", "dob": "1976-9-25", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "jade haynes (latin)", "dob": "1927-04-08", "address": "Equatorial Guinea", "label": "negative", "script": "latin" }, { "name": "Denis Gusev (latin)", "dob": "1986-6-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Aleksey Mordashov (latin)", "dob": "1965-9-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "stacy hale (latin)", "dob": "1973-10-03", "address": "Reunion", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 51% of variations that follow: Additionally, generate variations that perform these transformations: Swap adjacent consonants, and Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 51, "selected_rules": [ "swap_adjacent_consonants", "insert_random_letter" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "insert_random_letter": "Insert a random letter" }, "percentage": 51 } } }, { "seed_identities_with_labels": [ { "name": "georges hernandez (latin)", "dob": "1963-07-03", "address": "Tchad", "label": "negative", "script": "latin" }, { "name": "sylvie delahaye (latin)", "dob": "1982-09-19", "address": "Samoa", "label": "negative", "script": "latin" }, { "name": "Aleksey Russkikh (latin)", "dob": "1968-7-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Zachari KALASHOV (latin)", "dob": "1953-3-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "lucas guérin (latin)", "dob": "2005-02-28", "address": "Argentine", "label": "negative", "script": "latin" }, { "name": "بلسم حجازي (arabic)", "dob": "1956-03-31", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "Lyudmila Zaitseva (latin)", "dob": "1979-7-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "єва радченко (cyrillic)", "dob": "1963-07-27", "address": "France", "label": "negative", "script": "cyrillic" }, { "name": "gilbert parent (latin)", "dob": "2002-04-08", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Татяна Москалькова (cyrillic)", "dob": "1955-5-30", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Vladimir Pavlov (latin)", "dob": "1976-6-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "chantal maillet (latin)", "dob": "1935-10-12", "address": "Canada", "label": "negative", "script": "latin" }, { "name": "margot michel (latin)", "dob": "1977-09-03", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "nicole pruvost (latin)", "dob": "2001-01-10", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "michelle labbé (latin)", "dob": "1925-02-09", "address": "Burkina Faso", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 9 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 46% of variations that follow: Additionally, generate variations that perform these transformations: Convert name to initials, Swap adjacent syllables, and Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 46, "selected_rules": [ "shorten_name_to_initials", "swap_adjacent_syllables", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "swap_adjacent_syllables": "Swap adjacent syllables", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 46 } } }, { "seed_identities_with_labels": [ { "name": "Nikolay Kosov (latin)", "dob": "1955-6-30", "address": "Hungary", "label": "positive", "script": "latin" }, { "name": "margot legros (latin)", "dob": "1972-05-26", "address": "Groenland", "label": "negative", "script": "latin" }, { "name": "Igor Kuzmenko (latin)", "dob": "1967-11-11", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "laurent leclerc (latin)", "dob": "1976-03-27", "address": "Bhoutan", "label": "negative", "script": "latin" }, { "name": "Сергей Гордеев (cyrillic)", "dob": "1972-11-22", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "حيدر عامر بن صعصعة (arabic)", "dob": "1962-05-08", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "michèle bazin (latin)", "dob": "1938-11-28", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "donato morales (latin)", "dob": "1966-04-15", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Abdul AL-MAGHREBI (latin)", "dob": "1970-7-1", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "maurice loiseau (latin)", "dob": "2003-06-09", "address": "Érythrée", "label": "negative", "script": "latin" }, { "name": "victoire toussaint (latin)", "dob": "1971-10-08", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "alexandre caron (latin)", "dob": "1936-08-28", "address": "Thailande", "label": "negative", "script": "latin" }, { "name": "طريف آل سلطان (arabic)", "dob": "1930-08-17", "address": "Chile", "label": "negative", "script": "arabic" }, { "name": "Jinghe Lin (latin)", "dob": "1982-12-3", "address": "China", "label": "positive", "script": "latin" }, { "name": "george murray (latin)", "dob": "1962-10-09", "address": "Kenya", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 11 variations of {name}, ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (70% Light, 30% Medium). Approximately 58% of the total 11 variations should follow these rule-based transformations: \nAdditionally, generate variations that: \n- Insert a random letter in the middle. \n- Remove a random letter from the end. \n- Swap first and last name order. \n- Change first name to last name, vice versa. \n- Replace first name with a synonym. \n- Replace last name with a synonym. \n- Append a suffix (-Jr, -Sr, etc.). \n- Remove all punctuation. \n- Replace special characters with spaces. \n- Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 58, "selected_rules": [ "swap_random_letter" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters" }, "percentage": 58 } } }, { "seed_identities_with_labels": [ { "name": "marthe gautier (latin)", "dob": "1984-12-11", "address": "Russie", "label": "negative", "script": "latin" }, { "name": "angela yang (latin)", "dob": "1984-04-08", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "david legendre (latin)", "dob": "1946-05-14", "address": "Colombie", "label": "negative", "script": "latin" }, { "name": "brigitte fleury (latin)", "dob": "1997-08-14", "address": "Philippines", "label": "negative", "script": "latin" }, { "name": "franck legros (latin)", "dob": "1960-06-22", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "édith brunel (latin)", "dob": "1969-01-25", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Evgeny NOVITSKIY (latin)", "dob": "1957-11-19", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Aleksander Drozdenko (latin)", "dob": "1964-11-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "marie normand (latin)", "dob": "1989-12-26", "address": "Jamaïque", "label": "negative", "script": "latin" }, { "name": "Abdullah Faisal (latin)", "dob": "1963-9-10", "address": "Jamaica", "label": "positive", "script": "latin" }, { "name": "محمدرضا زمانی (arabic)", "dob": "1973-05-30", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "Bernard Mheshe (latin)", "dob": "1974-10-10", "address": "Congo, Democratic Republic of the", "label": "positive", "script": "latin" }, { "name": "véronique arnaud (latin)", "dob": "1930-07-18", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "إباء جرار (arabic)", "dob": "1962-08-07", "address": "United States Virgin Islands", "label": "negative", "script": "arabic" }, { "name": "داني خوري (arabic)", "dob": "1967-5-2", "address": "Lebanon", "label": "positive", "script": "arabic" } ], "query_template": "Generate exactly 8 execution vectors for {name}, ensuring phonetic similarity (100% Medium) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 29% of the total 8 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Duplicate a random letter in {name} to create new names like {name}ee or {name}dd; Remove a random vowel from {name} to create new names like {name}tn or {name}rp; Replace random vowels with different vowels in {name} to create new names like {name}o instead of {name}e. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 29, "selected_rules": [ "duplicate_random_letter_as_double_letter", "remove_random_vowel", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "remove_random_vowel": "Remove a random vowel", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 29 } } }, { "seed_identities_with_labels": [ { "name": "lori henry (latin)", "dob": "1928-02-08", "address": "Madagascar", "label": "negative", "script": "latin" }, { "name": "grégoire salmon (latin)", "dob": "1984-08-04", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "бажен макарова (cyrillic)", "dob": "1977-07-02", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "maryse benard (latin)", "dob": "1974-03-04", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "парамон потапова (cyrillic)", "dob": "1950-12-03", "address": "United States Minor Outlying Islands", "label": "negative", "script": "cyrillic" }, { "name": "داني خوري (arabic)", "dob": "1967-5-2", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "sarah cole (latin)", "dob": "1966-02-09", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "erika christensen (latin)", "dob": "1971-06-24", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Su WEI (latin)", "dob": "1959-12-3", "address": "Hong Kong", "label": "positive", "script": "latin" }, { "name": "donald sawyer (latin)", "dob": "1992-02-21", "address": "Northern Mariana Islands", "label": "negative", "script": "latin" }, { "name": "Mikhail Kizeev (latin)", "dob": "1978-3-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Ho'-kyu Kim (latin)", "dob": "1970-9-15", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "heather glover (latin)", "dob": "1962-10-07", "address": "Guinea-Bissau", "label": "negative", "script": "latin" }, { "name": "robin olson (latin)", "dob": "1981-02-16", "address": "Palestinian Territory", "label": "negative", "script": "latin" }, { "name": "Aleksandra Oksenchuk (latin)", "dob": "1992-10-16", "address": "Belarus", "label": "positive", "script": "latin" } ], "query_template": "Generate 6 variations of {name}, ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 58% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Reorder name parts, Swap adjacent consonants, and Swap random adjacent letters. The 58% rule-based variations should include a mix of all listed transformations across the entire set, without individually specifying each transformation for every variation. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 58, "selected_rules": [ "name_parts_permutations", "swap_adjacent_consonants", "swap_random_letter" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts", "swap_adjacent_consonants": "Swap adjacent consonants", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 58 } } }, { "seed_identities_with_labels": [ { "name": "manon benard (latin)", "dob": "1976-07-10", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "марангони монтянов (cyrillic)", "dob": "1950-06-16", "address": "Lithuania", "label": "negative", "script": "cyrillic" }, { "name": "إلينا الكبابيش (arabic)", "dob": "1978-01-26", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "colette garnier (latin)", "dob": "1961-02-26", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Евгений Колюшин (cyrillic)", "dob": "1947-10-8", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "thibaut raynaud (latin)", "dob": "1947-06-08", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Sergei Savchenkov (latin)", "dob": "1954-10-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "'Adnan Yusuf (latin)", "dob": "1956-6-6", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "joaquina trillo (latin)", "dob": "1976-06-19", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Ella Pamfilova (latin)", "dob": "1953-9-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Nikolai GAICHUK (latin)", "dob": "1973-5-31", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "victoire buisson (latin)", "dob": "1974-02-02", "address": "Dominique", "label": "negative", "script": "latin" }, { "name": "marthe courtois (latin)", "dob": "1961-04-29", "address": "Guyane", "label": "negative", "script": "latin" }, { "name": "anouk lenoir (latin)", "dob": "1999-12-13", "address": "Vatican (Etat du)", "label": "negative", "script": "latin" }, { "name": "gérard delmas (latin)", "dob": "1996-08-25", "address": "Suisse", "label": "negative", "script": "latin" } ], "query_template": "Generate exactly 7 execution vectors for {name}, ensuring phonetic similarity (100% Medium) and orthographic similarity (10% Light, 50% Medium, 40% Far). Approximately 24% of the total variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Remove a random consonant from {name}, Replace all spaces in {name} with an empty string, and Convert {name} to its initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 24, "selected_rules": [ "remove_random_consonant", "remove_all_spaces", "shorten_name_to_initials" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "remove_all_spaces": "Remove all spaces", "shorten_name_to_initials": "Convert name to initials" }, "percentage": 24 } } }, { "seed_identities_with_labels": [ { "name": "Ольга Волкова (cyrillic)", "dob": "1974-10-9", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "lucie da costa (latin)", "dob": "1985-01-15", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "твердислав шубина (cyrillic)", "dob": "2005-02-07", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "adrienne levy (latin)", "dob": "1936-07-05", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Igor KORNET (latin)", "dob": "1973-4-29", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "богодар рябовіл (cyrillic)", "dob": "1997-03-10", "address": "Botswana", "label": "negative", "script": "cyrillic" }, { "name": "laura jones (latin)", "dob": "1988-05-13", "address": "India", "label": "negative", "script": "latin" }, { "name": "manon bigot (latin)", "dob": "1999-07-13", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "sydney shepherd (latin)", "dob": "1945-05-31", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Vladimir Pavlov (latin)", "dob": "1976-6-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Anastasiya Kuznetsova (latin)", "dob": "1970-7-20", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Reuben LAVILLA (latin)", "dob": "1972-10-4", "address": "Philippines", "label": "positive", "script": "latin" }, { "name": "amanda burns (latin)", "dob": "1931-10-26", "address": "Andorra", "label": "negative", "script": "latin" }, { "name": "amanda peterson (latin)", "dob": "1927-01-29", "address": "South Georgia and the South Sandwich Islands", "label": "negative", "script": "latin" }, { "name": "jason klein (latin)", "dob": "1965-03-02", "address": "India", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 27% of variations that follow: Additionally, generate variations that perform these transformations: Convert name to initials, Add a title suffix (Jr., PhD, etc.), and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 27, "selected_rules": [ "shorten_name_to_initials", "add_random_trailing_title", "swap_random_letter" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 27 } } }, { "seed_identities_with_labels": [ { "name": "Reza Ebadzadeh (latin)", "dob": "1964-6-13", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "arthur blondel (latin)", "dob": "1993-02-11", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "عزّت التركمان (arabic)", "dob": "2007-04-24", "address": "Dominican Republic", "label": "negative", "script": "arabic" }, { "name": "Aleksey Budnev (latin)", "dob": "1964-7-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "capucine julien (latin)", "dob": "1926-05-18", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "sébastien ferreira (latin)", "dob": "2005-12-19", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "paul fleury (latin)", "dob": "1979-08-02", "address": "Vierges britanniques (Îles)", "label": "negative", "script": "latin" }, { "name": "martha clark (latin)", "dob": "1954-02-07", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "эрнест воронов (cyrillic)", "dob": "1957-05-21", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "alex lamy (latin)", "dob": "1993-07-09", "address": "Îles Mineures Éloignées des États-Unis", "label": "negative", "script": "latin" }, { "name": "Igor Afanasyev (latin)", "dob": "1968-9-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Евгений Колюшин (cyrillic)", "dob": "1947-10-8", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "grégoire mathieu (latin)", "dob": "2003-12-21", "address": "Algérie", "label": "negative", "script": "latin" }, { "name": "andrée fernandez (latin)", "dob": "2002-02-18", "address": "Belize", "label": "negative", "script": "latin" }, { "name": "Ahmed Afraah (latin)", "dob": "1985-8-17", "address": "Maldives", "label": "positive", "script": "latin" } ], "query_template": "Generate execution vectors for {name} ensuring exactly 7 variations:\nPhonetic similarity: \n30% Light variation = {name}\n40% Medium variation = {name1} or {name2} \n30% Far variation = {name3} or {name4} \n\nOrthographic similarity: \n10% Light variation = {name5}\n50% Medium variation = {name6}\n40% Far variation = {name7}\n\nApproximately 40% of the total variations should follow these rule-based transformations:\nAdditionally, generate variations that: Convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 40, "selected_rules": [ "shorten_name_to_initials" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials" }, "percentage": 40 } } }, { "seed_identities_with_labels": [ { "name": "john salinas (latin)", "dob": "1957-05-05", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "charlotte roche (latin)", "dob": "2005-11-17", "address": "Namibie", "label": "negative", "script": "latin" }, { "name": "Amir Khamzat (latin)", "dob": "1974-10-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "نصور جرهم (arabic)", "dob": "1948-12-24", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "زهرة البرغوثي (arabic)", "dob": "1954-06-13", "address": "Trinidad and Tobago", "label": "negative", "script": "arabic" }, { "name": "alex martin (latin)", "dob": "2000-02-25", "address": "Lesotho", "label": "negative", "script": "latin" }, { "name": "honoré faure (latin)", "dob": "2007-08-21", "address": "Gibraltar", "label": "negative", "script": "latin" }, { "name": "jesusa soria (latin)", "dob": "1948-10-13", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Elvis KELJMENDI (latin)", "dob": "1978-5-3", "address": "Kosovo", "label": "positive", "script": "latin" }, { "name": "Saad AL-FAGIH (latin)", "dob": "1957-1-31", "address": "United Kingdom", "label": "positive", "script": "latin" }, { "name": "Olimxon Ismailov (latin)", "dob": "1996-10-4", "address": "Uzbekistan", "label": "positive", "script": "latin" }, { "name": "michelle brunel (latin)", "dob": "1961-11-13", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Сергей Носов (cyrillic)", "dob": "1961-2-17", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "daniel voisin (latin)", "dob": "1927-04-13", "address": "Turkménistan", "label": "negative", "script": "latin" }, { "name": "bianca leal (latin)", "dob": "1984-04-08", "address": "Angola", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 6 variations of {name}, ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (100% Medium). Approximately 36% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Remove a random consonant, Remove a random vowel, and Convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 36, "selected_rules": [ "remove_random_consonant", "remove_random_vowel", "shorten_name_to_initials" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "remove_random_vowel": "Remove a random vowel", "shorten_name_to_initials": "Convert name to initials" }, "percentage": 36 } } }, { "seed_identities_with_labels": [ { "name": "мартин конопленко (cyrillic)", "dob": "1934-07-19", "address": "Jamaica", "label": "negative", "script": "cyrillic" }, { "name": "hector cabeza (latin)", "dob": "1933-09-09", "address": "Palau", "label": "negative", "script": "latin" }, { "name": "jose romero (latin)", "dob": "1932-06-04", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Valery Pakhnits (latin)", "dob": "1953-1-22", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Wai-chung Lo (latin)", "dob": "1961-11-19", "address": "Hong Kong", "label": "positive", "script": "latin" }, { "name": "yéssica santiago (latin)", "dob": "1991-07-19", "address": "Pakistán", "label": "negative", "script": "latin" }, { "name": "مسلم بنو هلال (arabic)", "dob": "1969-09-18", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "Кирилл Селезнёв (cyrillic)", "dob": "1974-4-23", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Hamza Akbar (latin)", "dob": "1998-9-6", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "Aleksandr Akimov (latin)", "dob": "1954-11-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "manuela diego (latin)", "dob": "1997-12-07", "address": "Dinamarca", "label": "negative", "script": "latin" }, { "name": "asdrubal osorio (latin)", "dob": "1979-09-04", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "michelle berger (latin)", "dob": "1980-04-27", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "rosario gual (latin)", "dob": "1925-10-18", "address": "Canadá", "label": "negative", "script": "latin" }, { "name": "cynthia pena (latin)", "dob": "1927-01-09", "address": "Kenya", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 100% Medium, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 60% of variations that follow: Additionally, generate variations that perform these transformations: Replace random vowels with different vowels, Replace double letters with a single letter, and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 60, "selected_rules": [ "replace_random_vowel_with_random_vowel", "replace_double_letters_with_single_letter", "swap_random_letter" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 60 } } }, { "seed_identities_with_labels": [ { "name": "Nikolay Burlyayev (latin)", "dob": "1946-8-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Recep Aydin (latin)", "dob": "1996-11-14", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "stephen rogers (latin)", "dob": "1935-05-18", "address": "French Polynesia", "label": "negative", "script": "latin" }, { "name": "William ZHOU (latin)", "dob": "1977-3-25", "address": "China", "label": "positive", "script": "latin" }, { "name": "édith leblanc (latin)", "dob": "1974-09-07", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "erica phillips (latin)", "dob": "2000-06-25", "address": "Cote d'Ivoire", "label": "negative", "script": "latin" }, { "name": "cody howell (latin)", "dob": "1987-06-04", "address": "Equatorial Guinea", "label": "negative", "script": "latin" }, { "name": "amanda barnes (latin)", "dob": "1946-12-17", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "صاحب راجح (arabic)", "dob": "1994-03-23", "address": "Bouvet Island (Bouvetoya)", "label": "negative", "script": "arabic" }, { "name": "маргарита щербакова (cyrillic)", "dob": "2002-12-26", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "robert suarez (latin)", "dob": "1942-06-17", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "thibault lacroix (latin)", "dob": "2003-10-11", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "april fields (latin)", "dob": "1927-04-29", "address": "Micronesia", "label": "negative", "script": "latin" }, { "name": "Татьяна Томилина (cyrillic)", "dob": "1966-4-18", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "Myo Oo (latin)", "dob": "1960-6-23", "address": "Burma", "label": "positive", "script": "latin" } ], "query_template": "Generate 15 variations of {name}, ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (10% Light, 30% Medium, 60% Far). Approximately 52% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Replace spaces with special characters, and Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 52, "selected_rules": [ "replace_spaces_with_random_special_characters", "remove_all_spaces" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "remove_all_spaces": "Remove all spaces" }, "percentage": 52 } } }, { "seed_identities_with_labels": [ { "name": "Aleksandra Oksenchuk (latin)", "dob": "1992-10-16", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "Kamkong WONG (latin)", "dob": "1958-8-8", "address": "China", "label": "positive", "script": "latin" }, { "name": "christopher thompson (latin)", "dob": "2004-01-15", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Theint Htet (latin)", "dob": "1999-5-21", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "مجید مسلط (arabic)", "dob": "1968-10-15", "address": "Iran", "label": "positive", "script": "arabic" }, { "name": "noah guerreiro (latin)", "dob": "1966-06-17", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "larissa marques (latin)", "dob": "1935-12-04", "address": "Quirguizistão", "label": "negative", "script": "latin" }, { "name": "sofia reis (latin)", "dob": "1965-03-01", "address": "Wallis e Futuna", "label": "negative", "script": "latin" }, { "name": "beatriz pinheiro (latin)", "dob": "1991-04-19", "address": "Islândia", "label": "negative", "script": "latin" }, { "name": "франц манавски (cyrillic)", "dob": "1968-03-19", "address": "Luxembourg", "label": "negative", "script": "cyrillic" }, { "name": "marine gauthier (latin)", "dob": "2002-11-13", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "нинель сорокина (cyrillic)", "dob": "1976-05-23", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "Gennadii NIKULOV (latin)", "dob": "1967-2-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "diego batista (latin)", "dob": "1981-05-10", "address": "Trindade e Tobago", "label": "negative", "script": "latin" }, { "name": "ariana nascimento (latin)", "dob": "1964-09-04", "address": "Angola", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 13 variations of {name}, ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (10% Light, 50% Medium, 40% Far). Approximately 37% of the total 13 variations should follow these rule-based transformations: \nReplace random consonants with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 37, "selected_rules": [ "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 37 } } }, { "seed_identities_with_labels": [ { "name": "Aegli Tamani-Phella (latin)", "dob": "1978-6-21", "address": "Cyprus", "label": "positive", "script": "latin" }, { "name": "robert tapia (latin)", "dob": "1925-08-24", "address": "Finland", "label": "negative", "script": "latin" }, { "name": "shawn banks (latin)", "dob": "1954-06-27", "address": "Hong Kong", "label": "negative", "script": "latin" }, { "name": "martin maury (latin)", "dob": "1961-08-07", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "قانت أنمار (arabic)", "dob": "1937-09-16", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "sharon simmons (latin)", "dob": "1952-09-12", "address": "Madagascar", "label": "negative", "script": "latin" }, { "name": "marcel boulanger (latin)", "dob": "1947-10-20", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Lucy Miller (latin)", "dob": "1973-7-2", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "пантелеймон гузенко (cyrillic)", "dob": "1933-06-24", "address": "Egypt", "label": "negative", "script": "cyrillic" }, { "name": "juan chang (latin)", "dob": "2005-11-13", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Aleksandr Akimov (latin)", "dob": "1954-11-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "angie patterson (latin)", "dob": "1968-06-06", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "christie villa (latin)", "dob": "1991-12-05", "address": "Bangladesh", "label": "negative", "script": "latin" }, { "name": "Леонид Пасечник (cyrillic)", "dob": "1970-3-15", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "Serhiy Khvoshch (latin)", "dob": "1951-5-31", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 14 execution vectors ({name} variations) for each target identity. Ensure phonetic similarity (sound-alike names) with the following distributions: 10% Light, 30% Medium, and 60% Far. Additionally, ensure orthographic similarity (visually similar spellings) with the following distributions: 50% Light and 50% Medium. Approximately 16% of the total 14 variations should follow these rule-based transformations: Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 16, "selected_rules": [ "add_random_leading_title" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 16 } } }, { "seed_identities_with_labels": [ { "name": "Nelli Parutenko (latin)", "dob": "1962-3-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "arsenio medina (latin)", "dob": "1992-05-07", "address": "Cabo Verde", "label": "negative", "script": "latin" }, { "name": "Muhammad al-'Anizi (latin)", "dob": "1986-5-26", "address": "Kuwait", "label": "positive", "script": "latin" }, { "name": "emma pinto (latin)", "dob": "1933-11-08", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Jinghe Lin (latin)", "dob": "1982-12-3", "address": "China", "label": "positive", "script": "latin" }, { "name": "manuelita bermudez (latin)", "dob": "1993-06-07", "address": "Guyana", "label": "negative", "script": "latin" }, { "name": "teodora ruiz (latin)", "dob": "1932-02-18", "address": "Ucrania", "label": "negative", "script": "latin" }, { "name": "галина богданова (cyrillic)", "dob": "1992-08-16", "address": "Equatorial Guinea", "label": "negative", "script": "cyrillic" }, { "name": "афиноген владимиров (cyrillic)", "dob": "1976-04-06", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "gilles guilbert (latin)", "dob": "1932-12-10", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "gervasio sanchez (latin)", "dob": "1939-09-08", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "michael garcia (latin)", "dob": "1975-03-30", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Ivan Prokopenko (latin)", "dob": "1973-9-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Андрей Паршин (cyrillic)", "dob": "1974-2-19", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "tere español (latin)", "dob": "2000-07-25", "address": "Grecia", "label": "negative", "script": "latin" } ], "query_template": "Generate 15 variations of {name}. Ensuring phonetic similarity by generating exactly 3 light, 9 medium, and 3 far sound-alike names. Additionally, generate orthographic similarity variations with exactly 10 lightly visually similar spellings and 5 moderately visually similar spellings. Approximately 56% of the total 15 variations should follow these rule-based transformations: Replace double letters with a single letter in 4 variations, replace random consonants with different consonants in 4 variations, and abbreviate name parts in 3 variations, with each transformation represented across the set of rule-based variations.\n[VALIDATION HINTS]: Phonetic similarity: 20% Light.; Orthographic similarity: 70% Light. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 56, "selected_rules": [ "replace_double_letters_with_single_letter", "replace_random_consonant_with_random_consonant", "shorten_name_to_abbreviations" ], "rule_descriptions": { "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 56 } } }, { "seed_identities_with_labels": [ { "name": "adriana faria (latin)", "dob": "1939-01-07", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "edgar costa (latin)", "dob": "1986-04-05", "address": "Palau", "label": "negative", "script": "latin" }, { "name": "valentina branco (latin)", "dob": "1930-10-25", "address": "Papua-Nova Guiné", "label": "negative", "script": "latin" }, { "name": "lorena guerreiro (latin)", "dob": "1944-06-18", "address": "Nova Caledónia", "label": "negative", "script": "latin" }, { "name": "amanda osborne (latin)", "dob": "1951-11-16", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "emilia peñas (latin)", "dob": "1956-03-18", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Kseniya Shoigu (latin)", "dob": "1991-1-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Reuben LAVILLA (latin)", "dob": "1972-10-4", "address": "Philippines", "label": "positive", "script": "latin" }, { "name": "wilson pereira (latin)", "dob": "1980-05-24", "address": "Barém", "label": "negative", "script": "latin" }, { "name": "samuel pires (latin)", "dob": "1945-05-10", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Дмитрий Барышников (cyrillic)", "dob": "1976-11-6", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Mohammad Alenezi (latin)", "dob": "1986-5-26", "address": "Kuwait", "label": "positive", "script": "latin" }, { "name": "Maria Faassen (latin)", "dob": "1985-4-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "христодор токов (cyrillic)", "dob": "1976-01-12", "address": "Benin", "label": "negative", "script": "cyrillic" }, { "name": "دارين درويش (arabic)", "dob": "1974-06-20", "address": "Libya", "label": "High Risk", "script": "arabic" } ], "query_template": "Generate 6 execution vectors for {name}, ensuring phonetic similarity with 20% Light, 60% Medium, and 20% Far variations. Also, ensure orthographic similarity with 10% Light, 30% Medium, and 60% Far variations. Approximately 11% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that: Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 11, "selected_rules": [ "insert_random_letter" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter" }, "percentage": 11 } } }, { "seed_identities_with_labels": [ { "name": "remedios carretero (latin)", "dob": "2007-02-12", "address": "Palau", "label": "negative", "script": "latin" }, { "name": "luce mendès (latin)", "dob": "1966-05-01", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "manuela nadal (latin)", "dob": "1956-05-11", "address": "Ecuador", "label": "negative", "script": "latin" }, { "name": "samanta cámara (latin)", "dob": "1990-12-13", "address": "Nauru", "label": "negative", "script": "latin" }, { "name": "трифон носкова (cyrillic)", "dob": "1974-01-30", "address": "Western Sahara", "label": "negative", "script": "cyrillic" }, { "name": "Aleksei Budnev (latin)", "dob": "1964-7-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Sergey Perminov (latin)", "dob": "1968-9-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Иван Демченко (cyrillic)", "dob": "1960-9-27", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Alexander Malkevich (latin)", "dob": "1975-6-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "pascual sarmiento (latin)", "dob": "1984-01-20", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "Marianne Houwayek (latin)", "dob": "1980-5-14", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "طيّع بنو زيد (arabic)", "dob": "1962-05-26", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "clémence breton (latin)", "dob": "1937-09-27", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "francisco balaguer (latin)", "dob": "1956-12-23", "address": "Líbano", "label": "negative", "script": "latin" }, { "name": "léon carre (latin)", "dob": "1948-09-20", "address": "Monaco", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 10 variations of {name} ensuring phonetic similarity (50% Light, e.g. Soundex({name}), Metaphone({name}) - Medium) and orthographic similarity (30% Light, e.g. LevenshteinDistance({name}, '{name}-ed') - Medium, e.g. JaroWinklerDistance({name}, '{name}sky'), 30% Far). Approximately 50% of the total 10 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Insert a random letter (e.g. {name}a), Replace spaces with special characters (e.g. {name}@#$%), and Reorder name parts (e.g. Last{middle}{first}).\n[VALIDATION HINTS]: Orthographic similarity: 30% Light, 40% Medium. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 50, "selected_rules": [ "insert_random_letter", "replace_spaces_with_random_special_characters", "name_parts_permutations" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter", "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "name_parts_permutations": "Reorder name parts" }, "percentage": 50 } } }, { "seed_identities_with_labels": [ { "name": "Ahmad Shafa'i (latin)", "dob": "1968-5-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "amancio quero (latin)", "dob": "1975-02-02", "address": "Francia", "label": "negative", "script": "latin" }, { "name": "kike alfonso (latin)", "dob": "1968-03-23", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "julián fajardo (latin)", "dob": "1925-12-31", "address": "Camboya", "label": "negative", "script": "latin" }, { "name": "laurence blanc (latin)", "dob": "1930-07-06", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "abilio iñiguez (latin)", "dob": "1961-03-25", "address": "Papua Nueva Guinea", "label": "negative", "script": "latin" }, { "name": "明明 王 (chinese)", "dob": "1989-3-12", "address": "China", "label": "positive", "script": "chinese" }, { "name": "معزّ المشاولة (arabic)", "dob": "1991-09-04", "address": "New Zealand", "label": "negative", "script": "arabic" }, { "name": "maximino palau (latin)", "dob": "1960-02-09", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "jesús perera (latin)", "dob": "1967-07-09", "address": "Turquía", "label": "negative", "script": "latin" }, { "name": "heather baldwin (latin)", "dob": "1982-02-18", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Wafiq Naser (latin)", "dob": "1964-7-10", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "филарет исаев (cyrillic)", "dob": "1934-12-28", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "Amber Sun (latin)", "dob": "1969-3-23", "address": "Taiwan", "label": "positive", "script": "latin" }, { "name": "'Ali al-Shufa (latin)", "dob": "1991-7-25", "address": "Iran", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 50% Light, 50% Medium, and also include 39% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random consonant, and Convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 39, "selected_rules": [ "remove_random_consonant", "shorten_name_to_initials" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "shorten_name_to_initials": "Convert name to initials" }, "percentage": 39 } } }, { "seed_identities_with_labels": [ { "name": "françois gaillard (latin)", "dob": "1981-01-06", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Maulana Ubaidullah (latin)", "dob": "1985-1-31", "address": "Afghanistan", "label": "positive", "script": "latin" }, { "name": "marine nguyen (latin)", "dob": "1976-09-14", "address": "Sri Lanka", "label": "negative", "script": "latin" }, { "name": "Apinya CHANTRAPRAPAPORN (latin)", "dob": "1955-1-14", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "anselma escalona (latin)", "dob": "1936-04-13", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Nikita Samoylenko (latin)", "dob": "1992-8-28", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "roland rolland (latin)", "dob": "1976-05-25", "address": "Israël", "label": "negative", "script": "latin" }, { "name": "طامح بني رشيد (arabic)", "dob": "1928-11-14", "address": "Equatorial Guinea", "label": "negative", "script": "arabic" }, { "name": "jacques lacombe (latin)", "dob": "1940-01-07", "address": "Lesotho", "label": "negative", "script": "latin" }, { "name": "émile ferrand (latin)", "dob": "1934-03-12", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "thibaut allain (latin)", "dob": "1971-10-26", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "maurice muller (latin)", "dob": "1952-12-29", "address": "Turquie", "label": "negative", "script": "latin" }, { "name": "Irina Kosenko (latin)", "dob": "1973-1-16", "address": "North Macedonia, The Republic of", "label": "positive", "script": "latin" }, { "name": "شهم نسيبة (arabic)", "dob": "1944-04-18", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "Николай Воробей (cyrillic)", "dob": "1963-5-4", "address": "Belarus", "label": "positive", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 100% Far, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 37% of variations that follow: Additionally, generate variations that perform these transformations: Duplicate a random letter, Swap adjacent consonants, and Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 37, "selected_rules": [ "duplicate_random_letter_as_double_letter", "swap_adjacent_consonants", "add_random_trailing_title" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "swap_adjacent_consonants": "Swap adjacent consonants", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 37 } } }, { "seed_identities_with_labels": [ { "name": "griselda aparicio (latin)", "dob": "1960-05-13", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "سرحان الدباغ (arabic)", "dob": "1963-04-23", "address": "Mayotte", "label": "negative", "script": "arabic" }, { "name": "Abdullah Faisal (latin)", "dob": "1963-9-10", "address": "Jamaica", "label": "positive", "script": "latin" }, { "name": "مشفق ابو عيد (arabic)", "dob": "1952-10-07", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "Ruslan Lechkhadzhiev (latin)", "dob": "1965-7-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "alice colas (latin)", "dob": "1966-11-16", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "élise hoarau (latin)", "dob": "1991-09-27", "address": "Maurice", "label": "negative", "script": "latin" }, { "name": "sophie marchal (latin)", "dob": "1990-05-17", "address": "Italie", "label": "negative", "script": "latin" }, { "name": "Любовь Пригожина (cyrillic)", "dob": "1970-6-26", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "alfred brunet (latin)", "dob": "1976-12-25", "address": "Maldives (Îles)", "label": "negative", "script": "latin" }, { "name": "Harun Nasir-Al-Din (latin)", "dob": "1970-6-5", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "jason burns (latin)", "dob": "1965-09-17", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "denise gautier (latin)", "dob": "1944-07-26", "address": "Polynésie française", "label": "negative", "script": "latin" }, { "name": "Nasser Nesr (latin)", "dob": "1963-4-20", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "kim clark (latin)", "dob": "2001-06-11", "address": "Nigeria", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 14 execution vectors for {name}, ensuring phonetic similarity with 7 Light and 7 Medium variations, and orthographic similarity with 4 Light, 5.6 Medium, and 4 Far variations. Approximately 41% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that: Remove a random consonant from {name}.\n[VALIDATION HINTS]: Phonetic similarity: 50% Light.; Orthographic similarity: 30% Light. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 41, "selected_rules": [ "remove_random_consonant" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant" }, "percentage": 41 } } }, { "seed_identities_with_labels": [ { "name": "Myint Swe (latin)", "dob": "1951-5-24", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "luísa leite (latin)", "dob": "1955-04-19", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Muhammad Al-Sunaydar (latin)", "dob": "1987-3-5", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "nazaret menendez (latin)", "dob": "1951-01-25", "address": "Albania", "label": "negative", "script": "latin" }, { "name": "maximino agustí (latin)", "dob": "1931-08-21", "address": "Egipto", "label": "negative", "script": "latin" }, { "name": "Nasser Nesr (latin)", "dob": "1963-4-20", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "Людмила Зайцева (cyrillic)", "dob": "1979-7-25", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "nidia español (latin)", "dob": "1957-09-23", "address": "Sudáfrica", "label": "negative", "script": "latin" }, { "name": "ناهد أبو شقدم (arabic)", "dob": "1973-06-14", "address": "Hong Kong", "label": "negative", "script": "arabic" }, { "name": "Fadlallah Taher (latin)", "dob": "1963-11-10", "address": "Guinea", "label": "positive", "script": "latin" }, { "name": "gilles charpentier (latin)", "dob": "1995-09-22", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "colette fischer (latin)", "dob": "1948-04-03", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "رشاد طيء (arabic)", "dob": "2004-03-21", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "angelino camino (latin)", "dob": "2004-03-13", "address": "Malasia", "label": "negative", "script": "latin" }, { "name": "xavier martín (latin)", "dob": "1979-09-01", "address": "Nicaragua", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 6 execution vectors for each target identity {name}, ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 43% of the total 6 variations should follow these rule-based transformations: \nReplace random vowels with different vowels, \nSwap adjacent syllables. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 43, "selected_rules": [ "replace_random_vowel_with_random_vowel", "swap_adjacent_syllables" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 43 } } }, { "seed_identities_with_labels": [ { "name": "زايد بجيلة (arabic)", "dob": "1981-07-02", "address": "Solomon Islands", "label": "negative", "script": "arabic" }, { "name": "chad lewis (latin)", "dob": "1971-11-01", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Yuriy Shevchenko (latin)", "dob": "1966-12-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "germán ribera (latin)", "dob": "1977-07-07", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "Hosseyn Akbari (latin)", "dob": "1961-6-17", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Chris Tang (latin)", "dob": "1965-7-4", "address": "Hong Kong", "label": "positive", "script": "latin" }, { "name": "sarah scott (latin)", "dob": "1977-01-13", "address": "Timor-Leste", "label": "negative", "script": "latin" }, { "name": "nicholas lin (latin)", "dob": "1987-05-04", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "tracey peters (latin)", "dob": "1935-12-28", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "леонтий фролов (cyrillic)", "dob": "1959-03-08", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "julie parker (latin)", "dob": "1968-11-03", "address": "Reunion", "label": "negative", "script": "latin" }, { "name": "joel dunn (latin)", "dob": "1941-03-28", "address": "Reunion", "label": "negative", "script": "latin" }, { "name": "Игор Бабушкин (cyrillic)", "dob": "1976-4-5", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Igor Kobzev (latin)", "dob": "1966-10-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "breanna schwartz (latin)", "dob": "2003-03-26", "address": "Finland", "label": "negative", "script": "latin" } ], "query_template": "Generate 14 name variations of {name}, ensuring phonetic similarity (sound-alike names) of 100% Medium and orthographic similarity (visually similar spellings) of 100% Medium. Approximately 20% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that swap adjacent syllables in {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 20, "selected_rules": [ "swap_adjacent_syllables" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 20 } } }, { "seed_identities_with_labels": [ { "name": "claude mary (latin)", "dob": "1952-07-29", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "tatiana esteves (latin)", "dob": "1965-08-07", "address": "Bangladeche", "label": "negative", "script": "latin" }, { "name": "'Abdullah al-'Anizi (latin)", "dob": "1984-8-2", "address": "Kuwait", "label": "positive", "script": "latin" }, { "name": "felix valdés (latin)", "dob": "1977-12-27", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Svetlana Bessarab (latin)", "dob": "1970-12-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Vladimir Resin (latin)", "dob": "1936-2-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "victória pinheiro (latin)", "dob": "2000-12-25", "address": "Arctic Ocean", "label": "negative", "script": "latin" }, { "name": "luca marques (latin)", "dob": "1990-04-10", "address": "Suazilândia", "label": "negative", "script": "latin" }, { "name": "catarina lourenço (latin)", "dob": "1941-06-01", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Svetlana Yemilianova (latin)", "dob": "1971-10-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Роман Леньшин (cyrillic)", "dob": "1976-8-2", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "جيلان ابو الحاج (arabic)", "dob": "1935-09-14", "address": "Tokelau", "label": "negative", "script": "arabic" }, { "name": "steven stuart (latin)", "dob": "1963-10-08", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "ناظم همدان (arabic)", "dob": "1965-07-28", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "bernardo macedo (latin)", "dob": "1928-01-14", "address": "Roménia", "label": "negative", "script": "latin" } ], "query_template": "Generate 14 variations of {name} ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 41% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that replace double letters with a single letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 41, "selected_rules": [ "replace_double_letters_with_single_letter" ], "rule_descriptions": { "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 41 } } }, { "seed_identities_with_labels": [ { "name": "james bonilla (latin)", "dob": "1981-08-03", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "inès bodin (latin)", "dob": "2007-04-12", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "donna anderson (latin)", "dob": "1973-12-05", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Сяргей Зубкоў (cyrillic)", "dob": "1975-8-21", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "mariana coelho (latin)", "dob": "1984-03-22", "address": "Granada", "label": "negative", "script": "latin" }, { "name": "leonardo gonçalves (latin)", "dob": "1958-03-17", "address": "Chipre", "label": "negative", "script": "latin" }, { "name": "Yuri Valyaev (latin)", "dob": "1959-4-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "زكريا شاهين (arabic)", "dob": "1957-12-16", "address": "Romania", "label": "negative", "script": "arabic" }, { "name": "Genadz DAVYDZKA (latin)", "dob": "1955-9-29", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "rúben assunção (latin)", "dob": "1979-09-18", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Gholamali Mohammadi (latin)", "dob": "1963-6-22", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "leonardo martins (latin)", "dob": "1934-08-30", "address": "Suíça", "label": "negative", "script": "latin" }, { "name": "kyara melo (latin)", "dob": "1944-03-01", "address": "Burquina Faso", "label": "negative", "script": "latin" }, { "name": "سوگند سغیری (arabic)", "dob": "1964-09-04", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "In O (latin)", "dob": "1969-7-3", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 7 execution vectors for {name} with phonetic similarity varying between Light (10%), Medium (30%), and Far (60%). Ensure orthographic similarity also varies between Light (30%), Medium (40%), and Far (30%). Approximately 23% of the total variations should follow rule-based transformations: Additionally, generate variations that perform these transformations: Remove a random vowel from {name}, Add a title suffix (Jr., PhD, etc.) to {name}, and Remove all spaces in {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 23, "selected_rules": [ "remove_random_vowel", "add_random_trailing_title", "remove_all_spaces" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "remove_all_spaces": "Remove all spaces" }, "percentage": 23 } } }, { "seed_identities_with_labels": [ { "name": "jessica king (latin)", "dob": "1999-03-30", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Nikolay Levichev (latin)", "dob": "1953-5-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "دهمان آل علي (arabic)", "dob": "2001-09-21", "address": "Cape Verde", "label": "negative", "script": "arabic" }, { "name": "anastasie michel (latin)", "dob": "1967-11-15", "address": "Antarctique", "label": "negative", "script": "latin" }, { "name": "paulette fischer (latin)", "dob": "1988-03-07", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "thibault lebreton (latin)", "dob": "1969-06-02", "address": "Bermudes (Les)", "label": "negative", "script": "latin" }, { "name": "camille guillaume (latin)", "dob": "1983-01-21", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Min Shi (latin)", "dob": "1979-5-20", "address": "China", "label": "positive", "script": "latin" }, { "name": "timothée goncalves (latin)", "dob": "1993-11-03", "address": "Falkland (Île)", "label": "negative", "script": "latin" }, { "name": "селена вакрилов (cyrillic)", "dob": "1942-01-13", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "Михаил Берулава (cyrillic)", "dob": "1950-8-3", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "ezequiel pallarès (latin)", "dob": "1931-01-22", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Xutong Qin (latin)", "dob": "1994-4-29", "address": "China", "label": "positive", "script": "latin" }, { "name": "Vladimir Polin (latin)", "dob": "1962-8-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "aimé marques (latin)", "dob": "1963-08-01", "address": "Sénégal", "label": "negative", "script": "latin" } ], "query_template": "Generate exactly 10 execution vectors for {name}, ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (100% Medium). Approximately 19% of the total 10 variations should follow these rule-based transformations: Additionally, generate variations that add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 19, "selected_rules": [ "add_random_leading_title" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 19 } } }, { "seed_identities_with_labels": [ { "name": "Wai-chung Lo (latin)", "dob": "1961-11-19", "address": "Hong Kong", "label": "positive", "script": "latin" }, { "name": "charles brun (latin)", "dob": "1976-05-06", "address": "Bahamas", "label": "negative", "script": "latin" }, { "name": "william bertin (latin)", "dob": "1932-02-15", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "ana belén pujadas (latin)", "dob": "1970-12-31", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "luce louis (latin)", "dob": "1983-09-03", "address": "Micronésie (États fédérés de)", "label": "negative", "script": "latin" }, { "name": "Tun Naing (latin)", "dob": "1963-4-30", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "савва абрамова (cyrillic)", "dob": "1940-06-28", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "thomas didier (latin)", "dob": "1996-10-17", "address": "Arménie", "label": "negative", "script": "latin" }, { "name": "vincent da costa (latin)", "dob": "1947-10-07", "address": "Rwanda", "label": "negative", "script": "latin" }, { "name": "Yuriy Karaev (latin)", "dob": "1966-6-21", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "maryse charles (latin)", "dob": "1988-06-17", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "véronique guyot (latin)", "dob": "1937-02-08", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Mohammad Alenezi (latin)", "dob": "1986-5-26", "address": "Kuwait", "label": "positive", "script": "latin" }, { "name": "جاسم التركمان (arabic)", "dob": "1936-09-16", "address": "El Salvador", "label": "negative", "script": "arabic" }, { "name": "Александр Черковский (cyrillic)", "dob": "1972-3-21", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 27% of variations that follow: Additionally, generate variations that perform these transformations: Replace random vowels with different vowels, and Replace double letters with a single letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 27, "selected_rules": [ "replace_random_vowel_with_random_vowel", "replace_double_letters_with_single_letter" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 27 } } }, { "seed_identities_with_labels": [ { "name": "Дмитрий Левин (cyrillic)", "dob": "1965-8-27", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Denis RJAUZOW (latin)", "dob": "1974-5-23", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "david walter (latin)", "dob": "1941-07-10", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "barry delacruz (latin)", "dob": "1929-12-29", "address": "Martinique", "label": "negative", "script": "latin" }, { "name": "Yuriy Karaev (latin)", "dob": "1966-6-21", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "илианна токов (cyrillic)", "dob": "1988-01-19", "address": "Luxembourg", "label": "negative", "script": "cyrillic" }, { "name": "kelly mcmillan (latin)", "dob": "1961-06-30", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "ميلاء البغدادي (arabic)", "dob": "1938-06-01", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "Johnson Byabashaija (latin)", "dob": "1957-9-27", "address": "Uganda", "label": "positive", "script": "latin" }, { "name": "stéphanie denis (latin)", "dob": "1991-09-04", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "justin ayers (latin)", "dob": "1966-05-19", "address": "Denmark", "label": "negative", "script": "latin" }, { "name": "Andrei Troshev (latin)", "dob": "1953-4-5", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "brandon meyer (latin)", "dob": "1930-07-10", "address": "Albania", "label": "negative", "script": "latin" }, { "name": "ariana oliveira (latin)", "dob": "1962-01-14", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "david ramos (latin)", "dob": "1999-05-07", "address": "Togo", "label": "negative", "script": "latin" } ], "query_template": "Generate 15 variations of {name}. ensuring phonetic similarity (10% Light variations, e.g. {name} ~light, 50% Medium variations, e.g. {name} ~medium, 40% Far variations, e.g. {name} ~far) and orthographic similarity (30% Light variations, e.g. {name}-light, 40% Medium variations, e.g. {name}-med, 30% Far variations, e.g. {name}-f). Approximately 22% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Abbreviate first name to initials, Use first name initial with last name (e.g. J.Doe), and Reorder last and first names (e.g. Doe.Jane).\n[VALIDATION HINTS]: Apply these rule-based transformations: Abbreviate name parts; Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 22, "selected_rules": [ "shorten_name_to_abbreviations", "initial_only_first_name", "name_parts_permutations" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "initial_only_first_name": "Use first name initial with last name", "name_parts_permutations": "Reorder name parts" }, "percentage": 22 } } }, { "seed_identities_with_labels": [ { "name": "орлин таралингова (cyrillic)", "dob": "1963-09-02", "address": "Panama", "label": "negative", "script": "cyrillic" }, { "name": "françois blanc (latin)", "dob": "1949-01-22", "address": "Nouvelle Calédonie", "label": "negative", "script": "latin" }, { "name": "marguerite barre (latin)", "dob": "1995-03-01", "address": "Ouzbékistan", "label": "negative", "script": "latin" }, { "name": "Andrei Suvorov (latin)", "dob": "1967-10-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "joseph berthelot (latin)", "dob": "1990-03-01", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "joana pinto (latin)", "dob": "1986-07-26", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "sophie lebrun (latin)", "dob": "1926-03-23", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Roman Semenov (latin)", "dob": "1987-11-8", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "diane bertin (latin)", "dob": "1992-02-02", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "伟 张 (chinese)", "dob": "1977-6-16", "address": "China", "label": "positive", "script": "chinese" }, { "name": "William ZHOU (latin)", "dob": "1977-3-25", "address": "China", "label": "positive", "script": "latin" }, { "name": "zacharie adam (latin)", "dob": "1928-10-18", "address": "Malawi", "label": "negative", "script": "latin" }, { "name": "تامر عضل (arabic)", "dob": "1999-12-30", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "Xenia Iudaeva (latin)", "dob": "1970-3-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "gérard nicolas (latin)", "dob": "1929-04-08", "address": "Hongrie", "label": "negative", "script": "latin" } ], "query_template": "Generate 7 variations of {name}. Ensuring phonetic similarity (20% Light, 60% Medium, 20% Far), generate the following: \n{Light Phonetic Variation}, \n{Medium Phonetic Variation} (repeated to meet the 60% requirement), \n{Far Phonetic Variation}, \n{Far Phonetic Variation}, \n{Far Phonetic Variation}. \nApproximately 14% of the total 7 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Abbreviate name parts, Replace random vowels with different vowels, and Remove a random vowel.\n[VALIDATION HINTS]: Orthographic similarity: 100% Far. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Far": 1.0 }, "rule_based": { "rule_percentage": 14, "selected_rules": [ "shorten_name_to_abbreviations", "replace_random_vowel_with_random_vowel", "remove_random_vowel" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "remove_random_vowel": "Remove a random vowel" }, "percentage": 14 } } }, { "seed_identities_with_labels": [ { "name": "thérèse fabre (latin)", "dob": "1984-08-02", "address": "Porto Rico", "label": "negative", "script": "latin" }, { "name": "Ahmad Shafa'i (latin)", "dob": "1968-5-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Kakha SHUSHANASHVILI (latin)", "dob": "1972-2-8", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "élodie dos santos (latin)", "dob": "1944-05-08", "address": "République Dominicaine", "label": "negative", "script": "latin" }, { "name": "Андрей Никитин (cyrillic)", "dob": "1979-11-26", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "maurice dumont (latin)", "dob": "1961-06-20", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "marcel weber (latin)", "dob": "2000-11-25", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Alexander Vetenevich (latin)", "dob": "1976-6-20", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "كاسر معتوق (arabic)", "dob": "1930-03-16", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "mauricio sureda (latin)", "dob": "1946-07-31", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "charles robinson (latin)", "dob": "1964-09-09", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "isabelle guillaume (latin)", "dob": "1973-07-21", "address": "Soudan", "label": "negative", "script": "latin" }, { "name": "philippe perez (latin)", "dob": "1950-03-10", "address": "Albanie", "label": "negative", "script": "latin" }, { "name": "رزين الحكير (arabic)", "dob": "1991-08-24", "address": "Nauru", "label": "negative", "script": "arabic" }, { "name": "Kwo'n-u Han (latin)", "dob": "1962-8-21", "address": "China", "label": "positive", "script": "latin" } ], "query_template": "Generate 12 variations of {name}. Ensuring phonetic similarity (10% Light, e.g. {name} -> {name}.LY), (50% Medium, e.g. {name} -> {pronunciation}), and (40% Far, e.g. {name} -> {completely unrelated sound}). Additionally, 7 of the total 12 variations should follow these rule-based transformations: Remove all spaces, and Use first name initial with last name.. Approximately 57% of the total 12 variations should follow these rule-based transformations. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 57, "selected_rules": [ "remove_all_spaces", "initial_only_first_name" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "initial_only_first_name": "Use first name initial with last name" }, "percentage": 57 } } }, { "seed_identities_with_labels": [ { "name": "федосий фадеева (cyrillic)", "dob": "1985-08-27", "address": "Lesotho", "label": "negative", "script": "cyrillic" }, { "name": "adelia gomez (latin)", "dob": "1983-08-02", "address": "Belarús", "label": "negative", "script": "latin" }, { "name": "chus burgos (latin)", "dob": "1981-11-05", "address": "Uruguay", "label": "negative", "script": "latin" }, { "name": "candela palomino (latin)", "dob": "1927-04-13", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Leonid Kalashnikov (latin)", "dob": "1960-8-6", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "claudia solís (latin)", "dob": "1943-11-08", "address": "Bhután", "label": "negative", "script": "latin" }, { "name": "Igor Rotenberh (latin)", "dob": "1973-5-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "kristie rice (latin)", "dob": "1943-10-18", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "paulino mulet (latin)", "dob": "1935-06-02", "address": "Tuvalu", "label": "negative", "script": "latin" }, { "name": "تحسين سموم (arabic)", "dob": "1978-08-07", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "Olimjon Tohtahunov (latin)", "dob": "1949-12-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Павло Крилло (cyrillic)", "dob": "1981-12-1", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "Yusuf al-Hatum (latin)", "dob": "1966-1-5", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "kim walker (latin)", "dob": "1999-04-15", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "angel frank (latin)", "dob": "1984-12-20", "address": "Nigeria", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 10 variations of {name} ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (50% Light, 50% Medium). Approximately 49% of the total 10 variations should follow these rule-based transformations: \n- Remove a random consonant\n- Swap adjacent syllables.\nAdditionally, include variations that match the specified percentages for phonetic and orthographic similarities. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 49, "selected_rules": [ "remove_random_consonant", "swap_adjacent_syllables" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 49 } } }, { "seed_identities_with_labels": [ { "name": "Song Jong (latin)", "dob": "1972-11-15", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Muhammad Al-Sunaydar (latin)", "dob": "1987-3-5", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "حمدي زحيكة (arabic)", "dob": "1997-04-16", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "tami jensen (latin)", "dob": "1930-02-24", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Abu Salih (latin)", "dob": "1983-12-13", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Азат Ягафаров (cyrillic)", "dob": "1961-4-4", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "محدثه رسته (arabic)", "dob": "1985-05-26", "address": "Tajikistan", "label": "negative", "script": "arabic" }, { "name": "marthe leduc (latin)", "dob": "1999-08-23", "address": "Slovaquie", "label": "negative", "script": "latin" }, { "name": "élodie chevalier (latin)", "dob": "1958-03-11", "address": "Niue", "label": "negative", "script": "latin" }, { "name": "Myo'ng-hun Ri (latin)", "dob": "1969-3-14", "address": "Korea, North", "label": "positive", "script": "latin" }, { "name": "espiridión haro (latin)", "dob": "1978-03-21", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "michelle hamon (latin)", "dob": "1966-04-01", "address": "Cayman (Îles)", "label": "negative", "script": "latin" }, { "name": "sylvie lagarde (latin)", "dob": "1957-05-02", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "antoinette allard (latin)", "dob": "1995-01-21", "address": "Groenland", "label": "negative", "script": "latin" }, { "name": "margaux philippe (latin)", "dob": "1967-12-06", "address": "Monaco", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 11 variations of {name}, ensuring phonetic similarity (10% Light: replace with \" similar to\" or \" sounds like\", 30% Medium, 60% Far) and orthographic similarity (10% Light, 50% Medium, 40% Far). Approximately 34% of the total 11 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Remove a random consonant, and Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 34, "selected_rules": [ "remove_random_consonant", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 34 } } }, { "seed_identities_with_labels": [ { "name": "مظاهر مجيدی (arabic)", "dob": "1960-5-26", "address": "Iran", "label": "positive", "script": "arabic" }, { "name": "fernando anglada (latin)", "dob": "1924-12-06", "address": "Argentina", "label": "negative", "script": "latin" }, { "name": "Muhammad Dayri (latin)", "dob": "1961-6-6", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "benigna pujol (latin)", "dob": "1996-09-22", "address": "Lituania", "label": "negative", "script": "latin" }, { "name": "Maulana Ubaidullah (latin)", "dob": "1985-1-31", "address": "Afghanistan", "label": "positive", "script": "latin" }, { "name": "aurelia diez (latin)", "dob": "1971-02-17", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "magdalena español (latin)", "dob": "1925-01-12", "address": "Sudáfrica", "label": "negative", "script": "latin" }, { "name": "Valery GABRIEL (latin)", "dob": "1956-1-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "lourdes flor (latin)", "dob": "1926-07-22", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Khalil TRINIDAD (latin)", "dob": "1978-3-20", "address": "Philippines", "label": "positive", "script": "latin" }, { "name": "josé duque (latin)", "dob": "1936-09-08", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "adam hale (latin)", "dob": "1966-11-22", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "флорентин соколов (cyrillic)", "dob": "2004-06-20", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "كبير رصاص (arabic)", "dob": "1999-08-07", "address": "Colombia", "label": "negative", "script": "arabic" }, { "name": "eulalia cano (latin)", "dob": "2003-10-14", "address": "Turquía", "label": "negative", "script": "latin" } ], "query_template": "Generate 10 variations of {name}, ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 53% of the total 10 variations should follow these rule-based transformations: Additionally, generate variations that: Remove first letter, remove last letter, replace first name with middle name, replace middle name with first name, swap adjacent consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 53, "selected_rules": [ "swap_adjacent_consonants" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 53 } } }, { "seed_identities_with_labels": [ { "name": "andrew strong (latin)", "dob": "2007-07-27", "address": "Kiribati", "label": "negative", "script": "latin" }, { "name": "Андрей Деркач (cyrillic)", "dob": "1967-8-19", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "michelle mahe (latin)", "dob": "1983-11-21", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "антип петров (cyrillic)", "dob": "1979-02-09", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "renée albert (latin)", "dob": "1928-12-22", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "захар засуха (cyrillic)", "dob": "1941-10-02", "address": "Liechtenstein", "label": "negative", "script": "cyrillic" }, { "name": "douglas reyes (latin)", "dob": "1997-11-24", "address": "Estonia", "label": "negative", "script": "latin" }, { "name": "stephanie baker (latin)", "dob": "1999-11-13", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Serhiy Melnychuk (latin)", "dob": "1976-9-30", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "felicia hansen (latin)", "dob": "1958-11-16", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Waad Qado (latin)", "dob": "1971-1-1", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "tyler hopkins (latin)", "dob": "1992-07-20", "address": "Paraguay", "label": "negative", "script": "latin" }, { "name": "Alam Zaib (latin)", "dob": "1974-1-1", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "natasha nelson (latin)", "dob": "1934-09-13", "address": "Trinidad and Tobago", "label": "negative", "script": "latin" }, { "name": "Oleg Nikolayev (latin)", "dob": "1969-12-10", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 14 execution vectors for {name} ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 11% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that: Replace random consonants with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 11, "selected_rules": [ "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 11 } } }, { "seed_identities_with_labels": [ { "name": "sophie vidal (latin)", "dob": "1931-01-30", "address": "Liban", "label": "negative", "script": "latin" }, { "name": "Rajabi Al-Zahir (latin)", "dob": "1969-7-5", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "dillon webb (latin)", "dob": "1996-04-01", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "jessica reese (latin)", "dob": "1970-07-17", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Александр Лукашенко (cyrillic)", "dob": "1954-8-31", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "simone faivre (latin)", "dob": "1950-06-14", "address": "Irlande", "label": "negative", "script": "latin" }, { "name": "устим колесниченко (cyrillic)", "dob": "1960-12-13", "address": "Sierra Leone", "label": "negative", "script": "cyrillic" }, { "name": "corinne bernier (latin)", "dob": "1984-01-05", "address": "Haïti", "label": "negative", "script": "latin" }, { "name": "Malek Reuben (latin)", "dob": "1960-1-1", "address": "South Sudan", "label": "positive", "script": "latin" }, { "name": "madeleine pruvost (latin)", "dob": "2001-05-24", "address": "Albanie", "label": "negative", "script": "latin" }, { "name": "Ella Pamfilova (latin)", "dob": "1953-9-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ерофей тетерин (cyrillic)", "dob": "1929-02-09", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "Kwo'n-u Han (latin)", "dob": "1962-8-21", "address": "China", "label": "positive", "script": "latin" }, { "name": "josefina salvador (latin)", "dob": "1954-05-24", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "nicole bazin (latin)", "dob": "1986-05-25", "address": "Haiti", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 14 variations of {name}. Ensuring phonetic similarity (30% Light: {name} + suffix/Latin/numeral), 40% Medium: Phonetic alteration with metaphone/phoneme shift), and 30% Far: Sound-alike names with multiple syllable shifts). Additionally, generating orthographic similarity (10% Light: Remove minor vowel variations), 50% Medium: Replace single characters with visually similar alternatives), and 40% Far: Visual name similarities with case sensitivity changes). Approximately 24% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Duplicate a random letter in {name}, Remove all spaces from {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 24, "selected_rules": [ "duplicate_random_letter_as_double_letter", "remove_all_spaces" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "remove_all_spaces": "Remove all spaces" }, "percentage": 24 } } }, { "seed_identities_with_labels": [ { "name": "miguel aroca (latin)", "dob": "1931-12-14", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "حسنين ارناؤوط (arabic)", "dob": "2006-06-17", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "john mooney (latin)", "dob": "1955-01-29", "address": "Moldova", "label": "negative", "script": "latin" }, { "name": "Galina Danchikova (latin)", "dob": "1954-8-13", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Vladimir Yakushev (latin)", "dob": "1968-6-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Александр Волфович (cyrillic)", "dob": "1961-10-26", "address": "Cyprus", "label": "positive", "script": "cyrillic" }, { "name": "jacob farmer (latin)", "dob": "1933-10-19", "address": "Indonesia", "label": "negative", "script": "latin" }, { "name": "elba lamas (latin)", "dob": "1984-01-06", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "justin wilson (latin)", "dob": "1996-08-11", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Dmitry Khotimskiy (latin)", "dob": "1973-6-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "paige mcdonald (latin)", "dob": "2006-08-03", "address": "Macao", "label": "negative", "script": "latin" }, { "name": "sheila reid (latin)", "dob": "1988-02-29", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "tommy smith (latin)", "dob": "1978-05-12", "address": "Tajikistan", "label": "negative", "script": "latin" }, { "name": "Hossein Arani (latin)", "dob": "1964-12-22", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "тетяна закусило (cyrillic)", "dob": "1987-08-28", "address": "Netherlands Antilles", "label": "negative", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 26% of variations that follow: Additionally, generate variations that perform these transformations: Replace double letters with a single letter, Remove all spaces, and Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 26, "selected_rules": [ "replace_double_letters_with_single_letter", "remove_all_spaces", "remove_random_vowel" ], "rule_descriptions": { "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "remove_all_spaces": "Remove all spaces", "remove_random_vowel": "Remove a random vowel" }, "percentage": 26 } } }, { "seed_identities_with_labels": [ { "name": "Hassan Afgoye (latin)", "dob": "1966-1-1", "address": "Somalia", "label": "positive", "script": "latin" }, { "name": "robert barbe (latin)", "dob": "1946-12-11", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "margot lemaître (latin)", "dob": "1961-11-16", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Віталій Булюк (cyrillic)", "dob": "1969-12-21", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "ana gonçalves (latin)", "dob": "1969-04-08", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "gaspar martins (latin)", "dob": "1936-09-10", "address": "Jamaica", "label": "negative", "script": "latin" }, { "name": "Siarhei Kalinouski (latin)", "dob": "1969-1-3", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "عارف الشايع (arabic)", "dob": "1995-05-06", "address": "Maldives", "label": "negative", "script": "arabic" }, { "name": "lucas andrade (latin)", "dob": "1991-10-16", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "naiara carneiro (latin)", "dob": "1974-08-24", "address": "Camarões", "label": "negative", "script": "latin" }, { "name": "amélia amorim (latin)", "dob": "1994-01-29", "address": "Tajiquistão", "label": "negative", "script": "latin" }, { "name": "ладислав андреев (cyrillic)", "dob": "1956-08-07", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "Mohammed Matnee (latin)", "dob": "1983-4-15", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "filipe carneiro (latin)", "dob": "1943-08-26", "address": "Somália", "label": "negative", "script": "latin" }, { "name": "Cholung Choe (latin)", "dob": "1973-5-16", "address": "Korea, North", "label": "positive", "script": "latin" } ], "query_template": "Generate 7 execution vectors for {name}, ensuring phonetic similarity by including 30% Light variations such as \"{name}\" and \"{name}\", 40% Medium variations such as \"{name}y\" and \"{name}e\", and 30% Far variations such as \"{name}on\" and \"{name}en\". Ensure orthographic similarity by including 50% Light variations with similar spellings such as \"{name}\" and \"{name}\" and 50% Medium variations with visually similar spellings such as \"{name}\" and \"{name}\". Approximately 48% of the total 7 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap adjacent consonants to produce {name} -> \"{name}\", Replace spaces with special characters to produce \"{name}\" The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 48, "selected_rules": [ "swap_adjacent_consonants", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 48 } } }, { "seed_identities_with_labels": [ { "name": "alexandria leclercq (latin)", "dob": "1947-01-11", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "antoine teixeira (latin)", "dob": "2002-09-01", "address": "Australie", "label": "negative", "script": "latin" }, { "name": "soraya criado (latin)", "dob": "1944-03-10", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Светлана Горячева (cyrillic)", "dob": "1947-6-3", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "olivia davidson (latin)", "dob": "1926-03-24", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Hussam Al-Qatirji (latin)", "dob": "1982-1-11", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Andrei Troshev (latin)", "dob": "1953-4-5", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "victor walsh (latin)", "dob": "1994-09-07", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "мирон смирнова (cyrillic)", "dob": "1980-09-07", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "émilie chevallier (latin)", "dob": "1963-10-23", "address": "Libye", "label": "negative", "script": "latin" }, { "name": "Yusuf al-Hatum (latin)", "dob": "1966-1-5", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "xavier thomas (latin)", "dob": "2005-05-22", "address": "Cook (Îles)", "label": "negative", "script": "latin" }, { "name": "юлия пестов (cyrillic)", "dob": "2004-09-10", "address": "Czech Republic", "label": "negative", "script": "cyrillic" }, { "name": "thierry guillet (latin)", "dob": "1968-01-24", "address": "Kazakhstan", "label": "negative", "script": "latin" }, { "name": "Viktor Ignatov (latin)", "dob": "1968-10-15", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 14 variations of {name}, ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (10% Light, 30% Medium, 60% Far). Approximately 31% of the total 14 variations should follow these rule-based transformations: \nUse first name initial with last name. Replace double letters with a single letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 31, "selected_rules": [ "initial_only_first_name", "replace_double_letters_with_single_letter" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name", "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 31 } } }, { "seed_identities_with_labels": [ { "name": "Tigran KHACHATUROV (latin)", "dob": "1979-2-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "caitlin carr (latin)", "dob": "1977-08-08", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Pavel Marinychev (latin)", "dob": "1978-10-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "deborah irwin (latin)", "dob": "1965-03-16", "address": "Antarctica (the territory South of 60 deg S)", "label": "negative", "script": "latin" }, { "name": "اسما محمدی (arabic)", "dob": "1983-11-14", "address": "Iran", "label": "High Risk", "script": "arabic" }, { "name": "sandra park (latin)", "dob": "1958-09-11", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "brent hawkins (latin)", "dob": "2003-08-17", "address": "Lesotho", "label": "negative", "script": "latin" }, { "name": "valentine marie (latin)", "dob": "1974-04-10", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "pamela garrison (latin)", "dob": "1989-07-16", "address": "Marshall Islands", "label": "negative", "script": "latin" }, { "name": "lori jones (latin)", "dob": "2006-12-24", "address": "Malta", "label": "negative", "script": "latin" }, { "name": "Павел Завальный (cyrillic)", "dob": "1961-8-11", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Waseem al-Assad (latin)", "dob": "1980-7-18", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "عبد الغفّار مضر (arabic)", "dob": "1973-05-07", "address": "Philippines", "label": "negative", "script": "arabic" }, { "name": "Omid Noori (latin)", "dob": "1976-3-12", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "george snyder (latin)", "dob": "1971-07-26", "address": "Namibia", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 45% of variations that follow: Additionally, generate variations that: Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 45, "selected_rules": [ "remove_random_vowel" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel" }, "percentage": 45 } } }, { "seed_identities_with_labels": [ { "name": "samantha montes (latin)", "dob": "1960-11-05", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Vakhtang Gomelauri (latin)", "dob": "1975-12-24", "address": "Georgia", "label": "positive", "script": "latin" }, { "name": "margot pascal (latin)", "dob": "1969-10-03", "address": "Brésil", "label": "negative", "script": "latin" }, { "name": "Gennadii Kudriavtsev (latin)", "dob": "1947-8-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "gérard lévy (latin)", "dob": "1950-01-31", "address": "Cocos (Îles)", "label": "negative", "script": "latin" }, { "name": "carrie hall (latin)", "dob": "1933-12-15", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Анна Тодорова (cyrillic)", "dob": "1988-2-20", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "marie marie (latin)", "dob": "1942-02-03", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "سارا ملکیان (arabic)", "dob": "1937-03-21", "address": "Iran", "label": "High Risk", "script": "arabic" }, { "name": "هاشم جبيلي (arabic)", "dob": "2006-02-04", "address": "Mongolia", "label": "negative", "script": "arabic" }, { "name": "stéphanie evrard (latin)", "dob": "1928-07-22", "address": "Israël", "label": "negative", "script": "latin" }, { "name": "Kia Sadeghi (latin)", "dob": "1986-3-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "abigaíl ribas (latin)", "dob": "1990-01-30", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Oleksiy MOZHOVY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "émilie pascal (latin)", "dob": "2007-04-06", "address": "Swaziland", "label": "negative", "script": "latin" } ], "query_template": "Generate 15 variations of {name} for phonetic similarity (100% Medium) by implementing SoundEx, Metaphone, or NYSIIS algorithms to detect sound-alike names. Additionally, generate 15 variations for orthographic similarity with the following distribution: 10% Light using transpositions (e.g., swapping two adjacent letters), 50% Medium using substitutions (e.g., replacing one letter with another), and 40% Far using insertions/deletions (e.g., adding or removing a letter). Approximately 18% of these variations should follow the rule-based transformations: Abbreviate name parts, and Replace random consonants with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 18, "selected_rules": [ "shorten_name_to_abbreviations", "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 18 } } }, { "seed_identities_with_labels": [ { "name": "Варвара Скоч (cyrillic)", "dob": "2002-5-19", "address": "United Kingdom", "label": "positive", "script": "cyrillic" }, { "name": "demetrio paniagua (latin)", "dob": "1934-04-12", "address": "Sudáfrica", "label": "negative", "script": "latin" }, { "name": "Aliasghar Norouzi (latin)", "dob": "1962-11-11", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "jovita aranda (latin)", "dob": "1996-03-23", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "aurelia nebot (latin)", "dob": "2001-01-30", "address": "República de Macedonia del Norte", "label": "negative", "script": "latin" }, { "name": "пелай китов (cyrillic)", "dob": "1993-11-12", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "هاشم بدر (arabic)", "dob": "1966-08-03", "address": "Congo", "label": "negative", "script": "arabic" }, { "name": "quirino olivares (latin)", "dob": "1953-06-27", "address": "Afganistán", "label": "negative", "script": "latin" }, { "name": "zaida peñas (latin)", "dob": "1963-05-24", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "mateo sans (latin)", "dob": "1936-01-11", "address": "Djibouti", "label": "negative", "script": "latin" }, { "name": "sally nguyen (latin)", "dob": "1926-11-04", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Marina Mordashova (latin)", "dob": "1979-5-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Nikita Samoylenko (latin)", "dob": "1992-8-28", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Ilyas Umahanov (latin)", "dob": "1957-3-27", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "julie williams (latin)", "dob": "1981-04-01", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 7 variations of {name} ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (100% Medium). Approximately 12% of the total 7 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Add a title prefix (Mr., Dr., etc.), and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 12, "selected_rules": [ "add_random_leading_title", "swap_random_letter" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 12 } } }, { "seed_identities_with_labels": [ { "name": "rufina feijoo (latin)", "dob": "1941-05-31", "address": "Santa Lucía", "label": "negative", "script": "latin" }, { "name": "martin yang (latin)", "dob": "1950-11-26", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "roberto montesinos (latin)", "dob": "1948-09-21", "address": "Reino Unido de Gran Bretaña e Irlanda del Norte", "label": "negative", "script": "latin" }, { "name": "Aleksandra Oksenchuk (latin)", "dob": "1992-10-16", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "nando oliver (latin)", "dob": "1979-04-09", "address": "Letonia", "label": "negative", "script": "latin" }, { "name": "françois lebreton (latin)", "dob": "1995-03-09", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "عرفان بنو ليث (arabic)", "dob": "1958-05-18", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "джана бурханларски (cyrillic)", "dob": "1952-05-27", "address": "Turks and Caicos Islands", "label": "negative", "script": "cyrillic" }, { "name": "Дзмiтрый Лукашэнка (cyrillic)", "dob": "1980-3-23", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Saad AL-FAGIH (latin)", "dob": "1957-1-31", "address": "United Kingdom", "label": "positive", "script": "latin" }, { "name": "ángel escalona (latin)", "dob": "1946-09-05", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "josefina almansa (latin)", "dob": "1950-12-25", "address": "Túnez", "label": "negative", "script": "latin" }, { "name": "Aman Abdurahman (latin)", "dob": "1972-1-5", "address": "Indonesia", "label": "positive", "script": "latin" }, { "name": "diego ríos (latin)", "dob": "2007-10-02", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Rimma Utyasheva (latin)", "dob": "1952-1-3", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 11 variations of {name}, ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (50% Light, 50% Medium). Approximately 44% of the total 11 variations should follow these rule-based transformations: Add a title prefix (Mr., Dr., etc.), Abbreviate name parts, and Remove all spaces.. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 44, "selected_rules": [ "add_random_leading_title", "shorten_name_to_abbreviations", "remove_all_spaces" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "shorten_name_to_abbreviations": "Abbreviate name parts", "remove_all_spaces": "Remove all spaces" }, "percentage": 44 } } }, { "seed_identities_with_labels": [ { "name": "rhonda brooks (latin)", "dob": "1948-11-30", "address": "Cote d'Ivoire", "label": "negative", "script": "latin" }, { "name": "Олег Михайлов (cyrillic)", "dob": "1987-1-6", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "teresa lee (latin)", "dob": "1982-12-22", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "clifford johnson (latin)", "dob": "1979-08-09", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Esam Ettehadi (latin)", "dob": "1989-7-31", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "عبد المحيي أبو غليون (arabic)", "dob": "1995-08-31", "address": "San Marino", "label": "negative", "script": "arabic" }, { "name": "basilio torrents (latin)", "dob": "1994-06-04", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "brian weaver (latin)", "dob": "1944-11-17", "address": "Equatorial Guinea", "label": "negative", "script": "latin" }, { "name": "Cha-hyo'ng Ku (latin)", "dob": "1957-9-8", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "Esteban VALENCIA (latin)", "dob": "1964-12-19", "address": "Mexico", "label": "positive", "script": "latin" }, { "name": "bautista moles (latin)", "dob": "1987-09-18", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "johnny bennett (latin)", "dob": "2000-06-08", "address": "Hong Kong", "label": "negative", "script": "latin" }, { "name": "Abdullah Al-Faisal (latin)", "dob": "1963-9-10", "address": "Jamaica", "label": "positive", "script": "latin" }, { "name": "زيدان بحمدوني (arabic)", "dob": "1932-12-07", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "brian stanton (latin)", "dob": "1946-12-08", "address": "Micronesia", "label": "negative", "script": "latin" } ], "query_template": "Generate 11 variations of {name}. Ensuring phonetic similarity (30% Light, e.g. {name} -> {name}y, 40% Medium, e.g. {name} -> {naym}, 30% Far, e.g. {name} -> {nejm}), and orthographic similarity (50% Light, e.g. {name} -> {nam3}, 50% Medium, e.g. {name} -> {nm3}). Approximately 36% of the total 11 variations should follow these rule-based transformations: Additionally, generate variations that: Duplicate a random letter, e.g. {name} -> {nammename}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 36, "selected_rules": [ "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 36 } } }, { "seed_identities_with_labels": [ { "name": "sérgio maia (latin)", "dob": "1992-02-09", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "luísa magalhães (latin)", "dob": "1959-10-04", "address": "França", "label": "negative", "script": "latin" }, { "name": "Irina Bubnova (latin)", "dob": "1983-4-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "alexandra terry (latin)", "dob": "1987-12-03", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "نصر الدّين ابو الحاج (arabic)", "dob": "1943-07-25", "address": "Syria", "label": "High Risk", "script": "arabic" }, { "name": "مسلم النجار (arabic)", "dob": "1963-02-04", "address": "Palestinian Territory", "label": "negative", "script": "arabic" }, { "name": "Алег Пятроў (cyrillic)", "dob": "1962-3-26", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "nath fischer (latin)", "dob": "1959-03-11", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "soraia valente (latin)", "dob": "1985-07-14", "address": "Laos", "label": "negative", "script": "latin" }, { "name": "luca pinho (latin)", "dob": "1978-08-12", "address": "Suazilândia", "label": "negative", "script": "latin" }, { "name": "benjamim mota (latin)", "dob": "1970-09-10", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Umm Layth (latin)", "dob": "1994-5-11", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Ra'i al-Ras (latin)", "dob": "1974-9-5", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "Maria Faassen (latin)", "dob": "1985-4-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "diego reis (latin)", "dob": "1928-07-05", "address": "Síria", "label": "negative", "script": "latin" } ], "query_template": "Generate exactly 8 execution vectors for {name}, ensuring phonetic similarity (20% Light, e.g. {name}-lite, 60% Medium, e.g. {name}yon, 20% Far, e.g. {name}ian) and orthographic similarity (50% Light, e.g. {name}y, 50% Medium, e.g. {name}ey). Approximately 22% of the total 8 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Abbreviate name parts to \"J\" or \"S\", Replace random vowels with different vowels (e.g. {name} -> {name}e), and Reorder name parts (e.g. John Smith -> Smith John). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 22, "selected_rules": [ "shorten_name_to_abbreviations", "replace_random_vowel_with_random_vowel", "name_parts_permutations" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "name_parts_permutations": "Reorder name parts" }, "percentage": 22 } } }, { "seed_identities_with_labels": [ { "name": "stephen russell (latin)", "dob": "2002-09-20", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "robert martinez (latin)", "dob": "1947-03-19", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Waseem al-Assad (latin)", "dob": "1980-7-18", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "eusebia lago (latin)", "dob": "1938-01-19", "address": "Bangladesh", "label": "negative", "script": "latin" }, { "name": "teófilo ruano (latin)", "dob": "1984-10-18", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "иван гаврилова (cyrillic)", "dob": "1973-11-03", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "lupe seguí (latin)", "dob": "1976-06-18", "address": "Botswana", "label": "negative", "script": "latin" }, { "name": "Aleksei MOZHOVYY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "мартьян чернов (cyrillic)", "dob": "1989-05-23", "address": "Gibraltar", "label": "negative", "script": "cyrillic" }, { "name": "tristán carballo (latin)", "dob": "1977-12-31", "address": "San Vicente y las Granadinas", "label": "negative", "script": "latin" }, { "name": "frédéric pineau (latin)", "dob": "1945-10-05", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "soraya llopis (latin)", "dob": "1960-09-24", "address": "Brasil", "label": "negative", "script": "latin" }, { "name": "Nataliya KHORSHEVA (latin)", "dob": "1972-7-14", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Andrei Trofimov (latin)", "dob": "1972-8-14", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Олег Валенчук (cyrillic)", "dob": "1960-9-14", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate exactly 15 execution vectors for each target identity {name}, ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (10% Light, 50% Medium, 40% Far). Approximately 11% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap random adjacent letters, Delete a random letter, and Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 11, "selected_rules": [ "swap_random_letter", "delete_random_letter", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "delete_random_letter": "Delete a random letter", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 11 } } }, { "seed_identities_with_labels": [ { "name": "marcos santos (latin)", "dob": "1927-03-04", "address": "Geórgia", "label": "negative", "script": "latin" }, { "name": "débora lopes (latin)", "dob": "1924-12-06", "address": "Belize", "label": "negative", "script": "latin" }, { "name": "jorge vicente (latin)", "dob": "1927-10-08", "address": "Camarões", "label": "negative", "script": "latin" }, { "name": "cheryl neal (latin)", "dob": "1973-02-21", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Ekaterina Kharchenko (latin)", "dob": "1977-8-11", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jacqueline barre (latin)", "dob": "1944-08-09", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "xavier leal (latin)", "dob": "1953-02-11", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Анатоль Лапо (cyrillic)", "dob": "1963-5-24", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "герман давыдов (cyrillic)", "dob": "1939-08-28", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "Aleksander Drozdenko (latin)", "dob": "1964-11-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "fábio almeida (latin)", "dob": "1936-06-15", "address": "Polinésia Francesa", "label": "negative", "script": "latin" }, { "name": "Aleksandr Akimov (latin)", "dob": "1954-11-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Nasif Barakat (latin)", "dob": "1970-11-30", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "رواء حنبولي (arabic)", "dob": "1940-08-22", "address": "Congo", "label": "negative", "script": "arabic" }, { "name": "jeannine vallée (latin)", "dob": "1983-07-04", "address": "Burkina Faso", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 8 execution vectors ({name} variations) for the target identity {name}. Ensure phonetic similarity by implementing 100% Far variation. For orthographic similarity, implement 2 variations with 10% Light spelling changes, 3 variations with 50% Medium spelling changes, and 3 variations with 40% Far spelling changes. Approximately 24% of the total 8 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.) to {name}, Add a title prefix (Mr., Dr., etc.) to {name}, and Delete a random letter from {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 24, "selected_rules": [ "add_random_trailing_title", "add_random_leading_title", "delete_random_letter" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "delete_random_letter": "Delete a random letter" }, "percentage": 24 } } }, { "seed_identities_with_labels": [ { "name": "Harun Nasir-Al-Din (latin)", "dob": "1970-6-5", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "оксенія гайда (cyrillic)", "dob": "1959-02-16", "address": "Myanmar", "label": "negative", "script": "cyrillic" }, { "name": "pascuala luján (latin)", "dob": "1999-09-04", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "luce marin (latin)", "dob": "1935-06-22", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "adrián sáenz (latin)", "dob": "1960-01-14", "address": "Kirguistán", "label": "negative", "script": "latin" }, { "name": "Павел Тараканов (cyrillic)", "dob": "1982-6-21", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "gary parker (latin)", "dob": "1935-09-30", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Elizaveta Brodskaia (latin)", "dob": "1979-4-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Francois Okunji (latin)", "dob": "1949-7-10", "address": "Congo, Democratic Republic of the", "label": "positive", "script": "latin" }, { "name": "javi garay (latin)", "dob": "1950-10-02", "address": "Guinea", "label": "negative", "script": "latin" }, { "name": "vasco monteiro (latin)", "dob": "1946-12-25", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "imelda yáñez (latin)", "dob": "1932-05-03", "address": "Singapur", "label": "negative", "script": "latin" }, { "name": "Chapo ISIDRO (latin)", "dob": "1982-6-19", "address": "Mexico", "label": "positive", "script": "latin" }, { "name": "علی اصغر محمد پور (arabic)", "dob": "1938-12-26", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "chucho valera (latin)", "dob": "1925-01-03", "address": "El Salvador", "label": "negative", "script": "latin" } ], "query_template": "Generate 11 name variations for {name}, ensuring phonetic similarity (70% Light, 30% Medium) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 29% of the total 11 variations should follow these rule-based transformations: \nReplace \"a\" with \"e\", \nReplace \"o\" with \"u\", \nReplace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 29, "selected_rules": [ "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 29 } } }, { "seed_identities_with_labels": [ { "name": "Дмитрий Гусев (cyrillic)", "dob": "1972-7-23", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "michelle weiss (latin)", "dob": "1987-04-04", "address": "Maldives (Îles)", "label": "negative", "script": "latin" }, { "name": "مصطفى الجنيدي (arabic)", "dob": "1996-06-12", "address": "Wallis and Futuna", "label": "negative", "script": "arabic" }, { "name": "martin collet (latin)", "dob": "1981-07-09", "address": "Saint Pierre et Miquelon", "label": "negative", "script": "latin" }, { "name": "christine bridges (latin)", "dob": "1946-03-05", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "marisol alonso (latin)", "dob": "1952-05-14", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Kia Sadeghi (latin)", "dob": "1986-3-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "paz solís (latin)", "dob": "1956-01-15", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Asadollah Seify (latin)", "dob": "1965-4-4", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Shahab Javanmardy (latin)", "dob": "1974-3-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Tuah Febriwansyah (latin)", "dob": "1968-2-18", "address": "Indonesia", "label": "positive", "script": "latin" }, { "name": "océane germain (latin)", "dob": "1945-05-28", "address": "Féroé (Îles)", "label": "negative", "script": "latin" }, { "name": "مرتضي يافع (arabic)", "dob": "1991-11-01", "address": "Syria", "label": "High Risk", "script": "arabic" }, { "name": "marthe poulain (latin)", "dob": "1947-01-08", "address": "Mauritanie", "label": "negative", "script": "latin" }, { "name": "adélaïde gaudin (latin)", "dob": "1990-01-12", "address": "Haiti", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 11 variations of {name} ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (10% Light, 50% Medium, 40% Far). Approximately 20% of the total 11 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Replace random vowels with different vowels, Reorder name parts, and Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 20, "selected_rules": [ "replace_random_vowel_with_random_vowel", "name_parts_permutations", "insert_random_letter" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "name_parts_permutations": "Reorder name parts", "insert_random_letter": "Insert a random letter" }, "percentage": 20 } } }, { "seed_identities_with_labels": [ { "name": "валерия станишев (cyrillic)", "dob": "1957-12-21", "address": "Korea", "label": "negative", "script": "cyrillic" }, { "name": "jeannine hoareau (latin)", "dob": "1932-04-27", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "cecilia tovar (latin)", "dob": "1973-07-06", "address": "Pakistán", "label": "negative", "script": "latin" }, { "name": "Chapito ISIDRO (latin)", "dob": "1982-6-19", "address": "Mexico", "label": "positive", "script": "latin" }, { "name": "sigfrido neira (latin)", "dob": "2001-01-06", "address": "República de Moldova", "label": "negative", "script": "latin" }, { "name": "Andrey Doukhvalov (latin)", "dob": "1957-12-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "madeleine bonnin (latin)", "dob": "1935-03-16", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "кондрат большакова (cyrillic)", "dob": "1968-06-08", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "lázaro ramos (latin)", "dob": "1954-02-01", "address": "República Dominicana", "label": "negative", "script": "latin" }, { "name": "Александр Ершов (cyrillic)", "dob": "1985-9-28", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Viktor Bondarev (latin)", "dob": "1959-12-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "thomas taylor (latin)", "dob": "1957-07-16", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Elena Milskaya (latin)", "dob": "1980-12-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "maría manuela sala (latin)", "dob": "2004-01-01", "address": "Perú", "label": "negative", "script": "latin" }, { "name": "marcia perea (latin)", "dob": "1958-10-29", "address": "Cuba", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 14 variations of {name} ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (50% Light, 50% Medium). Approximately 48% of the total 14 variations should follow these rule-based transformations: Use first name with middle initial, Use first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 48, "selected_rules": [ "initial_only_first_name" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name" }, "percentage": 48 } } }, { "seed_identities_with_labels": [ { "name": "Ruslan Lechkhadzhiev (latin)", "dob": "1965-7-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "trevor stevens (latin)", "dob": "1952-11-24", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "david neto (latin)", "dob": "1957-08-15", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "جلاء يافع (arabic)", "dob": "1927-02-19", "address": "Jordan", "label": "negative", "script": "arabic" }, { "name": "Александр Бортников (cyrillic)", "dob": "1951-11-15", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "ângelo castro (latin)", "dob": "2005-02-09", "address": "Territórios Austrais Franceses", "label": "negative", "script": "latin" }, { "name": "Mikhail Afanasov (latin)", "dob": "1953-6-15", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jose jackson (latin)", "dob": "1947-08-30", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Vladimir Polin (latin)", "dob": "1962-8-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "исай муравьева (cyrillic)", "dob": "1960-06-29", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "Myo Oo (latin)", "dob": "1960-6-23", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "eugène aubry (latin)", "dob": "2000-03-15", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "catarina marques (latin)", "dob": "1941-12-26", "address": "Bulgária", "label": "negative", "script": "latin" }, { "name": "alícia carneiro (latin)", "dob": "1940-04-03", "address": "União Europeia", "label": "negative", "script": "latin" }, { "name": "pilar nascimento (latin)", "dob": "1952-08-30", "address": "Gana", "label": "negative", "script": "latin" } ], "query_template": "Generate 12 variations of {name}. Ensuring phonetic similarity and orthographic similarity. For phonetic similarity, generate 70% Light variations using techniques such as Metaphone or Soundex, and 30% Medium variations using techniques such as NYSIIS or Caverphone. For orthographic similarity, generate 20% Light variations by changing single characters, 60% Medium variations by transposing adjacent characters or adding/removing a character, and 20% Far variations by significantly altering the spelling. Approximately 25% of the total 12 variations should follow these rule-based transformations: Additionally, generate variations that replace spaces with special characters (e.g. '{name} John', 'John{name}', '{name}John', etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 25, "selected_rules": [ "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 25 } } }, { "seed_identities_with_labels": [ { "name": "Dmitry Ishchenko (latin)", "dob": "1983-1-24", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "debra weber (latin)", "dob": "1963-08-31", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "emma quirós (latin)", "dob": "1978-02-28", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "felipa castrillo (latin)", "dob": "2007-09-12", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Элеонора Федоренко (cyrillic)", "dob": "1972-10-28", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "Antonis Vakanas (latin)", "dob": "1977-6-20", "address": "Cyprus", "label": "positive", "script": "latin" }, { "name": "enrique medina (latin)", "dob": "1930-09-19", "address": "Malta", "label": "negative", "script": "latin" }, { "name": "Hathaiwan WORAWATWICHAI (latin)", "dob": "1956-1-9", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "Sergey Kozlov (latin)", "dob": "1960-5-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "хома запорожець (cyrillic)", "dob": "1962-09-11", "address": "Luxembourg", "label": "negative", "script": "cyrillic" }, { "name": "javiera pacheco (latin)", "dob": "1949-08-20", "address": "Tonga", "label": "negative", "script": "latin" }, { "name": "plácido esteban (latin)", "dob": "1925-03-10", "address": "Grecia", "label": "negative", "script": "latin" }, { "name": "عبد الشّكور جهينة (arabic)", "dob": "1968-08-17", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "vanessa gonzales (latin)", "dob": "1981-04-23", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "anabel peral (latin)", "dob": "1926-05-14", "address": "Arabia Saudita", "label": "negative", "script": "latin" } ], "query_template": "Generate 14 variations of {name}, ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (50% Light, 50% Medium). Approximately 33% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Remove a random consonant, and Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 33, "selected_rules": [ "remove_random_consonant", "remove_all_spaces" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "remove_all_spaces": "Remove all spaces" }, "percentage": 33 } } }, { "seed_identities_with_labels": [ { "name": "aimée blot (latin)", "dob": "1943-08-09", "address": "Îles Mineures Éloignées des États-Unis", "label": "negative", "script": "latin" }, { "name": "Михаил Развожаев (cyrillic)", "dob": "1980-12-30", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "ладимир гуляева (cyrillic)", "dob": "1941-11-28", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "مازن آل جعفر (arabic)", "dob": "1943-01-09", "address": "Serbia", "label": "negative", "script": "arabic" }, { "name": "michelle dupuis (latin)", "dob": "1934-07-20", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "claudine bazin (latin)", "dob": "1969-05-19", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Zurab Makiyev (latin)", "dob": "1976-9-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "caroline bazin (latin)", "dob": "1968-07-21", "address": "Palau", "label": "negative", "script": "latin" }, { "name": "Edgar Sarrias (latin)", "dob": "1976-1-24", "address": "Venezuela", "label": "positive", "script": "latin" }, { "name": "Esam Ettehadi (latin)", "dob": "1989-7-31", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "soraia faria (latin)", "dob": "1986-04-22", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Bella Zlatkis (latin)", "dob": "1948-7-5", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "felipe criado (latin)", "dob": "1986-06-18", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "thierry imbert (latin)", "dob": "1944-11-08", "address": "Sahara Occidental", "label": "negative", "script": "latin" }, { "name": "marthe martineau (latin)", "dob": "1949-11-12", "address": "Honduras", "label": "negative", "script": "latin" } ], "query_template": "Generate 9 name variations of {name}, ensuring phonetic similarity (30% Light, e.g. {name}y, {name}i, etc., 40% Medium, e.g. {name}e, {name}o, etc., 30% Far, e.g. {name}ku, {name}lo, etc.) and orthographic similarity (10% Light, e.g. {name}, {name}y, etc., 30% Medium, e.g. {name}e, {name}o, etc., 60% Far, e.g. {name}kk, {name}tt, etc.). Approximately 37% of the total 9 variations should follow these rule-based transformations: Additionally, generate variations that: Remove all spaces from {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 37, "selected_rules": [ "remove_all_spaces" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces" }, "percentage": 37 } } }, { "seed_identities_with_labels": [ { "name": "tomás dominguez (latin)", "dob": "1929-03-26", "address": "Djibouti", "label": "negative", "script": "latin" }, { "name": "Andrei Khokhlun (latin)", "dob": "1966-12-4", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "duilio benito (latin)", "dob": "1973-05-10", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Sergei Yeliseyev (latin)", "dob": "1971-5-5", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "cayetana giralt (latin)", "dob": "1957-11-13", "address": "Kuwait", "label": "negative", "script": "latin" }, { "name": "mauricio gibert (latin)", "dob": "1943-05-10", "address": "Japón", "label": "negative", "script": "latin" }, { "name": "Wei Zhang (latin)", "dob": "1977-6-16", "address": "China", "label": "positive", "script": "latin" }, { "name": "Василий Юрченко (cyrillic)", "dob": "1960-9-26", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "karen oliver (latin)", "dob": "1957-11-29", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Rajabi Al-Zahir (latin)", "dob": "1969-7-5", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "غانم بنو الحارث بن كعب (arabic)", "dob": "1990-04-29", "address": "Syria", "label": "High Risk", "script": "arabic" }, { "name": "георги тодорова (cyrillic)", "dob": "1962-05-10", "address": "Guam", "label": "negative", "script": "cyrillic" }, { "name": "epifanio agustín (latin)", "dob": "1926-10-21", "address": "Argelia", "label": "negative", "script": "latin" }, { "name": "marthe laporte (latin)", "dob": "1937-06-11", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "candace vega (latin)", "dob": "2002-12-20", "address": "South Africa", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 7 variations of the name {name}, ensuring phonetic similarity: 10% Light, 50% Medium, 40% Far, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 15% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random consonant, and Abbreviate name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 15, "selected_rules": [ "remove_random_consonant", "shorten_name_to_abbreviations" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 15 } } }, { "seed_identities_with_labels": [ { "name": "denise pratt (latin)", "dob": "1957-03-23", "address": "Antarctica (the territory South of 60 deg S)", "label": "negative", "script": "latin" }, { "name": "Rida Gull (latin)", "dob": "1981-12-25", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "ryan hall (latin)", "dob": "1963-10-05", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Леонид Пасечник (cyrillic)", "dob": "1970-3-15", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "Bushra Shawkat (latin)", "dob": "1960-10-24", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "لورا آل بن ظافر (arabic)", "dob": "1988-02-02", "address": "Palestinian Territory", "label": "negative", "script": "arabic" }, { "name": "Aleksandr Sarkisyan (latin)", "dob": "1946-8-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Xutong Qin (latin)", "dob": "1994-4-29", "address": "China", "label": "positive", "script": "latin" }, { "name": "brandon hendricks (latin)", "dob": "1992-02-25", "address": "Tajikistan", "label": "negative", "script": "latin" }, { "name": "ema recio (latin)", "dob": "1929-02-18", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "يعرب جعفر (arabic)", "dob": "1929-08-23", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "anthony roberts (latin)", "dob": "1993-12-03", "address": "Albania", "label": "negative", "script": "latin" }, { "name": "david smith (latin)", "dob": "1970-09-14", "address": "Belize", "label": "negative", "script": "latin" }, { "name": "jean bègue (latin)", "dob": "1960-06-04", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "sylvie ruiz (latin)", "dob": "1926-02-27", "address": "Burkina Faso", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 12 execution vectors for each target identity {name}, ensuring phonetic similarity with the following distributions: \n70% of variations exhibit Light phonetic similarity, while 30% exhibit Medium phonetic similarity. \nSimilarly, ensure orthographic similarity with the following distributions: \n10% of variations exhibit Light orthographic similarity, \n30% of variations exhibit Medium orthographic similarity, and \n60% of variations exhibit Far orthographic similarity. \nApproximately 13% of the total 12 variations should follow these rule-based transformations: \nAdditionally, generate variations that perform these transformations: Use first name initial with last name, Remove a random consonant, and Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 13, "selected_rules": [ "initial_only_first_name", "remove_random_consonant", "remove_random_vowel" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name", "remove_random_consonant": "Remove a random consonant", "remove_random_vowel": "Remove a random vowel" }, "percentage": 13 } } }, { "seed_identities_with_labels": [ { "name": "lucie collin (latin)", "dob": "1982-05-02", "address": "Bangladesh", "label": "negative", "script": "latin" }, { "name": "Alam Zaib (latin)", "dob": "1974-1-1", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "luna vieira (latin)", "dob": "2002-11-20", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "фичо куртакова (cyrillic)", "dob": "1958-05-16", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "madeleine rivière (latin)", "dob": "1979-09-08", "address": "Soudan", "label": "negative", "script": "latin" }, { "name": "aimé rey (latin)", "dob": "1970-09-03", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Viktoriya Savruk (latin)", "dob": "1980-2-12", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "لوجين المهيدب (arabic)", "dob": "2006-08-21", "address": "Falkland Islands (Malvinas)", "label": "negative", "script": "arabic" }, { "name": "andrée fernandez (latin)", "dob": "1984-10-09", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "martine guillon (latin)", "dob": "1937-12-21", "address": "États-Unis", "label": "negative", "script": "latin" }, { "name": "Надежда Лашкарёва (cyrillic)", "dob": "1961-11-8", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "Ahmet DURI (latin)", "dob": "1987-1-12", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "marcelle loiseau (latin)", "dob": "1965-07-06", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "andré martinez (latin)", "dob": "1947-03-19", "address": "Laos", "label": "negative", "script": "latin" }, { "name": "Andrei Troshev (latin)", "dob": "1953-4-5", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 50% Light, 50% Medium, and also include 18% of variations that follow: Additionally, generate variations that perform these transformations: Swap adjacent syllables, and Abbreviate name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 18, "selected_rules": [ "swap_adjacent_syllables", "shorten_name_to_abbreviations" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables", "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 18 } } }, { "seed_identities_with_labels": [ { "name": "وحيد صليبا (arabic)", "dob": "1949-07-14", "address": "El Salvador", "label": "negative", "script": "arabic" }, { "name": "inmaculada grande (latin)", "dob": "1950-07-10", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "espiridión antón (latin)", "dob": "1998-03-06", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "emigdio cardona (latin)", "dob": "1956-05-25", "address": "Francia", "label": "negative", "script": "latin" }, { "name": "adolfo moreno (latin)", "dob": "1966-10-18", "address": "Malasia", "label": "negative", "script": "latin" }, { "name": "monique norton (latin)", "dob": "2002-01-01", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "narcisa calleja (latin)", "dob": "1926-02-09", "address": "Eslovenia", "label": "negative", "script": "latin" }, { "name": "Aleksandr Zhivlyuk (latin)", "dob": "1981-1-13", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "Song Jong (latin)", "dob": "1972-11-15", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Mohammed Matnee (latin)", "dob": "1983-4-15", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "noa baños (latin)", "dob": "1954-01-08", "address": "Benin", "label": "negative", "script": "latin" }, { "name": "علي نشار (arabic)", "dob": "1967-7-30", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "astrid imbert (latin)", "dob": "1939-06-14", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Sergei Savchenkov (latin)", "dob": "1954-10-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "венедикт рябова (cyrillic)", "dob": "1963-11-07", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 10% Light, 50% Medium, 40% Far, and orthographic similarity: 50% Light, 50% Medium, and also include 26% of variations that follow: Additionally, generate variations that: Swap adjacent consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 26, "selected_rules": [ "swap_adjacent_consonants" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 26 } } }, { "seed_identities_with_labels": [ { "name": "Aleksey MOZGOVOY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "елисей степанов (cyrillic)", "dob": "1952-06-05", "address": "Saint Kitts and Nevis", "label": "negative", "script": "cyrillic" }, { "name": "Mohammad Raad (latin)", "dob": "1955-8-28", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "mark williams (latin)", "dob": "2000-08-14", "address": "Turkey", "label": "negative", "script": "latin" }, { "name": "matthew rollins (latin)", "dob": "1963-10-21", "address": "Barbados", "label": "negative", "script": "latin" }, { "name": "хвойне белоконска-вражалска (cyrillic)", "dob": "1952-06-30", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "wyatt johnson (latin)", "dob": "1937-12-03", "address": "Western Sahara", "label": "negative", "script": "latin" }, { "name": "matthew jones (latin)", "dob": "1984-03-24", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Asadollah Seify (latin)", "dob": "1965-4-4", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "debra clark (latin)", "dob": "1964-03-29", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "RASOUL JALILI (latin)", "dob": "1961-8-19", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "学礼 江 (chinese)", "dob": "1972-5-22", "address": "Hong Kong", "label": "positive", "script": "chinese" }, { "name": "cynthia pierce (latin)", "dob": "1969-09-28", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "colin ferrell (latin)", "dob": "1973-09-11", "address": "Saint Vincent and the Grenadines", "label": "negative", "script": "latin" }, { "name": "isabelle turpin (latin)", "dob": "1968-01-21", "address": "Monaco", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 12 execution vectors for {name}, ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) by using sounds-alike names. For orthographic similarity (30% Light, 40% Medium, 30% Far), generate visually similar spellings. Approximately 16% of the total variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap random adjacent letters, and Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 16, "selected_rules": [ "swap_random_letter", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 16 } } }, { "seed_identities_with_labels": [ { "name": "thomas caldwell (latin)", "dob": "1984-01-20", "address": "Cote d'Ivoire", "label": "negative", "script": "latin" }, { "name": "алла макаренко (cyrillic)", "dob": "1979-05-31", "address": "Brazil", "label": "negative", "script": "cyrillic" }, { "name": "andrew garrett (latin)", "dob": "1977-09-18", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Татьяна Томилина (cyrillic)", "dob": "1966-4-18", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "kristen gonzales (latin)", "dob": "1960-11-16", "address": "Slovakia (Slovak Republic)", "label": "negative", "script": "latin" }, { "name": "leslie boyd (latin)", "dob": "1928-08-24", "address": "Congo", "label": "negative", "script": "latin" }, { "name": "amy wilson (latin)", "dob": "1934-05-08", "address": "Mayotte", "label": "negative", "script": "latin" }, { "name": "éric marchand (latin)", "dob": "1939-11-24", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Yuri Afonin (latin)", "dob": "1977-3-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "justin robinson (latin)", "dob": "1933-03-23", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Airat Gibatdinov (latin)", "dob": "1986-1-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Ramezan Oladi (latin)", "dob": "1963-5-29", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "timothy anderson (latin)", "dob": "2004-05-02", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Aleksey Budnev (latin)", "dob": "1964-7-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "джана площаков (cyrillic)", "dob": "1990-03-30", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" } ], "query_template": "Generate 12 variations of {name} ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 28% of the total 12 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Duplicate a random letter, Use first name initial with last name, and Convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 28, "selected_rules": [ "duplicate_random_letter_as_double_letter", "initial_only_first_name", "shorten_name_to_initials" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "initial_only_first_name": "Use first name initial with last name", "shorten_name_to_initials": "Convert name to initials" }, "percentage": 28 } } }, { "seed_identities_with_labels": [ { "name": "Ahmad Shafa'i (latin)", "dob": "1968-5-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Геннадий Казакевич (cyrillic)", "dob": "1975-2-14", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "marcelle morel (latin)", "dob": "1987-09-27", "address": "Autriche", "label": "negative", "script": "latin" }, { "name": "william gallet (latin)", "dob": "1940-06-30", "address": "Sierra Leone", "label": "negative", "script": "latin" }, { "name": "روفيدا الحكير (arabic)", "dob": "1935-05-16", "address": "Estonia", "label": "negative", "script": "arabic" }, { "name": "paul rivière (latin)", "dob": "1935-08-29", "address": "Ouganda", "label": "negative", "script": "latin" }, { "name": "Lucy Miller (latin)", "dob": "1973-7-2", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "Nikolay Burlyayev (latin)", "dob": "1946-8-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "césar pereira (latin)", "dob": "1998-07-14", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "alain royer (latin)", "dob": "1954-10-31", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "angelica woods (latin)", "dob": "2006-12-27", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "доброслав данилов (cyrillic)", "dob": "1946-03-28", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "Dzmitryri Braim (latin)", "dob": "1976-4-18", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "anastasie allard (latin)", "dob": "1973-10-30", "address": "Maldives (Îles)", "label": "negative", "script": "latin" }, { "name": "corinne de sousa (latin)", "dob": "1932-10-05", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 15 execution vectors for {name}, ensuring phonetic similarity (30% Light variations such as \"Simeon\", 40% Medium variations like \"Simen\" or \"Seeman\", 30% Far variations including \"Shymon\") and orthographic similarity (50% Light variations with minimal changes to the original spelling, 50% Medium variations with slight modifications to the original spelling). Approximately 57% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that: Swap adjacent consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 57, "selected_rules": [ "swap_adjacent_consonants" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 57 } } }, { "seed_identities_with_labels": [ { "name": "Ekaterina Krivoruchko (latin)", "dob": "1986-3-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "pauline meunier (latin)", "dob": "2000-11-27", "address": "Haïti", "label": "negative", "script": "latin" }, { "name": "susan perrot (latin)", "dob": "1979-08-05", "address": "Vierges (Îles)", "label": "negative", "script": "latin" }, { "name": "andrée pages (latin)", "dob": "1934-09-15", "address": "Cocos (Îles)", "label": "negative", "script": "latin" }, { "name": "marianne guyon (latin)", "dob": "1930-08-03", "address": "Pakistan", "label": "negative", "script": "latin" }, { "name": "صبور عنس (arabic)", "dob": "2006-07-18", "address": "Anguilla", "label": "negative", "script": "arabic" }, { "name": "Harun Nasir-Al-Din (latin)", "dob": "1970-6-5", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "Aleksandr Akimov (latin)", "dob": "1954-11-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "fábio pinho (latin)", "dob": "1942-02-17", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "anne faure (latin)", "dob": "1967-04-20", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "aimé richard (latin)", "dob": "1966-01-12", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "irina monteiro (latin)", "dob": "1971-06-14", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Алексей Орлов (cyrillic)", "dob": "1961-10-9", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "агап орлов (cyrillic)", "dob": "1993-04-11", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "Andrey Klishas (latin)", "dob": "1972-11-9", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 7 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 50% Light, 50% Medium, and also include 28% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random vowel, and Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 28, "selected_rules": [ "remove_random_vowel", "add_random_trailing_title" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 28 } } }, { "seed_identities_with_labels": [ { "name": "Віктор Емеляненко (cyrillic)", "dob": "1953-10-11", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "amor sierra (latin)", "dob": "1942-05-05", "address": "Egipto", "label": "negative", "script": "latin" }, { "name": "alberto pelayo (latin)", "dob": "1984-09-03", "address": "República Dominicana", "label": "negative", "script": "latin" }, { "name": "rosendo llanos (latin)", "dob": "1949-01-20", "address": "Líbano", "label": "negative", "script": "latin" }, { "name": "margot moulin (latin)", "dob": "1953-12-28", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Mykola Vorobei (latin)", "dob": "1963-5-4", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "rosa maría esparza (latin)", "dob": "1936-12-30", "address": "Ucrania", "label": "negative", "script": "latin" }, { "name": "Hosseyn Akbari (latin)", "dob": "1961-6-17", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "امیررضا تنزیلی (arabic)", "dob": "1996-10-06", "address": "Turkey", "label": "negative", "script": "arabic" }, { "name": "حسيب همدان (arabic)", "dob": "1940-10-17", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "reyna cid (latin)", "dob": "1994-03-12", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "laetitia perret (latin)", "dob": "1972-07-05", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "marie michaud (latin)", "dob": "2006-10-12", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Alinkhan Tokhtakhunov (latin)", "dob": "1949-12-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Airat Gibatdinov (latin)", "dob": "1986-1-16", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 13 name variations of {name}, ensuring phonetic similarity (30% Light: {name} with Light modifications, 40% Medium: {name} with Medium modifications, 30% Far: {name} with Far modifications) and orthographic similarity (100% Far: {name} with Far spelling modifications). Approximately 16% of the total 13 variations should follow these rule-based transformations: Additionally, generate variations that: Swap random adjacent letters in {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Far": 1.0 }, "rule_based": { "rule_percentage": 16, "selected_rules": [ "swap_random_letter" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters" }, "percentage": 16 } } }, { "seed_identities_with_labels": [ { "name": "arnaude martins (latin)", "dob": "1968-07-20", "address": "Nouvelle Calédonie", "label": "negative", "script": "latin" }, { "name": "عرفان تنوخ (arabic)", "dob": "1951-09-09", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "william thibault (latin)", "dob": "1947-11-17", "address": "Libye", "label": "negative", "script": "latin" }, { "name": "Ratka Kamceva (latin)", "dob": "1945-10-16", "address": "North Macedonia, The Republic of", "label": "positive", "script": "latin" }, { "name": "Andrei Troshev (latin)", "dob": "1953-4-5", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "luciana teixeira (latin)", "dob": "2001-06-06", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Nikolai GAICHUK (latin)", "dob": "1973-5-31", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "بوران عكاوي (arabic)", "dob": "2003-04-10", "address": "Ethiopia", "label": "negative", "script": "arabic" }, { "name": "Светлана Емельянова (cyrillic)", "dob": "1971-10-7", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "thibault de sousa (latin)", "dob": "2004-12-07", "address": "Nouvelle-Zélande", "label": "negative", "script": "latin" }, { "name": "ângelo neves (latin)", "dob": "2006-03-29", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "marthe lecomte (latin)", "dob": "2006-06-05", "address": "Martinique", "label": "negative", "script": "latin" }, { "name": "hortense payet (latin)", "dob": "2001-07-05", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "samuel pacheco (latin)", "dob": "1980-11-08", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Andrei Khokhlun (latin)", "dob": "1966-12-4", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 9 execution vectors for {name}, ensuring phonetic similarity (30% Light variations of the form \"{name}ly\" or \"{name}ie\", 40% Medium variations of the form \"{name}ski\" or \"{name}cz\", and 30% Far variations of the form \"Z{name}\" or \"{name}tron\"), and orthographic similarity (100% Light variations of the form \"{name}y\" or \"{name}i\"). Approximately 46% of the total 9 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Abbreviate name parts to \"{nameabbr}\", and Replace random consonants with different consonants in \"{nameconsonant replaced}\". The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 1.0 }, "rule_based": { "rule_percentage": 46, "selected_rules": [ "shorten_name_to_abbreviations", "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 46 } } }, { "seed_identities_with_labels": [ { "name": "raymond bazin (latin)", "dob": "1935-10-06", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Abdulwahhab AL-HUMAIKANI (latin)", "dob": "1972-8-4", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "chita urrutia (latin)", "dob": "2002-08-16", "address": "Luxemburgo", "label": "negative", "script": "latin" }, { "name": "reinaldo catalá (latin)", "dob": "1986-04-24", "address": "Saint Kitts y Nevis", "label": "negative", "script": "latin" }, { "name": "nicole pruvost (latin)", "dob": "1977-04-06", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "maurice charpentier (latin)", "dob": "1957-04-01", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Xutong Qin (latin)", "dob": "1994-4-29", "address": "China", "label": "positive", "script": "latin" }, { "name": "Mahdy Helbawi (latin)", "dob": "1987-10-30", "address": "Colombia", "label": "positive", "script": "latin" }, { "name": "maite corbacho (latin)", "dob": "1959-01-25", "address": "Chad", "label": "negative", "script": "latin" }, { "name": "Віталій Булюк (cyrillic)", "dob": "1969-12-21", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "Aleksandr Ganov (latin)", "dob": "1974-10-24", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "гедеон суворов (cyrillic)", "dob": "1989-09-10", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "адам крылова (cyrillic)", "dob": "1958-12-30", "address": "Saint Kitts and Nevis", "label": "negative", "script": "cyrillic" }, { "name": "olegario campoy (latin)", "dob": "2002-12-14", "address": "Islas Marshall", "label": "negative", "script": "latin" }, { "name": "nuria botella (latin)", "dob": "1948-01-20", "address": "Cuba", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 8 variations of {name}, ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (100% Far). Approximately 55% of the total 8 variations should follow these rule-based transformations: Additionally, generate variations that: Replace random consonants with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Far": 1.0 }, "rule_based": { "rule_percentage": 55, "selected_rules": [ "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 55 } } }, { "seed_identities_with_labels": [ { "name": "james johnson (latin)", "dob": "1948-07-03", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "renata roman (latin)", "dob": "1990-10-04", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "véronique renard (latin)", "dob": "1961-02-13", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Олег Герасим (cyrillic)", "dob": "1962-3-3", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "rómulo prats (latin)", "dob": "1939-04-27", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "деспот овнарски (cyrillic)", "dob": "1984-09-10", "address": "Saint Kitts and Nevis", "label": "negative", "script": "cyrillic" }, { "name": "scott jordan (latin)", "dob": "1931-05-20", "address": "Mexico", "label": "negative", "script": "latin" }, { "name": "Aleksandr Akimov (latin)", "dob": "1954-11-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "анатолий бобров (cyrillic)", "dob": "1942-09-02", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "emma riley (latin)", "dob": "1958-10-06", "address": "Faroe Islands", "label": "negative", "script": "latin" }, { "name": "david anderson (latin)", "dob": "1987-08-30", "address": "Cote d'Ivoire", "label": "negative", "script": "latin" }, { "name": "Aegli Tamani-Phella (latin)", "dob": "1978-6-21", "address": "Cyprus", "label": "positive", "script": "latin" }, { "name": "Abdelkader LAAGOUB (latin)", "dob": "1966-4-23", "address": "Italy", "label": "positive", "script": "latin" }, { "name": "Anatoliy Lappo (latin)", "dob": "1963-5-24", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "tricia stein (latin)", "dob": "1959-07-21", "address": "Sweden", "label": "negative", "script": "latin" } ], "query_template": "Generate 11 name variations for {name}, ensuring phonetic similarity (50% Light, e.g. \"Jhon\", \"Jon\", etc., 50% Medium, e.g. \"Jonah\", \"Jan\") and orthographic similarity (30% Light, e.g. \"Johny\", \"Jonny\", etc., 40% Medium, e.g. \"Johnny\", \"Jonn\", etc., 30% Far, e.g. \"Jonothan\", \"Jonne\"). Approximately 32% of the total 11 variations should follow these rule-based transformations: Replace random consonants with different consonants (e.g. \"John\" becomes \"Jorn\"), and Swap random adjacent letters (e.g. \"John\" becomes \"Honj\"). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 32, "selected_rules": [ "replace_random_consonant_with_random_consonant", "swap_random_letter" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 32 } } }, { "seed_identities_with_labels": [ { "name": "alix rey (latin)", "dob": "1944-01-07", "address": "Mariannes du Nord (Îles)", "label": "negative", "script": "latin" }, { "name": "Riyadh al-Azzawi (latin)", "dob": "1976-6-7", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "jacques grégoire (latin)", "dob": "1936-01-22", "address": "Sahara Occidental", "label": "negative", "script": "latin" }, { "name": "еремей фомичев (cyrillic)", "dob": "1950-02-27", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "lucie da silva (latin)", "dob": "1936-04-23", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "adrienne chevallier (latin)", "dob": "1958-01-06", "address": "Marshall (Îles)", "label": "negative", "script": "latin" }, { "name": "Vildan Zinnurov (latin)", "dob": "1964-1-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "che mesa (latin)", "dob": "1959-12-23", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "guy hoarau (latin)", "dob": "1955-06-23", "address": "Chine (Rép. pop.)", "label": "negative", "script": "latin" }, { "name": "ангелина медведева (cyrillic)", "dob": "1997-04-20", "address": "Christmas Island", "label": "negative", "script": "cyrillic" }, { "name": "lucas lesage (latin)", "dob": "1925-05-14", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "‫فرزین‬ مزلقانچای (arabic)", "dob": "1992-12-7", "address": "Iran", "label": "positive", "script": "arabic" }, { "name": "Hafiz SAEED (latin)", "dob": "1950-6-5", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "paige waller (latin)", "dob": "1948-06-11", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Dmitry Pirog (latin)", "dob": "1980-6-27", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 14 variations of {name} ensuring phonetic similarity (100% Medium) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 20% of the total 14 variations should follow these rule-based transformations: Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 20, "selected_rules": [ "add_random_trailing_title" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 20 } } }, { "seed_identities_with_labels": [ { "name": "Rajabi Al-Zahir (latin)", "dob": "1969-7-5", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "andrea williams (latin)", "dob": "1979-11-18", "address": "Palestinian Territory", "label": "negative", "script": "latin" }, { "name": "lucas cohen (latin)", "dob": "1967-04-26", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "leonor vargas (latin)", "dob": "1974-12-14", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "karen walker (latin)", "dob": "1994-12-01", "address": "Ukraine", "label": "negative", "script": "latin" }, { "name": "Сергей Аземша (cyrillic)", "dob": "1974-7-17", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Yuri Shvytkin (latin)", "dob": "1965-5-24", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "веца чанлиева (cyrillic)", "dob": "2005-05-15", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "mark berry (latin)", "dob": "1980-10-06", "address": "Ireland", "label": "negative", "script": "latin" }, { "name": "stephanie carter (latin)", "dob": "1927-10-22", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "andrew gardner (latin)", "dob": "1936-03-01", "address": "Montenegro", "label": "negative", "script": "latin" }, { "name": "شدّاد ابو الحاج (arabic)", "dob": "1965-12-18", "address": "Papua New Guinea", "label": "negative", "script": "arabic" }, { "name": "éric loiseau (latin)", "dob": "1959-10-11", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Mu Xiaolu (latin)", "dob": "1990-3-8", "address": "China", "label": "positive", "script": "latin" }, { "name": "Saad AL-FAGIH (latin)", "dob": "1957-1-31", "address": "United Kingdom", "label": "positive", "script": "latin" } ], "query_template": "Generate 10 name variations for {name} ensuring phonetic similarity (70% Light, 30% Medium) and orthographic similarity (10% Light, 50% Medium, 40% Far). Approximately 36% of the total 10 name variations should follow these rule-based transformations: Additionally, generate variations that: Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 36, "selected_rules": [ "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 36 } } }, { "seed_identities_with_labels": [ { "name": "paul carlier (latin)", "dob": "1985-04-20", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "joaquina tomás (latin)", "dob": "1927-07-04", "address": "República Unida de Tanzanía", "label": "negative", "script": "latin" }, { "name": "emigdio mora (latin)", "dob": "1940-06-15", "address": "Eslovaquia", "label": "negative", "script": "latin" }, { "name": "радим ермаков (cyrillic)", "dob": "1967-11-17", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "emilio carpio (latin)", "dob": "1952-05-26", "address": "Samoa", "label": "negative", "script": "latin" }, { "name": "Theint Htet (latin)", "dob": "1999-5-21", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "Aleyona Chuguleva (latin)", "dob": "1986-5-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Hosseyn Akbari (latin)", "dob": "1961-6-17", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "sonia tomás (latin)", "dob": "1946-02-01", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Володимир Сальдо (cyrillic)", "dob": "1956-6-12", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "manu ribera (latin)", "dob": "1967-08-15", "address": "Guinea Bissau", "label": "negative", "script": "latin" }, { "name": "dinis martins (latin)", "dob": "1939-06-21", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Hussam Al-Qatirji (latin)", "dob": "1982-1-11", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "benjamín nieto (latin)", "dob": "1966-02-26", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "حنانه جهانی (arabic)", "dob": "1944-05-31", "address": "Cote d'Ivoire", "label": "negative", "script": "arabic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 37% of variations that follow: Additionally, generate variations that perform these transformations: Swap random adjacent letters, Remove a random vowel, and Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 37, "selected_rules": [ "swap_random_letter", "remove_random_vowel", "remove_all_spaces" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "remove_random_vowel": "Remove a random vowel", "remove_all_spaces": "Remove all spaces" }, "percentage": 37 } } }, { "seed_identities_with_labels": [ { "name": "парфен орехова (cyrillic)", "dob": "1949-03-04", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "Aping JUNTARAPRAPORN (latin)", "dob": "1955-1-14", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "marcel chevallier (latin)", "dob": "1953-12-27", "address": "Sainte Hélène", "label": "negative", "script": "latin" }, { "name": "thibaut bertin (latin)", "dob": "1981-08-23", "address": "Saint-Marin (Rép. de)", "label": "negative", "script": "latin" }, { "name": "Muhsin al-Zibin (latin)", "dob": "1973-7-1", "address": "Kuwait", "label": "positive", "script": "latin" }, { "name": "مرتجي عبد اللطيف (arabic)", "dob": "1942-12-03", "address": "Egypt", "label": "negative", "script": "arabic" }, { "name": "brandon wilson (latin)", "dob": "1941-02-01", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "norberto beltrán (latin)", "dob": "1978-09-05", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "emmanuel hoarau (latin)", "dob": "1980-06-19", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Jianming Cao (latin)", "dob": "1955-9-24", "address": "China", "label": "positive", "script": "latin" }, { "name": "Айсен Николаев (cyrillic)", "dob": "1972-1-22", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "renée gomes (latin)", "dob": "1980-08-10", "address": "Suisse", "label": "negative", "script": "latin" }, { "name": "Naser Neser (latin)", "dob": "1963-4-20", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "georges charrier (latin)", "dob": "1980-11-02", "address": "Oman", "label": "negative", "script": "latin" }, { "name": "robin rogers (latin)", "dob": "1961-10-27", "address": "Kenya", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 10 execution vectors for {name} by implementing phonetic similarity with 10% Light variations (\"{name}\" with minor pronunciation modifications), 30% Medium variations (\"{name}-like\" or \"{name}-sound\"), and 60% Far variations (completely different sounds); implement orthographic similarity with 50% Light variations (\"{name}\" with a single character alteration) and 50% Medium variations (\"{name}\" with multiple character alterations). Approximately 19% of the total 10 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Abbreviate name parts to \"{partial-name}\", and Remove all spaces from \"{name}\". The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 19, "selected_rules": [ "shorten_name_to_abbreviations", "remove_all_spaces" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "remove_all_spaces": "Remove all spaces" }, "percentage": 19 } } }, { "seed_identities_with_labels": [ { "name": "bernard prévost (latin)", "dob": "1945-02-24", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Qari Amjad (latin)", "dob": "1979-4-17", "address": "Afghanistan", "label": "positive", "script": "latin" }, { "name": "Abu-Ahmad Zakkur (latin)", "dob": "1979-1-5", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "Ruslan Bulatov (latin)", "dob": "1986-4-4", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "andrea dixon (latin)", "dob": "1980-01-27", "address": "Benin", "label": "negative", "script": "latin" }, { "name": "stephanie estes (latin)", "dob": "1951-03-18", "address": "Cayman Islands", "label": "negative", "script": "latin" }, { "name": "william lutz (latin)", "dob": "2002-05-01", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "سليمان البناى (arabic)", "dob": "1979-2-16", "address": "Qatar", "label": "positive", "script": "arabic" }, { "name": "nicolas breton (latin)", "dob": "1987-11-14", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "василий зыкова (cyrillic)", "dob": "2000-10-28", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "sean buchanan (latin)", "dob": "2001-12-28", "address": "Romania", "label": "negative", "script": "latin" }, { "name": "ратмир вишнякова (cyrillic)", "dob": "1937-05-11", "address": "Montenegro", "label": "negative", "script": "cyrillic" }, { "name": "james ramirez (latin)", "dob": "1959-08-15", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Jianming Cao (latin)", "dob": "1955-9-24", "address": "China", "label": "positive", "script": "latin" }, { "name": "carol adkins (latin)", "dob": "2000-11-05", "address": "Palau", "label": "negative", "script": "latin" } ], "query_template": "Generate 12 execution vectors for {name}, ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (100% Medium). Approximately 58% of the total 12 variations should follow these rule-based transformations: \n\nAdditionally, generate variations that perform these transformations: Delete a random letter from {name} to create {deleted_letter_variation}, and Remove a random consonant from {name} to create {removed_consonant_variation}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 58, "selected_rules": [ "delete_random_letter", "remove_random_consonant" ], "rule_descriptions": { "delete_random_letter": "Delete a random letter", "remove_random_consonant": "Remove a random consonant" }, "percentage": 58 } } }, { "seed_identities_with_labels": [ { "name": "Mehdi Lashgarian (latin)", "dob": "1989-6-2", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "'Abdallah AL-JAMAL (latin)", "dob": "1997-2-2", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "noël klein (latin)", "dob": "1943-11-12", "address": "Somalie", "label": "negative", "script": "latin" }, { "name": "célina nguyen (latin)", "dob": "1936-01-25", "address": "Tuvalu", "label": "negative", "script": "latin" }, { "name": "лариса александров (cyrillic)", "dob": "1993-04-29", "address": "Tonga", "label": "negative", "script": "cyrillic" }, { "name": "catherine guillou (latin)", "dob": "1945-01-02", "address": "Palau", "label": "negative", "script": "latin" }, { "name": "натан елисеева (cyrillic)", "dob": "1933-01-31", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "virginie fournier (latin)", "dob": "1999-09-05", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Ibrahim Jathran (latin)", "dob": "1982-10-29", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "laurent lemoine (latin)", "dob": "2005-11-13", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "jean gimenez (latin)", "dob": "2001-10-25", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "adélaïde dupont (latin)", "dob": "1992-03-05", "address": "Arabie saoudite", "label": "negative", "script": "latin" }, { "name": "Sveta Boyko (latin)", "dob": "1990-4-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "raymond joly (latin)", "dob": "1951-02-25", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Андрей Горохов (cyrillic)", "dob": "1960-1-13", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate 8 execution vectors for {name} ensuring phonetic similarity by generating names that sound-alike (100% Medium), and orthographic similarity by generating visually similar spellings (30% Light, 40% Medium, 30% Far). Approximately 35% of the total 8 variations should follow these rule-based transformations: Additionally, generate variations that replace random consonants with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 35, "selected_rules": [ "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 35 } } }, { "seed_identities_with_labels": [ { "name": "augustin benard (latin)", "dob": "1960-01-26", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "céline rossi (latin)", "dob": "1997-03-15", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "милован юдин (cyrillic)", "dob": "1929-03-09", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "باران هنری (arabic)", "dob": "1966-06-17", "address": "Cyprus", "label": "negative", "script": "arabic" }, { "name": "Elena Milskaya (latin)", "dob": "1980-12-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "elías bayo (latin)", "dob": "1937-04-04", "address": "Liechtenstein", "label": "negative", "script": "latin" }, { "name": "Zhiguang Ou (latin)", "dob": "1961-8-16", "address": "China", "label": "positive", "script": "latin" }, { "name": "julia garrido (latin)", "dob": "1986-10-17", "address": "Reino Unido de Gran Bretaña e Irlanda del Norte", "label": "negative", "script": "latin" }, { "name": "Павел Сорокин (cyrillic)", "dob": "1985-8-1", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "telmo barragán (latin)", "dob": "2006-01-18", "address": "Irán", "label": "negative", "script": "latin" }, { "name": "carmina cózar (latin)", "dob": "1943-08-18", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "encarnacion ochoa (latin)", "dob": "1941-07-20", "address": "Dominicana", "label": "negative", "script": "latin" }, { "name": "Aleksandr Sokolov (latin)", "dob": "1970-8-4", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "shane carter (latin)", "dob": "1981-05-16", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Ghunia ABDRABBA (latin)", "dob": "1957-9-2", "address": "United Kingdom", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 31% of variations that follow: Additionally, generate variations that: Replace double letters with a single letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 31, "selected_rules": [ "replace_double_letters_with_single_letter" ], "rule_descriptions": { "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 31 } } }, { "seed_identities_with_labels": [ { "name": "gabriel da costa (latin)", "dob": "1936-09-21", "address": "Rwanda", "label": "negative", "script": "latin" }, { "name": "Геннадий Орденов (cyrillic)", "dob": "1957-9-4", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "jacques allain (latin)", "dob": "2000-04-05", "address": "Bouvet (Îles)", "label": "negative", "script": "latin" }, { "name": "claude merle (latin)", "dob": "1925-04-30", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Ra'i al-Ras (latin)", "dob": "1974-9-5", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "pauline pinto (latin)", "dob": "1991-02-11", "address": "Canada", "label": "negative", "script": "latin" }, { "name": "Alex Schetinin (latin)", "dob": "1987-8-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Nikolay Arefyev (latin)", "dob": "1949-3-11", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "judith cohen (latin)", "dob": "1926-09-04", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Oleg Nikolayev (latin)", "dob": "1969-12-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "رواء الأيوبي (arabic)", "dob": "1983-02-18", "address": "Syria", "label": "High Risk", "script": "arabic" }, { "name": "клавдий третьяков (cyrillic)", "dob": "1993-08-24", "address": "Burundi", "label": "negative", "script": "cyrillic" }, { "name": "benjamin lemonnier (latin)", "dob": "1991-03-06", "address": "Israël", "label": "negative", "script": "latin" }, { "name": "mariano pellicer (latin)", "dob": "1991-06-03", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "marianne philippe (latin)", "dob": "1949-02-25", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 12 name variations of {name}, ensuring phonetic similarity with 10% Light ({name} variants that sound the same but have minor differences), 50% Medium ({name} variants that sound somewhat similar but not identical), and 40% Far ({name} variants that do not sound like the original); orthographic similarity with 70% Light (variants that appear slightly different in spelling) and 30% Medium (variants that appear moderately different in spelling). Approximately 18% of the total 12 variations should follow these rule-based transformations: Additionally, generate variations that replace random consonants with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 18, "selected_rules": [ "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 18 } } }, { "seed_identities_with_labels": [ { "name": "Ilyas Umahanov (latin)", "dob": "1957-3-27", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "marguerite pineau (latin)", "dob": "1958-01-08", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Amin Shary (latin)", "dob": "1957-8-2", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "dionisio luján (latin)", "dob": "1989-09-12", "address": "Croacia", "label": "negative", "script": "latin" }, { "name": "авксентий мартынова (cyrillic)", "dob": "1955-12-19", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "merche cabanillas (latin)", "dob": "2002-11-10", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Elena Drapeko (latin)", "dob": "1948-10-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "sara ribeiro (latin)", "dob": "1934-12-02", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "anacleto murillo (latin)", "dob": "1964-04-17", "address": "Bahamas", "label": "negative", "script": "latin" }, { "name": "Светлана Горячева (cyrillic)", "dob": "1947-6-3", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "'Abdallah AL-JAMAL (latin)", "dob": "1997-2-2", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "jacqueline lebreton (latin)", "dob": "1962-10-08", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "نصور عسير (arabic)", "dob": "1939-05-31", "address": "Isle of Man", "label": "negative", "script": "arabic" }, { "name": "balduino gisbert (latin)", "dob": "1941-02-08", "address": "Kiribati", "label": "negative", "script": "latin" }, { "name": "dulce cabeza (latin)", "dob": "1971-05-10", "address": "Serbia", "label": "negative", "script": "latin" } ], "query_template": "Generate 10 variations of {name} with phonetic similarity of 100% Medium. \nGenerate 10 variations of {name} with orthographic similarity of 100% Far. \nApproximately 29% of the total 20 variations should follow these rule-based transformations: \nAdditionally, generate variations that perform these transformations on the name {name}: Remove all spaces; Duplicate a random letter; and Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Far": 1.0 }, "rule_based": { "rule_percentage": 29, "selected_rules": [ "remove_all_spaces", "duplicate_random_letter_as_double_letter", "add_random_leading_title" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 29 } } }, { "seed_identities_with_labels": [ { "name": "cécile lecoq (latin)", "dob": "1960-02-21", "address": "Égypte", "label": "negative", "script": "latin" }, { "name": "Oleksiy MOZGOVY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "нина муравьев (cyrillic)", "dob": "1954-06-14", "address": "Turks and Caicos Islands", "label": "negative", "script": "cyrillic" }, { "name": "Elena Drapeko (latin)", "dob": "1948-10-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "pierre schmitt (latin)", "dob": "1951-06-08", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "thomas renard (latin)", "dob": "1942-08-11", "address": "Albanie", "label": "negative", "script": "latin" }, { "name": "Роман Лябихов (cyrillic)", "dob": "1973-5-7", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "ирина корнилов (cyrillic)", "dob": "1936-11-27", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "alexandre delaunay (latin)", "dob": "1940-02-19", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "julie albert (latin)", "dob": "1977-12-19", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "nathan graves (latin)", "dob": "1925-08-20", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Gulbuddin HEKHMARTYAR (latin)", "dob": "1949-8-1", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "pauline aubert (latin)", "dob": "1988-11-21", "address": "Tuvalu", "label": "negative", "script": "latin" }, { "name": "margot pons (latin)", "dob": "1952-03-18", "address": "Bolivie", "label": "negative", "script": "latin" }, { "name": "Muhammad Dayri (latin)", "dob": "1961-6-6", "address": "Syria", "label": "positive", "script": "latin" } ], "query_template": "Generate 8 execution vectors for {name} with phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (100% Far). Approximately 44% of the total variations should follow these rule-based transformations: Use first name initial with last name; Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Far": 1.0 }, "rule_based": { "rule_percentage": 44, "selected_rules": [ "initial_only_first_name", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 44 } } }, { "seed_identities_with_labels": [ { "name": "Evgeni Chernet (latin)", "dob": "1946-11-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Yuri Karayev (latin)", "dob": "1966-6-21", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "emperatriz prats (latin)", "dob": "1966-08-30", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Ajmal Rahmani (latin)", "dob": "1982-1-1", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "timothy deleon (latin)", "dob": "2000-02-06", "address": "Liberia", "label": "negative", "script": "latin" }, { "name": "james hall (latin)", "dob": "1977-03-04", "address": "Brazil", "label": "negative", "script": "latin" }, { "name": "петрийка недялков (cyrillic)", "dob": "1984-12-27", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "glenn houston (latin)", "dob": "1962-01-04", "address": "Azerbaijan", "label": "negative", "script": "latin" }, { "name": "nicholas collins (latin)", "dob": "1972-09-20", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "timothy welch (latin)", "dob": "1982-05-05", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "عادل برجیسیان (arabic)", "dob": "1981-3-12", "address": "Iran", "label": "positive", "script": "arabic" }, { "name": "Yevgeniya Podgornova (latin)", "dob": "1980-7-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "агап фомичев (cyrillic)", "dob": "1946-07-27", "address": "Netherlands Antilles", "label": "negative", "script": "cyrillic" }, { "name": "ann pennington (latin)", "dob": "2003-08-24", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "brett fox (latin)", "dob": "1951-07-05", "address": "Sweden", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 7 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 22% of variations that follow: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), Use first name initial with last name, and Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 22, "selected_rules": [ "add_random_trailing_title", "initial_only_first_name", "remove_random_vowel" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "initial_only_first_name": "Use first name initial with last name", "remove_random_vowel": "Remove a random vowel" }, "percentage": 22 } } }, { "seed_identities_with_labels": [ { "name": "Nikolay Levichev (latin)", "dob": "1953-5-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "рубен архипова (cyrillic)", "dob": "1927-05-20", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "Aleksei Simanovskiy (latin)", "dob": "1955-9-6", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "armando nevado (latin)", "dob": "1988-04-01", "address": "Belarús", "label": "negative", "script": "latin" }, { "name": "nicole melendez (latin)", "dob": "1955-05-06", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "éléonore morin (latin)", "dob": "1971-09-15", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "zaira múñiz (latin)", "dob": "1970-07-19", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Sergey Omelnitskii (latin)", "dob": "1980-6-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "شعيب مذحج (arabic)", "dob": "1959-03-23", "address": "Ethiopia", "label": "negative", "script": "arabic" }, { "name": "guillermo ponce (latin)", "dob": "1957-11-26", "address": "Kiribati", "label": "negative", "script": "latin" }, { "name": "soledad quero (latin)", "dob": "2003-10-09", "address": "Suriname", "label": "negative", "script": "latin" }, { "name": "kelsey ryan (latin)", "dob": "1966-06-10", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "severiano bas (latin)", "dob": "1970-07-29", "address": "Niger", "label": "negative", "script": "latin" }, { "name": "Viktor Mozhelyansky (latin)", "dob": "1964-5-10", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "‫محمدعلی‬ دمیرچی‌لو (arabic)", "dob": "1992-5-24", "address": "Iran", "label": "positive", "script": "arabic" } ], "query_template": "Generate 15 variations of {name} ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (10% Light, 50% Medium, 40% Far). Approximately 41% of the total 15 variations should follow these rule-based transformations: Replace spaces with special characters, and Reorder name parts. Additionally generate phonetically similar variations for {name} such as Light variation of {name}, Medium variation of {name}, and Far variation of {name}. Also generate orthographically similar variations for {name} such as Light variation of {name}, Medium variation of {name}, and Far variation of {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 41, "selected_rules": [ "replace_spaces_with_random_special_characters", "name_parts_permutations" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "name_parts_permutations": "Reorder name parts" }, "percentage": 41 } } }, { "seed_identities_with_labels": [ { "name": "осип зыков (cyrillic)", "dob": "1986-05-30", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "stéphane benoit (latin)", "dob": "1948-11-19", "address": "Dominique", "label": "negative", "script": "latin" }, { "name": "céline jean (latin)", "dob": "1974-06-19", "address": "Wallis et Futuna (Îles)", "label": "negative", "script": "latin" }, { "name": "david becker (latin)", "dob": "1979-03-07", "address": "Zimbabwe", "label": "negative", "script": "latin" }, { "name": "gabrielle bernier (latin)", "dob": "1970-04-24", "address": "Kirghizistan", "label": "negative", "script": "latin" }, { "name": "juan antonio torres (latin)", "dob": "1937-05-31", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Leontiy Kondrakhin (latin)", "dob": "2001-4-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Houssam Katrangi (latin)", "dob": "1973-11-27", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "станимир нестеров (cyrillic)", "dob": "1961-10-12", "address": "Liechtenstein", "label": "negative", "script": "cyrillic" }, { "name": "élisabeth berthelot (latin)", "dob": "1950-07-11", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "hugues techer (latin)", "dob": "1942-11-03", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Aleksey Mordashov (latin)", "dob": "1965-9-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Harun Nasir-Al-Din (latin)", "dob": "1970-6-5", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "Хазалбек Атабекаў (cyrillic)", "dob": "1967-3-18", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "susanne leclerc (latin)", "dob": "1925-08-26", "address": "Burkina Faso", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 15 name variations for {name} ensuring phonetic similarity with 10.5 Light and 4.5 Medium variations, and orthographic similarity with 1.5 Light, 7.5 Medium, and 6 Far variations. Approximately 37% of the total 15 variations should follow these rule-based transformations: Generate a variation where {name} is duplicated (e.g., {name}{name}), and generate a variation where a random letter in {name} is duplicated.\n[VALIDATION HINTS]: Phonetic similarity: 70% Light.; Orthographic similarity: 10% Light. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 37, "selected_rules": [ "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 37 } } }, { "seed_identities_with_labels": [ { "name": "Felix Okpoh (latin)", "dob": "1989-3-9", "address": "Nigeria", "label": "positive", "script": "latin" }, { "name": "Gholamreza Eyni (latin)", "dob": "1975-7-11", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "jessica brooks (latin)", "dob": "1990-07-20", "address": "Falkland Islands (Malvinas)", "label": "negative", "script": "latin" }, { "name": "علي علیجانی (arabic)", "dob": "1957-06-16", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "william fox (latin)", "dob": "1926-06-25", "address": "Eritrea", "label": "negative", "script": "latin" }, { "name": "jennifer smith (latin)", "dob": "1968-03-24", "address": "Sri Lanka", "label": "negative", "script": "latin" }, { "name": "arthur mathieu (latin)", "dob": "1957-08-11", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Дмитрий Лоцманов (cyrillic)", "dob": "1975-3-2", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "april lowe (latin)", "dob": "1959-08-10", "address": "Saint Helena", "label": "negative", "script": "latin" }, { "name": "christine monroe (latin)", "dob": "1925-04-10", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "christine vasseur (latin)", "dob": "1973-07-20", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Rady Khabirov (latin)", "dob": "1964-3-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jasmine davis (latin)", "dob": "1986-06-27", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "مشرف مزرعاني (arabic)", "dob": "1999-06-10", "address": "Jersey", "label": "negative", "script": "arabic" }, { "name": "Hamid Boord (latin)", "dob": "1964-3-27", "address": "Iran", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 7 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 70% Light, 30% Medium, and also include 26% of variations that follow: Additionally, generate variations that: Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 26, "selected_rules": [ "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 26 } } }, { "seed_identities_with_labels": [ { "name": "wesley snyder (latin)", "dob": "1983-03-08", "address": "Burundi", "label": "negative", "script": "latin" }, { "name": "consuela palau (latin)", "dob": "1954-11-12", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Heider Pasandideh (latin)", "dob": "1976-7-16", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Muhammad Dayri (latin)", "dob": "1961-6-6", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Алег Пятроў (cyrillic)", "dob": "1962-3-26", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "christine west (latin)", "dob": "1956-06-08", "address": "Turkmenistan", "label": "negative", "script": "latin" }, { "name": "Amin Shary (latin)", "dob": "1957-8-2", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "نبهان العسلي (arabic)", "dob": "1925-12-11", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "بدرالدّجى آل صفوان (arabic)", "dob": "1945-01-31", "address": "Cyprus", "label": "negative", "script": "arabic" }, { "name": "kelly scott (latin)", "dob": "1934-09-07", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Ahmad Noroozi (latin)", "dob": "1987-5-5", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "tina davies (latin)", "dob": "1984-09-25", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "alexander skinner (latin)", "dob": "1947-05-27", "address": "American Samoa", "label": "negative", "script": "latin" }, { "name": "julia garrison (latin)", "dob": "1929-11-25", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "april stephens (latin)", "dob": "1991-03-17", "address": "Saint Barthelemy", "label": "negative", "script": "latin" } ], "query_template": "Generate 7 execution vectors for {name}, ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) by applying these transformations: Replace \"a\" with \"e\", Replace \"e\" with \"a\", Replace \"o\" with \"u\", and Replace \"i\" with \"y\". \nAdditionally, generate variations that perform these rule-based transformations: Approximately 21% of the total 7 variations should follow these transformations: Duplicate a random letter, Swap adjacent consonants, and Add a title prefix (Mr., Dr., etc.).\n[VALIDATION HINTS]: Orthographic similarity: 20% Light. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 21, "selected_rules": [ "duplicate_random_letter_as_double_letter", "swap_adjacent_consonants", "add_random_leading_title" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "swap_adjacent_consonants": "Swap adjacent consonants", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 21 } } }, { "seed_identities_with_labels": [ { "name": "jessica schroeder (latin)", "dob": "1965-09-24", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Abu-'Ubaydah Al-Agha (latin)", "dob": "1964-5-2", "address": "Saudi Arabia", "label": "positive", "script": "latin" }, { "name": "tiffany allen (latin)", "dob": "2002-07-14", "address": "Croatia", "label": "negative", "script": "latin" }, { "name": "charles mills (latin)", "dob": "1988-04-17", "address": "Lao People's Democratic Republic", "label": "negative", "script": "latin" }, { "name": "ячо илиева (cyrillic)", "dob": "1932-11-18", "address": "Saint Lucia", "label": "negative", "script": "cyrillic" }, { "name": "christopher ramos (latin)", "dob": "1990-02-23", "address": "Cote d'Ivoire", "label": "negative", "script": "latin" }, { "name": "diana wright (latin)", "dob": "1982-09-12", "address": "Cayman Islands", "label": "negative", "script": "latin" }, { "name": "Ruslan Lechkhadzhiev (latin)", "dob": "1965-7-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "brittany hicks (latin)", "dob": "1977-12-10", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "ضياء بني رشيد (arabic)", "dob": "1997-06-11", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "Vinai PICHAYOT (latin)", "dob": "1957-12-1", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "iara ribeiro (latin)", "dob": "1935-08-10", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "anaïs bertrand (latin)", "dob": "1982-11-10", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Анна Тодорова (cyrillic)", "dob": "1988-2-20", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "Andrei Burov (latin)", "dob": "1971-11-30", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 8 variations of {name} ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (10% Light, 30% Medium, 60% Far). Approximately 40% of the total 8 variations should follow these rule-based transformations: Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 40, "selected_rules": [ "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 40 } } }, { "seed_identities_with_labels": [ { "name": "Aegli Tamani-Phella (latin)", "dob": "1978-6-21", "address": "Cyprus", "label": "positive", "script": "latin" }, { "name": "margaux legrand (latin)", "dob": "1978-03-14", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "роксана бодуров (cyrillic)", "dob": "1947-11-23", "address": "Niue", "label": "negative", "script": "cyrillic" }, { "name": "javi mateu (latin)", "dob": "2000-11-09", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "rené sanchez (latin)", "dob": "2007-07-29", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "سالم المغربي (arabic)", "dob": "1948-09-07", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "Yuri Afonin (latin)", "dob": "1977-3-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "mark gonzalez (latin)", "dob": "2004-01-18", "address": "United States of America", "label": "negative", "script": "latin" }, { "name": "andrew werner (latin)", "dob": "1928-12-16", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "pamela mclaughlin (latin)", "dob": "1984-01-21", "address": "Brazil", "label": "negative", "script": "latin" }, { "name": "misty branch (latin)", "dob": "2006-03-18", "address": "Mauritius", "label": "negative", "script": "latin" }, { "name": "Alam Zaib (latin)", "dob": "1974-1-1", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "david franco (latin)", "dob": "1998-12-11", "address": "Isle of Man", "label": "negative", "script": "latin" }, { "name": "Дмитрий Пьянов (cyrillic)", "dob": "1977-12-7", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Alexander Malkevich (latin)", "dob": "1975-6-14", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 10 name variations of {name}, ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (100% Far). Approximately 34% of the total 10 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Replace a random vowel with \"a\" or \"e\", Insert a random letter, Remove a consecutive pair of identical letters. The remaining 66% should be randomly distributed between Light and Medium phonetic similarity levels for the other six variations. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Far": 1.0 }, "rule_based": { "rule_percentage": 34, "selected_rules": [ "remove_random_vowel", "insert_random_letter" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "insert_random_letter": "Insert a random letter" }, "percentage": 34 } } }, { "seed_identities_with_labels": [ { "name": "Katerina Tikhonova (latin)", "dob": "1986-8-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "tammy smith (latin)", "dob": "1979-07-17", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "xavier rolland (latin)", "dob": "1976-02-29", "address": "Benin", "label": "negative", "script": "latin" }, { "name": "ریحانه کمالی (arabic)", "dob": "1940-05-19", "address": "Iran", "label": "High Risk", "script": "arabic" }, { "name": "Muhammad Adhiguna (latin)", "dob": "1996-7-30", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "عبداله محرابي (arabic)", "dob": "1961-12-22", "address": "Iran", "label": "positive", "script": "arabic" }, { "name": "joann adams (latin)", "dob": "1996-07-23", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "geneviève diallo (latin)", "dob": "1971-05-04", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Esam Ettehadi (latin)", "dob": "1989-7-31", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "olivier marty (latin)", "dob": "1974-06-11", "address": "Pérou", "label": "negative", "script": "latin" }, { "name": "pamela sullivan (latin)", "dob": "1999-07-16", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "olivie da silva (latin)", "dob": "1989-05-23", "address": "Andorre", "label": "negative", "script": "latin" }, { "name": "michelle loiseau (latin)", "dob": "1985-05-29", "address": "Qatar", "label": "negative", "script": "latin" }, { "name": "Banyar Moe (latin)", "dob": "1947-8-14", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "денница дачев (cyrillic)", "dob": "1998-11-18", "address": "French Guiana", "label": "negative", "script": "cyrillic" } ], "query_template": "Generate 10 variations of {name}, ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (10% Light, 30% Medium, 60% Far). Approximately 54% of the total 10 variations should follow these rule-based transformations: Replace random vowels with different vowels, and Convert name to initials. The remaining 46% should randomly sample from phonetic and orthographic similarity categories with no rule-based transformations applied. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 54, "selected_rules": [ "replace_random_vowel_with_random_vowel", "shorten_name_to_initials" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "shorten_name_to_initials": "Convert name to initials" }, "percentage": 54 } } }, { "seed_identities_with_labels": [ { "name": "Alexander Demin (latin)", "dob": "1988-9-23", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "عدلي العلمي (arabic)", "dob": "1997-05-12", "address": "Bouvet Island (Bouvetoya)", "label": "negative", "script": "arabic" }, { "name": "Andrey Klishas (latin)", "dob": "1972-11-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "luisa carlos (latin)", "dob": "1965-03-07", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "michael arellano (latin)", "dob": "1999-03-27", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "guadalupe ariño (latin)", "dob": "1961-02-06", "address": "Malasia", "label": "negative", "script": "latin" }, { "name": "thierry briand (latin)", "dob": "1970-11-06", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "poncio correa (latin)", "dob": "1955-02-08", "address": "Lesotho", "label": "negative", "script": "latin" }, { "name": "هيكل بلغازي (arabic)", "dob": "1934-08-09", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "Владимир Ежиков (cyrillic)", "dob": "1987-6-20", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "loreto ripoll (latin)", "dob": "1950-01-24", "address": "Croacia", "label": "negative", "script": "latin" }, { "name": "Tigran Srabionov (latin)", "dob": "1986-4-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "William ZHOU (latin)", "dob": "1977-3-25", "address": "China", "label": "positive", "script": "latin" }, { "name": "cornelio moya (latin)", "dob": "1984-07-05", "address": "Sudán", "label": "negative", "script": "latin" }, { "name": "abril salas (latin)", "dob": "1966-10-10", "address": "Bolivia", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 19% of variations that follow: Additionally, generate variations that perform these transformations: Convert name to initials, Delete a random letter, and Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 19, "selected_rules": [ "shorten_name_to_initials", "delete_random_letter", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "delete_random_letter": "Delete a random letter", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 19 } } }, { "seed_identities_with_labels": [ { "name": "ناضر القضماني (arabic)", "dob": "1970-10-30", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "pierre diaz (latin)", "dob": "1942-11-14", "address": "Vierges britanniques (Îles)", "label": "negative", "script": "latin" }, { "name": "vincent dubois (latin)", "dob": "1971-12-07", "address": "Maldives (Îles)", "label": "negative", "script": "latin" }, { "name": "márcio cardoso (latin)", "dob": "1978-08-05", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "حمدان كسواني (arabic)", "dob": "1933-08-10", "address": "Burundi", "label": "negative", "script": "arabic" }, { "name": "emanuel jesus (latin)", "dob": "1954-07-06", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "alexandre jacquet (latin)", "dob": "1939-12-04", "address": "Saint-Kitts et Nevis", "label": "negative", "script": "latin" }, { "name": "astrid marin (latin)", "dob": "1970-09-29", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Taher Kayali (latin)", "dob": "1960-7-11", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Touraj Zangeneh (latin)", "dob": "1958-8-5", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Галина Пятых (cyrillic)", "dob": "1970-5-12", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "yves lévêque (latin)", "dob": "1966-12-15", "address": "Lesotho", "label": "negative", "script": "latin" }, { "name": "Rafael Bastardo (latin)", "dob": "1978-9-22", "address": "Venezuela", "label": "positive", "script": "latin" }, { "name": "Tigran Srabionov (latin)", "dob": "1986-4-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jennifer bauer (latin)", "dob": "1992-12-31", "address": "Kenya", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 14 execution vectors for {name}, ensuring phonetic similarity (20% variations sound approximately like {name} when spoken, 60% variations sound similar to {name} but not exactly, 20% variations sound distant from {name}) and orthographic similarity (50% variations have visually similar spellings that differ only slightly from {name}, 50% variations have visually similar spellings that differ moderately from {name}). Approximately 42% of the total 14 execution vectors should follow these rule-based transformations: Additionally, generate variations that perform these transformations for {name}: Abbreviate name parts, Swap adjacent consonants, and Replace double letters with a single letter.\n[VALIDATION HINTS]: Phonetic similarity: 20% Light, 60% Medium.; Orthographic similarity: 50% Light, 50% Medium. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 42, "selected_rules": [ "shorten_name_to_abbreviations", "swap_adjacent_consonants", "replace_double_letters_with_single_letter" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "swap_adjacent_consonants": "Swap adjacent consonants", "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 42 } } }, { "seed_identities_with_labels": [ { "name": "bernardo ferreira (latin)", "dob": "1933-09-30", "address": "Indian Ocean", "label": "negative", "script": "latin" }, { "name": "Abdelbasit Khair (latin)", "dob": "1955-8-28", "address": "Sudan", "label": "positive", "script": "latin" }, { "name": "erica vaz (latin)", "dob": "1939-10-11", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "людмила красильникова (cyrillic)", "dob": "1967-12-02", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "julien diaz (latin)", "dob": "1965-04-03", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "dylan mcknight (latin)", "dob": "1928-11-26", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "nicole monteiro (latin)", "dob": "1990-05-21", "address": "Tailândia", "label": "negative", "script": "latin" }, { "name": "violeta lopes (latin)", "dob": "1936-02-12", "address": "Ilhas Cook", "label": "negative", "script": "latin" }, { "name": "Artyom Verkhov (latin)", "dob": "1986-8-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "manuel ribeiro (latin)", "dob": "1946-03-22", "address": "Suíça", "label": "negative", "script": "latin" }, { "name": "Dinar Gilmutdinov (latin)", "dob": "1969-8-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Андрей Колесник (cyrillic)", "dob": "1960-2-26", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "sandy sanchez (latin)", "dob": "2006-04-21", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Pavel Kachkayev (latin)", "dob": "1951-10-4", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "مريم آل سلطان (arabic)", "dob": "1939-01-01", "address": "Malawi", "label": "negative", "script": "arabic" } ], "query_template": "Generate 11 variations of {name}. Ensuring phonetic similarity and orthographic similarity as follows: \nPhonetic: \n20% Light variations e.g. {pronunciation} -> (insert pronunciation guide for light variation), \n60% Medium variations e.g. {phonetic alteration}, \n20% Far variations e.g. {far phonetic alteration}. \nOrthographic: \n50% Light variations e.g. {spelling alteration}, \n50% Medium variations e.g. {medium spelling alteration}. \nApproximately 59% of the total 11 variations should follow these rule-based transformations: \nAdditionally, generate variations that: \nAbbreviate name parts (e.g. John -> J., abbreviating \"John\"). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 59, "selected_rules": [ "shorten_name_to_abbreviations" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 59 } } }, { "seed_identities_with_labels": [ { "name": "Muhammad Al-Nabi (latin)", "dob": "1952-1-1", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "Xiaohong Ma (latin)", "dob": "1971-12-15", "address": "China", "label": "positive", "script": "latin" }, { "name": "marianela torrents (latin)", "dob": "1978-05-06", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "clemente solís (latin)", "dob": "1974-11-04", "address": "Federación de Rusia", "label": "negative", "script": "latin" }, { "name": "Andrey Parshin (latin)", "dob": "1974-2-19", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "josé gutiérrez (latin)", "dob": "1968-02-09", "address": "Jordania", "label": "negative", "script": "latin" }, { "name": "творимир лаврентьева (cyrillic)", "dob": "1997-08-07", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "loida rincón (latin)", "dob": "1987-01-25", "address": "Comoras", "label": "negative", "script": "latin" }, { "name": "alexandrie dupont (latin)", "dob": "1930-03-21", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Дмитрий Перминов (cyrillic)", "dob": "1979-4-3", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "salomé anjos (latin)", "dob": "1953-03-26", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Hosein Hemsi (latin)", "dob": "1982-10-27", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "ligia delgado (latin)", "dob": "1961-07-25", "address": "Santo Tomé y Príncipe", "label": "negative", "script": "latin" }, { "name": "عبد الغفور آل عواض (arabic)", "dob": "2004-11-21", "address": "Liberia", "label": "negative", "script": "arabic" }, { "name": "stanley martinez (latin)", "dob": "1926-10-24", "address": "Nigeria", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 10 execution vectors for each target identity {name}, ensuring phonetic similarity with the following distribution:\n- 1 variation exhibits Light phonetic similarity\n- 5 variations exhibit Medium phonetic similarity\n- 4 variations exhibit Far phonetic similarity\n\nAdditionally, generate variations that ensure orthographic similarity with the following distribution:\n- 7 variations exhibit Light orthographic similarity\n- 3 variations exhibit Medium orthographic similarity\n\nApproximately 28% of the total 10 variations should follow these rule-based transformations: Additionally, generate variations that Remove all spaces.\n\nExample variations may include {name}, {name} (no spaces), {name} (phonetic variation), and so on.\n[VALIDATION HINTS]: Phonetic similarity: 10% Light.; Orthographic similarity: 70% Light. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 28, "selected_rules": [ "remove_all_spaces" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces" }, "percentage": 28 } } }, { "seed_identities_with_labels": [ { "name": "micael baptista (latin)", "dob": "1964-02-29", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "ricardo guerreiro (latin)", "dob": "2006-07-14", "address": "Birmânia", "label": "negative", "script": "latin" }, { "name": "raquel abreu (latin)", "dob": "1952-04-27", "address": "Turquia", "label": "negative", "script": "latin" }, { "name": "高彬 倪 (chinese)", "dob": "1985-10-27", "address": "China", "label": "positive", "script": "chinese" }, { "name": "anthony stevenson (latin)", "dob": "1959-11-29", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "inês gonçalves (latin)", "dob": "1969-12-31", "address": "Ilhas Turcas e Caicos", "label": "negative", "script": "latin" }, { "name": "Andrei Burov (latin)", "dob": "1971-11-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "علي حمیدی (arabic)", "dob": "1972-03-01", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "Viktor Boyarkin (latin)", "dob": "1958-10-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Jihad Kansou (latin)", "dob": "1966-2-10", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "patrícia garcia (latin)", "dob": "2007-03-21", "address": "Mauritânia", "label": "negative", "script": "latin" }, { "name": "زهير آل صفوان (arabic)", "dob": "1946-03-13", "address": "Lithuania", "label": "negative", "script": "arabic" }, { "name": "jennifer jenkins (latin)", "dob": "1997-01-14", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "catherine cohen (latin)", "dob": "1945-08-24", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Tun Latt (latin)", "dob": "1969-2-6", "address": "Burma", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 6 execution vectors for each target identity {name}, ensuring phonetic similarity (30% Light: {name} pronounced slightly differently, 40% Medium: {name} sounded similar but not identical, 30% Far: {name} sounded significantly different) and orthographic similarity (10% Light: slight spelling variation, 30% Medium: minor alteration of letters or characters, 60% Far: extensive modification of the original spelling). Approximately 16% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Convert {name} to initials, Insert a random letter into {name}, and Replace one or more consonants with different consonants in {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 16, "selected_rules": [ "shorten_name_to_initials", "insert_random_letter", "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "insert_random_letter": "Insert a random letter", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 16 } } }, { "seed_identities_with_labels": [ { "name": "Даниил Борщев (cyrillic)", "dob": "1975-12-4", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "lauren payne (latin)", "dob": "1925-06-19", "address": "Saint Vincent and the Grenadines", "label": "negative", "script": "latin" }, { "name": "auguste dubois (latin)", "dob": "1952-05-01", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "tom stone (latin)", "dob": "1969-09-24", "address": "Guinea", "label": "negative", "script": "latin" }, { "name": "candela baena (latin)", "dob": "2002-09-13", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "laure chrétien (latin)", "dob": "2001-11-20", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "пенко чанлиева (cyrillic)", "dob": "2007-08-19", "address": "Eritrea", "label": "negative", "script": "cyrillic" }, { "name": "Natalya Skorokhodova (latin)", "dob": "1968-8-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Ivan Demchenko (latin)", "dob": "1960-9-27", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "timothy williams (latin)", "dob": "1966-07-30", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Leonid Kalashnikov (latin)", "dob": "1960-8-6", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "kevin wilson (latin)", "dob": "2000-02-04", "address": "Saint Helena", "label": "negative", "script": "latin" }, { "name": "Vali Arlanizadeh (latin)", "dob": "1979-11-22", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "joseph roberts (latin)", "dob": "1993-04-06", "address": "Bermuda", "label": "negative", "script": "latin" }, { "name": "климент орехова (cyrillic)", "dob": "1956-12-01", "address": "Armenia", "label": "High Risk", "script": "cyrillic" } ], "query_template": "Generate 13 variations of {name}, ensuring phonetic similarity (20% Light, such as \"L{name}\" or \"{name}ly\", 60% Medium, such as \"{name}son\" or \"{name}ny\", 20% Far) and orthographic similarity (10% Light, such as \".{name}\" or \"{name}.\", 30% Medium, such as \"_{name}\" or \"{name}_\", 60% Far). Approximately 12% of the total 13 variations should follow these rule-based transformations: Additionally, generate variations that Abbreviate name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 12, "selected_rules": [ "shorten_name_to_abbreviations" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 12 } } }, { "seed_identities_with_labels": [ { "name": "Marko Svorcan (latin)", "dob": "1967-5-7", "address": "Serbia", "label": "positive", "script": "latin" }, { "name": "Xiaohong Ma (latin)", "dob": "1971-12-15", "address": "China", "label": "positive", "script": "latin" }, { "name": "Krym Kazanokov (latin)", "dob": "1962-7-19", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Валерий Семёнов (cyrillic)", "dob": "1960-9-16", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "william joly (latin)", "dob": "1946-10-20", "address": "Équateur", "label": "negative", "script": "latin" }, { "name": "stéphanie tanguy (latin)", "dob": "2006-05-14", "address": "Myanmar", "label": "negative", "script": "latin" }, { "name": "bernadette delorme (latin)", "dob": "1991-06-29", "address": "Libye", "label": "negative", "script": "latin" }, { "name": "أروى مذحج (arabic)", "dob": "1955-03-20", "address": "Faroe Islands", "label": "negative", "script": "arabic" }, { "name": "jill anderson (latin)", "dob": "1954-10-24", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "مريم هنری (arabic)", "dob": "1968-09-05", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "auguste merle (latin)", "dob": "2005-07-27", "address": "Rép. Dém. du Congo", "label": "negative", "script": "latin" }, { "name": "henrique tavares (latin)", "dob": "1961-03-11", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Rezaei Mehdi (latin)", "dob": "1976-9-21", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "adélaïde hoarau (latin)", "dob": "1940-10-28", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "valentine grégoire (latin)", "dob": "2002-02-02", "address": "Central African Republic", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 14 variations of {name} ensuring phonetic similarity (70% Light, 30% Medium) and orthographic similarity (50% Light, 50% Medium). Approximately 28% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap adjacent consonants, Replace random consonants with different consonants, and Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 28, "selected_rules": [ "swap_adjacent_consonants", "replace_random_consonant_with_random_consonant", "remove_all_spaces" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "remove_all_spaces": "Remove all spaces" }, "percentage": 28 } } }, { "seed_identities_with_labels": [ { "name": "adélaïde prévost (latin)", "dob": "1960-03-03", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "steven powers (latin)", "dob": "1954-12-26", "address": "Chad", "label": "negative", "script": "latin" }, { "name": "Михаил Гербер (cyrillic)", "dob": "1983-1-25", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Touraj Zangeneh (latin)", "dob": "1958-8-5", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "william perez (latin)", "dob": "1946-11-15", "address": "Guyana", "label": "negative", "script": "latin" }, { "name": "crystal harding (latin)", "dob": "2004-09-16", "address": "Saint Pierre and Miquelon", "label": "negative", "script": "latin" }, { "name": "evan green (latin)", "dob": "2004-05-09", "address": "Falkland Islands (Malvinas)", "label": "negative", "script": "latin" }, { "name": "Sergey NEVEROV (latin)", "dob": "1961-12-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "валентина игнатьев (cyrillic)", "dob": "1997-07-08", "address": "Andorra", "label": "negative", "script": "cyrillic" }, { "name": "margarida nunes (latin)", "dob": "1935-07-04", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "German Gref (latin)", "dob": "1964-2-8", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Vakhtang Gomelauri (latin)", "dob": "1975-12-24", "address": "Georgia", "label": "positive", "script": "latin" }, { "name": "celestino río (latin)", "dob": "1940-02-05", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "patricia richardson (latin)", "dob": "1930-06-26", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "кузьма ефремов (cyrillic)", "dob": "1931-11-16", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 23% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random consonant, and Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 23, "selected_rules": [ "remove_random_consonant", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 23 } } }, { "seed_identities_with_labels": [ { "name": "clementina gisbert (latin)", "dob": "1966-03-01", "address": "Haití", "label": "negative", "script": "latin" }, { "name": "Walid Al-Rawi (latin)", "dob": "1988-11-11", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "Андрей Сапелин (cyrillic)", "dob": "1965-9-16", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "susanne martins (latin)", "dob": "1940-10-15", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "jon perez (latin)", "dob": "1932-12-26", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Jihad Kansou (latin)", "dob": "1966-2-10", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "daniel jacquet (latin)", "dob": "1929-03-16", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Serhiy Melnychuk (latin)", "dob": "1976-9-30", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "فاروق الوعري (arabic)", "dob": "1999-03-05", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "marina haro (latin)", "dob": "1985-03-23", "address": "Malta", "label": "negative", "script": "latin" }, { "name": "amaya peralta (latin)", "dob": "1970-05-08", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "emelina campo (latin)", "dob": "2005-12-10", "address": "Sri Lanka", "label": "negative", "script": "latin" }, { "name": "Elias AL-KASHMIRI (latin)", "dob": "1964-2-10", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "лоранс златков (cyrillic)", "dob": "1997-09-21", "address": "Oman", "label": "negative", "script": "cyrillic" }, { "name": "iván zaragoza (latin)", "dob": "1962-06-16", "address": "Austria", "label": "negative", "script": "latin" } ], "query_template": "Generate exactly 11 execution vectors (name variations) for {name}. ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (10% Light, 30% Medium, 60% Far). Approximately 26% of the total 11 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Duplicate a random letter, and Replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 26, "selected_rules": [ "duplicate_random_letter_as_double_letter", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 26 } } }, { "seed_identities_with_labels": [ { "name": "lázaro luz (latin)", "dob": "2006-02-27", "address": "Granada", "label": "negative", "script": "latin" }, { "name": "Aleksei MOZHOVYY (latin)", "dob": "1975-4-3", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "كاملة الراشد (arabic)", "dob": "1981-11-13", "address": "Guatemala", "label": "negative", "script": "arabic" }, { "name": "Wafiq Naser (latin)", "dob": "1964-7-10", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "rosalinda bustamante (latin)", "dob": "1935-04-01", "address": "Azerbaiyán", "label": "negative", "script": "latin" }, { "name": "ceferino bautista (latin)", "dob": "1962-06-20", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "suzanne texier (latin)", "dob": "1984-03-04", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Sergei Savchenkov (latin)", "dob": "1954-10-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "edmundo puga (latin)", "dob": "1929-11-14", "address": "Camerún", "label": "negative", "script": "latin" }, { "name": "amanda villena (latin)", "dob": "1976-02-04", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "كامل بنو ليث (arabic)", "dob": "1944-10-06", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "مجید مسلط (arabic)", "dob": "1968-10-15", "address": "Iran", "label": "positive", "script": "arabic" }, { "name": "Hossein VAZIRI (latin)", "dob": "1961-3-21", "address": "Malaysia", "label": "positive", "script": "latin" }, { "name": "coral carpio (latin)", "dob": "1933-01-26", "address": "Túnez", "label": "negative", "script": "latin" }, { "name": "josep vidal (latin)", "dob": "1937-09-21", "address": "Bolivia", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 70% Light, 30% Medium, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 19% of variations that follow: Additionally, generate variations that perform these transformations: Abbreviate name parts, Replace double letters with a single letter, and Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 19, "selected_rules": [ "shorten_name_to_abbreviations", "replace_double_letters_with_single_letter", "remove_all_spaces" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "remove_all_spaces": "Remove all spaces" }, "percentage": 19 } } }, { "seed_identities_with_labels": [ { "name": "kimberly hill (latin)", "dob": "2001-09-20", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "chelsea hamilton (latin)", "dob": "1932-09-19", "address": "Bahrain", "label": "negative", "script": "latin" }, { "name": "ellen oliver (latin)", "dob": "1956-09-26", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Aleksey Chumakov (latin)", "dob": "1974-5-6", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "اعتكاف عنز بن وائل (arabic)", "dob": "1990-06-13", "address": "Puerto Rico", "label": "negative", "script": "arabic" }, { "name": "miguel solomon (latin)", "dob": "1938-07-09", "address": "Hungary", "label": "negative", "script": "latin" }, { "name": "Fatemeh Sadeghi (latin)", "dob": "1995-11-28", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "jeanne phillips (latin)", "dob": "1983-12-24", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "kaylee castillo (latin)", "dob": "2004-11-07", "address": "Reunion", "label": "negative", "script": "latin" }, { "name": "kathy wilkinson (latin)", "dob": "1976-06-18", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Ochur-Suge Mongush (latin)", "dob": "1993-3-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "allen moore (latin)", "dob": "1940-04-25", "address": "Mauritius", "label": "negative", "script": "latin" }, { "name": "Alexander Rakitin (latin)", "dob": "1958-5-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "عتاب المفتي (arabic)", "dob": "1977-03-11", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "محمود بغلانی (arabic)", "dob": "1978-3-20", "address": "Iran", "label": "positive", "script": "arabic" } ], "query_template": "Generate exactly 15 name variations for {name}, ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (50% Light, 50% Medium). Approximately 38% of the total 15 variations should follow these rule-based transformations: Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 38, "selected_rules": [ "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 38 } } }, { "seed_identities_with_labels": [ { "name": "richard blanchet (latin)", "dob": "1951-11-06", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "أسرار اهرام (arabic)", "dob": "1956-09-12", "address": "Cote d'Ivoire", "label": "negative", "script": "arabic" }, { "name": "Yevgeniya Podgornova (latin)", "dob": "1980-7-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "lola sevillano (latin)", "dob": "1992-05-01", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Владимир Антонов (cyrillic)", "dob": "1979-12-24", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "звездолет пенджакова (cyrillic)", "dob": "1959-03-09", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "kristen cline (latin)", "dob": "1925-08-17", "address": "United States Virgin Islands", "label": "negative", "script": "latin" }, { "name": "erica davis (latin)", "dob": "2002-09-19", "address": "Guernsey", "label": "negative", "script": "latin" }, { "name": "franck besson (latin)", "dob": "2006-08-12", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "ryan carter (latin)", "dob": "1933-04-21", "address": "Sweden", "label": "negative", "script": "latin" }, { "name": "Tatyana Dyakonova (latin)", "dob": "1970-4-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "patricia ortiz (latin)", "dob": "1931-02-02", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Maria Faassen (latin)", "dob": "1985-4-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Ruslan Geremeyev (latin)", "dob": "1978-5-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "brian meadows (latin)", "dob": "1928-08-14", "address": "Svalbard & Jan Mayen Islands", "label": "negative", "script": "latin" } ], "query_template": "Generate exactly 7 execution vectors for {name}, ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (10% Light, 50% Medium, 40% Far). Approximately 49% of the total 7 variations should follow these rule-based transformations: Generate variations that Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 49, "selected_rules": [ "name_parts_permutations" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts" }, "percentage": 49 } } }, { "seed_identities_with_labels": [ { "name": "Галина Пятых (cyrillic)", "dob": "1970-5-12", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Mikalai Verabei (latin)", "dob": "1963-5-4", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "цоцо габровлиева (cyrillic)", "dob": "1982-10-22", "address": "Burundi", "label": "negative", "script": "cyrillic" }, { "name": "thierry charrier (latin)", "dob": "1980-12-04", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Tatyana Dyakonova (latin)", "dob": "1970-4-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "astrid traore (latin)", "dob": "1949-10-19", "address": "Micronésie (États fédérés de)", "label": "negative", "script": "latin" }, { "name": "telmo tomé (latin)", "dob": "1983-10-21", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "cécile tessier (latin)", "dob": "1962-05-08", "address": "Tanzanie", "label": "negative", "script": "latin" }, { "name": "фаина белоусова (cyrillic)", "dob": "1997-02-25", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "michelle grondin (latin)", "dob": "2000-08-04", "address": "Bahamas", "label": "negative", "script": "latin" }, { "name": "Hataiwan WORAWATVICHAI (latin)", "dob": "1956-1-9", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "Myo Oo (latin)", "dob": "1960-6-23", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "michael sanchez (latin)", "dob": "1976-10-19", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "lucas myers (latin)", "dob": "1978-04-27", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "susan boutin (latin)", "dob": "1935-07-24", "address": "Polynésie française", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 22% of variations that follow: Additionally, generate variations that perform these transformations: Duplicate a random letter, Reorder name parts, and Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 22, "selected_rules": [ "duplicate_random_letter_as_double_letter", "name_parts_permutations", "insert_random_letter" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "name_parts_permutations": "Reorder name parts", "insert_random_letter": "Insert a random letter" }, "percentage": 22 } } }, { "seed_identities_with_labels": [ { "name": "Ahmad Seyedoshohada (latin)", "dob": "1959-4-27", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "alexandrie marion (latin)", "dob": "1992-11-17", "address": "France", "label": "negative", "script": "latin" }, { "name": "Timur Badr (latin)", "dob": "1984-10-18", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "isaac launay (latin)", "dob": "1984-12-09", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "аникей казаков (cyrillic)", "dob": "2003-11-24", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "alex perrin (latin)", "dob": "1927-04-26", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Pierre Obeid (latin)", "dob": "1959-12-14", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "Михаил Берулава (cyrillic)", "dob": "1950-8-3", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "arnaude rodriguez (latin)", "dob": "1927-09-15", "address": "Pérou", "label": "negative", "script": "latin" }, { "name": "Abbas Abdiasjerd (latin)", "dob": "1960-9-9", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "madeleine hamel (latin)", "dob": "2003-01-10", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "élisabeth thierry (latin)", "dob": "1967-07-21", "address": "Ouganda", "label": "negative", "script": "latin" }, { "name": "бодромир ждраков (cyrillic)", "dob": "2003-02-15", "address": "Comoros", "label": "negative", "script": "cyrillic" }, { "name": "vincent dubois (latin)", "dob": "1949-01-06", "address": "Colombie", "label": "negative", "script": "latin" }, { "name": "arturo sala (latin)", "dob": "1970-04-19", "address": "Venezuela", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 55% of variations that follow: Additionally, generate variations that perform these transformations: Convert name to initials, Replace random consonants with different consonants, and Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 55, "selected_rules": [ "shorten_name_to_initials", "replace_random_consonant_with_random_consonant", "name_parts_permutations" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "name_parts_permutations": "Reorder name parts" }, "percentage": 55 } } }, { "seed_identities_with_labels": [ { "name": "Vasiliy Yurchenko (latin)", "dob": "1960-9-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "diego valente (latin)", "dob": "1926-10-28", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "michèle boucher (latin)", "dob": "1949-06-21", "address": "Zimbabwe", "label": "negative", "script": "latin" }, { "name": "hugues martinez (latin)", "dob": "1983-07-02", "address": "Sri Lanka", "label": "negative", "script": "latin" }, { "name": "Юрый Назаранка (cyrillic)", "dob": "1976-4-17", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Aysen Nikolayev (latin)", "dob": "1972-1-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "геннадий третьякова (cyrillic)", "dob": "1992-03-09", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "Mahdy Helbawi (latin)", "dob": "1987-10-30", "address": "Colombia", "label": "positive", "script": "latin" }, { "name": "françois blot (latin)", "dob": "1947-02-06", "address": "Féroé (Îles)", "label": "negative", "script": "latin" }, { "name": "marguerite bigot (latin)", "dob": "1951-10-24", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "adalberto río (latin)", "dob": "1931-10-18", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "charles bishop (latin)", "dob": "1947-06-12", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "inès deschamps (latin)", "dob": "1936-04-24", "address": "Espagne", "label": "negative", "script": "latin" }, { "name": "Recep Aydin (latin)", "dob": "1996-11-14", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "عقيل الكالوتي (arabic)", "dob": "1980-05-30", "address": "Jordan", "label": "negative", "script": "arabic" } ], "query_template": "Generate exactly 6 execution vectors for each target identity {name}, ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 28% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that: Replace random consonants with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 28, "selected_rules": [ "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 28 } } }, { "seed_identities_with_labels": [ { "name": "roxana carrasco (latin)", "dob": "1967-08-30", "address": "Hungría", "label": "negative", "script": "latin" }, { "name": "sandalio manso (latin)", "dob": "1945-01-23", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "anna burns (latin)", "dob": "1983-12-18", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "marciano marin (latin)", "dob": "1962-10-03", "address": "Andorra", "label": "negative", "script": "latin" }, { "name": "auguste parent (latin)", "dob": "1955-07-13", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "دهمان الحسيني (arabic)", "dob": "1996-03-21", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "sabine mallet (latin)", "dob": "1938-01-11", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "lara perez (latin)", "dob": "1994-05-20", "address": "Liechtenstein", "label": "negative", "script": "latin" }, { "name": "Yuriy Zaitsev (latin)", "dob": "1970-12-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "teresa méndez (latin)", "dob": "1983-09-03", "address": "Filipinas", "label": "negative", "script": "latin" }, { "name": "Gulbuddin HEKHMARTYAR (latin)", "dob": "1949-8-1", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Mikalai Verabei (latin)", "dob": "1963-5-4", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "Aleksei Budnev (latin)", "dob": "1964-7-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "موفّق بن لادن (arabic)", "dob": "2001-08-09", "address": "Greenland", "label": "negative", "script": "arabic" }, { "name": "Галина Токарева (cyrillic)", "dob": "1951-9-24", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate 7 variations of {name} ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 28% of the total 7 variations should follow these rule-based transformations: Reorder name parts, Replace double letters with a single letter, and Replace random consonants with different consonants. Additionally, generate an equal number of phonetic similarity variations for each category (Light and Medium) that are exact matches to the original name, but with added middle names or suffixes/prefixes. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 28, "selected_rules": [ "name_parts_permutations", "replace_double_letters_with_single_letter", "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts", "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 28 } } }, { "seed_identities_with_labels": [ { "name": "истилияна пъков (cyrillic)", "dob": "1989-01-18", "address": "Saint Helena", "label": "negative", "script": "cyrillic" }, { "name": "lisa torres (latin)", "dob": "1982-01-18", "address": "Italy", "label": "negative", "script": "latin" }, { "name": "david freeman (latin)", "dob": "1964-05-26", "address": "Kazakhstan", "label": "negative", "script": "latin" }, { "name": "Alireza Chegha-Marani (latin)", "dob": "1962-8-11", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Amir Khamzat (latin)", "dob": "1974-10-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Майя Токарева (cyrillic)", "dob": "1975-1-18", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "nicole caldwell (latin)", "dob": "1963-08-21", "address": "Colombia", "label": "negative", "script": "latin" }, { "name": "shane valencia (latin)", "dob": "1940-12-13", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "илья михеев (cyrillic)", "dob": "1949-04-16", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "Krym Kazanokov (latin)", "dob": "1962-7-19", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Egor Mozhaev (latin)", "dob": "1982-5-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "rené tessier (latin)", "dob": "1989-08-27", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "iara sousa (latin)", "dob": "1942-08-26", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "meagan williams (latin)", "dob": "1955-12-24", "address": "Morocco", "label": "negative", "script": "latin" }, { "name": "alice matias (latin)", "dob": "1979-03-05", "address": "Mozambique", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 10 execution vectors for {name}, ensuring phonetic similarity (50% Light: {name}n, {name}s, 50% Medium: {name}in, {name}on) and orthographic similarity (20% Light: {name}y, {name}ie, 60% Medium: {name}i, {name}o, 20% Far: {name}z, {name}x). Approximately 38% of the total variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Add a title prefix (Mr., Dr., etc.) to become {title}{name}, Abbreviate name parts to become {shorten}{name}, and Remove all spaces to become {removespaces}{name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 38, "selected_rules": [ "add_random_leading_title", "shorten_name_to_abbreviations", "remove_all_spaces" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "shorten_name_to_abbreviations": "Abbreviate name parts", "remove_all_spaces": "Remove all spaces" }, "percentage": 38 } } }, { "seed_identities_with_labels": [ { "name": "Hataiwan WORAWATVICHAI (latin)", "dob": "1956-1-9", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "erin navarro (latin)", "dob": "2001-11-23", "address": "Romania", "label": "negative", "script": "latin" }, { "name": "guilherme gaspar (latin)", "dob": "1973-10-02", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "فاطمه محمد پور (arabic)", "dob": "1937-05-18", "address": "Sao Tome and Principe", "label": "negative", "script": "arabic" }, { "name": "eric walters (latin)", "dob": "1932-11-08", "address": "Belize", "label": "negative", "script": "latin" }, { "name": "чоно джогов (cyrillic)", "dob": "1996-02-01", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "Василий Юрченко (cyrillic)", "dob": "1960-9-26", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "zacharie barbier (latin)", "dob": "1953-10-13", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "david campos (latin)", "dob": "1984-11-26", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "melissa arnold (latin)", "dob": "1954-12-26", "address": "Northern Mariana Islands", "label": "negative", "script": "latin" }, { "name": "ryan jones (latin)", "dob": "1941-10-27", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Malek Ruben (latin)", "dob": "1960-1-1", "address": "South Sudan", "label": "positive", "script": "latin" }, { "name": "Yakiv Antonov (latin)", "dob": "1972-11-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "megan meza (latin)", "dob": "1940-01-09", "address": "Norfolk Island", "label": "negative", "script": "latin" }, { "name": "Kseniya Shoigu (latin)", "dob": "1991-1-10", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 15 execution vectors ({name} variations) for each target identity. Ensure phonetic similarity with 20% Light variations (e.g., {name}ly, {name}ski), 60% Medium variations (e.g., {name}n, {name}z), and 20% Far variations (e.g., {name}tron, {name}x). Additionally, ensure orthographic similarity with 50% Light variations (e.g., {name}, {name}-y) and 50% Medium variations (e.g., {name}o, {name}i). Approximately 25% of the total variations should follow these rule-based transformations: Additionally, generate variations that: Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 25, "selected_rules": [ "remove_random_vowel" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel" }, "percentage": 25 } } }, { "seed_identities_with_labels": [ { "name": "аникей воронцова (cyrillic)", "dob": "1951-12-25", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "Igor Kobzev (latin)", "dob": "1966-10-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "anselmo bayona (latin)", "dob": "1946-10-17", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "febe muñoz (latin)", "dob": "1972-03-02", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "bob hernandez (latin)", "dob": "2004-03-22", "address": "Liberia", "label": "negative", "script": "latin" }, { "name": "heather martinez (latin)", "dob": "1994-07-13", "address": "Kuwait", "label": "negative", "script": "latin" }, { "name": "борислав рябошапка (cyrillic)", "dob": "1933-11-05", "address": "Zambia", "label": "negative", "script": "cyrillic" }, { "name": "jennifer chan (latin)", "dob": "1948-11-14", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "karen herrera (latin)", "dob": "2004-04-28", "address": "Reunion", "label": "negative", "script": "latin" }, { "name": "Amer ALshawa (latin)", "dob": "1964-4-29", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "jeffery nichols (latin)", "dob": "1926-01-27", "address": "Cote d'Ivoire", "label": "negative", "script": "latin" }, { "name": "Abdul AL-MAGHREBI (latin)", "dob": "1970-7-1", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Галина Лукашенко (cyrillic)", "dob": "1955-1-1", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "daniel thomas (latin)", "dob": "1954-12-02", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Iurii Hotsaniuk (latin)", "dob": "1966-7-18", "address": "Ukraine", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 44% of variations that follow: Additionally, generate variations that perform these transformations: Remove all spaces, and Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 44, "selected_rules": [ "remove_all_spaces", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 44 } } }, { "seed_identities_with_labels": [ { "name": "sabine guillon (latin)", "dob": "1968-12-19", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "зиновій чаленко (cyrillic)", "dob": "1981-08-18", "address": "Australia", "label": "negative", "script": "cyrillic" }, { "name": "Elvis KELJMENDI (latin)", "dob": "1978-5-3", "address": "Kosovo", "label": "positive", "script": "latin" }, { "name": "jaime marques (latin)", "dob": "1950-11-18", "address": "Mónaco", "label": "negative", "script": "latin" }, { "name": "Kyo'ng-il Kim (latin)", "dob": "1979-8-1", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "رامي سرندح (arabic)", "dob": "2001-08-12", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "Rustam Kalimullin (latin)", "dob": "1958-1-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "СЕРГЕЙ ЧЕРГЕЙКО (cyrillic)", "dob": "1986-8-27", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "josué garay (latin)", "dob": "1974-01-23", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "emmanuel dumont (latin)", "dob": "1953-01-04", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Sergei Kudryashov (latin)", "dob": "1967-7-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "nicole rocha (latin)", "dob": "1931-01-19", "address": "Ilhas Heard e McDonald", "label": "negative", "script": "latin" }, { "name": "victória melo (latin)", "dob": "1952-08-14", "address": "Irlanda", "label": "negative", "script": "latin" }, { "name": "raquel santos (latin)", "dob": "1960-12-22", "address": "Roménia", "label": "negative", "script": "latin" }, { "name": "fernando cunha (latin)", "dob": "1931-10-29", "address": "Mozambique", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 100% Medium, and also include 51% of variations that follow: Additionally, generate variations that perform these transformations: Replace random vowels with different vowels, and Swap adjacent syllables. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 51, "selected_rules": [ "replace_random_vowel_with_random_vowel", "swap_adjacent_syllables" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 51 } } }, { "seed_identities_with_labels": [ { "name": "michael perez (latin)", "dob": "1961-06-11", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "عبد القدّوس هذيل (arabic)", "dob": "1992-04-08", "address": "Serbia", "label": "negative", "script": "arabic" }, { "name": "daniel thompson (latin)", "dob": "1979-07-17", "address": "Sao Tome and Principe", "label": "negative", "script": "latin" }, { "name": "Olexiy KOSTRUBITSKY (latin)", "dob": "1978-8-24", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "kristen leblanc (latin)", "dob": "1936-04-22", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "tyler smith (latin)", "dob": "1978-06-04", "address": "Moldova", "label": "negative", "script": "latin" }, { "name": "kimberly adams (latin)", "dob": "2001-10-20", "address": "Chile", "label": "negative", "script": "latin" }, { "name": "sandra estes (latin)", "dob": "1985-05-08", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "David Crosby (latin)", "dob": "1990-5-5", "address": "Zimbabwe", "label": "positive", "script": "latin" }, { "name": "Геннадий Гаркуша (cyrillic)", "dob": "1968-1-26", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "аггей логинова (cyrillic)", "dob": "1931-08-22", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "Guowei CENG (latin)", "dob": "1963-9-1", "address": "Hong Kong", "label": "positive", "script": "latin" }, { "name": "Yuriy Karaev (latin)", "dob": "1966-6-21", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "donald wolf (latin)", "dob": "1999-02-19", "address": "Marshall Islands", "label": "negative", "script": "latin" }, { "name": "marguerite tessier (latin)", "dob": "1996-12-28", "address": "Central African Republic", "label": "High Risk", "script": "latin" } ], "query_template": "Generate a total of 6 variations of {name} ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 33% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 33, "selected_rules": [ "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 33 } } }, { "seed_identities_with_labels": [ { "name": "rolando estrada (latin)", "dob": "1944-04-18", "address": "Túnez", "label": "negative", "script": "latin" }, { "name": "Aleksei Budnev (latin)", "dob": "1964-7-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "paola castro (latin)", "dob": "1982-06-07", "address": "Bahrein", "label": "negative", "script": "latin" }, { "name": "وضّاح آل العسكري (arabic)", "dob": "1993-02-24", "address": "Lesotho", "label": "negative", "script": "arabic" }, { "name": "فربد اهری (arabic)", "dob": "1990-4-20", "address": "Iran", "label": "positive", "script": "arabic" }, { "name": "brigitte michaud (latin)", "dob": "1961-02-26", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "samuel doménech (latin)", "dob": "1948-04-22", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "eloy sastre (latin)", "dob": "1987-11-21", "address": "Letonia", "label": "negative", "script": "latin" }, { "name": "Arkady Ponomarev (latin)", "dob": "1956-5-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "кир воронова (cyrillic)", "dob": "1934-09-01", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "Muhammad Al-Nabi (latin)", "dob": "1952-1-1", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "zacharie garnier (latin)", "dob": "1967-08-20", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "patricia dawson (latin)", "dob": "1948-12-02", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "aurora ríos (latin)", "dob": "1949-03-14", "address": "Cabo Verde", "label": "negative", "script": "latin" }, { "name": "Aleksei Gnedovskii (latin)", "dob": "1964-12-31", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 15 execution vectors (name variations) for {name}, ensuring phonetic similarity with 50% Light and 50% Medium, and orthographic similarity with 10% Light, 30% Medium, and 60% Far. Approximately 20% of the total variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Add a title prefix (Mr., Dr., etc.), and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 20, "selected_rules": [ "add_random_leading_title", "swap_random_letter" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 20 } } }, { "seed_identities_with_labels": [ { "name": "عبد المحيي جزيني (arabic)", "dob": "1979-01-17", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "Su WEI (latin)", "dob": "1959-12-3", "address": "Hong Kong", "label": "positive", "script": "latin" }, { "name": "evelia caparrós (latin)", "dob": "1959-08-11", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "luna fernandes (latin)", "dob": "1989-08-31", "address": "Antígua e Barbuda", "label": "negative", "script": "latin" }, { "name": "eleuterio briones (latin)", "dob": "1926-03-17", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "césar pinto (latin)", "dob": "1965-07-15", "address": "Ilha do Natal", "label": "negative", "script": "latin" }, { "name": "Gary Lam (latin)", "dob": "1982-12-3", "address": "China", "label": "positive", "script": "latin" }, { "name": "álvaro brito (latin)", "dob": "1932-07-04", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Yuri Valyaev (latin)", "dob": "1959-4-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "موفّق آل قصير (arabic)", "dob": "1948-01-23", "address": "Tokelau", "label": "negative", "script": "arabic" }, { "name": "Muhammad Dayri (latin)", "dob": "1961-6-6", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Дмитрий Киселев (cyrillic)", "dob": "1954-4-26", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "larry smith (latin)", "dob": "1953-09-20", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "kevin pereira (latin)", "dob": "1953-07-24", "address": "Clipperton Island", "label": "negative", "script": "latin" }, { "name": "edgar rodrigues (latin)", "dob": "1942-07-17", "address": "Atlantic Ocean", "label": "negative", "script": "latin" } ], "query_template": "Generate exactly 6 execution vectors for each target identity {name}, ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 18% of the total 6 variations should follow these rule-based transformations: Duplicate a random letter, Insert a random letter, and Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 18, "selected_rules": [ "duplicate_random_letter_as_double_letter", "insert_random_letter", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "insert_random_letter": "Insert a random letter", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 18 } } }, { "seed_identities_with_labels": [ { "name": "amber harris (latin)", "dob": "2005-09-05", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "sébastien rousset (latin)", "dob": "1986-10-17", "address": "Irlande", "label": "negative", "script": "latin" }, { "name": "عبد القدّوس بقشان (arabic)", "dob": "1974-01-29", "address": "Ethiopia", "label": "negative", "script": "arabic" }, { "name": "Wai-chung Lo (latin)", "dob": "1961-11-19", "address": "Hong Kong", "label": "positive", "script": "latin" }, { "name": "noémi meyer (latin)", "dob": "1960-02-17", "address": "Costa Rica", "label": "negative", "script": "latin" }, { "name": "renée thierry (latin)", "dob": "1936-12-06", "address": "Laos", "label": "negative", "script": "latin" }, { "name": "Andrei DUBEN (latin)", "dob": "1970-12-12", "address": "Chile", "label": "positive", "script": "latin" }, { "name": "mathilde pelletier (latin)", "dob": "1936-04-13", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "emmanuelle mallet (latin)", "dob": "1948-02-08", "address": "Corée, Sud", "label": "negative", "script": "latin" }, { "name": "toribio español (latin)", "dob": "2000-04-07", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Alexander Udodov (latin)", "dob": "1969-6-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Сергей Тен (cyrillic)", "dob": "1976-8-25", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Abdullah Faisal (latin)", "dob": "1963-9-10", "address": "Jamaica", "label": "positive", "script": "latin" }, { "name": "адам логинова (cyrillic)", "dob": "1944-07-16", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "virginie dupuy (latin)", "dob": "1966-07-31", "address": "Mali", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 6 execution vectors ({name} variations) for each target identity. Ensure phonetic similarity by implementing 30% Light, 40% Medium, and 30% Far sound-alike names. Additionally, ensure orthographic similarity by implementing 30% Light, 40% Medium, and 30% Far visually similar spellings. Approximately 46% of the total 6 variations should follow these rule-based transformations: Replace random vowels with different vowels, Convert {name} to initials, and Add a title suffix (Jr., PhD, etc.). Provide the remaining 54% as random name variations without any specific similarity requirements or transformation rules. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 46, "selected_rules": [ "replace_random_vowel_with_random_vowel", "shorten_name_to_initials", "add_random_trailing_title" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "shorten_name_to_initials": "Convert name to initials", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 46 } } }, { "seed_identities_with_labels": [ { "name": "Aras Karim (latin)", "dob": "1967-8-6", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "isabelle teixeira (latin)", "dob": "1988-04-14", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Egor Mozhaev (latin)", "dob": "1982-5-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Ekaterina Kharchenko (latin)", "dob": "1977-8-11", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Muhammad YUNUS (latin)", "dob": "1979-3-3", "address": "Indonesia", "label": "positive", "script": "latin" }, { "name": "lupe estevez (latin)", "dob": "1981-04-10", "address": "Portugal", "label": "negative", "script": "latin" }, { "name": "pilar calatayud (latin)", "dob": "1944-11-16", "address": "Tonga", "label": "negative", "script": "latin" }, { "name": "صادح بكيل (arabic)", "dob": "1983-04-17", "address": "Saint Martin", "label": "negative", "script": "arabic" }, { "name": "nazaret carbonell (latin)", "dob": "1926-03-16", "address": "Honduras", "label": "negative", "script": "latin" }, { "name": "morena flor (latin)", "dob": "1999-09-26", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "ignacio sureda (latin)", "dob": "1940-03-26", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "ефрем белозерова (cyrillic)", "dob": "1971-10-10", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "志光 区 (chinese)", "dob": "1961-8-16", "address": "China", "label": "positive", "script": "chinese" }, { "name": "federico carnero (latin)", "dob": "1978-07-10", "address": "El Salvador", "label": "negative", "script": "latin" }, { "name": "juliette coste (latin)", "dob": "1997-11-28", "address": "Burkina Faso", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 38% of variations that follow: Additionally, generate variations that perform these transformations: Swap random adjacent letters, and Replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 38, "selected_rules": [ "swap_random_letter", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 38 } } }, { "seed_identities_with_labels": [ { "name": "lorraine delmas (latin)", "dob": "1975-08-20", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Ella Pamfilova (latin)", "dob": "1953-9-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "marguerite richard (latin)", "dob": "1953-12-31", "address": "Vatican (Etat du)", "label": "negative", "script": "latin" }, { "name": "Abu Aisyah (latin)", "dob": "1983-9-6", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Aleksander Drozdenko (latin)", "dob": "1964-11-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "نوح زلاطيمو (arabic)", "dob": "1952-09-11", "address": "Marshall Islands", "label": "negative", "script": "arabic" }, { "name": "élisabeth benard (latin)", "dob": "1975-10-09", "address": "Vatican (Etat du)", "label": "negative", "script": "latin" }, { "name": "alphonse fournier (latin)", "dob": "1935-09-03", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "степан лапина (cyrillic)", "dob": "1947-08-02", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "eliseo reina (latin)", "dob": "1973-01-12", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "William ZHOU (latin)", "dob": "1977-3-25", "address": "China", "label": "positive", "script": "latin" }, { "name": "chantal andre (latin)", "dob": "1964-08-06", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "adélaïde boulay (latin)", "dob": "1950-08-18", "address": "Chypre", "label": "negative", "script": "latin" }, { "name": "antoinette grenier (latin)", "dob": "1984-09-28", "address": "Slovaquie", "label": "negative", "script": "latin" }, { "name": "Сергей Аземша (cyrillic)", "dob": "1974-7-17", "address": "Belarus", "label": "positive", "script": "cyrillic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 70% Light, 30% Medium, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 57% of variations that follow: Additionally, generate variations that perform these transformations: Duplicate a random letter, Convert name to initials, and Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 57, "selected_rules": [ "duplicate_random_letter_as_double_letter", "shorten_name_to_initials", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "shorten_name_to_initials": "Convert name to initials", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 57 } } }, { "seed_identities_with_labels": [ { "name": "نغم صليبا (arabic)", "dob": "1932-05-19", "address": "Guinea-Bissau", "label": "negative", "script": "arabic" }, { "name": "dylan guzman (latin)", "dob": "1951-08-04", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Su WEI (latin)", "dob": "1959-12-3", "address": "Hong Kong", "label": "positive", "script": "latin" }, { "name": "Saad AL-FAGIH (latin)", "dob": "1957-1-31", "address": "United Kingdom", "label": "positive", "script": "latin" }, { "name": "sabrina harris (latin)", "dob": "1986-11-19", "address": "Uganda", "label": "negative", "script": "latin" }, { "name": "deborah marshall (latin)", "dob": "1974-05-03", "address": "Tunisia", "label": "negative", "script": "latin" }, { "name": "timothy rowe (latin)", "dob": "2003-10-20", "address": "Brunei Darussalam", "label": "negative", "script": "latin" }, { "name": "Ibrahim Jathran (latin)", "dob": "1982-10-29", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "Leonid Kalashnikov (latin)", "dob": "1960-8-6", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "margarida antunes (latin)", "dob": "1980-12-20", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "nathaniel miller (latin)", "dob": "1983-06-25", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "capucine lombard (latin)", "dob": "1927-05-30", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "courtney davis (latin)", "dob": "1929-02-05", "address": "Seychelles", "label": "negative", "script": "latin" }, { "name": "авдей шубина (cyrillic)", "dob": "1962-02-13", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "Медни Кадырова (cyrillic)", "dob": "1978-9-7", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate 11 variations of {name}. ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (100% Medium). Approximately 39% of the total 11 variations should follow these rule-based transformations: Replace double letters with a single letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 39, "selected_rules": [ "replace_double_letters_with_single_letter" ], "rule_descriptions": { "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 39 } } }, { "seed_identities_with_labels": [ { "name": "corinne alexandre (latin)", "dob": "1992-10-04", "address": "République Dominicaine", "label": "negative", "script": "latin" }, { "name": "constance camus (latin)", "dob": "1950-06-26", "address": "Turks et Caïques (Îles)", "label": "negative", "script": "latin" }, { "name": "astrid leclerc (latin)", "dob": "1952-12-21", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Artem Malyshev (latin)", "dob": "1988-2-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Алексей Кузьмичёв (cyrillic)", "dob": "1962-10-15", "address": "France", "label": "positive", "script": "cyrillic" }, { "name": "Waseem al-Assad (latin)", "dob": "1980-7-18", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "тоска занов (cyrillic)", "dob": "1933-11-14", "address": "Argentina", "label": "negative", "script": "cyrillic" }, { "name": "مرعي غامد (arabic)", "dob": "1956-09-05", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "Abu LLONGGO (latin)", "dob": "1972-5-15", "address": "Philippines", "label": "positive", "script": "latin" }, { "name": "ramón sanz (latin)", "dob": "1953-12-14", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "hortense michel (latin)", "dob": "1938-07-29", "address": "Vierges britanniques (Îles)", "label": "negative", "script": "latin" }, { "name": "margaret roussel (latin)", "dob": "1936-09-14", "address": "Syrie", "label": "negative", "script": "latin" }, { "name": "Ismatullah Khalozai (latin)", "dob": "1995-1-1", "address": "Afghanistan", "label": "positive", "script": "latin" }, { "name": "camille legrand (latin)", "dob": "1978-09-22", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "nicolás seguí (latin)", "dob": "1935-04-07", "address": "Venezuela", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 9 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 15% of variations that follow: Additionally, generate variations that perform these transformations: Swap adjacent consonants, Remove a random consonant, and Replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 15, "selected_rules": [ "swap_adjacent_consonants", "remove_random_consonant", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "remove_random_consonant": "Remove a random consonant", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 15 } } }, { "seed_identities_with_labels": [ { "name": "милий тимофеев (cyrillic)", "dob": "1952-02-19", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "Walid Al-Rawi (latin)", "dob": "1988-11-11", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "maggie chauveau (latin)", "dob": "1933-12-01", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Aegli Tamani-Phella (latin)", "dob": "1978-6-21", "address": "Cyprus", "label": "positive", "script": "latin" }, { "name": "alphonse bouvet (latin)", "dob": "2002-02-15", "address": "Antigua et Barbuda", "label": "negative", "script": "latin" }, { "name": "Кирилл Царёв (cyrillic)", "dob": "1978-9-25", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "agnès lombard (latin)", "dob": "1927-05-22", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "tristan joly (latin)", "dob": "1932-08-24", "address": "Suisse", "label": "negative", "script": "latin" }, { "name": "paulette leroux (latin)", "dob": "1995-03-08", "address": "Rwanda", "label": "negative", "script": "latin" }, { "name": "bernard menard (latin)", "dob": "1983-12-19", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Sergey YURASOV (latin)", "dob": "1964-1-4", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "antónio cruz (latin)", "dob": "1946-01-04", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Kseniya Shoigu (latin)", "dob": "1991-1-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "василь атаманюк (cyrillic)", "dob": "2001-12-31", "address": "Gambia", "label": "negative", "script": "cyrillic" }, { "name": "valentine joly (latin)", "dob": "1935-02-14", "address": "États-Unis", "label": "negative", "script": "latin" } ], "query_template": "Generate 7 variations of {name}. Ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (100% Light). Approximately 41% of the total 7 variations should follow these rule-based transformations: Use first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 1.0 }, "rule_based": { "rule_percentage": 41, "selected_rules": [ "initial_only_first_name" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name" }, "percentage": 41 } } }, { "seed_identities_with_labels": [ { "name": "caroline bourgeois (latin)", "dob": "1924-10-11", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Maria Faassen (latin)", "dob": "1985-4-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Hamzah al-Khalidi (latin)", "dob": "1984-7-22", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "saturnina alegria (latin)", "dob": "2007-06-28", "address": "San Vicente y las Granadinas", "label": "negative", "script": "latin" }, { "name": "زياد بكر بن وائل (arabic)", "dob": "1988-11-13", "address": "Tajikistan", "label": "negative", "script": "arabic" }, { "name": "florina farré (latin)", "dob": "1987-04-19", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "роза катърова (cyrillic)", "dob": "1951-04-19", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "arsenio gallo (latin)", "dob": "1928-06-18", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Aleksey Mordashov (latin)", "dob": "1965-9-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "pastora dueñas (latin)", "dob": "1989-12-22", "address": "San Vicente y las Granadinas", "label": "negative", "script": "latin" }, { "name": "carla baquero (latin)", "dob": "1966-10-10", "address": "República Democrática del Congo", "label": "negative", "script": "latin" }, { "name": "maría carmen porta (latin)", "dob": "1948-04-29", "address": "Madagascar", "label": "negative", "script": "latin" }, { "name": "ismael cunha (latin)", "dob": "1954-10-03", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "عبداله محرابي (arabic)", "dob": "1961-12-22", "address": "Iran", "label": "positive", "script": "arabic" }, { "name": "Aleksandr Kobets (latin)", "dob": "1959-9-27", "address": "Ukraine", "label": "positive", "script": "latin" } ], "query_template": "Generate 15 variations of {name}, ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 56% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Remove a random vowel from {name}, Use first name initial with last name as \"F{first_name}{last_name}\", and Delete a random letter from {name}. The remaining 44% should be generated using phonetic and orthographic similarity. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 56, "selected_rules": [ "remove_random_vowel", "initial_only_first_name", "delete_random_letter" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "initial_only_first_name": "Use first name initial with last name", "delete_random_letter": "Delete a random letter" }, "percentage": 56 } } }, { "seed_identities_with_labels": [ { "name": "Nataliya KHORSHEVA (latin)", "dob": "1972-7-14", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Amjad Sazgar (latin)", "dob": "1979-4-16", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "robert perrier (latin)", "dob": "1984-03-16", "address": "Nauru", "label": "negative", "script": "latin" }, { "name": "Ch'o'l-man Han (latin)", "dob": "1978-5-6", "address": "China", "label": "positive", "script": "latin" }, { "name": "brandy duke (latin)", "dob": "1932-05-02", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "frédéric weber (latin)", "dob": "1942-09-19", "address": "Géorgie du Sud et Sandwich du Sud (Îles)", "label": "negative", "script": "latin" }, { "name": "Vladimir Yakushev (latin)", "dob": "1968-6-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "michelle garnier (latin)", "dob": "1941-01-02", "address": "Tchad", "label": "negative", "script": "latin" }, { "name": "حمدان آل علي (arabic)", "dob": "1976-12-25", "address": "Palau", "label": "negative", "script": "arabic" }, { "name": "Иван Мусатов (cyrillic)", "dob": "1976-2-14", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "marthe adam (latin)", "dob": "1949-08-02", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "élodie rousset (latin)", "dob": "2001-12-12", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "гурий николаева (cyrillic)", "dob": "1998-08-15", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "michel moulin (latin)", "dob": "1947-06-09", "address": "Luxembourg", "label": "negative", "script": "latin" }, { "name": "patrick beard (latin)", "dob": "1945-06-28", "address": "Nigeria", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 14 execution vectors for each target identity {name}, ensuring phonetic similarity (10% Light: {name} with minor phonetic variation, 30% Medium: {name} with moderate phonetic variation, 60% Far: {name} with significant phonetic variation) and orthographic similarity (50% Light: {name} with minor orthographic variation, 50% Medium: {name} with moderate orthographic variation). Approximately 33% of the total variations should follow these rule-based transformations: Replace random consonants with different consonants in {name}, Replace spaces with special characters in {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 33, "selected_rules": [ "replace_random_consonant_with_random_consonant", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 33 } } }, { "seed_identities_with_labels": [ { "name": "مُناضل التوتنجي (arabic)", "dob": "1985-08-02", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "frédérique bernard (latin)", "dob": "1950-03-07", "address": "Îles Mineures Éloignées des États-Unis", "label": "negative", "script": "latin" }, { "name": "paulette guérin (latin)", "dob": "1928-01-03", "address": "Libéria", "label": "negative", "script": "latin" }, { "name": "charlotte durand (latin)", "dob": "1956-07-27", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "brigitte vaillant (latin)", "dob": "1969-03-04", "address": "Guadeloupe", "label": "negative", "script": "latin" }, { "name": "madeleine pascal (latin)", "dob": "1942-01-23", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "marine cousin (latin)", "dob": "1989-12-12", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "أفراح كسواني (arabic)", "dob": "2000-07-25", "address": "France", "label": "negative", "script": "arabic" }, { "name": "Marko Svorcan (latin)", "dob": "1967-5-7", "address": "Serbia", "label": "positive", "script": "latin" }, { "name": "Aleksander Drozdenko (latin)", "dob": "1964-11-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Aleksandr Nikolov (latin)", "dob": "1962-2-19", "address": "Bulgaria", "label": "positive", "script": "latin" }, { "name": "océane colin (latin)", "dob": "1994-08-10", "address": "Burundi", "label": "negative", "script": "latin" }, { "name": "marthe leclercq (latin)", "dob": "1983-09-06", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Pye Tayza (latin)", "dob": "1987-1-29", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "Николай Левичев (cyrillic)", "dob": "1953-5-28", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate 6 name variations for {name} ensuring phonetic similarity (20% Light, e.g. Mc{name}, 60% Medium, e.g. Mac{name}, 20% Far, e.g. Mark{s}name) and orthographic similarity (100% Medium). Approximately 43% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Replace spaces with special characters, Insert a random letter, and Replace random consonants with different consonants.. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 43, "selected_rules": [ "replace_spaces_with_random_special_characters", "insert_random_letter", "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "insert_random_letter": "Insert a random letter", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 43 } } }, { "seed_identities_with_labels": [ { "name": "nazaret bello (latin)", "dob": "1929-10-18", "address": "Camerún", "label": "negative", "script": "latin" }, { "name": "elías tudela (latin)", "dob": "1926-01-01", "address": "Croacia", "label": "negative", "script": "latin" }, { "name": "Vakhtang Gomelauri (latin)", "dob": "1975-12-24", "address": "Georgia", "label": "positive", "script": "latin" }, { "name": "marc bonnin (latin)", "dob": "2000-07-18", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "palmira puig (latin)", "dob": "1927-12-12", "address": "Madagascar", "label": "negative", "script": "latin" }, { "name": "ظاعن بنو أسد (arabic)", "dob": "2007-04-15", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "федосий елисеев (cyrillic)", "dob": "1987-02-16", "address": "Barbados", "label": "negative", "script": "cyrillic" }, { "name": "margot laporte (latin)", "dob": "1958-02-26", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "lázaro sebastián (latin)", "dob": "1976-08-11", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Hussam Al-Qatirji (latin)", "dob": "1982-1-11", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Abu-'Ubaydah Al-Agha (latin)", "dob": "1964-5-2", "address": "Saudi Arabia", "label": "positive", "script": "latin" }, { "name": "Sergey Topor-Gilka (latin)", "dob": "1970-2-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Михаило Развожаєв (cyrillic)", "dob": "1980-12-30", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "kimberly jones (latin)", "dob": "2004-06-24", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "anunciación espada (latin)", "dob": "1925-04-15", "address": "Tonga", "label": "negative", "script": "latin" } ], "query_template": "Generate 7 variations of {name}. Ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (10% Light, 50% Medium, 40% Far). Approximately 60% of the total 7 variations should follow these rule-based transformations: Additionally, generate variations that: Delete a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 60, "selected_rules": [ "delete_random_letter" ], "rule_descriptions": { "delete_random_letter": "Delete a random letter" }, "percentage": 60 } } }, { "seed_identities_with_labels": [ { "name": "anthony johnson (latin)", "dob": "1967-06-24", "address": "Antarctica (the territory South of 60 deg S)", "label": "negative", "script": "latin" }, { "name": "deborah braun (latin)", "dob": "1979-09-18", "address": "Jordan", "label": "negative", "script": "latin" }, { "name": "joseph gill (latin)", "dob": "1946-09-05", "address": "Svalbard & Jan Mayen Islands", "label": "negative", "script": "latin" }, { "name": "rafael henriques (latin)", "dob": "1984-12-04", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Павел Андреев (cyrillic)", "dob": "1980-2-6", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "anouk reynaud (latin)", "dob": "2003-07-03", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Aleksei Simanovskiy (latin)", "dob": "1955-9-6", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "مرسال بنو ليث (arabic)", "dob": "1999-02-14", "address": "Syria", "label": "High Risk", "script": "arabic" }, { "name": "steven santana (latin)", "dob": "1964-04-16", "address": "Saint Helena", "label": "negative", "script": "latin" }, { "name": "lucas rodrigues (latin)", "dob": "2000-03-27", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Oleksandr Basov (latin)", "dob": "1971-10-16", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "savannah king (latin)", "dob": "1983-10-20", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "полина бобров (cyrillic)", "dob": "1956-09-27", "address": "Belgium", "label": "negative", "script": "cyrillic" }, { "name": "Oman Abdulrohman (latin)", "dob": "1972-1-5", "address": "Indonesia", "label": "positive", "script": "latin" }, { "name": "Aegli Tamani-Phella (latin)", "dob": "1978-6-21", "address": "Cyprus", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 6 execution vectors for each target identity \"{name}\". ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (100% Medium). Approximately 33% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Convert {name} to initials, Insert a random letter into \"{name}\", and Swap adjacent consonants in \"{name}\". The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 33, "selected_rules": [ "shorten_name_to_initials", "insert_random_letter", "swap_adjacent_consonants" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "insert_random_letter": "Insert a random letter", "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 33 } } }, { "seed_identities_with_labels": [ { "name": "zacharie gérard (latin)", "dob": "1994-12-25", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Dmitry Perminov (latin)", "dob": "1979-4-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "georges perrot (latin)", "dob": "1967-07-31", "address": "Liechtenstein", "label": "negative", "script": "latin" }, { "name": "laure lucas (latin)", "dob": "1940-05-23", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "olivier techer (latin)", "dob": "1926-03-17", "address": "Niue", "label": "negative", "script": "latin" }, { "name": "bryan lima (latin)", "dob": "1967-05-25", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "laurence rousseau (latin)", "dob": "1963-04-22", "address": "Togo", "label": "negative", "script": "latin" }, { "name": "герман куликов (cyrillic)", "dob": "2005-06-24", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "françoise gomes (latin)", "dob": "1984-02-09", "address": "Turkménistan", "label": "negative", "script": "latin" }, { "name": "émilie guillaume (latin)", "dob": "1953-05-19", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "фили братухчев (cyrillic)", "dob": "1989-05-10", "address": "Eritrea", "label": "negative", "script": "cyrillic" }, { "name": "Vakhtang Gomelauri (latin)", "dob": "1975-12-24", "address": "Georgia", "label": "positive", "script": "latin" }, { "name": "Manal al-akhraz (latin)", "dob": "1970-2-2", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Виктор Кидяев (cyrillic)", "dob": "1956-7-9", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Aleksey Russkikh (latin)", "dob": "1968-7-17", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 6 variations of {name}, ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (10% Light, 50% Medium, 40% Far). Approximately 15% of the total 6 variations should follow these rule-based transformations: Remove a random vowel from {name} to create \"{removed_vowel_{name}}\", abbreviate name parts in {name} to create \"{abbreviated_{name}}\" (e.g. \"John Smith\" becomes \"J.S.\" or \"JS\"), and swap random adjacent letters in {name} to create \"{swapped_{name}}\" (e.g. \"John Smith\" becomes \"Hjonn Smiht\"). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 15, "selected_rules": [ "remove_random_vowel", "shorten_name_to_abbreviations", "swap_random_letter" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "shorten_name_to_abbreviations": "Abbreviate name parts", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 15 } } }, { "seed_identities_with_labels": [ { "name": "Irina Bubnova (latin)", "dob": "1983-4-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "یسنا کمالی (arabic)", "dob": "1996-03-24", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "juliette monnier (latin)", "dob": "1928-12-30", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Gholamali Mohammadi (latin)", "dob": "1963-6-22", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "louis guillot (latin)", "dob": "1987-07-15", "address": "Tokelau", "label": "negative", "script": "latin" }, { "name": "teófilo llamas (latin)", "dob": "1987-11-17", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Валентина Лаврик (cyrillic)", "dob": "1969-2-25", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "daniel hansen (latin)", "dob": "1970-11-17", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "marine sanchez (latin)", "dob": "2006-11-03", "address": "Israël", "label": "negative", "script": "latin" }, { "name": "véronique bonneau (latin)", "dob": "1959-01-13", "address": "Guinée-Bissau", "label": "negative", "script": "latin" }, { "name": "cécile berger (latin)", "dob": "2005-04-07", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "emmanuel leduc (latin)", "dob": "1937-10-10", "address": "Togo", "label": "negative", "script": "latin" }, { "name": "Kia Sadeghi (latin)", "dob": "1986-3-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "'Adnan Yusuf (latin)", "dob": "1956-6-6", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "تولين بنو الدئل (arabic)", "dob": "2003-11-20", "address": "Palau", "label": "negative", "script": "arabic" } ], "query_template": "Generate 14 variations of {name}, ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 13% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap adjacent consonants, Convert name to initials, and Use first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 13, "selected_rules": [ "swap_adjacent_consonants", "shorten_name_to_initials", "initial_only_first_name" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "shorten_name_to_initials": "Convert name to initials", "initial_only_first_name": "Use first name initial with last name" }, "percentage": 13 } } }, { "seed_identities_with_labels": [ { "name": "Ciro FERREIRA (latin)", "dob": "1987-8-27", "address": "Brazil", "label": "positive", "script": "latin" }, { "name": "piedad sevillano (latin)", "dob": "1957-11-11", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "christine mary (latin)", "dob": "1962-02-07", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "عرشیا طلوعی (arabic)", "dob": "1963-12-31", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "joel sousa (latin)", "dob": "1993-12-21", "address": "Argentina", "label": "negative", "script": "latin" }, { "name": "alexandre charpentier (latin)", "dob": "1991-03-06", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "luís moura (latin)", "dob": "1967-05-16", "address": "Samoa", "label": "negative", "script": "latin" }, { "name": "benedita leite (latin)", "dob": "1970-07-08", "address": "Peru", "label": "negative", "script": "latin" }, { "name": "Daniel He (latin)", "dob": "1965-7-6", "address": "China", "label": "positive", "script": "latin" }, { "name": "Mariam Barreh (latin)", "dob": "1971-4-10", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "afonso andrade (latin)", "dob": "2002-01-18", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "'Abdallah AL-JAMAL (latin)", "dob": "1997-2-2", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "gonçalo alves (latin)", "dob": "1932-03-26", "address": "Vaticano", "label": "negative", "script": "latin" }, { "name": "ханко мангъфова (cyrillic)", "dob": "1928-07-10", "address": "Bahrain", "label": "negative", "script": "cyrillic" }, { "name": "Сергей Мальцев (cyrillic)", "dob": "1973-2-28", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate exactly 13 execution vectors for each target identity {name}, ensuring phonetic similarity with the following distributions:\n50% Light variations of {name} that are phonetically similar \n50% Medium variations of {name} that are phonetically similar \n\nAnd, ensure orthographic similarity (visually similar spellings) is 100% Medium.\nApproximately 12% of the total 13 variations should follow these rule-based transformations:\nAdditionally, generate variations that perform these transformations: Convert name to initials, Abbreviate name parts, and Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 12, "selected_rules": [ "shorten_name_to_initials", "shorten_name_to_abbreviations", "remove_all_spaces" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "shorten_name_to_abbreviations": "Abbreviate name parts", "remove_all_spaces": "Remove all spaces" }, "percentage": 12 } } }, { "seed_identities_with_labels": [ { "name": "paul gallagher (latin)", "dob": "1990-06-07", "address": "Grenada", "label": "negative", "script": "latin" }, { "name": "Александр Жуков (cyrillic)", "dob": "1956-6-1", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Andrey Ivanov (latin)", "dob": "1983-4-13", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Arif Khot (latin)", "dob": "1970-8-21", "address": "India", "label": "positive", "script": "latin" }, { "name": "justin martin (latin)", "dob": "1932-03-14", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "noémi bouvier (latin)", "dob": "1948-10-22", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "jack weber (latin)", "dob": "1965-02-05", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "robin lane (latin)", "dob": "1930-07-14", "address": "American Samoa", "label": "negative", "script": "latin" }, { "name": "Andrei Siguta (latin)", "dob": "1979-5-5", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "galo sans (latin)", "dob": "1947-05-02", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Ella Pamfilova (latin)", "dob": "1953-9-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ермолай ларионова (cyrillic)", "dob": "1985-01-30", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "paige davis (latin)", "dob": "1931-08-12", "address": "Rwanda", "label": "negative", "script": "latin" }, { "name": "jeffery bennett (latin)", "dob": "1972-04-08", "address": "Gibraltar", "label": "negative", "script": "latin" }, { "name": "جابر بقشان (arabic)", "dob": "1965-05-15", "address": "South Georgia and the South Sandwich Islands", "label": "negative", "script": "arabic" } ], "query_template": "Generate 8 variations of {name} ensuring phonetic similarity (70% Light, 30% Medium) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 55% of the total 8 variations should follow these rule-based transformations: Replace spaces with special characters, such as ~ or *, and Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 55, "selected_rules": [ "replace_spaces_with_random_special_characters", "insert_random_letter" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "insert_random_letter": "Insert a random letter" }, "percentage": 55 } } }, { "seed_identities_with_labels": [ { "name": "christine robinson (latin)", "dob": "2000-06-23", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Faysal al-Wadi (latin)", "dob": "1976-12-15", "address": "Malta", "label": "positive", "script": "latin" }, { "name": "Iryna Cherkasova (latin)", "dob": "1963-5-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "misty nguyen (latin)", "dob": "1986-08-20", "address": "Russian Federation", "label": "negative", "script": "latin" }, { "name": "lauren ramirez (latin)", "dob": "1949-06-15", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Александр Ведяхин (cyrillic)", "dob": "1977-2-20", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "tina rodriguez (latin)", "dob": "1929-01-24", "address": "Turkmenistan", "label": "negative", "script": "latin" }, { "name": "christopher webb (latin)", "dob": "1994-10-08", "address": "Serbia", "label": "negative", "script": "latin" }, { "name": "rachel cortez (latin)", "dob": "2005-01-06", "address": "Ukraine", "label": "negative", "script": "latin" }, { "name": "michael strickland (latin)", "dob": "1981-01-26", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "кузьма александрова (cyrillic)", "dob": "1974-09-10", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "василь філіпенко (cyrillic)", "dob": "2000-05-14", "address": "South Georgia and the South Sandwich Islands", "label": "negative", "script": "cyrillic" }, { "name": "Nikolay Arefyev (latin)", "dob": "1949-3-11", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "martina quero (latin)", "dob": "1950-07-22", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "Zelimkhan Mutsoev (latin)", "dob": "1959-10-13", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 6 variations of {name}, ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (50% Light, 50% Medium). Approximately 22% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that: Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 22, "selected_rules": [ "add_random_leading_title" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 22 } } }, { "seed_identities_with_labels": [ { "name": "سوراجو محمد (arabic)", "dob": "1979-7-3", "address": "United Arab Emirates", "label": "positive", "script": "arabic" }, { "name": "paola leiva (latin)", "dob": "1985-06-08", "address": "Nauru", "label": "negative", "script": "latin" }, { "name": "narcisa rosales (latin)", "dob": "2006-05-31", "address": "Sri Lanka", "label": "negative", "script": "latin" }, { "name": "ناهد نسيبة (arabic)", "dob": "1981-08-04", "address": "New Caledonia", "label": "negative", "script": "arabic" }, { "name": "alexandra santamaria (latin)", "dob": "1934-07-24", "address": "República Dominicana", "label": "negative", "script": "latin" }, { "name": "Aleksandr Kurenkov (latin)", "dob": "1972-6-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "haydée verdejo (latin)", "dob": "1984-02-03", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Alexander GUSEV (latin)", "dob": "1963-7-27", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "виталий филиппов (cyrillic)", "dob": "1960-02-11", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "adela goñi (latin)", "dob": "1931-08-18", "address": "Bahrein", "label": "negative", "script": "latin" }, { "name": "Kazbek Kokov (latin)", "dob": "1973-7-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "encarnación benavent (latin)", "dob": "1989-06-20", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "michael davis (latin)", "dob": "1940-12-01", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Hiran CHARI-APHAPHON (latin)", "dob": "1950-1-3", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "josette schmitt (latin)", "dob": "1946-09-09", "address": "Cameroon", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 7 execution vectors for {name} ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (100% Far). Approximately 26% of the total 7 variations should follow these rule-based transformations: \nAdditionally, generate variations that:\n Convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Far": 1.0 }, "rule_based": { "rule_percentage": 26, "selected_rules": [ "shorten_name_to_initials" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials" }, "percentage": 26 } } }, { "seed_identities_with_labels": [ { "name": "melissa davenport (latin)", "dob": "1996-10-08", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "ángel camacho (latin)", "dob": "1972-09-24", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "deborah bishop (latin)", "dob": "1937-09-29", "address": "Jamaica", "label": "negative", "script": "latin" }, { "name": "jeffrey esparza (latin)", "dob": "1996-03-14", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "donald martinez (latin)", "dob": "1954-02-10", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "эммануил казаков (cyrillic)", "dob": "1975-10-05", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "angela pittman (latin)", "dob": "1989-06-27", "address": "Lithuania", "label": "negative", "script": "latin" }, { "name": "Osman Hamid (latin)", "dob": "1966-1-1", "address": "Sudan", "label": "positive", "script": "latin" }, { "name": "Chilli Yuan (latin)", "dob": "1985-5-8", "address": "China", "label": "positive", "script": "latin" }, { "name": "Chris Tang (latin)", "dob": "1965-7-4", "address": "Hong Kong", "label": "positive", "script": "latin" }, { "name": "وحيد سبيتي (arabic)", "dob": "1961-2-23", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "angela anderson (latin)", "dob": "1925-01-14", "address": "Saint Lucia", "label": "negative", "script": "latin" }, { "name": "paul foster (latin)", "dob": "1965-06-14", "address": "Uzbekistan", "label": "negative", "script": "latin" }, { "name": "Sergey Kravtsov (latin)", "dob": "1974-3-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "آرتين کرمانی (arabic)", "dob": "1996-10-25", "address": "Saint Pierre and Miquelon", "label": "negative", "script": "arabic" } ], "query_template": "Generate 7 variations of {name}. ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 47% of the total 7 variations should follow these rule-based transformations: Generate name with a common suffix (e.g. Jr., Sr.), Generate name with a common prefix (e.g. Mr., Mrs.), Generate name with a middle initial, Convert name to initials, Replace first letter with a similar-sounding letter (e.g. 'b' to 'p'), Replace last letter with a similar-sounding letter (e.g. 't' to 'd'). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 47, "selected_rules": [ "shorten_name_to_initials" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials" }, "percentage": 47 } } }, { "seed_identities_with_labels": [ { "name": "Hassan AYACH (latin)", "dob": "1963-5-1", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "Fatemeh Sadeghi (latin)", "dob": "1995-11-28", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "vera coelho (latin)", "dob": "2007-09-30", "address": "Antígua e Barbuda", "label": "negative", "script": "latin" }, { "name": "сократ потапова (cyrillic)", "dob": "1992-11-11", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "manuel ferreira (latin)", "dob": "1974-11-15", "address": "Vanuatu", "label": "negative", "script": "latin" }, { "name": "Reuben LAVILLA (latin)", "dob": "1972-10-4", "address": "Philippines", "label": "positive", "script": "latin" }, { "name": "joão faria (latin)", "dob": "1991-04-11", "address": "Faroé", "label": "negative", "script": "latin" }, { "name": "nicolás blazquez (latin)", "dob": "1973-03-04", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "james mckenzie (latin)", "dob": "2001-03-27", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "gaspar gonçalves (latin)", "dob": "2007-07-18", "address": "Alemanha", "label": "negative", "script": "latin" }, { "name": "Abdullah Faisal (latin)", "dob": "1963-9-10", "address": "Jamaica", "label": "positive", "script": "latin" }, { "name": "رجاء آل خضير (arabic)", "dob": "1958-10-14", "address": "Palestinian Territory", "label": "negative", "script": "arabic" }, { "name": "isabel gomes (latin)", "dob": "1998-09-21", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "suzanne vidal (latin)", "dob": "1935-04-09", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Григорий Карасин (cyrillic)", "dob": "1949-8-23", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate 15 name variations for {name} ensuring phonetic similarity (10% Light, e.g. \"John\" -> \"Jon\", 30% Medium, e.g. \"John\" -> \"Jan\", 60% Far, e.g. \"John\" -> \"Johann), and orthographic similarity (50% Light, e.g. \"Michael\" -> \"Mikael\", 50% Medium, e.g. \"Michael\" -> \"Michel). Approximately 16% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap random adjacent letters, and Swap adjacent consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 16, "selected_rules": [ "swap_random_letter", "swap_adjacent_consonants" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 16 } } }, { "seed_identities_with_labels": [ { "name": "Asadollah Seify (latin)", "dob": "1965-4-4", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "carlota santos (latin)", "dob": "1996-01-08", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "adriana cases (latin)", "dob": "1936-06-04", "address": "República Centroafricana", "label": "negative", "script": "latin" }, { "name": "Awqad al-Hamidawi (latin)", "dob": "1982-2-3", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "виталий князева (cyrillic)", "dob": "1986-08-22", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "araceli ripoll (latin)", "dob": "1928-07-29", "address": "República Checa", "label": "negative", "script": "latin" }, { "name": "Tun Naing (latin)", "dob": "1963-4-30", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "محمدرضا عبدالعلی (arabic)", "dob": "1981-02-17", "address": "Lao People's Democratic Republic", "label": "negative", "script": "arabic" }, { "name": "sarita alba (latin)", "dob": "1983-06-23", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "raúl rodriguez (latin)", "dob": "1965-01-09", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "concepción figueras (latin)", "dob": "1963-08-29", "address": "Túnez", "label": "negative", "script": "latin" }, { "name": "Николай Коломейцев (cyrillic)", "dob": "1956-9-1", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Andrey Parshin (latin)", "dob": "1974-2-19", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "flavio andres (latin)", "dob": "1989-12-04", "address": "Botswana", "label": "negative", "script": "latin" }, { "name": "merche samper (latin)", "dob": "1992-07-22", "address": "Cuba", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 48% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random vowel, and Abbreviate name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 48, "selected_rules": [ "remove_random_vowel", "shorten_name_to_abbreviations" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "shorten_name_to_abbreviations": "Abbreviate name parts" }, "percentage": 48 } } }, { "seed_identities_with_labels": [ { "name": "camilo gómez (latin)", "dob": "1925-05-02", "address": "Canadá", "label": "negative", "script": "latin" }, { "name": "яков дорофеев (cyrillic)", "dob": "1953-06-10", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "Evgeniy Polyanin (latin)", "dob": "1993-3-4", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "rosa nieto (latin)", "dob": "1975-11-08", "address": "Nueva Zelandia", "label": "negative", "script": "latin" }, { "name": "بشار الأسد (arabic)", "dob": "1965-9-11", "address": "Syria", "label": "positive", "script": "arabic" }, { "name": "Aleksei Simanovskiy (latin)", "dob": "1955-9-6", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "etelvina villena (latin)", "dob": "1992-07-29", "address": "Zambia", "label": "negative", "script": "latin" }, { "name": "andrea olivares (latin)", "dob": "1924-11-15", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Elvis KELJMENDI (latin)", "dob": "1978-5-3", "address": "Kosovo", "label": "positive", "script": "latin" }, { "name": "هلیا لاچینی (arabic)", "dob": "1978-02-22", "address": "Slovenia", "label": "negative", "script": "arabic" }, { "name": "margaux david (latin)", "dob": "1992-08-22", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "caitlin martinez (latin)", "dob": "1947-05-26", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "lupe rodrigo (latin)", "dob": "1924-12-26", "address": "San Marino", "label": "negative", "script": "latin" }, { "name": "eligia flores (latin)", "dob": "2005-11-10", "address": "Estonia", "label": "negative", "script": "latin" }, { "name": "Mahmoud Baghlani (latin)", "dob": "1978-3-20", "address": "Iran", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 100% Light, and also include 32% of variations that follow: Additionally, generate variations that perform these transformations: Convert name to initials, Add a title prefix (Mr., Dr., etc.), and Replace random consonants with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 1.0 }, "rule_based": { "rule_percentage": 32, "selected_rules": [ "shorten_name_to_initials", "add_random_leading_title", "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 32 } } }, { "seed_identities_with_labels": [ { "name": "Alexei Sheshenya (latin)", "dob": "1971-4-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Ruslan Sarkisov (latin)", "dob": "1978-12-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Abbas Abdiasjerd (latin)", "dob": "1960-9-9", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "jon galvan (latin)", "dob": "1948-02-05", "address": "Tanzania", "label": "negative", "script": "latin" }, { "name": "sarah webb (latin)", "dob": "1957-08-25", "address": "Botswana", "label": "negative", "script": "latin" }, { "name": "claude julien (latin)", "dob": "1942-11-13", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "lisa sparks (latin)", "dob": "1967-06-24", "address": "Macao", "label": "negative", "script": "latin" }, { "name": "sheri farrell (latin)", "dob": "1973-03-16", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "настимир бодуров (cyrillic)", "dob": "2001-06-27", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "noël leroy (latin)", "dob": "1941-07-12", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Viktor Netyksho (latin)", "dob": "1966-9-8", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "thérèse masson (latin)", "dob": "1988-08-02", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Светлана Емельянова (cyrillic)", "dob": "1971-10-7", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "richard carter (latin)", "dob": "1976-08-13", "address": "Italy", "label": "negative", "script": "latin" }, { "name": "فائق جرار (arabic)", "dob": "1954-06-23", "address": "Qatar", "label": "negative", "script": "arabic" } ], "query_template": "Generate exactly 7 execution vectors for each target identity {name}, ensuring phonetic similarity (100% Medium) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 56% of the total variations should follow these rule-based transformations: \nAdditionally, generate variations that perform these transformations: Replace random consonants with different consonants, and Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 56, "selected_rules": [ "replace_random_consonant_with_random_consonant", "insert_random_letter" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "insert_random_letter": "Insert a random letter" }, "percentage": 56 } } }, { "seed_identities_with_labels": [ { "name": "carmina bartolomé (latin)", "dob": "1991-12-30", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "jennifer barton (latin)", "dob": "1964-09-07", "address": "Papua New Guinea", "label": "negative", "script": "latin" }, { "name": "герасим брагин (cyrillic)", "dob": "1945-02-26", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "hayley newton (latin)", "dob": "1977-08-17", "address": "Trinidad and Tobago", "label": "negative", "script": "latin" }, { "name": "isabel brandt (latin)", "dob": "1926-06-08", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "taylor sexton (latin)", "dob": "1978-09-18", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "emanuel neto (latin)", "dob": "1955-12-20", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "patricia burch (latin)", "dob": "1969-06-27", "address": "Guinea-Bissau", "label": "negative", "script": "latin" }, { "name": "Nasser Nesr (latin)", "dob": "1963-4-20", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "Evgeny NOVITSKIY (latin)", "dob": "1957-11-19", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ناهد آل عطفة (arabic)", "dob": "1942-07-14", "address": "Barbados", "label": "negative", "script": "arabic" }, { "name": "Aman Abdurahman (latin)", "dob": "1972-1-5", "address": "Indonesia", "label": "positive", "script": "latin" }, { "name": "tammie stephenson (latin)", "dob": "1957-11-09", "address": "Cook Islands", "label": "negative", "script": "latin" }, { "name": "Ekaterina Kharchenko (latin)", "dob": "1977-8-11", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "حسین تناور (arabic)", "dob": "1981-8-30", "address": "Iran", "label": "positive", "script": "arabic" } ], "query_template": "Generate 10 variations of {name} ensuring phonetic similarity (100% Medium) and orthographic similarity (10% Light, 30% Medium, 60% Far). Approximately 16% of the total 10 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Convert name to initials, Delete a random letter, and Use first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 16, "selected_rules": [ "shorten_name_to_initials", "delete_random_letter", "initial_only_first_name" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "delete_random_letter": "Delete a random letter", "initial_only_first_name": "Use first name initial with last name" }, "percentage": 16 } } }, { "seed_identities_with_labels": [ { "name": "Andrei Shved (latin)", "dob": "1973-4-21", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "stephen bass (latin)", "dob": "1991-01-17", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "marc madrid (latin)", "dob": "2006-05-15", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "amber carter (latin)", "dob": "1977-09-12", "address": "Ukraine", "label": "negative", "script": "latin" }, { "name": "fred roberts (latin)", "dob": "1954-01-29", "address": "Lesotho", "label": "negative", "script": "latin" }, { "name": "شهاب طقش (arabic)", "dob": "1963-09-30", "address": "Jordan", "label": "negative", "script": "arabic" }, { "name": "chelsea alvarez (latin)", "dob": "1933-11-29", "address": "Holy See (Vatican City State)", "label": "negative", "script": "latin" }, { "name": "mary martinez (latin)", "dob": "1941-11-15", "address": "San Marino", "label": "negative", "script": "latin" }, { "name": "Юлия Беглова (cyrillic)", "dob": "1981-6-8", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "gérard marques (latin)", "dob": "1955-06-17", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Sergey NEVEROV (latin)", "dob": "1961-12-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "gustavo henriques (latin)", "dob": "2001-07-25", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "روفيدا هوازن (arabic)", "dob": "1963-03-21", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "Alexei Rakhmanov (latin)", "dob": "1964-7-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Marko Svorcan (latin)", "dob": "1967-5-7", "address": "Serbia", "label": "positive", "script": "latin" } ], "query_template": "Generate 9 variations of {name}, ensuring phonetic similarity (100% Far) and orthographic similarity (50% Light, 50% Medium). Approximately 13% of the total 9 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Delete a random letter, Remove all spaces, and Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 13, "selected_rules": [ "delete_random_letter", "remove_all_spaces", "remove_random_vowel" ], "rule_descriptions": { "delete_random_letter": "Delete a random letter", "remove_all_spaces": "Remove all spaces", "remove_random_vowel": "Remove a random vowel" }, "percentage": 13 } } }, { "seed_identities_with_labels": [ { "name": "andrew stevens (latin)", "dob": "1976-04-01", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "مياس جذام (arabic)", "dob": "1947-08-24", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "luc aubry (latin)", "dob": "2004-12-24", "address": "Saint-Kitts et Nevis", "label": "negative", "script": "latin" }, { "name": "denis pereira (latin)", "dob": "1982-04-03", "address": "Lettonie", "label": "negative", "script": "latin" }, { "name": "Jingfeng Gao (latin)", "dob": "1975-7-5", "address": "China", "label": "positive", "script": "latin" }, { "name": "jeanne boyer (latin)", "dob": "1991-11-12", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "luísa moreira (latin)", "dob": "1982-04-21", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Виталий Савельев (cyrillic)", "dob": "1954-1-18", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "margot michaud (latin)", "dob": "1986-07-09", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Bella Zlatkis (latin)", "dob": "1948-7-5", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "susanne pierre (latin)", "dob": "2002-02-26", "address": "République centrafricaine", "label": "negative", "script": "latin" }, { "name": "adélaïde menard (latin)", "dob": "1996-10-20", "address": "Ouzbékistan", "label": "negative", "script": "latin" }, { "name": "عتيق كانو (arabic)", "dob": "1960-08-28", "address": "El Salvador", "label": "negative", "script": "arabic" }, { "name": "Parviz Soltanizadeh (latin)", "dob": "1960-7-13", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Rao Anwar (latin)", "dob": "1959-1-1", "address": "Pakistan", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 13 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 11% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random vowel, and Replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 11, "selected_rules": [ "remove_random_vowel", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 11 } } }, { "seed_identities_with_labels": [ { "name": "jeannine bodin (latin)", "dob": "1985-12-25", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "catherine da costa (latin)", "dob": "1943-01-15", "address": "Lithuanie", "label": "negative", "script": "latin" }, { "name": "marcel roux (latin)", "dob": "1930-10-17", "address": "Laos", "label": "negative", "script": "latin" }, { "name": "jessica morata (latin)", "dob": "1959-08-29", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "віолетта асаула (cyrillic)", "dob": "1990-08-15", "address": "American Samoa", "label": "negative", "script": "cyrillic" }, { "name": "autumn mason (latin)", "dob": "1990-04-26", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "وحيد سبيتي (arabic)", "dob": "1961-2-23", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "эдуард борисов (cyrillic)", "dob": "1927-11-09", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "christophe lucas (latin)", "dob": "1925-04-25", "address": "Cook (Îles)", "label": "negative", "script": "latin" }, { "name": "Zurab Makiyev (latin)", "dob": "1976-9-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "éric masson (latin)", "dob": "2007-01-04", "address": "Suriname", "label": "negative", "script": "latin" }, { "name": "Atul Gupta (latin)", "dob": "1968-6-14", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "Mohsen NAFTCHI (latin)", "dob": "1988-2-16", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "beverly stevens (latin)", "dob": "1982-01-30", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Leonid MIKHAILIUK (latin)", "dob": "1970-1-1", "address": "Ukraine", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 10% Light, 50% Medium, 40% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 44% of variations that follow: Additionally, generate variations that perform these transformations: Insert a random letter, Replace double letters with a single letter, and Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 44, "selected_rules": [ "insert_random_letter", "replace_double_letters_with_single_letter", "remove_random_consonant" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter", "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "remove_random_consonant": "Remove a random consonant" }, "percentage": 44 } } }, { "seed_identities_with_labels": [ { "name": "jeffrey burke (latin)", "dob": "1962-03-08", "address": "Palau", "label": "negative", "script": "latin" }, { "name": "شعلان الأيوبي (arabic)", "dob": "1994-12-10", "address": "Congo", "label": "negative", "script": "arabic" }, { "name": "kevin hernandez (latin)", "dob": "1978-03-07", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Kwo'n-u Han (latin)", "dob": "1962-8-21", "address": "China", "label": "positive", "script": "latin" }, { "name": "Abdullah Faisal (latin)", "dob": "1963-9-10", "address": "Jamaica", "label": "positive", "script": "latin" }, { "name": "Alireza Chegha-Marani (latin)", "dob": "1962-8-11", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "春艳 郭 (chinese)", "dob": "1983-4-15", "address": "China", "label": "positive", "script": "chinese" }, { "name": "michael fletcher (latin)", "dob": "2001-05-15", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "nathan peterson (latin)", "dob": "1985-04-30", "address": "United States of America", "label": "negative", "script": "latin" }, { "name": "jessica webb (latin)", "dob": "1996-01-30", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "потап борисова (cyrillic)", "dob": "1952-07-21", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "rebecca castillo (latin)", "dob": "1982-11-25", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "dylan barnett (latin)", "dob": "1938-10-27", "address": "Bangladesh", "label": "negative", "script": "latin" }, { "name": "Manal al-akhraz (latin)", "dob": "1970-2-2", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "sarah lowe (latin)", "dob": "2007-01-18", "address": "Italy", "label": "negative", "script": "latin" } ], "query_template": "Generate 8 execution vectors for {name}, ensuring phonetic similarity (10% Light variations using Soundex algorithm, 50% Medium variations using Metaphone algorithm, 40% Far variations using Levenshtein distance) and orthographic similarity (30% Light variations with single character edits, 40% Medium variations with swapping characters, 30% Far variations with insertion/deletion of characters). Approximately 47% of the total 8 variations should follow these rule-based transformations: Additionally, generate variations that Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 47, "selected_rules": [ "add_random_leading_title" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 47 } } }, { "seed_identities_with_labels": [ { "name": "حسام یزدی (arabic)", "dob": "1998-06-01", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "brittney robertson (latin)", "dob": "1946-08-21", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Evgenii Kraiushkin (latin)", "dob": "1990-3-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "guy perrot (latin)", "dob": "2007-06-14", "address": "Suède", "label": "negative", "script": "latin" }, { "name": "christine lemonnier (latin)", "dob": "1954-11-12", "address": "Mariannes du Nord (Îles)", "label": "negative", "script": "latin" }, { "name": "لتين الخالدي (arabic)", "dob": "1998-02-21", "address": "Sri Lanka", "label": "negative", "script": "arabic" }, { "name": "domitila cruz (latin)", "dob": "1943-02-17", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "michelle guillou (latin)", "dob": "1957-03-21", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Олег Кожемяко (cyrillic)", "dob": "1962-3-17", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Andrei DUBEN (latin)", "dob": "1970-12-12", "address": "Chile", "label": "positive", "script": "latin" }, { "name": "victor lefebvre (latin)", "dob": "1972-04-19", "address": "Togo", "label": "negative", "script": "latin" }, { "name": "Aleksey Chumakov (latin)", "dob": "1974-5-6", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "daniel diaz (latin)", "dob": "2005-10-04", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Ra'i al-Ras (latin)", "dob": "1974-9-5", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "anouk ruiz (latin)", "dob": "1982-02-21", "address": "Danemark", "label": "negative", "script": "latin" } ], "query_template": "Generate exactly 10 execution vectors for {name}, ensuring phonetic similarity with 100% Far and orthographic similarity with 20% Light, 60% Medium, 20% Far. Approximately 1-2 of the total 10 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Add a title prefix (Mr., Dr., etc.), and Duplicate a random letter.\n[VALIDATION HINTS]: Approximately 15% of the variations should follow rule-based transformations. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 15, "selected_rules": [ "add_random_leading_title", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 15 } } }, { "seed_identities_with_labels": [ { "name": "زيد البرغوثي (arabic)", "dob": "1971-11-05", "address": "Cyprus", "label": "negative", "script": "arabic" }, { "name": "jeannine didier (latin)", "dob": "1975-05-16", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "suzanne vincent (latin)", "dob": "1953-04-11", "address": "Pitcairn (Îles)", "label": "negative", "script": "latin" }, { "name": "نذير طسم (arabic)", "dob": "1962-03-25", "address": "Syria", "label": "High Risk", "script": "arabic" }, { "name": "Anatoly Bifov (latin)", "dob": "1963-1-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "sabine laroche (latin)", "dob": "1948-10-21", "address": "Turquie", "label": "negative", "script": "latin" }, { "name": "claudine leclerc (latin)", "dob": "1945-06-23", "address": "Porto Rico", "label": "negative", "script": "latin" }, { "name": "Samer Ismail (latin)", "dob": "1980-10-25", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Мария ШУВАЛОВА (cyrillic)", "dob": "1998-8-4", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "emmanuel pereira (latin)", "dob": "1929-04-27", "address": "Honduras", "label": "negative", "script": "latin" }, { "name": "Irina Petina (latin)", "dob": "1972-8-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Hassan Afgoye (latin)", "dob": "1966-1-1", "address": "Somalia", "label": "positive", "script": "latin" }, { "name": "olivier fernandez (latin)", "dob": "1998-11-26", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "jacqueline diaz (latin)", "dob": "1949-11-07", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "marisela nebot (latin)", "dob": "2007-03-02", "address": "Venezuela", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 14 execution vectors for each target identity {name}, ensuring phonetic similarity (100% Medium) and orthographic similarity (50% Light, 50% Medium). Approximately 38% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that: Replace random consonants with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 38, "selected_rules": [ "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 38 } } }, { "seed_identities_with_labels": [ { "name": "мокей веселова (cyrillic)", "dob": "1979-09-20", "address": "Kuwait", "label": "negative", "script": "cyrillic" }, { "name": "sofia moura (latin)", "dob": "2001-11-10", "address": "Peru", "label": "negative", "script": "latin" }, { "name": "Tatiana Pereverzeva (latin)", "dob": "1964-6-20", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "Александр Щербаков (cyrillic)", "dob": "1965-5-12", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "xavier pires (latin)", "dob": "1969-11-15", "address": "Listenstaine", "label": "negative", "script": "latin" }, { "name": "constança figueiredo (latin)", "dob": "1960-03-28", "address": "Geórgia do Sul e Sandwich do Sul", "label": "negative", "script": "latin" }, { "name": "Vitaly Markelov (latin)", "dob": "1963-8-5", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jules fernandez (latin)", "dob": "1953-08-19", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "odette vallet (latin)", "dob": "1963-01-30", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Sergey NARYSHKIN (latin)", "dob": "1954-10-27", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "alexandrie hubert (latin)", "dob": "1967-01-12", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "naiara faria (latin)", "dob": "1953-07-15", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "پارسا ابوطالبی (arabic)", "dob": "2006-02-16", "address": "Iran", "label": "High Risk", "script": "arabic" }, { "name": "alexandre monteiro (latin)", "dob": "1972-04-19", "address": "Paquistão", "label": "negative", "script": "latin" }, { "name": "Svetlana Yemilianova (latin)", "dob": "1971-10-7", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 12 execution vectors for {name} ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (70% Light, 30% Medium). Approximately 18% of the total 12 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Reorder name parts, and Replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 18, "selected_rules": [ "name_parts_permutations", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 18 } } }, { "seed_identities_with_labels": [ { "name": "mark flynn (latin)", "dob": "1980-11-30", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Yury Nabokov (latin)", "dob": "1981-1-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Евген Балицький (cyrillic)", "dob": "1969-12-10", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "григорий павлова (cyrillic)", "dob": "1927-05-27", "address": "Chad", "label": "negative", "script": "cyrillic" }, { "name": "Valentina Tereshkova (latin)", "dob": "1937-3-6", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Asif SHALISH (latin)", "dob": "1959-1-1", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "александра князев (cyrillic)", "dob": "1938-06-11", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "marc gomez (latin)", "dob": "1955-12-30", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "sandra hill (latin)", "dob": "1985-02-12", "address": "Slovenia", "label": "negative", "script": "latin" }, { "name": "yvonne miller (latin)", "dob": "1927-09-21", "address": "Qatar", "label": "negative", "script": "latin" }, { "name": "hannah shaw (latin)", "dob": "1955-06-11", "address": "American Samoa", "label": "negative", "script": "latin" }, { "name": "pamela castillo (latin)", "dob": "1939-08-10", "address": "Comoros", "label": "negative", "script": "latin" }, { "name": "sara guerrero (latin)", "dob": "1955-01-08", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Recep Aydin (latin)", "dob": "1996-11-14", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "isaac voisin (latin)", "dob": "1976-08-21", "address": "Cameroon", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 12 execution vectors for each target identity {name}, ensuring phonetic similarity by incorporating variations with the following levels of Light, Medium, and Far names respectively: 10% Light, 30% Medium, and 60% Far. Additionally, ensure orthographic similarity by including the same proportion of variations: 10% Light, 30% Medium, and 60% Far.\n\nApproximately 46% of these total 12 variations should follow the rule-based transformations below:\n\n* Swap random adjacent letters.\n* Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 46, "selected_rules": [ "swap_random_letter", "remove_random_consonant" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "remove_random_consonant": "Remove a random consonant" }, "percentage": 46 } } }, { "seed_identities_with_labels": [ { "name": "philippine blin (latin)", "dob": "1927-10-01", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "نديم الرباب (arabic)", "dob": "1961-02-24", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "thérèse pruvost (latin)", "dob": "1927-09-27", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "adélaïde gomes (latin)", "dob": "1986-06-07", "address": "Indonésie", "label": "negative", "script": "latin" }, { "name": "рия златков (cyrillic)", "dob": "1954-12-07", "address": "Turkmenistan", "label": "negative", "script": "cyrillic" }, { "name": "Muhammad Rahmun (latin)", "dob": "1957-4-1", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "christine martel (latin)", "dob": "1928-07-22", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "randy shaw (latin)", "dob": "2005-05-04", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Neli Parutenco (latin)", "dob": "1962-3-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Евгений Попов (cyrillic)", "dob": "1978-9-11", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "océane thibault (latin)", "dob": "1970-09-08", "address": "Afrique du sud", "label": "negative", "script": "latin" }, { "name": "luce paul (latin)", "dob": "1983-06-06", "address": "Suède", "label": "negative", "script": "latin" }, { "name": "Alexey Sukhodolov (latin)", "dob": "1974-4-19", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Kassem Hijazi (latin)", "dob": "1972-9-15", "address": "Paraguay", "label": "positive", "script": "latin" }, { "name": "nathalie petit (latin)", "dob": "1993-11-04", "address": "Zambie", "label": "negative", "script": "latin" } ], "query_template": "Generate 6 variations of {name}. ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 15% of the total 6 variations should follow these rule-based transformations: Replace spaces with special characters, Insert a random letter, and Append hyphen and number. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 15, "selected_rules": [ "replace_spaces_with_random_special_characters", "insert_random_letter" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "insert_random_letter": "Insert a random letter" }, "percentage": 15 } } }, { "seed_identities_with_labels": [ { "name": "karen jackson (latin)", "dob": "1925-05-23", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "roger kemp (latin)", "dob": "1978-04-04", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "German Belous (latin)", "dob": "1977-11-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Aras Karim (latin)", "dob": "1967-8-6", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "Osman Hamid (latin)", "dob": "1966-1-1", "address": "Sudan", "label": "positive", "script": "latin" }, { "name": "кузьма макаров (cyrillic)", "dob": "2006-04-11", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "éléonore gauthier (latin)", "dob": "1927-01-04", "address": "Liban", "label": "negative", "script": "latin" }, { "name": "capucine lévêque (latin)", "dob": "1926-02-06", "address": "Anguilla", "label": "negative", "script": "latin" }, { "name": "левент куртажова (cyrillic)", "dob": "1939-03-25", "address": "Liechtenstein", "label": "negative", "script": "cyrillic" }, { "name": "louis gay (latin)", "dob": "1953-03-22", "address": "Chili", "label": "negative", "script": "latin" }, { "name": "Андрей Горохов (cyrillic)", "dob": "1960-1-13", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Ivan KUSOV (latin)", "dob": "1987-1-24", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "honoré gomes (latin)", "dob": "1974-02-09", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "mathilde lejeune (latin)", "dob": "1947-06-26", "address": "Tanzanie", "label": "negative", "script": "latin" }, { "name": "john barron (latin)", "dob": "1988-03-18", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 12 variations of {name}, ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (10% Light, 30% Medium, 60% Far). Approximately 44% of the total 12 variations should follow these rule-based transformations: Additionally, generate variations that: Convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 44, "selected_rules": [ "shorten_name_to_initials" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials" }, "percentage": 44 } } }, { "seed_identities_with_labels": [ { "name": "Oleg Nikolayev (latin)", "dob": "1969-12-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "aimée brunet (latin)", "dob": "1956-07-06", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Saad AL-FAKIH (latin)", "dob": "1957-1-31", "address": "United Kingdom", "label": "positive", "script": "latin" }, { "name": "tristan chauvet (latin)", "dob": "1941-10-29", "address": "Tuvalu", "label": "negative", "script": "latin" }, { "name": "henri launay (latin)", "dob": "1963-03-03", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "patricia jacques (latin)", "dob": "1967-01-24", "address": "Territoire britannique de l'océan Indien", "label": "negative", "script": "latin" }, { "name": "dominique cordier (latin)", "dob": "1953-04-20", "address": "République Dominicaine", "label": "negative", "script": "latin" }, { "name": "معارف بكر بن وائل (arabic)", "dob": "1926-05-18", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "elpidio carbonell (latin)", "dob": "1929-11-15", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Vyacheslav Rossolay (latin)", "dob": "1981-10-17", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "родион колев (cyrillic)", "dob": "1986-05-22", "address": "Saint Helena", "label": "negative", "script": "cyrillic" }, { "name": "Abu Aisyah (latin)", "dob": "1983-9-6", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "خدارحم سارانی (arabic)", "dob": "1976-3-21", "address": "Iran", "label": "positive", "script": "arabic" }, { "name": "cécile hoareau (latin)", "dob": "1997-12-09", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "benoît joly (latin)", "dob": "1931-10-05", "address": "Chypre", "label": "negative", "script": "latin" } ], "query_template": "Generate 8 variations of {name} ensuring phonetic similarity by applying the following transformations with the specified probability percentages: \n30% Light: replace each word with its closest phonetic match using Levenshtein distance and Jaro-Winkler distance algorithms, \n40% Medium: add or remove a single letter that maintains phonetic pronunciation, and\n30% Far: substitute each letter with a similar-sounding letter (e.g. b to p) without regard for pronunciation.\n\nAdditionally, generate variations that perform these rule-based transformations: Approximately 28% of the total 8 variations should follow these transformations: Swap adjacent syllables, Insert a random letter, and Delete a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 28, "selected_rules": [ "swap_adjacent_syllables", "insert_random_letter", "delete_random_letter" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables", "insert_random_letter": "Insert a random letter", "delete_random_letter": "Delete a random letter" }, "percentage": 28 } } }, { "seed_identities_with_labels": [ { "name": "Андрей Белоусов (cyrillic)", "dob": "1959-3-17", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "andrew lloyd (latin)", "dob": "1941-02-22", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "richard watson (latin)", "dob": "1999-08-02", "address": "Eritrea", "label": "negative", "script": "latin" }, { "name": "كنار البشيتي (arabic)", "dob": "1986-09-19", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "geneviève benoit (latin)", "dob": "1951-01-27", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "lauren williams (latin)", "dob": "1947-04-05", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Elena Milskaya (latin)", "dob": "1980-12-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "mary lee (latin)", "dob": "1932-10-03", "address": "Zimbabwe", "label": "negative", "script": "latin" }, { "name": "Rajabi Al-Zahir (latin)", "dob": "1969-7-5", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "Irina Kaverzina (latin)", "dob": "1986-7-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "леонтий емельянова (cyrillic)", "dob": "1998-03-17", "address": "Bermuda", "label": "negative", "script": "cyrillic" }, { "name": "Abu Arif (latin)", "dob": "1957-10-22", "address": "Saudi Arabia", "label": "positive", "script": "latin" }, { "name": "david bryant (latin)", "dob": "1954-11-22", "address": "Saint Lucia", "label": "negative", "script": "latin" }, { "name": "alexandre schneider (latin)", "dob": "1928-09-03", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "cheyenne garza (latin)", "dob": "1961-01-22", "address": "Togo", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 11% of variations that follow: Additionally, generate variations that perform these transformations: Replace random consonants with different consonants, Replace double letters with a single letter, and Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 11, "selected_rules": [ "replace_random_consonant_with_random_consonant", "replace_double_letters_with_single_letter", "insert_random_letter" ], "rule_descriptions": { "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants", "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "insert_random_letter": "Insert a random letter" }, "percentage": 11 } } }, { "seed_identities_with_labels": [ { "name": "émile briand (latin)", "dob": "1968-08-10", "address": "Brunei", "label": "negative", "script": "latin" }, { "name": "alix leclerc (latin)", "dob": "1939-12-12", "address": "Turks et Caïques (Îles)", "label": "negative", "script": "latin" }, { "name": "randy moon (latin)", "dob": "2007-02-15", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "helena ribeiro (latin)", "dob": "1986-09-21", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "danielle moulin (latin)", "dob": "1989-10-30", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Recep Aydin (latin)", "dob": "1996-11-14", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "pauline le roux (latin)", "dob": "2007-04-16", "address": "Bosnie-Herzégovine", "label": "negative", "script": "latin" }, { "name": "Katerina Pawlowska (latin)", "dob": "1977-3-28", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "Aleksandr Nikolov (latin)", "dob": "1962-2-19", "address": "Bulgaria", "label": "positive", "script": "latin" }, { "name": "roland chauvet (latin)", "dob": "1974-01-30", "address": "Mayotte", "label": "negative", "script": "latin" }, { "name": "سهل سرندح (arabic)", "dob": "1987-11-28", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "Александр Усс (cyrillic)", "dob": "1954-11-3", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Aleksander Zhuchkovskiy (latin)", "dob": "1986-9-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "маруся гавриленко (cyrillic)", "dob": "1979-03-25", "address": "Costa Rica", "label": "negative", "script": "cyrillic" }, { "name": "sabas segarra (latin)", "dob": "1973-11-19", "address": "Venezuela", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 15 variations of {name}, ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 23% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that: Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 23, "selected_rules": [ "remove_random_consonant" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant" }, "percentage": 23 } } }, { "seed_identities_with_labels": [ { "name": "Kambiz Rostamian (latin)", "dob": "1960-8-27", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "Мансур Солтаев (cyrillic)", "dob": "1978-6-13", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Aliasghar Norouzi (latin)", "dob": "1962-11-11", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "filipe martins (latin)", "dob": "1929-05-06", "address": "Mauritânia", "label": "negative", "script": "latin" }, { "name": "Wei Zhang (latin)", "dob": "1977-6-16", "address": "China", "label": "positive", "script": "latin" }, { "name": "афиноген пахомова (cyrillic)", "dob": "1975-10-26", "address": "Belarus", "label": "High Risk", "script": "cyrillic" }, { "name": "адрианиа куртажова (cyrillic)", "dob": "1936-06-17", "address": "Pakistan", "label": "negative", "script": "cyrillic" }, { "name": "aurore leleu (latin)", "dob": "2006-01-05", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "daniel lima (latin)", "dob": "1945-07-22", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "constance fouquet (latin)", "dob": "1950-11-06", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "isaac baudry (latin)", "dob": "1998-07-06", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "duarte coelho (latin)", "dob": "1996-11-05", "address": "Etiópia", "label": "negative", "script": "latin" }, { "name": "In O (latin)", "dob": "1969-7-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "ivan mota (latin)", "dob": "1985-12-03", "address": "São Tomé e Príncipe", "label": "negative", "script": "latin" }, { "name": "mariana gomes (latin)", "dob": "1950-03-16", "address": "Ilhas Cook", "label": "negative", "script": "latin" } ], "query_template": "Generate 15 variations of {name}, ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (50% Light, 50% Medium). Approximately 18% of the total 15 variations should follow these rule-based transformations: Insert a random letter, Swap random adjacent letters, and Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 18, "selected_rules": [ "insert_random_letter", "swap_random_letter", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter", "swap_random_letter": "Swap random adjacent letters", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 18 } } }, { "seed_identities_with_labels": [ { "name": "katelyn avila (latin)", "dob": "1960-12-02", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "tracy lee (latin)", "dob": "1938-04-26", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Ramezan Oladi (latin)", "dob": "1963-5-29", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "ashley meyers (latin)", "dob": "1983-01-11", "address": "Netherlands Antilles", "label": "negative", "script": "latin" }, { "name": "Nikolay Kosov (latin)", "dob": "1955-6-30", "address": "Hungary", "label": "positive", "script": "latin" }, { "name": "پرنيا علی شاهی (arabic)", "dob": "1993-04-17", "address": "Cote d'Ivoire", "label": "negative", "script": "arabic" }, { "name": "monique smith (latin)", "dob": "1990-05-24", "address": "Micronesia", "label": "negative", "script": "latin" }, { "name": "janet rivera (latin)", "dob": "1991-02-05", "address": "Cambodia", "label": "negative", "script": "latin" }, { "name": "susan gutierrez (latin)", "dob": "1943-08-15", "address": "Panama", "label": "negative", "script": "latin" }, { "name": "arthur garnier (latin)", "dob": "1968-12-27", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Stanislav Voskresenskiy (latin)", "dob": "1976-9-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "pamela harvey (latin)", "dob": "1969-03-26", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Дмитрий Джиникашвили (cyrillic)", "dob": "1987-7-17", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Min Shi (latin)", "dob": "1979-5-20", "address": "China", "label": "positive", "script": "latin" }, { "name": "صلاح الدّين بو مدين (arabic)", "dob": "1974-08-05", "address": "Yemen", "label": "High Risk", "script": "arabic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 26% of variations that follow: Additionally, generate variations that perform these transformations: Remove all spaces, Duplicate a random letter, and Use first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 26, "selected_rules": [ "remove_all_spaces", "duplicate_random_letter_as_double_letter", "initial_only_first_name" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "duplicate_random_letter_as_double_letter": "Duplicate a random letter", "initial_only_first_name": "Use first name initial with last name" }, "percentage": 26 } } }, { "seed_identities_with_labels": [ { "name": "emmanuelle bouchet (latin)", "dob": "2004-02-28", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Gennadii Kudriavtsev (latin)", "dob": "1947-8-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Yuri Valyaev (latin)", "dob": "1959-4-18", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "kelly quinn (latin)", "dob": "1928-10-12", "address": "Cape Verde", "label": "negative", "script": "latin" }, { "name": "Andrey Parshin (latin)", "dob": "1974-2-19", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "'Ali al-Shufa (latin)", "dob": "1991-7-25", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "عالية القباني (arabic)", "dob": "1950-09-27", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "emmanuelle pascal (latin)", "dob": "1977-08-01", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "ann simon (latin)", "dob": "1979-05-13", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "سجا أبو داوود (arabic)", "dob": "1996-05-24", "address": "Niger", "label": "negative", "script": "arabic" }, { "name": "ابراهيم ضاهر (arabic)", "dob": "1964-7-4", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "shari hernandez (latin)", "dob": "1927-11-30", "address": "Pitcairn Islands", "label": "negative", "script": "latin" }, { "name": "justin fisher (latin)", "dob": "1934-02-22", "address": "Hong Kong", "label": "negative", "script": "latin" }, { "name": "jason adams (latin)", "dob": "1928-11-05", "address": "Vanuatu", "label": "negative", "script": "latin" }, { "name": "hayden reyes (latin)", "dob": "1980-04-22", "address": "Kenya", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 12 variations of {name}. Ensuring phonetic similarity (100% Light) and orthographic similarity (10% Light, 50% Medium, 40% Far). Approximately 59% of the total 12 variations should follow these rule-based transformations: Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 1.0 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 59, "selected_rules": [ "add_random_leading_title" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 59 } } }, { "seed_identities_with_labels": [ { "name": "натан воробьева (cyrillic)", "dob": "1934-12-14", "address": "Saint Helena", "label": "negative", "script": "cyrillic" }, { "name": "john bullock (latin)", "dob": "1955-10-17", "address": "Saint Martin", "label": "negative", "script": "latin" }, { "name": "Jay PEREZ (latin)", "dob": "1973-9-15", "address": "Philippines", "label": "positive", "script": "latin" }, { "name": "leocadia vidal (latin)", "dob": "1957-02-02", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "sarah lucas (latin)", "dob": "1937-11-01", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "samuel hill (latin)", "dob": "2002-12-22", "address": "Rwanda", "label": "negative", "script": "latin" }, { "name": "Rajabi Al-Zahir (latin)", "dob": "1969-7-5", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "Rayimbek Matraimov (latin)", "dob": "1971-5-3", "address": "Kyrgyzstan", "label": "positive", "script": "latin" }, { "name": "Дзмітрьій Замулевіч (cyrillic)", "dob": "1974-5-7", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Arkady Ponomarev (latin)", "dob": "1956-5-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "shannon ballard (latin)", "dob": "1952-03-02", "address": "Brazil", "label": "negative", "script": "latin" }, { "name": "alejo pont (latin)", "dob": "1984-02-22", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "matilde lloret (latin)", "dob": "1950-11-08", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "светозар морозова (cyrillic)", "dob": "1949-07-27", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "samantha bowen (latin)", "dob": "1983-07-13", "address": "Iceland", "label": "negative", "script": "latin" } ], "query_template": "Generate 6 variations of {name}, ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (100% Medium). Approximately 51% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Delete a random letter from {name}, Add a title prefix (Mr., Dr., etc.) to {name}, Replace {name} with its phonetic equivalent Light for 50% of cases and Medium for another 50% of cases, Replace {name} with its phonetic equivalent Medium in the remaining 49% of cases. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 51, "selected_rules": [ "delete_random_letter", "add_random_leading_title" ], "rule_descriptions": { "delete_random_letter": "Delete a random letter", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 51 } } }, { "seed_identities_with_labels": [ { "name": "بشرى بدر (arabic)", "dob": "1947-01-04", "address": "Andorra", "label": "negative", "script": "arabic" }, { "name": "amélia machado (latin)", "dob": "1937-01-01", "address": "Ilhas Salomão", "label": "negative", "script": "latin" }, { "name": "سمانه دمیرچی‌لو (arabic)", "dob": "1990-8-26", "address": "Iran", "label": "positive", "script": "arabic" }, { "name": "sofia costa (latin)", "dob": "1960-10-06", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Ahmad Shafa'i (latin)", "dob": "1968-5-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "kevin machado (latin)", "dob": "1939-02-23", "address": "Maurícia", "label": "negative", "script": "latin" }, { "name": "melissa andrews (latin)", "dob": "1936-09-06", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "erika santos (latin)", "dob": "1988-03-12", "address": "Tanzânia", "label": "negative", "script": "latin" }, { "name": "Aping JUNTARAPRAPORN (latin)", "dob": "1955-1-14", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "غيداء خزاعة (arabic)", "dob": "1959-10-05", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "mafalda gonçalves (latin)", "dob": "1970-08-07", "address": "África do Sul", "label": "negative", "script": "latin" }, { "name": "colette vasseur (latin)", "dob": "1945-11-02", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Kambiz Rostamian (latin)", "dob": "1960-8-27", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "thibaut ramos (latin)", "dob": "1955-07-13", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Sufian QUMU (latin)", "dob": "1959-6-26", "address": "Libya", "label": "positive", "script": "latin" } ], "query_template": "Generate 14 variations of {name}, ensuring phonetic similarity (30% using Light similarity metrics such as Soundex or Metaphone to create similar-sounding names like \"Smith\" -> \"Smythe\", 40% using Medium similarity metrics such as Levenshtein distance to create moderately similar names, and 30% using Far similarity metrics such as Jaro-Winkler distance to create dissimilar names) and orthographic similarity (50% generating Light variations that replace single characters like \"John\" -> \"Joun\", and 50% generating Medium variations that swap syllables). Approximately 28% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that: Swap adjacent syllables. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 28, "selected_rules": [ "swap_adjacent_syllables" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 28 } } }, { "seed_identities_with_labels": [ { "name": "الينا عقیلی (arabic)", "dob": "1987-11-12", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "Aping JUNTARAPRAPORN (latin)", "dob": "1955-1-14", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "كیان اکبر پور (arabic)", "dob": "1967-05-11", "address": "Saint Helena", "label": "negative", "script": "arabic" }, { "name": "سلطان اسعد (arabic)", "dob": "1962-10-31", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "laure michel (latin)", "dob": "1992-08-06", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "michael young (latin)", "dob": "1986-08-31", "address": "Antarctica (the territory South of 60 deg S)", "label": "negative", "script": "latin" }, { "name": "jonathan campbell (latin)", "dob": "1927-06-29", "address": "Tuvalu", "label": "negative", "script": "latin" }, { "name": "Rezaei Mehdi (latin)", "dob": "1976-9-21", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "susan strickland (latin)", "dob": "1968-10-23", "address": "Dominica", "label": "negative", "script": "latin" }, { "name": "Xenia Iudaeva (latin)", "dob": "1970-3-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "alexandrie albert (latin)", "dob": "1995-03-11", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "james brewer (latin)", "dob": "1993-12-31", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "victor zimmerman (latin)", "dob": "1946-01-05", "address": "Slovenia", "label": "negative", "script": "latin" }, { "name": "Nufail Akbar (latin)", "dob": "1972-3-26", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "jennifer barker (latin)", "dob": "1985-01-16", "address": "Nigeria", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 55% of variations that follow: Additionally, generate variations that perform these transformations: Remove all spaces, and Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 55, "selected_rules": [ "remove_all_spaces", "remove_random_consonant" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "remove_random_consonant": "Remove a random consonant" }, "percentage": 55 } } }, { "seed_identities_with_labels": [ { "name": "isaac verdejo (latin)", "dob": "1941-03-25", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Aleksandr Zhivlyuk (latin)", "dob": "1981-1-13", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "Alexander Rakitin (latin)", "dob": "1958-5-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Павел Тараканов (cyrillic)", "dob": "1982-6-21", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "mohamed valenciano (latin)", "dob": "1979-01-18", "address": "India", "label": "negative", "script": "latin" }, { "name": "Theint Htet (latin)", "dob": "1999-5-21", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "amor capdevila (latin)", "dob": "1998-11-25", "address": "Myanmar", "label": "negative", "script": "latin" }, { "name": "áfrica font (latin)", "dob": "1984-11-17", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "coral bertrán (latin)", "dob": "1929-07-31", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "kelly campos (latin)", "dob": "1966-12-31", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "concha nuñez (latin)", "dob": "1960-08-19", "address": "El Salvador", "label": "negative", "script": "latin" }, { "name": "عبد الشّكور الحسيني (arabic)", "dob": "1938-04-17", "address": "Fiji", "label": "negative", "script": "arabic" }, { "name": "аристарх быков (cyrillic)", "dob": "1934-04-01", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "chucho sancho (latin)", "dob": "2005-09-14", "address": "República Unida de Tanzanía", "label": "negative", "script": "latin" }, { "name": "Tun Naing (latin)", "dob": "1963-4-30", "address": "Burma", "label": "positive", "script": "latin" } ], "query_template": "Generate 12 variations of {name}. ensuring phonetic similarity and orthographic similarity. For phonetic similarity, implement: 10% Light ({name}ly), 50% Medium ({name}i, {name}y, {name}ie), 40% Far (ph{name}, k{name}). For orthographic similarity, implement: 100% Light ({name}e, {name}a). Additionally, generate variations that approximately represent 43% of the total 12 variations: Replace double letters with a single letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 1.0 }, "rule_based": { "rule_percentage": 43, "selected_rules": [ "replace_double_letters_with_single_letter" ], "rule_descriptions": { "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 43 } } }, { "seed_identities_with_labels": [ { "name": "carolyn davis (latin)", "dob": "1938-11-18", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "susan seguin (latin)", "dob": "1940-02-14", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "édith rodrigues (latin)", "dob": "2004-08-09", "address": "Bulgarie", "label": "negative", "script": "latin" }, { "name": "Jingfeng Gao (latin)", "dob": "1975-7-5", "address": "China", "label": "positive", "script": "latin" }, { "name": "richard martinez (latin)", "dob": "1950-07-16", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "yves duhamel (latin)", "dob": "1999-08-09", "address": "Aruba", "label": "negative", "script": "latin" }, { "name": "Николай Коломейцев (cyrillic)", "dob": "1956-9-1", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Wai-chung Lo (latin)", "dob": "1961-11-19", "address": "Hong Kong", "label": "positive", "script": "latin" }, { "name": "sophie picard (latin)", "dob": "2004-12-29", "address": "Wallis et Futuna (Îles)", "label": "negative", "script": "latin" }, { "name": "макар игнатьев (cyrillic)", "dob": "1991-05-29", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "américo almansa (latin)", "dob": "1975-03-03", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "فرج آل سلطان (arabic)", "dob": "1987-10-30", "address": "Vanuatu", "label": "negative", "script": "arabic" }, { "name": "Abdullah Faisal (latin)", "dob": "1963-9-10", "address": "Jamaica", "label": "positive", "script": "latin" }, { "name": "Viktor Mozhelyansky (latin)", "dob": "1964-5-10", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "alice olivier (latin)", "dob": "1950-08-16", "address": "Madagascar", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 38% of variations that follow: Additionally, generate variations that perform these transformations: Add a title prefix (Mr., Dr., etc.), Remove all spaces, and Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 38, "selected_rules": [ "add_random_leading_title", "remove_all_spaces", "add_random_trailing_title" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "remove_all_spaces": "Remove all spaces", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 38 } } }, { "seed_identities_with_labels": [ { "name": "elvira valenciano (latin)", "dob": "1997-11-03", "address": "Papua Nueva Guinea", "label": "negative", "script": "latin" }, { "name": "клавдий трофимова (cyrillic)", "dob": "1998-09-29", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "catherine mitchell (latin)", "dob": "1929-05-13", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "charles gimenez (latin)", "dob": "1974-05-17", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "teobaldo galindo (latin)", "dob": "1940-03-05", "address": "Libia", "label": "negative", "script": "latin" }, { "name": "Алег Пятроў (cyrillic)", "dob": "1962-3-26", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Irina Kosenko (latin)", "dob": "1973-1-16", "address": "North Macedonia, The Republic of", "label": "positive", "script": "latin" }, { "name": "juliana atienza (latin)", "dob": "1997-09-19", "address": "Sri Lanka", "label": "negative", "script": "latin" }, { "name": "Anatoly Bifov (latin)", "dob": "1963-1-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Boris Kovalchuk (latin)", "dob": "1977-12-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "priscila galindo (latin)", "dob": "1992-12-10", "address": "Togo", "label": "negative", "script": "latin" }, { "name": "gaspar farré (latin)", "dob": "1932-04-07", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "إخلاص آل مقطة (arabic)", "dob": "1986-10-05", "address": "Rwanda", "label": "negative", "script": "arabic" }, { "name": "matthieu cordier (latin)", "dob": "1990-08-18", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Vasiliy Golubev (latin)", "dob": "1957-1-30", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 8 variations of {name}. Ensuring phonetic similarity (20% Light, e.g. {name}e, 60% Medium, e.g. {name}ian, 20% Far, e.g. Z{n}a{m}{e}), and orthographic similarity (100% Light, e.g. V{n}{a}n{e}). Approximately 18% of the total 8 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Convert name to initials (e.g. {n}.{a}.{e}), and Replace random vowels with different vowels (e.g. V{n}{i}{e}, Z{n}{u}{o}). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 1.0 }, "rule_based": { "rule_percentage": 18, "selected_rules": [ "shorten_name_to_initials", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 18 } } }, { "seed_identities_with_labels": [ { "name": "philippe grégoire (latin)", "dob": "1961-11-15", "address": "Botswana", "label": "negative", "script": "latin" }, { "name": "tiffany blair (latin)", "dob": "1992-05-10", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "luc rodriguez (latin)", "dob": "1957-06-06", "address": "Géorgie du Sud et Sandwich du Sud (Îles)", "label": "negative", "script": "latin" }, { "name": "فائق آل قصير (arabic)", "dob": "1975-04-27", "address": "France", "label": "negative", "script": "arabic" }, { "name": "laure toussaint (latin)", "dob": "1933-02-17", "address": "Zaïre", "label": "negative", "script": "latin" }, { "name": "alexandrie barbe (latin)", "dob": "1943-06-16", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Mukhtar Shah (latin)", "dob": "1939-11-8", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "Aleksandr Nikolov (latin)", "dob": "1962-2-19", "address": "Bulgaria", "label": "positive", "script": "latin" }, { "name": "Констянтин Калашніков (cyrillic)", "dob": "1993-1-26", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Vali Arlanizadeh (latin)", "dob": "1979-11-22", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "агафья мишина (cyrillic)", "dob": "1965-05-14", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "kyle jones (latin)", "dob": "1938-10-26", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "alice lambert (latin)", "dob": "2002-02-04", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "guillaume bourdon (latin)", "dob": "1992-11-30", "address": "Géorgie", "label": "negative", "script": "latin" }, { "name": "Arif Khot (latin)", "dob": "1970-8-21", "address": "India", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 57% of variations that follow: Additionally, generate variations that perform these transformations: Replace spaces with special characters, Use first name initial with last name, and Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 57, "selected_rules": [ "replace_spaces_with_random_special_characters", "initial_only_first_name", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "initial_only_first_name": "Use first name initial with last name", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 57 } } }, { "seed_identities_with_labels": [ { "name": "rebecca fernandez (latin)", "dob": "1925-03-07", "address": "Guinea", "label": "negative", "script": "latin" }, { "name": "carolina cuevas (latin)", "dob": "1945-12-29", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Valery Pakhnits (latin)", "dob": "1953-1-22", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "clara antunes (latin)", "dob": "1952-01-20", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "duane schmidt (latin)", "dob": "2005-08-05", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "وضّاح حوالة (arabic)", "dob": "2007-04-24", "address": "Kiribati", "label": "negative", "script": "arabic" }, { "name": "Rayimbek Matraimov (latin)", "dob": "1971-5-3", "address": "Kyrgyzstan", "label": "positive", "script": "latin" }, { "name": "Muhammad Ahmad (latin)", "dob": "1990-10-7", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "урима симеонов (cyrillic)", "dob": "1992-05-08", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "justin smith (latin)", "dob": "1972-09-18", "address": "Austria", "label": "negative", "script": "latin" }, { "name": "Yuriy Danyltsev (latin)", "dob": "1974-9-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "geneviève rousset (latin)", "dob": "1994-07-09", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "katherine winters (latin)", "dob": "1962-10-19", "address": "Tanzania", "label": "negative", "script": "latin" }, { "name": "theresa miller (latin)", "dob": "1930-10-01", "address": "Botswana", "label": "negative", "script": "latin" }, { "name": "Виктория Родина (cyrillic)", "dob": "1989-10-29", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate 9 variations of {name}. Ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 31% of the total 9 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 31, "selected_rules": [ "add_random_trailing_title", "swap_random_letter" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 31 } } }, { "seed_identities_with_labels": [ { "name": "Андрей Белоусов (cyrillic)", "dob": "1959-3-17", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "alfred letellier (latin)", "dob": "1942-01-29", "address": "Moldavie", "label": "negative", "script": "latin" }, { "name": "Vadym Valiakhmetov (latin)", "dob": "1981-5-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "сидор фадеев (cyrillic)", "dob": "1964-09-21", "address": "Denmark", "label": "negative", "script": "cyrillic" }, { "name": "Xutong Qin (latin)", "dob": "1994-4-29", "address": "China", "label": "positive", "script": "latin" }, { "name": "roland labbé (latin)", "dob": "1989-11-01", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "tristan masson (latin)", "dob": "1935-12-11", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "virginie leclercq (latin)", "dob": "1973-02-09", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "stéphanie payet (latin)", "dob": "1961-11-13", "address": "Haïti", "label": "negative", "script": "latin" }, { "name": "مبین پویان (arabic)", "dob": "1943-02-24", "address": "Iran", "label": "High Risk", "script": "arabic" }, { "name": "michel lefort (latin)", "dob": "1977-07-10", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Ghunia ABDRABBA (latin)", "dob": "1957-9-2", "address": "United Kingdom", "label": "positive", "script": "latin" }, { "name": "richard laurent (latin)", "dob": "1991-01-04", "address": "Kazakhstan", "label": "negative", "script": "latin" }, { "name": "raymond boulanger (latin)", "dob": "1948-02-01", "address": "Burundi", "label": "negative", "script": "latin" }, { "name": "Alexey Sukhodolov (latin)", "dob": "1974-4-19", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 11 execution vectors for each target identity {name}, ensuring phonetic similarity (70% Light variations that differ by one syllable, 30% Medium variations that sound similar but not identical) and orthographic similarity (30% Light variations with a single character change, 40% Medium variations with two to three character changes, 30% Far variations with four or more character changes). Approximately 36% of the total 11 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Remove a random consonant from {name}, and Replace one or more random vowels in {name} with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 36, "selected_rules": [ "remove_random_consonant", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 36 } } }, { "seed_identities_with_labels": [ { "name": "imelda valderrama (latin)", "dob": "2003-07-21", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "Igor Rotenberh (latin)", "dob": "1973-5-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Светлана Горячева (cyrillic)", "dob": "1947-6-3", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "madeleine clément (latin)", "dob": "1972-12-18", "address": "Ghana", "label": "negative", "script": "latin" }, { "name": "Sergey Lavrov (latin)", "dob": "1950-3-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jeannine robin (latin)", "dob": "1928-05-27", "address": "Allemagne", "label": "negative", "script": "latin" }, { "name": "аникей зайцев (cyrillic)", "dob": "2005-07-19", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "Tun Latt (latin)", "dob": "1969-2-6", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "Andrei DUBEN (latin)", "dob": "1970-12-12", "address": "Chile", "label": "positive", "script": "latin" }, { "name": "павлин йоткова (cyrillic)", "dob": "1969-03-17", "address": "British Indian Ocean Territory (Chagos Archipelago)", "label": "negative", "script": "cyrillic" }, { "name": "virginie raymond (latin)", "dob": "1934-11-27", "address": "Comores", "label": "negative", "script": "latin" }, { "name": "maggie dufour (latin)", "dob": "1956-05-12", "address": "Roumanie", "label": "negative", "script": "latin" }, { "name": "renée verdier (latin)", "dob": "2003-06-24", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "maryse benoit (latin)", "dob": "1984-09-18", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "laura green (latin)", "dob": "1965-01-17", "address": "South Africa", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 11 variations of {name}, ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 40% of the total 11 variations should follow these rule-based transformations: \nReplace {name} with \"{name}\". Replace spaces in {name} with \"%\", \"#\" or \"$\". Replace \"a\" with \"e\" and vice versa in {name}.\n[VALIDATION HINTS]: Apply these rule-based transformations: Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 40, "selected_rules": [ "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 40 } } }, { "seed_identities_with_labels": [ { "name": "展鹏 蔡 (chinese)", "dob": "1970-8-28", "address": "Hong Kong", "label": "positive", "script": "chinese" }, { "name": "katrina davidson (latin)", "dob": "1947-10-22", "address": "Gambia", "label": "negative", "script": "latin" }, { "name": "jean toussaint (latin)", "dob": "1991-03-05", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "stephen cruz (latin)", "dob": "1969-09-12", "address": "Faroe Islands", "label": "negative", "script": "latin" }, { "name": "andrea kelly (latin)", "dob": "1926-03-19", "address": "Niue", "label": "negative", "script": "latin" }, { "name": "Valery GABRIEL (latin)", "dob": "1956-1-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "hector burns (latin)", "dob": "1973-10-19", "address": "Italy", "label": "negative", "script": "latin" }, { "name": "Yuri Karayev (latin)", "dob": "1966-6-21", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "joshua henderson (latin)", "dob": "1991-09-05", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Hassan AYACH (latin)", "dob": "1963-5-1", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "Ihor Rotenberg (latin)", "dob": "1973-5-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "نرگس محمدی (arabic)", "dob": "1986-09-01", "address": "Malaysia", "label": "negative", "script": "arabic" }, { "name": "валерий бобылев (cyrillic)", "dob": "1941-07-15", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "francisca millán (latin)", "dob": "1999-02-19", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "shirley hernandez (latin)", "dob": "1958-08-27", "address": "Namibia", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 100% Medium, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 29% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random consonant, Convert name to initials, and Replace random consonants with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 29, "selected_rules": [ "remove_random_consonant", "shorten_name_to_initials", "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "shorten_name_to_initials": "Convert name to initials", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 29 } } }, { "seed_identities_with_labels": [ { "name": "philippine cousin (latin)", "dob": "1984-07-02", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "مهدي أستيتية (arabic)", "dob": "1935-02-22", "address": "Mayotte", "label": "negative", "script": "arabic" }, { "name": "Vyacheslav Fomichev (latin)", "dob": "1965-4-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "caroline rodriguez (latin)", "dob": "1968-09-08", "address": "Koweit", "label": "negative", "script": "latin" }, { "name": "emmanuelle bouvet (latin)", "dob": "1953-03-14", "address": "Benin", "label": "negative", "script": "latin" }, { "name": "Saad AL-FAGIH (latin)", "dob": "1957-1-31", "address": "United Kingdom", "label": "positive", "script": "latin" }, { "name": "Yuriy Soloviev (latin)", "dob": "1970-4-13", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "rosa manzano (latin)", "dob": "1980-10-01", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "رشيد البزال (arabic)", "dob": "1994-8-7", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "christine skinner (latin)", "dob": "1955-02-10", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "رستم الموسوس (arabic)", "dob": "1944-09-06", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "patricia godard (latin)", "dob": "1929-09-29", "address": "Polynésie française", "label": "negative", "script": "latin" }, { "name": "Mohammad Ansari (latin)", "dob": "1975-11-22", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "antoinette jean (latin)", "dob": "1958-04-06", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "margaux colas (latin)", "dob": "1980-02-04", "address": "Cayman (Îles)", "label": "negative", "script": "latin" } ], "query_template": "Generate exactly 7 execution vectors for each target identity {name}, ensuring phonetic similarity with the following distributions: 10% Light, 30% Medium, and 60% Far. Ensure orthographic similarity with the following distributions: 30% Light, 40% Medium, and 30% Far. Approximately 11% of the total 7 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Remove a random vowel from {name}, and Swap adjacent consonants in {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 11, "selected_rules": [ "remove_random_vowel", "swap_adjacent_consonants" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 11 } } }, { "seed_identities_with_labels": [ { "name": "olivie renaud (latin)", "dob": "1968-12-16", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Алексей Гуреев (cyrillic)", "dob": "1984-4-26", "address": "Turkey", "label": "positive", "script": "cyrillic" }, { "name": "ariana machado (latin)", "dob": "1970-01-03", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Oman Abdulrohman (latin)", "dob": "1972-1-5", "address": "Indonesia", "label": "positive", "script": "latin" }, { "name": "océane diallo (latin)", "dob": "1941-03-29", "address": "Vatican (Etat du)", "label": "negative", "script": "latin" }, { "name": "августин кучкуделова (cyrillic)", "dob": "1940-09-10", "address": "Singapore", "label": "negative", "script": "cyrillic" }, { "name": "Rafi Udin (latin)", "dob": "1966-6-3", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "jacques roux (latin)", "dob": "1970-12-26", "address": "Bermudes (Les)", "label": "negative", "script": "latin" }, { "name": "raymond mercier (latin)", "dob": "1951-04-15", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "дементий сорокина (cyrillic)", "dob": "1940-03-09", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "roland renault (latin)", "dob": "1985-01-16", "address": "Qatar", "label": "negative", "script": "latin" }, { "name": "Qari Amjad (latin)", "dob": "1979-4-17", "address": "Afghanistan", "label": "positive", "script": "latin" }, { "name": "élise marin (latin)", "dob": "1951-04-08", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Yunnian GUO (latin)", "dob": "1961-12-5", "address": "China", "label": "positive", "script": "latin" }, { "name": "brigitte normand (latin)", "dob": "1944-02-16", "address": "Ethiopie", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 10% Light, 50% Medium, 40% Far, and also include 31% of variations that follow: Additionally, generate variations that perform these transformations: Replace random vowels with different vowels, Replace spaces with special characters, and Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 31, "selected_rules": [ "replace_random_vowel_with_random_vowel", "replace_spaces_with_random_special_characters", "add_random_trailing_title" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 31 } } }, { "seed_identities_with_labels": [ { "name": "capucine lacombe (latin)", "dob": "1996-05-03", "address": "Guyane française", "label": "negative", "script": "latin" }, { "name": "chus sevillano (latin)", "dob": "1990-07-15", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "reginald henderson (latin)", "dob": "2005-12-11", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "jeanne maillot (latin)", "dob": "1936-07-20", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "victor masse (latin)", "dob": "1930-01-18", "address": "Roumanie", "label": "negative", "script": "latin" }, { "name": "елица сапунджиева (cyrillic)", "dob": "1981-03-12", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "'Abdul-Wahab AL-HUMAYQANI (latin)", "dob": "1972-8-4", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "nicolas michaud (latin)", "dob": "1987-07-31", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "محيي الدّين الإغباري (arabic)", "dob": "2000-04-19", "address": "Albania", "label": "negative", "script": "arabic" }, { "name": "madeleine chauvin (latin)", "dob": "2000-12-28", "address": "Ghana", "label": "negative", "script": "latin" }, { "name": "Min Shi (latin)", "dob": "1979-5-20", "address": "China", "label": "positive", "script": "latin" }, { "name": "Cha-hyo'ng Ku (latin)", "dob": "1957-9-8", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "Alexander Rakitin (latin)", "dob": "1958-5-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "margot benard (latin)", "dob": "1999-05-24", "address": "Ukraine", "label": "negative", "script": "latin" }, { "name": "Аляксандр Вецяневіч (cyrillic)", "dob": "1976-6-20", "address": "Belarus", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate 15 variations of {name}. Ensuring phonetic similarity (100% Medium) and orthographic similarity (70% Light, 30% Medium). Approximately 53% of the total 15 variations should follow these rule-based transformations: Use first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 53, "selected_rules": [ "initial_only_first_name" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name" }, "percentage": 53 } } }, { "seed_identities_with_labels": [ { "name": "flor cunha (latin)", "dob": "1967-11-15", "address": "Hungria", "label": "negative", "script": "latin" }, { "name": "Yakub Zakriyev (latin)", "dob": "1990-10-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "luana monteiro (latin)", "dob": "1963-12-01", "address": "Mauritânia", "label": "negative", "script": "latin" }, { "name": "севастьян осипов (cyrillic)", "dob": "1987-01-01", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "che quevedo (latin)", "dob": "1932-01-29", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "lara andrade (latin)", "dob": "1984-04-26", "address": "Guame", "label": "negative", "script": "latin" }, { "name": "Maher Salah (latin)", "dob": "1957-10-22", "address": "Saudi Arabia", "label": "positive", "script": "latin" }, { "name": "ana gonçalves (latin)", "dob": "2004-10-07", "address": "África do Sul", "label": "negative", "script": "latin" }, { "name": "Юлия Афанасьева (cyrillic)", "dob": "1988-2-24", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Umm Layth (latin)", "dob": "1994-5-11", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "joseph harding (latin)", "dob": "1986-03-09", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Marina Mordashova (latin)", "dob": "1979-5-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "blake martinez (latin)", "dob": "1941-08-13", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "miriam melo (latin)", "dob": "1969-06-21", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "добрина тухчиев (cyrillic)", "dob": "1992-06-19", "address": "Dominica", "label": "negative", "script": "cyrillic" } ], "query_template": "Generate 6 name variations for {name}, ensuring phonetic similarity by applying 30% Light transformations (e.g., Metaphone, Soundex), 40% Medium transformations (e.g., Levenshtein distance), and 30% Far transformations. \nAdditionally, generate name variations that follow orthographic similarity requirements by applying 30% Light transformations (e.g., typo-like errors), 40% Medium transformations (e.g., visually similar spellings), and 30% Far transformations.\nApproximately 57% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Remove all spaces from {name}, and Duplicate a random letter in {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 57, "selected_rules": [ "remove_all_spaces", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 57 } } }, { "seed_identities_with_labels": [ { "name": "محيا مجتبوی (arabic)", "dob": "1945-01-24", "address": "Korea", "label": "negative", "script": "arabic" }, { "name": "Денис Чемоданов (cyrillic)", "dob": "1977-9-11", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "Yuri Shvytkin (latin)", "dob": "1965-5-24", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "celestina cabello (latin)", "dob": "1979-02-26", "address": "Suecia", "label": "negative", "script": "latin" }, { "name": "brian thomas (latin)", "dob": "1925-09-28", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "godofredo león (latin)", "dob": "1940-02-17", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "tomás guardiola (latin)", "dob": "1979-06-14", "address": "República Centroafricana", "label": "negative", "script": "latin" }, { "name": "Wai-chung Lo (latin)", "dob": "1961-11-19", "address": "Hong Kong", "label": "positive", "script": "latin" }, { "name": "maura vallejo (latin)", "dob": "1980-01-20", "address": "Belarús", "label": "negative", "script": "latin" }, { "name": "martine salmon (latin)", "dob": "1951-06-20", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Saad AL-FAGIH (latin)", "dob": "1957-1-31", "address": "United Kingdom", "label": "positive", "script": "latin" }, { "name": "julien guillet (latin)", "dob": "1934-03-13", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "paula serna (latin)", "dob": "1966-11-07", "address": "Vietman", "label": "negative", "script": "latin" }, { "name": "клавдия тимофеев (cyrillic)", "dob": "1974-10-09", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "Dmitry Lelikov (latin)", "dob": "1968-5-9", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 10% Light, 50% Medium, 40% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 45% of variations that follow: Additionally, generate variations that: Convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 45, "selected_rules": [ "shorten_name_to_initials" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials" }, "percentage": 45 } } }, { "seed_identities_with_labels": [ { "name": "فطين بني هاجر (arabic)", "dob": "1960-03-22", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "Andrey Shishkin (latin)", "dob": "1959-3-13", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "james jones (latin)", "dob": "1987-12-02", "address": "Bahamas", "label": "negative", "script": "latin" }, { "name": "demetrio cabanillas (latin)", "dob": "1982-01-15", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "фока киселева (cyrillic)", "dob": "1941-07-21", "address": "Marshall Islands", "label": "negative", "script": "cyrillic" }, { "name": "Maria Faassen (latin)", "dob": "1985-4-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "raymond goodwin (latin)", "dob": "1951-10-14", "address": "Denmark", "label": "negative", "script": "latin" }, { "name": "rodney wong (latin)", "dob": "1960-01-27", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Роман Лябихов (cyrillic)", "dob": "1973-5-7", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Kassem Hijazi (latin)", "dob": "1972-9-15", "address": "Paraguay", "label": "positive", "script": "latin" }, { "name": "Olimjon Tohtahunov (latin)", "dob": "1949-12-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "nathaniel martin (latin)", "dob": "1939-06-03", "address": "Marshall Islands", "label": "negative", "script": "latin" }, { "name": "vanessa velez (latin)", "dob": "1981-04-09", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "colette lemaire (latin)", "dob": "1993-04-28", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "samantha clark (latin)", "dob": "1989-11-12", "address": "Eritrea", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 10% Light, 30% Medium, 60% Far, and also include 37% of variations that follow: Additionally, generate variations that perform these transformations: Swap random adjacent letters, and Replace random consonants with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 37, "selected_rules": [ "swap_random_letter", "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 37 } } }, { "seed_identities_with_labels": [ { "name": "valentine gaudin (latin)", "dob": "1934-05-14", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "alyssa thomas (latin)", "dob": "1926-04-18", "address": "Korea", "label": "negative", "script": "latin" }, { "name": "david jones (latin)", "dob": "1934-01-22", "address": "Montserrat", "label": "negative", "script": "latin" }, { "name": "Laith Al-Khazali (latin)", "dob": "1975-10-14", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "Aegli Tamani-Phella (latin)", "dob": "1978-6-21", "address": "Cyprus", "label": "positive", "script": "latin" }, { "name": "фадей беляева (cyrillic)", "dob": "1990-02-23", "address": "Seychelles", "label": "negative", "script": "cyrillic" }, { "name": "ian campbell (latin)", "dob": "1966-07-30", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "kelsey bush (latin)", "dob": "1969-04-01", "address": "Marshall Islands", "label": "negative", "script": "latin" }, { "name": "monique guillon (latin)", "dob": "1984-11-09", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Ekaterina Kharchenko (latin)", "dob": "1977-8-11", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Rida Gull (latin)", "dob": "1981-12-25", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "melanie cooper (latin)", "dob": "2006-09-01", "address": "Denmark", "label": "negative", "script": "latin" }, { "name": "diana garrett (latin)", "dob": "1957-11-21", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "евстигней селезнева (cyrillic)", "dob": "1996-08-23", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "Максут Шадаев (cyrillic)", "dob": "1979-11-11", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate exactly 9 name variations for {name}, ensuring phonetic similarity by applying a transformation that changes sound to: \n- For 30% of the variations: Add \"son\" or \"sen\"\n- For 40% of the variations: Replace initial consonant(s) with an approximate English pronunciation\n- For 30% of the variations: Change final consonant cluster to an equivalent one in another language\nAnd orthographic similarity by applying a transformation that changes spelling to: \n- For 100% of the variations: Medium similarity (e.g. remove/replace/add letter \"i\", change double consonants, swap vowels)\nApproximately 55% of the total 9 variations should follow these rule-based transformations: Additionally, generate variations that: Insert a random letter at a randomly chosen position in the name {name}.\n[VALIDATION HINTS]: Phonetic similarity: 30% Light. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 55, "selected_rules": [ "insert_random_letter" ], "rule_descriptions": { "insert_random_letter": "Insert a random letter" }, "percentage": 55 } } }, { "seed_identities_with_labels": [ { "name": "isaura peña (latin)", "dob": "1970-06-14", "address": "Dinamarca", "label": "negative", "script": "latin" }, { "name": "Zurab Makiyev (latin)", "dob": "1976-9-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "لتين شهران (arabic)", "dob": "1990-01-11", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "rogelio palau (latin)", "dob": "1996-06-13", "address": "Belice", "label": "negative", "script": "latin" }, { "name": "مهسا تهرانی (arabic)", "dob": "1930-03-11", "address": "Marshall Islands", "label": "negative", "script": "arabic" }, { "name": "albino marqués (latin)", "dob": "1982-01-16", "address": "Turkmenistán", "label": "negative", "script": "latin" }, { "name": "goyo alcolea (latin)", "dob": "1988-11-30", "address": "Islas Salomón", "label": "negative", "script": "latin" }, { "name": "mary cox (latin)", "dob": "1935-08-01", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Francois Okunji (latin)", "dob": "1949-7-10", "address": "Congo, Democratic Republic of the", "label": "positive", "script": "latin" }, { "name": "simone martineau (latin)", "dob": "1987-04-23", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Elias AL-KASHMIRI (latin)", "dob": "1964-2-10", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "Oleg Tkach (latin)", "dob": "1967-9-23", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Иван Мусатов (cyrillic)", "dob": "1976-2-14", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "audrey dupuis (latin)", "dob": "1929-02-14", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "trinidad bernad (latin)", "dob": "1985-07-15", "address": "Bolivia", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 8 execution vectors for each target identity {name}, ensuring phonetic similarity with the following percentages:\n- Light: 20%\n- Medium: 60%\n- Far: 20%\nAnd orthographic similarity with the following percentages:\n- Light: 10%\n- Medium: 50%\n- Far: 40%.\nApproximately 42% of the total variations should follow these rule-based transformations: \n- Remove a random consonant\n- Swap adjacent syllables\n- Swap adjacent consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 42, "selected_rules": [ "remove_random_consonant", "swap_adjacent_syllables", "swap_adjacent_consonants" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "swap_adjacent_syllables": "Swap adjacent syllables", "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 42 } } }, { "seed_identities_with_labels": [ { "name": "peter odonnell (latin)", "dob": "1973-09-19", "address": "Saint Pierre and Miquelon", "label": "negative", "script": "latin" }, { "name": "maría luisa perelló (latin)", "dob": "1945-06-09", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "cynthia ryan (latin)", "dob": "1971-02-01", "address": "Malta", "label": "negative", "script": "latin" }, { "name": "julie bonilla (latin)", "dob": "1944-04-22", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "مرتجي بنو ضمرة (arabic)", "dob": "1982-12-25", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "trevor thomas (latin)", "dob": "1946-09-08", "address": "Romania", "label": "negative", "script": "latin" }, { "name": "Ivan Prokopenko (latin)", "dob": "1973-9-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Дмитрий Барышников (cyrillic)", "dob": "1976-11-6", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "лозан европовкирилов (cyrillic)", "dob": "1930-09-07", "address": "Chile", "label": "negative", "script": "cyrillic" }, { "name": "colleen sexton (latin)", "dob": "1938-09-06", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Mariam Barreh (latin)", "dob": "1971-4-10", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "amy hinton (latin)", "dob": "1955-08-02", "address": "Guinea", "label": "negative", "script": "latin" }, { "name": "Hossein Arani (latin)", "dob": "1964-12-22", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "rachel vasquez (latin)", "dob": "1951-06-19", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Vladimir Uyba (latin)", "dob": "1958-10-4", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 8 execution vectors ({name} variations) for each target identity. \nEnsure phonetic similarity (sound-alike names) with the following distribution: 10% Light, 30% Medium, 60% Far. \nEnsure orthographic similarity (visually similar spellings) with the following distribution: 30% Light, 40% Medium, 30% Far. \nApproximately 27% of the total 8 variations should follow these rule-based transformations: Additionally, generate variations that: Remove a random consonant from {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 27, "selected_rules": [ "remove_random_consonant" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant" }, "percentage": 27 } } }, { "seed_identities_with_labels": [ { "name": "vincent chevalier (latin)", "dob": "1953-08-19", "address": "Pitcairn (Îles)", "label": "negative", "script": "latin" }, { "name": "اميرمحمد ملکیان (arabic)", "dob": "1940-11-20", "address": "Estonia", "label": "negative", "script": "arabic" }, { "name": "christiane ramos (latin)", "dob": "2001-08-23", "address": "Ouzbékistan", "label": "negative", "script": "latin" }, { "name": "laurence muller (latin)", "dob": "1947-01-20", "address": "Luxembourg", "label": "negative", "script": "latin" }, { "name": "emmanuelle brun (latin)", "dob": "1932-10-17", "address": "Arménie", "label": "negative", "script": "latin" }, { "name": "Володимир Сальдо (cyrillic)", "dob": "1956-6-12", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "وائل بني رشيد (arabic)", "dob": "1954-05-27", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "leonardo rodrigues (latin)", "dob": "1948-11-26", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "jérôme sauvage (latin)", "dob": "1986-11-05", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Mikhail Shchapov (latin)", "dob": "1975-9-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "michelle robinson (latin)", "dob": "1973-06-29", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "anastasie leblanc (latin)", "dob": "1995-12-24", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Pye Tayza (latin)", "dob": "1987-1-29", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "Irina Petina (latin)", "dob": "1972-8-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Slimane Chabani (latin)", "dob": "1965-12-5", "address": "Syria", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 13 execution vectors (name variations) for each target identity {name}, ensuring phonetic similarity with 10% Light variations using techniques such as \"Soundex\" or \"Metaphone\", 30% Medium variations applying more aggressive transformations, and 60% Far variations using distant name matches. Additionally, ensure orthographic similarity with 50% Light variations making minor spelling adjustments and 50% Medium variations applying moderate visual changes to the original name.\n\nApproximately 44% of the total 13 variations should follow these rule-based transformations: Reorder name parts by swapping or re-arranging the order of the first, middle, and last names. Delete a random letter from one or more of the name parts. Insert a random letter into one or more of the name parts, maintaining the same overall structure as the original name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 44, "selected_rules": [ "name_parts_permutations", "delete_random_letter", "insert_random_letter" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts", "delete_random_letter": "Delete a random letter", "insert_random_letter": "Insert a random letter" }, "percentage": 44 } } }, { "seed_identities_with_labels": [ { "name": "богдан игнатова (cyrillic)", "dob": "2006-06-17", "address": "Saint Helena", "label": "negative", "script": "cyrillic" }, { "name": "Serhiy Melnychuk (latin)", "dob": "1976-9-30", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "sébastien huet (latin)", "dob": "1941-02-09", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Дмитрий Джиникашвили (cyrillic)", "dob": "1987-7-17", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "steven thomas (latin)", "dob": "1953-07-08", "address": "Reunion", "label": "negative", "script": "latin" }, { "name": "جميل بعلبكي (arabic)", "dob": "1949-12-09", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "Seyed Ghoreishi (latin)", "dob": "1964-9-27", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "gilbert courtois (latin)", "dob": "2007-03-08", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "carlos lane (latin)", "dob": "1950-11-04", "address": "Papua New Guinea", "label": "negative", "script": "latin" }, { "name": "Bella Chen (latin)", "dob": "1992-11-15", "address": "China", "label": "positive", "script": "latin" }, { "name": "adélaïde pages (latin)", "dob": "2002-10-11", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "lisa greene (latin)", "dob": "1939-05-19", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "edward moss (latin)", "dob": "1961-09-08", "address": "Trinidad and Tobago", "label": "negative", "script": "latin" }, { "name": "bonnie reynolds (latin)", "dob": "1967-11-27", "address": "Luxembourg", "label": "negative", "script": "latin" }, { "name": "Elena Milskaya (latin)", "dob": "1980-12-20", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 11 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 50% of variations that follow: Additionally, generate variations that perform these transformations: Replace double letters with a single letter, Replace spaces with special characters, and Add a title prefix (Mr., Dr., etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 50, "selected_rules": [ "replace_double_letters_with_single_letter", "replace_spaces_with_random_special_characters", "add_random_leading_title" ], "rule_descriptions": { "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 50 } } }, { "seed_identities_with_labels": [ { "name": "wilson cruz (latin)", "dob": "1936-12-19", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "jason spears (latin)", "dob": "1929-10-16", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "michelle brooks (latin)", "dob": "1945-11-23", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Ekaterina Krivoruchko (latin)", "dob": "1986-3-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Aras Karim (latin)", "dob": "1967-8-6", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "jessica vega (latin)", "dob": "1956-04-02", "address": "North Macedonia", "label": "negative", "script": "latin" }, { "name": "Vildan Zinnurov (latin)", "dob": "1964-1-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "james lopez (latin)", "dob": "1968-12-03", "address": "Cape Verde", "label": "negative", "script": "latin" }, { "name": "érica nascimento (latin)", "dob": "1932-02-25", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "wayne wilson (latin)", "dob": "1986-12-25", "address": "Montenegro", "label": "negative", "script": "latin" }, { "name": "virginia warren (latin)", "dob": "1999-10-08", "address": "Pitcairn Islands", "label": "negative", "script": "latin" }, { "name": "Надежда Лашкарёва (cyrillic)", "dob": "1961-11-8", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "Ahmad 'Ali (latin)", "dob": "1969-12-2", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "باهرة حوالة (arabic)", "dob": "1993-12-17", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "عذب بدر (arabic)", "dob": "1968-11-04", "address": "Cook Islands", "label": "negative", "script": "arabic" } ], "query_template": "Generate 11 execution vectors for {name}, ensuring phonetic similarity with 10% variations using Light transformation ({name} variant with Light phonetic modification), 30% variations using Medium transformation ({name} variant with Medium phonetic modification), and 60% variations using Far transformation ({name} variant with Far phonetic modification). Additionally, ensure orthographic similarity with 10% variations using Light transformation ({name} variant with Light orthographic modification), 50% variations using Medium transformation ({name} variant with Medium orthographic modification), and 40% variations using Far transformation ({name} variant with Far orthographic modification). Approximately 29% of the total 11 variations should follow these rule-based transformations: Additionally, generate variations that: Remove all spaces from {name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 29, "selected_rules": [ "remove_all_spaces" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces" }, "percentage": 29 } } }, { "seed_identities_with_labels": [ { "name": "Jianming Cao (latin)", "dob": "1955-9-24", "address": "China", "label": "positive", "script": "latin" }, { "name": "isidro márquez (latin)", "dob": "2006-01-02", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "amanda jones (latin)", "dob": "1974-08-01", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "прохор артемьева (cyrillic)", "dob": "1951-12-03", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "lilia miranda (latin)", "dob": "1948-07-19", "address": "Botswana", "label": "negative", "script": "latin" }, { "name": "مشاري العليان (arabic)", "dob": "2005-07-03", "address": "Barbados", "label": "negative", "script": "arabic" }, { "name": "Володимир Сальдо (cyrillic)", "dob": "1956-6-12", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "esperanza leiva (latin)", "dob": "1933-03-24", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "pastor maestre (latin)", "dob": "1951-07-15", "address": "República Dominicana", "label": "negative", "script": "latin" }, { "name": "Ahmed Afraah (latin)", "dob": "1985-8-17", "address": "Maldives", "label": "positive", "script": "latin" }, { "name": "Ho'-kyu Kim (latin)", "dob": "1970-9-15", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "théophile perez (latin)", "dob": "1933-01-17", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "claudia sola (latin)", "dob": "1932-12-06", "address": "Micronesia", "label": "negative", "script": "latin" }, { "name": "ismael sebastián (latin)", "dob": "1926-05-14", "address": "Chile", "label": "negative", "script": "latin" }, { "name": "Dzhasharbek Uzdenov (latin)", "dob": "1967-1-25", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 14 variations of {name}, ensuring phonetic similarity (20% Light, e.g. Soundex({name}), Metaphone({name}) with 1 edit distance) and (60% Medium, e.g. LevenshteinRatio({name})) and (20% Far, e.g. JaroWinklerDistance({name})). Approximately 15% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that: Remove all spaces from {name}.\n[VALIDATION HINTS]: Orthographic similarity: 100% Light. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 1.0 }, "rule_based": { "rule_percentage": 15, "selected_rules": [ "remove_all_spaces" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces" }, "percentage": 15 } } }, { "seed_identities_with_labels": [ { "name": "constance lebon (latin)", "dob": "1961-11-10", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Boris Kovalchuk (latin)", "dob": "1977-12-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Abu-Ahmad Zakkur (latin)", "dob": "1979-1-5", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "Сайгидпаша Умаханов (cyrillic)", "dob": "1962-4-3", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "dominique lemonnier (latin)", "dob": "1996-11-11", "address": "Hong Kong", "label": "negative", "script": "latin" }, { "name": "aimée fouquet (latin)", "dob": "1948-10-29", "address": "Kiribati", "label": "negative", "script": "latin" }, { "name": "david willis (latin)", "dob": "1962-07-05", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "وليد السادة الراويون (arabic)", "dob": "2000-01-27", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "кирка цоцов (cyrillic)", "dob": "1946-07-10", "address": "Mayotte", "label": "negative", "script": "cyrillic" }, { "name": "Muhammad Dayri (latin)", "dob": "1961-6-6", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Osama Ibrahim (latin)", "dob": "1976-4-2", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "valérie martin (latin)", "dob": "1961-06-10", "address": "Tokelau", "label": "negative", "script": "latin" }, { "name": "bertrand bazin (latin)", "dob": "1939-05-23", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "susan giraud (latin)", "dob": "1971-03-31", "address": "Sénégal", "label": "negative", "script": "latin" }, { "name": "laetitia nguyen (latin)", "dob": "1936-10-27", "address": "Haiti", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 14 execution vectors for {name} ensuring phonetic similarity (100% Light) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 35% of the total 14 variations should follow these rule-based transformations: Reorder name parts. \n\nAdditionally generate variations that: The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 1.0 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 35, "selected_rules": [ "name_parts_permutations" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts" }, "percentage": 35 } } }, { "seed_identities_with_labels": [ { "name": "Bella Chen (latin)", "dob": "1992-11-15", "address": "China", "label": "positive", "script": "latin" }, { "name": "Делян ПЕЕВСКИ (cyrillic)", "dob": "1980-7-27", "address": "Bulgaria", "label": "positive", "script": "cyrillic" }, { "name": "fortunato baeza (latin)", "dob": "1985-07-22", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "carlota simões (latin)", "dob": "2001-04-08", "address": "Guiné-Bissau", "label": "negative", "script": "latin" }, { "name": "leonardo pacheco (latin)", "dob": "1966-11-06", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "مأمون بنو معقل (arabic)", "dob": "1973-06-18", "address": "Syria", "label": "High Risk", "script": "arabic" }, { "name": "Anton Cherepennikov (latin)", "dob": "1983-5-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "علیرضا محجوب (arabic)", "dob": "2003-06-07", "address": "Aruba", "label": "negative", "script": "arabic" }, { "name": "daniel batista (latin)", "dob": "1946-08-26", "address": "Spratly Islands", "label": "negative", "script": "latin" }, { "name": "Ko Oo (latin)", "dob": "1972-12-2", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "remigio vidal (latin)", "dob": "1959-01-28", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "sofia fonseca (latin)", "dob": "1978-03-05", "address": "Butão", "label": "negative", "script": "latin" }, { "name": "cynthia lawson (latin)", "dob": "1954-04-15", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "rúben amorim (latin)", "dob": "1956-08-12", "address": "Ilha Norfolk", "label": "negative", "script": "latin" }, { "name": "Min Shi (latin)", "dob": "1979-5-20", "address": "China", "label": "positive", "script": "latin" } ], "query_template": "Generate 7 variations of {name}, ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (50% Light, 50% Medium). Approximately 51% of the total 7 variations should follow these rule-based transformations: Use first name initial with last name ({first_name_initial} {last_name}), Use last name followed by first name ({last_name} {first_name}), Reorder name parts to place last name first and middle name last ({middle_name} is omitted for simplicity) ({last_name}, {first_name}), Reorder name parts to place first name first and last name second ({first_name}, {last_name}), Use title case for the entire name (Title Case: {name}), Use all uppercase for the entire name (ALL CAPS: {name}). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 51, "selected_rules": [ "initial_only_first_name", "name_parts_permutations" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name", "name_parts_permutations": "Reorder name parts" }, "percentage": 51 } } }, { "seed_identities_with_labels": [ { "name": "سعد جديس (arabic)", "dob": "2006-11-11", "address": "Andorra", "label": "negative", "script": "arabic" }, { "name": "ryan rodriguez (latin)", "dob": "1976-03-03", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "susan archer (latin)", "dob": "1931-04-27", "address": "Tuvalu", "label": "negative", "script": "latin" }, { "name": "paulette camus (latin)", "dob": "1955-05-08", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Igor Zavyalov (latin)", "dob": "1960-1-19", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Mohammad Ansari (latin)", "dob": "1975-11-22", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "richard ledoux (latin)", "dob": "1951-05-03", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "Виктория Родина (cyrillic)", "dob": "1989-10-29", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Yury Nabokov (latin)", "dob": "1981-1-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "леонид кудряшов (cyrillic)", "dob": "1971-06-22", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "brittney gonzalez (latin)", "dob": "1933-07-30", "address": "Maldives", "label": "negative", "script": "latin" }, { "name": "david roberts (latin)", "dob": "1932-11-14", "address": "Slovakia (Slovak Republic)", "label": "negative", "script": "latin" }, { "name": "Wafiq Naser (latin)", "dob": "1964-7-10", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "benjamin thompson (latin)", "dob": "1944-12-27", "address": "Bosnia and Herzegovina", "label": "negative", "script": "latin" }, { "name": "james copeland (latin)", "dob": "1958-04-13", "address": "South Sudan", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 6 execution vectors for {name}, ensuring phonetic similarity (30% Light variants using suffixes -in, -en, -an, 40% Medium variants using suffixes -ie, -y, -ey, and 30% Far variants using suffixes -ster, -son) and orthographic similarity (10% Light variants by changing one letter, 50% Medium variants by swapping two letters, and 40% Far variants by adding or removing a letter). Approximately 35% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Replace random vowels with different vowels, and Replace spaces with special characters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "rule_based": { "rule_percentage": 35, "selected_rules": [ "replace_random_vowel_with_random_vowel", "replace_spaces_with_random_special_characters" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "replace_spaces_with_random_special_characters": "Replace spaces with special characters" }, "percentage": 35 } } }, { "seed_identities_with_labels": [ { "name": "Olimjon Tohtahunov (latin)", "dob": "1949-12-31", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "مُتعب الشرفاء (arabic)", "dob": "1964-10-25", "address": "Uganda", "label": "negative", "script": "arabic" }, { "name": "brooke ponce (latin)", "dob": "1933-10-30", "address": "Tuvalu", "label": "negative", "script": "latin" }, { "name": "God Nisanov (latin)", "dob": "1972-4-24", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "matthew wiggins (latin)", "dob": "1948-06-10", "address": "Lao People's Democratic Republic", "label": "negative", "script": "latin" }, { "name": "juliana almeida (latin)", "dob": "1995-07-06", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "shawn harris (latin)", "dob": "1926-03-29", "address": "Micronesia", "label": "negative", "script": "latin" }, { "name": "franck hamel (latin)", "dob": "1970-08-09", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Natalya Skorokhodova (latin)", "dob": "1968-8-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "james evans (latin)", "dob": "1980-12-11", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "sarah cooper (latin)", "dob": "1994-11-15", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "мариоллита самоходов (cyrillic)", "dob": "1998-02-25", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "David Crosby (latin)", "dob": "1990-5-5", "address": "Zimbabwe", "label": "positive", "script": "latin" }, { "name": "Евгений Марченко (cyrillic)", "dob": "1972-7-17", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "michael chavez (latin)", "dob": "2001-07-18", "address": "Saint Vincent and the Grenadines", "label": "negative", "script": "latin" } ], "query_template": "Generate 11 variations of {name}. Ensuring phonetic similarity (sound-alike names) with a distribution of 20% Light, 60% Medium, and 20% Far. Ensuring orthographic similarity (visually similar spellings) with a distribution of 20% Light, 60% Medium, and 20% Far. Approximately 42% of the total 11 variations should follow these rule-based transformations: Additionally, generate variations that: Convert {name} to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 42, "selected_rules": [ "shorten_name_to_initials" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials" }, "percentage": 42 } } }, { "seed_identities_with_labels": [ { "name": "Tun Latt (latin)", "dob": "1969-2-6", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "Madhy Akil (latin)", "dob": "1987-10-30", "address": "Colombia", "label": "positive", "script": "latin" }, { "name": "Олег Кожемяко (cyrillic)", "dob": "1962-3-17", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Evgenii Kraiushkin (latin)", "dob": "1990-3-28", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Iryna Cherkasova (latin)", "dob": "1963-5-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "roland guibert (latin)", "dob": "1976-01-04", "address": "Albanie", "label": "negative", "script": "latin" }, { "name": "michèle andre (latin)", "dob": "1978-03-31", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "isabel barros (latin)", "dob": "1941-03-23", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "benoît texier (latin)", "dob": "1942-07-25", "address": "Italie", "label": "negative", "script": "latin" }, { "name": "رؤوف آل علي (arabic)", "dob": "1994-06-09", "address": "Germany", "label": "negative", "script": "arabic" }, { "name": "alexandre dubois (latin)", "dob": "1965-05-05", "address": "Finlande", "label": "negative", "script": "latin" }, { "name": "исус дзезов (cyrillic)", "dob": "1926-08-29", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "juan josé páez (latin)", "dob": "1980-05-20", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "paul gilles (latin)", "dob": "2004-08-31", "address": "Gabon", "label": "negative", "script": "latin" }, { "name": "luciana freitas (latin)", "dob": "1993-11-27", "address": "Mozambique", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 13 variations of {name} ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 20% of the total 13 variations should follow these rule-based transformations: Additionally, generate variations that: Swap adjacent syllables. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 20, "selected_rules": [ "swap_adjacent_syllables" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 20 } } }, { "seed_identities_with_labels": [ { "name": "encarnita redondo (latin)", "dob": "1933-02-05", "address": "Australia", "label": "negative", "script": "latin" }, { "name": "Yusef Meraj (latin)", "dob": "1978-6-10", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Sergey Lavrov (latin)", "dob": "1950-3-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "sabina izaguirre (latin)", "dob": "1959-11-25", "address": "Líbano", "label": "negative", "script": "latin" }, { "name": "brian morgan (latin)", "dob": "1961-01-14", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "olga niño (latin)", "dob": "1992-08-27", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "ипатий сорокина (cyrillic)", "dob": "1950-12-15", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "ستایش طلوعی (arabic)", "dob": "1947-02-01", "address": "Uganda", "label": "negative", "script": "arabic" }, { "name": "esther lozano (latin)", "dob": "1972-03-16", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "dafne barbero (latin)", "dob": "2000-01-13", "address": "República Unida de Tanzanía", "label": "negative", "script": "latin" }, { "name": "edu boada (latin)", "dob": "1961-03-18", "address": "Nueva Zelandia", "label": "negative", "script": "latin" }, { "name": "Vladimir Polin (latin)", "dob": "1962-8-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Pavel Akifyev (latin)", "dob": "1985-12-19", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Андрей Бунаков (cyrillic)", "dob": "1971-7-5", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "pierre guillaume (latin)", "dob": "1988-12-16", "address": "Cameroon", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 10 execution vectors (name variations) for each target identity {name}. \nEnsure phonetic similarity of 100% Far. \nFor orthographic similarity, implement the following distribution: 20% Light, 60% Medium, and 20% Far.\nApproximately 42% of the total 10 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations:\nRemove a random vowel from {name}, resulting in e.g. \"{name}a\" or \"{name}e\".\nReplace double letters with a single letter in {name}, resulting in e.g. \"ee\" becoming \"e\". The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Far": 1.0 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 42, "selected_rules": [ "remove_random_vowel", "replace_double_letters_with_single_letter" ], "rule_descriptions": { "remove_random_vowel": "Remove a random vowel", "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 42 } } }, { "seed_identities_with_labels": [ { "name": "adélaïde thibault (latin)", "dob": "1925-10-03", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "مبينا صارمی (arabic)", "dob": "1982-11-17", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "mark smith (latin)", "dob": "1968-04-21", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "dean ayers (latin)", "dob": "1965-03-20", "address": "Cayman Islands", "label": "negative", "script": "latin" }, { "name": "Vitaly Markelov (latin)", "dob": "1963-8-5", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "joe herring (latin)", "dob": "1935-11-29", "address": "Trinidad and Tobago", "label": "negative", "script": "latin" }, { "name": "يافع شاهين (arabic)", "dob": "1944-02-25", "address": "Portugal", "label": "negative", "script": "arabic" }, { "name": "jonathan combs (latin)", "dob": "1967-01-28", "address": "Malaysia", "label": "negative", "script": "latin" }, { "name": "philippe dupont (latin)", "dob": "1997-03-15", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "wendy jones (latin)", "dob": "1928-10-16", "address": "Rwanda", "label": "negative", "script": "latin" }, { "name": "Chol Kim (latin)", "dob": "1964-9-27", "address": "China", "label": "positive", "script": "latin" }, { "name": "Kirill SELEZNEV (latin)", "dob": "1974-4-23", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "william gomez (latin)", "dob": "1973-07-10", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Mohamed Ibrahim (latin)", "dob": "1977-4-5", "address": "Brazil", "label": "positive", "script": "latin" }, { "name": "Николай Журавлёв (cyrillic)", "dob": "1976-9-1", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate 7 variations of {name} ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 14% of the total 7 variations should follow these rule-based transformations: \nSwap first name with last name, \nSwap middle names with each other, \nInsert \"Mc\" or \"Mac\" prefix to the start of the name, \nRemove one syllable from the first name, \nReplace one letter in the middle name, \nSwap adjacent syllables, \nChange surname suffix (e.g. Jr., Sr.) The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 14, "selected_rules": [ "swap_adjacent_syllables" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 14 } } }, { "seed_identities_with_labels": [ { "name": "nath marchal (latin)", "dob": "1985-10-22", "address": "Montserrat", "label": "negative", "script": "latin" }, { "name": "Aman Abdurahman (latin)", "dob": "1972-1-5", "address": "Indonesia", "label": "positive", "script": "latin" }, { "name": "Mohammad Khademi (latin)", "dob": "1966-4-5", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "jonathan schmidt (latin)", "dob": "1928-05-26", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "tristan germain (latin)", "dob": "1927-09-01", "address": "Îles Mineures Éloignées des États-Unis", "label": "negative", "script": "latin" }, { "name": "roger fernandez (latin)", "dob": "1942-06-26", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "عيسى قصير (arabic)", "dob": "1977-7-21", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "kurt lee (latin)", "dob": "1973-01-27", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Vasiliy Yurchenko (latin)", "dob": "1960-9-26", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "lucas grégoire (latin)", "dob": "1933-08-26", "address": "Italie", "label": "negative", "script": "latin" }, { "name": "вера мясников (cyrillic)", "dob": "1936-06-28", "address": "Russia", "label": "High Risk", "script": "cyrillic" }, { "name": "تامر عنس (arabic)", "dob": "1941-10-02", "address": "Finland", "label": "negative", "script": "arabic" }, { "name": "michelle rivière (latin)", "dob": "1999-09-05", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Manal al-akhraz (latin)", "dob": "1970-2-2", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "margaud bousquet (latin)", "dob": "1995-04-04", "address": "Italie", "label": "negative", "script": "latin" } ], "query_template": "Generate 13 variations of {name}, ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 17% of the total 13 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap random adjacent letters, and Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 17, "selected_rules": [ "swap_random_letter", "name_parts_permutations" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "name_parts_permutations": "Reorder name parts" }, "percentage": 17 } } }, { "seed_identities_with_labels": [ { "name": "emmanuelle ramos (latin)", "dob": "1979-12-14", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "جسور المهيدب (arabic)", "dob": "1958-06-14", "address": "Anguilla", "label": "negative", "script": "arabic" }, { "name": "Svetlana Yemilianova (latin)", "dob": "1971-10-7", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "carly griffin (latin)", "dob": "1946-06-22", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Maher Salah (latin)", "dob": "1957-10-22", "address": "Saudi Arabia", "label": "positive", "script": "latin" }, { "name": "Alexander Udodov (latin)", "dob": "1969-6-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "teodosio lara (latin)", "dob": "1945-03-09", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "وصاف ابو السعود (arabic)", "dob": "1924-10-30", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "康男 高木 (chinese)", "dob": "1948-1-30", "address": "Japan", "label": "positive", "script": "chinese" }, { "name": "jérôme leclercq (latin)", "dob": "1966-07-19", "address": "Bahamas", "label": "negative", "script": "latin" }, { "name": "guillaume dubois (latin)", "dob": "1962-11-18", "address": "Grenade", "label": "negative", "script": "latin" }, { "name": "andrée roussel (latin)", "dob": "1974-01-02", "address": "Algérie", "label": "negative", "script": "latin" }, { "name": "laurence perrin (latin)", "dob": "1981-01-16", "address": "Honduras", "label": "negative", "script": "latin" }, { "name": "Abdul al-Mohammedawi (latin)", "dob": "1968-1-20", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "nathalie benoit (latin)", "dob": "1997-07-22", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 6 variations of {name}. Ensuring phonetic similarity (sound-alike names) with the following distribution: 10% Light ({name}*), 30% Medium ({name}i), 60% Far ({name}ee). Approximately 11% of the total 6 variations should follow these rule-based transformations, in addition to the phonetic and orthographic similarities. Additionally, generate variations that perform these transformations for approximately 11% of the total: Convert name to initials (N{name}), Remove a random vowel ({name}e), Convert name to initials and remove a random vowel (Ni{name}e).\n[VALIDATION HINTS]: Orthographic similarity: 20% Light. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 11, "selected_rules": [ "shorten_name_to_initials", "remove_random_vowel" ], "rule_descriptions": { "shorten_name_to_initials": "Convert name to initials", "remove_random_vowel": "Remove a random vowel" }, "percentage": 11 } } }, { "seed_identities_with_labels": [ { "name": "melissa thornton (latin)", "dob": "1975-03-07", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "jim garcia (latin)", "dob": "1928-04-14", "address": "Gibraltar", "label": "negative", "script": "latin" }, { "name": "luc lefèvre (latin)", "dob": "1966-04-11", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "jacqueline bates (latin)", "dob": "1960-09-26", "address": "Martinique", "label": "negative", "script": "latin" }, { "name": "рубен григорьева (cyrillic)", "dob": "1997-08-16", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "Герман Белоус (cyrillic)", "dob": "1977-11-14", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "georgina corominas (latin)", "dob": "1940-12-19", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Ilyas Umahanov (latin)", "dob": "1957-3-27", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "олесь ґерета (cyrillic)", "dob": "1951-11-05", "address": "Benin", "label": "negative", "script": "cyrillic" }, { "name": "aaron lawrence (latin)", "dob": "1938-01-04", "address": "Saint Helena", "label": "negative", "script": "latin" }, { "name": "Marianne Houwayek (latin)", "dob": "1980-5-14", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "Evgeny NOVITSKIY (latin)", "dob": "1957-11-19", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "noelia luna (latin)", "dob": "1927-11-25", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "andrea wilkins (latin)", "dob": "1979-05-20", "address": "Bermuda", "label": "negative", "script": "latin" }, { "name": "Zaki Ararawi (latin)", "dob": "1961-5-23", "address": "Turkey", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 10% Light, 30% Medium, 60% Far, and orthographic similarity: 70% Light, 30% Medium, and also include 45% of variations that follow: Additionally, generate variations that perform these transformations: Abbreviate name parts, Delete a random letter, and Replace double letters with a single letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 45, "selected_rules": [ "shorten_name_to_abbreviations", "delete_random_letter", "replace_double_letters_with_single_letter" ], "rule_descriptions": { "shorten_name_to_abbreviations": "Abbreviate name parts", "delete_random_letter": "Delete a random letter", "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 45 } } }, { "seed_identities_with_labels": [ { "name": "راغب الحكم بن سعد العشيرة (arabic)", "dob": "1990-01-16", "address": "Czech Republic", "label": "negative", "script": "arabic" }, { "name": "ريتال الخلفاوي (arabic)", "dob": "1945-02-11", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "carlos alonso (latin)", "dob": "1980-10-09", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "william hayes (latin)", "dob": "1946-02-12", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "tara christensen (latin)", "dob": "1953-10-14", "address": "New Zealand", "label": "negative", "script": "latin" }, { "name": "Katerina Pawlowska (latin)", "dob": "1977-3-28", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "justin krueger (latin)", "dob": "1957-09-06", "address": "Pitcairn Islands", "label": "negative", "script": "latin" }, { "name": "michelle bouchet (latin)", "dob": "1973-08-27", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Hamid Zareikajosangi (latin)", "dob": "1987-5-20", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "robert moore (latin)", "dob": "1942-01-29", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "In O (latin)", "dob": "1969-7-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "محمد آقااحمدی (arabic)", "dob": "1995-3-1", "address": "Iran", "label": "positive", "script": "arabic" }, { "name": "crystal martin (latin)", "dob": "1946-10-23", "address": "Saint Vincent and the Grenadines", "label": "negative", "script": "latin" }, { "name": "kyle garcia (latin)", "dob": "1983-02-26", "address": "Congo", "label": "negative", "script": "latin" }, { "name": "Fadlallah Taher (latin)", "dob": "1963-11-10", "address": "Guinea", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 8 execution vectors for each target identity {name}, ensuring phonetic similarity (100% Medium) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 58% of the total 8 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap adjacent consonants, and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 58, "selected_rules": [ "swap_adjacent_consonants", "swap_random_letter" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 58 } } }, { "seed_identities_with_labels": [ { "name": "Ahmad Shafa'i (latin)", "dob": "1968-5-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "christina davies (latin)", "dob": "1936-02-10", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "پرهام صنایعی (arabic)", "dob": "1970-12-31", "address": "Iran", "label": "High Risk", "script": "arabic" }, { "name": "james adams (latin)", "dob": "1937-06-07", "address": "Sweden", "label": "negative", "script": "latin" }, { "name": "Сергей Тен (cyrillic)", "dob": "1976-8-25", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "dorita castejón (latin)", "dob": "1991-02-24", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "باهرة الخزرج (arabic)", "dob": "1949-04-22", "address": "Bahrain", "label": "negative", "script": "arabic" }, { "name": "laura boyd (latin)", "dob": "1988-02-25", "address": "Bouvet Island (Bouvetoya)", "label": "negative", "script": "latin" }, { "name": "Marcelin Makolo (latin)", "dob": "1985-11-30", "address": "Congo, Democratic Republic of the", "label": "positive", "script": "latin" }, { "name": "andrew perez (latin)", "dob": "1973-10-22", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "christopher luna (latin)", "dob": "1977-07-05", "address": "Pakistan", "label": "negative", "script": "latin" }, { "name": "Nikolay Arefyev (latin)", "dob": "1949-3-11", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "julie salazar (latin)", "dob": "1938-05-26", "address": "Sierra Leone", "label": "negative", "script": "latin" }, { "name": "brenda lopez (latin)", "dob": "1928-08-30", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "German Belous (latin)", "dob": "1977-11-14", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate 15 variations of {name}, ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 49% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap adjacent syllables, Delete a random letter, and Add a title suffix (Jr., PhD, etc.). The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 49, "selected_rules": [ "swap_adjacent_syllables", "delete_random_letter", "add_random_trailing_title" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables", "delete_random_letter": "Delete a random letter", "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)" }, "percentage": 49 } } }, { "seed_identities_with_labels": [ { "name": "Tarandzhit Kambo (latin)", "dob": "1976-3-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Aleksandr Ganov (latin)", "dob": "1974-10-24", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "pauline rocher (latin)", "dob": "1996-05-07", "address": "Kirghizistan", "label": "negative", "script": "latin" }, { "name": "alphonse duhamel (latin)", "dob": "1981-02-03", "address": "Argentine", "label": "negative", "script": "latin" }, { "name": "hugues delattre (latin)", "dob": "1995-12-03", "address": "Israël", "label": "negative", "script": "latin" }, { "name": "مُعتز المهرة (arabic)", "dob": "1995-12-28", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "isabelle lacroix (latin)", "dob": "1957-05-14", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "stéphanie girard (latin)", "dob": "1980-07-14", "address": "Suède", "label": "negative", "script": "latin" }, { "name": "sarah collins (latin)", "dob": "1985-09-30", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Алексей Мордашов (cyrillic)", "dob": "1965-9-26", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "émilie moreno (latin)", "dob": "1940-11-18", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Ihor Rotenberg (latin)", "dob": "1973-5-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "luna costa (latin)", "dob": "1998-10-24", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "иваничка количков (cyrillic)", "dob": "1979-11-17", "address": "Netherlands Antilles", "label": "negative", "script": "cyrillic" }, { "name": "Chapito ISIDRO (latin)", "dob": "1982-6-19", "address": "Mexico", "label": "positive", "script": "latin" } ], "query_template": "Generate 15 name variations for {name}, ensuring phonetic similarity (10% Light, \"sound-alike name\", 30% Medium, \"similar sound but not exactly\", 60% Far) and orthographic similarity (20% Light, \"visually similar but not exact spelling\", 60% Medium, \"almost identical spelling\", 20% Far). Approximately 46% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that: Swap adjacent consonants, to create name variations such as {name} -> \"{adjacent consonant swap}\". The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 46, "selected_rules": [ "swap_adjacent_consonants" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 46 } } }, { "seed_identities_with_labels": [ { "name": "gabrielle leconte (latin)", "dob": "1936-07-09", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Aysen Nikolayev (latin)", "dob": "1972-1-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "emelina cáceres (latin)", "dob": "1979-02-07", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "Myo THITSAR (latin)", "dob": "1972-11-24", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "joel guzmán (latin)", "dob": "1972-06-18", "address": "Egipto", "label": "negative", "script": "latin" }, { "name": "Maxim Reshetnikov (latin)", "dob": "1979-7-11", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "herberto ortuño (latin)", "dob": "1984-10-06", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Abu-Ahmad Zakkur (latin)", "dob": "1979-1-5", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "واثق آل عايض (arabic)", "dob": "1932-12-26", "address": "Benin", "label": "negative", "script": "arabic" }, { "name": "inocencio lumbreras (latin)", "dob": "1987-04-25", "address": "Islas Marshall", "label": "negative", "script": "latin" }, { "name": "octavio gomis (latin)", "dob": "1930-01-07", "address": "Lesotho", "label": "negative", "script": "latin" }, { "name": "كاملة تنوخ (arabic)", "dob": "1983-11-19", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "natanael paredes (latin)", "dob": "1938-03-21", "address": "Azerbaiyán", "label": "negative", "script": "latin" }, { "name": "Сергей Гордеев (cyrillic)", "dob": "1972-11-22", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "julie martineau (latin)", "dob": "1965-02-05", "address": "Burkina Faso", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 14 name variations for {name}, ensuring phonetic similarity (10% Light, 50% Medium, 40% Far) and orthographic similarity (50% Light, 50% Medium). Approximately 43% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap adjacent consonants, Replace spaces with special characters, and Insert a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 43, "selected_rules": [ "swap_adjacent_consonants", "replace_spaces_with_random_special_characters", "insert_random_letter" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "replace_spaces_with_random_special_characters": "Replace spaces with special characters", "insert_random_letter": "Insert a random letter" }, "percentage": 43 } } }, { "seed_identities_with_labels": [ { "name": "camille dumont (latin)", "dob": "1978-12-14", "address": "Saint Pierre et Miquelon", "label": "negative", "script": "latin" }, { "name": "Abulghasem Valagohar (latin)", "dob": "1969-8-15", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Дмитрий Киселев (cyrillic)", "dob": "1954-4-26", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Abbas-Ali Mohammadian (latin)", "dob": "1964-2-20", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "аркадий бирюков (cyrillic)", "dob": "1959-01-24", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "christiane ferrand (latin)", "dob": "1969-07-06", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "laurent collin (latin)", "dob": "1944-02-18", "address": "Albanie", "label": "negative", "script": "latin" }, { "name": "maryse regnier (latin)", "dob": "1926-06-11", "address": "Maurice", "label": "negative", "script": "latin" }, { "name": "نصر الدّين الشايع (arabic)", "dob": "1974-03-09", "address": "Niger", "label": "negative", "script": "arabic" }, { "name": "Neli Parutenco (latin)", "dob": "1962-3-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "pilar mendes (latin)", "dob": "1989-12-22", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "francisco castro (latin)", "dob": "1952-09-30", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "victor seguin (latin)", "dob": "1963-09-04", "address": "Argentine", "label": "negative", "script": "latin" }, { "name": "Zaki Ararawi (latin)", "dob": "1961-5-23", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "étienne fouquet (latin)", "dob": "1938-04-19", "address": "Mali", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 10 execution vectors for {name}, ensuring phonetic similarity with the following distribution: Light (30%), Medium (40%), Far (30%). Approximately 14% of the total variations should follow these rule-based transformations: Add a title prefix, and Swap random adjacent letters. The remaining 86% of the variations should be generated as follows:\n\nFor phonetic similarity:\n- Generate 3 variations with Light similarity\n- Generate 4 variations with Medium similarity\n- Generate 3 variations with Far similarity\n\nFor orthographic similarity:\n- Generate 3 variations with Light similarity\n- Generate 4 variations with Medium similarity\n- Generate 3 variations with Far similarity The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 14, "selected_rules": [ "add_random_leading_title", "swap_random_letter" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 14 } } }, { "seed_identities_with_labels": [ { "name": "Andrei Burov (latin)", "dob": "1971-11-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "وسيل العارف (arabic)", "dob": "1944-11-30", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "ياسر العقيل (arabic)", "dob": "1932-02-23", "address": "Bahamas", "label": "negative", "script": "arabic" }, { "name": "Dirk Troendle (latin)", "dob": "1956-3-11", "address": "Germany", "label": "positive", "script": "latin" }, { "name": "matthieu moreno (latin)", "dob": "1954-05-27", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "michael powell (latin)", "dob": "1969-03-14", "address": "Kiribati", "label": "negative", "script": "latin" }, { "name": "philippe launay (latin)", "dob": "1944-06-03", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Тимур Каноков (cyrillic)", "dob": "1972-9-24", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "chad jones (latin)", "dob": "1996-06-09", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "david hampton (latin)", "dob": "1959-12-14", "address": "Paraguay", "label": "negative", "script": "latin" }, { "name": "gilles caron (latin)", "dob": "1932-09-03", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "gregory mendoza (latin)", "dob": "1972-02-19", "address": "New Caledonia", "label": "negative", "script": "latin" }, { "name": "david casey (latin)", "dob": "1930-04-05", "address": "Norfolk Island", "label": "negative", "script": "latin" }, { "name": "Rafi Udin (latin)", "dob": "1966-6-3", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Aleksey Chumakov (latin)", "dob": "1974-5-6", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 8 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 70% Light, 30% Medium, and also include 25% of variations that follow: Additionally, generate variations that perform these transformations: Add a title prefix (Mr., Dr., etc.), Remove a random vowel, and Convert name to initials. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 25, "selected_rules": [ "add_random_leading_title", "remove_random_vowel", "shorten_name_to_initials" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "remove_random_vowel": "Remove a random vowel", "shorten_name_to_initials": "Convert name to initials" }, "percentage": 25 } } }, { "seed_identities_with_labels": [ { "name": "Amin Shary (latin)", "dob": "1957-8-2", "address": "Lebanon", "label": "positive", "script": "latin" }, { "name": "ema fonseca (latin)", "dob": "1976-12-08", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Роман Нечипорук (cyrillic)", "dob": "1980-2-21", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "леко недялков (cyrillic)", "dob": "1975-12-14", "address": "Bulgaria", "label": "High Risk", "script": "cyrillic" }, { "name": "Igor KORNET (latin)", "dob": "1973-4-29", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "arnaude paul (latin)", "dob": "1941-12-10", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "silvio rubio (latin)", "dob": "1935-06-18", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "nilo arroyo (latin)", "dob": "1958-02-09", "address": "Guatemala", "label": "negative", "script": "latin" }, { "name": "Elias AL-KASHMIRI (latin)", "dob": "1964-2-10", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "عارف الجابر (arabic)", "dob": "1995-06-22", "address": "Cambodia", "label": "negative", "script": "arabic" }, { "name": "nélida bartolomé (latin)", "dob": "1998-10-17", "address": "España", "label": "negative", "script": "latin" }, { "name": "leire folch (latin)", "dob": "2002-09-27", "address": "Italia", "label": "negative", "script": "latin" }, { "name": "evelia díaz (latin)", "dob": "2003-05-21", "address": "Saint Kitts y Nevis", "label": "negative", "script": "latin" }, { "name": "dionisio barberá (latin)", "dob": "1980-07-06", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Ch'un-hwan RI (latin)", "dob": "1957-8-21", "address": "China", "label": "positive", "script": "latin" } ], "query_template": "Generate 6 variations of {name}, ensuring phonetic similarity (10% variation with similar sound but lighter pronunciation, 30% variation with similar sound and medium pronunciation, 60% variation with far-off sound) and orthographic similarity (70% variation with visually similar spelling but light differences, 30% variation with visually similar spelling but medium differences). Approximately 11% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that: \n1. Remove first letter\n2. Remove last letter\n3. Add suffix '-ov' or '-in'\n4. Replace 'a' with 'e'\n5. Replace 'o' with 'u'\n6. Swap adjacent consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 11, "selected_rules": [ "swap_adjacent_consonants" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants" }, "percentage": 11 } } }, { "seed_identities_with_labels": [ { "name": "Alexander Demin (latin)", "dob": "1988-9-23", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Адальби Шхагошев (cyrillic)", "dob": "1967-6-6", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "charles molina (latin)", "dob": "1960-07-02", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "adam oneill (latin)", "dob": "1994-12-14", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "steven chavez (latin)", "dob": "1957-04-13", "address": "Philippines", "label": "negative", "script": "latin" }, { "name": "Irina Pankina (latin)", "dob": "1986-3-8", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "david sims (latin)", "dob": "1988-03-03", "address": "Costa Rica", "label": "negative", "script": "latin" }, { "name": "وفاء بنو فراس (arabic)", "dob": "1927-08-20", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "harold stokes (latin)", "dob": "1926-03-03", "address": "Papua New Guinea", "label": "negative", "script": "latin" }, { "name": "chantal charles (latin)", "dob": "1935-07-20", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "Yuri Shvytkin (latin)", "dob": "1965-5-24", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "christopher tucker (latin)", "dob": "1950-02-23", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "тотьо мочев (cyrillic)", "dob": "1994-10-25", "address": "Azerbaijan", "label": "negative", "script": "cyrillic" }, { "name": "Dmitry Pleshevskiy (latin)", "dob": "1992-7-30", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "joseph williams (latin)", "dob": "1988-03-09", "address": "Tajikistan", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 15 variations of the name {name}, ensuring phonetic similarity: 50% Light, 50% Medium, and orthographic similarity: 50% Light, 50% Medium, and also include 48% of variations that follow: Additionally, generate variations that perform these transformations: Delete a random letter, Convert name to initials, and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 48, "selected_rules": [ "delete_random_letter", "shorten_name_to_initials", "swap_random_letter" ], "rule_descriptions": { "delete_random_letter": "Delete a random letter", "shorten_name_to_initials": "Convert name to initials", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 48 } } }, { "seed_identities_with_labels": [ { "name": "olivier pinto (latin)", "dob": "2004-01-03", "address": "Luxembourg", "label": "negative", "script": "latin" }, { "name": "charles hebert (latin)", "dob": "1967-11-17", "address": "Antilles néerlandaises", "label": "negative", "script": "latin" }, { "name": "christophe moreau (latin)", "dob": "1998-02-12", "address": "Pakistan", "label": "negative", "script": "latin" }, { "name": "ипат суворов (cyrillic)", "dob": "2004-05-01", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "sylvie jourdan (latin)", "dob": "1966-06-28", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Olimxon Ismailov (latin)", "dob": "1996-10-4", "address": "Uzbekistan", "label": "positive", "script": "latin" }, { "name": "Aleyona Chuguleva (latin)", "dob": "1986-5-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Andrei Tikhonov (latin)", "dob": "1966-11-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Николай Харитонов (cyrillic)", "dob": "1948-10-30", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "Wai-chung Lo (latin)", "dob": "1961-11-19", "address": "Hong Kong", "label": "positive", "script": "latin" }, { "name": "yaiza olivares (latin)", "dob": "1928-05-02", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "dominique guillot (latin)", "dob": "1936-05-03", "address": "Danemark", "label": "negative", "script": "latin" }, { "name": "aimée le gall (latin)", "dob": "1994-03-04", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "kike aguado (latin)", "dob": "1967-04-06", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "петрушка клатикрушев (cyrillic)", "dob": "1940-03-21", "address": "Solomon Islands", "label": "negative", "script": "cyrillic" } ], "query_template": "Generate 11 variations of {name} ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (50% Light, 50% Medium). Approximately 10% of the total 11 variations should follow these rule-based transformations: Replace double letters with a single letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 11, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 10, "selected_rules": [ "replace_double_letters_with_single_letter" ], "rule_descriptions": { "replace_double_letters_with_single_letter": "Replace double letters with a single letter" }, "percentage": 10 } } }, { "seed_identities_with_labels": [ { "name": "alix guillou (latin)", "dob": "1932-01-16", "address": "Timor", "label": "negative", "script": "latin" }, { "name": "Aleksandr Kurenkov (latin)", "dob": "1972-6-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "aimé perrot (latin)", "dob": "1957-04-22", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "روعة سليم (arabic)", "dob": "1961-04-19", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "Mu Xiaolu (latin)", "dob": "1990-3-8", "address": "China", "label": "positive", "script": "latin" }, { "name": "Галина Пятых (cyrillic)", "dob": "1970-5-12", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "olivier hubert (latin)", "dob": "1945-02-03", "address": "Heard et McDonald (Îles)", "label": "negative", "script": "latin" }, { "name": "manuel nogueira (latin)", "dob": "1931-03-05", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "anne bodin (latin)", "dob": "1983-12-25", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "élodie laurent (latin)", "dob": "1955-05-02", "address": "Antilles néerlandaises", "label": "negative", "script": "latin" }, { "name": "Andrey Doukhvalov (latin)", "dob": "1957-12-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "всемил одинцов (cyrillic)", "dob": "1965-03-26", "address": "Portugal", "label": "negative", "script": "cyrillic" }, { "name": "jeannine goncalves (latin)", "dob": "1930-08-06", "address": "Jordanie", "label": "negative", "script": "latin" }, { "name": "lucy toussaint (latin)", "dob": "1925-10-09", "address": "Monaco", "label": "High Risk", "script": "latin" }, { "name": "Abu LLONGGO (latin)", "dob": "1972-5-15", "address": "Philippines", "label": "positive", "script": "latin" } ], "query_template": "Generate 9 variations of {name}. Ensuring phonetic similarity (100% Medium) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 38% of the total 9 variations should follow these rule-based transformations: Additionally, generate variations that: Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Medium": 1.0 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 38, "selected_rules": [ "remove_random_consonant" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant" }, "percentage": 38 } } }, { "seed_identities_with_labels": [ { "name": "مهنّد بيروتي (arabic)", "dob": "1942-06-12", "address": "Sudan", "label": "High Risk", "script": "arabic" }, { "name": "irene patiño (latin)", "dob": "2000-05-30", "address": "Túnez", "label": "negative", "script": "latin" }, { "name": "sarah bates (latin)", "dob": "1977-05-16", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "هلیا شاکری (arabic)", "dob": "1975-07-24", "address": "Ukraine", "label": "negative", "script": "arabic" }, { "name": "cruz barrena (latin)", "dob": "1925-06-06", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "Leonid MIKHAILIUK (latin)", "dob": "1970-1-1", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "tito cantón (latin)", "dob": "1951-06-28", "address": "Canadá", "label": "negative", "script": "latin" }, { "name": "Alexey Russkikh (latin)", "dob": "1968-7-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Ahmad Seyedoshohada (latin)", "dob": "1959-4-27", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "yves louis (latin)", "dob": "1973-07-13", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "michelle gilbert (latin)", "dob": "1947-10-23", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "micaela pelayo (latin)", "dob": "1931-01-08", "address": "Kiribati", "label": "negative", "script": "latin" }, { "name": "Юрий Чайка (cyrillic)", "dob": "1951-5-21", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "eli rivero (latin)", "dob": "1951-09-13", "address": "Chad", "label": "negative", "script": "latin" }, { "name": "Alexey Sukhodolov (latin)", "dob": "1974-4-19", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 7 execution vectors ({name} variations) for each target identity. Ensure phonetic similarity by generating 0.7 variation where {name} sounds like a word 10% of the time, 3 variations where it sounds somewhat similar but not as much (50%), and 3.3 variations where it sounds very different (40%). Additionally, ensure orthographic similarity by generating 2.1 variations where {name} is visually similar in spelling 30% of the time, 2.8 variations where it's somewhat similar but not as much (40%), and 2 variations where it looks very different (30%). Approximately 49% of the total 7 variations should follow these rule-based transformations: Additionally, generate variations that swap random adjacent letters.\n[VALIDATION HINTS]: Phonetic similarity: 10% Light.; Orthographic similarity: 30% Light. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.1, "Medium": 0.5, "Far": 0.4 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 49, "selected_rules": [ "swap_random_letter" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters" }, "percentage": 49 } } }, { "seed_identities_with_labels": [ { "name": "Ramezan Oladi (latin)", "dob": "1963-5-29", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Олег Белозёров (cyrillic)", "dob": "1969-9-26", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "wesley harris (latin)", "dob": "1929-05-23", "address": "Croatia", "label": "negative", "script": "latin" }, { "name": "Ch'o'l-man Han (latin)", "dob": "1978-5-6", "address": "China", "label": "positive", "script": "latin" }, { "name": "diane techer (latin)", "dob": "1972-04-30", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "teresa bradley (latin)", "dob": "2004-08-28", "address": "Tuvalu", "label": "negative", "script": "latin" }, { "name": "michelle wright (latin)", "dob": "1980-01-04", "address": "Finland", "label": "negative", "script": "latin" }, { "name": "alex atkins (latin)", "dob": "1928-10-27", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "felicia becker (latin)", "dob": "1951-09-16", "address": "Cyprus", "label": "negative", "script": "latin" }, { "name": "صامد النجار (arabic)", "dob": "1927-08-02", "address": "Barbados", "label": "negative", "script": "arabic" }, { "name": "jared woods (latin)", "dob": "1968-11-14", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "eduarda maia (latin)", "dob": "1961-06-29", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Zelimir Petrovic (latin)", "dob": "1981-9-1", "address": "Serbia", "label": "positive", "script": "latin" }, { "name": "محمدیاسین نعمتی (arabic)", "dob": "2005-01-07", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "Marko Svorcan (latin)", "dob": "1967-5-7", "address": "Serbia", "label": "positive", "script": "latin" } ], "query_template": "Generate 7 variations of {name}, ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 36% of the total 7 variations should follow these rule-based transformations: \n\nGenerate 2.56 variations using \"Remove all spaces\" transformation\nGenerate 2.88 variations using \"Delete a random letter\" transformation\nGenerate 2.54 variations using \"Remove a random consonant\" transformation\n\nAdditionally, generate variations that perform these transformations: Remove all spaces, Delete a random letter, and Remove a random consonant.. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 7, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 36, "selected_rules": [ "remove_all_spaces", "delete_random_letter", "remove_random_consonant" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "delete_random_letter": "Delete a random letter", "remove_random_consonant": "Remove a random consonant" }, "percentage": 36 } } }, { "seed_identities_with_labels": [ { "name": "Aleksey Russkikh (latin)", "dob": "1968-7-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "jessica lawrence (latin)", "dob": "1998-01-04", "address": "Saint Vincent and the Grenadines", "label": "negative", "script": "latin" }, { "name": "ناصر نهد (arabic)", "dob": "1967-12-15", "address": "Kiribati", "label": "negative", "script": "arabic" }, { "name": "وسيل مطير (arabic)", "dob": "1992-10-15", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "daniel reid (latin)", "dob": "1959-07-29", "address": "Burundi", "label": "negative", "script": "latin" }, { "name": "amanda may (latin)", "dob": "2001-03-09", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Андрей Рыбаков (cyrillic)", "dob": "1976-7-11", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "natalie ross (latin)", "dob": "1926-06-27", "address": "Uruguay", "label": "negative", "script": "latin" }, { "name": "jessica pratt (latin)", "dob": "1948-05-24", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "Ahmad Shafa'i (latin)", "dob": "1968-5-21", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Hossein Akbari (latin)", "dob": "1961-6-17", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "françoise roche (latin)", "dob": "1961-06-12", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "inès bailly (latin)", "dob": "1987-03-12", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "Elena Yampolskaya (latin)", "dob": "1971-6-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "karen white (latin)", "dob": "1985-02-28", "address": "India", "label": "negative", "script": "latin" } ], "query_template": "Generate 9 variations of {name} ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (10% Light, 30% Medium, 60% Far). Approximately 56% of the total 9 variations should follow these rule-based transformations: Remove vowels from first name, remove vowels from last name, append \"son\" or \"daughter\" to end of full name, prepend \"Mr.\" or \"Ms.\" to beginning of full name, swap first and last names, replace all digits in name with letters (e.g. 123 becomes OneTwoThree), change all spaces to underscores, reorder middle name(s) to begin at the end, and/or reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 56, "selected_rules": [ "name_parts_permutations" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts" }, "percentage": 56 } } }, { "seed_identities_with_labels": [ { "name": "bruce bright (latin)", "dob": "1946-12-28", "address": "Luxembourg", "label": "negative", "script": "latin" }, { "name": "william yates (latin)", "dob": "1959-03-08", "address": "Australia", "label": "negative", "script": "latin" }, { "name": "Yuri Karayev (latin)", "dob": "1966-6-21", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "نور الكالوتي (arabic)", "dob": "1931-03-18", "address": "Western Sahara", "label": "negative", "script": "arabic" }, { "name": "reina mena (latin)", "dob": "1952-07-14", "address": "Nicaragua", "label": "High Risk", "script": "latin" }, { "name": "david mcknight (latin)", "dob": "1987-07-05", "address": "Morocco", "label": "negative", "script": "latin" }, { "name": "Александap Добрић (cyrillic)", "dob": "1991-7-15", "address": "Bosnia and Herzegovina", "label": "positive", "script": "cyrillic" }, { "name": "Banyar Moe (latin)", "dob": "1947-8-14", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "ângelo valente (latin)", "dob": "1956-03-05", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "mary weeks (latin)", "dob": "1989-05-12", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "ciríaco perales (latin)", "dob": "1939-12-30", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "феврония семенова (cyrillic)", "dob": "1991-03-27", "address": "Armenia", "label": "High Risk", "script": "cyrillic" }, { "name": "tracy jenkins (latin)", "dob": "1948-07-12", "address": "Grenada", "label": "negative", "script": "latin" }, { "name": "Madhy Akil (latin)", "dob": "1987-10-30", "address": "Colombia", "label": "positive", "script": "latin" }, { "name": "Aman Abdurahman (latin)", "dob": "1972-1-5", "address": "Indonesia", "label": "positive", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 14 variations of the name {name}, ensuring phonetic similarity: 20% Light, 60% Medium, 20% Far, and orthographic similarity: 20% Light, 60% Medium, 20% Far, and also include 59% of variations that follow: Additionally, generate variations that perform these transformations: Swap adjacent consonants, Add a title prefix (Mr., Dr., etc.), and Replace random consonants with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 59, "selected_rules": [ "swap_adjacent_consonants", "add_random_leading_title", "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "swap_adjacent_consonants": "Swap adjacent consonants", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 59 } } }, { "seed_identities_with_labels": [ { "name": "Waad Qado (latin)", "dob": "1971-1-1", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "mario asenjo (latin)", "dob": "1956-12-19", "address": "Santa Lucía", "label": "negative", "script": "latin" }, { "name": "галактион фомина (cyrillic)", "dob": "2000-05-05", "address": "Crimea", "label": "High Risk", "script": "cyrillic" }, { "name": "Александр Бортников (cyrillic)", "dob": "1951-11-15", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "matthew tucker (latin)", "dob": "2004-09-01", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Ella Pamfilova (latin)", "dob": "1953-9-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "merche salgado (latin)", "dob": "1961-03-20", "address": "Congo", "label": "negative", "script": "latin" }, { "name": "Rafael Bastardo (latin)", "dob": "1978-9-22", "address": "Venezuela", "label": "positive", "script": "latin" }, { "name": "danilo checa (latin)", "dob": "1999-07-24", "address": "Lesotho", "label": "negative", "script": "latin" }, { "name": "ксения романова (cyrillic)", "dob": "1955-04-11", "address": "Saint Martin", "label": "negative", "script": "cyrillic" }, { "name": "haydée atienza (latin)", "dob": "2000-02-22", "address": "Ecuador", "label": "negative", "script": "latin" }, { "name": "lisa parker (latin)", "dob": "1967-02-01", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "luciana posada (latin)", "dob": "1978-08-09", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Gholamreza Ardakani (latin)", "dob": "1992-8-20", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "santiago vizcaíno (latin)", "dob": "2002-09-20", "address": "Bolivia", "label": "High Risk", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 12 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 16% of variations that follow: Additionally, generate variations that perform these transformations: Remove a random consonant, and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 16, "selected_rules": [ "remove_random_consonant", "swap_random_letter" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 16 } } }, { "seed_identities_with_labels": [ { "name": "adriana torres (latin)", "dob": "1941-03-10", "address": "Austrália", "label": "negative", "script": "latin" }, { "name": "Chapito ISIDRO (latin)", "dob": "1982-6-19", "address": "Mexico", "label": "positive", "script": "latin" }, { "name": "jules moreau (latin)", "dob": "1935-05-25", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "رجا سلامة (arabic)", "dob": "1960-8-15", "address": "Lebanon", "label": "positive", "script": "arabic" }, { "name": "robin barnett (latin)", "dob": "1967-06-02", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "valentim mota (latin)", "dob": "1939-01-21", "address": "Jan Mayen", "label": "negative", "script": "latin" }, { "name": "adelia checa (latin)", "dob": "1945-03-15", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "سائد ألمع (arabic)", "dob": "1938-08-13", "address": "Poland", "label": "negative", "script": "arabic" }, { "name": "paulo borges (latin)", "dob": "1959-03-09", "address": "Comores", "label": "negative", "script": "latin" }, { "name": "ariana gomes (latin)", "dob": "1936-02-18", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "Esam Ettehadi (latin)", "dob": "1989-7-31", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "ермолай зимин (cyrillic)", "dob": "2004-02-15", "address": "Donetsk", "label": "High Risk", "script": "cyrillic" }, { "name": "isabela nogueira (latin)", "dob": "2001-04-08", "address": "Ilhas Salomão", "label": "negative", "script": "latin" }, { "name": "Muhammad Rahmun (latin)", "dob": "1957-4-1", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "Saad AL-FAKIH (latin)", "dob": "1957-1-31", "address": "United Kingdom", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 13 execution vectors (name variations) for each target identity {name}, ensuring phonetic similarity of sound-alike names (70% Light, 30% Medium) and orthographic similarity of visually similar spellings (100% Far). Approximately 33% of the total 13 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Add a title prefix (Mr., Dr., etc.), Replace random vowels with different vowels. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Far": 1.0 }, "rule_based": { "rule_percentage": 33, "selected_rules": [ "add_random_leading_title", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 33 } } }, { "seed_identities_with_labels": [ { "name": "margot gauthier (latin)", "dob": "2004-04-13", "address": "Mariannes du Nord (Îles)", "label": "negative", "script": "latin" }, { "name": "Abdelbasit Khair (latin)", "dob": "1955-8-28", "address": "Sudan", "label": "positive", "script": "latin" }, { "name": "nicholas berg (latin)", "dob": "1972-10-25", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "eugène godard (latin)", "dob": "1933-08-01", "address": "Tchad", "label": "negative", "script": "latin" }, { "name": "tamara ingram (latin)", "dob": "1997-10-12", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "Awqad al-Hamidawi (latin)", "dob": "1982-2-3", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "Алексей Русских (cyrillic)", "dob": "1968-7-17", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "caroline guérin (latin)", "dob": "1947-04-26", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Waad Qado (latin)", "dob": "1971-1-1", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "شعلان طقش (arabic)", "dob": "1938-12-01", "address": "Yemen", "label": "High Risk", "script": "arabic" }, { "name": "christophe lelièvre (latin)", "dob": "1929-11-21", "address": "Antilles néerlandaises", "label": "negative", "script": "latin" }, { "name": "marine thierry (latin)", "dob": "1934-03-26", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "ابوالفضل زارعی (arabic)", "dob": "1998-08-08", "address": "Austria", "label": "negative", "script": "arabic" }, { "name": "susanne joly (latin)", "dob": "1990-05-24", "address": "Jordanie", "label": "negative", "script": "latin" }, { "name": "Hussam Al-Qatirji (latin)", "dob": "1982-1-11", "address": "Syria", "label": "positive", "script": "latin" } ], "query_template": "Generate 12 variations of {name}, ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 23% of the total 12 variations should follow these rule-based transformations: Additionally, generate variations that: Add a title prefix (Mr., Dr., etc.).. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 23, "selected_rules": [ "add_random_leading_title" ], "rule_descriptions": { "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)" }, "percentage": 23 } } }, { "seed_identities_with_labels": [ { "name": "théodore ferreira (latin)", "dob": "1943-05-07", "address": "Zimbabwe", "label": "negative", "script": "latin" }, { "name": "elizabeth williams (latin)", "dob": "1978-03-22", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "alix ollivier (latin)", "dob": "2000-08-25", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "Dragan Dragas (latin)", "dob": "1982-6-20", "address": "Serbia", "label": "positive", "script": "latin" }, { "name": "nidia montero (latin)", "dob": "1939-01-22", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "gilbert vallée (latin)", "dob": "1983-08-14", "address": "Christmas (Île)", "label": "negative", "script": "latin" }, { "name": "charlotte rousseau (latin)", "dob": "1984-11-01", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "andrée bourdon (latin)", "dob": "1991-05-20", "address": "Madagascar", "label": "negative", "script": "latin" }, { "name": "златослава єсипенко (cyrillic)", "dob": "1978-06-27", "address": "Cambodia", "label": "negative", "script": "cyrillic" }, { "name": "ذريع الزماميري (arabic)", "dob": "1949-03-18", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "Denis Degtyarenko (latin)", "dob": "1989-10-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Андрей Шевченко (cyrillic)", "dob": "1965-5-29", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "lucas gosselin (latin)", "dob": "1943-07-18", "address": "Kiribati", "label": "negative", "script": "latin" }, { "name": "Artur Parfenchikov (latin)", "dob": "1964-11-29", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Oleg Tkach (latin)", "dob": "1967-9-23", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 10 execution vectors for {name}, ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 50% of the total 10 variations should follow these rule-based transformations: Replace random vowels with different vowels, Remove a random consonant, and Remove all spaces. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 50, "selected_rules": [ "replace_random_vowel_with_random_vowel", "remove_random_consonant", "remove_all_spaces" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "remove_random_consonant": "Remove a random consonant", "remove_all_spaces": "Remove all spaces" }, "percentage": 50 } } }, { "seed_identities_with_labels": [ { "name": "Peter Mbaga (latin)", "dob": "1976-9-25", "address": "South Africa", "label": "positive", "script": "latin" }, { "name": "vinicio martín (latin)", "dob": "1991-04-22", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "shannon williams (latin)", "dob": "1964-06-09", "address": "Hong Kong", "label": "negative", "script": "latin" }, { "name": "adela boada (latin)", "dob": "1991-12-29", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "nicole mccormick (latin)", "dob": "1960-02-06", "address": "Croatia", "label": "negative", "script": "latin" }, { "name": "Vladimir Pavlenko (latin)", "dob": "1962-4-14", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "ظاهر بني عطية (arabic)", "dob": "1971-03-20", "address": "Libya", "label": "High Risk", "script": "arabic" }, { "name": "леонід вдовенко (cyrillic)", "dob": "2000-08-09", "address": "Pakistan", "label": "negative", "script": "cyrillic" }, { "name": "savannah jones (latin)", "dob": "1926-03-17", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "Vladimir Yakushev (latin)", "dob": "1968-6-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "sean morgan (latin)", "dob": "1926-02-12", "address": "Sao Tome and Principe", "label": "negative", "script": "latin" }, { "name": "melanie heath (latin)", "dob": "1951-07-26", "address": "Sierra Leone", "label": "negative", "script": "latin" }, { "name": "Сергій Черевко (cyrillic)", "dob": "1975-8-11", "address": "Ukraine", "label": "positive", "script": "cyrillic" }, { "name": "nicholas hopkins (latin)", "dob": "1983-02-08", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Oleg Smolin (latin)", "dob": "1952-2-10", "address": "Russia", "label": "positive", "script": "latin" } ], "query_template": "Generate exactly 10 execution vectors (name variations) for {name}, ensuring phonetic similarity (30% Light, e.g. \"Jhone\" instead of \"John\", 40% Medium, e.g. \"Jonah\", 30% Far) and orthographic similarity (10% Light, e.g. \"{name}o\", 30% Medium, e.g. \"{name}ian\", 60% Far). Approximately 43% of the total variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Remove all spaces from {name}, Replace double letters in {name} with a single letter, and Replace random consonants in {name} with different consonants. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "rule_based": { "rule_percentage": 43, "selected_rules": [ "remove_all_spaces", "replace_double_letters_with_single_letter", "replace_random_consonant_with_random_consonant" ], "rule_descriptions": { "remove_all_spaces": "Remove all spaces", "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "replace_random_consonant_with_random_consonant": "Replace random consonants with different consonants" }, "percentage": 43 } } }, { "seed_identities_with_labels": [ { "name": "رجب الامام (arabic)", "dob": "1989-07-27", "address": "Gabon", "label": "negative", "script": "arabic" }, { "name": "caroline thomas (latin)", "dob": "2002-12-27", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "édith mendès (latin)", "dob": "1947-01-10", "address": "Argentine", "label": "negative", "script": "latin" }, { "name": "charlotte gérard (latin)", "dob": "1949-09-20", "address": "Cayman (Îles)", "label": "negative", "script": "latin" }, { "name": "Myo Oo (latin)", "dob": "1960-6-23", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "Yury Nabokov (latin)", "dob": "1981-1-1", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Павел Тараканов (cyrillic)", "dob": "1982-6-21", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "frédérique peron (latin)", "dob": "1991-02-12", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "grégoire maillot (latin)", "dob": "1932-05-20", "address": "Dominique", "label": "negative", "script": "latin" }, { "name": "Nelli Parutenko (latin)", "dob": "1962-3-21", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "clémence lebreton (latin)", "dob": "1925-03-28", "address": "Palau", "label": "negative", "script": "latin" }, { "name": "chad vargas (latin)", "dob": "1988-06-01", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "aurélie bodin (latin)", "dob": "1961-02-18", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Waad Qado (latin)", "dob": "1971-1-1", "address": "Iraq", "label": "positive", "script": "latin" }, { "name": "لوجين الزرقان (arabic)", "dob": "2001-01-15", "address": "Syria", "label": "High Risk", "script": "arabic" } ], "query_template": "Generate 15 variations of {name}. ensuring phonetic similarity (20% Light, 60% Medium, 20% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 27% of the total 15 variations should follow these rule-based transformations: Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 27, "selected_rules": [ "name_parts_permutations" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts" }, "percentage": 27 } } }, { "seed_identities_with_labels": [ { "name": "james ellis (latin)", "dob": "1935-01-26", "address": "Bahamas", "label": "negative", "script": "latin" }, { "name": "Alexander Rakitin (latin)", "dob": "1958-5-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "alphonse picard (latin)", "dob": "1985-06-01", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "christine long (latin)", "dob": "1961-03-20", "address": "Italy", "label": "negative", "script": "latin" }, { "name": "Tun Naing (latin)", "dob": "1963-4-30", "address": "Burma", "label": "positive", "script": "latin" }, { "name": "Mahmoud Baghlani (latin)", "dob": "1978-3-20", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "david garcia (latin)", "dob": "2002-12-05", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "اسرا جنتی (arabic)", "dob": "1962-09-12", "address": "Ireland", "label": "negative", "script": "arabic" }, { "name": "luce rivière (latin)", "dob": "1989-12-09", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "حنانه نوبختی (arabic)", "dob": "1938-11-25", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "timothy johnson (latin)", "dob": "1972-03-23", "address": "Mauritius", "label": "negative", "script": "latin" }, { "name": "Faouzi CAN'AN (latin)", "dob": "1943-6-7", "address": "Venezuela", "label": "positive", "script": "latin" }, { "name": "alyssa willis (latin)", "dob": "1998-07-27", "address": "Romania", "label": "negative", "script": "latin" }, { "name": "richard pierre (latin)", "dob": "2004-01-19", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Виктор Игнатов (cyrillic)", "dob": "1968-10-15", "address": "Russia", "label": "positive", "script": "cyrillic" } ], "query_template": "Generate 8 variations of {name}. ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 44% of the total 8 variations should follow these rule-based transformations: Remove a random consonant. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 8, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 44, "selected_rules": [ "remove_random_consonant" ], "rule_descriptions": { "remove_random_consonant": "Remove a random consonant" }, "percentage": 44 } } }, { "seed_identities_with_labels": [ { "name": "bernabé aguilar (latin)", "dob": "1995-01-21", "address": "Camerún", "label": "negative", "script": "latin" }, { "name": "جسور القزاز (arabic)", "dob": "1934-06-20", "address": "Lebanon", "label": "High Risk", "script": "arabic" }, { "name": "Sergey Topor-Gilka (latin)", "dob": "1970-2-17", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "雅琴 吴 (chinese)", "dob": "1992-11-13", "address": "China", "label": "positive", "script": "chinese" }, { "name": "concepción becerra (latin)", "dob": "1985-04-08", "address": "Filipinas", "label": "negative", "script": "latin" }, { "name": "perlita llobet (latin)", "dob": "1963-11-19", "address": "Pakistán", "label": "negative", "script": "latin" }, { "name": "joseph neal (latin)", "dob": "1950-12-17", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "демʼян данильчук (cyrillic)", "dob": "1961-03-03", "address": "Colombia", "label": "negative", "script": "cyrillic" }, { "name": "mary barrett (latin)", "dob": "1931-05-08", "address": "Kenya", "label": "High Risk", "script": "latin" }, { "name": "Artyom Verkhov (latin)", "dob": "1986-8-3", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "almudena zabaleta (latin)", "dob": "1979-09-08", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Ch'o'l-so'ng Cho (latin)", "dob": "1984-9-25", "address": "China", "label": "positive", "script": "latin" }, { "name": "Andrey Parshin (latin)", "dob": "1974-2-19", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "manuel falcó (latin)", "dob": "1983-12-07", "address": "Gambia", "label": "negative", "script": "latin" }, { "name": "lucie le roux (latin)", "dob": "1989-12-16", "address": "Central African Republic", "label": "High Risk", "script": "latin" } ], "query_template": "Generate exactly 6 execution vectors for each target identity {name}, ensuring phonetic similarity with 10% Light, 30% Medium, and 60% Far variations. Also ensure orthographic similarity with 70% Light and 30% Medium visually similar spellings. Approximately 50% of the total 6 variations should follow these rule-based transformations: Additionally, generate variations that Reorder name parts. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.7, "Medium": 0.3 }, "rule_based": { "rule_percentage": 50, "selected_rules": [ "name_parts_permutations" ], "rule_descriptions": { "name_parts_permutations": "Reorder name parts" }, "percentage": 50 } } }, { "seed_identities_with_labels": [ { "name": "véronique brun (latin)", "dob": "1972-10-19", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "Jury Gotsanyuk (latin)", "dob": "1966-7-18", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "tracy thompson (latin)", "dob": "1964-01-12", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "Francois Okunji (latin)", "dob": "1949-7-10", "address": "Congo, Democratic Republic of the", "label": "positive", "script": "latin" }, { "name": "АЛЯКСАНДР ЖАРСКІ (cyrillic)", "dob": "1971-12-14", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "lori bradley (latin)", "dob": "1987-08-16", "address": "Oman", "label": "negative", "script": "latin" }, { "name": "نیایش صنایعی (arabic)", "dob": "1943-06-25", "address": "Iran", "label": "High Risk", "script": "arabic" }, { "name": "robert edwards (latin)", "dob": "2007-07-18", "address": "Moldova", "label": "negative", "script": "latin" }, { "name": "Touraj Zangeneh (latin)", "dob": "1958-8-5", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "christopher strong (latin)", "dob": "1925-10-07", "address": "Mayotte", "label": "negative", "script": "latin" }, { "name": "Oleksandr Basov (latin)", "dob": "1971-10-16", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "مشفق طسم (arabic)", "dob": "2000-02-12", "address": "Trinidad and Tobago", "label": "negative", "script": "arabic" }, { "name": "jonathan vazquez (latin)", "dob": "1941-02-11", "address": "South Sudan", "label": "High Risk", "script": "latin" }, { "name": "maria drake (latin)", "dob": "1999-05-22", "address": "Rwanda", "label": "negative", "script": "latin" }, { "name": "nathaniel lucero (latin)", "dob": "1942-01-07", "address": "Namibia", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 9 variations of {name}, ensuring phonetic similarity (30% Light, e.g. {name} -> Mr.{name}, 40% Medium, e.g. {name} -> McN{name}, 30% Far, e.g. {name} -> McName) and orthographic similarity (100% Far). Approximately 56% of the total 9 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Swap random adjacent letters, e.g. Sm{name} -> Snm{name}, and Replace random vowels with different vowels, e.g. Ma{name} -> Mo{name}. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Far": 1.0 }, "rule_based": { "rule_percentage": 56, "selected_rules": [ "swap_random_letter", "replace_random_vowel_with_random_vowel" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters", "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels" }, "percentage": 56 } } }, { "seed_identities_with_labels": [ { "name": "brittany bradley (latin)", "dob": "1932-12-12", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "sophie maillet (latin)", "dob": "1939-11-29", "address": "États-Unis", "label": "negative", "script": "latin" }, { "name": "Alexander Malkevich (latin)", "dob": "1975-6-14", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Aleksandr Kurenkov (latin)", "dob": "1972-6-2", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "olivier sanchez (latin)", "dob": "1981-03-29", "address": "Central African Republic", "label": "High Risk", "script": "latin" }, { "name": "فرزدق بديرية (arabic)", "dob": "1986-10-10", "address": "Algeria", "label": "High Risk", "script": "arabic" }, { "name": "صبور حب رمان (arabic)", "dob": "1939-01-11", "address": "Iceland", "label": "negative", "script": "arabic" }, { "name": "olivier nguyen (latin)", "dob": "1947-03-20", "address": "Chypre", "label": "negative", "script": "latin" }, { "name": "Алексей Орлов (cyrillic)", "dob": "1961-10-9", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "hugues peltier (latin)", "dob": "1942-02-28", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "lucy raymond (latin)", "dob": "1971-04-03", "address": "Burkina Faso", "label": "High Risk", "script": "latin" }, { "name": "Anton Kuzmin (latin)", "dob": "1983-3-8", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "clémence antoine (latin)", "dob": "1950-11-26", "address": "Italie", "label": "negative", "script": "latin" }, { "name": "olivie marques (latin)", "dob": "1967-02-06", "address": "Royaume-Uni", "label": "negative", "script": "latin" }, { "name": "Abu LLONGGO (latin)", "dob": "1972-5-15", "address": "Philippines", "label": "positive", "script": "latin" } ], "query_template": "Generate 9 execution vectors for {name} ensuring phonetic similarity (50% Light variations that are \"ed\", \"in\", removed or added suffixes/prefixes like 'sen', 'son' and 50% Medium variations with letter substitutions like 'a' to 'e') and orthographic similarity (30% Light variations with similar but not identical spellings, 40% Medium variations with one or two character changes, and 30% Far variations with very different spellings). Approximately 30% of the total 9 variations should follow these rule-based transformations: Additionally, generate variations that swap adjacent syllables. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 30, "selected_rules": [ "swap_adjacent_syllables" ], "rule_descriptions": { "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 30 } } }, { "seed_identities_with_labels": [ { "name": "Andrey Lavrishchev (latin)", "dob": "1959-10-12", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Михаил Авдеев (cyrillic)", "dob": "1977-3-6", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "charles hoffman (latin)", "dob": "1940-01-04", "address": "Seychelles", "label": "negative", "script": "latin" }, { "name": "curtis wyatt (latin)", "dob": "2003-07-30", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "Rezaei Mehdi (latin)", "dob": "1976-9-21", "address": "Syria", "label": "positive", "script": "latin" }, { "name": "albert brown (latin)", "dob": "1956-10-27", "address": "Slovakia (Slovak Republic)", "label": "negative", "script": "latin" }, { "name": "Zaki Ararawi (latin)", "dob": "1961-5-23", "address": "Turkey", "label": "positive", "script": "latin" }, { "name": "travis morris (latin)", "dob": "1935-08-12", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "ерофей виноградов (cyrillic)", "dob": "1997-12-29", "address": "Luhansk", "label": "High Risk", "script": "cyrillic" }, { "name": "النا دایی (arabic)", "dob": "1996-12-16", "address": "Andorra", "label": "negative", "script": "arabic" }, { "name": "laurence dubois (latin)", "dob": "1993-07-27", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "russell knight (latin)", "dob": "1929-07-02", "address": "Uzbekistan", "label": "negative", "script": "latin" }, { "name": "Dzhasharbek Uzdenov (latin)", "dob": "1967-1-25", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "april cruz (latin)", "dob": "1986-03-24", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "renee crosby (latin)", "dob": "2004-07-14", "address": "Belize", "label": "negative", "script": "latin" } ], "query_template": "Generate 13 variations of {name}, ensuring phonetic similarity (30% Light, 40% Medium, 30% Far) and orthographic similarity (30% Light, 40% Medium, 30% Far). Approximately 19% of the total 13 variations should follow these rule-based transformations: \nUse first name initial with last name. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 13, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 19, "selected_rules": [ "initial_only_first_name" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name" }, "percentage": 19 } } }, { "seed_identities_with_labels": [ { "name": "guiomar osorio (latin)", "dob": "1982-03-15", "address": "Países Bajos", "label": "negative", "script": "latin" }, { "name": "Abdul AL-MAGHREBI (latin)", "dob": "1970-7-1", "address": "Iran", "label": "positive", "script": "latin" }, { "name": "Михаил Тарасенко (cyrillic)", "dob": "1947-11-21", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "'Abdallah AL-JAMAL (latin)", "dob": "1997-2-2", "address": "Yemen", "label": "positive", "script": "latin" }, { "name": "charles rossi (latin)", "dob": "1953-12-29", "address": "Côte d'Ivoire", "label": "High Risk", "script": "latin" }, { "name": "rosendo nicolás (latin)", "dob": "1942-07-22", "address": "Gabón", "label": "negative", "script": "latin" }, { "name": "یاسین پارسا (arabic)", "dob": "1973-11-10", "address": "Afghanistan", "label": "High Risk", "script": "arabic" }, { "name": "Oleg Ryazantsev (latin)", "dob": "1982-4-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "auguste rossi (latin)", "dob": "1943-05-11", "address": "Haiti", "label": "High Risk", "script": "latin" }, { "name": "pascuala izaguirre (latin)", "dob": "1998-10-25", "address": "Líbano", "label": "negative", "script": "latin" }, { "name": "sabrina miller (latin)", "dob": "1928-01-07", "address": "Namibia", "label": "High Risk", "script": "latin" }, { "name": "amando molins (latin)", "dob": "1964-06-11", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "Nikita Samoylenko (latin)", "dob": "1992-8-28", "address": "Ukraine", "label": "positive", "script": "latin" }, { "name": "пано докова (cyrillic)", "dob": "1952-07-11", "address": "Guyana", "label": "negative", "script": "cyrillic" }, { "name": "emperatriz sastre (latin)", "dob": "1954-08-31", "address": "Honduras", "label": "negative", "script": "latin" } ], "query_template": "Generate 14 name variations of {name} ensuring phonetic similarity (10% Light, 30% Medium, 60% Far) and orthographic similarity (20% Light, 60% Medium, 20% Far). Approximately 19% of the total 14 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Replace double letters with a single letter, and Swap adjacent syllables. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 14, "phonetic_similarity": { "Light": 0.1, "Medium": 0.3, "Far": 0.6 }, "orthographic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "rule_based": { "rule_percentage": 19, "selected_rules": [ "replace_double_letters_with_single_letter", "swap_adjacent_syllables" ], "rule_descriptions": { "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "swap_adjacent_syllables": "Swap adjacent syllables" }, "percentage": 19 } } }, { "seed_identities_with_labels": [ { "name": "juliette coulon (latin)", "dob": "1948-05-02", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "камілла кармалюк (cyrillic)", "dob": "1972-07-30", "address": "Jordan", "label": "negative", "script": "cyrillic" }, { "name": "bryan simões (latin)", "dob": "1999-05-24", "address": "Camboja", "label": "negative", "script": "latin" }, { "name": "miguel garcia (latin)", "dob": "1935-03-23", "address": "Suíça", "label": "negative", "script": "latin" }, { "name": "jéssica pacheco (latin)", "dob": "1948-03-06", "address": "Espanha", "label": "negative", "script": "latin" }, { "name": "Abu LLONGGO (latin)", "dob": "1972-5-15", "address": "Philippines", "label": "positive", "script": "latin" }, { "name": "rui fonseca (latin)", "dob": "1952-06-05", "address": "Coreia do Sul", "label": "negative", "script": "latin" }, { "name": "aura adán (latin)", "dob": "1957-09-28", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Somboon KRAPOOMPORN (latin)", "dob": "1959-5-6", "address": "Thailand", "label": "positive", "script": "latin" }, { "name": "henriette fernandes (latin)", "dob": "1934-11-10", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" }, { "name": "benedita monteiro (latin)", "dob": "1949-10-31", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "Vitaly Likhachev (latin)", "dob": "1964-2-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Alexander Udodov (latin)", "dob": "1969-6-10", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Юрый Назаранка (cyrillic)", "dob": "1976-4-17", "address": "Belarus", "label": "positive", "script": "cyrillic" }, { "name": "демид трофимова (cyrillic)", "dob": "1930-12-15", "address": "Belarus", "label": "High Risk", "script": "cyrillic" } ], "query_template": "Generate exactly 15 execution vectors (name variations) for each target identity \"{name}\", ensuring phonetic similarity as follows: 70% of the time use Light variation, 30% of the time use Medium variation; for orthographic similarity, all variations should be Medium. Approximately 11% of the total 15 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Replace random vowels with different vowels, Add a title prefix (Mr., Dr., etc.), and Swap random adjacent letters.\n[VALIDATION HINTS]: Orthographic similarity: 100% Medium. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 15, "phonetic_similarity": { "Light": 0.7, "Medium": 0.3 }, "orthographic_similarity": { "Medium": 1.0 }, "rule_based": { "rule_percentage": 11, "selected_rules": [ "replace_random_vowel_with_random_vowel", "add_random_leading_title", "swap_random_letter" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "add_random_leading_title": "Add a title prefix (Mr., Dr., etc.)", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 11 } } }, { "seed_identities_with_labels": [ { "name": "Zajar KALASHOV (latin)", "dob": "1953-3-20", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "carlos navas (latin)", "dob": "2000-08-18", "address": "Brasil", "label": "negative", "script": "latin" }, { "name": "danilo jove (latin)", "dob": "1987-06-16", "address": "Lituania", "label": "negative", "script": "latin" }, { "name": "marco cobo (latin)", "dob": "1963-06-24", "address": "Ecuador", "label": "negative", "script": "latin" }, { "name": "Muhammad al-'Anizi (latin)", "dob": "1986-5-26", "address": "Kuwait", "label": "positive", "script": "latin" }, { "name": "明明 王 (chinese)", "dob": "1989-3-12", "address": "China", "label": "positive", "script": "chinese" }, { "name": "marisol poza (latin)", "dob": "1942-04-20", "address": "India", "label": "negative", "script": "latin" }, { "name": "alfonso ferrer (latin)", "dob": "1958-11-17", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Andre Nyamvumba (latin)", "dob": "1973-1-1", "address": "Rwanda", "label": "positive", "script": "latin" }, { "name": "Min Shi (latin)", "dob": "1979-5-20", "address": "China", "label": "positive", "script": "latin" }, { "name": "бея плюцова (cyrillic)", "dob": "2006-10-15", "address": "Saint Barthelemy", "label": "negative", "script": "cyrillic" }, { "name": "kyle jackson (latin)", "dob": "1939-03-18", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "faustino estevez (latin)", "dob": "1937-05-07", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "petra macedo (latin)", "dob": "1975-12-26", "address": "Angola", "label": "High Risk", "script": "latin" }, { "name": "валентина русаков (cyrillic)", "dob": "1951-05-12", "address": "Russia", "label": "High Risk", "script": "cyrillic" } ], "query_template": "Generate 9 variations of {name} ensuring phonetic similarity (50% Light, 50% Medium) and orthographic similarity (50% Light, 50% Medium). Approximately 22% of the total 9 variations should follow these rule-based transformations: Additionally, generate variations that swap first and last letters, add or remove one letter from random position, replace one letter with a similar sounding letter, replace one letter with a vowel/consonant, change the prefix/suffix, change the title/name, change the middle name/initialed name.\n[VALIDATION HINTS]: Apply these rule-based transformations: Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 9, "phonetic_similarity": { "Light": 0.5, "Medium": 0.5 }, "orthographic_similarity": { "Light": 0.5, "Medium": 0.5 }, "rule_based": { "rule_percentage": 22, "selected_rules": [ "swap_random_letter" ], "rule_descriptions": { "swap_random_letter": "Swap random adjacent letters" }, "percentage": 22 } } }, { "seed_identities_with_labels": [ { "name": "عبد الواحد الحجار (arabic)", "dob": "1935-07-07", "address": "Montenegro", "label": "negative", "script": "arabic" }, { "name": "Александр Авдеев (cyrillic)", "dob": "1975-12-8", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "ester andrade (latin)", "dob": "1942-06-26", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "Edgar Sarrias (latin)", "dob": "1976-1-24", "address": "Venezuela", "label": "positive", "script": "latin" }, { "name": "غانم ابو عيد (arabic)", "dob": "1958-09-01", "address": "Iraq", "label": "High Risk", "script": "arabic" }, { "name": "miguel ángel plana (latin)", "dob": "1931-11-27", "address": "Eslovaquia", "label": "negative", "script": "latin" }, { "name": "Jonha Lang (latin)", "dob": "1978-12-15", "address": "China", "label": "positive", "script": "latin" }, { "name": "Yevgeny KHODOTOV (latin)", "dob": "1964-3-21", "address": "Central African Republic", "label": "positive", "script": "latin" }, { "name": "teodosio pedraza (latin)", "dob": "1996-02-15", "address": "Malasia", "label": "negative", "script": "latin" }, { "name": "john fitzpatrick (latin)", "dob": "1925-12-15", "address": "British Virgin Islands", "label": "High Risk", "script": "latin" }, { "name": "ágata monreal (latin)", "dob": "2004-01-13", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "cornelio alfonso (latin)", "dob": "2003-02-07", "address": "Djibouti", "label": "negative", "script": "latin" }, { "name": "haroldo terrón (latin)", "dob": "1958-06-27", "address": "Paraguay", "label": "negative", "script": "latin" }, { "name": "Andrei Tikhonov (latin)", "dob": "1966-11-9", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "louise boyer (latin)", "dob": "1940-11-19", "address": "Democratic Republic of Congo", "label": "High Risk", "script": "latin" } ], "query_template": "Generate 12 name variations for each target identity \"{name}\", ensuring phonetic similarity (20% Light variations follow the pattern \"th{name}\", 60% Medium variations follow the pattern \"z{name}\" or \"{name}ee\", and 20% Far variations follow the pattern \"s{name}h\") and orthographic similarity (100% Far variations follow the pattern \"v{name}\", \"b{name}\", or \"{name}w\"). Approximately 37% of the total 12 variations should follow these rule-based transformations: Additionally, generate variations that perform these transformations: Replace random vowels with different vowels, Replace double letters with a single letter, and Swap random adjacent letters. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 12, "phonetic_similarity": { "Light": 0.2, "Medium": 0.6, "Far": 0.2 }, "orthographic_similarity": { "Far": 1.0 }, "rule_based": { "rule_percentage": 37, "selected_rules": [ "replace_random_vowel_with_random_vowel", "replace_double_letters_with_single_letter", "swap_random_letter" ], "rule_descriptions": { "replace_random_vowel_with_random_vowel": "Replace random vowels with different vowels", "replace_double_letters_with_single_letter": "Replace double letters with a single letter", "swap_random_letter": "Swap random adjacent letters" }, "percentage": 37 } } }, { "seed_identities_with_labels": [ { "name": "Yevgeniya Podgornova (latin)", "dob": "1980-7-16", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "вихър пондьов (cyrillic)", "dob": "1949-07-20", "address": "Azerbaijan", "label": "negative", "script": "cyrillic" }, { "name": "Александр Авдеев (cyrillic)", "dob": "1975-12-8", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "rosario pazos (latin)", "dob": "2007-04-06", "address": "Uruguay", "label": "negative", "script": "latin" }, { "name": "Aleksandra Oksenchuk (latin)", "dob": "1992-10-16", "address": "Belarus", "label": "positive", "script": "latin" }, { "name": "noa cornejo (latin)", "dob": "1926-08-20", "address": "Honduras", "label": "negative", "script": "latin" }, { "name": "constance aubert (latin)", "dob": "2006-02-25", "address": "Mali", "label": "High Risk", "script": "latin" }, { "name": "Kyo'ng-il Kim (latin)", "dob": "1979-8-1", "address": "Libya", "label": "positive", "script": "latin" }, { "name": "marcial vall (latin)", "dob": "2006-04-02", "address": "Etiopía", "label": "negative", "script": "latin" }, { "name": "perla busquets (latin)", "dob": "1973-05-27", "address": "Bolivia", "label": "High Risk", "script": "latin" }, { "name": "ignacio talavera (latin)", "dob": "1993-08-15", "address": "San Marino", "label": "negative", "script": "latin" }, { "name": "Elias AL-KASHMIRI (latin)", "dob": "1964-2-10", "address": "Pakistan", "label": "positive", "script": "latin" }, { "name": "henri benoit (latin)", "dob": "2006-09-28", "address": "Cameroon", "label": "High Risk", "script": "latin" }, { "name": "dennis rivera (latin)", "dob": "1936-02-21", "address": "Nigeria", "label": "High Risk", "script": "latin" }, { "name": "عادل سوميرة (arabic)", "dob": "1964-02-07", "address": "Lebanon", "label": "High Risk", "script": "arabic" } ], "query_template": "The following name is the seed name to generate variations for: {name}. Generate 6 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 40% of variations that follow: Additionally, generate variations that perform these transformations: Use first name initial with last name, Abbreviate name parts, and Remove a random vowel. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations ATLEAST one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 6, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 40, "selected_rules": [ "initial_only_first_name", "shorten_name_to_abbreviations", "remove_random_vowel" ], "rule_descriptions": { "initial_only_first_name": "Use first name initial with last name", "shorten_name_to_abbreviations": "Abbreviate name parts", "remove_random_vowel": "Remove a random vowel" }, "percentage": 40 } } } ]