{ "seed_identities_with_labels": [ { "name": "natalio gordillo (latin)", "dob": "1991-06-23", "address": "Cuba", "label": "High Risk", "script": "latin" }, { "name": "دانة مرازيق البقوم (arabic)", "dob": "1950-05-13", "address": "Somalia", "label": "High Risk", "script": "arabic" }, { "name": "Наталья Орлова (cyrillic)", "dob": "1969-8-29", "address": "Russia", "label": "positive", "script": "cyrillic" }, { "name": "bryan ferreira (latin)", "dob": "1986-10-13", "address": "Mozambique", "label": "High Risk", "script": "latin" }, { "name": "محمد سماوات (arabic)", "dob": "1985-12-26", "address": "Malta", "label": "negative", "script": "arabic" }, { "name": "michael smith (latin)", "dob": "2003-11-16", "address": "South Africa", "label": "High Risk", "script": "latin" }, { "name": "pío hurtado (latin)", "dob": "1959-11-29", "address": "Venezuela", "label": "High Risk", "script": "latin" }, { "name": "Rajabi Al-Zahir (latin)", "dob": "1969-7-5", "address": "United Arab Emirates", "label": "positive", "script": "latin" }, { "name": "Boris Vologdin (latin)", "dob": "1955-7-22", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "Wendy Morales (latin)", "dob": "1980-5-28", "address": "Nicaragua", "label": "positive", "script": "latin" }, { "name": "edu montalbán (latin)", "dob": "2002-03-06", "address": "Libia", "label": "negative", "script": "latin" }, { "name": "Evgeniy Polyanin (latin)", "dob": "1993-3-4", "address": "Russia", "label": "positive", "script": "latin" }, { "name": "leandro vilanova (latin)", "dob": "2005-09-08", "address": "Pakistán", "label": "negative", "script": "latin" }, { "name": "valentín lledó (latin)", "dob": "1929-01-03", "address": "Timor-Leste", "label": "negative", "script": "latin" }, { "name": "lupe esteve (latin)", "dob": "1951-03-24", "address": "Ucrania", "label": "negative", "script": "latin" } ], "query_template": "The following name is the seed name to generate variations for: {name}. 
Generate 10 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 17% of variations that follow: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), Abbreviate name parts, and Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations must include at least one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", "query_labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 17, "selected_rules": [ "add_random_trailing_title", "shorten_name_to_abbreviations", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "shorten_name_to_abbreviations": "Abbreviate name parts", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 17 } }, "generation_metadata": { "successful_model": null, "successful_timeout": null, "successful_judge_model": null, "successful_judge_timeout": null, "generation_log": { "attempts": [ { "model": "llama3.1:latest", "timeout": 60, "status": "failed_with_timeout: timed out", "raw_template": null, "validation": null, "repair_attempt": null } ], "decision": "Fell back to simple template after all LLM generation attempts failed.", 
"final_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 17% of variations that follow: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), Abbreviate name parts, and Duplicate a random letter.", "labels": { "variation_count": 10, "phonetic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "orthographic_similarity": { "Light": 0.3, "Medium": 0.4, "Far": 0.3 }, "rule_based": { "rule_percentage": 17, "selected_rules": [ "add_random_trailing_title", "shorten_name_to_abbreviations", "duplicate_random_letter_as_double_letter" ], "rule_descriptions": { "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", "shorten_name_to_abbreviations": "Abbreviate name parts", "duplicate_random_letter_as_double_letter": "Duplicate a random letter" }, "percentage": 17 } }, "validation": { "static_issues": [], "judge_model": null, "judge_timeout": null, "judge_issues": [], "final_issues": [] } } } }