| { | |
| "seed_identities_with_labels": [ | |
| { | |
| "name": "natalio gordillo (latin)", | |
| "dob": "1991-06-23", | |
| "address": "Cuba", | |
| "label": "High Risk", | |
| "script": "latin" | |
| }, | |
| { | |
| "name": "دانة مرازيق البقوم (arabic)", | |
| "dob": "1950-05-13", | |
| "address": "Somalia", | |
| "label": "High Risk", | |
| "script": "arabic" | |
| }, | |
| { | |
| "name": "Наталья Орлова (cyrillic)", | |
| "dob": "1969-08-29", | |
| "address": "Russia", | |
| "label": "positive", | |
| "script": "cyrillic" | |
| }, | |
| { | |
| "name": "bryan ferreira (latin)", | |
| "dob": "1986-10-13", | |
| "address": "Mozambique", | |
| "label": "High Risk", | |
| "script": "latin" | |
| }, | |
| { | |
| "name": "محمد سماوات (arabic)", | |
| "dob": "1985-12-26", | |
| "address": "Malta", | |
| "label": "negative", | |
| "script": "arabic" | |
| }, | |
| { | |
| "name": "michael smith (latin)", | |
| "dob": "2003-11-16", | |
| "address": "South Africa", | |
| "label": "High Risk", | |
| "script": "latin" | |
| }, | |
| { | |
| "name": "pío hurtado (latin)", | |
| "dob": "1959-11-29", | |
| "address": "Venezuela", | |
| "label": "High Risk", | |
| "script": "latin" | |
| }, | |
| { | |
| "name": "Rajabi Al-Zahir (latin)", | |
| "dob": "1969-07-05", | |
| "address": "United Arab Emirates", | |
| "label": "positive", | |
| "script": "latin" | |
| }, | |
| { | |
| "name": "Boris Vologdin (latin)", | |
| "dob": "1955-07-22", | |
| "address": "Russia", | |
| "label": "positive", | |
| "script": "latin" | |
| }, | |
| { | |
| "name": "Wendy Morales (latin)", | |
| "dob": "1980-05-28", | |
| "address": "Nicaragua", | |
| "label": "positive", | |
| "script": "latin" | |
| }, | |
| { | |
| "name": "edu montalbán (latin)", | |
| "dob": "2002-03-06", | |
| "address": "Libia", | |
| "label": "negative", | |
| "script": "latin" | |
| }, | |
| { | |
| "name": "Evgeniy Polyanin (latin)", | |
| "dob": "1993-03-04", | |
| "address": "Russia", | |
| "label": "positive", | |
| "script": "latin" | |
| }, | |
| { | |
| "name": "leandro vilanova (latin)", | |
| "dob": "2005-09-08", | |
| "address": "Pakistán", | |
| "label": "negative", | |
| "script": "latin" | |
| }, | |
| { | |
| "name": "valentín lledó (latin)", | |
| "dob": "1929-01-03", | |
| "address": "Timor-Leste", | |
| "label": "negative", | |
| "script": "latin" | |
| }, | |
| { | |
| "name": "lupe esteve (latin)", | |
| "dob": "1951-03-24", | |
| "address": "Ucrania", | |
| "label": "negative", | |
| "script": "latin" | |
| } | |
| ], | |
| "query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 17% of variations that follow: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), Abbreviate name parts, and Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations must include at least one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB", | |
| "query_labels": { | |
| "variation_count": 10, | |
| "phonetic_similarity": { | |
| "Light": 0.3, | |
| "Medium": 0.4, | |
| "Far": 0.3 | |
| }, | |
| "orthographic_similarity": { | |
| "Light": 0.3, | |
| "Medium": 0.4, | |
| "Far": 0.3 | |
| }, | |
| "rule_based": { | |
| "rule_percentage": 17, | |
| "selected_rules": [ | |
| "add_random_trailing_title", | |
| "shorten_name_to_abbreviations", | |
| "duplicate_random_letter_as_double_letter" | |
| ], | |
| "rule_descriptions": { | |
| "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", | |
| "shorten_name_to_abbreviations": "Abbreviate name parts", | |
| "duplicate_random_letter_as_double_letter": "Duplicate a random letter" | |
| }, | |
| "percentage": 17 | |
| } | |
| }, | |
| "generation_metadata": { | |
| "successful_model": null, | |
| "successful_timeout": null, | |
| "successful_judge_model": null, | |
| "successful_judge_timeout": null, | |
| "generation_log": { | |
| "attempts": [ | |
| { | |
| "model": "llama3.1:latest", | |
| "timeout": 60, | |
| "status": "failed_with_timeout: timed out", | |
| "raw_template": null, | |
| "validation": null, | |
| "repair_attempt": null | |
| } | |
| ], | |
| "decision": "Fell back to simple template after all LLM generation attempts failed.", | |
| "final_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 17% of variations that follow: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), Abbreviate name parts, and Duplicate a random letter.", | |
| "labels": { | |
| "variation_count": 10, | |
| "phonetic_similarity": { | |
| "Light": 0.3, | |
| "Medium": 0.4, | |
| "Far": 0.3 | |
| }, | |
| "orthographic_similarity": { | |
| "Light": 0.3, | |
| "Medium": 0.4, | |
| "Far": 0.3 | |
| }, | |
| "rule_based": { | |
| "rule_percentage": 17, | |
| "selected_rules": [ | |
| "add_random_trailing_title", | |
| "shorten_name_to_abbreviations", | |
| "duplicate_random_letter_as_double_letter" | |
| ], | |
| "rule_descriptions": { | |
| "add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)", | |
| "shorten_name_to_abbreviations": "Abbreviate name parts", | |
| "duplicate_random_letter_as_double_letter": "Duplicate a random letter" | |
| }, | |
| "percentage": 17 | |
| } | |
| }, | |
| "validation": { | |
| "static_issues": [], | |
| "judge_model": null, | |
| "judge_timeout": null, | |
| "judge_issues": [], | |
| "final_issues": [] | |
| } | |
| } | |
| } | |
| } |