test1 / generated_query.json
HUY2612's picture
Upload generated_query.json with huggingface_hub
8d3f796 verified
{
"seed_identities_with_labels": [
{
"name": "natalio gordillo (latin)",
"dob": "1991-06-23",
"address": "Cuba",
"label": "High Risk",
"script": "latin"
},
{
"name": "دانة مرازيق البقوم (arabic)",
"dob": "1950-05-13",
"address": "Somalia",
"label": "High Risk",
"script": "arabic"
},
{
"name": "Наталья Орлова (cyrillic)",
"dob": "1969-08-29",
"address": "Russia",
"label": "positive",
"script": "cyrillic"
},
{
"name": "bryan ferreira (latin)",
"dob": "1986-10-13",
"address": "Mozambique",
"label": "High Risk",
"script": "latin"
},
{
"name": "محمد سماوات (arabic)",
"dob": "1985-12-26",
"address": "Malta",
"label": "negative",
"script": "arabic"
},
{
"name": "michael smith (latin)",
"dob": "2003-11-16",
"address": "South Africa",
"label": "High Risk",
"script": "latin"
},
{
"name": "pío hurtado (latin)",
"dob": "1959-11-29",
"address": "Venezuela",
"label": "High Risk",
"script": "latin"
},
{
"name": "Rajabi Al-Zahir (latin)",
"dob": "1969-07-05",
"address": "United Arab Emirates",
"label": "positive",
"script": "latin"
},
{
"name": "Boris Vologdin (latin)",
"dob": "1955-07-22",
"address": "Russia",
"label": "positive",
"script": "latin"
},
{
"name": "Wendy Morales (latin)",
"dob": "1980-05-28",
"address": "Nicaragua",
"label": "positive",
"script": "latin"
},
{
"name": "edu montalbán (latin)",
"dob": "2002-03-06",
"address": "Libia",
"label": "negative",
"script": "latin"
},
{
"name": "Evgeniy Polyanin (latin)",
"dob": "1993-03-04",
"address": "Russia",
"label": "positive",
"script": "latin"
},
{
"name": "leandro vilanova (latin)",
"dob": "2005-09-08",
"address": "Pakistán",
"label": "negative",
"script": "latin"
},
{
"name": "valentín lledó (latin)",
"dob": "1929-01-03",
"address": "Timor-Leste",
"label": "negative",
"script": "latin"
},
{
"name": "lupe esteve (latin)",
"dob": "1951-03-24",
"address": "Ucrania",
"label": "negative",
"script": "latin"
}
],
"query_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 17% of variations that follow: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), Abbreviate name parts, and Duplicate a random letter. The following address is the seed country/city to generate address variations for: {address}. Generate unique real addresses within the specified country/city for each variation. The following date of birth is the seed DOB to generate variations for: {dob}.\n\n[ADDITIONAL CONTEXT]:\n- Address variations should be realistic addresses within the specified country/city\n- DOB variations must include at least one in each category (±1 day, ±3 days, ±30 days, ±90 days, ±365 days, year+month only)\n- For year+month, generate the exact DOB without day\n- Each variation must have a different, realistic address and DOB",
"query_labels": {
"variation_count": 10,
"phonetic_similarity": {
"Light": 0.3,
"Medium": 0.4,
"Far": 0.3
},
"orthographic_similarity": {
"Light": 0.3,
"Medium": 0.4,
"Far": 0.3
},
"rule_based": {
"rule_percentage": 17,
"selected_rules": [
"add_random_trailing_title",
"shorten_name_to_abbreviations",
"duplicate_random_letter_as_double_letter"
],
"rule_descriptions": {
"add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)",
"shorten_name_to_abbreviations": "Abbreviate name parts",
"duplicate_random_letter_as_double_letter": "Duplicate a random letter"
},
"percentage": 17
}
},
"generation_metadata": {
"successful_model": null,
"successful_timeout": null,
"successful_judge_model": null,
"successful_judge_timeout": null,
"generation_log": {
"attempts": [
{
"model": "llama3.1:latest",
"timeout": 60,
"status": "failed_with_timeout: timed out",
"raw_template": null,
"validation": null,
"repair_attempt": null
}
],
"decision": "Fell back to simple template after all LLM generation attempts failed.",
"final_template": "The following name is the seed name to generate variations for: {name}. Generate 10 variations of the name {name}, ensuring phonetic similarity: 30% Light, 40% Medium, 30% Far, and orthographic similarity: 30% Light, 40% Medium, 30% Far, and also include 17% of variations that follow: Additionally, generate variations that perform these transformations: Add a title suffix (Jr., PhD, etc.), Abbreviate name parts, and Duplicate a random letter.",
"labels": {
"variation_count": 10,
"phonetic_similarity": {
"Light": 0.3,
"Medium": 0.4,
"Far": 0.3
},
"orthographic_similarity": {
"Light": 0.3,
"Medium": 0.4,
"Far": 0.3
},
"rule_based": {
"rule_percentage": 17,
"selected_rules": [
"add_random_trailing_title",
"shorten_name_to_abbreviations",
"duplicate_random_letter_as_double_letter"
],
"rule_descriptions": {
"add_random_trailing_title": "Add a title suffix (Jr., PhD, etc.)",
"shorten_name_to_abbreviations": "Abbreviate name parts",
"duplicate_random_letter_as_double_letter": "Duplicate a random letter"
},
"percentage": 17
}
},
"validation": {
"static_issues": [],
"judge_model": null,
"judge_timeout": null,
"judge_issues": [],
"final_issues": []
}
}
}
}