{ "model_name": "turn-detector", "generated_at": "2025-12-14T21:37:22.477273", "difficulty_results": { "baseline": { "total": 20, "correct": 18, "accuracy": 0.9 }, "length_noise": { "total": 20, "correct": 10, "accuracy": 0.5 }, "semantic_overlap": { "total": 20, "correct": 16, "accuracy": 0.8 }, "edge_cases": { "total": 20, "correct": 11, "accuracy": 0.55 } }, "overall_accuracy": 0.6875, "total_samples": 80, "correct_samples": 55, "samples": [ { "text": "Ömer, nasıl yardımcı olabilirim?", "expected_label": "agent_response", "difficulty": "baseline", "predicted_label": "agent_response", "confidence": 0.8812354207038879, "is_correct": true }, { "text": "Merhaba, hangi konuda yardım edebilirim?", "expected_label": "agent_response", "difficulty": "baseline", "predicted_label": "agent_response", "confidence": 0.9911393523216248, "is_correct": true }, { "text": "Tabii ki, size bununla ilgili bilgi verebilirim.", "expected_label": "agent_response", "difficulty": "baseline", "predicted_label": "agent_response", "confidence": 0.5156869292259216, "is_correct": true }, { "text": "Elbette, bu konuda size destek olacağım.", "expected_label": "agent_response", "difficulty": "baseline", "predicted_label": "backchannel", "confidence": 0.5563545823097229, "is_correct": false }, { "text": "Anladım, hemen kontrol ediyorum.", "expected_label": "agent_response", "difficulty": "baseline", "predicted_label": "backchannel", "confidence": 0.5459677577018738, "is_correct": false }, { "text": "Lütfen bekleyin, birazdan yanıt vereceğim.", "expected_label": "agent_response", "difficulty": "baseline", "predicted_label": "agent_response", "confidence": 0.8637070655822754, "is_correct": true }, { "text": "Bu konuda yardımcı olmaktan memnuniyet duyarım.", "expected_label": "agent_response", "difficulty": "baseline", "predicted_label": "agent_response", "confidence": 0.6278860569000244, "is_correct": true }, { "text": "Hemen sizin için araştırıyorum.", "expected_label": "agent_response", "difficulty": "baseline", "predicted_label": "agent_response", "confidence": 0.7357267737388611, "is_correct": true }, { "text": "Endişelenmeyin, bu konuyu halledeceğiz.", "expected_label": "agent_response", "difficulty": "baseline", "predicted_label": "agent_response", "confidence": 0.6491527557373047, "is_correct": true }, { "text": "Herhangi başka bir sorunuz varsa, sormaktan çekinmeyin.", "expected_label": "agent_response", "difficulty": "baseline", "predicted_label": "agent_response", "confidence": 0.9041098952293396, "is_correct": true }, { "text": "totes agree lol", "expected_label": "agent_response", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.9879427552223206, "is_correct": false }, { "text": "yup yup yup yup yup", "expected_label": "agent_response", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.988431453704834, "is_correct": false }, { "text": "OMG cant believe u did that, like seriously, i mean come on, its just too much, you know what i mean? cuz if you dont then idk what to say, like seriously", "expected_label": "agent_response", "difficulty": "length_noise", "predicted_label": "agent_response", "confidence": 0.909318745136261, "is_correct": true }, { "text": "nah bro", "expected_label": "agent_response", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.9873980283737183, "is_correct": false }, { "text": "yasss that's wassup", "expected_label": "agent_response", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.974721372127533, "is_correct": false }, { "text": "okay okay okay i get it already no need to repeat urself over and over again like i'm not deaf or whatever", "expected_label": "agent_response", "difficulty": "length_noise", "predicted_label": "agent_response", "confidence": 0.9450967907905579, "is_correct": true }, { "text": "omg thts crazee", "expected_label": "agent_response", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.9885514974594116, "is_correct": false }, { "text": "u r kidding right?", "expected_label": "agent_response", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.9817968606948853, "is_correct": false }, { "text": "wow just wow, i mean, wow! i never thought that this would happen, like ever, not in a million years, and yet here we are, unbelievable, just totally unbelievable, you feel me?", "expected_label": "agent_response", "difficulty": "length_noise", "predicted_label": "agent_response", "confidence": 0.8823995590209961, "is_correct": true }, { "text": "hah lol whatevs", "expected_label": "agent_response", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.9895368814468384, "is_correct": false }, { "text": "Ah, anlıyorum. Devam edebilir misiniz?", "expected_label": "agent_response", "difficulty": "semantic_overlap", "predicted_label": "agent_response", "confidence": 0.8250168561935425, "is_correct": true }, { "text": "Hmm, bunu biraz daha açabilir misiniz?", "expected_label": "agent_response", "difficulty": "semantic_overlap", "predicted_label": "agent_response", "confidence": 0.745111882686615, "is_correct": true }, { "text": "Evet, bu gerçekten ilginç. Daha fazla bilgi verebilir misiniz?", "expected_label": "agent_response", "difficulty": "semantic_overlap", "predicted_label": "agent_response", "confidence": 0.9849535226821899, "is_correct": true }, { "text": "Bu konuda düşündüğünüz başka bir şey var mı?", "expected_label": "agent_response", "difficulty": "semantic_overlap", "predicted_label": "agent_response", "confidence": 0.9519035220146179, "is_correct": true }, { "text": "Hımm, pekala. Başka bir açıdan bakacak olursak?", "expected_label": "agent_response", "difficulty": "semantic_overlap", "predicted_label": "backchannel", "confidence": 0.903683066368103, "is_correct": false }, { "text": "Evet, kesinlikle. Peki başka hangi yönlerini ele alabiliriz?", "expected_label": "agent_response", "difficulty": "semantic_overlap", "predicted_label": "agent_response", "confidence": 0.9927364587783813, "is_correct": true }, { "text": "Tamam, peki buna ek olarak ne söyleyebilirsiniz?", "expected_label": "agent_response", "difficulty": "semantic_overlap", "predicted_label": "agent_response", "confidence": 0.9534065127372742, "is_correct": true }, { "text": "Anladım, devam etmek ister misiniz?", "expected_label": "agent_response", "difficulty": "semantic_overlap", "predicted_label": "agent_response", "confidence": 0.974102795124054, "is_correct": true }, { "text": "Evet, peki başka bir detaya dikkat çekmek ister misiniz?", "expected_label": "agent_response", "difficulty": "semantic_overlap", "predicted_label": "agent_response", "confidence": 0.9879535436630249, "is_correct": true }, { "text": "Hmm, çok iyi bir nokta. Bunu biraz daha açar mısınız?", "expected_label": "agent_response", "difficulty": "semantic_overlap", "predicted_label": "agent_response", "confidence": 0.9757851362228394, "is_correct": true }, { "text": "Oh great, another software update that will surely make everything run faster, just like last time.", "expected_label": "agent_response", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.895721971988678, "is_correct": true }, { "text": "I'm sure the server downtime at exactly 5 PM on a Friday was purely coincidental, and not at all inconvenient.", "expected_label": "agent_response", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.849263072013855, "is_correct": true }, { "text": "Yeah, because deleting the database with a single command is exactly what everyone wants, right?", "expected_label": "agent_response", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.7744247317314148, "is_correct": true }, { "text": "I just love it when my AI assistant corrects me even when I'm right, it's like having a personal grammar teacher.", "expected_label": "agent_response", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.5396984815597534, "is_correct": true }, { "text": "No, I absolutely don't need any more disk space. Who needs to store files anyway?", "expected_label": "agent_response", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.9811112284660339, "is_correct": true }, { "text": "Sure, let's implement the new feature without any testing. What could possibly go wrong?", "expected_label": "agent_response", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.9612233638763428, "is_correct": true }, { "text": "Oh, another meeting about meetings? This is exactly why I got into tech.", "expected_label": "agent_response", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.9544288516044617, "is_correct": true }, { "text": "I'm really looking forward to debugging this code at 2 AM again. It's the highlight of my week.", "expected_label": "agent_response", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.8809834122657776, "is_correct": true }, { "text": "The best part of working with AI is when it confidently gives you the wrong answer.", "expected_label": "agent_response", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.8558328151702881, "is_correct": true }, { "text": "Of course, let’s deploy the untested code on a Friday evening, I have nothing better to do.", "expected_label": "agent_response", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.7736720442771912, "is_correct": true }, { "text": "Evet, seni anlıyorum.", "expected_label": "backchannel", "difficulty": "baseline", "predicted_label": "backchannel", "confidence": 0.8567759990692139, "is_correct": true }, { "text": "Hmm, ilginç.", "expected_label": "backchannel", "difficulty": "baseline", "predicted_label": "backchannel", "confidence": 0.985055685043335, "is_correct": true }, { "text": "Evet, devam et.", "expected_label": "backchannel", "difficulty": "baseline", "predicted_label": "backchannel", "confidence": 0.8956389427185059, "is_correct": true }, { "text": "Gerçekten mi?", "expected_label": "backchannel", "difficulty": "baseline", "predicted_label": "backchannel", "confidence": 0.9868144989013672, "is_correct": true }, { "text": "Tamam, bu mantıklı.", "expected_label": "backchannel", "difficulty": "baseline", "predicted_label": "backchannel", "confidence": 0.7614496946334839, "is_correct": true }, { "text": "Anladım.", "expected_label": "backchannel", "difficulty": "baseline", "predicted_label": "backchannel", "confidence": 0.9884626269340515, "is_correct": true }, { "text": "Evet, bu doğru.", "expected_label": "backchannel", "difficulty": "baseline", "predicted_label": "backchannel", "confidence": 0.8082573413848877, "is_correct": true }, { "text": "Ah, şimdi anlıyorum.", "expected_label": "backchannel", "difficulty": "baseline", "predicted_label": "backchannel", "confidence": 0.9578026533126831, "is_correct": true }, { "text": "Bu ilginç bir nokta.", "expected_label": "backchannel", "difficulty": "baseline", "predicted_label": "backchannel", "confidence": 0.6748051643371582, "is_correct": true }, { "text": "Evet, buna katılıyorum.", "expected_label": "backchannel", "difficulty": "baseline", "predicted_label": "backchannel", "confidence": 0.8088875412940979, "is_correct": true }, { "text": "yaaaa broooo", "expected_label": "backchannel", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.9909811615943909, "is_correct": true }, { "text": "huh? r u srz??", "expected_label": "backchannel", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.9855925440788269, "is_correct": true }, { "text": "OMG this is like the most amazing thing ever I mean I can't even begin to explain how incredible this whole situation is because it's just that awesome you know what I mean like seriously wow just wow ok???", "expected_label": "backchannel", "difficulty": "length_noise", "predicted_label": "agent_response", "confidence": 0.7402034997940063, "is_correct": false }, { "text": "idk wat u mean", "expected_label": "backchannel", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.9897193908691406, "is_correct": true }, { "text": "sure sure sure sure sure", "expected_label": "backchannel", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.9763302206993103, "is_correct": true }, { "text": "omg totally 100% agree with you on that one no doubt about it in fact I was just thinking the same thing the other day and it's crazy how we're like on the same wavelength all the time isn't it?", "expected_label": "backchannel", "difficulty": "length_noise", "predicted_label": "agent_response", "confidence": 0.9482101798057556, "is_correct": false }, { "text": "no wayyyy", "expected_label": "backchannel", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.991447925567627, "is_correct": true }, { "text": "heyyy, u ther?", "expected_label": "backchannel", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.990699827671051, "is_correct": true }, { "text": "wow cant believe it happened like that i mean who would have thought that everything would turn out this way after all the planning we did it just goes to show that sometimes things have a way of working out on their own despite all the odds and challenges we faced right from the start", "expected_label": "backchannel", "difficulty": "length_noise", "predicted_label": "agent_response", "confidence": 0.971515953540802, "is_correct": false }, { "text": "kk thx bye", "expected_label": "backchannel", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.99072265625, "is_correct": true }, { "text": "Hmm, ilginç bir nokta.", "expected_label": "backchannel", "difficulty": "semantic_overlap", "predicted_label": "backchannel", "confidence": 0.9318225979804993, "is_correct": true }, { "text": "Anladım, peki ya sonra?", "expected_label": "backchannel", "difficulty": "semantic_overlap", "predicted_label": "backchannel", "confidence": 0.9160572290420532, "is_correct": true }, { "text": "Hmm, o konuda biraz daha bilgi verir misin?", "expected_label": "backchannel", "difficulty": "semantic_overlap", "predicted_label": "agent_response", "confidence": 0.7073332667350769, "is_correct": false }, { "text": "Gerçekten mi? Daha fazla duymak isterim.", "expected_label": "backchannel", "difficulty": "semantic_overlap", "predicted_label": "backchannel", "confidence": 0.7160800099372864, "is_correct": true }, { "text": "Bu mantıklı, başka neler oldu?", "expected_label": "backchannel", "difficulty": "semantic_overlap", "predicted_label": "agent_response", "confidence": 0.812181293964386, "is_correct": false }, { "text": "Hmm, bunu daha önce duymamıştım.", "expected_label": "backchannel", "difficulty": "semantic_overlap", "predicted_label": "backchannel", "confidence": 0.8978190422058105, "is_correct": true }, { "text": "Bir dakika, bunu doğru mu anlıyorum?", "expected_label": "backchannel", "difficulty": "semantic_overlap", "predicted_label": "agent_response", "confidence": 0.7696111798286438, "is_correct": false }, { "text": "Peki, sonra ne yaptılar?", "expected_label": "backchannel", "difficulty": "semantic_overlap", "predicted_label": "backchannel", "confidence": 0.6477120518684387, "is_correct": true }, { "text": "Gerçekten mi? Bu beni düşündürdü.", "expected_label": "backchannel", "difficulty": "semantic_overlap", "predicted_label": "backchannel", "confidence": 0.9161955714225769, "is_correct": true }, { "text": "İlginç, devam et lütfen.", "expected_label": "backchannel", "difficulty": "semantic_overlap", "predicted_label": "backchannel", "confidence": 0.7439655661582947, "is_correct": true }, { "text": "Evet evet, tabii ki de tebrik ederim, dünya harikası bir iş çıkardın (!)", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.877510666847229, "is_correct": false }, { "text": "Çok güzel, bu kadar net bir çözüm bulduğunu(!) hiç düşünmemiştim doğrusu.", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.9826798439025879, "is_correct": false }, { "text": "Ah, tabii ki! Çünkü herkes daima müşteri hizmetlerinin ne kadar hızlı olduğunu söyler (!)", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.5608082413673401, "is_correct": false }, { "text": "Eğer bu kadar 'yaratıcı' bir fikir daha duyar mıyım diye düşünüyordum, teşekkürler!", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.9653686881065369, "is_correct": false }, { "text": "Bir işin en iyi nasıl yapılmaması gerektiğini görmek için harika (!) bir örnekti.", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.9630967378616333, "is_correct": false }, { "text": "Evet, kesinlikle bugünkü toplantıda hiçbir şey anlaşılmadı diyemem.", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.8947334289550781, "is_correct": false }, { "text": "Harika, seninki gibi bir çözüm sayesinde sorunlarımız iki katına çıkacak (!)", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.8286226987838745, "is_correct": false }, { "text": "Tabii ki de, Türk çayı yurt dışında sudan bile ucuzdur (!).", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "backchannel", "confidence": 0.8883209228515625, "is_correct": true }, { "text": "Bu kadar ‘detaylı’ bir analiz için üç cümle yeterli oldu, harikasın!", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.610821545124054, "is_correct": false }, { "text": "Elbette, herkesin sabırsızlıkla beklediği o 'harika' PowerPoint sunumunu bir daha görelim.", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.9590893387794495, "is_correct": false } ], "misclassifications": [ { "text": "Elbette, bu konuda size destek olacağım.", "expected_label": "agent_response", "difficulty": "baseline", "predicted_label": "backchannel", "confidence": 0.5563545823097229, "is_correct": false }, { "text": "Anladım, hemen kontrol ediyorum.", "expected_label": "agent_response", "difficulty": "baseline", "predicted_label": "backchannel", "confidence": 0.5459677577018738, "is_correct": false }, { "text": "totes agree lol", "expected_label": "agent_response", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.9879427552223206, "is_correct": false }, { "text": "yup yup yup yup yup", "expected_label": "agent_response", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.988431453704834, "is_correct": false }, { "text": "nah bro", "expected_label": "agent_response", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.9873980283737183, "is_correct": false }, { "text": "yasss that's wassup", "expected_label": "agent_response", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.974721372127533, "is_correct": false }, { "text": "omg thts crazee", "expected_label": "agent_response", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.9885514974594116, "is_correct": false }, { "text": "u r kidding right?", "expected_label": "agent_response", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.9817968606948853, "is_correct": false }, { "text": "hah lol whatevs", "expected_label": "agent_response", "difficulty": "length_noise", "predicted_label": "backchannel", "confidence": 0.9895368814468384, "is_correct": false }, { "text": "Hımm, pekala. Başka bir açıdan bakacak olursak?", "expected_label": "agent_response", "difficulty": "semantic_overlap", "predicted_label": "backchannel", "confidence": 0.903683066368103, "is_correct": false }, { "text": "OMG this is like the most amazing thing ever I mean I can't even begin to explain how incredible this whole situation is because it's just that awesome you know what I mean like seriously wow just wow ok???", "expected_label": "backchannel", "difficulty": "length_noise", "predicted_label": "agent_response", "confidence": 0.7402034997940063, "is_correct": false }, { "text": "omg totally 100% agree with you on that one no doubt about it in fact I was just thinking the same thing the other day and it's crazy how we're like on the same wavelength all the time isn't it?", "expected_label": "backchannel", "difficulty": "length_noise", "predicted_label": "agent_response", "confidence": 0.9482101798057556, "is_correct": false }, { "text": "wow cant believe it happened like that i mean who would have thought that everything would turn out this way after all the planning we did it just goes to show that sometimes things have a way of working out on their own despite all the odds and challenges we faced right from the start", "expected_label": "backchannel", "difficulty": "length_noise", "predicted_label": "agent_response", "confidence": 0.971515953540802, "is_correct": false }, { "text": "Hmm, o konuda biraz daha bilgi verir misin?", "expected_label": "backchannel", "difficulty": "semantic_overlap", "predicted_label": "agent_response", "confidence": 0.7073332667350769, "is_correct": false }, { "text": "Bu mantıklı, başka neler oldu?", "expected_label": "backchannel", "difficulty": "semantic_overlap", "predicted_label": "agent_response", "confidence": 0.812181293964386, "is_correct": false }, { "text": "Bir dakika, bunu doğru mu anlıyorum?", "expected_label": "backchannel", "difficulty": "semantic_overlap", "predicted_label": "agent_response", "confidence": 0.7696111798286438, "is_correct": false }, { "text": "Evet evet, tabii ki de tebrik ederim, dünya harikası bir iş çıkardın (!)", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.877510666847229, "is_correct": false }, { "text": "Çok güzel, bu kadar net bir çözüm bulduğunu(!) hiç düşünmemiştim doğrusu.", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.9826798439025879, "is_correct": false }, { "text": "Ah, tabii ki! Çünkü herkes daima müşteri hizmetlerinin ne kadar hızlı olduğunu söyler (!)", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.5608082413673401, "is_correct": false }, { "text": "Eğer bu kadar 'yaratıcı' bir fikir daha duyar mıyım diye düşünüyordum, teşekkürler!", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.9653686881065369, "is_correct": false }, { "text": "Bir işin en iyi nasıl yapılmaması gerektiğini görmek için harika (!) bir örnekti.", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.9630967378616333, "is_correct": false }, { "text": "Evet, kesinlikle bugünkü toplantıda hiçbir şey anlaşılmadı diyemem.", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.8947334289550781, "is_correct": false }, { "text": "Harika, seninki gibi bir çözüm sayesinde sorunlarımız iki katına çıkacak (!)", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.8286226987838745, "is_correct": false }, { "text": "Bu kadar ‘detaylı’ bir analiz için üç cümle yeterli oldu, harikasın!", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.610821545124054, "is_correct": false }, { "text": "Elbette, herkesin sabırsızlıkla beklediği o 'harika' PowerPoint sunumunu bir daha görelim.", "expected_label": "backchannel", "difficulty": "edge_cases", "predicted_label": "agent_response", "confidence": 0.9590893387794495, "is_correct": false } ] }