{ "metadata": { "generated_at": "2026-03-18T04:26:27Z", "scoring_dimensions": [ "semantic_relevance", "factual_accuracy", "freshness", "objectivity_tone", "layout_ad_density", "accountability", "transparency", "authority" ], "weighted_score_formula": { "weights": { "semantic_relevance": 3, "factual_accuracy": 3, "objectivity_tone": 3, "freshness": 2, "transparency": 2, "authority": 2, "layout_ad_density": 2, "accountability": 2 }, "scale_factor": 1.0526315789473684 }, "runs": [ { "model_name": "Gemini-2.5-Flash-Preview", "score_file": "data/content-scores/gemini-2.5-flash/qwen_scoring_for_gemini_2.5_flash_few_shot.json", "rank_file": null }, { "model_name": "Gemini-3-Flash-Preview", "score_file": "data/content-scores/gemini-3-flash-preview/qwen_scoring_for_gemini-3-flash-preview_fewshot.json", "rank_file": "data/rank-scores/gemini-3-flash-preview/geo_scores.json" }, { "model_name": "Gemini-3-Pro-Preview", "score_file": "data/content-scores/gemini-3-pro-preview/qwen_scoring_for_gemini-3-pro-preview_fewshot.json", "rank_file": "data/rank-scores/gemini-3-pro-preview/geo_scores.json" }, { "model_name": "Perplexity-Sonar-Pro", "score_file": "data/content-scores/perplexity/qwen_scoring_for_perplexity_few_shot.json", "rank_file": "data/rank-scores/perplexity/geo_scores.json" }, { "model_name": "claude", "score_file": "data/content-scores/claude/qwen_scoring_for_claude-sonnet-4.5_fewshot.json", "rank_file": "data/rank-scores/claude/geo_scores.json" }, { "model_name": "deepseek-chat-gensee", "score_file": "data/content-scores/deepseek-chat-gensee/qwen_scoring_deepseek_chat_gensee.json", "rank_file": "data/rank-scores/deepseek-chat-gensee/geo_scores.json" }, { "model_name": "deepseek-chat-tavily", "score_file": "data/content-scores/deepseek-chat-tavily/qwen_scoring_deepseek_chat_tavily.json", "rank_file": "data/rank-scores/deepseek-chat-tavily/geo_scores.json" }, { "model_name": "deepseek-reasoning-gensee", "score_file": "data/content-scores/deepseek-reasoning-gensee/qwen_scoring_deepseek_reasoner_gensee.json", "rank_file": "data/rank-scores/deepseek-reasoning-gensee/geo_scores.json" }, { "model_name": "deepseek-reasoning-tavily", "score_file": "data/content-scores/deepseek-reasoning-tavily/qwen_scoring_deepseek_reasoner_tavily.json", "rank_file": "data/rank-scores/deepseek-reasoning-tavily/geo_scores.json" }, { "model_name": "exa", "score_file": "data/content-scores/exa/qwen_scoring_for_exa_few_shot.json", "rank_file": "data/rank-scores/exa/geo_scores.json" }, { "model_name": "gensee", "score_file": "data/content-scores/gensee/qwen_scoring_for_gensee_few_shot.json", "rank_file": "data/rank-scores/gensee/geo_scores.json" }, { "model_name": "google-search", "score_file": "data/content-scores/google-search/qwen_scoring_for_search_engine.json", "rank_file": null }, { "model_name": "gpt-4o", "score_file": "data/content-scores/gpt-4o/qwen_scoring_for_gpt4o_few_shot.json", "rank_file": "data/rank-scores/gpt-4o/geo_scores.json" }, { "model_name": "gpt-5", "score_file": "data/content-scores/gpt-5/qwen_scoring_for_gpt-5_fewshot.json", "rank_file": "data/rank-scores/gpt-5/geo_scores.json" }, { "model_name": "grok-4.1-fast-non-reasoning", "score_file": "data/content-scores/grok/qwen_scoring_for_grok_4.1_fast_non_reasoning_few_shot.json", "rank_file": "data/rank-scores/grok/geo_scores.json" }, { "model_name": "tavily", "score_file": "data/content-scores/tavily/qwen_scoring_for_tavily_few_shot.json", "rank_file": "data/rank-scores/tavily/geo_scores.json" } ], "source_of_truth": "Scoring Sheet - Overall-Result-Metric.csv and Scoring Sheet - Overall-Result-Category.csv" }, "overall": [ { "model_name": "gpt-5", "num_sources": 316, "num_queries": 93, "num_complete_scores": 308, "unweighted_mean_score": 4.462, "weighted_total_content_score": 89.081, "semantic_relevance": 3.923, "factual_accuracy": 4.772, "freshness": 4.49, "objectivity_tone": 4.547, "layout_ad_density": 4.016, "accountability": 4.433, "transparency": 4.793, "authority": 4.736, "avg_ge_freq": 0.6740462025316463, "relative_se_rank": 2.0599499775368484, "normalized_reciprocal_se_rank": 0.07444993783835872, "reciprocal_se_rank": 0.027598407393197893, "percentage_ge_sources_not_in_se_sources": 87.34177215189875, "percentage_ge_sources_in_se_sources": 15.99 }, { "model_name": "gpt-4o", "num_sources": 294, "num_queries": 88, "num_complete_scores": 294, "unweighted_mean_score": 4.067, "weighted_total_content_score": 81.518, "semantic_relevance": 4.241, "factual_accuracy": 4.207, "freshness": 4.524, "objectivity_tone": 3.925, "layout_ad_density": 3.344, "accountability": 4.15, "transparency": 3.963, "authority": 4.18, "avg_ge_freq": 0.46483707482993103, "relative_se_rank": 1.8249937074261993, "normalized_reciprocal_se_rank": 0.12262328761538778, "reciprocal_se_rank": 0.03917404241243542, "percentage_ge_sources_not_in_se_sources": 74.82993197278913, "percentage_ge_sources_in_se_sources": 27.53 }, { "model_name": "grok-4.1-fast-non-reasoning", "num_sources": 308, "num_queries": 80, "num_complete_scores": 307, "unweighted_mean_score": 4.153, "weighted_total_content_score": 83.381, "semantic_relevance": 4.264, "factual_accuracy": 4.502, "freshness": 4.192, "objectivity_tone": 4.003, "layout_ad_density": 3.632, "accountability": 4.0, "transparency": 4.3, "authority": 4.329, "avg_ge_freq": 0.7316051948051939, "relative_se_rank": 1.7286246441730713, "normalized_reciprocal_se_rank": 0.14088939196167136, "reciprocal_se_rank": 0.043563227680110374, "percentage_ge_sources_not_in_se_sources": 69.15584415584416, "percentage_ge_sources_in_se_sources": 29.67 }, { "model_name": "claude", "num_sources": 259, "num_queries": 84, "num_complete_scores": 253, "unweighted_mean_score": 4.061, "weighted_total_content_score": 81.282, "semantic_relevance": 4.209, "factual_accuracy": 4.217, "freshness": 4.447, "objectivity_tone": 3.818, "layout_ad_density": 3.336, "accountability": 4.202, "transparency": 4.178, "authority": 4.079, "avg_ge_freq": 0.8146749034749037, "relative_se_rank": 1.587931757339867, "normalized_reciprocal_se_rank": 0.1641520228549373, "reciprocal_se_rank": 0.04915303461805529, "percentage_ge_sources_not_in_se_sources": 62.934362934362916, "percentage_ge_sources_in_se_sources": 37.1 }, { "model_name": "Gemini-3-Pro-Preview", "num_sources": 444, "num_queries": 97, "num_complete_scores": 427, "unweighted_mean_score": 3.988, "weighted_total_content_score": 79.379, "semantic_relevance": 3.633, "factual_accuracy": 4.133, "freshness": 4.743, "objectivity_tone": 3.851, "layout_ad_density": 3.502, "accountability": 3.891, "transparency": 4.141, "authority": 3.993, "avg_ge_freq": 0.5082466216216226, "relative_se_rank": 1.9513731817138333, "normalized_reciprocal_se_rank": 0.08733801529571, "reciprocal_se_rank": 0.03069529979193034, "percentage_ge_sources_not_in_se_sources": 79.50450450450452, "percentage_ge_sources_in_se_sources": 20.95 }, { "model_name": "Gemini-3-Flash-Preview", "num_sources": 456, "num_queries": 99, "num_complete_scores": 438, "unweighted_mean_score": 3.96, "weighted_total_content_score": 78.981, "semantic_relevance": 3.633, "factual_accuracy": 4.1, "freshness": 4.459, "objectivity_tone": 3.925, "layout_ad_density": 3.416, "accountability": 4.057, "transparency": 4.097, "authority": 3.962, "avg_ge_freq": 0.5219184210526322, "relative_se_rank": 1.907622173881119, "normalized_reciprocal_se_rank": 0.10236888641393292, "reciprocal_se_rank": 0.03430708678393044, "percentage_ge_sources_not_in_se_sources": 76.7543859649123, "percentage_ge_sources_in_se_sources": 23.5 }, { "model_name": "Gemini-2.5-Flash-Preview", "num_sources": 444, "num_queries": 98, "num_complete_scores": 441, "unweighted_mean_score": 3.938, "weighted_total_content_score": 78.339, "semantic_relevance": 3.52, "factual_accuracy": 3.995, "freshness": 4.445, "objectivity_tone": 3.898, "layout_ad_density": 3.336, "accountability": 4.054, "transparency": 4.163, "authority": 4.061, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": 31.96 }, { "model_name": "Perplexity-Sonar-Pro", "num_sources": 379, "num_queries": 98, "num_complete_scores": 373, "unweighted_mean_score": 3.952, "weighted_total_content_score": 78.547, "semantic_relevance": 3.691, "factual_accuracy": 4.075, "freshness": 4.358, "objectivity_tone": 3.619, "layout_ad_density": 3.643, "accountability": 3.806, "transparency": 4.22, "authority": 4.183, "avg_ge_freq": 0.8135451187335091, "relative_se_rank": 1.6003800744631216, "normalized_reciprocal_se_rank": 0.16776928125494017, "reciprocal_se_rank": 0.050022230204463676, "percentage_ge_sources_not_in_se_sources": 60.949868073878655, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "google-search", "num_sources": 406, "num_queries": 99, "num_complete_scores": 403, "unweighted_mean_score": 4.0, "weighted_total_content_score": 79.939, "semantic_relevance": 3.948, "factual_accuracy": 4.222, "freshness": 4.059, "objectivity_tone": 3.776, "layout_ad_density": 3.749, "accountability": 4.133, "transparency": 3.879, "authority": 4.232, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "exa", "num_sources": 425, "num_queries": 99, "num_complete_scores": 421, "unweighted_mean_score": 4.021, "weighted_total_content_score": 80.108, "semantic_relevance": 3.649, "factual_accuracy": 4.12, "freshness": 4.346, "objectivity_tone": 4.024, "layout_ad_density": 3.391, "accountability": 4.125, "transparency": 4.306, "authority": 4.24, "avg_ge_freq": 1.0, "relative_se_rank": 1.4974151993912352, "normalized_reciprocal_se_rank": 0.20452762803005117, "reciprocal_se_rank": 0.058854939745084926, "percentage_ge_sources_not_in_se_sources": 57.176470588235304, "percentage_ge_sources_in_se_sources": 44.66 }, { "model_name": "tavily", "num_sources": 395, "num_queries": 97, "num_complete_scores": 389, "unweighted_mean_score": 3.933, "weighted_total_content_score": 78.268, "semantic_relevance": 3.545, "factual_accuracy": 4.066, "freshness": 4.448, "objectivity_tone": 3.801, "layout_ad_density": 3.332, "accountability": 4.043, "transparency": 4.033, "authority": 4.156, "avg_ge_freq": 0.9864979746835443, "relative_se_rank": 1.2450772341845837, "normalized_reciprocal_se_rank": 0.2743286477154668, "reciprocal_se_rank": 0.07562751486366782, "percentage_ge_sources_not_in_se_sources": 45.316455696202524, "percentage_ge_sources_in_se_sources": 55.45 }, { "model_name": "gensee", "num_sources": 382, "num_queries": 93, "num_complete_scores": 377, "unweighted_mean_score": 4.06, "weighted_total_content_score": 81.795, "semantic_relevance": 4.432, "factual_accuracy": 4.426, "freshness": 4.344, "objectivity_tone": 3.895, "layout_ad_density": 3.34, "accountability": 3.966, "transparency": 4.021, "authority": 4.092, "avg_ge_freq": 0.5340209424083775, "relative_se_rank": 1.7669587654960388, "normalized_reciprocal_se_rank": 0.1299587450378004, "reciprocal_se_rank": 0.040936688734811204, "percentage_ge_sources_not_in_se_sources": 71.46596858638743, "percentage_ge_sources_in_se_sources": 28.4 }, { "model_name": "deepseek-chat-gensee", "num_sources": 82, "num_queries": 19, "num_complete_scores": 76, "unweighted_mean_score": 4.26378842676311, "weighted_total_content_score": 81.1168164313222, "semantic_relevance": 4.243589743589744, "factual_accuracy": 4.564102564102564, "freshness": 4.423076923076923, "objectivity_tone": 4.153846153846154, "layout_ad_density": 3.9220779220779223, "accountability": 3.9871794871794872, "transparency": 4.32051282051282, "authority": 4.461538461538462, "avg_ge_freq": 1.0, "relative_se_rank": 1.9224627653967212, "normalized_reciprocal_se_rank": 0.12477656633162594, "reciprocal_se_rank": 0.03969145647289068, "percentage_ge_sources_not_in_se_sources": 74.390243902439, "percentage_ge_sources_in_se_sources": 25.609756097560982 }, { "model_name": "deepseek-reasoning-tavily", "num_sources": 62, "num_queries": 19, "num_complete_scores": 58, "unweighted_mean_score": 4.282327586206897, "weighted_total_content_score": 80.1018675721562, "semantic_relevance": 4.189655172413793, "factual_accuracy": 4.603448275862069, "freshness": 4.396551724137931, "objectivity_tone": 4.0344827586206895, "layout_ad_density": 3.7241379310344827, "accountability": 4.103448275862069, "transparency": 4.551724137931035, "authority": 4.655172413793103, "avg_ge_freq": 1.0, "relative_se_rank": 1.4716965265158806, "normalized_reciprocal_se_rank": 0.2609989378665101, "reciprocal_se_rank": 0.07242450206015655, "percentage_ge_sources_not_in_se_sources": 56.45161290322581, "percentage_ge_sources_in_se_sources": 43.54838709677419 }, { "model_name": "deepseek-reasoning-gensee", "num_sources": 81, "num_queries": 19, "num_complete_scores": 74, "unweighted_mean_score": 4.166666666666667, "weighted_total_content_score": 76.71215074723848, "semantic_relevance": 4.04, "factual_accuracy": 4.466666666666667, "freshness": 4.351351351351352, "objectivity_tone": 3.986666666666667, "layout_ad_density": 3.7866666666666666, "accountability": 4.081081081081081, "transparency": 4.22972972972973, "authority": 4.391891891891892, "avg_ge_freq": 1.0, "relative_se_rank": 1.9087325248138731, "normalized_reciprocal_se_rank": 0.14292879978187134, "reciprocal_se_rank": 0.044053279559236075, "percentage_ge_sources_not_in_se_sources": 74.07407407407408, "percentage_ge_sources_in_se_sources": 25.925925925925927 }, { "model_name": "deepseek-chat-tavily", "num_sources": 75, "num_queries": 19, "num_complete_scores": 69, "unweighted_mean_score": 3.963768115942029, "weighted_total_content_score": 72.70175438596492, "semantic_relevance": 3.782608695652174, "factual_accuracy": 4.086956521739131, "freshness": 4.217391304347826, "objectivity_tone": 3.782608695652174, "layout_ad_density": 3.5072463768115942, "accountability": 3.8550724637681157, "transparency": 4.231884057971015, "authority": 4.246376811594203, "avg_ge_freq": 1.0, "relative_se_rank": 1.648791164687895, "normalized_reciprocal_se_rank": 0.19786996874326315, "reciprocal_se_rank": 0.05725516239219187, "percentage_ge_sources_not_in_se_sources": 61.333333333333336, "percentage_ge_sources_in_se_sources": 38.666666666666664 } ], "by_query_type": [ { "model_name": "gpt-5", "query_type": "DebateQA", "num_sources": 85, "num_queries": 20, "num_complete_scores": 79, "unweighted_mean_score": 4.651, "weighted_total_content_score": 90.19195046, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.6274458823529414, "relative_se_rank": 2.0396269615741502, "normalized_reciprocal_se_rank": 0.024647681783897292, "reciprocal_se_rank": 0.015631360428654928, "percentage_ge_sources_not_in_se_sources": 89.41176470588235, "percentage_ge_sources_in_se_sources": 15.99 }, { "model_name": "gpt-5", "query_type": "HotpotQA", "num_sources": 38, "num_queries": 19, "num_complete_scores": 38, "unweighted_mean_score": 4.414, "weighted_total_content_score": 88.33795014, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.5526184210526318, "relative_se_rank": 1.6329072044063837, "normalized_reciprocal_se_rank": 0.24588844317711944, "reciprocal_se_rank": 0.06879358221974471, "percentage_ge_sources_not_in_se_sources": 71.05263157894737, "percentage_ge_sources_in_se_sources": 15.99 }, { "model_name": "gpt-5", "query_type": "Pinocchios", "num_sources": 55, "num_queries": 20, "num_complete_scores": 55, "unweighted_mean_score": 4.434, "weighted_total_content_score": 88.99521531, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.6605890909090913, "relative_se_rank": 1.8746770791895104, "normalized_reciprocal_se_rank": 0.16050373868555687, "reciprocal_se_rank": 0.048276383810364386, "percentage_ge_sources_not_in_se_sources": 74.54545454545455, "percentage_ge_sources_in_se_sources": 15.99 }, { "model_name": "gpt-5", "query_type": "QuoraQuestions", "num_sources": 58, "num_queries": 14, "num_complete_scores": 56, "unweighted_mean_score": 4.491, "weighted_total_content_score": 87.84029038, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.6379275862068965, "relative_se_rank": 2.1757824170978175, "normalized_reciprocal_se_rank": 0.04254727146439185, "reciprocal_se_rank": 0.019932475424696096, "percentage_ge_sources_not_in_se_sources": 91.37931034482759, "percentage_ge_sources_in_se_sources": 15.99 }, { "model_name": "gpt-5", "query_type": "VA-COS NLQ", "num_sources": 80, "num_queries": 20, "num_complete_scores": 80, "unweighted_mean_score": 4.295, "weighted_total_content_score": 85.55263158, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.8166749999999997, "relative_se_rank": 2.327785098166267, "normalized_reciprocal_se_rank": 0.0098989898989899, "reciprocal_se_rank": 0.012087378640776695, "percentage_ge_sources_not_in_se_sources": 98.75, "percentage_ge_sources_in_se_sources": 15.99 }, { "model_name": "gpt-4o", "query_type": "DebateQA", "num_sources": 78, "num_queries": 18, "num_complete_scores": 78, "unweighted_mean_score": 4.12, "weighted_total_content_score": 82.60458839, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.4999897435897439, "relative_se_rank": 1.7141289179822279, "normalized_reciprocal_se_rank": 0.12281919725976252, "reciprocal_se_rank": 0.03922111778814682, "percentage_ge_sources_not_in_se_sources": 71.7948717948718, "percentage_ge_sources_in_se_sources": 27.53 }, { "model_name": "gpt-4o", "query_type": "HotpotQA", "num_sources": 19, "num_queries": 14, "num_complete_scores": 19, "unweighted_mean_score": 3.921, "weighted_total_content_score": 78.28254848, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.3683947368421053, "relative_se_rank": 2.1738400706504932, "normalized_reciprocal_se_rank": 0.08335991493886231, "reciprocal_se_rank": 0.029739397036280018, "percentage_ge_sources_not_in_se_sources": 89.47368421052632, "percentage_ge_sources_in_se_sources": 27.53 }, { "model_name": "gpt-4o", "query_type": "Pinocchios", "num_sources": 40, "num_queries": 18, "num_complete_scores": 40, "unweighted_mean_score": 4.294, "weighted_total_content_score": 86.60526316, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.44164499999999995, "relative_se_rank": 1.5765700005314691, "normalized_reciprocal_se_rank": 0.28054167213258124, "reciprocal_se_rank": 0.07712045034253773, "percentage_ge_sources_not_in_se_sources": 62.5, "percentage_ge_sources_in_se_sources": 27.53 }, { "model_name": "gpt-4o", "query_type": "QuoraQuestions", "num_sources": 76, "num_queries": 19, "num_complete_scores": 76, "unweighted_mean_score": 3.965, "weighted_total_content_score": 79.21052632, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.4298052631578951, "relative_se_rank": 1.7644798178150203, "normalized_reciprocal_se_rank": 0.0996490754594708, "reciprocal_se_rank": 0.03365353997691167, "percentage_ge_sources_not_in_se_sources": 71.05263157894738, "percentage_ge_sources_in_se_sources": 27.53 }, { "model_name": "gpt-4o", "query_type": "VA-COS NLQ", "num_sources": 81, "num_queries": 19, "num_complete_scores": 81, "unweighted_mean_score": 4.032, "weighted_total_content_score": 80.88369071, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.49793086419753113, "relative_se_rank": 2.029381072384311, "normalized_reciprocal_se_rank": 0.07521622430371759, "reciprocal_se_rank": 0.027782539335116603, "percentage_ge_sources_not_in_se_sources": 83.95061728395062, "percentage_ge_sources_in_se_sources": 27.53 }, { "model_name": "Grok-4.1-Fast", "query_type": "DebateQA", "num_sources": null, "num_queries": null, "num_complete_scores": null, "unweighted_mean_score": 4.5, "weighted_total_content_score": 90.35087719, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Grok-4.1-Fast", "query_type": "HotpotQA", "num_sources": null, "num_queries": null, "num_complete_scores": null, "unweighted_mean_score": 3.897, "weighted_total_content_score": 78.01169591, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Grok-4.1-Fast", "query_type": "Pinocchios", "num_sources": null, "num_queries": null, "num_complete_scores": null, "unweighted_mean_score": 4.271, "weighted_total_content_score": 86.00157109, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Grok-4.1-Fast", "query_type": "QuoraQuestions", "num_sources": null, "num_queries": null, "num_complete_scores": null, "unweighted_mean_score": 4.0, "weighted_total_content_score": 78.82032668, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Grok-4.1-Fast", "query_type": "VA-COS NLQ", "num_sources": null, "num_queries": null, "num_complete_scores": null, "unweighted_mean_score": 4.205, "weighted_total_content_score": 84.48621554, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_type": "DebateQA", "num_sources": 100, "num_queries": 20, "num_complete_scores": 96, "unweighted_mean_score": 4.173469388, "weighted_total_content_score": 81.42105263, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.4833250000000001, "relative_se_rank": 1.7292861678201157, "normalized_reciprocal_se_rank": 0.10523315112286376, "reciprocal_se_rank": 0.03499534456593085, "percentage_ge_sources_not_in_se_sources": 72.0, "percentage_ge_sources_in_se_sources": 20.95 }, { "model_name": "Gemini-3-Pro-Preview", "query_type": "HotpotQA", "num_sources": 83, "num_queries": 20, "num_complete_scores": 77, "unweighted_mean_score": 3.720848389, "weighted_total_content_score": 72.63157895, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.57830843373494, "relative_se_rank": 1.9786482310103346, "normalized_reciprocal_se_rank": 0.056782624848369385, "reciprocal_se_rank": 0.02335310645628293, "percentage_ge_sources_not_in_se_sources": 83.13253012048195, "percentage_ge_sources_in_se_sources": 20.95 }, { "model_name": "Gemini-3-Pro-Preview", "query_type": "Pinocchios", "num_sources": 86, "num_queries": 20, "num_complete_scores": 84, "unweighted_mean_score": 4.159148427, "weighted_total_content_score": 81.87270502, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.5736325581395352, "relative_se_rank": 1.941468415075042, "normalized_reciprocal_se_rank": 0.15368587669053982, "reciprocal_se_rank": 0.04663811114651323, "percentage_ge_sources_not_in_se_sources": 73.25581395348837, "percentage_ge_sources_in_se_sources": 20.95 }, { "model_name": "Gemini-3-Pro-Preview", "query_type": "QuoraQuestions", "num_sources": 81, "num_queries": 17, "num_complete_scores": 76, "unweighted_mean_score": 3.750843732, "weighted_total_content_score": 72.37166992, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.5061604938271607, "relative_se_rank": 1.7841348598268654, "normalized_reciprocal_se_rank": 0.11912889330801175, "reciprocal_se_rank": 0.038334369993915436, "percentage_ge_sources_not_in_se_sources": 71.60493827160494, "percentage_ge_sources_in_se_sources": 20.95 }, { "model_name": "Gemini-3-Pro-Preview", "query_type": "VA-COS NLQ", "num_sources": 94, "num_queries": 20, "num_complete_scores": 94, "unweighted_mean_score": 4.059840426, "weighted_total_content_score": 81.3549832, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.4148723404255325, "relative_se_rank": 2.3167240998153185, "normalized_reciprocal_se_rank": 0.007184894289987778, "reciprocal_se_rank": 0.01143520518133201, "percentage_ge_sources_not_in_se_sources": 96.80851063829788, "percentage_ge_sources_in_se_sources": 20.95 }, { "model_name": "Gemini-3-Flash-Preview", "query_type": "DebateQA", "num_sources": 100, "num_queries": 20, "num_complete_scores": 95, "unweighted_mean_score": 4.147, "weighted_total_content_score": 81.05263158, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.4899890000000004, "relative_se_rank": 1.6757336210963478, "normalized_reciprocal_se_rank": 0.12549266202008144, "reciprocal_se_rank": 0.039863528009679766, "percentage_ge_sources_not_in_se_sources": 71.0, "percentage_ge_sources_in_se_sources": 23.5 }, { "model_name": "Gemini-3-Flash-Preview", "query_type": "HotpotQA", "num_sources": 85, "num_queries": 20, "num_complete_scores": 81, "unweighted_mean_score": 3.68, "weighted_total_content_score": 72.28482972, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.5999917647058826, "relative_se_rank": 1.9004372674862968, "normalized_reciprocal_se_rank": 0.06427878374204764, "reciprocal_se_rank": 0.0251543679380163, "percentage_ge_sources_not_in_se_sources": 78.82352941176471, "percentage_ge_sources_in_se_sources": 23.5 }, { "model_name": "Gemini-3-Flash-Preview", "query_type": "Pinocchios", "num_sources": 89, "num_queries": 20, "num_complete_scores": 88, "unweighted_mean_score": 4.074, "weighted_total_content_score": 80.40212892, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.5842617977528091, "relative_se_rank": 2.0481831598330293, "normalized_reciprocal_se_rank": 0.14693960626866004, "reciprocal_se_rank": 0.04501704131212949, "percentage_ge_sources_not_in_se_sources": 73.03370786516854, "percentage_ge_sources_in_se_sources": 23.5 }, { "model_name": "Gemini-3-Flash-Preview", "query_type": "QuoraQuestions", "num_sources": 90, "num_queries": 19, "num_complete_scores": 86, "unweighted_mean_score": 3.822, "weighted_total_content_score": 74.25730994, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.49258000000000024, "relative_se_rank": 1.7659524786645306, "normalized_reciprocal_se_rank": 0.14047144175948556, "reciprocal_se_rank": 0.04346279789851716, "percentage_ge_sources_not_in_se_sources": 72.22222222222223, "percentage_ge_sources_in_se_sources": 23.5 }, { "model_name": "Gemini-3-Flash-Preview", "query_type": "VA-COS NLQ", "num_sources": 92, "num_queries": 20, "num_complete_scores": 88, "unweighted_mean_score": 4.022, "weighted_total_content_score": 78.72997712, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.45288152173913093, "relative_se_rank": 2.168925621074675, "normalized_reciprocal_se_rank": 0.032034724656595535, "reciprocal_se_rank": 0.017406402283987762, "percentage_ge_sources_not_in_se_sources": 89.1304347826087, "percentage_ge_sources_in_se_sources": 23.5 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_type": "DebateQA", "num_sources": 100, "num_queries": 20, "num_complete_scores": 100, "unweighted_mean_score": 4.133, "weighted_total_content_score": 82.126, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": 31.96 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_type": "HotpotQA", "num_sources": 84, "num_queries": 20, "num_complete_scores": 84, "unweighted_mean_score": 3.72, "weighted_total_content_score": 73.86, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": 31.96 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_type": "Pinocchios", "num_sources": 86, "num_queries": 20, "num_complete_scores": 85, "unweighted_mean_score": 3.931, "weighted_total_content_score": 77.6744186, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": 31.96 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_type": "QuoraQuestions", "num_sources": 83, "num_queries": 18, "num_complete_scores": 81, "unweighted_mean_score": 3.79, "weighted_total_content_score": 74.40710209, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": 31.96 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_type": "VA-COS NLQ", "num_sources": 91, "num_queries": 20, "num_complete_scores": 91, "unweighted_mean_score": 4.047, "weighted_total_content_score": 81.01792944, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": 31.96 }, { "model_name": "claude", "query_type": "DebateQA", "num_sources": 70, "num_queries": 20, "num_complete_scores": 65, "unweighted_mean_score": 4.281, "weighted_total_content_score": 79.39849624, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.8523828571428572, "relative_se_rank": 1.3421362086210757, "normalized_reciprocal_se_rank": 0.21041652104583275, "reciprocal_se_rank": 0.06026998928043071, "percentage_ge_sources_not_in_se_sources": 54.28571428571426, "percentage_ge_sources_in_se_sources": 37.1 }, { "model_name": "claude", "query_type": "HotpotQA", "num_sources": 10, "num_queries": 7, "num_complete_scores": 10, "unweighted_mean_score": 3.925, "weighted_total_content_score": 79.15789474, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.8333400000000001, "relative_se_rank": 1.8379612104849017, "normalized_reciprocal_se_rank": 0.1327922077922078, "reciprocal_se_rank": 0.04161754507628294, "percentage_ge_sources_not_in_se_sources": 70.00000000000001, "percentage_ge_sources_in_se_sources": 37.1 }, { "model_name": "claude", "query_type": "Pinocchios", "num_sources": 39, "num_queries": 20, "num_complete_scores": 39, "unweighted_mean_score": 4.263, "weighted_total_content_score": 85.56005398, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.8205102564102565, "relative_se_rank": 1.4724542616408207, "normalized_reciprocal_se_rank": 0.28256007847697834, "reciprocal_se_rank": 0.07760545575053605, "percentage_ge_sources_not_in_se_sources": 53.84615384615383, "percentage_ge_sources_in_se_sources": 37.1 }, { "model_name": "claude", "query_type": "QuoraQuestions", "num_sources": 59, "num_queries": 18, "num_complete_scores": 58, "unweighted_mean_score": 3.802, "weighted_total_content_score": 74.39785905, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.8079084745762712, "relative_se_rank": 1.2724077582318005, "normalized_reciprocal_se_rank": 0.18350554762304847, "reciprocal_se_rank": 0.0538035175113636, "percentage_ge_sources_not_in_se_sources": 49.15254237288135, "percentage_ge_sources_in_se_sources": 37.1 }, { "model_name": "claude", "query_type": "VA-COS NLQ", "num_sources": 81, "num_queries": 19, "num_complete_scores": 81, "unweighted_mean_score": 3.989, "weighted_total_content_score": 80.10396361, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.7819024691358022, "relative_se_rank": 2.0549062284325044, "normalized_reciprocal_se_rank": 0.056933641949831956, "reciprocal_se_rank": 0.02338939454619748, "percentage_ge_sources_not_in_se_sources": 83.95061728395062, "percentage_ge_sources_in_se_sources": 37.1 }, { "model_name": "Perplexity-Sonar-Pro", "query_type": "DebateQA", "num_sources": 82, "num_queries": 20, "num_complete_scores": 79, "unweighted_mean_score": 4.235, "weighted_total_content_score": 82.51604621, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.7804853658536585, "relative_se_rank": 1.185618672325219, "normalized_reciprocal_se_rank": 0.2274442114543135, "reciprocal_se_rank": 0.0643615944999443, "percentage_ge_sources_not_in_se_sources": 46.34146341463415, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_type": "HotpotQA", "num_sources": 79, "num_queries": 20, "num_complete_scores": 78, "unweighted_mean_score": 3.591, "weighted_total_content_score": 71.13924051, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.8143506329113921, "relative_se_rank": 1.61782985019127, "normalized_reciprocal_se_rank": 0.13063939371395492, "reciprocal_se_rank": 0.04110024266427558, "percentage_ge_sources_not_in_se_sources": 63.29113924050633, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_type": "Pinocchios", "num_sources": 72, "num_queries": 19, "num_complete_scores": 72, "unweighted_mean_score": 4.087, "weighted_total_content_score": 80.99415205, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.8148138888888888, "relative_se_rank": 1.9321450595878684, "normalized_reciprocal_se_rank": 0.20414708640646176, "reciprocal_se_rank": 0.05876349891805757, "percentage_ge_sources_not_in_se_sources": 62.5, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_type": "QuoraQuestions", "num_sources": 82, "num_queries": 19, "num_complete_scores": 82, "unweighted_mean_score": 3.819, "weighted_total_content_score": 75.69961489, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.8252060975609754, "relative_se_rank": 1.3424344412739029, "normalized_reciprocal_se_rank": 0.2024394067077523, "reciprocal_se_rank": 0.05835315840793079, "percentage_ge_sources_not_in_se_sources": 52.4390243902439, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_type": "VA-COS NLQ", "num_sources": 64, "num_queries": 20, "num_complete_scores": 62, "unweighted_mean_score": 4.048, "weighted_total_content_score": 79.39144737, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.8385406249999999, "relative_se_rank": 2.067510788296202, "normalized_reciprocal_se_rank": 0.051796852838519515, "reciprocal_se_rank": 0.02215506900731415, "percentage_ge_sources_not_in_se_sources": 85.93750000000001, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "google-search", "query_type": "DebateQA", "num_sources": 89, "num_queries": 20, "num_complete_scores": 89, "unweighted_mean_score": 4.263, "weighted_total_content_score": 85.27498522, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_type": "HotpotQA", "num_sources": 70, "num_queries": 20, "num_complete_scores": 69, "unweighted_mean_score": 3.813, "weighted_total_content_score": 75.33834586, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_type": "Pinocchios", "num_sources": 93, "num_queries": 20, "num_complete_scores": 91, "unweighted_mean_score": 4.176, "weighted_total_content_score": 83.53140917, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_type": "QuoraQuestions", "num_sources": 80, "num_queries": 19, "num_complete_scores": 80, "unweighted_mean_score": 3.809, "weighted_total_content_score": 76.22368421, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_type": "VA-COS NLQ", "num_sources": 74, "num_queries": 20, "num_complete_scores": 74, "unweighted_mean_score": 3.846, "weighted_total_content_score": 77.05547653, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "exa", "query_type": "DebateQA", "num_sources": 89, "num_queries": 20, "num_complete_scores": 87, "unweighted_mean_score": 4.327, "weighted_total_content_score": 86.44589001, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 1.0, "relative_se_rank": 1.010284064126628, "normalized_reciprocal_se_rank": 0.28063309301928224, "reciprocal_se_rank": 0.07714241798278879, "percentage_ge_sources_not_in_se_sources": 38.20224719101124, "percentage_ge_sources_in_se_sources": 44.66 }, { "model_name": "exa", "query_type": "HotpotQA", "num_sources": 83, "num_queries": 20, "num_complete_scores": 83, "unweighted_mean_score": 3.572, "weighted_total_content_score": 70.83069119, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 1.0, "relative_se_rank": 1.371670293189712, "normalized_reciprocal_se_rank": 0.22448376867351463, "reciprocal_se_rank": 0.06365022596766494, "percentage_ge_sources_not_in_se_sources": 54.21686746987952, "percentage_ge_sources_in_se_sources": 44.66 }, { "model_name": "exa", "query_type": "Pinocchios", "num_sources": 87, "num_queries": 20, "num_complete_scores": 86, "unweighted_mean_score": 4.199, "weighted_total_content_score": 83.59346642, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 1.0, "relative_se_rank": 1.80952333519013, "normalized_reciprocal_se_rank": 0.2313966587355651, "reciprocal_se_rank": 0.06531133304568201, "percentage_ge_sources_not_in_se_sources": 62.06896551724138, "percentage_ge_sources_in_se_sources": 44.66 }, { "model_name": "exa", "query_type": "QuoraQuestions", "num_sources": 85, "num_queries": 19, "num_complete_scores": 85, "unweighted_mean_score": 3.871, "weighted_total_content_score": 76.96594427, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 1.0, "relative_se_rank": 1.329000189685051, "normalized_reciprocal_se_rank": 0.1994493307755928, "reciprocal_se_rank": 0.05763466928830994, "percentage_ge_sources_not_in_se_sources": 50.588235294117645, "percentage_ge_sources_in_se_sources": 44.66 }, { "model_name": "exa", "query_type": "VA-COS NLQ", "num_sources": 81, "num_queries": 20, "num_complete_scores": 80, "unweighted_mean_score": 4.135, "weighted_total_content_score": 81.97530864, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 1.0, "relative_se_rank": 2.003012313759094, "normalized_reciprocal_se_rank": 0.07692643713869617, "reciprocal_se_rank": 0.028193488535754666, "percentage_ge_sources_not_in_se_sources": 82.71604938271606, "percentage_ge_sources_in_se_sources": 44.66 }, { "model_name": "tavily", "query_type": "DebateQA", "num_sources": 76, "num_queries": 20, "num_complete_scores": 76, "unweighted_mean_score": 4.184, "weighted_total_content_score": 83.55955679, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 1.0, "relative_se_rank": 0.9508661126222719, "normalized_reciprocal_se_rank": 0.3434220529106368, "reciprocal_se_rank": 0.09223005640328419, "percentage_ge_sources_not_in_se_sources": 36.8421052631579, "percentage_ge_sources_in_se_sources": 55.45 }, { "model_name": "tavily", "query_type": "HotpotQA", "num_sources": 77, "num_queries": 18, "num_complete_scores": 73, "unweighted_mean_score": 3.578, "weighted_total_content_score": 68.33902939, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 1.0, "relative_se_rank": 1.150415973913549, "normalized_reciprocal_se_rank": 0.27249775517340985, "reciprocal_se_rank": 0.07518756738390195, "percentage_ge_sources_not_in_se_sources": 41.55844155844156, "percentage_ge_sources_in_se_sources": 55.45 }, { "model_name": "tavily", "query_type": "Pinocchios", "num_sources": 81, "num_queries": 20, "num_complete_scores": 79, "unweighted_mean_score": 4.192, "weighted_total_content_score": 82.27420403, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.9958851851851852, "relative_se_rank": 1.4422121160476211, "normalized_reciprocal_se_rank": 0.3128941728047197, "reciprocal_se_rank": 0.08489447356229925, "percentage_ge_sources_not_in_se_sources": 45.67901234567901, "percentage_ge_sources_in_se_sources": 55.45 }, { "model_name": "tavily", "query_type": "QuoraQuestions", "num_sources": 78, "num_queries": 19, "num_complete_scores": 78, "unweighted_mean_score": 3.712, "weighted_total_content_score": 73.67071525, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 1.0, "relative_se_rank": 0.9222314853068678, "normalized_reciprocal_se_rank": 0.31851954838074426, "reciprocal_se_rank": 0.08624620215945074, "percentage_ge_sources_not_in_se_sources": 32.05128205128205, "percentage_ge_sources_in_se_sources": 55.45 }, { "model_name": "tavily", "query_type": "VA-COS NLQ", "num_sources": 83, "num_queries": 20, "num_complete_scores": 83, "unweighted_mean_score": 3.955, "weighted_total_content_score": 78.80786303, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.9397590361445783, "relative_se_rank": 1.7133062132347119, "normalized_reciprocal_se_rank": 0.1335959024960005, "reciprocal_se_rank": 0.04181066589102925, "percentage_ge_sources_not_in_se_sources": 68.67469879518072, "percentage_ge_sources_in_se_sources": 55.45 }, { "model_name": "gensee", "query_type": "DebateQA", "num_sources": 89, "num_queries": 20, "num_complete_scores": 87, "unweighted_mean_score": 4.278, "weighted_total_content_score": 85.36960378, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.5730269662921349, "relative_se_rank": 1.455758534903284, "normalized_reciprocal_se_rank": 0.17012042957025347, "reciprocal_se_rank": 0.05058719060061921, "percentage_ge_sources_not_in_se_sources": 58.42696629213483, "percentage_ge_sources_in_se_sources": 28.4 }, { "model_name": "gensee", "query_type": "HotpotQA", "num_sources": 45, "num_queries": 16, "num_complete_scores": 45, "unweighted_mean_score": 3.786, "weighted_total_content_score": 76.67836257, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.4888711111111113, "relative_se_rank": 1.9318191821883404, "normalized_reciprocal_se_rank": 0.06456158601930041, "reciprocal_se_rank": 0.025222322854152286, "percentage_ge_sources_not_in_se_sources": 84.44444444444443, "percentage_ge_sources_in_se_sources": 28.4 }, { "model_name": "gensee", "query_type": "Pinocchios", "num_sources": 77, "num_queries": 20, "num_complete_scores": 75, "unweighted_mean_score": 4.283, "weighted_total_content_score": 85.94668489, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.4934883116883122, "relative_se_rank": 1.7999418903256372, "normalized_reciprocal_se_rank": 0.19747215967140244, "reciprocal_se_rank": 0.05715957234822537, "percentage_ge_sources_not_in_se_sources": 70.12987012987011, "percentage_ge_sources_in_se_sources": 28.4 }, { "model_name": "gensee", "query_type": "QuoraQuestions", "num_sources": 83, "num_queries": 18, "num_complete_scores": 82, "unweighted_mean_score": 3.924, "weighted_total_content_score": 77.94546607, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.5542120481927711, "relative_se_rank": 1.6111832468899239, "normalized_reciprocal_se_rank": 0.12268532386073742, "reciprocal_se_rank": 0.03918894918012865, "percentage_ge_sources_not_in_se_sources": 62.650602409638545, "percentage_ge_sources_in_se_sources": 28.4 }, { "model_name": "gensee", "query_type": "VA-COS NLQ", "num_sources": 88, "num_queries": 19, "num_complete_scores": 88, "unweighted_mean_score": 3.933, "weighted_total_content_score": 79.43779904, "semantic_relevance": null, "factual_accuracy": null, "freshness": null, "objectivity_tone": null, "layout_ad_density": null, "accountability": null, "transparency": null, "authority": null, "avg_ge_freq": 0.5340818181818183, "relative_se_rank": 2.115456142814551, "normalized_reciprocal_se_rank": 0.07056832757590334, "reciprocal_se_rank": 0.02666569036411269, "percentage_ge_sources_not_in_se_sources": 87.5, "percentage_ge_sources_in_se_sources": 28.4 } ], "queries": [ { "model_name": "claude", "query_id": 38, "query_type": "DebateQA", "num_sources": 2, "unweighted_mean_score": 5.0, "weighted_total_content_score": 100.0, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.66665, "relative_se_rank": 2.1739130434782608, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 24, "query_type": "DebateQA", "num_sources": 1, "unweighted_mean_score": 5.0, "weighted_total_content_score": 100.0, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.2127659574468085, "normalized_reciprocal_se_rank": 0.27972027972027974, "reciprocal_se_rank": 0.07692307692307693, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-5", "query_id": 25, "query_type": "DebateQA", "num_sources": 2, "unweighted_mean_score": 5.0, "weighted_total_content_score": 100.0, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.66665, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 50, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 5.0, "weighted_total_content_score": 100.0, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.8332999999999999, "relative_se_rank": 0.031914893617021274, "normalized_reciprocal_se_rank": 0.8959595959595961, "reciprocal_se_rank": 0.225, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-5", "query_id": 24, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.95, "weighted_total_content_score": 99.15789473684211, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.6, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.9333200000000001, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 70, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.95, "weighted_total_content_score": 99.15789473684211, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.8, "accountability": 4.8, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.8666600000000001, "relative_se_rank": 1.3041666666666667, "normalized_reciprocal_se_rank": 0.1721019721019721, "reciprocal_se_rank": 0.05106333795654183, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "tavily", "query_id": 50, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 4.9375, "weighted_total_content_score": 98.94736842105263, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.75, "accountability": 5.0, "transparency": 4.75, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.5744680851063829, "normalized_reciprocal_se_rank": 0.567929292929293, "reciprocal_se_rank": 0.1461771844660194, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "gpt-5", "query_id": 38, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 4.916666666666667, "weighted_total_content_score": 98.59649122807018, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.666666666666667, "accountability": 4.666666666666667, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.1739130434782608, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 30, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.925, "weighted_total_content_score": 98.52631578947368, "semantic_relevance": 5.0, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 4.6, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.9333199999999999, "relative_se_rank": 1.5391304347826087, "normalized_reciprocal_se_rank": 0.040330910919146215, "reciprocal_se_rank": 0.019399903351930765, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 75, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.875, "weighted_total_content_score": 97.89473684210526, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "claude", "query_id": 50, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 4.875, "weighted_total_content_score": 97.89473684210526, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.333333333333333, "accountability": 4.666666666666667, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.8889, "relative_se_rank": 0.8936170212765958, "normalized_reciprocal_se_rank": 0.3694083694083694, "reciprocal_se_rank": 0.09847434119278779, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "deepseek-reasoning-tavily", "query_id": 62, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.875, "weighted_total_content_score": 97.89473684210526, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.8414634146341463, "normalized_reciprocal_se_rank": 0.46525784157363104, "reciprocal_se_rank": 0.12150613183444048, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "exa", "query_id": 39, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.875, "weighted_total_content_score": 97.89473684210526, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.0833333333333335, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 50, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.875, "weighted_total_content_score": 97.89473684210526, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 1.0851063829787233, "normalized_reciprocal_se_rank": 0.395959595959596, "reciprocal_se_rank": 0.10485436893203884, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "gpt-4o", "query_id": 80, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.875, "weighted_total_content_score": 97.89473684210526, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 75, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.875, "weighted_total_content_score": 97.89473684210526, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 24, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.875, "weighted_total_content_score": 97.89473684210526, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.5, "accountability": 4.75, "transparency": 4.75, "authority": 5.0, "avg_ge_freq": 0.83335, "relative_se_rank": 1.1329787234042552, "normalized_reciprocal_se_rank": 0.23322973322973323, "reciprocal_se_rank": 0.06575180482947474, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "tavily", "query_id": 62, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.875, "weighted_total_content_score": 97.89473684210526, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.5, "accountability": 4.5, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.06097560975609756, "normalized_reciprocal_se_rank": 0.7225589225589226, "reciprocal_se_rank": 0.18333333333333335, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 53, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.875, "weighted_total_content_score": 97.36842105263158, "semantic_relevance": 4.5, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 4.5, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 29, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.85, "weighted_total_content_score": 97.05263157894737, "semantic_relevance": 4.6, "factual_accuracy": 5.0, "freshness": 4.6, "objectivity_tone": 5.0, "layout_ad_density": 4.8, "accountability": 5.0, "transparency": 4.8, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 70, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.85, "weighted_total_content_score": 97.05263157894736, "semantic_relevance": 4.6, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.6, "accountability": 4.8, "transparency": 5.0, "authority": 4.8, "avg_ge_freq": 0.93334, "relative_se_rank": 0.9083333333333334, "normalized_reciprocal_se_rank": 0.26806156806156806, "reciprocal_se_rank": 0.0741215903837263, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gensee", "query_id": 70, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.825, "weighted_total_content_score": 96.84210526315789, "semantic_relevance": 4.8, "factual_accuracy": 5.0, "freshness": 4.8, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 1.3041666666666667, "normalized_reciprocal_se_rank": 0.1721019721019721, "reciprocal_se_rank": 0.05106333795654183, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "claude", "query_id": 77, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.833333333333333, "weighted_total_content_score": 96.84210526315788, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 4.333333333333333, "accountability": 4.666666666666667, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.8889, "relative_se_rank": 0.8222222222222223, "normalized_reciprocal_se_rank": 0.3661054994388328, "reciprocal_se_rank": 0.097680690399137, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "exa", "query_id": 31, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.825, "weighted_total_content_score": 96.63157894736841, "semantic_relevance": 4.6, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.2, "accountability": 4.8, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.06666666666666667, "normalized_reciprocal_se_rank": 0.6958056758056758, "reciprocal_se_rank": 0.1769047619047619, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "exa", "query_id": 70, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.8, "weighted_total_content_score": 96.63157894736841, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.2, "accountability": 4.8, "transparency": 5.0, "authority": 4.4, "avg_ge_freq": 1.0, "relative_se_rank": 0.10833333333333335, "normalized_reciprocal_se_rank": 0.5108449575116241, "reciprocal_se_rank": 0.13246031746031744, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "claude", "query_id": 99, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.8125, "weighted_total_content_score": 96.57894736842105, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.75, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 4.75, "avg_ge_freq": 0.916675, "relative_se_rank": 0.9085365853658537, "normalized_reciprocal_se_rank": 0.1628658283716935, "reciprocal_se_rank": 0.0488439733223244, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "exa", "query_id": 16, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.84375, "weighted_total_content_score": 96.57894736842104, "semantic_relevance": 4.5, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.75, "layout_ad_density": 4.5, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 37, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.8125, "weighted_total_content_score": 96.57894736842104, "semantic_relevance": 4.75, "factual_accuracy": 5.0, "freshness": 3.75, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 24, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.8, "weighted_total_content_score": 96.42105263157895, "semantic_relevance": 4.8, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.4, "accountability": 4.6, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 1.0, "relative_se_rank": 0.5191489361702127, "normalized_reciprocal_se_rank": 0.39336589336589334, "reciprocal_se_rank": 0.10423112486219281, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 50, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.8, "weighted_total_content_score": 96.42105263157893, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.6, "objectivity_tone": 4.8, "layout_ad_density": 5.0, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.93334, "relative_se_rank": 0.9702127659574469, "normalized_reciprocal_se_rank": 0.38002886002886005, "reciprocal_se_rank": 0.10102635228848822, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "claude", "query_id": 31, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.8, "weighted_total_content_score": 96.21052631578948, "semantic_relevance": 4.6, "factual_accuracy": 5.0, "freshness": 4.8, "objectivity_tone": 5.0, "layout_ad_density": 4.6, "accountability": 4.4, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.86668, "relative_se_rank": 0.14666666666666667, "normalized_reciprocal_se_rank": 0.5259164746884045, "reciprocal_se_rank": 0.13608187134502925, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-5", "query_id": 89, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.825, "weighted_total_content_score": 96.21052631578947, "semantic_relevance": 4.2, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.4, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.6, "relative_se_rank": 1.9951219512195124, "normalized_reciprocal_se_rank": 0.06127946127946128, "reciprocal_se_rank": 0.0244336569579288, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 50, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 4.75, "weighted_total_content_score": 95.78947368421052, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "claude", "query_id": 64, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.75, "weighted_total_content_score": 95.78947368421052, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 3.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.023809523809523808, "normalized_reciprocal_se_rank": 1.0, "reciprocal_se_rank": 0.25, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "claude", "query_id": 71, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.75, "weighted_total_content_score": 95.78947368421052, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.02702702702702703, "normalized_reciprocal_se_rank": 1.0, "reciprocal_se_rank": 0.25, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "claude", "query_id": 80, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.75, "weighted_total_content_score": 95.78947368421052, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.5, "objectivity_tone": 5.0, "layout_ad_density": 4.5, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.127659574468085, "normalized_reciprocal_se_rank": 0.21099887766554432, "reciprocal_se_rank": 0.060409924487594385, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "exa", "query_id": 50, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.775, "weighted_total_content_score": 95.78947368421052, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.0, "objectivity_tone": 4.6, "layout_ad_density": 4.6, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.08936170212765956, "normalized_reciprocal_se_rank": 0.6264454064454065, "reciprocal_se_rank": 0.16023809523809524, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-5", "query_id": 46, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.75, "weighted_total_content_score": 95.78947368421052, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 3.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.5, "relative_se_rank": 0.03333333333333333, "normalized_reciprocal_se_rank": 0.8959595959595961, "reciprocal_se_rank": 0.225, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-5", "query_id": 65, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.75, "weighted_total_content_score": 95.78947368421052, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 68, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.75, "weighted_total_content_score": 95.78947368421052, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 80, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.75, "weighted_total_content_score": 95.78947368421052, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 26, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.775, "weighted_total_content_score": 95.57894736842104, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.8, "objectivity_tone": 4.4, "layout_ad_density": 4.8, "accountability": 4.2, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.813953488372093, "normalized_reciprocal_se_rank": 0.23975511209657557, "reciprocal_se_rank": 0.06731979635330335, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "google-search", "query_id": 29, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.775, "weighted_total_content_score": 95.36842105263159, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 4.6, "accountability": 4.4, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "tavily", "query_id": 31, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.775, "weighted_total_content_score": 95.36842105263158, "semantic_relevance": 4.4, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.8, "layout_ad_density": 4.2, "accountability": 4.8, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.5022222222222222, "normalized_reciprocal_se_rank": 0.553605900272567, "reciprocal_se_rank": 0.14273539836646632, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "exa", "query_id": 25, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.75, "weighted_total_content_score": 95.26315789473685, "semantic_relevance": 4.5, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.75, "accountability": 4.25, "transparency": 4.75, "authority": 4.75, "avg_ge_freq": 1.0, "relative_se_rank": 1.0548780487804876, "normalized_reciprocal_se_rank": 0.14183654729109277, "reciprocal_se_rank": 0.04379082082965578, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 26, "query_type": "DebateQA", "num_sources": 2, "unweighted_mean_score": 4.75, "weighted_total_content_score": 95.26315789473684, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.0, "objectivity_tone": 4.5, "layout_ad_density": 5.0, "accountability": 4.5, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.23255813953488375, "normalized_reciprocal_se_rank": 0.2816257816257816, "reciprocal_se_rank": 0.07738095238095238, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "claude", "query_id": 25, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.75, "weighted_total_content_score": 95.26315789473684, "semantic_relevance": 4.75, "factual_accuracy": 5.0, "freshness": 4.25, "objectivity_tone": 4.75, "layout_ad_density": 4.5, "accountability": 4.75, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.5833499999999999, "relative_se_rank": 0.6951219512195121, "normalized_reciprocal_se_rank": 0.13046231546231546, "reciprocal_se_rank": 0.04105769230769231, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 50, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.725, "weighted_total_content_score": 95.15789473684211, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.6, "objectivity_tone": 4.8, "layout_ad_density": 4.4, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 0.6936170212765957, "normalized_reciprocal_se_rank": 0.28368377734459604, "reciprocal_se_rank": 0.07787547076969661, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "deepseek-chat-gensee", "query_id": 32, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.725, "weighted_total_content_score": 95.15789473684211, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.8, "objectivity_tone": 4.8, "layout_ad_density": 5.0, "accountability": 4.2, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.231111111111111, "normalized_reciprocal_se_rank": 0.22700758374382662, "reciprocal_se_rank": 0.06425667667630786, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gpt-5", "query_id": 33, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.775, "weighted_total_content_score": 95.15789473684211, "semantic_relevance": 4.8, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.4, "layout_ad_density": 4.2, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.73334, "relative_se_rank": 1.8744186046511628, "normalized_reciprocal_se_rank": 0.1306397306397306, "reciprocal_se_rank": 0.04110032362459547, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gpt-5", "query_id": 98, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.8, "weighted_total_content_score": 95.15789473684211, "semantic_relevance": 4.2, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.6, "layout_ad_density": 4.8, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.73332, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 79, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.708333333333333, "weighted_total_content_score": 95.0877192982456, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.0, "objectivity_tone": 5.0, "layout_ad_density": 3.6666666666666665, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.7629629629629631, "normalized_reciprocal_se_rank": 0.5973063973063973, "reciprocal_se_rank": 0.15323624595469257, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 29, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.775, "weighted_total_content_score": 94.94736842105263, "semantic_relevance": 4.6, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 4.6, "accountability": 4.8, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-5", "query_id": 16, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.775, "weighted_total_content_score": 94.73684210526315, "semantic_relevance": 3.6, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.6, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.6666599999999998, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 40, "query_type": "DebateQA", "num_sources": 2, "unweighted_mean_score": 4.6875, "weighted_total_content_score": 94.73684210526315, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.5, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.5, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 70, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.7, "weighted_total_content_score": 94.73684210526315, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.8, "objectivity_tone": 4.8, "layout_ad_density": 4.6, "accountability": 4.6, "transparency": 5.0, "authority": 4.8, "avg_ge_freq": 0.86668, "relative_se_rank": 0.9125, "normalized_reciprocal_se_rank": 0.25650152316818986, "reciprocal_se_rank": 0.07134381260594852, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "exa", "query_id": 79, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.75, "weighted_total_content_score": 94.52631578947368, "semantic_relevance": 3.8, "factual_accuracy": 5.0, "freshness": 4.6, "objectivity_tone": 5.0, "layout_ad_density": 4.6, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.3466666666666667, "normalized_reciprocal_se_rank": 0.3583838383838384, "reciprocal_se_rank": 0.0958252427184466, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 78, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.675, "weighted_total_content_score": 94.52631578947367, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.4, "objectivity_tone": 5.0, "layout_ad_density": 3.8, "accountability": 5.0, "transparency": 4.8, "authority": 4.4, "avg_ge_freq": 0.6, "relative_se_rank": 1.7106382978723402, "normalized_reciprocal_se_rank": 0.1583838383838384, "reciprocal_se_rank": 0.047766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "exa", "query_id": 32, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.675, "weighted_total_content_score": 94.52631578947367, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.2, "objectivity_tone": 5.0, "layout_ad_density": 4.2, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.8622222222222224, "normalized_reciprocal_se_rank": 0.029752066115702486, "reciprocal_se_rank": 0.016857899382171224, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gpt-5", "query_id": 31, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.725, "weighted_total_content_score": 94.52631578947367, "semantic_relevance": 4.4, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.8, "layout_ad_density": 4.2, "accountability": 4.4, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.86668, "relative_se_rank": 1.422222222222222, "normalized_reciprocal_se_rank": 0.13411896745230079, "reciprocal_se_rank": 0.04193635382955771, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 36, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.725, "weighted_total_content_score": 94.52631578947367, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.4, "objectivity_tone": 4.2, "layout_ad_density": 4.2, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.6829268292682926, "normalized_reciprocal_se_rank": 0.07454027454027454, "reciprocal_se_rank": 0.027620114513318396, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 50, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.7, "weighted_total_content_score": 94.3157894736842, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.4, "objectivity_tone": 4.4, "layout_ad_density": 4.2, "accountability": 4.8, "transparency": 5.0, "authority": 4.8, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 0.9957446808510637, "normalized_reciprocal_se_rank": 0.22842712842712842, "reciprocal_se_rank": 0.06459778085991677, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gpt-5", "query_id": 94, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.7, "weighted_total_content_score": 94.3157894736842, "semantic_relevance": 4.4, "factual_accuracy": 5.0, "freshness": 4.2, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.00002, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 26, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.71875, "weighted_total_content_score": 94.21052631578948, "semantic_relevance": 4.75, "factual_accuracy": 5.0, "freshness": 4.75, "objectivity_tone": 4.25, "layout_ad_density": 4.25, "accountability": 5.0, "transparency": 5.0, "authority": 4.75, "avg_ge_freq": 1.0, "relative_se_rank": 0.6976744186046512, "normalized_reciprocal_se_rank": 0.39652939652939656, "reciprocal_se_rank": 0.10499128703012198, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "claude", "query_id": 78, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.65625, "weighted_total_content_score": 94.21052631578947, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 4.25, "avg_ge_freq": 0.583325, "relative_se_rank": 1.6063829787234043, "normalized_reciprocal_se_rank": 0.197979797979798, "reciprocal_se_rank": 0.05728155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "gpt-5", "query_id": 32, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.8125, "weighted_total_content_score": 94.21052631578947, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.75, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 4.75, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.750025, "relative_se_rank": 1.4388888888888889, "normalized_reciprocal_se_rank": 0.048523856450685715, "reciprocal_se_rank": 0.02136859657431526, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "gpt-5", "query_id": 51, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.6875, "weighted_total_content_score": 94.21052631578947, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 4.5, "accountability": 4.0, "transparency": 4.5, "authority": 5.0, "avg_ge_freq": 0.5, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 73, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.6875, "weighted_total_content_score": 94.21052631578947, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 3.5, "accountability": 5.0, "transparency": 5.0, "authority": 4.5, "avg_ge_freq": 1.5, "relative_se_rank": 0.03125, "normalized_reciprocal_se_rank": 0.8959595959595961, "reciprocal_se_rank": 0.225, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "tavily", "query_id": 66, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.6875, "weighted_total_content_score": 94.21052631578947, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 3.5, "accountability": 5.0, "transparency": 5.0, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 0.06097560975609756, "normalized_reciprocal_se_rank": 0.777056277056277, "reciprocal_se_rank": 0.19642857142857142, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-5", "query_id": 26, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.75, "weighted_total_content_score": 94.10526315789474, "semantic_relevance": 3.4, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 4.6, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.60002, "relative_se_rank": 1.902325581395349, "normalized_reciprocal_se_rank": 0.06127946127946128, "reciprocal_se_rank": 0.0244336569579288, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gpt-5", "query_id": 14, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.725, "weighted_total_content_score": 93.6842105263158, "semantic_relevance": 4.0, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.6, "layout_ad_density": 4.6, "accountability": 4.8, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.86668, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 70, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.65, "weighted_total_content_score": 93.68421052631578, "semantic_relevance": 5.0, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.8, "layout_ad_density": 4.2, "accountability": 4.4, "transparency": 4.4, "authority": 4.6, "avg_ge_freq": 0.93334, "relative_se_rank": 1.0583333333333333, "normalized_reciprocal_se_rank": 0.18293760111941929, "reciprocal_se_rank": 0.053667044929180854, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "claude", "query_id": 28, "query_type": "DebateQA", "num_sources": 1, "unweighted_mean_score": 4.625, "weighted_total_content_score": 93.68421052631578, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.6667, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 93, "query_type": "QuoraQuestions", "num_sources": 1, "unweighted_mean_score": 4.625, "weighted_total_content_score": 93.68421052631578, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.20454545454545456, "normalized_reciprocal_se_rank": 0.3063973063973064, "reciprocal_se_rank": 0.08333333333333333, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "deepseek-reasoning-tavily", "query_id": 31, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.6875, "weighted_total_content_score": 93.68421052631578, "semantic_relevance": 4.5, "factual_accuracy": 5.0, "freshness": 4.75, "objectivity_tone": 4.5, "layout_ad_density": 4.5, "accountability": 4.25, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.08333333333333334, "normalized_reciprocal_se_rank": 0.6457671957671958, "reciprocal_se_rank": 0.16488095238095238, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gensee", "query_id": 73, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.6875, "weighted_total_content_score": 93.68421052631578, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.5, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.83335, "relative_se_rank": 0.03125, "normalized_reciprocal_se_rank": 0.8959595959595961, "reciprocal_se_rank": 0.225, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "google-search", "query_id": 31, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.675, "weighted_total_content_score": 93.68421052631578, "semantic_relevance": 4.6, "factual_accuracy": 5.0, "freshness": 4.8, "objectivity_tone": 4.6, "layout_ad_density": 4.2, "accountability": 4.2, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-5", "query_id": 4, "query_type": "VACOS", "num_sources": 1, "unweighted_mean_score": 4.625, "weighted_total_content_score": 93.68421052631578, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.6667, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 80, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.6875, "weighted_total_content_score": 93.68421052631578, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 3.5, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.5, "relative_se_rank": 1.3297872340425532, "normalized_reciprocal_se_rank": 0.05411255411255411, "reciprocal_se_rank": 0.02271151178918169, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 62, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.65625, "weighted_total_content_score": 93.68421052631578, "semantic_relevance": 4.75, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 4.75, "layout_ad_density": 4.75, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.750025, "relative_se_rank": 1.9878048780487805, "normalized_reciprocal_se_rank": 0.025774991292232673, "reciprocal_se_rank": 0.015902243053230663, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "tavily", "query_id": 26, "query_type": "DebateQA", "num_sources": 1, "unweighted_mean_score": 4.75, "weighted_total_content_score": 93.68421052631578, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.23255813953488372, "normalized_reciprocal_se_rank": 0.27972027972027974, "reciprocal_se_rank": 0.07692307692307693, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 26, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.65, "weighted_total_content_score": 93.47368421052632, "semantic_relevance": 4.8, "factual_accuracy": 4.8, "freshness": 4.6, "objectivity_tone": 4.8, "layout_ad_density": 4.6, "accountability": 4.4, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 79, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.675, "weighted_total_content_score": 93.47368421052632, "semantic_relevance": 4.2, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.8, "layout_ad_density": 4.2, "accountability": 4.6, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 0.9333333333333332, "normalized_reciprocal_se_rank": 0.4335353535353536, "reciprocal_se_rank": 0.11388349514563108, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "google-search", "query_id": 75, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.65, "weighted_total_content_score": 93.47368421052632, "semantic_relevance": 4.4, "factual_accuracy": 5.0, "freshness": 3.6, "objectivity_tone": 5.0, "layout_ad_density": 4.6, "accountability": 5.0, "transparency": 4.6, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 29, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.675, "weighted_total_content_score": 93.4736842105263, "semantic_relevance": 4.8, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.4, "layout_ad_density": 4.2, "accountability": 4.4, "transparency": 5.0, "authority": 4.8, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.3822222222222222, "normalized_reciprocal_se_rank": 0.25594405594405595, "reciprocal_se_rank": 0.07120985810306199, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gpt-4o", "query_id": 70, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.625, "weighted_total_content_score": 93.33333333333333, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 3.6666666666666665, "accountability": 4.333333333333333, "transparency": 4.666666666666667, "authority": 4.666666666666667, "avg_ge_freq": 0.3333, "relative_se_rank": 1.4166666666666667, "normalized_reciprocal_se_rank": 0.1847041847041847, "reciprocal_se_rank": 0.05409153952843273, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "gpt-5", "query_id": 21, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.65, "weighted_total_content_score": 93.26315789473685, "semantic_relevance": 4.2, "factual_accuracy": 5.0, "freshness": 4.4, "objectivity_tone": 5.0, "layout_ad_density": 4.2, "accountability": 4.6, "transparency": 4.8, "authority": 5.0, "avg_ge_freq": 0.73332, "relative_se_rank": 2.0833333333333335, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 26, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.65625, "weighted_total_content_score": 93.15789473684211, "semantic_relevance": 5.0, "factual_accuracy": 4.75, "freshness": 4.75, "objectivity_tone": 4.25, "layout_ad_density": 4.25, "accountability": 4.5, "transparency": 5.0, "authority": 4.75, "avg_ge_freq": 0.833325, "relative_se_rank": 0.38372093023255816, "normalized_reciprocal_se_rank": 0.41673934466617396, "reciprocal_se_rank": 0.10984756097560976, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-5", "query_id": 99, "query_type": "QuoraQuestions", "num_sources": 2, "unweighted_mean_score": 4.6875, "weighted_total_content_score": 93.15789473684211, "semantic_relevance": 3.5, "factual_accuracy": 5.0, "freshness": 4.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 29, "query_type": "DebateQA", "num_sources": 2, "unweighted_mean_score": 4.625, "weighted_total_content_score": 93.1578947368421, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.5, "objectivity_tone": 4.5, "layout_ad_density": 4.5, "accountability": 4.5, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.14444444444444443, "normalized_reciprocal_se_rank": 0.5243867243867244, "reciprocal_se_rank": 0.13571428571428573, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-5", "query_id": 96, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.65625, "weighted_total_content_score": 93.1578947368421, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 4.25, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.49999999999999994, "relative_se_rank": 1.7159090909090913, "normalized_reciprocal_se_rank": 0.197979797979798, "reciprocal_se_rank": 0.05728155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "gpt-5", "query_id": 63, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.708333333333333, "weighted_total_content_score": 92.98245614035089, "semantic_relevance": 3.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.666666666666667, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 4.761904761904762, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 30, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 4.666666666666667, "weighted_total_content_score": 92.98245614035086, "semantic_relevance": 4.666666666666667, "factual_accuracy": 4.333333333333333, "freshness": 5.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 4.0, "accountability": 4.666666666666667, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.7777666666666666, "relative_se_rank": 0.2681159420289855, "normalized_reciprocal_se_rank": 0.31783031783031784, "reciprocal_se_rank": 0.08608058608058607, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "tavily", "query_id": 29, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 4.625, "weighted_total_content_score": 92.98245614035086, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.333333333333333, "layout_ad_density": 3.6666666666666665, "accountability": 4.333333333333333, "transparency": 4.666666666666667, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.09629629629629628, "normalized_reciprocal_se_rank": 0.6905464905464905, "reciprocal_se_rank": 0.17564102564102566, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 24, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.625, "weighted_total_content_score": 92.84210526315789, "semantic_relevance": 4.8, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.6, "layout_ad_density": 3.8, "accountability": 4.8, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": 0.80002, "relative_se_rank": 1.3659574468085107, "normalized_reciprocal_se_rank": 0.10731490731490731, "reciprocal_se_rank": 0.03549557238877627, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gensee", "query_id": 32, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.6, "weighted_total_content_score": 92.84210526315789, "semantic_relevance": 4.8, "factual_accuracy": 5.0, "freshness": 3.8, "objectivity_tone": 4.8, "layout_ad_density": 4.2, "accountability": 4.6, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 0.59998, "relative_se_rank": 1.3555555555555556, "normalized_reciprocal_se_rank": 0.3108225108225108, "reciprocal_se_rank": 0.08439667128987517, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 26, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.675, "weighted_total_content_score": 92.63157894736841, "semantic_relevance": 3.6, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 4.4, "transparency": 5.0, "authority": 4.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 36, "query_type": "DebateQA", "num_sources": 2, "unweighted_mean_score": 4.6875, "weighted_total_content_score": 92.63157894736841, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.5, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 75, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.625, "weighted_total_content_score": 92.63157894736841, "semantic_relevance": 4.2, "factual_accuracy": 4.8, "freshness": 4.8, "objectivity_tone": 5.0, "layout_ad_density": 3.4, "accountability": 5.0, "transparency": 4.8, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.7148936170212763, "normalized_reciprocal_se_rank": 0.1306397306397306, "reciprocal_se_rank": 0.04110032362459547, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gpt-4o", "query_id": 25, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.575, "weighted_total_content_score": 92.63157894736841, "semantic_relevance": 4.8, "factual_accuracy": 5.0, "freshness": 4.8, "objectivity_tone": 5.0, "layout_ad_density": 4.8, "accountability": 3.8, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.3333, "relative_se_rank": 1.3414634146341464, "normalized_reciprocal_se_rank": 0.11282898919262556, "reciprocal_se_rank": 0.03682055808269401, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gpt-5", "query_id": 9, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.625, "weighted_total_content_score": 92.63157894736841, "semantic_relevance": 5.0, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 4.2, "accountability": 4.6, "transparency": 4.8, "authority": 4.4, "avg_ge_freq": 0.93334, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 43, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.625, "weighted_total_content_score": 92.63157894736841, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 3.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.5, "relative_se_rank": 1.125, "normalized_reciprocal_se_rank": 0.16896235078053262, "reciprocal_se_rank": 0.05030891438658429, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 46, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 4.625, "weighted_total_content_score": 92.63157894736841, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.06666666666666667, "normalized_reciprocal_se_rank": 0.6531986531986531, "reciprocal_se_rank": 0.16666666666666666, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 29, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 4.625, "weighted_total_content_score": 92.6315789473684, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.0, "objectivity_tone": 4.0, "layout_ad_density": 5.0, "accountability": 4.333333333333333, "transparency": 4.666666666666667, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.7777777777777778, "normalized_reciprocal_se_rank": 0.4817059483726151, "reciprocal_se_rank": 0.12545846817691478, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "deepseek-chat-gensee", "query_id": 21, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.575, "weighted_total_content_score": 92.42105263157895, "semantic_relevance": 4.8, "factual_accuracy": 5.0, "freshness": 3.8, "objectivity_tone": 4.8, "layout_ad_density": 4.4, "accountability": 4.4, "transparency": 4.6, "authority": 4.8, "avg_ge_freq": 1.0, "relative_se_rank": 1.3208333333333335, "normalized_reciprocal_se_rank": 0.19654320987654322, "reciprocal_se_rank": 0.05693635382955771, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gensee", "query_id": 29, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 4.625, "weighted_total_content_score": 92.28070175438596, "semantic_relevance": 4.666666666666667, "factual_accuracy": 5.0, "freshness": 4.666666666666667, "objectivity_tone": 4.0, "layout_ad_density": 4.666666666666667, "accountability": 5.0, "transparency": 4.333333333333333, "authority": 4.666666666666667, "avg_ge_freq": 0.6666666666666666, "relative_se_rank": 1.5555555555555556, "normalized_reciprocal_se_rank": 0.09324009324009325, "reciprocal_se_rank": 0.03211351755041075, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "gensee", "query_id": 62, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.541666666666667, "weighted_total_content_score": 92.28070175438596, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.6666666666666665, "objectivity_tone": 5.0, "layout_ad_density": 3.0, "accountability": 5.0, "transparency": 4.666666666666667, "authority": 5.0, "avg_ge_freq": 0.4444333333333333, "relative_se_rank": 1.3252032520325203, "normalized_reciprocal_se_rank": 0.05557877626843144, "reciprocal_se_rank": 0.02306383216158911, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 32, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.6, "weighted_total_content_score": 92.21052631578947, "semantic_relevance": 4.4, "factual_accuracy": 4.8, "freshness": 3.8, "objectivity_tone": 4.8, "layout_ad_density": 4.4, "accountability": 4.8, "transparency": 5.0, "authority": 4.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 35, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.65, "weighted_total_content_score": 92.21052631578947, "semantic_relevance": 4.4, "factual_accuracy": 4.8, "freshness": 4.8, "objectivity_tone": 4.0, "layout_ad_density": 4.8, "accountability": 4.6, "transparency": 4.8, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.9608695652173912, "normalized_reciprocal_se_rank": 0.3149923477196205, "reciprocal_se_rank": 0.08539864666078259, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "google-search", "query_id": 24, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.6, "weighted_total_content_score": 92.21052631578947, "semantic_relevance": 4.8, "factual_accuracy": 4.6, "freshness": 4.4, "objectivity_tone": 4.6, "layout_ad_density": 4.6, "accountability": 4.4, "transparency": 4.8, "authority": 4.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 55, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.65, "weighted_total_content_score": 92.21052631578947, "semantic_relevance": 3.4, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.59998, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 62, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.5625, "weighted_total_content_score": 92.10526315789474, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.5, "objectivity_tone": 4.5, "layout_ad_density": 4.5, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 79, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.5625, "weighted_total_content_score": 92.10526315789474, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.5, "objectivity_tone": 4.5, "layout_ad_density": 3.5, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.6667, "relative_se_rank": 0.03333333333333333, "normalized_reciprocal_se_rank": 0.8959595959595961, "reciprocal_se_rank": 0.225, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "exa", "query_id": 66, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.5625, "weighted_total_content_score": 92.10526315789473, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.5, "objectivity_tone": 4.5, "layout_ad_density": 4.5, "accountability": 5.0, "transparency": 5.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.1951219512195122, "normalized_reciprocal_se_rank": 0.3955747955747956, "reciprocal_se_rank": 0.10476190476190475, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-5", "query_id": 78, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.53125, "weighted_total_content_score": 92.10526315789473, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 4.25, "avg_ge_freq": 0.749975, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 97, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.625, "weighted_total_content_score": 92.0, "semantic_relevance": 3.4, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.8, "accountability": 3.8, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.6667, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 16, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.6, "weighted_total_content_score": 92.0, "semantic_relevance": 4.8, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.2, "accountability": 4.2, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 0.6667, "relative_se_rank": 1.538095238095238, "normalized_reciprocal_se_rank": 0.0998834498834499, "reciprocal_se_rank": 0.033709858103061985, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "google-search", "query_id": 77, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.59375, "weighted_total_content_score": 91.8421052631579, "semantic_relevance": 4.75, "factual_accuracy": 5.0, "freshness": 4.25, "objectivity_tone": 4.0, "layout_ad_density": 4.5, "accountability": 4.25, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 32, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.5625, "weighted_total_content_score": 91.84210526315789, "semantic_relevance": 4.75, "factual_accuracy": 5.0, "freshness": 3.25, "objectivity_tone": 4.5, "layout_ad_density": 5.0, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.7333333333333334, "normalized_reciprocal_se_rank": 0.4703914141414142, "reciprocal_se_rank": 0.12273968446601942, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "gensee", "query_id": 35, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.59375, "weighted_total_content_score": 91.84210526315789, "semantic_relevance": 5.0, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.25, "accountability": 4.25, "transparency": 4.75, "authority": 4.75, "avg_ge_freq": 0.41664999999999996, "relative_se_rank": 1.152173913043478, "normalized_reciprocal_se_rank": 0.2138888888888889, "reciprocal_se_rank": 0.061104368932038834, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 62, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.575, "weighted_total_content_score": 91.78947368421053, "semantic_relevance": 4.2, "factual_accuracy": 5.0, "freshness": 3.8, "objectivity_tone": 4.8, "layout_ad_density": 4.6, "accountability": 4.2, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.8, "relative_se_rank": 1.9658536585365856, "normalized_reciprocal_se_rank": 0.1306397306397306, "reciprocal_se_rank": 0.04110032362459547, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "tavily", "query_id": 77, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.625, "weighted_total_content_score": 91.78947368421052, "semantic_relevance": 3.6, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.6, "layout_ad_density": 4.2, "accountability": 4.6, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.4044444444444444, "normalized_reciprocal_se_rank": 0.19926322043969105, "reciprocal_se_rank": 0.057589948600799544, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 37, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.5625, "weighted_total_content_score": 91.57894736842105, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.75, "objectivity_tone": 4.0, "layout_ad_density": 4.5, "accountability": 3.75, "transparency": 4.75, "authority": 4.75, "avg_ge_freq": 0.75, "relative_se_rank": 0.3469387755102041, "normalized_reciprocal_se_rank": 0.40063173730586943, "reciprocal_se_rank": 0.1059770436730123, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 62, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.65, "weighted_total_content_score": 91.57894736842104, "semantic_relevance": 3.2, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.6, "layout_ad_density": 4.8, "accountability": 4.8, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 1.9609756097560975, "normalized_reciprocal_se_rank": 0.1583838383838384, "reciprocal_se_rank": 0.047766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "claude", "query_id": 61, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.5, "weighted_total_content_score": 91.57894736842104, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 62, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.5, "weighted_total_content_score": 91.57894736842104, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.66665, "relative_se_rank": 1.5365853658536586, "normalized_reciprocal_se_rank": 0.051549982584465345, "reciprocal_se_rank": 0.02209574824238366, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "claude", "query_id": 66, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.5, "weighted_total_content_score": 91.57894736842104, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 4.0, "avg_ge_freq": 0.6667, "relative_se_rank": 0.0975609756097561, "normalized_reciprocal_se_rank": 0.5541125541125541, "reciprocal_se_rank": 0.14285714285714285, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "claude", "query_id": 70, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.5, "weighted_total_content_score": 91.57894736842104, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.125, "normalized_reciprocal_se_rank": 0.42199775533108863, "reciprocal_se_rank": 0.1111111111111111, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "deepseek-reasoning-gensee", "query_id": 42, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.5625, "weighted_total_content_score": 91.57894736842104, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 3.0, "transparency": 4.5, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.0510204081632653, "normalized_reciprocal_se_rank": 0.777056277056277, "reciprocal_se_rank": 0.19642857142857142, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "exa", "query_id": 72, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.5, "weighted_total_content_score": 91.57894736842104, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.07317073170731707, "normalized_reciprocal_se_rank": 0.6531986531986531, "reciprocal_se_rank": 0.16666666666666666, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-4o", "query_id": 52, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 4.625, "weighted_total_content_score": 91.57894736842104, "semantic_relevance": 3.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 71, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.5, "weighted_total_content_score": 91.57894736842104, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 0.02702702702702703, "normalized_reciprocal_se_rank": 1.0, "reciprocal_se_rank": 0.25, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-4o", "query_id": 89, "query_type": "QuoraQuestions", "num_sources": 2, "unweighted_mean_score": 4.5625, "weighted_total_content_score": 91.57894736842104, "semantic_relevance": 5.0, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 4.0, "accountability": 4.5, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.3333, "relative_se_rank": 1.329268292682927, "normalized_reciprocal_se_rank": 0.1531986531986532, "reciprocal_se_rank": 0.0465210355987055, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "gpt-5", "query_id": 44, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.5, "weighted_total_content_score": 91.57894736842104, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 3.5, "accountability": 3.0, "transparency": 4.5, "authority": 5.0, "avg_ge_freq": 0.5, "relative_se_rank": 1.1744186046511629, "normalized_reciprocal_se_rank": 0.5, "reciprocal_se_rank": 0.12985436893203883, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "gpt-5", "query_id": 55, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 4.625, "weighted_total_content_score": 91.57894736842104, "semantic_relevance": 3.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 79, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.5, "weighted_total_content_score": 91.57894736842104, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 3.0, "accountability": 5.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.044444444444444446, "normalized_reciprocal_se_rank": 0.791919191919192, "reciprocal_se_rank": 0.2, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "tavily", "query_id": 64, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.5, "weighted_total_content_score": 91.57894736842104, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.6666666666666665, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 4.666666666666667, "authority": 4.666666666666667, "avg_ge_freq": 1.0, "relative_se_rank": 0.8571428571428571, "normalized_reciprocal_se_rank": 0.4046389824167602, "reciprocal_se_rank": 0.10693994965839626, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 70, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.55, "weighted_total_content_score": 91.36842105263159, "semantic_relevance": 5.0, "factual_accuracy": 4.8, "freshness": 4.2, "objectivity_tone": 4.2, "layout_ad_density": 4.2, "accountability": 4.4, "transparency": 5.0, "authority": 4.6, "avg_ge_freq": 1.0, "relative_se_rank": 0.09583333333333335, "normalized_reciprocal_se_rank": 0.6000224466891134, "reciprocal_se_rank": 0.15388888888888888, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "google-search", "query_id": 70, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.55, "weighted_total_content_score": 91.36842105263159, "semantic_relevance": 5.0, "factual_accuracy": 4.8, "freshness": 4.8, "objectivity_tone": 4.2, "layout_ad_density": 4.0, "accountability": 4.2, "transparency": 5.0, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 62, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.525, "weighted_total_content_score": 91.36842105263158, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.2, "objectivity_tone": 4.4, "layout_ad_density": 4.6, "accountability": 4.2, "transparency": 4.8, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "deepseek-reasoning-tavily", "query_id": 22, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.53125, "weighted_total_content_score": 91.3157894736842, "semantic_relevance": 4.75, "factual_accuracy": 4.75, "freshness": 4.25, "objectivity_tone": 4.75, "layout_ad_density": 4.25, "accountability": 4.0, "transparency": 4.75, "authority": 4.75, "avg_ge_freq": 1.0, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 63, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.53125, "weighted_total_content_score": 91.3157894736842, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.25, "objectivity_tone": 4.25, "layout_ad_density": 3.5, "accountability": 5.0, "transparency": 4.75, "authority": 4.5, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "deepseek-reasoning-tavily", "query_id": 92, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.575, "weighted_total_content_score": 91.15789473684211, "semantic_relevance": 3.6, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.8, "layout_ad_density": 3.6, "accountability": 5.0, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 1.0, "relative_se_rank": 0.9224489795918368, "normalized_reciprocal_se_rank": 0.23052318052318052, "reciprocal_se_rank": 0.06510144386357979, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gpt-4o", "query_id": 79, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.5, "weighted_total_content_score": 91.05263157894737, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 4.5, "layout_ad_density": 3.5, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 0.03333333333333333, "normalized_reciprocal_se_rank": 0.8959595959595961, "reciprocal_se_rank": 0.225, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "google-search", "query_id": 33, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.53125, "weighted_total_content_score": 91.05263157894736, "semantic_relevance": 4.5, "factual_accuracy": 5.0, "freshness": 4.25, "objectivity_tone": 4.5, "layout_ad_density": 4.5, "accountability": 4.5, "transparency": 4.25, "authority": 4.75, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "deepseek-chat-gensee", "query_id": 31, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.525, "weighted_total_content_score": 90.94736842105263, "semantic_relevance": 4.6, "factual_accuracy": 5.0, "freshness": 4.4, "objectivity_tone": 4.4, "layout_ad_density": 4.0, "accountability": 4.6, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": 1.0, "relative_se_rank": 1.1333333333333333, "normalized_reciprocal_se_rank": 0.09965874658234361, "reciprocal_se_rank": 0.03365586386323305, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gpt-4o", "query_id": 32, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.5, "weighted_total_content_score": 90.94736842105263, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.0, "objectivity_tone": 4.4, "layout_ad_density": 3.4, "accountability": 4.6, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 0.3333, "relative_se_rank": 1.52, "normalized_reciprocal_se_rank": 0.2108356290174472, "reciprocal_se_rank": 0.06037069726390114, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 4, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 4.5, "weighted_total_content_score": 90.87719298245612, "semantic_relevance": 4.666666666666667, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 3.3333333333333335, "accountability": 4.666666666666667, "transparency": 4.333333333333333, "authority": 4.333333333333333, "avg_ge_freq": 0.3333, "relative_se_rank": 1.8699186991869918, "normalized_reciprocal_se_rank": 0.02856851341699827, "reciprocal_se_rank": 0.016573502010395213, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "google-search", "query_id": 32, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.53125, "weighted_total_content_score": 90.78947368421052, "semantic_relevance": 5.0, "factual_accuracy": 4.75, "freshness": 4.25, "objectivity_tone": 4.0, "layout_ad_density": 4.75, "accountability": 4.0, "transparency": 4.75, "authority": 4.75, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 50, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.5, "weighted_total_content_score": 90.73684210526316, "semantic_relevance": 5.0, "factual_accuracy": 4.8, "freshness": 3.6, "objectivity_tone": 4.4, "layout_ad_density": 4.4, "accountability": 4.2, "transparency": 4.6, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 64, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.45, "weighted_total_content_score": 90.73684210526315, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 5.0, "layout_ad_density": 3.2, "accountability": 5.0, "transparency": 4.8, "authority": 4.6, "avg_ge_freq": 0.60002, "relative_se_rank": 1.4428571428571426, "normalized_reciprocal_se_rank": 0.3583838383838384, "reciprocal_se_rank": 0.0958252427184466, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 18, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 4.5, "weighted_total_content_score": 90.52631578947368, "semantic_relevance": 4.333333333333333, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 3.3333333333333335, "accountability": 4.666666666666667, "transparency": 4.666666666666667, "authority": 4.333333333333333, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 22, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.525, "weighted_total_content_score": 90.52631578947368, "semantic_relevance": 4.6, "factual_accuracy": 4.6, "freshness": 4.6, "objectivity_tone": 4.4, "layout_ad_density": 4.2, "accountability": 4.6, "transparency": 4.4, "authority": 4.8, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 18, "query_type": "VACOS", "num_sources": 1, "unweighted_mean_score": 4.5, "weighted_total_content_score": 90.52631578947368, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-chat-gensee", "query_id": 42, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.5, "weighted_total_content_score": 90.52631578947368, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 3.0, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.0510204081632653, "normalized_reciprocal_se_rank": 0.777056277056277, "reciprocal_se_rank": 0.19642857142857142, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "deepseek-reasoning-gensee", "query_id": 31, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.5, "weighted_total_content_score": 90.52631578947368, "semantic_relevance": 4.8, "factual_accuracy": 5.0, "freshness": 4.2, "objectivity_tone": 4.2, "layout_ad_density": 4.0, "accountability": 4.6, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": 1.0, "relative_se_rank": 1.0533333333333332, "normalized_reciprocal_se_rank": 0.2660915032679739, "reciprocal_se_rank": 0.073648201027984, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gensee", "query_id": 37, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.475, "weighted_total_content_score": 90.52631578947368, "semantic_relevance": 5.0, "factual_accuracy": 4.8, "freshness": 3.6, "objectivity_tone": 4.6, "layout_ad_density": 4.4, "accountability": 4.2, "transparency": 4.4, "authority": 4.8, "avg_ge_freq": 0.6, "relative_se_rank": 0.8816326530612244, "normalized_reciprocal_se_rank": 0.345679012345679, "reciprocal_se_rank": 0.09277238403451996, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gpt-5", "query_id": 59, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.525, "weighted_total_content_score": 90.52631578947368, "semantic_relevance": 3.8, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.8, "layout_ad_density": 4.2, "accountability": 3.6, "transparency": 5.0, "authority": 4.8, "avg_ge_freq": 0.3333, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 51, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 4.5, "weighted_total_content_score": 90.52631578947368, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 99, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 4.5, "weighted_total_content_score": 90.52631578947368, "semantic_relevance": 4.666666666666667, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.333333333333333, "layout_ad_density": 3.3333333333333335, "accountability": 4.666666666666667, "transparency": 4.333333333333333, "authority": 4.666666666666667, "avg_ge_freq": 1.0, "relative_se_rank": 0.9268292682926829, "normalized_reciprocal_se_rank": 0.34298540965207636, "reciprocal_se_rank": 0.09212513484358144, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "exa", "query_id": 80, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.4375, "weighted_total_content_score": 90.52631578947367, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.25, "objectivity_tone": 5.0, "layout_ad_density": 3.25, "accountability": 4.75, "transparency": 4.5, "authority": 4.75, "avg_ge_freq": 1.0, "relative_se_rank": 1.6648936170212765, "normalized_reciprocal_se_rank": 0.05492424242424243, "reciprocal_se_rank": 0.02290655339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "gensee", "query_id": 55, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.575, "weighted_total_content_score": 90.52631578947367, "semantic_relevance": 3.0, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 4.4, "transparency": 4.6, "authority": 4.8, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 80, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.5, "weighted_total_content_score": 90.52631578947367, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 3.0, "accountability": 5.0, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 79, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.525, "weighted_total_content_score": 90.3157894736842, "semantic_relevance": 4.2, "factual_accuracy": 4.8, "freshness": 4.6, "objectivity_tone": 4.4, "layout_ad_density": 4.2, "accountability": 4.4, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 0.66668, "relative_se_rank": 1.3466666666666667, "normalized_reciprocal_se_rank": 0.3583838383838384, "reciprocal_se_rank": 0.0958252427184466, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 36, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.5, "weighted_total_content_score": 90.3157894736842, "semantic_relevance": 5.0, "factual_accuracy": 4.8, "freshness": 4.8, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.6, "transparency": 4.8, "authority": 4.0, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.1609756097560977, "normalized_reciprocal_se_rank": 0.010013175230566534, "reciprocal_se_rank": 0.012114816378218657, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "deepseek-reasoning-gensee", "query_id": 22, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.5, "weighted_total_content_score": 90.3157894736842, "semantic_relevance": 4.6, "factual_accuracy": 5.0, "freshness": 4.4, "objectivity_tone": 4.2, "layout_ad_density": 4.8, "accountability": 3.8, "transparency": 4.4, "authority": 4.8, "avg_ge_freq": 1.0, "relative_se_rank": 1.961904761904762, "normalized_reciprocal_se_rank": 0.04740740740740741, "reciprocal_se_rank": 0.02110032362459547, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "deepseek-reasoning-gensee", "query_id": 32, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.5, "weighted_total_content_score": 90.3157894736842, "semantic_relevance": 4.6, "factual_accuracy": 4.8, "freshness": 3.2, "objectivity_tone": 4.4, "layout_ad_density": 4.8, "accountability": 4.6, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 1.0, "relative_se_rank": 1.462222222222222, "normalized_reciprocal_se_rank": 0.21876832844574778, "reciprocal_se_rank": 0.0622768556216724, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "exa", "query_id": 77, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.575, "weighted_total_content_score": 90.3157894736842, "semantic_relevance": 4.2, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.9955555555555555, "normalized_reciprocal_se_rank": 0.38498316498316504, "reciprocal_se_rank": 0.1022168284789644, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gensee", "query_id": 50, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.475, "weighted_total_content_score": 90.3157894736842, "semantic_relevance": 4.8, "factual_accuracy": 5.0, "freshness": 3.6, "objectivity_tone": 4.4, "layout_ad_density": 3.8, "accountability": 4.2, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.59998, "relative_se_rank": 1.302127659574468, "normalized_reciprocal_se_rank": 0.295959595959596, "reciprocal_se_rank": 0.0808252427184466, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gpt-4o", "query_id": 62, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.475, "weighted_total_content_score": 90.3157894736842, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 4.2, "layout_ad_density": 4.4, "accountability": 4.4, "transparency": 4.8, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 1.1024390243902438, "normalized_reciprocal_se_rank": 0.31562289562289564, "reciprocal_se_rank": 0.08555016181229774, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "exa", "query_id": 8, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.53125, "weighted_total_content_score": 90.26315789473684, "semantic_relevance": 4.75, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 3.5, "layout_ad_density": 3.25, "accountability": 5.0, "transparency": 4.75, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.755813953488372, "normalized_reciprocal_se_rank": 0.197979797979798, "reciprocal_se_rank": 0.05728155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "gpt-5", "query_id": 67, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.4375, "weighted_total_content_score": 90.26315789473684, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.25, "objectivity_tone": 4.75, "layout_ad_density": 3.75, "accountability": 3.75, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.49997499999999995, "relative_se_rank": 1.622340425531915, "normalized_reciprocal_se_rank": 0.11994949494949496, "reciprocal_se_rank": 0.03853155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "gpt-5", "query_id": 2, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.575, "weighted_total_content_score": 90.10526315789474, "semantic_relevance": 4.2, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 4.4, "accountability": 5.0, "transparency": 4.8, "authority": 5.0, "avg_ge_freq": 0.73332, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 31, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.5, "weighted_total_content_score": 90.10526315789473, "semantic_relevance": 4.6, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 4.4, "layout_ad_density": 3.6, "accountability": 4.2, "transparency": 5.0, "authority": 4.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "exa", "query_id": 22, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.525, "weighted_total_content_score": 90.10526315789473, "semantic_relevance": 4.2, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 4.6, "layout_ad_density": 4.4, "accountability": 4.2, "transparency": 4.4, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.104761904761905, "normalized_reciprocal_se_rank": 0.010415263748597083, "reciprocal_se_rank": 0.012211434735706577, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "google-search", "query_id": 26, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.525, "weighted_total_content_score": 90.10526315789473, "semantic_relevance": 5.0, "factual_accuracy": 4.4, "freshness": 4.8, "objectivity_tone": 3.8, "layout_ad_density": 4.6, "accountability": 4.2, "transparency": 4.8, "authority": 4.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 31, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.475, "weighted_total_content_score": 90.10526315789473, "semantic_relevance": 5.0, "factual_accuracy": 4.8, "freshness": 4.8, "objectivity_tone": 4.2, "layout_ad_density": 3.6, "accountability": 4.4, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": 0.80002, "relative_se_rank": 0.2622222222222222, "normalized_reciprocal_se_rank": 0.4556088913044182, "reciprocal_se_rank": 0.11918757339596456, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 38, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.475, "weighted_total_content_score": 90.10526315789473, "semantic_relevance": 5.0, "factual_accuracy": 4.6, "freshness": 4.6, "objectivity_tone": 4.4, "layout_ad_density": 4.0, "accountability": 4.2, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": 0.3333, "relative_se_rank": 0.9608695652173914, "normalized_reciprocal_se_rank": 0.2823416235180941, "reciprocal_se_rank": 0.07755296293274591, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "exa", "query_id": 97, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.5, "weighted_total_content_score": 90.0, "semantic_relevance": 4.5, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 4.25, "accountability": 4.5, "transparency": 4.5, "authority": 4.25, "avg_ge_freq": 1.0, "relative_se_rank": 0.75625, "normalized_reciprocal_se_rank": 0.39081289081289083, "reciprocal_se_rank": 0.10361766065649561, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "gpt-4o", "query_id": 61, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.4375, "weighted_total_content_score": 90.0, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.5, "objectivity_tone": 4.5, "layout_ad_density": 3.5, "accountability": 5.0, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.66665, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 79, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.4375, "weighted_total_content_score": 89.99999999999999, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.25, "objectivity_tone": 4.5, "layout_ad_density": 4.0, "accountability": 4.5, "transparency": 4.5, "authority": 4.75, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-4o", "query_id": 77, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.46875, "weighted_total_content_score": 89.99999999999999, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.5, "objectivity_tone": 4.0, "layout_ad_density": 3.75, "accountability": 4.5, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.3333, "relative_se_rank": 1.6777777777777778, "normalized_reciprocal_se_rank": 0.197979797979798, "reciprocal_se_rank": 0.05728155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 89, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.5, "weighted_total_content_score": 89.89473684210527, "semantic_relevance": 4.4, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 4.4, "layout_ad_density": 3.8, "accountability": 4.6, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 99, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.475, "weighted_total_content_score": 89.89473684210527, "semantic_relevance": 4.8, "factual_accuracy": 4.8, "freshness": 4.6, "objectivity_tone": 4.2, "layout_ad_density": 3.6, "accountability": 4.2, "transparency": 4.6, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.8, "normalized_reciprocal_se_rank": 0.26535710017558534, "reciprocal_se_rank": 0.07347173038199746, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "exa", "query_id": 37, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.571428571428571, "weighted_total_content_score": 89.89473684210526, "semantic_relevance": 4.8, "factual_accuracy": 5.0, "freshness": 3.8, "objectivity_tone": 4.4, "layout_ad_density": 4.25, "accountability": 4.6, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 1.0, "relative_se_rank": 1.1632653061224492, "normalized_reciprocal_se_rank": 0.06369686527789295, "reciprocal_se_rank": 0.02501453801580437, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 31, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.5, "weighted_total_content_score": 89.89473684210525, "semantic_relevance": 4.4, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 4.4, "layout_ad_density": 3.8, "accountability": 4.2, "transparency": 5.0, "authority": 4.6, "avg_ge_freq": 0.73334, "relative_se_rank": 0.5511111111111111, "normalized_reciprocal_se_rank": 0.4900691121743754, "reciprocal_se_rank": 0.12746806336228922, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 75, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.416666666666667, "weighted_total_content_score": 89.82456140350877, "semantic_relevance": 4.666666666666667, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 5.0, "layout_ad_density": 3.3333333333333335, "accountability": 4.666666666666667, "transparency": 4.666666666666667, "authority": 5.0, "avg_ge_freq": 0.7777666666666666, "relative_se_rank": 1.4397163120567376, "normalized_reciprocal_se_rank": 0.21773288439955105, "reciprocal_se_rank": 0.062028047464940665, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "exa", "query_id": 62, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.53125, "weighted_total_content_score": 89.73684210526315, "semantic_relevance": 3.75, "factual_accuracy": 4.0, "freshness": 4.25, "objectivity_tone": 5.0, "layout_ad_density": 4.75, "accountability": 4.75, "transparency": 5.0, "authority": 4.75, "avg_ge_freq": 1.0, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 14, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.46875, "weighted_total_content_score": 89.73684210526315, "semantic_relevance": 4.75, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 3.75, "accountability": 4.25, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.75, "relative_se_rank": 1.7000000000000002, "normalized_reciprocal_se_rank": 0.10549943883277216, "reciprocal_se_rank": 0.035059331175836025, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 26, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.525, "weighted_total_content_score": 89.6842105263158, "semantic_relevance": 4.2, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.8, "accountability": 4.0, "transparency": 4.6, "authority": 5.0, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.9627906976744185, "normalized_reciprocal_se_rank": 0.025212121212121213, "reciprocal_se_rank": 0.015766990291262134, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "deepseek-chat-gensee", "query_id": 22, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.425, "weighted_total_content_score": 89.6842105263158, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.8, "objectivity_tone": 4.4, "layout_ad_density": 4.2, "accountability": 4.0, "transparency": 4.2, "authority": 4.8, "avg_ge_freq": 1.0, "relative_se_rank": 2.104761904761905, "normalized_reciprocal_se_rank": 0.010415263748597083, "reciprocal_se_rank": 0.012211434735706581, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 19, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.45, "weighted_total_content_score": 89.6842105263158, "semantic_relevance": 4.8, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 3.4, "accountability": 4.2, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": 0.86668, "relative_se_rank": 1.1142857142857143, "normalized_reciprocal_se_rank": 0.26926103136629453, "reciprocal_se_rank": 0.07440981093510475, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 2, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.475, "weighted_total_content_score": 89.47368421052633, "semantic_relevance": 4.6, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.2, "accountability": 5.0, "transparency": 4.8, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 29, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.5, "weighted_total_content_score": 89.47368421052632, "semantic_relevance": 4.6, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 4.8, "accountability": 3.8, "transparency": 5.0, "authority": 4.4, "avg_ge_freq": 0.66668, "relative_se_rank": 1.3511111111111112, "normalized_reciprocal_se_rank": 0.3306397306397306, "reciprocal_se_rank": 0.08915857605177993, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "claude", "query_id": 58, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 4.375, "weighted_total_content_score": 89.47368421052632, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 3.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.2708333333333333, "normalized_reciprocal_se_rank": 0.21969696969696972, "reciprocal_se_rank": 0.0625, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "exa", "query_id": 4, "query_type": "VACOS", "num_sources": 2, "unweighted_mean_score": 4.4375, "weighted_total_content_score": 89.47368421052632, "semantic_relevance": 4.5, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 2.5, "accountability": 4.5, "transparency": 4.5, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.2560975609756098, "normalized_reciprocal_se_rank": 0.32659932659932656, "reciprocal_se_rank": 0.08818770226537216, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "exa", "query_id": 89, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.45, "weighted_total_content_score": 89.47368421052632, "semantic_relevance": 5.0, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.4, "accountability": 4.6, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 1.0, "relative_se_rank": 1.2146341463414634, "normalized_reciprocal_se_rank": 0.23188305822452165, "reciprocal_se_rank": 0.06542821059278554, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gensee", "query_id": 25, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 4.416666666666667, "weighted_total_content_score": 89.47368421052632, "semantic_relevance": 5.0, "factual_accuracy": 4.666666666666667, "freshness": 5.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 4.333333333333333, "accountability": 3.3333333333333335, "transparency": 4.0, "authority": 4.333333333333333, "avg_ge_freq": 0.5555666666666667, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 60, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 4.5, "weighted_total_content_score": 89.47368421052632, "semantic_relevance": 3.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 3.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.020833333333333332, "normalized_reciprocal_se_rank": 1.0, "reciprocal_se_rank": 0.25, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-5", "query_id": 64, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.375, "weighted_total_content_score": 89.47368421052632, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.0, "objectivity_tone": 5.0, "layout_ad_density": 3.5, "accountability": 5.0, "transparency": 5.0, "authority": 4.5, "avg_ge_freq": 1.16665, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-reasoning-tavily", "query_id": 71, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.4375, "weighted_total_content_score": 89.4736842105263, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.5, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 1.364864864864865, "normalized_reciprocal_se_rank": 0.5, "reciprocal_se_rank": 0.12985436893203883, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "tavily", "query_id": 93, "query_type": "QuoraQuestions", "num_sources": 2, "unweighted_mean_score": 4.5625, "weighted_total_content_score": 89.4736842105263, "semantic_relevance": 3.5, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.5, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.38636363636363635, "normalized_reciprocal_se_rank": 0.1724663514944845, "reciprocal_se_rank": 0.05115089514066496, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "google-search", "query_id": 35, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.475, "weighted_total_content_score": 89.26315789473685, "semantic_relevance": 5.0, "factual_accuracy": 4.4, "freshness": 4.4, "objectivity_tone": 3.8, "layout_ad_density": 4.6, "accountability": 3.8, "transparency": 4.8, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 62, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.475, "weighted_total_content_score": 89.26315789473684, "semantic_relevance": 3.6, "factual_accuracy": 5.0, "freshness": 4.2, "objectivity_tone": 4.6, "layout_ad_density": 4.2, "accountability": 4.6, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 2.097560975609756, "normalized_reciprocal_se_rank": 0.01714110805019896, "reciprocal_se_rank": 0.013827596351868195, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "tavily", "query_id": 79, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.375, "weighted_total_content_score": 89.26315789473684, "semantic_relevance": 4.8, "factual_accuracy": 5.0, "freshness": 4.0, "objectivity_tone": 5.0, "layout_ad_density": 3.2, "accountability": 4.4, "transparency": 4.4, "authority": 4.2, "avg_ge_freq": 1.0, "relative_se_rank": 0.48888888888888893, "normalized_reciprocal_se_rank": 0.5998460798460798, "reciprocal_se_rank": 0.15384650947757744, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "gpt-4o", "query_id": 31, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 4.416666666666667, "weighted_total_content_score": 89.12280701754385, "semantic_relevance": 4.666666666666667, "factual_accuracy": 5.0, "freshness": 4.666666666666667, "objectivity_tone": 4.333333333333333, "layout_ad_density": 3.3333333333333335, "accountability": 4.333333333333333, "transparency": 4.333333333333333, "authority": 4.666666666666667, "avg_ge_freq": 0.3333, "relative_se_rank": 0.5777777777777777, "normalized_reciprocal_se_rank": 0.12367853620378873, "reciprocal_se_rank": 0.03942760942760943, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-4o", "query_id": 55, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 4.5, "weighted_total_content_score": 89.12280701754385, "semantic_relevance": 3.3333333333333335, "factual_accuracy": 4.666666666666667, "freshness": 5.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 4.666666666666667, "accountability": 4.0, "transparency": 4.666666666666667, "authority": 5.0, "avg_ge_freq": 0.5555666666666667, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 14, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 4.458333333333333, "weighted_total_content_score": 89.12280701754385, "semantic_relevance": 4.333333333333333, "factual_accuracy": 4.666666666666667, "freshness": 5.0, "objectivity_tone": 4.333333333333333, "layout_ad_density": 3.6666666666666665, "accountability": 4.333333333333333, "transparency": 4.666666666666667, "authority": 4.666666666666667, "avg_ge_freq": 0.5555666666666667, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 32, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.45, "weighted_total_content_score": 89.05263157894737, "semantic_relevance": 4.6, "factual_accuracy": 4.8, "freshness": 4.8, "objectivity_tone": 4.0, "layout_ad_density": 4.4, "accountability": 4.0, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 1.488888888888889, "normalized_reciprocal_se_rank": 0.10232884399551065, "reciprocal_se_rank": 0.03429746494066882, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 24, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.45, "weighted_total_content_score": 89.05263157894736, "semantic_relevance": 4.4, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 4.4, "layout_ad_density": 3.4, "accountability": 4.6, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 65, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.46875, "weighted_total_content_score": 88.94736842105263, "semantic_relevance": 3.25, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.75, "layout_ad_density": 4.5, "accountability": 3.75, "transparency": 4.75, "authority": 4.75, "avg_ge_freq": 1.0, "relative_se_rank": 1.292682926829268, "normalized_reciprocal_se_rank": 0.22300931391840484, "reciprocal_se_rank": 0.06329592737359728, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 19, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.4, "weighted_total_content_score": 88.84210526315789, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.6, "accountability": 5.0, "transparency": 4.4, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 62, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.475, "weighted_total_content_score": 88.84210526315789, "semantic_relevance": 4.2, "factual_accuracy": 4.4, "freshness": 4.4, "objectivity_tone": 4.2, "layout_ad_density": 4.4, "accountability": 4.6, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 31, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.45, "weighted_total_content_score": 88.84210526315789, "semantic_relevance": 4.2, "factual_accuracy": 4.8, "freshness": 4.2, "objectivity_tone": 4.2, "layout_ad_density": 4.0, "accountability": 4.2, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.93334, "relative_se_rank": 0.9511111111111112, "normalized_reciprocal_se_rank": 0.3721019721019721, "reciprocal_se_rank": 0.0991215903837263, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gensee", "query_id": 76, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.425, "weighted_total_content_score": 88.84210526315789, "semantic_relevance": 4.4, "factual_accuracy": 5.0, "freshness": 4.6, "objectivity_tone": 4.2, "layout_ad_density": 3.2, "accountability": 4.8, "transparency": 4.4, "authority": 4.8, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 8, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.425, "weighted_total_content_score": 88.84210526315789, "semantic_relevance": 4.8, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.2, "accountability": 4.6, "transparency": 4.4, "authority": 4.6, "avg_ge_freq": 0.73334, "relative_se_rank": 1.8651162790697675, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gpt-5", "query_id": 66, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.425, "weighted_total_content_score": 88.84210526315789, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 3.4, "objectivity_tone": 4.6, "layout_ad_density": 4.2, "accountability": 4.8, "transparency": 5.0, "authority": 4.4, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 1.502439024390244, "normalized_reciprocal_se_rank": 0.22164502164502164, "reciprocal_se_rank": 0.06296809986130374, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 78, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.4, "weighted_total_content_score": 88.84210526315789, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 4.0, "layout_ad_density": 3.8, "accountability": 5.0, "transparency": 4.8, "authority": 4.6, "avg_ge_freq": 0.86668, "relative_se_rank": 1.7106382978723402, "normalized_reciprocal_se_rank": 0.1583838383838384, "reciprocal_se_rank": 0.047766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "deepseek-chat-gensee", "query_id": 71, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.416666666666667, "weighted_total_content_score": 88.77192982456141, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 4.333333333333333, "objectivity_tone": 4.666666666666667, "layout_ad_density": 4.0, "accountability": 3.6666666666666665, "transparency": 4.666666666666667, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.8108108108108107, "normalized_reciprocal_se_rank": 0.3333333333333333, "reciprocal_se_rank": 0.08980582524271845, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 46, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 4.5, "weighted_total_content_score": 88.77192982456138, "semantic_relevance": 2.6666666666666665, "factual_accuracy": 4.666666666666667, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.666666666666667, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 1.6444444444444446, "normalized_reciprocal_se_rank": 0.04202020202020202, "reciprocal_se_rank": 0.019805825242718445, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "gpt-5", "query_id": 49, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 4.375, "weighted_total_content_score": 88.77192982456138, "semantic_relevance": 5.0, "factual_accuracy": 4.666666666666667, "freshness": 5.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 4.0, "accountability": 3.0, "transparency": 4.333333333333333, "authority": 4.333333333333333, "avg_ge_freq": 0.3333, "relative_se_rank": 1.4565217391304348, "normalized_reciprocal_se_rank": 0.3333333333333333, "reciprocal_se_rank": 0.08980582524271845, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 55, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 4.53125, "weighted_total_content_score": 88.6842105263158, "semantic_relevance": 2.0, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 4.75, "authority": 4.75, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 33, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.475, "weighted_total_content_score": 88.63157894736841, "semantic_relevance": 3.0, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.8, "layout_ad_density": 4.2, "accountability": 4.6, "transparency": 4.6, "authority": 4.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 77, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.45, "weighted_total_content_score": 88.63157894736841, "semantic_relevance": 4.0, "factual_accuracy": 4.8, "freshness": 4.4, "objectivity_tone": 4.2, "layout_ad_density": 3.8, "accountability": 5.0, "transparency": 4.8, "authority": 4.6, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-chat-gensee", "query_id": 61, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.4, "weighted_total_content_score": 88.63157894736841, "semantic_relevance": 4.6, "factual_accuracy": 4.8, "freshness": 3.2, "objectivity_tone": 4.4, "layout_ad_density": 4.4, "accountability": 4.6, "transparency": 4.4, "authority": 4.8, "avg_ge_freq": 1.0, "relative_se_rank": 1.791111111111111, "normalized_reciprocal_se_rank": 0.1306397306397306, "reciprocal_se_rank": 0.04110032362459547, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "exa", "query_id": 55, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.525, "weighted_total_content_score": 88.63157894736841, "semantic_relevance": 2.0, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 4.8, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 77, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.425, "weighted_total_content_score": 88.63157894736841, "semantic_relevance": 4.8, "factual_accuracy": 4.8, "freshness": 4.4, "objectivity_tone": 3.8, "layout_ad_density": 4.4, "accountability": 4.2, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 1.7822222222222224, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gpt-5", "query_id": 95, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.55, "weighted_total_content_score": 88.63157894736841, "semantic_relevance": 2.0, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.6, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.2, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 76, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.425, "weighted_total_content_score": 88.63157894736841, "semantic_relevance": 4.6, "factual_accuracy": 5.0, "freshness": 3.4, "objectivity_tone": 3.8, "layout_ad_density": 4.4, "accountability": 4.6, "transparency": 5.0, "authority": 4.6, "avg_ge_freq": 0.66666, "relative_se_rank": 1.9142857142857141, "normalized_reciprocal_se_rank": 0.1583838383838384, "reciprocal_se_rank": 0.047766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gensee", "query_id": 64, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.375, "weighted_total_content_score": 88.42105263157896, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 2.6666666666666665, "objectivity_tone": 5.0, "layout_ad_density": 4.333333333333333, "accountability": 4.666666666666667, "transparency": 4.666666666666667, "authority": 4.666666666666667, "avg_ge_freq": 0.4444333333333333, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 5, "query_type": "VACOS", "num_sources": 1, "unweighted_mean_score": 4.375, "weighted_total_content_score": 88.42105263157895, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 5.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 6, "query_type": "VACOS", "num_sources": 2, "unweighted_mean_score": 4.375, "weighted_total_content_score": 88.42105263157895, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.5, "accountability": 4.0, "transparency": 4.0, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 2.9411764705882355, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 71, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.375, "weighted_total_content_score": 88.42105263157895, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.02702702702702703, "normalized_reciprocal_se_rank": 1.0, "reciprocal_se_rank": 0.25, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "claude", "query_id": 35, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 4.416666666666667, "weighted_total_content_score": 88.42105263157895, "semantic_relevance": 4.666666666666667, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.666666666666667, "authority": 4.333333333333333, "avg_ge_freq": 1.0, "relative_se_rank": 1.4565217391304348, "normalized_reciprocal_se_rank": 0.3333333333333333, "reciprocal_se_rank": 0.08980582524271845, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "deepseek-reasoning-gensee", "query_id": 81, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.40625, "weighted_total_content_score": 88.42105263157895, "semantic_relevance": 4.25, "factual_accuracy": 4.5, "freshness": 4.25, "objectivity_tone": 4.75, "layout_ad_density": 4.75, "accountability": 3.5, "transparency": 4.5, "authority": 4.75, "avg_ge_freq": 1.0, "relative_se_rank": 1.8353658536585367, "normalized_reciprocal_se_rank": 0.25, "reciprocal_se_rank": 0.06978155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "gensee", "query_id": 16, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.375, "weighted_total_content_score": 88.42105263157895, "semantic_relevance": 5.0, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 2.6, "accountability": 4.6, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 0.66668, "relative_se_rank": 1.5761904761904764, "normalized_reciprocal_se_rank": 0.0754930254930255, "reciprocal_se_rank": 0.027849052242256124, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gensee", "query_id": 75, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.333333333333333, "weighted_total_content_score": 88.42105263157895, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 3.6666666666666665, "accountability": 4.333333333333333, "transparency": 4.333333333333333, "authority": 4.666666666666667, "avg_ge_freq": 0.3333, "relative_se_rank": 1.4397163120567376, "normalized_reciprocal_se_rank": 0.21773288439955105, "reciprocal_se_rank": 0.062028047464940665, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "gpt-5", "query_id": 41, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 4.5, "weighted_total_content_score": 88.42105263157895, "semantic_relevance": 2.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 4.545454545454546, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 39, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 4.375, "weighted_total_content_score": 88.42105263157895, "semantic_relevance": 4.666666666666667, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 4.333333333333333, "layout_ad_density": 4.333333333333333, "accountability": 4.333333333333333, "transparency": 5.0, "authority": 4.333333333333333, "avg_ge_freq": 0.4444333333333333, "relative_se_rank": 1.4305555555555556, "normalized_reciprocal_se_rank": 0.14066591844369622, "reciprocal_se_rank": 0.04350952894642215, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 78, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.3125, "weighted_total_content_score": 88.42105263157893, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.5, "objectivity_tone": 5.0, "layout_ad_density": 3.5, "accountability": 5.0, "transparency": 4.5, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 96, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.40625, "weighted_total_content_score": 88.42105263157893, "semantic_relevance": 4.5, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 4.25, "accountability": 3.75, "transparency": 4.5, "authority": 4.25, "avg_ge_freq": 0.66665, "relative_se_rank": 1.7159090909090913, "normalized_reciprocal_se_rank": 0.197979797979798, "reciprocal_se_rank": 0.05728155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "exa", "query_id": 35, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.425, "weighted_total_content_score": 88.42105263157893, "semantic_relevance": 4.8, "factual_accuracy": 4.6, "freshness": 4.4, "objectivity_tone": 3.8, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 5.0, "authority": 4.8, "avg_ge_freq": 1.0, "relative_se_rank": 0.1, "normalized_reciprocal_se_rank": 0.5663331729998397, "reciprocal_se_rank": 0.1457936507936508, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-4o", "query_id": 65, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.4375, "weighted_total_content_score": 88.42105263157893, "semantic_relevance": 3.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 3.0, "transparency": 4.5, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 70, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.425, "weighted_total_content_score": 88.42105263157893, "semantic_relevance": 4.4, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 4.4, "layout_ad_density": 3.6, "accountability": 4.4, "transparency": 4.8, "authority": 4.4, "avg_ge_freq": 1.0, "relative_se_rank": 0.15, "normalized_reciprocal_se_rank": 0.45304473304473303, "reciprocal_se_rank": 0.11857142857142855, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gensee", "query_id": 65, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.425, "weighted_total_content_score": 88.21052631578947, "semantic_relevance": 3.6, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.4, "layout_ad_density": 3.8, "accountability": 4.4, "transparency": 4.8, "authority": 4.4, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 1.5073170731707317, "normalized_reciprocal_se_rank": 0.2675849403122131, "reciprocal_se_rank": 0.07400706090026479, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 96, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.375, "weighted_total_content_score": 88.15789473684211, "semantic_relevance": 5.0, "factual_accuracy": 4.5, "freshness": 4.5, "objectivity_tone": 4.25, "layout_ad_density": 3.5, "accountability": 4.0, "transparency": 4.75, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 0.7045454545454546, "normalized_reciprocal_se_rank": 0.3295033670033671, "reciprocal_se_rank": 0.08888551779935275, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 74, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.375, "weighted_total_content_score": 88.0701754385965, "semantic_relevance": 4.333333333333333, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 4.333333333333333, "layout_ad_density": 4.0, "accountability": 4.666666666666667, "transparency": 5.0, "authority": 4.666666666666667, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 8, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.35, "weighted_total_content_score": 88.0, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.4, "accountability": 5.0, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 77, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.4, "weighted_total_content_score": 87.99999999999999, "semantic_relevance": 4.4, "factual_accuracy": 4.6, "freshness": 4.6, "objectivity_tone": 4.2, "layout_ad_density": 3.6, "accountability": 4.6, "transparency": 4.8, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 94, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.35, "weighted_total_content_score": 87.99999999999999, "semantic_relevance": 4.6, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.4, "layout_ad_density": 2.4, "accountability": 5.0, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 50, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.35, "weighted_total_content_score": 87.99999999999999, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.8, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.6, "transparency": 4.6, "authority": 4.8, "avg_ge_freq": 0.73334, "relative_se_rank": 1.0680851063829788, "normalized_reciprocal_se_rank": 0.10487719298245615, "reciprocal_se_rank": 0.03490981093510475, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "exa", "query_id": 78, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.35, "weighted_total_content_score": 87.99999999999999, "semantic_relevance": 4.4, "factual_accuracy": 4.8, "freshness": 3.2, "objectivity_tone": 4.8, "layout_ad_density": 3.8, "accountability": 4.8, "transparency": 4.8, "authority": 4.2, "avg_ge_freq": 1.0, "relative_se_rank": 1.7106382978723402, "normalized_reciprocal_se_rank": 0.1583838383838384, "reciprocal_se_rank": 0.047766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "tavily", "query_id": 27, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.3125, "weighted_total_content_score": 87.89473684210527, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.25, "objectivity_tone": 4.5, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 3.75, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.8214285714285716, "normalized_reciprocal_se_rank": 0.10549943883277216, "reciprocal_se_rank": 0.035059331175836025, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 80, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.3125, "weighted_total_content_score": 87.89473684210526, "semantic_relevance": 5.0, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 4.5, "transparency": 3.5, "authority": 3.0, "avg_ge_freq": 0.83335, "relative_se_rank": 1.2872340425531914, "normalized_reciprocal_se_rank": 0.06649831649831649, "reciprocal_se_rank": 0.02568770226537217, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "deepseek-chat-tavily", "query_id": 42, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.4375, "weighted_total_content_score": 87.89473684210526, "semantic_relevance": 2.5, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 3.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.0510204081632653, "normalized_reciprocal_se_rank": 0.777056277056277, "reciprocal_se_rank": 0.19642857142857142, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "deepseek-reasoning-tavily", "query_id": 42, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.4375, "weighted_total_content_score": 87.89473684210526, "semantic_relevance": 2.5, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 3.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.0510204081632653, "normalized_reciprocal_se_rank": 0.777056277056277, "reciprocal_se_rank": 0.19642857142857142, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "exa", "query_id": 99, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.375, "weighted_total_content_score": 87.89473684210526, "semantic_relevance": 5.0, "factual_accuracy": 4.25, "freshness": 5.0, "objectivity_tone": 4.25, "layout_ad_density": 3.75, "accountability": 4.5, "transparency": 4.25, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.7134146341463415, "normalized_reciprocal_se_rank": 0.37640036730945825, "reciprocal_se_rank": 0.10015445719329213, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "gensee", "query_id": 79, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.4375, "weighted_total_content_score": 87.89473684210526, "semantic_relevance": 3.5, "factual_accuracy": 4.5, "freshness": 4.0, "objectivity_tone": 4.5, "layout_ad_density": 4.5, "accountability": 4.5, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.1222222222222222, "normalized_reciprocal_se_rank": 0.5, "reciprocal_se_rank": 0.12985436893203883, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "gpt-4o", "query_id": 29, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.375, "weighted_total_content_score": 87.89473684210526, "semantic_relevance": 5.0, "factual_accuracy": 4.25, "freshness": 5.0, "objectivity_tone": 4.25, "layout_ad_density": 4.0, "accountability": 3.75, "transparency": 4.5, "authority": 4.25, "avg_ge_freq": 0.41664999999999996, "relative_se_rank": 1.1333333333333333, "normalized_reciprocal_se_rank": 0.4132996632996633, "reciprocal_se_rank": 0.10902103559870549, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "gpt-5", "query_id": 23, "query_type": "DebateQA", "num_sources": 2, "unweighted_mean_score": 4.375, "weighted_total_content_score": 87.89473684210526, "semantic_relevance": 4.5, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.5, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.5, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 6, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.40625, "weighted_total_content_score": 87.89473684210526, "semantic_relevance": 4.25, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.25, "accountability": 4.5, "transparency": 4.5, "authority": 5.0, "avg_ge_freq": 0.750025, "relative_se_rank": 2.2867647058823533, "normalized_reciprocal_se_rank": 0.06421356421356421, "reciprocal_se_rank": 0.02513869625520111, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "tavily", "query_id": 32, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.375, "weighted_total_content_score": 87.89473684210526, "semantic_relevance": 4.25, "factual_accuracy": 4.75, "freshness": 3.75, "objectivity_tone": 4.5, "layout_ad_density": 4.25, "accountability": 4.0, "transparency": 4.75, "authority": 4.75, "avg_ge_freq": 1.0, "relative_se_rank": 1.1277777777777778, "normalized_reciprocal_se_rank": 0.44797979797979803, "reciprocal_se_rank": 0.11735436893203884, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "tavily", "query_id": 35, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.40625, "weighted_total_content_score": 87.89473684210526, "semantic_relevance": 4.25, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.75, "accountability": 4.25, "transparency": 4.5, "authority": 4.75, "avg_ge_freq": 1.0, "relative_se_rank": 0.6576086956521738, "normalized_reciprocal_se_rank": 0.3937404346495256, "reciprocal_se_rank": 0.10432112385995881, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "exa", "query_id": 15, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.375, "weighted_total_content_score": 87.78947368421053, "semantic_relevance": 4.6, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 2.8, "accountability": 4.8, "transparency": 4.4, "authority": 4.6, "avg_ge_freq": 1.0, "relative_se_rank": 1.7789473684210528, "normalized_reciprocal_se_rank": 0.06013468013468013, "reciprocal_se_rank": 0.024158576051779936, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 25, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.4, "weighted_total_content_score": 87.78947368421052, "semantic_relevance": 3.8, "factual_accuracy": 4.2, "freshness": 4.4, "objectivity_tone": 5.0, "layout_ad_density": 4.2, "accountability": 4.2, "transparency": 4.8, "authority": 4.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 36, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.375, "weighted_total_content_score": 87.78947368421052, "semantic_relevance": 4.6, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 4.4, "layout_ad_density": 3.2, "accountability": 4.4, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 64, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.325, "weighted_total_content_score": 87.78947368421052, "semantic_relevance": 5.0, "factual_accuracy": 4.8, "freshness": 2.4, "objectivity_tone": 4.4, "layout_ad_density": 4.2, "accountability": 4.6, "transparency": 4.4, "authority": 4.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "tavily", "query_id": 16, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.425, "weighted_total_content_score": 87.78947368421052, "semantic_relevance": 4.4, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 3.4, "accountability": 5.0, "transparency": 4.6, "authority": 4.8, "avg_ge_freq": 1.0, "relative_se_rank": 1.5380952380952384, "normalized_reciprocal_se_rank": 0.0998834498834499, "reciprocal_se_rank": 0.033709858103061985, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "exa", "query_id": 33, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 4.333333333333333, "weighted_total_content_score": 87.71929824561403, "semantic_relevance": 4.333333333333333, "factual_accuracy": 5.0, "freshness": 4.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 4.333333333333333, "accountability": 4.666666666666667, "transparency": 3.6666666666666665, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.12403100775193798, "normalized_reciprocal_se_rank": 0.48365132809577244, "reciprocal_se_rank": 0.1259259259259259, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 10, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.34375, "weighted_total_content_score": 87.63157894736841, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 3.75, "layout_ad_density": 3.25, "accountability": 4.25, "transparency": 4.25, "authority": 4.25, "avg_ge_freq": 0.75, "relative_se_rank": 1.826086956521739, "normalized_reciprocal_se_rank": 0.016576016576016577, "reciprocal_se_rank": 0.013691809808314661, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 15, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.325, "weighted_total_content_score": 87.57894736842105, "semantic_relevance": 4.8, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 2.4, "accountability": 4.8, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.6315789473684212, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 64, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.4, "weighted_total_content_score": 87.57894736842105, "semantic_relevance": 3.8, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 4.6, "layout_ad_density": 3.8, "accountability": 4.6, "transparency": 5.0, "authority": 4.0, "avg_ge_freq": 0.53332, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 67, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.3, "weighted_total_content_score": 87.57894736842105, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.4, "objectivity_tone": 4.4, "layout_ad_density": 3.6, "accountability": 4.4, "transparency": 4.6, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.24255319148936166, "normalized_reciprocal_se_rank": 0.4561132057906251, "reciprocal_se_rank": 0.11930875576036866, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 76, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.375, "weighted_total_content_score": 87.57894736842104, "semantic_relevance": 4.2, "factual_accuracy": 4.6, "freshness": 4.2, "objectivity_tone": 4.4, "layout_ad_density": 3.8, "accountability": 4.8, "transparency": 4.8, "authority": 4.2, "avg_ge_freq": 0.6, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 2, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.375, "weighted_total_content_score": 87.57894736842104, "semantic_relevance": 4.4, "factual_accuracy": 5.0, "freshness": 4.4, "objectivity_tone": 3.8, "layout_ad_density": 4.4, "accountability": 3.8, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 43, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 4.25, "weighted_total_content_score": 87.36842105263158, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 3.0, "transparency": 3.0, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 16, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.375, "weighted_total_content_score": 87.36842105263158, "semantic_relevance": 4.2, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 80, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.3125, "weighted_total_content_score": 87.36842105263158, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.5, "accountability": 5.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.66665, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 51, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 4.5, "weighted_total_content_score": 87.36842105263158, "semantic_relevance": 2.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 75, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.25, "weighted_total_content_score": 87.36842105263158, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.0, "objectivity_tone": 5.0, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-reasoning-gensee", "query_id": 21, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.325, "weighted_total_content_score": 87.36842105263158, "semantic_relevance": 4.8, "factual_accuracy": 4.8, "freshness": 4.2, "objectivity_tone": 4.2, "layout_ad_density": 3.6, "accountability": 4.4, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": 1.0, "relative_se_rank": 0.9083333333333334, "normalized_reciprocal_se_rank": 0.39654320987654323, "reciprocal_se_rank": 0.10499460625674217, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "deepseek-reasoning-tavily", "query_id": 2, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.40625, "weighted_total_content_score": 87.36842105263158, "semantic_relevance": 3.75, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 3.75, "layout_ad_density": 3.5, "accountability": 4.75, "transparency": 4.75, "authority": 4.75, "avg_ge_freq": 1.0, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-reasoning-tavily", "query_id": 72, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.375, "weighted_total_content_score": 87.36842105263158, "semantic_relevance": 3.5, "factual_accuracy": 4.5, "freshness": 3.5, "objectivity_tone": 5.0, "layout_ad_density": 4.5, "accountability": 4.5, "transparency": 5.0, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 0.08536585365853658, "normalized_reciprocal_se_rank": 0.6036556036556036, "reciprocal_se_rank": 0.15476190476190477, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "exa", "query_id": 6, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.34375, "weighted_total_content_score": 87.36842105263158, "semantic_relevance": 4.5, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.75, "accountability": 5.0, "transparency": 4.25, "authority": 4.25, "avg_ge_freq": 1.0, "relative_se_rank": 2.2867647058823533, "normalized_reciprocal_se_rank": 0.06421356421356421, "reciprocal_se_rank": 0.025138696255201105, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "exa", "query_id": 93, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 4.416666666666667, "weighted_total_content_score": 87.36842105263158, "semantic_relevance": 4.333333333333333, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 4.666666666666667, "authority": 4.333333333333333, "avg_ge_freq": 1.0, "relative_se_rank": 1.575757575757576, "normalized_reciprocal_se_rank": 0.11264156718702174, "reciprocal_se_rank": 0.03677552221241542, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "gensee", "query_id": 69, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.375, "weighted_total_content_score": 87.36842105263158, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.06666666666666667, "normalized_reciprocal_se_rank": 1.0, "reciprocal_se_rank": 0.25, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gensee", "query_id": 81, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.4, "weighted_total_content_score": 87.36842105263158, "semantic_relevance": 4.2, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 4.8, "accountability": 3.4, "transparency": 4.6, "authority": 4.8, "avg_ge_freq": 0.66666, "relative_se_rank": 1.95609756097561, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gpt-4o", "query_id": 75, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.25, "weighted_total_content_score": 87.36842105263158, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 76, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.3125, "weighted_total_content_score": 87.36842105263158, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.5, "objectivity_tone": 4.0, "layout_ad_density": 3.5, "accountability": 5.0, "transparency": 5.0, "authority": 4.5, "avg_ge_freq": 0.5, "relative_se_rank": 1.2023809523809523, "normalized_reciprocal_se_rank": 0.5, "reciprocal_se_rank": 0.12985436893203883, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "claude", "query_id": 96, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.375, "weighted_total_content_score": 87.36842105263156, "semantic_relevance": 4.5, "factual_accuracy": 4.5, "freshness": 4.75, "objectivity_tone": 4.0, "layout_ad_density": 3.75, "accountability": 4.25, "transparency": 4.75, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 0.7045454545454546, "normalized_reciprocal_se_rank": 0.3295033670033671, "reciprocal_se_rank": 0.08888551779935275, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 34, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.4, "weighted_total_content_score": 87.15789473684211, "semantic_relevance": 4.2, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 4.6, "accountability": 4.4, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 1.64, "normalized_reciprocal_se_rank": 0.05594405594405595, "reciprocal_se_rank": 0.02315160567587752, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 77, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.375, "weighted_total_content_score": 87.15789473684211, "semantic_relevance": 4.6, "factual_accuracy": 4.8, "freshness": 4.8, "objectivity_tone": 3.4, "layout_ad_density": 3.8, "accountability": 4.4, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": 0.80002, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 1, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.35, "weighted_total_content_score": 87.1578947368421, "semantic_relevance": 4.2, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.2, "accountability": 4.8, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 33, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.35, "weighted_total_content_score": 87.1578947368421, "semantic_relevance": 4.2, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 3.2, "accountability": 4.6, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.1534883720930234, "normalized_reciprocal_se_rank": 0.13141864109606044, "reciprocal_se_rank": 0.041287489001237826, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gpt-5", "query_id": 1, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.375, "weighted_total_content_score": 87.1578947368421, "semantic_relevance": 3.6, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 3.6, "accountability": 4.8, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 0.73336, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-chat-gensee", "query_id": 92, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.375, "weighted_total_content_score": 87.10526315789474, "semantic_relevance": 3.25, "factual_accuracy": 5.0, "freshness": 4.75, "objectivity_tone": 4.5, "layout_ad_density": 3.5, "accountability": 4.75, "transparency": 4.5, "authority": 4.75, "avg_ge_freq": 1.0, "relative_se_rank": 0.6428571428571429, "normalized_reciprocal_se_rank": 0.28815397565397566, "reciprocal_se_rank": 0.07894962036345532, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "gensee", "query_id": 36, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.3125, "weighted_total_content_score": 87.10526315789474, "semantic_relevance": 5.0, "factual_accuracy": 4.5, "freshness": 4.5, "objectivity_tone": 4.25, "layout_ad_density": 4.5, "accountability": 3.5, "transparency": 3.75, "authority": 4.5, "avg_ge_freq": 0.41664999999999996, "relative_se_rank": 2.048780487804878, "normalized_reciprocal_se_rank": 0.016576016576016577, "reciprocal_se_rank": 0.013691809808314661, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 89, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 4.375, "weighted_total_content_score": 87.01754385964914, "semantic_relevance": 4.0, "factual_accuracy": 4.333333333333333, "freshness": 5.0, "objectivity_tone": 4.333333333333333, "layout_ad_density": 3.6666666666666665, "accountability": 4.333333333333333, "transparency": 4.666666666666667, "authority": 4.666666666666667, "avg_ge_freq": 0.6666666666666666, "relative_se_rank": 1.6991869918699187, "normalized_reciprocal_se_rank": 0.10213243546576879, "reciprocal_se_rank": 0.03425026968716289, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "gpt-4o", "query_id": 30, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 4.333333333333333, "weighted_total_content_score": 87.01754385964911, "semantic_relevance": 5.0, "factual_accuracy": 4.333333333333333, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.666666666666667, "transparency": 4.333333333333333, "authority": 4.333333333333333, "avg_ge_freq": 0.5555666666666667, "relative_se_rank": 0.8840579710144927, "normalized_reciprocal_se_rank": 0.17225243891910558, "reciprocal_se_rank": 0.05109949381794041, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 72, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.291666666666667, "weighted_total_content_score": 87.01754385964911, "semantic_relevance": 4.666666666666667, "factual_accuracy": 5.0, "freshness": 4.0, "objectivity_tone": 4.333333333333333, "layout_ad_density": 3.6666666666666665, "accountability": 3.6666666666666665, "transparency": 4.666666666666667, "authority": 4.333333333333333, "avg_ge_freq": 0.7777666666666666, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 19, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 4.333333333333333, "weighted_total_content_score": 87.01754385964911, "semantic_relevance": 4.333333333333333, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.666666666666667, "transparency": 4.333333333333333, "authority": 4.333333333333333, "avg_ge_freq": 1.0, "relative_se_rank": 0.3412698412698412, "normalized_reciprocal_se_rank": 0.35959089643300174, "reciprocal_se_rank": 0.09611528822055138, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 70, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.375, "weighted_total_content_score": 86.94736842105263, "semantic_relevance": 4.2, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.8, "accountability": 4.2, "transparency": 5.0, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 8, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.3, "weighted_total_content_score": 86.94736842105263, "semantic_relevance": 5.0, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.6, "accountability": 5.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.9674418604651163, "normalized_reciprocal_se_rank": 0.023931623931623933, "reciprocal_se_rank": 0.015459297983569828, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 68, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.375, "weighted_total_content_score": 86.94736842105263, "semantic_relevance": 3.4, "factual_accuracy": 4.6, "freshness": 4.2, "objectivity_tone": 4.6, "layout_ad_density": 3.8, "accountability": 4.8, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 28, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.325, "weighted_total_content_score": 86.94736842105263, "semantic_relevance": 5.0, "factual_accuracy": 4.4, "freshness": 3.8, "objectivity_tone": 4.0, "layout_ad_density": 3.8, "accountability": 4.8, "transparency": 4.2, "authority": 4.6, "avg_ge_freq": 1.0, "relative_se_rank": 0.5777777777777777, "normalized_reciprocal_se_rank": 0.37251696734950324, "reciprocal_se_rank": 0.09922131011553598, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "gensee", "query_id": 31, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.325, "weighted_total_content_score": 86.94736842105263, "semantic_relevance": 4.6, "factual_accuracy": 4.8, "freshness": 4.8, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.6, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 0.80002, "relative_se_rank": 0.7911111111111111, "normalized_reciprocal_se_rank": 0.2769271322854211, "reciprocal_se_rank": 0.07625190800062301, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "gpt-4o", "query_id": 16, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.3, "weighted_total_content_score": 86.94736842105263, "semantic_relevance": 5.0, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.8, "accountability": 4.4, "transparency": 4.0, "authority": 4.4, "avg_ge_freq": 0.73334, "relative_se_rank": 1.5761904761904764, "normalized_reciprocal_se_rank": 0.0754930254930255, "reciprocal_se_rank": 0.027849052242256124, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gpt-4o", "query_id": 81, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.4, "weighted_total_content_score": 86.94736842105263, "semantic_relevance": 3.6, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 4.6, "accountability": 3.8, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 1.95609756097561, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gpt-5", "query_id": 77, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.375, "weighted_total_content_score": 86.94736842105263, "semantic_relevance": 3.6, "factual_accuracy": 4.6, "freshness": 4.6, "objectivity_tone": 4.4, "layout_ad_density": 4.0, "accountability": 4.4, "transparency": 4.8, "authority": 4.6, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 1.8, "normalized_reciprocal_se_rank": 0.09595959595959597, "reciprocal_se_rank": 0.032766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 22, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.35, "weighted_total_content_score": 86.94736842105262, "semantic_relevance": 4.8, "factual_accuracy": 4.4, "freshness": 4.8, "objectivity_tone": 3.8, "layout_ad_density": 3.6, "accountability": 4.2, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": 0.8, "relative_se_rank": 0.780952380952381, "normalized_reciprocal_se_rank": 0.22026374859708192, "reciprocal_se_rank": 0.06263619201725998, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 57, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.3125, "weighted_total_content_score": 86.84210526315789, "semantic_relevance": 5.0, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.5, "accountability": 4.5, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 0.1702127659574468, "normalized_reciprocal_se_rank": 0.3955747955747956, "reciprocal_se_rank": 0.10476190476190475, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "claude", "query_id": 72, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.4375, "weighted_total_content_score": 86.84210526315789, "semantic_relevance": 3.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 72, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.3125, "weighted_total_content_score": 86.84210526315789, "semantic_relevance": 4.5, "factual_accuracy": 5.0, "freshness": 3.75, "objectivity_tone": 4.0, "layout_ad_density": 4.25, "accountability": 4.0, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-5", "query_id": 18, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.3125, "weighted_total_content_score": 86.84210526315789, "semantic_relevance": 4.5, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.75, "accountability": 3.75, "transparency": 4.75, "authority": 4.75, "avg_ge_freq": 0.83335, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 30, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.3, "weighted_total_content_score": 86.73684210526315, "semantic_relevance": 4.8, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 4.6, "layout_ad_density": 3.6, "accountability": 3.8, "transparency": 4.0, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 33, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.35, "weighted_total_content_score": 86.73684210526315, "semantic_relevance": 4.8, "factual_accuracy": 4.6, "freshness": 4.8, "objectivity_tone": 3.4, "layout_ad_density": 3.4, "accountability": 4.6, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 1.4558139534883723, "normalized_reciprocal_se_rank": 0.20975468975468975, "reciprocal_se_rank": 0.06011095700416089, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gensee", "query_id": 84, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.3, "weighted_total_content_score": 86.73684210526315, "semantic_relevance": 5.0, "factual_accuracy": 4.4, "freshness": 4.8, "objectivity_tone": 4.2, "layout_ad_density": 3.4, "accountability": 4.2, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 1.0545454545454547, "normalized_reciprocal_se_rank": 0.19395559606085921, "reciprocal_se_rank": 0.05631457283986665, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gpt-4o", "query_id": 1, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.3, "weighted_total_content_score": 86.73684210526315, "semantic_relevance": 4.8, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 2.4, "accountability": 4.2, "transparency": 4.4, "authority": 4.8, "avg_ge_freq": 0.3333, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 6, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.325, "weighted_total_content_score": 86.73684210526315, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 3.0, "accountability": 4.8, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": 0.6667, "relative_se_rank": 2.9411764705882355, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 15, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.325, "weighted_total_content_score": 86.73684210526315, "semantic_relevance": 4.4, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.8, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": 0.66666, "relative_se_rank": 2.6315789473684212, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 8, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 4.291666666666667, "weighted_total_content_score": 86.66666666666667, "semantic_relevance": 5.0, "factual_accuracy": 4.666666666666667, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.3333333333333335, "accountability": 4.333333333333333, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.6046511627906976, "normalized_reciprocal_se_rank": 0.12525252525252525, "reciprocal_se_rank": 0.039805825242718446, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "google-search", "query_id": 17, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.28125, "weighted_total_content_score": 86.57894736842105, "semantic_relevance": 5.0, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.25, "transparency": 4.0, "authority": 4.25, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "tavily", "query_id": 96, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.34375, "weighted_total_content_score": 86.57894736842104, "semantic_relevance": 4.0, "factual_accuracy": 4.25, "freshness": 4.5, "objectivity_tone": 4.5, "layout_ad_density": 4.75, "accountability": 3.5, "transparency": 4.25, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.1988636363636365, "normalized_reciprocal_se_rank": 0.2745791245791246, "reciprocal_se_rank": 0.07568770226537216, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 31, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.325, "weighted_total_content_score": 86.52631578947368, "semantic_relevance": 4.4, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 3.8, "transparency": 4.6, "authority": 4.2, "avg_ge_freq": 0.73336, "relative_se_rank": 1.0266666666666668, "normalized_reciprocal_se_rank": 0.2766050598867936, "reciprocal_se_rank": 0.0761745168174577, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "claude", "query_id": 6, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.3, "weighted_total_content_score": 86.52631578947368, "semantic_relevance": 4.8, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 2.8, "accountability": 4.8, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.8, "relative_se_rank": 1.9705882352941178, "normalized_reciprocal_se_rank": 0.0775890775890776, "reciprocal_se_rank": 0.02835271524591913, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gensee", "query_id": 8, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.275, "weighted_total_content_score": 86.52631578947368, "semantic_relevance": 5.0, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.8, "accountability": 4.2, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.53332, "relative_se_rank": 1.8697674418604653, "normalized_reciprocal_se_rank": 0.1583838383838384, "reciprocal_se_rank": 0.047766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 8, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.275, "weighted_total_content_score": 86.52631578947367, "semantic_relevance": 4.8, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.2, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": 0.66666, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 27, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.35, "weighted_total_content_score": 86.52631578947367, "semantic_relevance": 4.2, "factual_accuracy": 4.2, "freshness": 4.2, "objectivity_tone": 4.2, "layout_ad_density": 4.2, "accountability": 4.8, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": 0.8666600000000001, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 2, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.35, "weighted_total_content_score": 86.3157894736842, "semantic_relevance": 4.2, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 3.4, "accountability": 5.0, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 17, "query_type": "VACOS", "num_sources": 2, "unweighted_mean_score": 4.3125, "weighted_total_content_score": 86.3157894736842, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 4.5, "objectivity_tone": 4.0, "layout_ad_density": 2.5, "accountability": 5.0, "transparency": 5.0, "authority": 4.5, "avg_ge_freq": 0.3333, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 80, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.3125, "weighted_total_content_score": 86.3157894736842, "semantic_relevance": 3.5, "factual_accuracy": 4.5, "freshness": 3.5, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.5, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 25, "query_type": "DebateQA", "num_sources": 2, "unweighted_mean_score": 4.3125, "weighted_total_content_score": 86.3157894736842, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 3.5, "objectivity_tone": 4.0, "layout_ad_density": 5.0, "accountability": 4.0, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 55, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.4375, "weighted_total_content_score": 86.3157894736842, "semantic_relevance": 1.5, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 4.5, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-4o", "query_id": 72, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.25, "weighted_total_content_score": 86.3157894736842, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 3.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 36, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.40625, "weighted_total_content_score": 86.3157894736842, "semantic_relevance": 3.5, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.25, "accountability": 4.5, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.49999999999999994, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 45, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 4.375, "weighted_total_content_score": 86.3157894736842, "semantic_relevance": 3.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 5.0, "accountability": 3.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.6667, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 52, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 4.375, "weighted_total_content_score": 86.3157894736842, "semantic_relevance": 2.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 53, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 4.375, "weighted_total_content_score": 86.3157894736842, "semantic_relevance": 2.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 3.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 56, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.375, "weighted_total_content_score": 86.3157894736842, "semantic_relevance": 2.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 3.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.83335, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 57, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.25, "weighted_total_content_score": 86.3157894736842, "semantic_relevance": 5.0, "factual_accuracy": 4.5, "freshness": 3.5, "objectivity_tone": 4.5, "layout_ad_density": 4.0, "accountability": 3.5, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.3333, "relative_se_rank": 1.074468085106383, "normalized_reciprocal_se_rank": 0.5, "reciprocal_se_rank": 0.12985436893203883, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "gpt-5", "query_id": 74, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.25, "weighted_total_content_score": 86.3157894736842, "semantic_relevance": 5.0, "factual_accuracy": 4.5, "freshness": 2.5, "objectivity_tone": 4.5, "layout_ad_density": 4.0, "accountability": 4.5, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 5, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.275, "weighted_total_content_score": 86.31578947368419, "semantic_relevance": 4.6, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.6, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.60002, "relative_se_rank": 1.269767441860465, "normalized_reciprocal_se_rank": 0.11585517311745105, "reciprocal_se_rank": 0.03754772363744576, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 15, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.3, "weighted_total_content_score": 86.31578947368419, "semantic_relevance": 4.8, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 3.4, "layout_ad_density": 3.8, "accountability": 3.4, "transparency": 4.4, "authority": 4.6, "avg_ge_freq": 0.80002, "relative_se_rank": 2.6315789473684212, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 8, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.275, "weighted_total_content_score": 86.10526315789474, "semantic_relevance": 4.8, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.2, "accountability": 4.4, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": 0.60002, "relative_se_rank": 1.5348837209302324, "normalized_reciprocal_se_rank": 0.0990831390831391, "reciprocal_se_rank": 0.03351755041075429, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "exa", "query_id": 67, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.307142857142857, "weighted_total_content_score": 86.10526315789473, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 1.8, "objectivity_tone": 4.6, "layout_ad_density": 3.5, "accountability": 4.4, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.49361702127659574, "normalized_reciprocal_se_rank": 0.5017508417508417, "reciprocal_se_rank": 0.13027508090614887, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 9, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.275, "weighted_total_content_score": 86.10526315789473, "semantic_relevance": 4.8, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.4, "accountability": 4.0, "transparency": 4.0, "authority": 4.4, "avg_ge_freq": 0.73336, "relative_se_rank": 1.8800000000000001, "normalized_reciprocal_se_rank": 0.023931623931623933, "reciprocal_se_rank": 0.015459297983569828, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 64, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.28125, "weighted_total_content_score": 86.05263157894737, "semantic_relevance": 3.75, "factual_accuracy": 4.75, "freshness": 3.75, "objectivity_tone": 4.75, "layout_ad_density": 3.5, "accountability": 5.0, "transparency": 4.75, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "exa", "query_id": 10, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.28125, "weighted_total_content_score": 86.05263157894737, "semantic_relevance": 4.5, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.5, "transparency": 4.25, "authority": 4.25, "avg_ge_freq": 1.0, "relative_se_rank": 2.1739130434782608, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 97, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.28125, "weighted_total_content_score": 86.05263157894736, "semantic_relevance": 4.75, "factual_accuracy": 4.5, "freshness": 4.25, "objectivity_tone": 4.0, "layout_ad_density": 3.75, "accountability": 4.0, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.833325, "relative_se_rank": 0.1125, "normalized_reciprocal_se_rank": 0.6098484848484849, "reciprocal_se_rank": 0.15625, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 33, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.325, "weighted_total_content_score": 85.89473684210527, "semantic_relevance": 4.2, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 4.2, "accountability": 4.4, "transparency": 4.4, "authority": 4.2, "avg_ge_freq": 0.66666, "relative_se_rank": 1.488372093023256, "normalized_reciprocal_se_rank": 0.11265031265031264, "reciprocal_se_rank": 0.036777623670827556, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gensee", "query_id": 61, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.89473684210527, "semantic_relevance": 5.0, "factual_accuracy": 4.8, "freshness": 3.2, "objectivity_tone": 3.8, "layout_ad_density": 3.8, "accountability": 4.6, "transparency": 4.6, "authority": 4.2, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 1.7866666666666666, "normalized_reciprocal_se_rank": 0.1583838383838384, "reciprocal_se_rank": 0.047766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "google-search", "query_id": 30, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.3, "weighted_total_content_score": 85.89473684210527, "semantic_relevance": 4.8, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.4, "accountability": 4.6, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 97, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.3, "weighted_total_content_score": 85.89473684210526, "semantic_relevance": 4.8, "factual_accuracy": 4.2, "freshness": 4.4, "objectivity_tone": 3.8, "layout_ad_density": 3.8, "accountability": 4.0, "transparency": 4.8, "authority": 4.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-4o", "query_id": 26, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.275, "weighted_total_content_score": 85.89473684210526, "semantic_relevance": 4.8, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 3.6, "accountability": 3.8, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 39, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.225, "weighted_total_content_score": 85.89473684210525, "semantic_relevance": 4.8, "factual_accuracy": 4.8, "freshness": 3.2, "objectivity_tone": 4.4, "layout_ad_density": 3.4, "accountability": 4.4, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 0.3333, "relative_se_rank": 1.3708333333333336, "normalized_reciprocal_se_rank": 0.10833117499784166, "reciprocal_se_rank": 0.035739772632976514, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 10, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.275, "weighted_total_content_score": 85.89473684210525, "semantic_relevance": 4.2, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.4, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": 0.86668, "relative_se_rank": 2.1739130434782608, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 71, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.89473684210525, "semantic_relevance": 4.4, "factual_accuracy": 4.8, "freshness": 3.6, "objectivity_tone": 4.4, "layout_ad_density": 3.2, "accountability": 4.8, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 1.0, "relative_se_rank": 2.1675675675675676, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 39, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.3125, "weighted_total_content_score": 85.78947368421053, "semantic_relevance": 4.5, "factual_accuracy": 4.5, "freshness": 4.0, "objectivity_tone": 3.5, "layout_ad_density": 4.5, "accountability": 4.0, "transparency": 5.0, "authority": 4.5, "avg_ge_freq": 0.666675, "relative_se_rank": 0.7708333333333334, "normalized_reciprocal_se_rank": 0.17337395115172893, "reciprocal_se_rank": 0.051368983407818355, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 17, "query_type": "VACOS", "num_sources": 2, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.78947368421052, "semantic_relevance": 4.5, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.5, "accountability": 4.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 19, "query_type": "VACOS", "num_sources": 2, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.78947368421052, "semantic_relevance": 5.0, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.5, "accountability": 4.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.09523809523809523, "normalized_reciprocal_se_rank": 0.6069584736251403, "reciprocal_se_rank": 0.15555555555555556, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-5", "query_id": 19, "query_type": "VACOS", "num_sources": 2, "unweighted_mean_score": 4.375, "weighted_total_content_score": 85.78947368421052, "semantic_relevance": 3.5, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 20, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.78947368421052, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 2.5, "accountability": 3.75, "transparency": 4.75, "authority": 4.5, "avg_ge_freq": 0.41664999999999996, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 51, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.78947368421052, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 3.75, "objectivity_tone": 4.5, "layout_ad_density": 4.5, "accountability": 4.25, "transparency": 3.75, "authority": 4.25, "avg_ge_freq": 1.0, "relative_se_rank": 1.601063829787234, "normalized_reciprocal_se_rank": 0.25, "reciprocal_se_rank": 0.06978155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 81, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.325, "weighted_total_content_score": 85.6842105263158, "semantic_relevance": 3.0, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 4.6, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.8, "authority": 4.6, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 1.502439024390244, "normalized_reciprocal_se_rank": 0.27515151515151515, "reciprocal_se_rank": 0.0758252427184466, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gensee", "query_id": 27, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.275, "weighted_total_content_score": 85.68421052631578, "semantic_relevance": 4.6, "factual_accuracy": 4.4, "freshness": 4.8, "objectivity_tone": 4.0, "layout_ad_density": 3.8, "accountability": 4.4, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 37, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.275, "weighted_total_content_score": 85.68421052631578, "semantic_relevance": 4.8, "factual_accuracy": 4.4, "freshness": 4.2, "objectivity_tone": 3.8, "layout_ad_density": 3.8, "accountability": 4.0, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-4o", "query_id": 96, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.68421052631578, "semantic_relevance": 5.0, "factual_accuracy": 4.4, "freshness": 4.4, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.2, "transparency": 4.8, "authority": 4.2, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.1272727272727274, "normalized_reciprocal_se_rank": 0.21736251402918075, "reciprocal_se_rank": 0.061939050701186625, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "tavily", "query_id": 22, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.275, "weighted_total_content_score": 85.68421052631578, "semantic_relevance": 4.8, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.2, "accountability": 4.4, "transparency": 4.4, "authority": 4.2, "avg_ge_freq": 1.0, "relative_se_rank": 0.8333333333333333, "normalized_reciprocal_se_rank": 0.21852974186307517, "reciprocal_se_rank": 0.06221952535059331, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "deepseek-chat-tavily", "query_id": 71, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.208333333333333, "weighted_total_content_score": 85.6140350877193, "semantic_relevance": 4.666666666666667, "factual_accuracy": 5.0, "freshness": 2.3333333333333335, "objectivity_tone": 4.333333333333333, "layout_ad_density": 4.0, "accountability": 4.666666666666667, "transparency": 4.333333333333333, "authority": 4.333333333333333, "avg_ge_freq": 1.0, "relative_se_rank": 1.8108108108108107, "normalized_reciprocal_se_rank": 0.3333333333333333, "reciprocal_se_rank": 0.08980582524271845, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 1, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 4.208333333333333, "weighted_total_content_score": 85.61403508771929, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.0, "accountability": 4.0, "transparency": 4.0, "authority": 4.666666666666667, "avg_ge_freq": 0.7777666666666666, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 19, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.21875, "weighted_total_content_score": 85.52631578947368, "semantic_relevance": 5.0, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.75, "accountability": 4.75, "transparency": 3.75, "authority": 3.75, "avg_ge_freq": 1.0, "relative_se_rank": 0.7916666666666666, "normalized_reciprocal_se_rank": 0.2903361096343553, "reciprocal_se_rank": 0.0794739680917504, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "exa", "query_id": 81, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.28125, "weighted_total_content_score": 85.52631578947368, "semantic_relevance": 2.75, "factual_accuracy": 5.0, "freshness": 4.25, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 3.75, "transparency": 4.25, "authority": 4.25, "avg_ge_freq": 1.0, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 22, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.52631578947368, "semantic_relevance": 4.75, "factual_accuracy": 4.5, "freshness": 4.0, "objectivity_tone": 4.0, "layout_ad_density": 3.75, "accountability": 4.0, "transparency": 4.25, "authority": 4.75, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 20, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.28125, "weighted_total_content_score": 85.52631578947367, "semantic_relevance": 4.0, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.25, "accountability": 3.0, "transparency": 4.5, "authority": 4.75, "avg_ge_freq": 0.3333, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 63, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.52631578947367, "semantic_relevance": 4.5, "factual_accuracy": 4.75, "freshness": 3.0, "objectivity_tone": 4.0, "layout_ad_density": 3.75, "accountability": 4.75, "transparency": 4.5, "authority": 4.75, "avg_ge_freq": 0.833325, "relative_se_rank": 4.761904761904762, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-chat-tavily", "query_id": 92, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.325, "weighted_total_content_score": 85.47368421052633, "semantic_relevance": 2.6, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.6, "layout_ad_density": 3.4, "accountability": 5.0, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": 1.0, "relative_se_rank": 0.5795918367346939, "normalized_reciprocal_se_rank": 0.2662488583541215, "reciprocal_se_rank": 0.07368601208023794, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 72, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.325, "weighted_total_content_score": 85.47368421052632, "semantic_relevance": 3.4, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 4.4, "layout_ad_density": 5.0, "accountability": 3.2, "transparency": 4.4, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.6097560975609757, "normalized_reciprocal_se_rank": 0.13144250385629697, "reciprocal_se_rank": 0.0412932230140131, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gensee", "query_id": 6, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.225, "weighted_total_content_score": 85.47368421052632, "semantic_relevance": 4.8, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.4, "accountability": 4.4, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.53332, "relative_se_rank": 2.9411764705882355, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 8, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.325, "weighted_total_content_score": 85.47368421052632, "semantic_relevance": 3.6, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 3.4, "layout_ad_density": 3.6, "accountability": 5.0, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": 0.86668, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 89, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.4736842105263, "semantic_relevance": 5.0, "factual_accuracy": 4.2, "freshness": 4.6, "objectivity_tone": 4.0, "layout_ad_density": 3.2, "accountability": 4.2, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 0.53332, "relative_se_rank": 1.2975609756097561, "normalized_reciprocal_se_rank": 0.10456950456950458, "reciprocal_se_rank": 0.03483587609801202, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 75, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.3125, "weighted_total_content_score": 85.26315789473685, "semantic_relevance": 3.5, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 4.0, "authority": 4.5, "avg_ge_freq": 0.3333, "relative_se_rank": 0.5957446808510638, "normalized_reciprocal_se_rank": 0.14258901067411706, "reciprocal_se_rank": 0.04397163120567376, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 96, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.3, "weighted_total_content_score": 85.26315789473685, "semantic_relevance": 3.6, "factual_accuracy": 3.8, "freshness": 4.6, "objectivity_tone": 4.8, "layout_ad_density": 4.2, "accountability": 4.4, "transparency": 4.4, "authority": 4.6, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.8272727272727276, "normalized_reciprocal_se_rank": 0.1583838383838384, "reciprocal_se_rank": 0.047766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 92, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.34375, "weighted_total_content_score": 85.26315789473685, "semantic_relevance": 2.75, "factual_accuracy": 4.5, "freshness": 4.75, "objectivity_tone": 4.25, "layout_ad_density": 4.0, "accountability": 4.75, "transparency": 5.0, "authority": 4.75, "avg_ge_freq": 0.833325, "relative_se_rank": 1.1581632653061225, "normalized_reciprocal_se_rank": 0.10602346999405823, "reciprocal_se_rank": 0.03518525128498001, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "exa", "query_id": 14, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.75, "weighted_total_content_score": 85.26315789473685, "semantic_relevance": 4.25, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 4.75, "layout_ad_density": 4.4, "accountability": 5.0, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 1.0, "relative_se_rank": 1.8444444444444446, "normalized_reciprocal_se_rank": 0.038159371492704826, "reciprocal_se_rank": 0.018878101402373244, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 1, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.26315789473684, "semantic_relevance": 4.2, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.2, "accountability": 4.8, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.6667, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 18, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.225, "weighted_total_content_score": 85.26315789473684, "semantic_relevance": 4.2, "factual_accuracy": 4.8, "freshness": 4.4, "objectivity_tone": 4.4, "layout_ad_density": 3.0, "accountability": 4.6, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 1.774468085106383, "normalized_reciprocal_se_rank": 0.03353535353535354, "reciprocal_se_rank": 0.017766990291262132, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 65, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.26315789473684, "semantic_relevance": 4.0, "factual_accuracy": 4.5, "freshness": 3.5, "objectivity_tone": 4.5, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 5.0, "authority": 4.5, "avg_ge_freq": 0.3333, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 5, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.225, "weighted_total_content_score": 85.26315789473684, "semantic_relevance": 4.4, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.2, "accountability": 4.0, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": 0.60002, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 65, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.421428571428572, "weighted_total_content_score": 85.26315789473684, "semantic_relevance": 2.6, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 4.0, "avg_ge_freq": 0.53332, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 21, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 4.166666666666667, "weighted_total_content_score": 85.26315789473684, "semantic_relevance": 5.0, "factual_accuracy": 4.666666666666667, "freshness": 3.3333333333333335, "objectivity_tone": 4.666666666666667, "layout_ad_density": 2.6666666666666665, "accountability": 4.666666666666667, "transparency": 4.0, "authority": 4.333333333333333, "avg_ge_freq": 0.6666666666666666, "relative_se_rank": 0.041666666666666664, "normalized_reciprocal_se_rank": 0.8150392817059484, "reciprocal_se_rank": 0.20555555555555557, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-4o", "query_id": 21, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.1875, "weighted_total_content_score": 85.26315789473684, "semantic_relevance": 5.0, "factual_accuracy": 4.75, "freshness": 3.75, "objectivity_tone": 4.25, "layout_ad_density": 2.75, "accountability": 4.75, "transparency": 4.0, "authority": 4.25, "avg_ge_freq": 0.3333, "relative_se_rank": 0.5520833333333334, "normalized_reciprocal_se_rank": 0.6112794612794613, "reciprocal_se_rank": 0.1565938511326861, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "gpt-4o", "query_id": 64, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.1875, "weighted_total_content_score": 85.26315789473684, "semantic_relevance": 4.5, "factual_accuracy": 4.75, "freshness": 3.0, "objectivity_tone": 4.75, "layout_ad_density": 3.5, "accountability": 4.25, "transparency": 4.25, "authority": 4.5, "avg_ge_freq": 0.41664999999999996, "relative_se_rank": 1.7976190476190474, "normalized_reciprocal_se_rank": 0.197979797979798, "reciprocal_se_rank": 0.05728155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "gpt-4o", "query_id": 78, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.1875, "weighted_total_content_score": 85.26315789473684, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.5, "objectivity_tone": 4.0, "layout_ad_density": 3.5, "accountability": 4.5, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.5, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 98, "query_type": "QuoraQuestions", "num_sources": 1, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.26315789473684, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 3.0, "transparency": 5.0, "authority": 4.0, "avg_ge_freq": 0.6667, "relative_se_rank": 0.375, "normalized_reciprocal_se_rank": 0.19079685746352412, "reciprocal_se_rank": 0.05555555555555555, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-5", "query_id": 69, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.26315789473684, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.0, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.06666666666666667, "normalized_reciprocal_se_rank": 1.0, "reciprocal_se_rank": 0.25, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 3, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.225, "weighted_total_content_score": 85.26315789473684, "semantic_relevance": 4.6, "factual_accuracy": 5.0, "freshness": 4.4, "objectivity_tone": 3.8, "layout_ad_density": 3.2, "accountability": 4.2, "transparency": 4.0, "authority": 4.6, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 1.390909090909091, "normalized_reciprocal_se_rank": 0.2692063492063492, "reciprocal_se_rank": 0.07439667128987518, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 94, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.26315789473684, "semantic_relevance": 4.8, "factual_accuracy": 4.6, "freshness": 4.2, "objectivity_tone": 3.6, "layout_ad_density": 2.8, "accountability": 4.8, "transparency": 4.8, "authority": 4.4, "avg_ge_freq": 0.93334, "relative_se_rank": 1.565, "normalized_reciprocal_se_rank": 0.24740740740740738, "reciprocal_se_rank": 0.06915857605177993, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "tavily", "query_id": 5, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.26315789473684, "semantic_relevance": 4.333333333333333, "factual_accuracy": 5.0, "freshness": 4.666666666666667, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 2.6666666666666665, "accountability": 4.666666666666667, "transparency": 4.333333333333333, "authority": 4.666666666666667, "avg_ge_freq": 1.0, "relative_se_rank": 1.0852713178294573, "normalized_reciprocal_se_rank": 0.16468994246772026, "reciprocal_se_rank": 0.0492822920007386, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "tavily", "query_id": 25, "query_type": "DebateQA", "num_sources": 2, "unweighted_mean_score": 4.1875, "weighted_total_content_score": 85.26315789473684, "semantic_relevance": 4.5, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 2.5, "accountability": 4.0, "transparency": 4.5, "authority": 3.5, "avg_ge_freq": 1.0, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 75, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.291666666666667, "weighted_total_content_score": 85.26315789473684, "semantic_relevance": 4.0, "factual_accuracy": 4.333333333333333, "freshness": 4.333333333333333, "objectivity_tone": 4.0, "layout_ad_density": 3.6666666666666665, "accountability": 5.0, "transparency": 4.666666666666667, "authority": 4.333333333333333, "avg_ge_freq": 1.0, "relative_se_rank": 0.24822695035460993, "normalized_reciprocal_se_rank": 0.3405519844913784, "reciprocal_se_rank": 0.09154040404040403, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "deepseek-chat-tavily", "query_id": 31, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.05263157894737, "semantic_relevance": 4.6, "factual_accuracy": 4.2, "freshness": 4.2, "objectivity_tone": 4.0, "layout_ad_density": 4.2, "accountability": 3.6, "transparency": 4.8, "authority": 4.4, "avg_ge_freq": 1.0, "relative_se_rank": 0.9555555555555557, "normalized_reciprocal_se_rank": 0.382010582010582, "reciprocal_se_rank": 0.10150254276467867, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "deepseek-reasoning-tavily", "query_id": 32, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.05263157894737, "semantic_relevance": 4.4, "factual_accuracy": 4.2, "freshness": 3.0, "objectivity_tone": 4.2, "layout_ad_density": 4.2, "accountability": 4.2, "transparency": 5.0, "authority": 4.8, "avg_ge_freq": 1.0, "relative_se_rank": 1.3866666666666667, "normalized_reciprocal_se_rank": 0.25137085137085136, "reciprocal_se_rank": 0.0701109570041609, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "exa", "query_id": 20, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.25, "weighted_total_content_score": 85.05263157894737, "semantic_relevance": 3.2, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.6, "layout_ad_density": 3.4, "accountability": 3.8, "transparency": 4.8, "authority": 4.2, "avg_ge_freq": 1.0, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 99, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 85.05263157894737, "semantic_relevance": 5.0, "factual_accuracy": 4.4, "freshness": 4.4, "objectivity_tone": 4.2, "layout_ad_density": 3.8, "accountability": 4.0, "transparency": 3.6, "authority": 4.2, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 1.6, "normalized_reciprocal_se_rank": 0.17900383141762455, "reciprocal_se_rank": 0.052721794442584534, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "google-search", "query_id": 8, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 85.05263157894737, "semantic_relevance": 5.0, "factual_accuracy": 4.8, "freshness": 4.4, "objectivity_tone": 3.8, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 4.0, "authority": 4.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 99, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.225, "weighted_total_content_score": 85.05263157894737, "semantic_relevance": 4.8, "factual_accuracy": 4.6, "freshness": 4.4, "objectivity_tone": 3.8, "layout_ad_density": 3.8, "accountability": 4.2, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 73, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.225, "weighted_total_content_score": 85.05263157894736, "semantic_relevance": 4.2, "factual_accuracy": 4.2, "freshness": 3.0, "objectivity_tone": 4.8, "layout_ad_density": 3.6, "accountability": 4.6, "transparency": 4.8, "authority": 4.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 45, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.225, "weighted_total_content_score": 85.05263157894736, "semantic_relevance": 4.6, "factual_accuracy": 4.6, "freshness": 4.8, "objectivity_tone": 4.0, "layout_ad_density": 3.6, "accountability": 3.8, "transparency": 4.0, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-5", "query_id": 84, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.275, "weighted_total_content_score": 85.05263157894736, "semantic_relevance": 3.4, "factual_accuracy": 4.4, "freshness": 4.6, "objectivity_tone": 4.6, "layout_ad_density": 3.8, "accountability": 4.2, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": 0.3333, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-reasoning-gensee", "query_id": 61, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.21875, "weighted_total_content_score": 85.0, "semantic_relevance": 4.5, "factual_accuracy": 4.75, "freshness": 2.25, "objectivity_tone": 4.0, "layout_ad_density": 4.5, "accountability": 4.25, "transparency": 4.75, "authority": 4.75, "avg_ge_freq": 1.0, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 24, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.28125, "weighted_total_content_score": 85.0, "semantic_relevance": 4.25, "factual_accuracy": 4.0, "freshness": 4.5, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 5.0, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 0.6063829787234043, "normalized_reciprocal_se_rank": 0.38874859708193044, "reciprocal_se_rank": 0.10312162891046385, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 34, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.28125, "weighted_total_content_score": 85.0, "semantic_relevance": 4.5, "factual_accuracy": 4.25, "freshness": 4.5, "objectivity_tone": 3.5, "layout_ad_density": 4.25, "accountability": 4.0, "transparency": 4.75, "authority": 4.5, "avg_ge_freq": 0.83335, "relative_se_rank": 0.63, "normalized_reciprocal_se_rank": 0.26771746771746774, "reciprocal_se_rank": 0.07403890607774102, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "claude", "query_id": 97, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 4.25, "weighted_total_content_score": 84.91228070175438, "semantic_relevance": 4.666666666666667, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.333333333333333, "transparency": 4.666666666666667, "authority": 4.333333333333333, "avg_ge_freq": 0.5555666666666667, "relative_se_rank": 1.7249999999999999, "normalized_reciprocal_se_rank": 0.12525252525252525, "reciprocal_se_rank": 0.039805825242718446, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 54, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 84.84210526315789, "semantic_relevance": 4.4, "factual_accuracy": 4.6, "freshness": 4.6, "objectivity_tone": 4.8, "layout_ad_density": 4.0, "accountability": 3.2, "transparency": 4.0, "authority": 3.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 6, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.225, "weighted_total_content_score": 84.84210526315789, "semantic_relevance": 4.2, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.0, "accountability": 4.2, "transparency": 4.4, "authority": 4.2, "avg_ge_freq": 0.3333, "relative_se_rank": 2.9411764705882355, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 71, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.225, "weighted_total_content_score": 84.84210526315789, "semantic_relevance": 4.0, "factual_accuracy": 4.6, "freshness": 3.2, "objectivity_tone": 4.4, "layout_ad_density": 3.0, "accountability": 4.8, "transparency": 5.0, "authority": 4.8, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.1675675675675676, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 61, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.225, "weighted_total_content_score": 84.84210526315789, "semantic_relevance": 4.2, "factual_accuracy": 4.8, "freshness": 2.4, "objectivity_tone": 4.0, "layout_ad_density": 4.4, "accountability": 4.6, "transparency": 5.0, "authority": 4.4, "avg_ge_freq": 0.93334, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 4, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.21875, "weighted_total_content_score": 84.73684210526316, "semantic_relevance": 4.75, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 3.75, "layout_ad_density": 2.75, "accountability": 3.75, "transparency": 4.75, "authority": 4.5, "avg_ge_freq": 0.666675, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 36, "query_type": "DebateQA", "num_sources": 2, "unweighted_mean_score": 4.1875, "weighted_total_content_score": 84.73684210526315, "semantic_relevance": 5.0, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.5, "accountability": 4.5, "transparency": 3.0, "authority": 4.0, "avg_ge_freq": 0.5, "relative_se_rank": 1.4024390243902438, "normalized_reciprocal_se_rank": 0.09539842873176206, "reciprocal_se_rank": 0.03263214670981661, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "gpt-4o", "query_id": 63, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.1875, "weighted_total_content_score": 84.73684210526315, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.5, "objectivity_tone": 3.5, "layout_ad_density": 3.5, "accountability": 5.0, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.83335, "relative_se_rank": 4.761904761904762, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 42, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.3125, "weighted_total_content_score": 84.73684210526315, "semantic_relevance": 2.0, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.5, "accountability": 3.5, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.5, "relative_se_rank": 1.030612244897959, "normalized_reciprocal_se_rank": 0.5, "reciprocal_se_rank": 0.12985436893203883, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 54, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.1875, "weighted_total_content_score": 84.73684210526315, "semantic_relevance": 4.5, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 3.5, "accountability": 2.5, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.3333, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 6, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.25, "weighted_total_content_score": 84.73684210526315, "semantic_relevance": 4.25, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 3.5, "layout_ad_density": 3.25, "accountability": 4.25, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 2.235294117647059, "normalized_reciprocal_se_rank": 0.13852813852813853, "reciprocal_se_rank": 0.04299583911234396, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 14, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.25, "weighted_total_content_score": 84.63157894736841, "semantic_relevance": 4.0, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.4, "accountability": 4.4, "transparency": 4.6, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 32, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 84.63157894736841, "semantic_relevance": 4.6, "factual_accuracy": 4.6, "freshness": 3.8, "objectivity_tone": 4.0, "layout_ad_density": 4.6, "accountability": 4.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.8044444444444445, "normalized_reciprocal_se_rank": 0.08439955106621773, "reciprocal_se_rank": 0.029989212513484353, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 51, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.296428571428572, "weighted_total_content_score": 84.63157894736841, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 4.4, "objectivity_tone": 4.6, "layout_ad_density": 4.0, "accountability": 4.2, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.53332, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-chat-tavily", "query_id": 32, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.25, "weighted_total_content_score": 84.63157894736841, "semantic_relevance": 4.4, "factual_accuracy": 4.0, "freshness": 4.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.2, "transparency": 4.8, "authority": 4.6, "avg_ge_freq": 1.0, "relative_se_rank": 1.408888888888889, "normalized_reciprocal_se_rank": 0.1357704024370691, "reciprocal_se_rank": 0.042333179226383105, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gensee", "query_id": 22, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 84.63157894736841, "semantic_relevance": 5.0, "factual_accuracy": 4.4, "freshness": 3.8, "objectivity_tone": 3.8, "layout_ad_density": 3.6, "accountability": 4.4, "transparency": 4.4, "authority": 4.2, "avg_ge_freq": 0.73332, "relative_se_rank": 1.6666666666666667, "normalized_reciprocal_se_rank": 0.04524826659214011, "reciprocal_se_rank": 0.020581500952965702, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gensee", "query_id": 72, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.15, "weighted_total_content_score": 84.63157894736841, "semantic_relevance": 4.6, "factual_accuracy": 4.8, "freshness": 3.6, "objectivity_tone": 4.6, "layout_ad_density": 3.8, "accountability": 3.8, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 1.4829268292682927, "normalized_reciprocal_se_rank": 0.3306397306397306, "reciprocal_se_rank": 0.08915857605177993, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "google-search", "query_id": 3, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.15, "weighted_total_content_score": 84.63157894736841, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.2, "accountability": 4.0, "transparency": 3.6, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 36, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 84.63157894736841, "semantic_relevance": 5.0, "factual_accuracy": 4.2, "freshness": 4.2, "objectivity_tone": 4.0, "layout_ad_density": 3.4, "accountability": 4.2, "transparency": 4.4, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-4o", "query_id": 94, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 84.63157894736841, "semantic_relevance": 5.0, "factual_accuracy": 4.6, "freshness": 4.8, "objectivity_tone": 3.6, "layout_ad_density": 2.0, "accountability": 4.8, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.6, "normalized_reciprocal_se_rank": 0.22975206611570248, "reciprocal_se_rank": 0.0649161518093557, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gpt-5", "query_id": 17, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.225, "weighted_total_content_score": 84.63157894736841, "semantic_relevance": 4.0, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.6, "accountability": 4.8, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": 0.80002, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 48, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.275, "weighted_total_content_score": 84.63157894736841, "semantic_relevance": 2.6, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 3.6, "accountability": 3.6, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 38, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 84.63157894736841, "semantic_relevance": 4.8, "factual_accuracy": 4.4, "freshness": 4.2, "objectivity_tone": 4.0, "layout_ad_density": 3.8, "accountability": 4.0, "transparency": 4.0, "authority": 4.4, "avg_ge_freq": 1.0, "relative_se_rank": 1.3695652173913042, "normalized_reciprocal_se_rank": 0.17804713804713804, "reciprocal_se_rank": 0.052491909385113264, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 80, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.208333333333333, "weighted_total_content_score": 84.56140350877193, "semantic_relevance": 4.0, "factual_accuracy": 4.333333333333333, "freshness": 5.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 3.6666666666666665, "accountability": 4.666666666666667, "transparency": 3.6666666666666665, "authority": 3.6666666666666665, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "claude", "query_id": 81, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 4.166666666666667, "weighted_total_content_score": 84.56140350877193, "semantic_relevance": 5.0, "factual_accuracy": 4.666666666666667, "freshness": 4.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 3.6666666666666665, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.6341463414634145, "normalized_reciprocal_se_rank": 0.3333333333333333, "reciprocal_se_rank": 0.08980582524271845, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "deepseek-chat-gensee", "query_id": 72, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.208333333333333, "weighted_total_content_score": 84.56140350877193, "semantic_relevance": 4.333333333333333, "factual_accuracy": 4.333333333333333, "freshness": 4.0, "objectivity_tone": 4.333333333333333, "layout_ad_density": 4.333333333333333, "accountability": 4.0, "transparency": 4.333333333333333, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.8699186991869919, "normalized_reciprocal_se_rank": 0.4024370691037357, "reciprocal_se_rank": 0.10641084912929573, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 93, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.65625, "weighted_total_content_score": 84.4736842105263, "semantic_relevance": 4.75, "factual_accuracy": 4.75, "freshness": 4.333333333333333, "objectivity_tone": 4.75, "layout_ad_density": 4.25, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "deepseek-reasoning-tavily", "query_id": 21, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.15625, "weighted_total_content_score": 84.4736842105263, "semantic_relevance": 5.0, "factual_accuracy": 4.75, "freshness": 3.75, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.5, "transparency": 4.0, "authority": 4.25, "avg_ge_freq": 1.0, "relative_se_rank": 0.109375, "normalized_reciprocal_se_rank": 0.6589786756453423, "reciprocal_se_rank": 0.16805555555555557, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "exa", "query_id": 96, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.15625, "weighted_total_content_score": 84.4736842105263, "semantic_relevance": 4.25, "factual_accuracy": 5.0, "freshness": 3.5, "objectivity_tone": 4.5, "layout_ad_density": 4.75, "accountability": 3.25, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.13068181818181818, "normalized_reciprocal_se_rank": 0.4884680134680135, "reciprocal_se_rank": 0.12708333333333333, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gensee", "query_id": 26, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.1875, "weighted_total_content_score": 84.4736842105263, "semantic_relevance": 5.0, "factual_accuracy": 4.25, "freshness": 4.75, "objectivity_tone": 4.0, "layout_ad_density": 3.5, "accountability": 3.5, "transparency": 4.25, "authority": 4.25, "avg_ge_freq": 0.49997499999999995, "relative_se_rank": 1.75, "normalized_reciprocal_se_rank": 0.25, "reciprocal_se_rank": 0.06978155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "gpt-5", "query_id": 85, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.28125, "weighted_total_content_score": 84.4736842105263, "semantic_relevance": 3.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.75, "layout_ad_density": 3.25, "accountability": 4.25, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.83335, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 15, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 84.42105263157895, "semantic_relevance": 4.6, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.4, "accountability": 4.8, "transparency": 4.0, "authority": 4.4, "avg_ge_freq": 0.8, "relative_se_rank": 2.6315789473684212, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 21, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 84.42105263157895, "semantic_relevance": 4.6, "factual_accuracy": 4.6, "freshness": 4.4, "objectivity_tone": 3.8, "layout_ad_density": 3.0, "accountability": 4.8, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.53332, "relative_se_rank": 0.26666666666666666, "normalized_reciprocal_se_rank": 0.5253434177572108, "reciprocal_se_rank": 0.13594417077175697, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gensee", "query_id": 30, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 84.42105263157895, "semantic_relevance": 4.8, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.4, "accountability": 4.0, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": 0.73334, "relative_se_rank": 0.9782608695652174, "normalized_reciprocal_se_rank": 0.21095194367921644, "reciprocal_se_rank": 0.06039864666078258, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gpt-5", "query_id": 35, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.665, "weighted_total_content_score": 84.42105263157895, "semantic_relevance": 3.25, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.6, "accountability": 4.8, "transparency": 5.0, "authority": 4.8, "avg_ge_freq": 0.66666, "relative_se_rank": 2.1739130434782608, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 20, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.225, "weighted_total_content_score": 84.42105263157893, "semantic_relevance": 3.6, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 5.0, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 98, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.225, "weighted_total_content_score": 84.42105263157893, "semantic_relevance": 4.4, "factual_accuracy": 4.4, "freshness": 4.8, "objectivity_tone": 3.8, "layout_ad_density": 3.6, "accountability": 4.4, "transparency": 4.4, "authority": 4.0, "avg_ge_freq": 0.8666600000000001, "relative_se_rank": 0.22000000000000003, "normalized_reciprocal_se_rank": 0.5411971936109867, "reciprocal_se_rank": 0.13975369458128079, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gensee", "query_id": 34, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.225, "weighted_total_content_score": 84.42105263157893, "semantic_relevance": 4.4, "factual_accuracy": 4.2, "freshness": 4.4, "objectivity_tone": 4.0, "layout_ad_density": 3.6, "accountability": 3.8, "transparency": 4.6, "authority": 4.8, "avg_ge_freq": 0.73332, "relative_se_rank": 0.9120000000000001, "normalized_reciprocal_se_rank": 0.18775101441768108, "reciprocal_se_rank": 0.054823666085802004, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gensee", "query_id": 96, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 84.42105263157893, "semantic_relevance": 5.0, "factual_accuracy": 4.4, "freshness": 3.8, "objectivity_tone": 4.0, "layout_ad_density": 3.2, "accountability": 4.0, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": 0.6, "relative_se_rank": 1.431818181818182, "normalized_reciprocal_se_rank": 0.20232323232323238, "reciprocal_se_rank": 0.0583252427184466, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 4, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 84.42105263157893, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.6, "accountability": 3.4, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 46, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 84.42105263157893, "semantic_relevance": 4.2, "factual_accuracy": 4.6, "freshness": 4.0, "objectivity_tone": 4.6, "layout_ad_density": 3.6, "accountability": 4.0, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 1.0, "relative_se_rank": 0.9422222222222223, "normalized_reciprocal_se_rank": 0.3374218374218374, "reciprocal_se_rank": 0.09078825705039298, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 37, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 84.21052631578948, "semantic_relevance": 4.6, "factual_accuracy": 4.4, "freshness": 4.8, "objectivity_tone": 3.8, "layout_ad_density": 3.8, "accountability": 3.6, "transparency": 4.4, "authority": 4.2, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.657142857142857, "normalized_reciprocal_se_rank": 0.08439955106621773, "reciprocal_se_rank": 0.029989212513484353, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gensee", "query_id": 80, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.166666666666667, "weighted_total_content_score": 84.21052631578948, "semantic_relevance": 4.666666666666667, "factual_accuracy": 4.666666666666667, "freshness": 4.0, "objectivity_tone": 4.0, "layout_ad_density": 3.6666666666666665, "accountability": 3.6666666666666665, "transparency": 4.333333333333333, "authority": 4.333333333333333, "avg_ge_freq": 0.6666666666666666, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 20, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 4.166666666666667, "weighted_total_content_score": 84.21052631578947, "semantic_relevance": 3.6666666666666665, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 3.0, "accountability": 3.3333333333333335, "transparency": 4.333333333333333, "authority": 4.333333333333333, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 67, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.1875, "weighted_total_content_score": 84.21052631578947, "semantic_relevance": 5.0, "factual_accuracy": 4.5, "freshness": 4.0, "objectivity_tone": 3.5, "layout_ad_density": 3.0, "accountability": 3.75, "transparency": 4.75, "authority": 5.0, "avg_ge_freq": 0.833325, "relative_se_rank": 0.8351063829787234, "normalized_reciprocal_se_rank": 0.21147407884996539, "reciprocal_se_rank": 0.06052411117996741, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 67, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.125, "weighted_total_content_score": 84.21052631578947, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.14893617021276595, "normalized_reciprocal_se_rank": 0.3757575757575758, "reciprocal_se_rank": 0.1, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gensee", "query_id": 10, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 84.21052631578947, "semantic_relevance": 4.6, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.8, "accountability": 4.2, "transparency": 3.8, "authority": 4.4, "avg_ge_freq": 0.73334, "relative_se_rank": 2.1739130434782608, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 46, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.125, "weighted_total_content_score": 84.21052631578947, "semantic_relevance": 5.0, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 4.5, "accountability": 3.0, "transparency": 3.0, "authority": 3.5, "avg_ge_freq": 0.5, "relative_se_rank": 1.2, "normalized_reciprocal_se_rank": 0.16896235078053262, "reciprocal_se_rank": 0.05030891438658429, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "gensee", "query_id": 52, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 4.25, "weighted_total_content_score": 84.21052631578947, "semantic_relevance": 4.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 67, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.254464285714286, "weighted_total_content_score": 84.21052631578947, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.25, "transparency": 4.75, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 96, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.15, "weighted_total_content_score": 84.21052631578947, "semantic_relevance": 4.8, "factual_accuracy": 4.6, "freshness": 3.8, "objectivity_tone": 4.2, "layout_ad_density": 4.4, "accountability": 3.2, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-4o", "query_id": 23, "query_type": "DebateQA", "num_sources": 2, "unweighted_mean_score": 4.25, "weighted_total_content_score": 84.21052631578947, "semantic_relevance": 4.5, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.5, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 1.4875, "normalized_reciprocal_se_rank": 0.07438016528925621, "reciprocal_se_rank": 0.02758164165931156, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "tavily", "query_id": 81, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 84.21052631578945, "semantic_relevance": 4.2, "factual_accuracy": 4.8, "freshness": 3.8, "objectivity_tone": 4.2, "layout_ad_density": 5.0, "accountability": 3.6, "transparency": 4.0, "authority": 3.8, "avg_ge_freq": 1.0, "relative_se_rank": 0.5414634146341464, "normalized_reciprocal_se_rank": 0.5849831649831649, "reciprocal_se_rank": 0.15027508090614888, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 74, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 84.0, "semantic_relevance": 4.6, "factual_accuracy": 4.4, "freshness": 3.4, "objectivity_tone": 4.0, "layout_ad_density": 3.6, "accountability": 4.6, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 0.60002, "relative_se_rank": 1.7234042553191489, "normalized_reciprocal_se_rank": 0.09595959595959597, "reciprocal_se_rank": 0.032766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "exa", "query_id": 30, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 84.0, "semantic_relevance": 4.8, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.2, "accountability": 4.6, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": 1.0, "relative_se_rank": 0.10434782608695652, "normalized_reciprocal_se_rank": 0.5247170113836781, "reciprocal_se_rank": 0.13579365079365077, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-4o", "query_id": 15, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.15, "weighted_total_content_score": 84.0, "semantic_relevance": 4.8, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.6, "accountability": 4.0, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": 0.66668, "relative_se_rank": 2.6315789473684212, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 65, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.275, "weighted_total_content_score": 84.0, "semantic_relevance": 1.8, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.6, "accountability": 4.4, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 1.0, "relative_se_rank": 1.5317073170731708, "normalized_reciprocal_se_rank": 0.16676656676656676, "reciprocal_se_rank": 0.049781286674490555, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 2, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 83.99999999999997, "semantic_relevance": 4.2, "factual_accuracy": 4.8, "freshness": 4.6, "objectivity_tone": 4.0, "layout_ad_density": 3.4, "accountability": 3.8, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": 0.8, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-chat-gensee", "query_id": 2, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.1875, "weighted_total_content_score": 83.94736842105263, "semantic_relevance": 4.5, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 3.75, "layout_ad_density": 4.0, "accountability": 3.5, "transparency": 4.0, "authority": 4.25, "avg_ge_freq": 1.0, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 2, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.28125, "weighted_total_content_score": 83.94736842105263, "semantic_relevance": 3.5, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 2.75, "layout_ad_density": 3.5, "accountability": 4.5, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 76, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.21875, "weighted_total_content_score": 83.94736842105263, "semantic_relevance": 4.25, "factual_accuracy": 4.0, "freshness": 4.25, "objectivity_tone": 4.0, "layout_ad_density": 3.5, "accountability": 4.75, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 1.3095238095238095, "normalized_reciprocal_se_rank": 0.2871900826446281, "reciprocal_se_rank": 0.0787180052956752, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "gpt-4o", "query_id": 10, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.15625, "weighted_total_content_score": 83.94736842105263, "semantic_relevance": 4.75, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 3.75, "layout_ad_density": 3.25, "accountability": 3.25, "transparency": 4.25, "authority": 4.25, "avg_ge_freq": 0.41664999999999996, "relative_se_rank": 1.8641304347826086, "normalized_reciprocal_se_rank": 0.012516469038208168, "reciprocal_se_rank": 0.012716336006753905, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "tavily", "query_id": 1, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.1875, "weighted_total_content_score": 83.94736842105263, "semantic_relevance": 4.0, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.25, "layout_ad_density": 2.75, "accountability": 4.75, "transparency": 4.0, "authority": 4.25, "avg_ge_freq": 1.0, "relative_se_rank": 1.6888888888888889, "normalized_reciprocal_se_rank": 0.13852813852813853, "reciprocal_se_rank": 0.04299583911234396, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "exa", "query_id": 12, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 4.208333333333333, "weighted_total_content_score": 83.85964912280701, "semantic_relevance": 3.0, "factual_accuracy": 4.666666666666667, "freshness": 5.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 2.6666666666666665, "accountability": 3.6666666666666665, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 38, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 4.166666666666667, "weighted_total_content_score": 83.85964912280701, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 4.333333333333333, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 3.6666666666666665, "transparency": 4.333333333333333, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 98, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.225, "weighted_total_content_score": 83.78947368421053, "semantic_relevance": 4.2, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 3.2, "accountability": 4.8, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 22, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.15, "weighted_total_content_score": 83.78947368421052, "semantic_relevance": 4.4, "factual_accuracy": 4.2, "freshness": 4.4, "objectivity_tone": 4.6, "layout_ad_density": 3.2, "accountability": 4.6, "transparency": 3.8, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 4, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.15, "weighted_total_content_score": 83.78947368421052, "semantic_relevance": 4.4, "factual_accuracy": 4.8, "freshness": 4.8, "objectivity_tone": 4.0, "layout_ad_density": 2.8, "accountability": 4.0, "transparency": 4.4, "authority": 4.0, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.097560975609756, "normalized_reciprocal_se_rank": 0.01714110805019896, "reciprocal_se_rank": 0.013827596351868195, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 89, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.225, "weighted_total_content_score": 83.78947368421052, "semantic_relevance": 3.6, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.8, "accountability": 4.0, "transparency": 4.4, "authority": 4.6, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.5170731707317073, "normalized_reciprocal_se_rank": 0.21966329966329967, "reciprocal_se_rank": 0.062491909385113266, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "google-search", "query_id": 5, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.15, "weighted_total_content_score": 83.78947368421052, "semantic_relevance": 4.4, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.4, "accountability": 3.6, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-4o", "query_id": 2, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 83.78947368421052, "semantic_relevance": 4.0, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.2, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.6, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 92, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.63, "weighted_total_content_score": 83.78947368421052, "semantic_relevance": 3.5, "factual_accuracy": 5.0, "freshness": 4.4, "objectivity_tone": 5.0, "layout_ad_density": 4.4, "accountability": 4.8, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 57, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.15, "weighted_total_content_score": 83.78947368421052, "semantic_relevance": 4.2, "factual_accuracy": 4.8, "freshness": 4.0, "objectivity_tone": 4.2, "layout_ad_density": 2.8, "accountability": 4.2, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 74, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 83.78947368421052, "semantic_relevance": 3.4, "factual_accuracy": 4.6, "freshness": 2.8, "objectivity_tone": 4.4, "layout_ad_density": 3.8, "accountability": 4.6, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.3617021276595744, "normalized_reciprocal_se_rank": 0.18993746993746996, "reciprocal_se_rank": 0.05534905224225613, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gpt-4o", "query_id": 67, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.09375, "weighted_total_content_score": 83.6842105263158, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 3.5, "transparency": 4.25, "authority": 5.0, "avg_ge_freq": 0.583325, "relative_se_rank": 0.6170212765957447, "normalized_reciprocal_se_rank": 0.4284205693296603, "reciprocal_se_rank": 0.11265445719329215, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "claude", "query_id": 14, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.15625, "weighted_total_content_score": 83.68421052631578, "semantic_relevance": 4.75, "factual_accuracy": 4.25, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.25, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-reasoning-tavily", "query_id": 82, "query_type": "QuoraQuestions", "num_sources": 2, "unweighted_mean_score": 4.1875, "weighted_total_content_score": 83.68421052631578, "semantic_relevance": 4.0, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.5, "accountability": 4.5, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 0.625, "normalized_reciprocal_se_rank": 0.1186552239183818, "reciprocal_se_rank": 0.03822055137844611, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gensee", "query_id": 5, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.15625, "weighted_total_content_score": 83.68421052631578, "semantic_relevance": 4.5, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 3.75, "layout_ad_density": 2.75, "accountability": 4.5, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.83335, "relative_se_rank": 1.7790697674418605, "normalized_reciprocal_se_rank": 0.10549943883277216, "reciprocal_se_rank": 0.035059331175836025, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "gensee", "query_id": 78, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.428571428571429, "weighted_total_content_score": 83.68421052631578, "semantic_relevance": 4.5, "factual_accuracy": 4.5, "freshness": 4.25, "objectivity_tone": 4.5, "layout_ad_density": 2.0, "accountability": 5.0, "transparency": 4.75, "authority": 4.5, "avg_ge_freq": 0.41664999999999996, "relative_se_rank": 1.6063829787234043, "normalized_reciprocal_se_rank": 0.197979797979798, "reciprocal_se_rank": 0.05728155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "google-search", "query_id": 60, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 4.15625, "weighted_total_content_score": 83.68421052631578, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 3.25, "transparency": 3.75, "authority": 4.25, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 85, "query_type": "QuoraQuestions", "num_sources": 2, "unweighted_mean_score": 4.1875, "weighted_total_content_score": 83.68421052631578, "semantic_relevance": 4.5, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 3.5, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.6667, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 76, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 83.57894736842105, "semantic_relevance": 4.4, "factual_accuracy": 4.4, "freshness": 3.4, "objectivity_tone": 3.4, "layout_ad_density": 4.0, "accountability": 4.4, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 6, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 83.57894736842104, "semantic_relevance": 4.2, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 2.8, "accountability": 4.8, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 21, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 83.57894736842104, "semantic_relevance": 5.0, "factual_accuracy": 4.4, "freshness": 4.4, "objectivity_tone": 4.0, "layout_ad_density": 3.6, "accountability": 3.6, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.8, "relative_se_rank": 1.2833333333333337, "normalized_reciprocal_se_rank": 0.24278338945005612, "reciprocal_se_rank": 0.06804746494066882, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 35, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.225, "weighted_total_content_score": 83.57894736842104, "semantic_relevance": 4.2, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 3.4, "accountability": 4.4, "transparency": 4.8, "authority": 4.4, "avg_ge_freq": 0.60002, "relative_se_rank": 1.008695652173913, "normalized_reciprocal_se_rank": 0.3165795889933821, "reciprocal_se_rank": 0.085780046869769, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gpt-4o", "query_id": 35, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 83.57894736842104, "semantic_relevance": 4.6, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 4.2, "accountability": 3.6, "transparency": 3.8, "authority": 4.2, "avg_ge_freq": 0.80002, "relative_se_rank": 1.4217391304347826, "normalized_reciprocal_se_rank": 0.22061999303378613, "reciprocal_se_rank": 0.06272179444258454, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "google-search", "query_id": 57, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 4.125, "weighted_total_content_score": 83.42105263157895, "semantic_relevance": 5.0, "factual_accuracy": 4.5, "freshness": 4.75, "objectivity_tone": 3.75, "layout_ad_density": 2.25, "accountability": 3.75, "transparency": 4.25, "authority": 4.75, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 93, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 83.36842105263158, "semantic_relevance": 4.6, "factual_accuracy": 4.4, "freshness": 4.8, "objectivity_tone": 4.6, "layout_ad_density": 3.8, "accountability": 3.6, "transparency": 3.4, "authority": 3.6, "avg_ge_freq": 0.3333, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 16, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 83.36842105263158, "semantic_relevance": 4.6, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.0, "accountability": 4.6, "transparency": 3.6, "authority": 3.6, "avg_ge_freq": 0.60002, "relative_se_rank": 1.9904761904761905, "normalized_reciprocal_se_rank": 0.031553631553631556, "reciprocal_se_rank": 0.01729079981507166, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gensee", "query_id": 74, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 83.36842105263158, "semantic_relevance": 4.8, "factual_accuracy": 4.6, "freshness": 3.8, "objectivity_tone": 3.8, "layout_ad_density": 3.2, "accountability": 3.6, "transparency": 4.4, "authority": 4.8, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 34, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 83.36842105263158, "semantic_relevance": 4.2, "factual_accuracy": 4.6, "freshness": 4.6, "objectivity_tone": 3.6, "layout_ad_density": 3.8, "accountability": 3.6, "transparency": 4.4, "authority": 4.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-5", "query_id": 27, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.6, "weighted_total_content_score": 83.36842105263158, "semantic_relevance": 4.75, "factual_accuracy": 5.0, "freshness": 4.0, "objectivity_tone": 4.75, "layout_ad_density": 4.0, "accountability": 4.8, "transparency": 4.6, "authority": 4.8, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 8, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 83.36842105263158, "semantic_relevance": 4.6, "factual_accuracy": 4.6, "freshness": 4.8, "objectivity_tone": 4.0, "layout_ad_density": 2.2, "accountability": 4.6, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": 1.0, "relative_se_rank": 1.8930232558139537, "normalized_reciprocal_se_rank": 0.07515151515151516, "reciprocal_se_rank": 0.027766990291262134, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 17, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 83.36842105263158, "semantic_relevance": 4.6, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.4, "accountability": 4.2, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": 0.60002, "relative_se_rank": 1.8790697674418606, "normalized_reciprocal_se_rank": 0.11082251082251082, "reciprocal_se_rank": 0.0363384188626907, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 77, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 83.15789473684211, "semantic_relevance": 3.8, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 4.0, "accountability": 4.8, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 99, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.15, "weighted_total_content_score": 83.15789473684211, "semantic_relevance": 4.0, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 3.4, "accountability": 4.4, "transparency": 4.0, "authority": 3.8, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.6487804878048782, "normalized_reciprocal_se_rank": 0.06802740044119356, "reciprocal_se_rank": 0.026055127775917862, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 15, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 83.15789473684211, "semantic_relevance": 4.6, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 2.4, "accountability": 4.0, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": 0.73334, "relative_se_rank": 2.6315789473684212, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 68, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.3125, "weighted_total_content_score": 83.15789473684211, "semantic_relevance": 3.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 1.272727272727273, "normalized_reciprocal_se_rank": 0.11851851851851852, "reciprocal_se_rank": 0.038187702265372166, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "google-search", "query_id": 65, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 83.15789473684211, "semantic_relevance": 3.2, "factual_accuracy": 4.6, "freshness": 4.4, "objectivity_tone": 4.4, "layout_ad_density": 4.2, "accountability": 3.6, "transparency": 4.4, "authority": 4.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "tavily", "query_id": 12, "query_type": "VACOS", "num_sources": 2, "unweighted_mean_score": 4.1875, "weighted_total_content_score": 83.15789473684211, "semantic_relevance": 3.0, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 2.5, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.3375, "normalized_reciprocal_se_rank": 0.1878787878787879, "reciprocal_se_rank": 0.054854368932038836, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "tavily", "query_id": 40, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.21875, "weighted_total_content_score": 83.15789473684211, "semantic_relevance": 3.75, "factual_accuracy": 4.0, "freshness": 4.5, "objectivity_tone": 3.75, "layout_ad_density": 4.25, "accountability": 4.75, "transparency": 4.25, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 1.90625, "normalized_reciprocal_se_rank": 0.11994949494949496, "reciprocal_se_rank": 0.03853155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "claude", "query_id": 44, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 4.0, "weighted_total_content_score": 83.1578947368421, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 3.0, "transparency": 2.0, "authority": 2.0, "avg_ge_freq": 0.6667, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 67, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.0625, "weighted_total_content_score": 83.1578947368421, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 2.0, "objectivity_tone": 4.0, "layout_ad_density": 3.5, "accountability": 4.0, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": 0.75, "relative_se_rank": 0.6276595744680851, "normalized_reciprocal_se_rank": 0.3417202326293235, "reciprocal_se_rank": 0.0918211238599588, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "google-search", "query_id": 4, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 4.166666666666667, "weighted_total_content_score": 83.1578947368421, "semantic_relevance": 4.0, "factual_accuracy": 4.666666666666667, "freshness": 5.0, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 2.6666666666666665, "accountability": 4.0, "transparency": 4.333333333333333, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-4o", "query_id": 88, "query_type": "QuoraQuestions", "num_sources": 1, "unweighted_mean_score": 4.125, "weighted_total_content_score": 83.1578947368421, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.0, "authority": 3.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 3, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.21875, "weighted_total_content_score": 83.1578947368421, "semantic_relevance": 1.5, "factual_accuracy": 5.0, "freshness": 4.25, "objectivity_tone": 5.0, "layout_ad_density": 3.5, "accountability": 4.5, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.5833499999999999, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 76, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.1875, "weighted_total_content_score": 83.1578947368421, "semantic_relevance": 4.5, "factual_accuracy": 4.0, "freshness": 2.5, "objectivity_tone": 3.5, "layout_ad_density": 4.5, "accountability": 5.0, "transparency": 5.0, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 18, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 82.94736842105263, "semantic_relevance": 4.4, "factual_accuracy": 4.8, "freshness": 4.4, "objectivity_tone": 4.0, "layout_ad_density": 3.4, "accountability": 3.4, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.3333, "relative_se_rank": 1.774468085106383, "normalized_reciprocal_se_rank": 0.03353535353535354, "reciprocal_se_rank": 0.017766990291262132, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 30, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 82.94736842105263, "semantic_relevance": 4.2, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 4.2, "accountability": 4.2, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.73334, "relative_se_rank": 1.7565217391304349, "normalized_reciprocal_se_rank": 0.11082251082251082, "reciprocal_se_rank": 0.0363384188626907, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "exa", "query_id": 36, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 82.94736842105263, "semantic_relevance": 4.6, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 2.8, "accountability": 4.0, "transparency": 4.0, "authority": 4.4, "avg_ge_freq": 1.0, "relative_se_rank": 1.078048780487805, "normalized_reciprocal_se_rank": 0.23239057239057243, "reciprocal_se_rank": 0.06555016181229774, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 84, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 82.94736842105263, "semantic_relevance": 4.8, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 2.8, "accountability": 4.0, "transparency": 4.4, "authority": 4.0, "avg_ge_freq": 0.93334, "relative_se_rank": 1.0863636363636364, "normalized_reciprocal_se_rank": 0.18216154216154218, "reciprocal_se_rank": 0.053480564742700666, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "tavily", "query_id": 3, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 82.94736842105263, "semantic_relevance": 4.0, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 3.4, "layout_ad_density": 3.0, "accountability": 4.2, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": 1.0, "relative_se_rank": 1.0181818181818183, "normalized_reciprocal_se_rank": 0.20792854732248672, "reciprocal_se_rank": 0.05967215093428686, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "tavily", "query_id": 97, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 82.94736842105263, "semantic_relevance": 4.6, "factual_accuracy": 4.4, "freshness": 4.4, "objectivity_tone": 3.8, "layout_ad_density": 3.4, "accountability": 4.6, "transparency": 4.0, "authority": 3.8, "avg_ge_freq": 1.0, "relative_se_rank": 0.12000000000000002, "normalized_reciprocal_se_rank": 0.5887926887926888, "reciprocal_se_rank": 0.15119047619047618, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 61, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.15625, "weighted_total_content_score": 82.89473684210526, "semantic_relevance": 4.0, "factual_accuracy": 4.25, "freshness": 3.25, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.75, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.833325, "relative_se_rank": 0.7111111111111111, "normalized_reciprocal_se_rank": 0.32910927456382005, "reciprocal_se_rank": 0.0887908208296558, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "google-search", "query_id": 89, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.09375, "weighted_total_content_score": 82.89473684210526, "semantic_relevance": 5.0, "factual_accuracy": 4.25, "freshness": 4.25, "objectivity_tone": 4.0, "layout_ad_density": 3.5, "accountability": 3.5, "transparency": 4.25, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 9, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.125, "weighted_total_content_score": 82.89473684210525, "semantic_relevance": 4.25, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.75, "accountability": 4.0, "transparency": 4.0, "authority": 4.5, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "tavily", "query_id": 98, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 4.208333333333333, "weighted_total_content_score": 82.80701754385966, "semantic_relevance": 2.6666666666666665, "factual_accuracy": 4.666666666666667, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.666666666666667, "transparency": 4.0, "authority": 4.666666666666667, "avg_ge_freq": 1.0, "relative_se_rank": 1.7, "normalized_reciprocal_se_rank": 0.1847041847041847, "reciprocal_se_rank": 0.05409153952843273, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "claude", "query_id": 67, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.041666666666667, "weighted_total_content_score": 82.80701754385963, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 1.3333333333333333, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.333333333333333, "transparency": 4.666666666666667, "authority": 5.0, "avg_ge_freq": 0.6666666666666666, "relative_se_rank": 0.3049645390070922, "normalized_reciprocal_se_rank": 0.26917463984912665, "reciprocal_se_rank": 0.07438905180840664, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 4, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 82.73684210526315, "semantic_relevance": 4.2, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 3.8, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 96, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 82.73684210526315, "semantic_relevance": 4.0, "factual_accuracy": 4.8, "freshness": 3.0, "objectivity_tone": 4.2, "layout_ad_density": 3.8, "accountability": 4.0, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 9, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 82.73684210526315, "semantic_relevance": 3.8, "factual_accuracy": 4.8, "freshness": 4.4, "objectivity_tone": 4.0, "layout_ad_density": 3.4, "accountability": 4.2, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.3333, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 24, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.575, "weighted_total_content_score": 82.73684210526315, "semantic_relevance": 5.0, "factual_accuracy": 4.75, "freshness": 4.4, "objectivity_tone": 4.5, "layout_ad_density": 4.2, "accountability": 4.6, "transparency": 4.4, "authority": 4.6, "avg_ge_freq": 0.66666, "relative_se_rank": 1.4382978723404256, "normalized_reciprocal_se_rank": 0.14446216551479707, "reciprocal_se_rank": 0.04442173394651677, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 96, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.075, "weighted_total_content_score": 82.73684210526315, "semantic_relevance": 4.2, "factual_accuracy": 4.8, "freshness": 3.6, "objectivity_tone": 4.4, "layout_ad_density": 4.2, "accountability": 3.0, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.8, "relative_se_rank": 0.9727272727272729, "normalized_reciprocal_se_rank": 0.32949494949494956, "reciprocal_se_rank": 0.08888349514563107, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 99, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 82.73684210526315, "semantic_relevance": 4.4, "factual_accuracy": 3.8, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 3.6, "accountability": 4.4, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": 0.66666, "relative_se_rank": 2.078048780487805, "normalized_reciprocal_se_rank": 0.020619993033786137, "reciprocal_se_rank": 0.014663542015400067, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "deepseek-reasoning-gensee", "query_id": 2, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 82.73684210526315, "semantic_relevance": 4.2, "factual_accuracy": 4.4, "freshness": 4.6, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.6, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 86, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 82.73684210526315, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 4.4, "authority": 3.4, "avg_ge_freq": 0.6, "relative_se_rank": 1.4375000000000002, "normalized_reciprocal_se_rank": 0.05365497076023392, "reciprocal_se_rank": 0.022601558507920283, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "tavily", "query_id": 33, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 82.73684210526315, "semantic_relevance": 4.0, "factual_accuracy": 4.6, "freshness": 4.0, "objectivity_tone": 4.4, "layout_ad_density": 3.8, "accountability": 4.2, "transparency": 3.6, "authority": 4.2, "avg_ge_freq": 1.0, "relative_se_rank": 1.4325581395348839, "normalized_reciprocal_se_rank": 0.24278338945005612, "reciprocal_se_rank": 0.06804746494066882, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "tavily", "query_id": 73, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.15625, "weighted_total_content_score": 82.63157894736842, "semantic_relevance": 4.0, "factual_accuracy": 4.0, "freshness": 3.75, "objectivity_tone": 4.0, "layout_ad_density": 3.75, "accountability": 4.5, "transparency": 4.75, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 1.0572916666666667, "normalized_reciprocal_se_rank": 0.44797979797979803, "reciprocal_se_rank": 0.11735436893203884, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 75, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.1875, "weighted_total_content_score": 82.63157894736841, "semantic_relevance": 3.5, "factual_accuracy": 4.0, "freshness": 4.5, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 4.0, "authority": 4.5, "avg_ge_freq": 0.66665, "relative_se_rank": 0.5957446808510638, "normalized_reciprocal_se_rank": 0.14258901067411706, "reciprocal_se_rank": 0.04397163120567376, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "claude", "query_id": 1, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.125, "weighted_total_content_score": 82.63157894736841, "semantic_relevance": 4.0, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.75, "accountability": 4.75, "transparency": 3.75, "authority": 4.25, "avg_ge_freq": 0.583325, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 74, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.125, "weighted_total_content_score": 82.63157894736841, "semantic_relevance": 3.5, "factual_accuracy": 4.5, "freshness": 3.0, "objectivity_tone": 4.5, "layout_ad_density": 3.5, "accountability": 4.5, "transparency": 4.75, "authority": 4.75, "avg_ge_freq": 1.0, "relative_se_rank": 1.7127659574468086, "normalized_reciprocal_se_rank": 0.03151515151515152, "reciprocal_se_rank": 0.01728155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 1, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.125, "weighted_total_content_score": 82.63157894736841, "semantic_relevance": 4.25, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 3.75, "layout_ad_density": 3.0, "accountability": 4.25, "transparency": 4.0, "authority": 4.25, "avg_ge_freq": 0.6667, "relative_se_rank": 1.6777777777777778, "normalized_reciprocal_se_rank": 0.197979797979798, "reciprocal_se_rank": 0.05728155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 69, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.15, "weighted_total_content_score": 82.52631578947368, "semantic_relevance": 3.6, "factual_accuracy": 4.4, "freshness": 4.4, "objectivity_tone": 4.0, "layout_ad_density": 3.6, "accountability": 4.2, "transparency": 4.4, "authority": 4.6, "avg_ge_freq": 0.53332, "relative_se_rank": 5.346666666666667, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gensee", "query_id": 40, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 82.52631578947368, "semantic_relevance": 5.0, "factual_accuracy": 4.2, "freshness": 4.2, "objectivity_tone": 3.2, "layout_ad_density": 3.0, "accountability": 5.0, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.73334, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 27, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 82.52631578947368, "semantic_relevance": 4.2, "factual_accuracy": 4.6, "freshness": 3.6, "objectivity_tone": 3.6, "layout_ad_density": 4.8, "accountability": 3.8, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 67, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.025, "weighted_total_content_score": 82.52631578947367, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 1.8, "objectivity_tone": 4.0, "layout_ad_density": 3.8, "accountability": 3.4, "transparency": 4.2, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.2127659574468085, "normalized_reciprocal_se_rank": 0.44590433497569365, "reciprocal_se_rank": 0.11685565330726622, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-4o", "query_id": 28, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 4.125, "weighted_total_content_score": 82.45614035087719, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 4.0, "objectivity_tone": 3.3333333333333335, "layout_ad_density": 4.0, "accountability": 4.333333333333333, "transparency": 4.333333333333333, "authority": 4.0, "avg_ge_freq": 0.5555666666666667, "relative_se_rank": 0.7851851851851852, "normalized_reciprocal_se_rank": 0.44867724867724873, "reciprocal_se_rank": 0.11752196024040684, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 73, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.075, "weighted_total_content_score": 82.3157894736842, "semantic_relevance": 4.8, "factual_accuracy": 4.6, "freshness": 3.0, "objectivity_tone": 3.6, "layout_ad_density": 4.0, "accountability": 4.4, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.2625000000000002, "normalized_reciprocal_se_rank": 0.3583838383838384, "reciprocal_se_rank": 0.0958252427184466, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 78, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.15, "weighted_total_content_score": 82.3157894736842, "semantic_relevance": 3.8, "factual_accuracy": 4.0, "freshness": 3.4, "objectivity_tone": 4.0, "layout_ad_density": 3.6, "accountability": 4.8, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 0.73332, "relative_se_rank": 1.297872340425532, "normalized_reciprocal_se_rank": 0.28902356902356907, "reciprocal_se_rank": 0.07915857605177994, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "deepseek-chat-tavily", "query_id": 82, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 82.3157894736842, "semantic_relevance": 4.0, "factual_accuracy": 4.2, "freshness": 4.8, "objectivity_tone": 4.0, "layout_ad_density": 3.2, "accountability": 4.4, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 1.0, "relative_se_rank": 1.4500000000000004, "normalized_reciprocal_se_rank": 0.1663654084706716, "reciprocal_se_rank": 0.04968489184125362, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "google-search", "query_id": 78, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 82.3157894736842, "semantic_relevance": 4.2, "factual_accuracy": 4.6, "freshness": 3.4, "objectivity_tone": 4.2, "layout_ad_density": 3.25, "accountability": 4.2, "transparency": 4.6, "authority": 4.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-4o", "query_id": 22, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 82.3157894736842, "semantic_relevance": 4.0, "factual_accuracy": 4.4, "freshness": 4.6, "objectivity_tone": 4.2, "layout_ad_density": 3.8, "accountability": 3.4, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 34, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.25, "weighted_total_content_score": 82.3157894736842, "semantic_relevance": 1.2, "factual_accuracy": 4.6, "freshness": 3.8, "objectivity_tone": 4.4, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.80002, "relative_se_rank": 2.0, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 68, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 82.3157894736842, "semantic_relevance": 3.4, "factual_accuracy": 4.6, "freshness": 4.4, "objectivity_tone": 3.0, "layout_ad_density": 4.6, "accountability": 4.2, "transparency": 4.6, "authority": 4.8, "avg_ge_freq": 0.8, "relative_se_rank": 1.872727272727273, "normalized_reciprocal_se_rank": 0.04740740740740741, "reciprocal_se_rank": 0.021100323624595466, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 15, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 82.10526315789474, "semantic_relevance": 4.2, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 2.8, "accountability": 4.2, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "exa", "query_id": 5, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 4.083333333333333, "weighted_total_content_score": 82.10526315789474, "semantic_relevance": 3.6666666666666665, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 3.3333333333333335, "transparency": 4.333333333333333, "authority": 4.333333333333333, "avg_ge_freq": 1.0, "relative_se_rank": 1.5968992248062015, "normalized_reciprocal_se_rank": 0.14066591844369622, "reciprocal_se_rank": 0.04350952894642215, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "google-search", "query_id": 74, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.075, "weighted_total_content_score": 82.10526315789474, "semantic_relevance": 4.2, "factual_accuracy": 4.8, "freshness": 2.6, "objectivity_tone": 3.8, "layout_ad_density": 3.8, "accountability": 4.0, "transparency": 4.4, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-4o", "query_id": 82, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.05, "weighted_total_content_score": 82.10526315789474, "semantic_relevance": 5.0, "factual_accuracy": 4.6, "freshness": 4.8, "objectivity_tone": 3.6, "layout_ad_density": 2.6, "accountability": 4.2, "transparency": 3.6, "authority": 4.0, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 52, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 82.10526315789473, "semantic_relevance": 2.2, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 4.4, "layout_ad_density": 4.0, "accountability": 4.6, "transparency": 4.4, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "claude", "query_id": 23, "query_type": "DebateQA", "num_sources": 2, "unweighted_mean_score": 4.125, "weighted_total_content_score": 82.10526315789473, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 3.5, "accountability": 4.5, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 32, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.125, "weighted_total_content_score": 82.10526315789473, "semantic_relevance": 4.0, "factual_accuracy": 4.25, "freshness": 5.0, "objectivity_tone": 3.75, "layout_ad_density": 3.25, "accountability": 4.5, "transparency": 4.25, "authority": 4.0, "avg_ge_freq": 0.83335, "relative_se_rank": 1.7000000000000002, "normalized_reciprocal_se_rank": 0.10549943883277216, "reciprocal_se_rank": 0.035059331175836025, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "deepseek-chat-gensee", "query_id": 1, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 82.10526315789473, "semantic_relevance": 3.8, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.4, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": 1.0, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-chat-tavily", "query_id": 21, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.025, "weighted_total_content_score": 82.10526315789473, "semantic_relevance": 5.0, "factual_accuracy": 4.6, "freshness": 3.4, "objectivity_tone": 4.0, "layout_ad_density": 2.6, "accountability": 4.2, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 1.0, "relative_se_rank": 0.19166666666666668, "normalized_reciprocal_se_rank": 0.5488279621612955, "reciprocal_se_rank": 0.14158730158730157, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "deepseek-chat-tavily", "query_id": 61, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.083333333333333, "weighted_total_content_score": 82.10526315789473, "semantic_relevance": 4.0, "factual_accuracy": 4.333333333333333, "freshness": 3.0, "objectivity_tone": 4.333333333333333, "layout_ad_density": 4.333333333333333, "accountability": 3.6666666666666665, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.8148148148148149, "normalized_reciprocal_se_rank": 0.3429854096520763, "reciprocal_se_rank": 0.09212513484358144, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "google-search", "query_id": 84, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 82.10526315789473, "semantic_relevance": 4.8, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 2.6, "accountability": 4.4, "transparency": 4.4, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-4o", "query_id": 6, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.05, "weighted_total_content_score": 82.10526315789473, "semantic_relevance": 4.8, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 2.4, "accountability": 4.2, "transparency": 3.6, "authority": 4.0, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 2.9411764705882355, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 11, "query_type": "VACOS", "num_sources": 1, "unweighted_mean_score": 4.125, "weighted_total_content_score": 82.10526315789473, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 2.0, "accountability": 5.0, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.04878048780487805, "normalized_reciprocal_se_rank": 0.791919191919192, "reciprocal_se_rank": 0.2, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-4o", "query_id": 19, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.05, "weighted_total_content_score": 82.10526315789473, "semantic_relevance": 5.0, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 2.4, "accountability": 4.0, "transparency": 4.0, "authority": 3.8, "avg_ge_freq": 0.60002, "relative_se_rank": 1.9476190476190478, "normalized_reciprocal_se_rank": 0.06127946127946128, "reciprocal_se_rank": 0.024433656957928797, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gpt-4o", "query_id": 40, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 82.10526315789473, "semantic_relevance": 3.4, "factual_accuracy": 3.2, "freshness": 4.6, "objectivity_tone": 4.6, "layout_ad_density": 3.8, "accountability": 5.0, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 51, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 4.125, "weighted_total_content_score": 82.10526315789473, "semantic_relevance": 4.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 18, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.09375, "weighted_total_content_score": 82.10526315789473, "semantic_relevance": 4.25, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 3.5, "layout_ad_density": 3.5, "accountability": 3.75, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.6861702127659575, "normalized_reciprocal_se_rank": 0.041919191919191925, "reciprocal_se_rank": 0.019781553398058253, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 48, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.15, "weighted_total_content_score": 82.10526315789473, "semantic_relevance": 3.2, "factual_accuracy": 4.4, "freshness": 4.6, "objectivity_tone": 4.0, "layout_ad_density": 3.6, "accountability": 4.0, "transparency": 4.4, "authority": 5.0, "avg_ge_freq": 0.59998, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 40, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 81.89473684210527, "semantic_relevance": 4.0, "factual_accuracy": 4.0, "freshness": 3.6, "objectivity_tone": 4.2, "layout_ad_density": 4.6, "accountability": 4.2, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 16, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 81.89473684210526, "semantic_relevance": 3.0, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.6, "accountability": 4.8, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 14, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 81.89473684210526, "semantic_relevance": 3.6, "factual_accuracy": 4.6, "freshness": 4.8, "objectivity_tone": 4.0, "layout_ad_density": 3.2, "accountability": 4.2, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 3, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 81.89473684210526, "semantic_relevance": 4.0, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.0, "accountability": 4.6, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.60002, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-reasoning-gensee", "query_id": 1, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.075, "weighted_total_content_score": 81.89473684210526, "semantic_relevance": 4.0, "factual_accuracy": 4.6, "freshness": 4.2, "objectivity_tone": 4.0, "layout_ad_density": 3.4, "accountability": 4.2, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": 1.0, "relative_se_rank": 1.7866666666666666, "normalized_reciprocal_se_rank": 0.1583838383838384, "reciprocal_se_rank": 0.047766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "exa", "query_id": 64, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.025, "weighted_total_content_score": 81.89473684210526, "semantic_relevance": 4.4, "factual_accuracy": 4.6, "freshness": 1.6, "objectivity_tone": 4.4, "layout_ad_density": 3.4, "accountability": 4.6, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": 1.0, "relative_se_rank": 1.9095238095238094, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gensee", "query_id": 33, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 81.89473684210526, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 3.2, "accountability": 4.6, "transparency": 3.6, "authority": 4.2, "avg_ge_freq": 0.6667, "relative_se_rank": 1.4465116279069767, "normalized_reciprocal_se_rank": 0.21966329966329967, "reciprocal_se_rank": 0.062491909385113266, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 6, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 81.89473684210525, "semantic_relevance": 3.8, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 3.4, "layout_ad_density": 2.8, "accountability": 4.6, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 0.3333, "relative_se_rank": 2.9411764705882355, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 1, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.075, "weighted_total_content_score": 81.89473684210525, "semantic_relevance": 3.8, "factual_accuracy": 4.8, "freshness": 4.8, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 2, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 81.89473684210525, "semantic_relevance": 4.0, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 3.2, "accountability": 4.6, "transparency": 4.0, "authority": 3.8, "avg_ge_freq": 0.86668, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 38, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 81.89473684210525, "semantic_relevance": 4.6, "factual_accuracy": 3.8, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.2, "accountability": 4.2, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.5434782608695652, "normalized_reciprocal_se_rank": 0.3732691999358666, "reciprocal_se_rank": 0.099402065033133, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "gpt-5", "query_id": 39, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.075, "weighted_total_content_score": 81.89473684210525, "semantic_relevance": 4.2, "factual_accuracy": 4.2, "freshness": 3.4, "objectivity_tone": 4.2, "layout_ad_density": 3.6, "accountability": 4.4, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": 0.53332, "relative_se_rank": 2.0833333333333335, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 37, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 81.89473684210525, "semantic_relevance": 4.2, "factual_accuracy": 4.2, "freshness": 4.4, "objectivity_tone": 3.4, "layout_ad_density": 3.8, "accountability": 4.0, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": 1.0, "relative_se_rank": 0.11020408163265305, "normalized_reciprocal_se_rank": 0.5693057559724226, "reciprocal_se_rank": 0.1465079365079365, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 71, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.09375, "weighted_total_content_score": 81.84210526315789, "semantic_relevance": 3.75, "factual_accuracy": 4.5, "freshness": 3.0, "objectivity_tone": 4.0, "layout_ad_density": 4.25, "accountability": 4.0, "transparency": 4.5, "authority": 4.75, "avg_ge_freq": 0.666675, "relative_se_rank": 2.0337837837837838, "normalized_reciprocal_se_rank": 0.25, "reciprocal_se_rank": 0.06978155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 9, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 4.041666666666667, "weighted_total_content_score": 81.75438596491227, "semantic_relevance": 4.333333333333333, "factual_accuracy": 4.666666666666667, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.3333333333333335, "accountability": 3.6666666666666665, "transparency": 4.333333333333333, "authority": 4.0, "avg_ge_freq": 0.8889, "relative_se_rank": 0.9481481481481482, "normalized_reciprocal_se_rank": 0.19981869981869982, "reciprocal_se_rank": 0.05772342544187204, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "claude", "query_id": 10, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 4.083333333333333, "weighted_total_content_score": 81.75438596491227, "semantic_relevance": 4.333333333333333, "factual_accuracy": 4.666666666666667, "freshness": 5.0, "objectivity_tone": 3.3333333333333335, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 4.333333333333333, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.7101449275362317, "normalized_reciprocal_se_rank": 0.02210135543468877, "reciprocal_se_rank": 0.015019500456393659, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "deepseek-reasoning-tavily", "query_id": 11, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 4.083333333333333, "weighted_total_content_score": 81.75438596491227, "semantic_relevance": 5.0, "factual_accuracy": 4.333333333333333, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 2.3333333333333335, "accountability": 4.333333333333333, "transparency": 4.0, "authority": 4.666666666666667, "avg_ge_freq": 1.0, "relative_se_rank": 1.6422764227642277, "normalized_reciprocal_se_rank": 0.263973063973064, "reciprocal_se_rank": 0.07313915857605179, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "gpt-4o", "query_id": 99, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 4.041666666666667, "weighted_total_content_score": 81.75438596491227, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 4.666666666666667, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 3.6666666666666665, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 1.7235772357723576, "normalized_reciprocal_se_rank": 0.07901234567901234, "reciprocal_se_rank": 0.028694714131607332, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 40, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.15, "weighted_total_content_score": 81.6842105263158, "semantic_relevance": 3.8, "factual_accuracy": 4.0, "freshness": 4.4, "objectivity_tone": 3.4, "layout_ad_density": 4.4, "accountability": 4.6, "transparency": 4.4, "authority": 4.2, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 2.025, "normalized_reciprocal_se_rank": 0.09595959595959597, "reciprocal_se_rank": 0.032766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 19, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.05, "weighted_total_content_score": 81.6842105263158, "semantic_relevance": 4.4, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 2.8, "accountability": 3.8, "transparency": 4.2, "authority": 3.8, "avg_ge_freq": 0.59998, "relative_se_rank": 1.9333333333333331, "normalized_reciprocal_se_rank": 0.08439955106621773, "reciprocal_se_rank": 0.029989212513484353, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "claude", "query_id": 19, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.075, "weighted_total_content_score": 81.6842105263158, "semantic_relevance": 4.4, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 2.4, "accountability": 4.4, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 1.0, "relative_se_rank": 1.4428571428571426, "normalized_reciprocal_se_rank": 0.3583838383838384, "reciprocal_se_rank": 0.0958252427184466, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "google-search", "query_id": 71, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.025, "weighted_total_content_score": 81.6842105263158, "semantic_relevance": 4.2, "factual_accuracy": 5.0, "freshness": 2.2, "objectivity_tone": 4.0, "layout_ad_density": 3.6, "accountability": 4.4, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "tavily", "query_id": 92, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.15, "weighted_total_content_score": 81.6842105263158, "semantic_relevance": 2.6, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.8, "transparency": 4.4, "authority": 4.8, "avg_ge_freq": 1.0, "relative_se_rank": 0.19591836734693877, "normalized_reciprocal_se_rank": 0.3506484094203392, "reciprocal_se_rank": 0.09396648672964461, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 3, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.075, "weighted_total_content_score": 81.68421052631578, "semantic_relevance": 3.8, "factual_accuracy": 4.6, "freshness": 4.4, "objectivity_tone": 4.0, "layout_ad_density": 2.8, "accountability": 4.8, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "exa", "query_id": 3, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 81.68421052631578, "semantic_relevance": 3.2, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 2.6, "accountability": 4.4, "transparency": 4.8, "authority": 4.6, "avg_ge_freq": 1.0, "relative_se_rank": 1.8363636363636366, "normalized_reciprocal_se_rank": 0.11082251082251082, "reciprocal_se_rank": 0.0363384188626907, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 37, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.075, "weighted_total_content_score": 81.47368421052632, "semantic_relevance": 4.4, "factual_accuracy": 4.2, "freshness": 4.4, "objectivity_tone": 3.6, "layout_ad_density": 3.4, "accountability": 3.6, "transparency": 4.8, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 35, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.075, "weighted_total_content_score": 81.47368421052632, "semantic_relevance": 3.8, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.2, "accountability": 4.0, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 1.7434782608695651, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 20, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.075, "weighted_total_content_score": 81.47368421052632, "semantic_relevance": 3.4, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.4, "accountability": 3.6, "transparency": 4.4, "authority": 4.0, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 54, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.05, "weighted_total_content_score": 81.47368421052632, "semantic_relevance": 3.6, "factual_accuracy": 4.4, "freshness": 4.4, "objectivity_tone": 4.6, "layout_ad_density": 3.8, "accountability": 3.6, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.53332, "relative_se_rank": 1.7183673469387752, "normalized_reciprocal_se_rank": 0.026599326599326595, "reciprocal_se_rank": 0.016100323624595465, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "exa", "query_id": 9, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 81.47368421052632, "semantic_relevance": 3.8, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 2.8, "layout_ad_density": 2.4, "accountability": 4.4, "transparency": 4.8, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 74, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.025, "weighted_total_content_score": 81.47368421052632, "semantic_relevance": 4.4, "factual_accuracy": 4.6, "freshness": 2.6, "objectivity_tone": 4.0, "layout_ad_density": 3.8, "accountability": 4.4, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.80002, "relative_se_rank": 1.7234042553191489, "normalized_reciprocal_se_rank": 0.09595959595959597, "reciprocal_se_rank": 0.032766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "tavily", "query_id": 13, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 81.47368421052632, "semantic_relevance": 3.4, "factual_accuracy": 4.6, "freshness": 4.4, "objectivity_tone": 3.8, "layout_ad_density": 2.8, "accountability": 4.2, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 0.80002, "relative_se_rank": 1.9285714285714284, "normalized_reciprocal_se_rank": 0.09595959595959597, "reciprocal_se_rank": 0.032766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gpt-4o", "query_id": 17, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.025, "weighted_total_content_score": 81.4736842105263, "semantic_relevance": 4.8, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 2.4, "accountability": 4.0, "transparency": 4.0, "authority": 3.8, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 1.8837209302325584, "normalized_reciprocal_se_rank": 0.09595959595959597, "reciprocal_se_rank": 0.032766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 16, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 4.125, "weighted_total_content_score": 81.40350877192982, "semantic_relevance": 3.6666666666666665, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 3.6666666666666665, "accountability": 4.0, "transparency": 4.666666666666667, "authority": 4.333333333333333, "avg_ge_freq": 0.7777666666666666, "relative_se_rank": 1.73015873015873, "normalized_reciprocal_se_rank": 0.05258938592271926, "reciprocal_se_rank": 0.02234550778240098, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "gensee", "query_id": 66, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.09375, "weighted_total_content_score": 81.3157894736842, "semantic_relevance": 3.5, "factual_accuracy": 4.5, "freshness": 3.5, "objectivity_tone": 3.75, "layout_ad_density": 4.0, "accountability": 4.5, "transparency": 4.75, "authority": 4.25, "avg_ge_freq": 0.583325, "relative_se_rank": 1.25, "normalized_reciprocal_se_rank": 0.3885281385281385, "reciprocal_se_rank": 0.10306865464632455, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 71, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.025, "weighted_total_content_score": 81.26315789473684, "semantic_relevance": 3.6, "factual_accuracy": 4.2, "freshness": 3.0, "objectivity_tone": 5.0, "layout_ad_density": 3.6, "accountability": 4.4, "transparency": 4.4, "authority": 4.0, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 2.1675675675675676, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 55, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.235714285714286, "weighted_total_content_score": 81.26315789473684, "semantic_relevance": 1.8, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.75, "accountability": 4.0, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": 0.66668, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 5, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.05, "weighted_total_content_score": 81.26315789473684, "semantic_relevance": 4.0, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 3.8, "accountability": 3.2, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 21, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.0, "weighted_total_content_score": 81.26315789473684, "semantic_relevance": 5.0, "factual_accuracy": 4.4, "freshness": 3.6, "objectivity_tone": 3.8, "layout_ad_density": 3.0, "accountability": 4.2, "transparency": 4.2, "authority": 3.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 55, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 81.05263157894737, "semantic_relevance": 2.0, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 4.8, "layout_ad_density": 4.2, "accountability": 4.0, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 27, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 81.05263157894737, "semantic_relevance": 3.8, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 3.8, "accountability": 4.0, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": 1.0, "relative_se_rank": 2.0904761904761906, "normalized_reciprocal_se_rank": 0.011736411736411736, "reciprocal_se_rank": 0.012528895053166896, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gpt-4o", "query_id": 38, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.0, "weighted_total_content_score": 81.05263157894737, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.4, "accountability": 3.6, "transparency": 3.6, "authority": 3.4, "avg_ge_freq": 0.53332, "relative_se_rank": 2.1739130434782608, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 46, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 4.0, "weighted_total_content_score": 81.05263157894737, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 4.0, "authority": 3.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 69, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.0, "weighted_total_content_score": 81.05263157894737, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 2.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 0.06666666666666667, "normalized_reciprocal_se_rank": 1.0, "reciprocal_se_rank": 0.25, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-4o", "query_id": 92, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.075, "weighted_total_content_score": 81.05263157894737, "semantic_relevance": 3.0, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 2.6, "accountability": 5.0, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": 0.3333, "relative_se_rank": 1.073469387755102, "normalized_reciprocal_se_rank": 0.17599360533291103, "reciprocal_se_rank": 0.051998463417374255, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gpt-5", "query_id": 5, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.0625, "weighted_total_content_score": 81.05263157894737, "semantic_relevance": 3.5, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 3.5, "layout_ad_density": 3.0, "accountability": 3.5, "transparency": 4.75, "authority": 4.25, "avg_ge_freq": 0.916675, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 11, "query_type": "VACOS", "num_sources": 2, "unweighted_mean_score": 4.0625, "weighted_total_content_score": 81.05263157894737, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 2.5, "accountability": 4.5, "transparency": 4.0, "authority": 4.5, "avg_ge_freq": 0.3333, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 69, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.0, "weighted_total_content_score": 81.05263157894737, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 2.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.06666666666666667, "normalized_reciprocal_se_rank": 1.0, "reciprocal_se_rank": 0.25, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 92, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.03125, "weighted_total_content_score": 81.05263157894737, "semantic_relevance": 4.5, "factual_accuracy": 4.25, "freshness": 4.0, "objectivity_tone": 3.75, "layout_ad_density": 3.75, "accountability": 4.25, "transparency": 4.0, "authority": 3.75, "avg_ge_freq": 1.0, "relative_se_rank": 1.545918367346939, "normalized_reciprocal_se_rank": 0.16329966329966328, "reciprocal_se_rank": 0.048948220064724914, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 97, "query_type": "QuoraQuestions", "num_sources": 2, "unweighted_mean_score": 4.0625, "weighted_total_content_score": 81.05263157894737, "semantic_relevance": 4.5, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 4.0, "accountability": 3.5, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.83335, "relative_se_rank": 1.2625, "normalized_reciprocal_se_rank": 0.5, "reciprocal_se_rank": 0.12985436893203883, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "tavily", "query_id": 94, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.025, "weighted_total_content_score": 81.05263157894737, "semantic_relevance": 4.6, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 2.4, "accountability": 4.4, "transparency": 3.8, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.1400000000000001, "normalized_reciprocal_se_rank": 0.29733700642791555, "reciprocal_se_rank": 0.08115622241835835, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 16, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.125, "weighted_total_content_score": 81.05263157894736, "semantic_relevance": 2.6, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.4, "layout_ad_density": 3.2, "accountability": 4.8, "transparency": 4.4, "authority": 4.6, "avg_ge_freq": 0.6667, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 38, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.128571428571428, "weighted_total_content_score": 81.05263157894736, "semantic_relevance": 4.6, "factual_accuracy": 4.4, "freshness": 4.2, "objectivity_tone": 3.6, "layout_ad_density": 3.75, "accountability": 3.8, "transparency": 4.6, "authority": 4.0, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.5217391304347827, "normalized_reciprocal_se_rank": 0.11972789115646258, "reciprocal_se_rank": 0.038478303942936394, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gensee", "query_id": 71, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.0, "weighted_total_content_score": 81.05263157894736, "semantic_relevance": 4.0, "factual_accuracy": 4.333333333333333, "freshness": 3.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 4.333333333333333, "accountability": 4.0, "transparency": 4.0, "authority": 3.6666666666666665, "avg_ge_freq": 0.5555333333333333, "relative_se_rank": 1.8108108108108107, "normalized_reciprocal_se_rank": 0.3333333333333333, "reciprocal_se_rank": 0.08980582524271845, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "google-search", "query_id": 41, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 4.041666666666667, "weighted_total_content_score": 81.05263157894736, "semantic_relevance": 3.6666666666666665, "factual_accuracy": 4.666666666666667, "freshness": 2.0, "objectivity_tone": 4.0, "layout_ad_density": 4.666666666666667, "accountability": 4.0, "transparency": 4.666666666666667, "authority": 4.666666666666667, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 66, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 81.05263157894736, "semantic_relevance": 3.4, "factual_accuracy": 4.4, "freshness": 3.0, "objectivity_tone": 3.6, "layout_ad_density": 4.8, "accountability": 4.4, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "deepseek-reasoning-gensee", "query_id": 92, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 80.84210526315789, "semantic_relevance": 2.6, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.6, "accountability": 5.0, "transparency": 4.4, "authority": 4.6, "avg_ge_freq": 1.0, "relative_se_rank": 0.26122448979591834, "normalized_reciprocal_se_rank": 0.29146097956624273, "reciprocal_se_rank": 0.0797442645074224, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-4o", "query_id": 90, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.025, "weighted_total_content_score": 80.84210526315789, "semantic_relevance": 4.4, "factual_accuracy": 4.2, "freshness": 4.4, "objectivity_tone": 3.8, "layout_ad_density": 3.6, "accountability": 3.6, "transparency": 4.4, "authority": 3.8, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 1.5155555555555555, "normalized_reciprocal_se_rank": 0.060542681719152315, "reciprocal_se_rank": 0.024256615267466208, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 21, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.0, "weighted_total_content_score": 80.84210526315789, "semantic_relevance": 4.8, "factual_accuracy": 4.2, "freshness": 3.2, "objectivity_tone": 3.8, "layout_ad_density": 3.4, "accountability": 4.2, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.6667, "relative_se_rank": 0.1625, "normalized_reciprocal_se_rank": 0.5742258933063531, "reciprocal_se_rank": 0.1476902025177887, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gensee", "query_id": 92, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.09375, "weighted_total_content_score": 80.78947368421052, "semantic_relevance": 2.5, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.75, "transparency": 4.5, "authority": 4.25, "avg_ge_freq": 0.75, "relative_se_rank": 1.1020408163265305, "normalized_reciprocal_se_rank": 0.2182239057239057, "reciprocal_se_rank": 0.0621460355987055, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "gensee", "query_id": 57, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 3.9583333333333335, "weighted_total_content_score": 80.70175438596492, "semantic_relevance": 5.0, "factual_accuracy": 4.333333333333333, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.3333333333333335, "accountability": 3.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 1.7588652482269502, "normalized_reciprocal_se_rank": 0.013732092163464714, "reciprocal_se_rank": 0.013008439621803413, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "tavily", "query_id": 34, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 4.125, "weighted_total_content_score": 80.70175438596492, "semantic_relevance": 2.0, "factual_accuracy": 4.333333333333333, "freshness": 5.0, "objectivity_tone": 4.333333333333333, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.666666666666667, "authority": 4.666666666666667, "avg_ge_freq": 1.0, "relative_se_rank": 0.7000000000000001, "normalized_reciprocal_se_rank": 0.518037518037518, "reciprocal_se_rank": 0.1341886269070735, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "gpt-4o", "query_id": 14, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.05, "weighted_total_content_score": 80.63157894736842, "semantic_relevance": 3.8, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.4, "accountability": 3.4, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 1.8044444444444445, "normalized_reciprocal_se_rank": 0.08439955106621773, "reciprocal_se_rank": 0.029989212513484353, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 9, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.0, "weighted_total_content_score": 80.63157894736841, "semantic_relevance": 4.4, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 2.6, "accountability": 4.0, "transparency": 3.8, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 3, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.075, "weighted_total_content_score": 80.63157894736841, "semantic_relevance": 2.8, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.6, "accountability": 5.0, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 1.8954545454545457, "normalized_reciprocal_se_rank": 0.03353535353535354, "reciprocal_se_rank": 0.017766990291262136, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 17, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.05, "weighted_total_content_score": 80.63157894736841, "semantic_relevance": 3.8, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.0, "accountability": 4.4, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 1.4232558139534883, "normalized_reciprocal_se_rank": 0.2692063492063492, "reciprocal_se_rank": 0.07439667128987518, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 57, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.975, "weighted_total_content_score": 80.63157894736841, "semantic_relevance": 5.0, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.0, "accountability": 3.8, "transparency": 3.6, "authority": 3.4, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.4978723404255319, "normalized_reciprocal_se_rank": 0.11906176612058965, "reciprocal_se_rank": 0.03831823991732615, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "exa", "query_id": 48, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 80.63157894736841, "semantic_relevance": 2.0, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 4.8, "layout_ad_density": 3.4, "accountability": 3.8, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 1.0, "relative_se_rank": 1.3954545454545457, "normalized_reciprocal_se_rank": 0.24146224146224143, "reciprocal_se_rank": 0.06773000462320851, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gensee", "query_id": 2, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.975, "weighted_total_content_score": 80.63157894736841, "semantic_relevance": 4.6, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 2.8, "accountability": 3.8, "transparency": 3.6, "authority": 3.6, "avg_ge_freq": 0.3333, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 68, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 80.63157894736841, "semantic_relevance": 3.2, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.4, "layout_ad_density": 4.2, "accountability": 3.8, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.872727272727273, "normalized_reciprocal_se_rank": 0.04740740740740741, "reciprocal_se_rank": 0.021100323624595466, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gensee", "query_id": 93, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.0, "weighted_total_content_score": 80.63157894736841, "semantic_relevance": 4.8, "factual_accuracy": 3.8, "freshness": 4.6, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 3.4, "transparency": 3.6, "authority": 3.8, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.6090909090909093, "normalized_reciprocal_se_rank": 0.05782267115600449, "reciprocal_se_rank": 0.023603020496224376, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "google-search", "query_id": 86, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.0, "weighted_total_content_score": 80.63157894736841, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 3.6, "objectivity_tone": 3.6, "layout_ad_density": 3.8, "accountability": 3.6, "transparency": 4.4, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 20, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.0, "weighted_total_content_score": 80.52631578947368, "semantic_relevance": 3.75, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.25, "accountability": 3.25, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 75, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.0, "weighted_total_content_score": 80.52631578947368, "semantic_relevance": 4.0, "factual_accuracy": 4.5, "freshness": 3.0, "objectivity_tone": 4.0, "layout_ad_density": 3.5, "accountability": 5.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.3404255319148936, "normalized_reciprocal_se_rank": 0.1842286501377411, "reciprocal_se_rank": 0.05397727272727273, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "deepseek-chat-gensee", "query_id": 41, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 4.03125, "weighted_total_content_score": 80.52631578947368, "semantic_relevance": 3.0, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 4.75, "accountability": 2.5, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 4.545454545454546, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-chat-tavily", "query_id": 41, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 4.03125, "weighted_total_content_score": 80.52631578947368, "semantic_relevance": 3.25, "factual_accuracy": 4.5, "freshness": 4.25, "objectivity_tone": 4.25, "layout_ad_density": 4.25, "accountability": 2.75, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 4.545454545454546, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-chat-tavily", "query_id": 62, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.21875, "weighted_total_content_score": 80.52631578947368, "semantic_relevance": 3.0, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 4.75, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.024390243902439, "normalized_reciprocal_se_rank": 0.019624819624819625, "reciprocal_se_rank": 0.014424410540915394, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "gpt-4o", "query_id": 97, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.0, "weighted_total_content_score": 80.52631578947368, "semantic_relevance": 5.0, "factual_accuracy": 4.25, "freshness": 4.5, "objectivity_tone": 3.25, "layout_ad_density": 3.0, "accountability": 4.5, "transparency": 4.0, "authority": 3.5, "avg_ge_freq": 0.3333, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 57, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.0, "weighted_total_content_score": 80.42105263157893, "semantic_relevance": 4.4, "factual_accuracy": 4.0, "freshness": 4.2, "objectivity_tone": 4.0, "layout_ad_density": 2.8, "accountability": 4.4, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.4978723404255319, "normalized_reciprocal_se_rank": 0.11906176612058965, "reciprocal_se_rank": 0.03831823991732615, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "exa", "query_id": 17, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.9583333333333335, "weighted_total_content_score": 80.35087719298245, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.0, "accountability": 3.0, "transparency": 4.333333333333333, "authority": 4.333333333333333, "avg_ge_freq": 1.0, "relative_se_rank": 1.5813953488372094, "normalized_reciprocal_se_rank": 0.1847041847041847, "reciprocal_se_rank": 0.05409153952843273, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "gpt-4o", "query_id": 13, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.9583333333333335, "weighted_total_content_score": 80.35087719298245, "semantic_relevance": 4.333333333333333, "factual_accuracy": 4.666666666666667, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.0, "accountability": 4.0, "transparency": 3.6666666666666665, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 11, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.9166666666666665, "weighted_total_content_score": 80.35087719298245, "semantic_relevance": 4.333333333333333, "factual_accuracy": 4.666666666666667, "freshness": 5.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 2.0, "accountability": 2.6666666666666665, "transparency": 3.6666666666666665, "authority": 4.333333333333333, "avg_ge_freq": 0.7778, "relative_se_rank": 1.6422764227642277, "normalized_reciprocal_se_rank": 0.263973063973064, "reciprocal_se_rank": 0.07313915857605179, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 25, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.0, "weighted_total_content_score": 80.26315789473685, "semantic_relevance": 3.75, "factual_accuracy": 4.25, "freshness": 3.0, "objectivity_tone": 4.25, "layout_ad_density": 3.5, "accountability": 4.25, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.49997499999999995, "relative_se_rank": 1.3841463414634145, "normalized_reciprocal_se_rank": 0.11184926184926186, "reciprocal_se_rank": 0.03658513816280806, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "deepseek-reasoning-gensee", "query_id": 71, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.0625, "weighted_total_content_score": 80.26315789473685, "semantic_relevance": 3.0, "factual_accuracy": 4.25, "freshness": 4.5, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.25, "transparency": 4.5, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.0337837837837838, "normalized_reciprocal_se_rank": 0.25, "reciprocal_se_rank": 0.06978155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 67, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.925, "weighted_total_content_score": 80.21052631578947, "semantic_relevance": 4.8, "factual_accuracy": 4.8, "freshness": 2.8, "objectivity_tone": 3.8, "layout_ad_density": 3.8, "accountability": 3.8, "transparency": 3.8, "authority": 3.8, "avg_ge_freq": 0.3333, "relative_se_rank": 1.076595744680851, "normalized_reciprocal_se_rank": 0.17082661237699998, "reciprocal_se_rank": 0.05075688015855097, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 46, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.05, "weighted_total_content_score": 80.21052631578947, "semantic_relevance": 4.0, "factual_accuracy": 3.8, "freshness": 4.8, "objectivity_tone": 3.6, "layout_ad_density": 3.4, "accountability": 4.0, "transparency": 4.2, "authority": 4.6, "avg_ge_freq": 0.66668, "relative_se_rank": 0.47111111111111115, "normalized_reciprocal_se_rank": 0.3087696286536866, "reciprocal_se_rank": 0.08390338164251207, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-4o", "query_id": 5, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.975, "weighted_total_content_score": 80.21052631578947, "semantic_relevance": 4.0, "factual_accuracy": 4.6, "freshness": 4.6, "objectivity_tone": 4.0, "layout_ad_density": 3.2, "accountability": 3.4, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 1.888372093023256, "normalized_reciprocal_se_rank": 0.08439955106621773, "reciprocal_se_rank": 0.029989212513484353, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gpt-4o", "query_id": 36, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.975, "weighted_total_content_score": 80.21052631578945, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 4.8, "objectivity_tone": 3.6, "layout_ad_density": 3.4, "accountability": 4.2, "transparency": 3.8, "authority": 3.0, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 2.048780487804878, "normalized_reciprocal_se_rank": 0.028107158541941152, "reciprocal_se_rank": 0.016462642465175176, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 51, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 4.03125, "weighted_total_content_score": 80.0, "semantic_relevance": 3.5, "factual_accuracy": 4.25, "freshness": 4.25, "objectivity_tone": 3.75, "layout_ad_density": 4.25, "accountability": 4.25, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 64, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.03125, "weighted_total_content_score": 80.0, "semantic_relevance": 3.0, "factual_accuracy": 4.75, "freshness": 4.0, "objectivity_tone": 3.75, "layout_ad_density": 3.5, "accountability": 4.5, "transparency": 4.25, "authority": 4.5, "avg_ge_freq": 0.66665, "relative_se_rank": 1.7916666666666665, "normalized_reciprocal_se_rank": 0.25, "reciprocal_se_rank": 0.06978155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "gensee", "query_id": 49, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.9375, "weighted_total_content_score": 80.0, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.5, "accountability": 3.0, "transparency": 3.5, "authority": 3.5, "avg_ge_freq": 0.3333, "relative_se_rank": 1.25, "normalized_reciprocal_se_rank": 0.09539842873176206, "reciprocal_se_rank": 0.03263214670981661, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "gpt-4o", "query_id": 54, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 4.0, "weighted_total_content_score": 80.0, "semantic_relevance": 3.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 2.0, "transparency": 3.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 83, "query_type": "QuoraQuestions", "num_sources": 1, "unweighted_mean_score": 4.125, "weighted_total_content_score": 80.0, "semantic_relevance": 1.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 3.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 14, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.0, "weighted_total_content_score": 80.0, "semantic_relevance": 4.2, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.4, "layout_ad_density": 2.6, "accountability": 3.6, "transparency": 4.8, "authority": 4.0, "avg_ge_freq": 0.53332, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 23, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.05, "weighted_total_content_score": 79.99999999999999, "semantic_relevance": 4.0, "factual_accuracy": 3.8, "freshness": 4.4, "objectivity_tone": 3.4, "layout_ad_density": 3.6, "accountability": 4.4, "transparency": 4.6, "authority": 4.2, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.02, "normalized_reciprocal_se_rank": 0.11082251082251082, "reciprocal_se_rank": 0.0363384188626907, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "claude", "query_id": 20, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.875, "weighted_total_content_score": 79.99999999999999, "semantic_relevance": 4.6, "factual_accuracy": 4.8, "freshness": 5.0, "objectivity_tone": 4.6, "layout_ad_density": 2.2, "accountability": 2.2, "transparency": 3.4, "authority": 4.2, "avg_ge_freq": 0.93334, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 89, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.0, "weighted_total_content_score": 79.78947368421053, "semantic_relevance": 4.4, "factual_accuracy": 4.0, "freshness": 4.8, "objectivity_tone": 3.4, "layout_ad_density": 2.8, "accountability": 4.2, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.86668, "relative_se_rank": 1.6975609756097563, "normalized_reciprocal_se_rank": 0.073015873015873, "reciprocal_se_rank": 0.02725381414701803, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 5, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.0, "weighted_total_content_score": 79.78947368421052, "semantic_relevance": 3.6, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.0, "accountability": 3.8, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 64, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.0, "weighted_total_content_score": 79.78947368421052, "semantic_relevance": 3.6, "factual_accuracy": 4.0, "freshness": 3.2, "objectivity_tone": 4.2, "layout_ad_density": 3.6, "accountability": 4.4, "transparency": 4.8, "authority": 4.2, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 98, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.29, "weighted_total_content_score": 79.78947368421052, "semantic_relevance": 4.75, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 4.25, "layout_ad_density": 4.0, "accountability": 4.4, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.73336, "relative_se_rank": 1.525, "normalized_reciprocal_se_rank": 0.3108225108225108, "reciprocal_se_rank": 0.08439667128987517, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "exa", "query_id": 69, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.025, "weighted_total_content_score": 79.78947368421052, "semantic_relevance": 3.0, "factual_accuracy": 4.2, "freshness": 4.2, "objectivity_tone": 4.2, "layout_ad_density": 3.2, "accountability": 4.2, "transparency": 4.4, "authority": 4.8, "avg_ge_freq": 1.0, "relative_se_rank": 5.346666666666667, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gensee", "query_id": 15, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.975, "weighted_total_content_score": 79.78947368421052, "semantic_relevance": 4.4, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 2.6, "accountability": 4.2, "transparency": 3.6, "authority": 4.2, "avg_ge_freq": 0.53332, "relative_se_rank": 2.6315789473684212, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 18, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.975, "weighted_total_content_score": 79.78947368421052, "semantic_relevance": 4.0, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 2.6, "accountability": 4.2, "transparency": 3.8, "authority": 4.0, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.774468085106383, "normalized_reciprocal_se_rank": 0.03353535353535354, "reciprocal_se_rank": 0.017766990291262132, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gensee", "query_id": 82, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.975, "weighted_total_content_score": 79.78947368421052, "semantic_relevance": 4.0, "factual_accuracy": 4.6, "freshness": 4.2, "objectivity_tone": 3.6, "layout_ad_density": 4.4, "accountability": 3.4, "transparency": 3.4, "authority": 4.2, "avg_ge_freq": 0.53332, "relative_se_rank": 1.5590909090909093, "normalized_reciprocal_se_rank": 0.0814073751872795, "reciprocal_se_rank": 0.029270218795001624, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "google-search", "query_id": 85, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.0, "weighted_total_content_score": 79.7894736842105, "semantic_relevance": 4.2, "factual_accuracy": 3.8, "freshness": 4.6, "objectivity_tone": 3.8, "layout_ad_density": 3.4, "accountability": 3.6, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 21, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 3.9375, "weighted_total_content_score": 79.73684210526315, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 3.0, "objectivity_tone": 3.75, "layout_ad_density": 3.0, "accountability": 4.5, "transparency": 4.0, "authority": 4.25, "avg_ge_freq": 0.75, "relative_se_rank": 0.5520833333333334, "normalized_reciprocal_se_rank": 0.6112794612794613, "reciprocal_se_rank": 0.1565938511326861, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "tavily", "query_id": 57, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 4.0625, "weighted_total_content_score": 79.73684210526315, "semantic_relevance": 2.75, "factual_accuracy": 3.75, "freshness": 4.25, "objectivity_tone": 4.25, "layout_ad_density": 3.75, "accountability": 4.25, "transparency": 5.0, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 1.6489361702127658, "normalized_reciprocal_se_rank": 0.06993006993006994, "reciprocal_se_rank": 0.026512322628827484, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 81, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 4.041666666666667, "weighted_total_content_score": 79.64912280701755, "semantic_relevance": 2.6666666666666665, "factual_accuracy": 4.333333333333333, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.3333333333333335, "accountability": 4.0, "transparency": 4.666666666666667, "authority": 4.333333333333333, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 2, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.9583333333333335, "weighted_total_content_score": 79.64912280701753, "semantic_relevance": 4.333333333333333, "factual_accuracy": 4.666666666666667, "freshness": 5.0, "objectivity_tone": 3.3333333333333335, "layout_ad_density": 2.3333333333333335, "accountability": 4.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.7777666666666666, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 81, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 4.0, "weighted_total_content_score": 79.64912280701753, "semantic_relevance": 3.6666666666666665, "factual_accuracy": 4.333333333333333, "freshness": 4.0, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 3.6666666666666665, "accountability": 4.0, "transparency": 4.333333333333333, "authority": 4.333333333333333, "avg_ge_freq": 1.0, "relative_se_rank": 1.6341463414634145, "normalized_reciprocal_se_rank": 0.3333333333333333, "reciprocal_se_rank": 0.08980582524271845, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "exa", "query_id": 18, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.9583333333333335, "weighted_total_content_score": 79.64912280701753, "semantic_relevance": 4.0, "factual_accuracy": 4.333333333333333, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.3333333333333335, "accountability": 2.6666666666666665, "transparency": 4.666666666666667, "authority": 4.666666666666667, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 21, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.95, "weighted_total_content_score": 79.57894736842105, "semantic_relevance": 4.6, "factual_accuracy": 4.0, "freshness": 2.8, "objectivity_tone": 3.8, "layout_ad_density": 3.8, "accountability": 3.8, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 0.9000000000000001, "normalized_reciprocal_se_rank": 0.3103683297622692, "reciprocal_se_rank": 0.08428753554967147, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "deepseek-chat-tavily", "query_id": 22, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.975, "weighted_total_content_score": 79.57894736842105, "semantic_relevance": 4.0, "factual_accuracy": 4.4, "freshness": 3.8, "objectivity_tone": 3.6, "layout_ad_density": 3.6, "accountability": 4.2, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.019047619047619, "normalized_reciprocal_se_rank": 0.022745978301533857, "reciprocal_se_rank": 0.015174397698669542, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "exa", "query_id": 51, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.025, "weighted_total_content_score": 79.57894736842105, "semantic_relevance": 2.4, "factual_accuracy": 4.0, "freshness": 3.2, "objectivity_tone": 4.8, "layout_ad_density": 4.4, "accountability": 4.2, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 68, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 79.57894736842105, "semantic_relevance": 2.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.4, "accountability": 4.6, "transparency": 4.8, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 67, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.875, "weighted_total_content_score": 79.57894736842104, "semantic_relevance": 5.0, "factual_accuracy": 4.6, "freshness": 2.2, "objectivity_tone": 4.0, "layout_ad_density": 2.8, "accountability": 3.6, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gensee", "query_id": 90, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.975, "weighted_total_content_score": 79.57894736842104, "semantic_relevance": 4.4, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 3.2, "accountability": 3.6, "transparency": 4.2, "authority": 3.8, "avg_ge_freq": 0.6, "relative_se_rank": 1.5422222222222222, "normalized_reciprocal_se_rank": 0.04777045820524082, "reciprocal_se_rank": 0.021187561559026312, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 7, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 3.96875, "weighted_total_content_score": 79.47368421052632, "semantic_relevance": 3.25, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.25, "layout_ad_density": 3.0, "accountability": 3.25, "transparency": 4.25, "authority": 4.25, "avg_ge_freq": 0.41664999999999996, "relative_se_rank": 2.857142857142857, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 12, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 3.96875, "weighted_total_content_score": 79.47368421052632, "semantic_relevance": 3.75, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 3.75, "layout_ad_density": 3.0, "accountability": 3.0, "transparency": 4.5, "authority": 4.25, "avg_ge_freq": 0.66665, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-reasoning-gensee", "query_id": 41, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 4.0, "weighted_total_content_score": 79.47368421052632, "semantic_relevance": 2.75, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.25, "layout_ad_density": 4.5, "accountability": 3.25, "transparency": 3.5, "authority": 4.25, "avg_ge_freq": 1.0, "relative_se_rank": 4.545454545454546, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 4, "query_type": "VACOS", "num_sources": 2, "unweighted_mean_score": 4.0, "weighted_total_content_score": 79.4736842105263, "semantic_relevance": 3.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 4.0, "accountability": 3.0, "transparency": 4.0, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 90, "query_type": "QuoraQuestions", "num_sources": 2, "unweighted_mean_score": 4.0, "weighted_total_content_score": 79.4736842105263, "semantic_relevance": 3.5, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 3.5, "layout_ad_density": 2.5, "accountability": 4.0, "transparency": 5.0, "authority": 4.0, "avg_ge_freq": 0.5, "relative_se_rank": 1.277777777777778, "normalized_reciprocal_se_rank": 0.09539842873176206, "reciprocal_se_rank": 0.03263214670981661, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 85, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.0, "weighted_total_content_score": 79.36842105263159, "semantic_relevance": 4.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.4, "layout_ad_density": 3.8, "accountability": 3.8, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.8, "relative_se_rank": 1.331914893617021, "normalized_reciprocal_se_rank": 0.1595510662177329, "reciprocal_se_rank": 0.04804746494066882, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "claude", "query_id": 37, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.975, "weighted_total_content_score": 79.36842105263159, "semantic_relevance": 3.8, "factual_accuracy": 4.2, "freshness": 3.6, "objectivity_tone": 3.8, "layout_ad_density": 4.0, "accountability": 3.8, "transparency": 4.0, "authority": 4.6, "avg_ge_freq": 1.0, "relative_se_rank": 1.6816326530612244, "normalized_reciprocal_se_rank": 0.04740740740740741, "reciprocal_se_rank": 0.021100323624595466, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gpt-5", "query_id": 81, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 79.36842105263158, "semantic_relevance": 4.4, "factual_accuracy": 4.6, "freshness": 4.0, "objectivity_tone": 4.4, "layout_ad_density": 4.4, "accountability": 4.0, "transparency": 4.25, "authority": 4.25, "avg_ge_freq": 0.53332, "relative_se_rank": 1.5463414634146342, "normalized_reciprocal_se_rank": 0.23572567783094098, "reciprocal_se_rank": 0.06635155850792028, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 5, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.925, "weighted_total_content_score": 79.36842105263158, "semantic_relevance": 4.0, "factual_accuracy": 4.8, "freshness": 4.0, "objectivity_tone": 3.8, "layout_ad_density": 3.2, "accountability": 3.8, "transparency": 3.8, "authority": 4.0, "avg_ge_freq": 0.60002, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 40, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 4.041666666666667, "weighted_total_content_score": 79.29824561403508, "semantic_relevance": 3.3333333333333335, "factual_accuracy": 3.6666666666666665, "freshness": 4.333333333333333, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 5.0, "accountability": 4.333333333333333, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.8889, "relative_se_rank": 1.825, "normalized_reciprocal_se_rank": 0.04958677685950414, "reciprocal_se_rank": 0.02162400706090026, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "exa", "query_id": 57, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.9375, "weighted_total_content_score": 79.21052631578948, "semantic_relevance": 4.25, "factual_accuracy": 4.0, "freshness": 4.25, "objectivity_tone": 4.0, "layout_ad_density": 2.25, "accountability": 4.0, "transparency": 4.25, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 1.1170212765957448, "normalized_reciprocal_se_rank": 0.24402757736091069, "reciprocal_se_rank": 0.06834643242410232, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "exa", "query_id": 61, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 3.96875, "weighted_total_content_score": 79.21052631578948, "semantic_relevance": 3.5, "factual_accuracy": 4.25, "freshness": 3.0, "objectivity_tone": 4.0, "layout_ad_density": 3.75, "accountability": 4.5, "transparency": 4.5, "authority": 4.25, "avg_ge_freq": 1.0, "relative_se_rank": 1.1833333333333336, "normalized_reciprocal_se_rank": 0.19943883277216612, "reciprocal_se_rank": 0.05763214670981661, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 57, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.9, "weighted_total_content_score": 79.15789473684211, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 4.4, "objectivity_tone": 3.8, "layout_ad_density": 3.2, "accountability": 3.6, "transparency": 3.6, "authority": 3.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 94, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.0, "weighted_total_content_score": 79.15789473684211, "semantic_relevance": 3.8, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.4, "layout_ad_density": 2.4, "accountability": 5.0, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 0.3333, "relative_se_rank": 2.005, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "exa", "query_id": 82, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.95, "weighted_total_content_score": 79.15789473684211, "semantic_relevance": 4.4, "factual_accuracy": 3.8, "freshness": 4.0, "objectivity_tone": 3.8, "layout_ad_density": 3.4, "accountability": 4.2, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.8454545454545457, "normalized_reciprocal_se_rank": 0.08439955106621773, "reciprocal_se_rank": 0.029989212513484353, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gensee", "query_id": 38, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.875, "weighted_total_content_score": 79.15789473684211, "semantic_relevance": 5.0, "factual_accuracy": 4.2, "freshness": 3.8, "objectivity_tone": 4.0, "layout_ad_density": 3.8, "accountability": 3.2, "transparency": 3.4, "authority": 3.6, "avg_ge_freq": 0.60002, "relative_se_rank": 2.1739130434782608, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 71, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.975, "weighted_total_content_score": 79.1578947368421, "semantic_relevance": 2.8, "factual_accuracy": 4.6, "freshness": 3.2, "objectivity_tone": 4.2, "layout_ad_density": 3.4, "accountability": 4.8, "transparency": 4.6, "authority": 4.2, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 2.1675675675675676, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 68, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 79.1578947368421, "semantic_relevance": 2.4, "factual_accuracy": 3.6, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 4.8, "accountability": 4.0, "transparency": 4.8, "authority": 4.6, "avg_ge_freq": 0.73332, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 35, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.975, "weighted_total_content_score": 78.94736842105263, "semantic_relevance": 4.2, "factual_accuracy": 3.8, "freshness": 5.0, "objectivity_tone": 3.4, "layout_ad_density": 3.2, "accountability": 3.6, "transparency": 4.6, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 46, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 4.0, "weighted_total_content_score": 78.94736842105263, "semantic_relevance": 3.0, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 3.0, "accountability": 3.6666666666666665, "transparency": 4.666666666666667, "authority": 4.666666666666667, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 53, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.0625, "weighted_total_content_score": 78.94736842105263, "semantic_relevance": 1.5, "factual_accuracy": 3.5, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.5, "accountability": 3.5, "transparency": 4.5, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 82, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.95, "weighted_total_content_score": 78.94736842105263, "semantic_relevance": 3.6, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 4.0, "accountability": 3.6, "transparency": 3.6, "authority": 3.6, "avg_ge_freq": 0.3333, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 69, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 3.875, "weighted_total_content_score": 78.94736842105263, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 2.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.06666666666666667, "normalized_reciprocal_se_rank": 1.0, "reciprocal_se_rank": 0.25, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "deepseek-reasoning-tavily", "query_id": 41, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.9375, "weighted_total_content_score": 78.94736842105263, "semantic_relevance": 3.5, "factual_accuracy": 4.5, "freshness": 3.5, "objectivity_tone": 4.0, "layout_ad_density": 4.5, "accountability": 2.5, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 4.545454545454546, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 83, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.0, "weighted_total_content_score": 78.94736842105263, "semantic_relevance": 2.75, "factual_accuracy": 4.25, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 3.75, "authority": 4.25, "avg_ge_freq": 1.0, "relative_se_rank": 1.1666666666666667, "normalized_reciprocal_se_rank": 0.2572390572390572, "reciprocal_se_rank": 0.0715210355987055, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "google-search", "query_id": 58, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.875, "weighted_total_content_score": 78.94736842105263, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 3.5, "objectivity_tone": 4.0, "layout_ad_density": 4.5, "accountability": 3.5, "transparency": 3.0, "authority": 3.5, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-4o", "query_id": 3, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.95, "weighted_total_content_score": 78.94736842105263, "semantic_relevance": 4.0, "factual_accuracy": 3.8, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.6, "accountability": 4.0, "transparency": 3.8, "authority": 4.4, "avg_ge_freq": 0.53332, "relative_se_rank": 1.822727272727273, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gpt-4o", "query_id": 18, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 3.96875, "weighted_total_content_score": 78.94736842105263, "semantic_relevance": 3.75, "factual_accuracy": 3.75, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 3.75, "transparency": 4.25, "authority": 4.25, "avg_ge_freq": 0.3333, "relative_se_rank": 1.6861702127659575, "normalized_reciprocal_se_rank": 0.041919191919191925, "reciprocal_se_rank": 0.01978155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "gpt-4o", "query_id": 37, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.9, "weighted_total_content_score": 78.94736842105263, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 4.2, "objectivity_tone": 3.6, "layout_ad_density": 4.2, "accountability": 3.6, "transparency": 3.4, "authority": 3.2, "avg_ge_freq": 0.80002, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 12, "query_type": "VACOS", "num_sources": 2, "unweighted_mean_score": 3.9375, "weighted_total_content_score": 78.94736842105263, "semantic_relevance": 4.0, "factual_accuracy": 4.5, "freshness": 3.5, "objectivity_tone": 3.5, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.3333, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 44, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.9375, "weighted_total_content_score": 78.94736842105263, "semantic_relevance": 4.0, "factual_accuracy": 4.25, "freshness": 4.75, "objectivity_tone": 3.75, "layout_ad_density": 3.25, "accountability": 4.25, "transparency": 3.5, "authority": 3.75, "avg_ge_freq": 0.583325, "relative_se_rank": 1.0872093023255813, "normalized_reciprocal_se_rank": 0.1331890331890332, "reciprocal_se_rank": 0.0417128987517337, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 53, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.875, "weighted_total_content_score": 78.94736842105263, "semantic_relevance": 4.0, "factual_accuracy": 4.5, "freshness": 3.5, "objectivity_tone": 4.5, "layout_ad_density": 4.5, "accountability": 3.0, "transparency": 3.5, "authority": 3.5, "avg_ge_freq": 0.5, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 60, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.9375, "weighted_total_content_score": 78.94736842105263, "semantic_relevance": 3.0, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 5.0, "accountability": 3.5, "transparency": 3.0, "authority": 3.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.0833333333333335, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 6, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.9166666666666665, "weighted_total_content_score": 78.94736842105262, "semantic_relevance": 4.333333333333333, "factual_accuracy": 4.0, "freshness": 4.0, "objectivity_tone": 4.0, "layout_ad_density": 3.6666666666666665, "accountability": 3.6666666666666665, "transparency": 3.6666666666666665, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 34, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.0, "weighted_total_content_score": 78.73684210526315, "semantic_relevance": 2.6, "factual_accuracy": 4.0, "freshness": 3.6, "objectivity_tone": 4.2, "layout_ad_density": 4.4, "accountability": 4.2, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": 0.53332, "relative_se_rank": 1.64, "normalized_reciprocal_se_rank": 0.05594405594405595, "reciprocal_se_rank": 0.02315160567587752, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 36, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.975, "weighted_total_content_score": 78.73684210526315, "semantic_relevance": 4.4, "factual_accuracy": 3.6, "freshness": 4.4, "objectivity_tone": 3.2, "layout_ad_density": 3.2, "accountability": 4.2, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 28, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 3.9166666666666665, "weighted_total_content_score": 78.59649122807018, "semantic_relevance": 4.666666666666667, "factual_accuracy": 4.333333333333333, "freshness": 4.333333333333333, "objectivity_tone": 3.0, "layout_ad_density": 3.6666666666666665, "accountability": 4.333333333333333, "transparency": 3.6666666666666665, "authority": 3.3333333333333335, "avg_ge_freq": 1.0, "relative_se_rank": 0.1259259259259259, "normalized_reciprocal_se_rank": 0.5342953342953344, "reciprocal_se_rank": 0.13809523809523808, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 65, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.975, "weighted_total_content_score": 78.52631578947368, "semantic_relevance": 2.6, "factual_accuracy": 4.2, "freshness": 4.2, "objectivity_tone": 4.2, "layout_ad_density": 4.0, "accountability": 3.6, "transparency": 4.4, "authority": 4.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 68, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.9, "weighted_total_content_score": 78.52631578947367, "semantic_relevance": 4.2, "factual_accuracy": 4.0, "freshness": 2.6, "objectivity_tone": 4.0, "layout_ad_density": 3.8, "accountability": 4.2, "transparency": 4.4, "authority": 4.0, "avg_ge_freq": 0.86668, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 85, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.975, "weighted_total_content_score": 78.52631578947367, "semantic_relevance": 3.4, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 3.8, "accountability": 4.2, "transparency": 4.0, "authority": 3.8, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 84, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.9, "weighted_total_content_score": 78.52631578947367, "semantic_relevance": 4.8, "factual_accuracy": 3.8, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 2.4, "accountability": 4.0, "transparency": 4.0, "authority": 3.6, "avg_ge_freq": 1.0, "relative_se_rank": 1.4272727272727272, "normalized_reciprocal_se_rank": 0.16676656676656676, "reciprocal_se_rank": 0.049781286674490555, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gpt-4o", "query_id": 84, "query_type": "QuoraQuestions", "num_sources": 2, "unweighted_mean_score": 3.875, "weighted_total_content_score": 78.42105263157895, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.5, "layout_ad_density": 2.0, "accountability": 3.5, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 1.1818181818181819, "normalized_reciprocal_se_rank": 0.27705627705627706, "reciprocal_se_rank": 0.07628294036061026, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 38, "query_type": "DebateQA", "num_sources": 2, "unweighted_mean_score": 3.875, "weighted_total_content_score": 78.42105263157893, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 4.5, "objectivity_tone": 3.5, "layout_ad_density": 3.0, "accountability": 3.0, "transparency": 4.5, "authority": 3.5, "avg_ge_freq": 1.0, "relative_se_rank": 1.1521739130434783, "normalized_reciprocal_se_rank": 0.21099887766554432, "reciprocal_se_rank": 0.060409924487594385, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "claude", "query_id": 76, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 4.0, "weighted_total_content_score": 78.42105263157893, "semantic_relevance": 4.0, "factual_accuracy": 3.5, "freshness": 2.5, "objectivity_tone": 3.0, "layout_ad_density": 4.5, "accountability": 5.0, "transparency": 5.0, "authority": 4.5, "avg_ge_freq": 0.83335, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 65, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 4.0, "weighted_total_content_score": 78.42105263157893, "semantic_relevance": 2.5, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.75, "accountability": 4.0, "transparency": 4.5, "authority": 4.25, "avg_ge_freq": 1.0, "relative_se_rank": 1.2804878048780488, "normalized_reciprocal_se_rank": 0.3265993265993266, "reciprocal_se_rank": 0.08818770226537216, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "gpt-5", "query_id": 61, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 3.9375, "weighted_total_content_score": 78.42105263157893, "semantic_relevance": 3.5, "factual_accuracy": 3.5, "freshness": 2.5, "objectivity_tone": 4.5, "layout_ad_density": 4.0, "accountability": 4.5, "transparency": 5.0, "authority": 4.0, "avg_ge_freq": 1.16665, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 84, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.925, "weighted_total_content_score": 78.3157894736842, "semantic_relevance": 4.0, "factual_accuracy": 3.8, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 2.8, "accountability": 3.8, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 0.990909090909091, "normalized_reciprocal_se_rank": 0.2928330928330928, "reciprocal_se_rank": 0.08007397133610725, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 86, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.9, "weighted_total_content_score": 78.3157894736842, "semantic_relevance": 4.8, "factual_accuracy": 4.0, "freshness": 4.6, "objectivity_tone": 3.2, "layout_ad_density": 2.6, "accountability": 3.8, "transparency": 4.4, "authority": 3.8, "avg_ge_freq": 0.93334, "relative_se_rank": 1.2875, "normalized_reciprocal_se_rank": 0.2067821067821068, "reciprocal_se_rank": 0.05939667128987517, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 95, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.9, "weighted_total_content_score": 78.3157894736842, "semantic_relevance": 4.2, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.0, "accountability": 3.8, "transparency": 3.8, "authority": 3.6, "avg_ge_freq": 0.86668, "relative_se_rank": 2.0390243902439025, "normalized_reciprocal_se_rank": 0.031553631553631556, "reciprocal_se_rank": 0.01729079981507166, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gensee", "query_id": 23, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.55, "weighted_total_content_score": 78.3157894736842, "semantic_relevance": 3.5, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 3.75, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 4.6, "authority": 5.0, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.0949999999999998, "normalized_reciprocal_se_rank": 0.029752066115702486, "reciprocal_se_rank": 0.016857899382171228, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gensee", "query_id": 95, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.875, "weighted_total_content_score": 78.3157894736842, "semantic_relevance": 4.6, "factual_accuracy": 4.2, "freshness": 4.6, "objectivity_tone": 3.6, "layout_ad_density": 3.0, "accountability": 3.6, "transparency": 3.8, "authority": 3.6, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 8, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.925, "weighted_total_content_score": 78.3157894736842, "semantic_relevance": 3.8, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 3.0, "accountability": 3.8, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.8697674418604653, "normalized_reciprocal_se_rank": 0.1583838383838384, "reciprocal_se_rank": 0.047766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "deepseek-reasoning-tavily", "query_id": 1, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 3.9375, "weighted_total_content_score": 78.15789473684211, "semantic_relevance": 3.75, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.5, "layout_ad_density": 2.5, "accountability": 4.5, "transparency": 4.0, "authority": 4.25, "avg_ge_freq": 1.0, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 23, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 3.9375, "weighted_total_content_score": 78.15789473684211, "semantic_relevance": 4.0, "factual_accuracy": 4.0, "freshness": 3.75, "objectivity_tone": 3.25, "layout_ad_density": 4.5, "accountability": 4.25, "transparency": 3.75, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 27, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.95, "weighted_total_content_score": 78.10526315789474, "semantic_relevance": 4.0, "factual_accuracy": 4.0, "freshness": 4.0, "objectivity_tone": 3.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": 0.53332, "relative_se_rank": 1.6714285714285715, "normalized_reciprocal_se_rank": 0.07169472502805836, "reciprocal_se_rank": 0.026936353829557714, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 58, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.8, "weighted_total_content_score": 78.10526315789473, "semantic_relevance": 4.8, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 4.4, "layout_ad_density": 3.8, "accountability": 2.4, "transparency": 3.0, "authority": 2.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 97, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.4, "weighted_total_content_score": 78.10526315789473, "semantic_relevance": 4.75, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 4.25, "layout_ad_density": 3.5, "accountability": 4.2, "transparency": 4.4, "authority": 4.2, "avg_ge_freq": 0.86668, "relative_se_rank": 1.52, "normalized_reciprocal_se_rank": 0.3306397306397306, "reciprocal_se_rank": 0.08915857605177993, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 10, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.9, "weighted_total_content_score": 78.10526315789473, "semantic_relevance": 3.8, "factual_accuracy": 4.4, "freshness": 4.4, "objectivity_tone": 3.6, "layout_ad_density": 3.8, "accountability": 3.2, "transparency": 4.2, "authority": 3.8, "avg_ge_freq": 0.3333, "relative_se_rank": 2.1739130434782608, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 28, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.95, "weighted_total_content_score": 78.10526315789473, "semantic_relevance": 4.8, "factual_accuracy": 4.2, "freshness": 4.2, "objectivity_tone": 3.6, "layout_ad_density": 3.5, "accountability": 3.6, "transparency": 3.8, "authority": 3.8, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 0.6933333333333335, "normalized_reciprocal_se_rank": 0.27141377141377143, "reciprocal_se_rank": 0.07492709555816351, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "google-search", "query_id": 61, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.95, "weighted_total_content_score": 78.10526315789473, "semantic_relevance": 3.6, "factual_accuracy": 4.0, "freshness": 2.8, "objectivity_tone": 3.4, "layout_ad_density": 3.8, "accountability": 4.6, "transparency": 4.6, "authority": 4.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "tavily", "query_id": 2, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.95, "weighted_total_content_score": 78.10526315789473, "semantic_relevance": 2.8, "factual_accuracy": 4.4, "freshness": 4.4, "objectivity_tone": 3.8, "layout_ad_density": 2.4, "accountability": 4.6, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": 0.66666, "relative_se_rank": 1.1288888888888888, "normalized_reciprocal_se_rank": 0.2929652076318743, "reciprocal_se_rank": 0.0801057173678533, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "tavily", "query_id": 10, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.925, "weighted_total_content_score": 78.10526315789473, "semantic_relevance": 3.8, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 2.4, "accountability": 4.0, "transparency": 4.0, "authority": 4.6, "avg_ge_freq": 1.0, "relative_se_rank": 1.7826086956521738, "normalized_reciprocal_se_rank": 0.05594405594405595, "reciprocal_se_rank": 0.02315160567587752, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "tavily", "query_id": 78, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.975, "weighted_total_content_score": 78.10526315789473, "semantic_relevance": 2.8, "factual_accuracy": 3.6, "freshness": 4.0, "objectivity_tone": 4.2, "layout_ad_density": 3.2, "accountability": 4.8, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": 0.93334, "relative_se_rank": 0.48936170212765956, "normalized_reciprocal_se_rank": 0.48424563091229755, "reciprocal_se_rank": 0.12606873169979965, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "gensee", "query_id": 17, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.85, "weighted_total_content_score": 77.89473684210527, "semantic_relevance": 4.6, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 2.4, "accountability": 4.0, "transparency": 3.8, "authority": 3.2, "avg_ge_freq": 0.3333, "relative_se_rank": 1.8697674418604653, "normalized_reciprocal_se_rank": 0.1583838383838384, "reciprocal_se_rank": 0.047766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 27, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 77.89473684210526, "semantic_relevance": 4.25, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.2, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 44, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.75, "weighted_total_content_score": 77.89473684210526, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.5, "accountability": 2.5, "transparency": 3.0, "authority": 3.0, "avg_ge_freq": 0.83335, "relative_se_rank": 1.2790697674418605, "normalized_reciprocal_se_rank": 0.13986013986013987, "reciprocal_se_rank": 0.043315907393577296, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 72, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.9, "weighted_total_content_score": 77.89473684210526, "semantic_relevance": 2.8, "factual_accuracy": 4.4, "freshness": 4.6, "objectivity_tone": 4.4, "layout_ad_density": 4.0, "accountability": 3.2, "transparency": 4.2, "authority": 3.6, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 1.9658536585365856, "normalized_reciprocal_se_rank": 0.1306397306397306, "reciprocal_se_rank": 0.04110032362459547, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 42, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.9375, "weighted_total_content_score": 77.89473684210526, "semantic_relevance": 2.5, "factual_accuracy": 4.25, "freshness": 4.0, "objectivity_tone": 4.25, "layout_ad_density": 4.0, "accountability": 3.75, "transparency": 4.25, "authority": 4.5, "avg_ge_freq": 0.916675, "relative_se_rank": 1.6326530612244898, "normalized_reciprocal_se_rank": 0.03513394817742644, "reciprocal_se_rank": 0.018151118615449553, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "gensee", "query_id": 60, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 3.75, "weighted_total_content_score": 77.89473684210526, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 2.0, "transparency": 2.0, "authority": 3.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.0833333333333335, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 98, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 3.8333333333333335, "weighted_total_content_score": 77.89473684210526, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 4.666666666666667, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 2.6666666666666665, "accountability": 3.6666666666666665, "transparency": 3.3333333333333335, "authority": 3.6666666666666665, "avg_ge_freq": 0.6666666666666666, "relative_se_rank": 1.05, "normalized_reciprocal_se_rank": 0.14921703810592699, "reciprocal_se_rank": 0.04556428828273488, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "gpt-5", "query_id": 58, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.75, "weighted_total_content_score": 77.89473684210526, "semantic_relevance": 5.0, "factual_accuracy": 4.5, "freshness": 3.5, "objectivity_tone": 4.5, "layout_ad_density": 1.5, "accountability": 4.0, "transparency": 3.5, "authority": 3.5, "avg_ge_freq": 0.5, "relative_se_rank": 1.1041666666666667, "normalized_reciprocal_se_rank": 0.21099887766554432, "reciprocal_se_rank": 0.060409924487594385, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 7, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 3.90625, "weighted_total_content_score": 77.89473684210526, "semantic_relevance": 3.75, "factual_accuracy": 4.25, "freshness": 4.75, "objectivity_tone": 3.5, "layout_ad_density": 2.75, "accountability": 3.75, "transparency": 4.5, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.857142857142857, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 39, "query_type": "DebateQA", "num_sources": 2, "unweighted_mean_score": 3.9375, "weighted_total_content_score": 77.89473684210526, "semantic_relevance": 3.5, "factual_accuracy": 3.5, "freshness": 4.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.0, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 0.19791666666666666, "normalized_reciprocal_se_rank": 0.415913521176679, "reciprocal_se_rank": 0.10964912280701754, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "tavily", "query_id": 42, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.9375, "weighted_total_content_score": 77.89473684210526, "semantic_relevance": 2.0, "factual_accuracy": 4.5, "freshness": 3.5, "objectivity_tone": 4.5, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.0510204081632653, "normalized_reciprocal_se_rank": 0.32659932659932656, "reciprocal_se_rank": 0.08818770226537216, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 38, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.875, "weighted_total_content_score": 77.89473684210525, "semantic_relevance": 4.4, "factual_accuracy": 3.8, "freshness": 4.6, "objectivity_tone": 3.8, "layout_ad_density": 3.2, "accountability": 3.4, "transparency": 4.0, "authority": 3.8, "avg_ge_freq": 0.73332, "relative_se_rank": 0.1782608695652174, "normalized_reciprocal_se_rank": 0.40796055796055797, "reciprocal_se_rank": 0.10773809523809523, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-4o", "query_id": 27, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.85, "weighted_total_content_score": 77.89473684210525, "semantic_relevance": 4.6, "factual_accuracy": 3.8, "freshness": 4.0, "objectivity_tone": 4.0, "layout_ad_density": 3.6, "accountability": 3.8, "transparency": 3.6, "authority": 3.4, "avg_ge_freq": 0.73334, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 9, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.9, "weighted_total_content_score": 77.89473684210525, "semantic_relevance": 3.4, "factual_accuracy": 4.6, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 2.8, "accountability": 4.2, "transparency": 3.8, "authority": 3.8, "avg_ge_freq": 1.0, "relative_se_rank": 1.4577777777777778, "normalized_reciprocal_se_rank": 0.1198912198912199, "reciprocal_se_rank": 0.03851755041075429, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 86, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.875, "weighted_total_content_score": 77.6842105263158, "semantic_relevance": 4.6, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 2.2, "accountability": 3.8, "transparency": 4.6, "authority": 3.6, "avg_ge_freq": 0.73334, "relative_se_rank": 1.0166666666666668, "normalized_reciprocal_se_rank": 0.22060454165717322, "reciprocal_se_rank": 0.06271808161179648, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "google-search", "query_id": 90, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.85, "weighted_total_content_score": 77.6842105263158, "semantic_relevance": 4.8, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.4, "layout_ad_density": 3.2, "accountability": 2.6, "transparency": 4.2, "authority": 3.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-4o", "query_id": 95, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.875, "weighted_total_content_score": 77.6842105263158, "semantic_relevance": 4.2, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 2.2, "accountability": 4.0, "transparency": 4.2, "authority": 3.8, "avg_ge_freq": 0.3333, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 30, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.95, "weighted_total_content_score": 77.6842105263158, "semantic_relevance": 4.2, "factual_accuracy": 3.2, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 3.8, "accountability": 4.4, "transparency": 3.6, "authority": 4.2, "avg_ge_freq": 1.0, "relative_se_rank": 0.5434782608695652, "normalized_reciprocal_se_rank": 0.38615039281705954, "reciprocal_se_rank": 0.10249730312837109, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 7, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.9, "weighted_total_content_score": 77.68421052631578, "semantic_relevance": 3.0, "factual_accuracy": 3.8, "freshness": 5.0, "objectivity_tone": 4.6, "layout_ad_density": 2.4, "accountability": 3.6, "transparency": 4.0, "authority": 4.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "tavily", "query_id": 89, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 3.875, "weighted_total_content_score": 77.63157894736842, "semantic_relevance": 4.25, "factual_accuracy": 3.75, "freshness": 4.75, "objectivity_tone": 3.75, "layout_ad_density": 3.5, "accountability": 3.75, "transparency": 3.25, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.9146341463414633, "normalized_reciprocal_se_rank": 0.2965554060691482, "reciprocal_se_rank": 0.08096841068166424, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 74, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 3.90625, "weighted_total_content_score": 77.63157894736841, "semantic_relevance": 3.75, "factual_accuracy": 4.0, "freshness": 4.25, "objectivity_tone": 3.5, "layout_ad_density": 3.5, "accountability": 4.0, "transparency": 4.25, "authority": 4.0, "avg_ge_freq": 0.666675, "relative_se_rank": 1.622340425531915, "normalized_reciprocal_se_rank": 0.11994949494949496, "reciprocal_se_rank": 0.03853155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 76, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 4.0, "weighted_total_content_score": 77.54385964912281, "semantic_relevance": 3.0, "factual_accuracy": 3.3333333333333335, "freshness": 5.0, "objectivity_tone": 3.3333333333333335, "layout_ad_density": 4.0, "accountability": 4.666666666666667, "transparency": 4.333333333333333, "authority": 4.333333333333333, "avg_ge_freq": 0.5555333333333333, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 43, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 3.75, "weighted_total_content_score": 77.54385964912281, "semantic_relevance": 4.666666666666667, "factual_accuracy": 4.333333333333333, "freshness": 3.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 4.333333333333333, "accountability": 2.6666666666666665, "transparency": 3.0, "authority": 3.3333333333333335, "avg_ge_freq": 0.4444333333333333, "relative_se_rank": 2.0833333333333335, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 38, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.9, "weighted_total_content_score": 77.47368421052632, "semantic_relevance": 3.6, "factual_accuracy": 4.0, "freshness": 4.2, "objectivity_tone": 3.6, "layout_ad_density": 3.2, "accountability": 4.0, "transparency": 4.6, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 83, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.9, "weighted_total_content_score": 77.47368421052632, "semantic_relevance": 3.0, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.2, "accountability": 3.6, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 86, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.85, "weighted_total_content_score": 77.47368421052632, "semantic_relevance": 4.4, "factual_accuracy": 4.0, "freshness": 4.0, "objectivity_tone": 3.6, "layout_ad_density": 2.6, "accountability": 3.4, "transparency": 4.6, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 13, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.925, "weighted_total_content_score": 77.47368421052632, "semantic_relevance": 3.0, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.4, "layout_ad_density": 2.8, "accountability": 4.0, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 0.3333, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 30, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.85, "weighted_total_content_score": 77.47368421052632, "semantic_relevance": 4.4, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 3.6, "accountability": 3.6, "transparency": 3.4, "authority": 3.2, "avg_ge_freq": 0.60002, "relative_se_rank": 1.0739130434782609, "normalized_reciprocal_se_rank": 0.22083052749719417, "reciprocal_se_rank": 0.06277238403451996, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 46, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.9, "weighted_total_content_score": 77.47368421052632, "semantic_relevance": 3.2, "factual_accuracy": 3.4, "freshness": 5.0, "objectivity_tone": 4.6, "layout_ad_density": 3.4, "accountability": 3.4, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": 0.53332, "relative_se_rank": 1.6133333333333333, "normalized_reciprocal_se_rank": 0.044291334396597555, "reciprocal_se_rank": 0.020351558507920284, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 92, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.925, "weighted_total_content_score": 77.47368421052632, "semantic_relevance": 3.2, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 3.4, "layout_ad_density": 2.6, "accountability": 4.8, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 0.8734693877551021, "normalized_reciprocal_se_rank": 0.3166137566137566, "reciprocal_se_rank": 0.08578825705039297, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "exa", "query_id": 86, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.85, "weighted_total_content_score": 77.47368421052632, "semantic_relevance": 4.4, "factual_accuracy": 4.0, "freshness": 4.6, "objectivity_tone": 3.6, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 3.6, "authority": 3.6, "avg_ge_freq": 1.0, "relative_se_rank": 1.3791666666666669, "normalized_reciprocal_se_rank": 0.17631313131313134, "reciprocal_se_rank": 0.0520752427184466, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "google-search", "query_id": 19, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.825, "weighted_total_content_score": 77.47368421052632, "semantic_relevance": 4.4, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.4, "accountability": 3.2, "transparency": 3.2, "authority": 3.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 94, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.85, "weighted_total_content_score": 77.47368421052632, "semantic_relevance": 4.6, "factual_accuracy": 4.0, "freshness": 3.8, "objectivity_tone": 3.4, "layout_ad_density": 2.8, "accountability": 4.6, "transparency": 4.0, "authority": 3.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "tavily", "query_id": 76, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.95, "weighted_total_content_score": 77.47368421052632, "semantic_relevance": 3.4, "factual_accuracy": 3.6, "freshness": 4.8, "objectivity_tone": 3.4, "layout_ad_density": 3.8, "accountability": 4.2, "transparency": 4.4, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.0095238095238095, "normalized_reciprocal_se_rank": 0.4196632996632997, "reciprocal_se_rank": 0.11055016181229774, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gensee", "query_id": 1, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.85, "weighted_total_content_score": 77.4736842105263, "semantic_relevance": 4.6, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.4, "layout_ad_density": 2.2, "accountability": 4.4, "transparency": 3.6, "authority": 3.6, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.7866666666666666, "normalized_reciprocal_se_rank": 0.1583838383838384, "reciprocal_se_rank": 0.047766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gpt-4o", "query_id": 9, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.85, "weighted_total_content_score": 77.4736842105263, "semantic_relevance": 4.0, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 2.4, "accountability": 3.6, "transparency": 3.8, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 1.4844444444444445, "normalized_reciprocal_se_rank": 0.21503928170594838, "reciprocal_se_rank": 0.061380798274002155, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "exa", "query_id": 98, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.875, "weighted_total_content_score": 77.26315789473685, "semantic_relevance": 4.4, "factual_accuracy": 3.8, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 2.6, "accountability": 3.8, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.7100000000000002, "normalized_reciprocal_se_rank": 0.07164391164391165, "reciprocal_se_rank": 0.0269241438173477, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gensee", "query_id": 63, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.85, "weighted_total_content_score": 77.26315789473685, "semantic_relevance": 4.2, "factual_accuracy": 4.6, "freshness": 2.8, "objectivity_tone": 3.0, "layout_ad_density": 3.6, "accountability": 4.6, "transparency": 4.2, "authority": 3.8, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 4.761904761904762, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 87, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.9, "weighted_total_content_score": 77.26315789473684, "semantic_relevance": 2.6, "factual_accuracy": 4.4, "freshness": 4.4, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 4.2, "authority": 4.6, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 86, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.825, "weighted_total_content_score": 77.26315789473684, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 4.4, "objectivity_tone": 3.2, "layout_ad_density": 2.2, "accountability": 4.2, "transparency": 4.0, "authority": 3.6, "avg_ge_freq": 1.0, "relative_se_rank": 1.6833333333333331, "normalized_reciprocal_se_rank": 0.11082251082251082, "reciprocal_se_rank": 0.0363384188626907, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "claude", "query_id": 95, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 3.875, "weighted_total_content_score": 77.19298245614034, "semantic_relevance": 4.333333333333333, "factual_accuracy": 3.6666666666666665, "freshness": 5.0, "objectivity_tone": 3.3333333333333335, "layout_ad_density": 2.3333333333333335, "accountability": 4.333333333333333, "transparency": 4.666666666666667, "authority": 3.3333333333333335, "avg_ge_freq": 0.8889, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 23, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 3.9166666666666665, "weighted_total_content_score": 77.19298245614034, "semantic_relevance": 3.6666666666666665, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 3.6666666666666665, "accountability": 5.0, "transparency": 3.3333333333333335, "authority": 3.6666666666666665, "avg_ge_freq": 1.0, "relative_se_rank": 1.875, "normalized_reciprocal_se_rank": 0.03607503607503607, "reciprocal_se_rank": 0.018377253814147013, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 66, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 3.8333333333333335, "weighted_total_content_score": 77.19298245614034, "semantic_relevance": 3.3333333333333335, "factual_accuracy": 3.6666666666666665, "freshness": 3.0, "objectivity_tone": 5.0, "layout_ad_density": 3.3333333333333335, "accountability": 4.0, "transparency": 4.0, "authority": 4.333333333333333, "avg_ge_freq": 0.6666666666666666, "relative_se_rank": 0.8536585365853658, "normalized_reciprocal_se_rank": 0.518037518037518, "reciprocal_se_rank": 0.1341886269070735, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "exa", "query_id": 34, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 3.90625, "weighted_total_content_score": 77.10526315789474, "semantic_relevance": 3.75, "factual_accuracy": 3.5, "freshness": 3.75, "objectivity_tone": 3.5, "layout_ad_density": 3.5, "accountability": 4.25, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 0.14500000000000002, "normalized_reciprocal_se_rank": 0.4846887680221013, "reciprocal_se_rank": 0.12617521367521367, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "claude", "query_id": 34, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 3.84375, "weighted_total_content_score": 77.10526315789473, "semantic_relevance": 4.25, "factual_accuracy": 3.5, "freshness": 4.75, "objectivity_tone": 4.0, "layout_ad_density": 2.75, "accountability": 3.5, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.833325, "relative_se_rank": 0.585, "normalized_reciprocal_se_rank": 0.4254295087628421, "reciprocal_se_rank": 0.11193573147456642, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 24, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.875, "weighted_total_content_score": 77.05263157894737, "semantic_relevance": 3.8, "factual_accuracy": 3.8, "freshness": 4.2, "objectivity_tone": 3.6, "layout_ad_density": 3.4, "accountability": 4.0, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 15, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.875, "weighted_total_content_score": 77.05263157894737, "semantic_relevance": 3.8, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 3.2, "accountability": 2.8, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 0.3333, "relative_se_rank": 2.6315789473684212, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 51, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.875, "weighted_total_content_score": 77.05263157894737, "semantic_relevance": 3.4, "factual_accuracy": 4.2, "freshness": 4.4, "objectivity_tone": 3.6, "layout_ad_density": 4.0, "accountability": 3.8, "transparency": 4.0, "authority": 3.6, "avg_ge_freq": 0.59998, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 24, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.78125, "weighted_total_content_score": 77.05263157894737, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.5, "objectivity_tone": 5.0, "layout_ad_density": 4.75, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.93334, "relative_se_rank": 1.2936170212765956, "normalized_reciprocal_se_rank": 0.3306397306397306, "reciprocal_se_rank": 0.08915857605177993, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gpt-5", "query_id": 10, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.925, "weighted_total_content_score": 77.05263157894737, "semantic_relevance": 2.6, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.4, "accountability": 3.4, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": 0.86668, "relative_se_rank": 2.1739130434782608, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 92, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.875, "weighted_total_content_score": 76.84210526315789, "semantic_relevance": 4.0, "factual_accuracy": 3.8, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 3.2, "accountability": 4.6, "transparency": 3.8, "authority": 3.4, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.6448979591836732, "normalized_reciprocal_se_rank": 0.1306397306397306, "reciprocal_se_rank": 0.04110032362459547, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 51, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 4.0, "weighted_total_content_score": 76.84210526315789, "semantic_relevance": 2.0, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 4, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 3.8125, "weighted_total_content_score": 76.84210526315789, "semantic_relevance": 4.25, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 3.25, "layout_ad_density": 2.5, "accountability": 3.25, "transparency": 3.75, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 12, "query_type": "VACOS", "num_sources": 1, "unweighted_mean_score": 3.75, "weighted_total_content_score": 76.84210526315789, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.0, "accountability": 2.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 19, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.775, "weighted_total_content_score": 76.84210526315789, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 2.4, "accountability": 3.4, "transparency": 3.4, "authority": 3.4, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 43, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 3.75, "weighted_total_content_score": 76.84210526315789, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 3.0, "authority": 2.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.0833333333333335, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 48, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.9375, "weighted_total_content_score": 76.84210526315789, "semantic_relevance": 2.5, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 4.0, "accountability": 3.5, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 83, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.925, "weighted_total_content_score": 76.84210526315789, "semantic_relevance": 2.0, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.4, "accountability": 3.4, "transparency": 4.0, "authority": 4.4, "avg_ge_freq": 0.3333, "relative_se_rank": 1.8444444444444446, "normalized_reciprocal_se_rank": 0.038159371492704826, "reciprocal_se_rank": 0.018878101402373244, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 56, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 3.875, "weighted_total_content_score": 76.84210526315789, "semantic_relevance": 2.0, "factual_accuracy": 4.0, "freshness": 4.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 2.0, "transparency": 5.0, "authority": 4.0, "avg_ge_freq": 0.6667, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 58, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.71875, "weighted_total_content_score": 76.84210526315789, "semantic_relevance": 5.0, "factual_accuracy": 4.5, "freshness": 3.75, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 3.25, "transparency": 2.5, "authority": 2.75, "avg_ge_freq": 0.666675, "relative_se_rank": 1.2604166666666667, "normalized_reciprocal_se_rank": 0.13585858585858587, "reciprocal_se_rank": 0.04235436893203884, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 88, "query_type": "QuoraQuestions", "num_sources": 1, "unweighted_mean_score": 3.875, "weighted_total_content_score": 76.84210526315789, "semantic_relevance": 4.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.6667, "relative_se_rank": 0.8837209302325582, "normalized_reciprocal_se_rank": 0.061098792806109886, "reciprocal_se_rank": 0.024390243902439025, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 90, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.825, "weighted_total_content_score": 76.84210526315789, "semantic_relevance": 4.4, "factual_accuracy": 3.8, "freshness": 4.2, "objectivity_tone": 3.6, "layout_ad_density": 3.4, "accountability": 3.0, "transparency": 4.0, "authority": 4.2, "avg_ge_freq": 1.0, "relative_se_rank": 1.168888888888889, "normalized_reciprocal_se_rank": 0.11910213243546577, "reciprocal_se_rank": 0.03832793959007551, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "tavily", "query_id": 63, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 3.9166666666666665, "weighted_total_content_score": 76.84210526315788, "semantic_relevance": 3.0, "factual_accuracy": 3.6666666666666665, "freshness": 4.0, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 4.666666666666667, "accountability": 4.333333333333333, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 4.761904761904762, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 48, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.825, "weighted_total_content_score": 76.63157894736841, "semantic_relevance": 3.2, "factual_accuracy": 4.2, "freshness": 3.4, "objectivity_tone": 4.2, "layout_ad_density": 3.4, "accountability": 3.6, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": 1.0, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 86, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.775, "weighted_total_content_score": 76.63157894736841, "semantic_relevance": 4.6, "factual_accuracy": 4.2, "freshness": 3.4, "objectivity_tone": 3.6, "layout_ad_density": 3.2, "accountability": 3.6, "transparency": 3.8, "authority": 3.8, "avg_ge_freq": 0.73332, "relative_se_rank": 1.2916666666666665, "normalized_reciprocal_se_rank": 0.19522206188872854, "reciprocal_se_rank": 0.05661889351209739, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gensee", "query_id": 3, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.8, "weighted_total_content_score": 76.63157894736841, "semantic_relevance": 4.4, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 3.4, "layout_ad_density": 2.0, "accountability": 4.0, "transparency": 3.8, "authority": 3.6, "avg_ge_freq": 0.73334, "relative_se_rank": 1.822727272727273, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gensee", "query_id": 86, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.75, "weighted_total_content_score": 76.63157894736841, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 3.4, "objectivity_tone": 3.8, "layout_ad_density": 3.4, "accountability": 3.6, "transparency": 3.2, "authority": 3.6, "avg_ge_freq": 0.66668, "relative_se_rank": 1.1375, "normalized_reciprocal_se_rank": 0.13924963924963923, "reciprocal_se_rank": 0.04316920943134535, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "tavily", "query_id": 84, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.85, "weighted_total_content_score": 76.63157894736841, "semantic_relevance": 3.8, "factual_accuracy": 3.6, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 2.8, "accountability": 4.0, "transparency": 3.8, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.5772727272727274, "normalized_reciprocal_se_rank": 0.4294179894179894, "reciprocal_se_rank": 0.11289412852519647, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "google-search", "query_id": 54, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.8125, "weighted_total_content_score": 76.57894736842104, "semantic_relevance": 4.0, "factual_accuracy": 4.25, "freshness": 4.0, "objectivity_tone": 3.5, "layout_ad_density": 4.0, "accountability": 3.25, "transparency": 3.75, "authority": 3.75, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "claude", "query_id": 82, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 3.9166666666666665, "weighted_total_content_score": 76.49122807017544, "semantic_relevance": 3.3333333333333335, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 2.6666666666666665, "layout_ad_density": 3.3333333333333335, "accountability": 4.333333333333333, "transparency": 4.666666666666667, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.575757575757576, "normalized_reciprocal_se_rank": 0.11264156718702174, "reciprocal_se_rank": 0.03677552221241542, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "deepseek-chat-gensee", "query_id": 81, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 3.875, "weighted_total_content_score": 76.49122807017544, "semantic_relevance": 3.0, "factual_accuracy": 4.0, "freshness": 4.0, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.333333333333333, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.6341463414634145, "normalized_reciprocal_se_rank": 0.3333333333333333, "reciprocal_se_rank": 0.08980582524271845, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 28, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.825, "weighted_total_content_score": 76.42105263157895, "semantic_relevance": 3.8, "factual_accuracy": 4.0, "freshness": 3.6, "objectivity_tone": 3.6, "layout_ad_density": 3.8, "accountability": 4.0, "transparency": 3.6, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 72, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.875, "weighted_total_content_score": 76.42105263157893, "semantic_relevance": 2.4, "factual_accuracy": 3.8, "freshness": 5.0, "objectivity_tone": 4.4, "layout_ad_density": 3.8, "accountability": 3.4, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 28, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.175, "weighted_total_content_score": 76.42105263157893, "semantic_relevance": 4.6, "factual_accuracy": 4.4, "freshness": 3.25, "objectivity_tone": 4.0, "layout_ad_density": 4.2, "accountability": 3.75, "transparency": 4.25, "authority": 4.5, "avg_ge_freq": 0.3333, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 47, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.84375, "weighted_total_content_score": 76.3157894736842, "semantic_relevance": 3.0, "factual_accuracy": 3.5, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 3.75, "accountability": 2.75, "transparency": 4.25, "authority": 4.0, "avg_ge_freq": 0.49999999999999994, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 42, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.875, "weighted_total_content_score": 76.3157894736842, "semantic_relevance": 2.0, "factual_accuracy": 4.0, "freshness": 3.5, "objectivity_tone": 4.5, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 0.12244897959183673, "normalized_reciprocal_se_rank": 0.4277777777777778, "reciprocal_se_rank": 0.1125, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gpt-5", "query_id": 13, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.875, "weighted_total_content_score": 76.21052631578948, "semantic_relevance": 2.8, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 3.4, "layout_ad_density": 2.4, "accountability": 4.4, "transparency": 4.6, "authority": 4.2, "avg_ge_freq": 0.8, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 69, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.825, "weighted_total_content_score": 76.21052631578947, "semantic_relevance": 3.6, "factual_accuracy": 4.2, "freshness": 4.8, "objectivity_tone": 3.4, "layout_ad_density": 2.2, "accountability": 4.6, "transparency": 3.8, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 37, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.85, "weighted_total_content_score": 76.21052631578947, "semantic_relevance": 3.2, "factual_accuracy": 4.4, "freshness": 4.4, "objectivity_tone": 3.2, "layout_ad_density": 3.0, "accountability": 4.2, "transparency": 4.4, "authority": 4.0, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 11, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.825, "weighted_total_content_score": 76.21052631578947, "semantic_relevance": 4.0, "factual_accuracy": 3.8, "freshness": 5.0, "objectivity_tone": 3.4, "layout_ad_density": 2.0, "accountability": 4.4, "transparency": 3.6, "authority": 4.4, "avg_ge_freq": 1.0, "relative_se_rank": 1.4878048780487805, "normalized_reciprocal_se_rank": 0.28902356902356907, "reciprocal_se_rank": 0.07915857605177994, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 12, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.75, "weighted_total_content_score": 76.14035087719299, "semantic_relevance": 4.0, "factual_accuracy": 4.666666666666667, "freshness": 5.0, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 2.3333333333333335, "accountability": 3.3333333333333335, "transparency": 3.3333333333333335, "authority": 3.6666666666666665, "avg_ge_freq": 0.3333, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 10, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 3.78125, "weighted_total_content_score": 76.05263157894737, "semantic_relevance": 3.75, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 3.25, "layout_ad_density": 2.5, "accountability": 2.75, "transparency": 4.25, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 13, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 3.78125, "weighted_total_content_score": 76.05263157894737, "semantic_relevance": 4.25, "factual_accuracy": 4.0, "freshness": 4.75, "objectivity_tone": 3.5, "layout_ad_density": 2.25, "accountability": 4.25, "transparency": 4.0, "authority": 3.25, "avg_ge_freq": 0.750025, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 23, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.925, "weighted_total_content_score": 76.0, "semantic_relevance": 3.8, "factual_accuracy": 3.2, "freshness": 5.0, "objectivity_tone": 2.4, "layout_ad_density": 3.8, "accountability": 4.8, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 25, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.275, "weighted_total_content_score": 76.0, "semantic_relevance": 3.75, "factual_accuracy": 4.25, "freshness": 4.4, "objectivity_tone": 4.25, "layout_ad_density": 4.2, "accountability": 3.8, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.3853658536585365, "normalized_reciprocal_se_rank": 0.07152667494913485, "reciprocal_se_rank": 0.026895972863991136, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gensee", "query_id": 88, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.725, "weighted_total_content_score": 76.0, "semantic_relevance": 4.6, "factual_accuracy": 4.0, "freshness": 3.6, "objectivity_tone": 4.0, "layout_ad_density": 2.2, "accountability": 4.0, "transparency": 3.8, "authority": 3.6, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 1.7209302325581395, "normalized_reciprocal_se_rank": 0.03345137651020004, "reciprocal_se_rank": 0.017746811345897583, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "google-search", "query_id": 13, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.875, "weighted_total_content_score": 76.0, "semantic_relevance": 1.8, "factual_accuracy": 4.4, "freshness": 4.0, "objectivity_tone": 4.0, "layout_ad_density": 3.6, "accountability": 3.8, "transparency": 4.4, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "claude", "query_id": 63, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 3.75, "weighted_total_content_score": 75.78947368421053, "semantic_relevance": 4.333333333333333, "factual_accuracy": 4.333333333333333, "freshness": 2.3333333333333335, "objectivity_tone": 3.3333333333333335, "layout_ad_density": 3.3333333333333335, "accountability": 4.666666666666667, "transparency": 4.0, "authority": 3.6666666666666665, "avg_ge_freq": 0.7777666666666666, "relative_se_rank": 4.761904761904762, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 1, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.775, "weighted_total_content_score": 75.78947368421053, "semantic_relevance": 3.6, "factual_accuracy": 4.0, "freshness": 4.4, "objectivity_tone": 4.0, "layout_ad_density": 2.6, "accountability": 3.8, "transparency": 4.0, "authority": 3.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 66, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 4.0, "weighted_total_content_score": 75.78947368421052, "semantic_relevance": 2.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 2.0, "layout_ad_density": 5.0, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.6667, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 87, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 3.7916666666666665, "weighted_total_content_score": 75.78947368421052, "semantic_relevance": 3.3333333333333335, "factual_accuracy": 4.0, "freshness": 2.6666666666666665, "objectivity_tone": 4.0, "layout_ad_density": 3.3333333333333335, "accountability": 4.333333333333333, "transparency": 4.0, "authority": 4.666666666666667, "avg_ge_freq": 0.4444333333333333, "relative_se_rank": 1.7317073170731707, "normalized_reciprocal_se_rank": 0.07323232323232325, "reciprocal_se_rank": 0.027305825242718445, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "google-search", "query_id": 2, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.875, "weighted_total_content_score": 75.78947368421052, "semantic_relevance": 2.0, "factual_accuracy": 4.0, "freshness": 4.0, "objectivity_tone": 4.0, "layout_ad_density": 3.3333333333333335, "accountability": 4.666666666666667, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 10, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 3.75, "weighted_total_content_score": 75.78947368421052, "semantic_relevance": 4.75, "factual_accuracy": 4.25, "freshness": 4.75, "objectivity_tone": 3.0, "layout_ad_density": 3.25, "accountability": 2.75, "transparency": 3.75, "authority": 3.5, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 39, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.825, "weighted_total_content_score": 75.78947368421052, "semantic_relevance": 3.8, "factual_accuracy": 3.8, "freshness": 4.2, "objectivity_tone": 3.2, "layout_ad_density": 3.4, "accountability": 3.8, "transparency": 4.6, "authority": 3.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 80, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 3.6875, "weighted_total_content_score": 75.78947368421052, "semantic_relevance": 4.75, "factual_accuracy": 4.25, "freshness": 3.25, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 3.75, "transparency": 3.0, "authority": 3.5, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 95, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 3.75, "weighted_total_content_score": 75.78947368421052, "semantic_relevance": 4.333333333333333, "factual_accuracy": 4.0, "freshness": 4.0, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 3.3333333333333335, "accountability": 3.3333333333333335, "transparency": 3.6666666666666665, "authority": 3.6666666666666665, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-5", "query_id": 28, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.24375, "weighted_total_content_score": 75.78947368421052, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 3.25, "accountability": 3.75, "transparency": 4.75, "authority": 4.75, "avg_ge_freq": 0.583325, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 68, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.975, "weighted_total_content_score": 75.78947368421052, "semantic_relevance": 2.0, "factual_accuracy": 3.8, "freshness": 5.0, "objectivity_tone": 2.6, "layout_ad_density": 4.4, "accountability": 5.0, "transparency": 4.4, "authority": 4.6, "avg_ge_freq": 1.0, "relative_se_rank": 1.4590909090909092, "normalized_reciprocal_se_rank": 0.10868686868686868, "reciprocal_se_rank": 0.0358252427184466, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 11, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.8, "weighted_total_content_score": 75.7894736842105, "semantic_relevance": 3.8, "factual_accuracy": 3.6, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 3.0, "accountability": 3.2, "transparency": 3.8, "authority": 4.2, "avg_ge_freq": 0.3333, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 83, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.8, "weighted_total_content_score": 75.57894736842105, "semantic_relevance": 3.4, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 3.4, "layout_ad_density": 3.2, "accountability": 3.4, "transparency": 3.8, "authority": 4.0, "avg_ge_freq": 0.73336, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 34, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.8, "weighted_total_content_score": 75.57894736842104, "semantic_relevance": 3.6, "factual_accuracy": 3.4, "freshness": 4.4, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 3.6, "transparency": 4.4, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 48, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.85, "weighted_total_content_score": 75.57894736842104, "semantic_relevance": 1.8, "factual_accuracy": 3.8, "freshness": 5.0, "objectivity_tone": 4.6, "layout_ad_density": 3.2, "accountability": 3.6, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 79, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 3.78125, "weighted_total_content_score": 75.52631578947368, "semantic_relevance": 4.0, "factual_accuracy": 4.0, "freshness": 3.25, "objectivity_tone": 3.25, "layout_ad_density": 3.25, "accountability": 4.0, "transparency": 4.25, "authority": 4.25, "avg_ge_freq": 1.0, "relative_se_rank": 0.13333333333333333, "normalized_reciprocal_se_rank": 0.49466089466089463, "reciprocal_se_rank": 0.1285714285714286, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "exa", "query_id": 94, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 3.8125, "weighted_total_content_score": 75.52631578947368, "semantic_relevance": 3.25, "factual_accuracy": 4.25, "freshness": 5.0, "objectivity_tone": 3.25, "layout_ad_density": 2.25, "accountability": 4.5, "transparency": 4.25, "authority": 3.75, "avg_ge_freq": 1.0, "relative_se_rank": 1.53125, "normalized_reciprocal_se_rank": 0.10039026629935723, "reciprocal_se_rank": 0.03383164165931156, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 98, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 3.78125, "weighted_total_content_score": 75.52631578947367, "semantic_relevance": 4.25, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 2.75, "accountability": 4.0, "transparency": 3.75, "authority": 3.5, "avg_ge_freq": 0.3333, "relative_se_rank": 1.5562500000000001, "normalized_reciprocal_se_rank": 0.07948826241509169, "reciprocal_se_rank": 0.028809072764791444, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "deepseek-chat-gensee", "query_id": 62, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.75, "weighted_total_content_score": 75.36842105263159, "semantic_relevance": 4.25, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 4.75, "layout_ad_density": 4.75, "accountability": 5.0, "transparency": 4.75, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-reasoning-gensee", "query_id": 11, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.075, "weighted_total_content_score": 75.36842105263159, "semantic_relevance": 4.8, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 2.6, "accountability": 3.25, "transparency": 4.0, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 1.9609756097560975, "normalized_reciprocal_se_rank": 0.1583838383838384, "reciprocal_se_rank": 0.047766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "exa", "query_id": 40, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.864285714285714, "weighted_total_content_score": 75.36842105263159, "semantic_relevance": 4.0, "factual_accuracy": 4.0, "freshness": 4.4, "objectivity_tone": 3.6, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 3.8, "authority": 3.8, "avg_ge_freq": 1.0, "relative_se_rank": 2.065, "normalized_reciprocal_se_rank": 0.043939393939393945, "reciprocal_se_rank": 0.02026699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 83, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.775, "weighted_total_content_score": 75.36842105263158, "semantic_relevance": 2.8, "factual_accuracy": 4.2, "freshness": 4.6, "objectivity_tone": 4.2, "layout_ad_density": 4.0, "accountability": 3.0, "transparency": 3.8, "authority": 3.6, "avg_ge_freq": 1.0, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 69, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 3.8125, "weighted_total_content_score": 75.26315789473684, "semantic_relevance": 3.5, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 2.5, "accountability": 3.5, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.66665, "relative_se_rank": 3.3666666666666667, "normalized_reciprocal_se_rank": 0.5, "reciprocal_se_rank": 0.12985436893203883, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "claude", "query_id": 84, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 3.75, "weighted_total_content_score": 75.26315789473684, "semantic_relevance": 4.5, "factual_accuracy": 3.5, "freshness": 5.0, "objectivity_tone": 3.5, "layout_ad_density": 2.5, "accountability": 3.75, "transparency": 3.5, "authority": 3.75, "avg_ge_freq": 0.833325, "relative_se_rank": 1.2272727272727275, "normalized_reciprocal_se_rank": 0.1977873977873978, "reciprocal_se_rank": 0.05723532131299121, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "google-search", "query_id": 51, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.875, "weighted_total_content_score": 75.26315789473684, "semantic_relevance": 1.5, "factual_accuracy": 3.0, "freshness": 3.0, "objectivity_tone": 5.0, "layout_ad_density": 4.5, "accountability": 4.5, "transparency": 4.5, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 59, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.825, "weighted_total_content_score": 75.1578947368421, "semantic_relevance": 2.4, "factual_accuracy": 3.8, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.2, "accountability": 3.8, "transparency": 4.4, "authority": 4.0, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 1.8744186046511628, "normalized_reciprocal_se_rank": 0.1306397306397306, "reciprocal_se_rank": 0.04110032362459547, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "tavily", "query_id": 60, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.775, "weighted_total_content_score": 75.1578947368421, "semantic_relevance": 2.8, "factual_accuracy": 4.2, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.2, "accountability": 3.2, "transparency": 3.2, "authority": 3.6, "avg_ge_freq": 1.0, "relative_se_rank": 1.2958333333333336, "normalized_reciprocal_se_rank": 0.19822467095194368, "reciprocal_se_rank": 0.057340394233598116, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "tavily", "query_id": 95, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 3.75, "weighted_total_content_score": 75.0877192982456, "semantic_relevance": 4.0, "factual_accuracy": 3.6666666666666665, "freshness": 5.0, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 3.0, "accountability": 3.3333333333333335, "transparency": 3.6666666666666665, "authority": 3.6666666666666665, "avg_ge_freq": 1.0, "relative_se_rank": 1.3414634146341464, "normalized_reciprocal_se_rank": 0.06686548019881354, "reciprocal_se_rank": 0.025775928494375096, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 18, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 3.78125, "weighted_total_content_score": 75.0, "semantic_relevance": 3.25, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.5, "layout_ad_density": 3.5, "accountability": 2.5, "transparency": 4.25, "authority": 4.25, "avg_ge_freq": 0.5833499999999999, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 87, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 3.8125, "weighted_total_content_score": 74.99999999999999, "semantic_relevance": 2.0, "factual_accuracy": 4.0, "freshness": 3.5, "objectivity_tone": 4.25, "layout_ad_density": 3.75, "accountability": 4.25, "transparency": 4.25, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 66, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.875, "weighted_total_content_score": 74.94736842105263, "semantic_relevance": 2.2, "factual_accuracy": 3.2, "freshness": 4.0, "objectivity_tone": 3.8, "layout_ad_density": 3.6, "accountability": 4.6, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 0.53332, "relative_se_rank": 1.4878048780487805, "normalized_reciprocal_se_rank": 0.3108225108225108, "reciprocal_se_rank": 0.08439667128987517, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 72, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.825, "weighted_total_content_score": 74.94736842105263, "semantic_relevance": 2.4, "factual_accuracy": 3.4, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 3.4, "accountability": 4.0, "transparency": 4.6, "authority": 3.6, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 1.9658536585365856, "normalized_reciprocal_se_rank": 0.1306397306397306, "reciprocal_se_rank": 0.04110032362459547, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 87, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.8, "weighted_total_content_score": 74.94736842105263, "semantic_relevance": 2.4, "factual_accuracy": 3.8, "freshness": 4.4, "objectivity_tone": 4.2, "layout_ad_density": 3.4, "accountability": 4.0, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 41, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.807142857142857, "weighted_total_content_score": 74.94736842105263, "semantic_relevance": 4.0, "factual_accuracy": 4.0, "freshness": 4.0, "objectivity_tone": 3.6, "layout_ad_density": 4.5, "accountability": 3.4, "transparency": 3.6, "authority": 3.6, "avg_ge_freq": 0.86668, "relative_se_rank": 4.545454545454546, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 85, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.725, "weighted_total_content_score": 74.94736842105263, "semantic_relevance": 4.4, "factual_accuracy": 4.2, "freshness": 4.8, "objectivity_tone": 3.0, "layout_ad_density": 2.8, "accountability": 3.6, "transparency": 3.8, "authority": 3.2, "avg_ge_freq": 0.6, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 22, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.725, "weighted_total_content_score": 74.94736842105263, "semantic_relevance": 4.5, "factual_accuracy": 5.0, "freshness": 2.75, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 2.0714285714285716, "normalized_reciprocal_se_rank": 0.013822434875066453, "reciprocal_se_rank": 0.013030148185998977, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 84, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.8, "weighted_total_content_score": 74.73684210526315, "semantic_relevance": 3.6, "factual_accuracy": 3.6, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 3.2, "accountability": 3.8, "transparency": 4.4, "authority": 3.8, "avg_ge_freq": 0.80002, "relative_se_rank": 1.831818181818182, "normalized_reciprocal_se_rank": 0.1306397306397306, "reciprocal_se_rank": 0.04110032362459547, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "claude", "query_id": 98, "query_type": "QuoraQuestions", "num_sources": 2, "unweighted_mean_score": 3.75, "weighted_total_content_score": 74.73684210526315, "semantic_relevance": 4.5, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 2.5, "layout_ad_density": 2.5, "accountability": 4.0, "transparency": 4.0, "authority": 3.5, "avg_ge_freq": 1.0, "relative_se_rank": 0.1875, "normalized_reciprocal_se_rank": 0.40548340548340545, "reciprocal_se_rank": 0.10714285714285714, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "exa", "query_id": 58, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.59375, "weighted_total_content_score": 74.73684210526315, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 4.5, "objectivity_tone": 4.5, "layout_ad_density": 3.5, "accountability": 2.5, "transparency": 2.5, "authority": 2.25, "avg_ge_freq": 1.0, "relative_se_rank": 0.4166666666666667, "normalized_reciprocal_se_rank": 0.35364014292585727, "reciprocal_se_rank": 0.09468537414965986, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gensee", "query_id": 9, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.7, "weighted_total_content_score": 74.73684210526315, "semantic_relevance": 4.4, "factual_accuracy": 4.0, "freshness": 4.6, "objectivity_tone": 3.4, "layout_ad_density": 2.2, "accountability": 4.2, "transparency": 3.6, "authority": 3.2, "avg_ge_freq": 0.6, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 43, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 3.6666666666666665, "weighted_total_content_score": 74.73684210526315, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 4.0, "objectivity_tone": 3.3333333333333335, "layout_ad_density": 4.0, "accountability": 3.6666666666666665, "transparency": 2.6666666666666665, "authority": 2.6666666666666665, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 68, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.85, "weighted_total_content_score": 74.73684210526315, "semantic_relevance": 1.6, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 4.2, "accountability": 3.6, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-4o", "query_id": 49, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 3.625, "weighted_total_content_score": 74.73684210526315, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 4.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 3.0, "transparency": 3.0, "authority": 3.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.1739130434782608, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 61, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 3.8125, "weighted_total_content_score": 74.73684210526315, "semantic_relevance": 3.5, "factual_accuracy": 3.5, "freshness": 3.0, "objectivity_tone": 3.0, "layout_ad_density": 4.0, "accountability": 4.5, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 1.1888888888888889, "normalized_reciprocal_se_rank": 0.1878787878787879, "reciprocal_se_rank": 0.054854368932038836, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 94, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.75, "weighted_total_content_score": 74.52631578947368, "semantic_relevance": 4.0, "factual_accuracy": 4.0, "freshness": 4.4, "objectivity_tone": 2.8, "layout_ad_density": 2.2, "accountability": 4.0, "transparency": 4.4, "authority": 4.2, "avg_ge_freq": 0.8, "relative_se_rank": 1.1400000000000001, "normalized_reciprocal_se_rank": 0.29733700642791555, "reciprocal_se_rank": 0.08115622241835833, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "google-search", "query_id": 73, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.8, "weighted_total_content_score": 74.52631578947368, "semantic_relevance": 3.4, "factual_accuracy": 3.6, "freshness": 2.8, "objectivity_tone": 3.0, "layout_ad_density": 4.0, "accountability": 4.4, "transparency": 4.6, "authority": 4.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 99, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.7, "weighted_total_content_score": 74.52631578947367, "semantic_relevance": 4.2, "factual_accuracy": 3.8, "freshness": 4.0, "objectivity_tone": 3.6, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 3.6, "authority": 3.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "claude", "query_id": 9, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.775, "weighted_total_content_score": 74.52631578947367, "semantic_relevance": 3.8, "factual_accuracy": 3.4, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 2.6, "accountability": 4.2, "transparency": 4.2, "authority": 3.8, "avg_ge_freq": 0.80002, "relative_se_rank": 1.8, "normalized_reciprocal_se_rank": 0.09595959595959597, "reciprocal_se_rank": 0.032766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "deepseek-chat-gensee", "query_id": 82, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.075, "weighted_total_content_score": 74.52631578947367, "semantic_relevance": 4.2, "factual_accuracy": 4.6, "freshness": 4.5, "objectivity_tone": 4.0, "layout_ad_density": 3.2, "accountability": 4.25, "transparency": 3.75, "authority": 3.75, "avg_ge_freq": 1.0, "relative_se_rank": 1.881818181818182, "normalized_reciprocal_se_rank": 0.04087938205585265, "reciprocal_se_rank": 0.019531696173615075, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "exa", "query_id": 13, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.775, "weighted_total_content_score": 74.52631578947367, "semantic_relevance": 2.4, "factual_accuracy": 4.4, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 2.6, "accountability": 3.2, "transparency": 5.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 15, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.8, "weighted_total_content_score": 74.52631578947367, "semantic_relevance": 3.4, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 2.6, "layout_ad_density": 2.2, "accountability": 3.8, "transparency": 4.6, "authority": 4.8, "avg_ge_freq": 1.0, "relative_se_rank": 2.6315789473684212, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 73, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 3.7083333333333335, "weighted_total_content_score": 74.3859649122807, "semantic_relevance": 4.0, "factual_accuracy": 3.6666666666666665, "freshness": 3.6666666666666665, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 3.3333333333333335, "accountability": 4.0, "transparency": 4.0, "authority": 3.3333333333333335, "avg_ge_freq": 0.7777666666666666, "relative_se_rank": 0.7152777777777778, "normalized_reciprocal_se_rank": 0.5973063973063973, "reciprocal_se_rank": 0.15323624595469257, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 68, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.85, "weighted_total_content_score": 74.3157894736842, "semantic_relevance": 1.8, "factual_accuracy": 3.6, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 4.0, "accountability": 4.6, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 30, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.775, "weighted_total_content_score": 74.3157894736842, "semantic_relevance": 3.8, "factual_accuracy": 3.2, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 1.4782608695652173, "normalized_reciprocal_se_rank": 0.1706035969450604, "reciprocal_se_rank": 0.05070329149893441, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 90, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.725, "weighted_total_content_score": 74.3157894736842, "semantic_relevance": 4.0, "factual_accuracy": 3.8, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 2.8, "accountability": 3.2, "transparency": 4.0, "authority": 3.8, "avg_ge_freq": 0.73334, "relative_se_rank": 0.6444444444444445, "normalized_reciprocal_se_rank": 0.24761653805132067, "reciprocal_se_rank": 0.06920882831815714, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "google-search", "query_id": 28, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.675, "weighted_total_content_score": 74.3157894736842, "semantic_relevance": 4.6, "factual_accuracy": 4.0, "freshness": 3.6, "objectivity_tone": 3.2, "layout_ad_density": 3.6, "accountability": 3.4, "transparency": 3.8, "authority": 3.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "tavily", "query_id": 18, "query_type": "VACOS", "num_sources": 2, "unweighted_mean_score": 3.6875, "weighted_total_content_score": 74.21052631578948, "semantic_relevance": 4.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.5, "layout_ad_density": 2.5, "accountability": 3.5, "transparency": 3.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 43, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.625, "weighted_total_content_score": 74.21052631578947, "semantic_relevance": 4.5, "factual_accuracy": 4.5, "freshness": 3.0, "objectivity_tone": 3.5, "layout_ad_density": 4.5, "accountability": 3.5, "transparency": 3.0, "authority": 2.5, "avg_ge_freq": 0.83335, "relative_se_rank": 2.0833333333333335, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 49, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.71875, "weighted_total_content_score": 74.21052631578947, "semantic_relevance": 2.75, "factual_accuracy": 3.5, "freshness": 4.5, "objectivity_tone": 4.75, "layout_ad_density": 4.0, "accountability": 3.75, "transparency": 3.0, "authority": 3.5, "avg_ge_freq": 1.0, "relative_se_rank": 0.6032608695652173, "normalized_reciprocal_se_rank": 0.46677890011223344, "reciprocal_se_rank": 0.12187162891046385, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 27, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.75, "weighted_total_content_score": 74.10526315789474, "semantic_relevance": 2.8, "factual_accuracy": 3.6, "freshness": 4.2, "objectivity_tone": 4.0, "layout_ad_density": 3.8, "accountability": 3.8, "transparency": 3.8, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 25, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.055, "weighted_total_content_score": 74.10526315789473, "semantic_relevance": 3.75, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.25, "layout_ad_density": 3.8, "accountability": 3.8, "transparency": 4.4, "authority": 3.8, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 1.7951219512195125, "normalized_reciprocal_se_rank": 0.037677474041110416, "reciprocal_se_rank": 0.018762305655509535, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 13, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.7, "weighted_total_content_score": 74.10526315789473, "semantic_relevance": 4.2, "factual_accuracy": 3.8, "freshness": 4.8, "objectivity_tone": 3.2, "layout_ad_density": 2.8, "accountability": 3.8, "transparency": 3.8, "authority": 3.2, "avg_ge_freq": 0.66666, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 17, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.725, "weighted_total_content_score": 74.10526315789473, "semantic_relevance": 3.2, "factual_accuracy": 3.6, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.2, "transparency": 3.2, "authority": 3.6, "avg_ge_freq": 1.0, "relative_se_rank": 1.4232558139534883, "normalized_reciprocal_se_rank": 0.2692063492063492, "reciprocal_se_rank": 0.07439667128987518, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 56, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 3.75, "weighted_total_content_score": 74.03508771929823, "semantic_relevance": 1.6666666666666667, "factual_accuracy": 3.6666666666666665, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 3.0, "transparency": 4.0, "authority": 3.6666666666666665, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gensee", "query_id": 13, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.7, "weighted_total_content_score": 73.89473684210526, "semantic_relevance": 4.0, "factual_accuracy": 3.8, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 2.2, "accountability": 4.2, "transparency": 3.8, "authority": 3.4, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 40, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.775, "weighted_total_content_score": 73.89473684210525, "semantic_relevance": 3.6, "factual_accuracy": 3.4, "freshness": 4.8, "objectivity_tone": 2.8, "layout_ad_density": 3.8, "accountability": 4.4, "transparency": 3.6, "authority": 3.8, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 2.0949999999999998, "normalized_reciprocal_se_rank": 0.029752066115702486, "reciprocal_se_rank": 0.016857899382171224, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 60, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 3.6666666666666665, "weighted_total_content_score": 73.6842105263158, "semantic_relevance": 2.0, "factual_accuracy": 4.333333333333333, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 3.0, "accountability": 3.0, "transparency": 3.3333333333333335, "authority": 3.6666666666666665, "avg_ge_freq": 0.8889, "relative_se_rank": 1.4444444444444446, "normalized_reciprocal_se_rank": 0.11264156718702174, "reciprocal_se_rank": 0.03677552221241542, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "google-search", "query_id": 98, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 3.625, "weighted_total_content_score": 73.6842105263158, "semantic_relevance": 4.5, "factual_accuracy": 3.75, "freshness": 4.75, "objectivity_tone": 3.75, "layout_ad_density": 3.75, "accountability": 3.0, "transparency": 2.5, "authority": 3.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 94, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.725, "weighted_total_content_score": 73.68421052631578, "semantic_relevance": 3.2, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 2.4, "accountability": 4.2, "transparency": 4.0, "authority": 3.8, "avg_ge_freq": 0.3333, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 33, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.7, "weighted_total_content_score": 73.68421052631578, "semantic_relevance": 3.8, "factual_accuracy": 3.8, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 2.8, "accountability": 3.8, "transparency": 3.6, "authority": 3.6, "avg_ge_freq": 0.8666600000000001, "relative_se_rank": 1.4697674418604652, "normalized_reciprocal_se_rank": 0.14733044733044734, "reciprocal_se_rank": 0.04511095700416089, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "deepseek-chat-tavily", "query_id": 1, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.6666666666666665, "weighted_total_content_score": 73.68421052631578, "semantic_relevance": 4.0, "factual_accuracy": 3.3333333333333335, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.3333333333333335, "accountability": 3.6666666666666665, "transparency": 3.3333333333333335, "authority": 3.6666666666666665, "avg_ge_freq": 1.0, "relative_se_rank": 1.4962962962962962, "normalized_reciprocal_se_rank": 0.263973063973064, "reciprocal_se_rank": 0.07313915857605179, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "google-search", "query_id": 48, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.78125, "weighted_total_content_score": 73.68421052631578, "semantic_relevance": 1.75, "factual_accuracy": 3.75, "freshness": 4.25, "objectivity_tone": 4.0, "layout_ad_density": 4.25, "accountability": 3.75, "transparency": 4.25, "authority": 4.25, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 59, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.75, "weighted_total_content_score": 73.68421052631578, "semantic_relevance": 2.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 93, "query_type": "QuoraQuestions", "num_sources": 2, "unweighted_mean_score": 3.75, "weighted_total_content_score": 73.68421052631578, "semantic_relevance": 5.0, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 2.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.0, "authority": 3.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 52, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.2, "weighted_total_content_score": 73.47368421052633, "semantic_relevance": 2.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.75, "layout_ad_density": 4.0, "accountability": 4.2, "transparency": 4.6, "authority": 4.2, "avg_ge_freq": 0.8666600000000001, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 98, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.95, "weighted_total_content_score": 73.47368421052632, "semantic_relevance": 4.5, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 3.75, "layout_ad_density": 3.2, "accountability": 4.0, "transparency": 3.6, "authority": 3.8, "avg_ge_freq": 0.66668, "relative_se_rank": 1.625, "normalized_reciprocal_se_rank": 0.13742183742183742, "reciprocal_se_rank": 0.0427300046232085, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 93, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.775, "weighted_total_content_score": 73.47368421052632, "semantic_relevance": 3.8, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 2.6, "layout_ad_density": 3.2, "accountability": 4.0, "transparency": 4.2, "authority": 4.4, "avg_ge_freq": 0.93334, "relative_se_rank": 1.231818181818182, "normalized_reciprocal_se_rank": 0.11638973029347362, "reciprocal_se_rank": 0.03767617305595603, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "google-search", "query_id": 92, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.675, "weighted_total_content_score": 73.47368421052632, "semantic_relevance": 3.4, "factual_accuracy": 4.0, "freshness": 4.2, "objectivity_tone": 3.6, "layout_ad_density": 2.6, "accountability": 4.0, "transparency": 3.8, "authority": 3.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 59, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.7, "weighted_total_content_score": 73.26315789473685, "semantic_relevance": 3.4, "factual_accuracy": 3.6, "freshness": 4.8, "objectivity_tone": 3.4, "layout_ad_density": 2.6, "accountability": 3.6, "transparency": 4.4, "authority": 3.8, "avg_ge_freq": 0.66668, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 19, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.09, "weighted_total_content_score": 73.26315789473684, "semantic_relevance": 3.75, "factual_accuracy": 3.75, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.8, "accountability": 4.6, "transparency": 4.4, "authority": 4.2, "avg_ge_freq": 0.3333, "relative_se_rank": 1.9333333333333336, "normalized_reciprocal_se_rank": 0.08439955106621773, "reciprocal_se_rank": 0.029989212513484353, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 22, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.65, "weighted_total_content_score": 73.26315789473684, "semantic_relevance": 4.0, "factual_accuracy": 4.0, "freshness": 4.6, "objectivity_tone": 3.2, "layout_ad_density": 4.6, "accountability": 3.6, "transparency": 2.4, "authority": 2.8, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 87, "query_type": "QuoraQuestions", "num_sources": 2, "unweighted_mean_score": 3.75, "weighted_total_content_score": 73.15789473684211, "semantic_relevance": 2.0, "factual_accuracy": 3.5, "freshness": 3.5, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.66665, "relative_se_rank": 0.3414634146341463, "normalized_reciprocal_se_rank": 0.20524691358024694, "reciprocal_se_rank": 0.059027777777777776, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 87, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 3.75, "weighted_total_content_score": 73.1578947368421, "semantic_relevance": 1.25, "factual_accuracy": 3.75, "freshness": 3.25, "objectivity_tone": 4.5, "layout_ad_density": 4.0, "accountability": 4.25, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 12, "query_type": "VACOS", "num_sources": 2, "unweighted_mean_score": 3.6875, "weighted_total_content_score": 73.1578947368421, "semantic_relevance": 2.5, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 3.5, "layout_ad_density": 1.5, "accountability": 4.0, "transparency": 4.0, "authority": 4.5, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "tavily", "query_id": 72, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.8, "weighted_total_content_score": 73.05263157894737, "semantic_relevance": 1.8, "factual_accuracy": 3.4, "freshness": 4.6, "objectivity_tone": 3.4, "layout_ad_density": 4.2, "accountability": 4.0, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": 1.0, "relative_se_rank": 1.9707317073170731, "normalized_reciprocal_se_rank": 0.11082251082251082, "reciprocal_se_rank": 0.0363384188626907, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 12, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.6666666666666665, "weighted_total_content_score": 72.98245614035088, "semantic_relevance": 2.3333333333333335, "factual_accuracy": 4.333333333333333, "freshness": 4.0, "objectivity_tone": 4.0, "layout_ad_density": 2.6666666666666665, "accountability": 3.6666666666666665, "transparency": 4.0, "authority": 4.333333333333333, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 53, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 3.7083333333333335, "weighted_total_content_score": 72.98245614035088, "semantic_relevance": 2.6666666666666665, "factual_accuracy": 3.6666666666666665, "freshness": 4.0, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 4.0, "accountability": 3.6666666666666665, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.5555666666666667, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 60, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.6875, "weighted_total_content_score": 72.89473684210526, "semantic_relevance": 2.0, "factual_accuracy": 4.25, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 3.0, "transparency": 3.5, "authority": 3.75, "avg_ge_freq": 1.0, "relative_se_rank": 0.11979166666666666, "normalized_reciprocal_se_rank": 0.4616697275788185, "reciprocal_se_rank": 0.12064393939393939, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 12, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.6, "weighted_total_content_score": 72.84210526315789, "semantic_relevance": 3.4, "factual_accuracy": 4.2, "freshness": 3.6, "objectivity_tone": 4.0, "layout_ad_density": 2.4, "accountability": 3.0, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.195, "normalized_reciprocal_se_rank": 0.011736411736411736, "reciprocal_se_rank": 0.012528895053166894, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 28, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.8714285714285714, "weighted_total_content_score": 72.84210526315789, "semantic_relevance": 4.0, "factual_accuracy": 4.2, "freshness": 4.0, "objectivity_tone": 3.8, "layout_ad_density": 4.0, "accountability": 4.5, "transparency": 4.25, "authority": 4.0, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 1.8266666666666667, "normalized_reciprocal_se_rank": 0.051370851370851366, "reciprocal_se_rank": 0.02205270457697642, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 39, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.1450000000000005, "weighted_total_content_score": 72.84210526315789, "semantic_relevance": 4.0, "factual_accuracy": 3.25, "freshness": 5.0, "objectivity_tone": 3.25, "layout_ad_density": 4.0, "accountability": 4.4, "transparency": 4.4, "authority": 4.2, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.0833333333333335, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 87, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.7, "weighted_total_content_score": 72.84210526315789, "semantic_relevance": 2.4, "factual_accuracy": 3.4, "freshness": 3.6, "objectivity_tone": 4.2, "layout_ad_density": 4.2, "accountability": 4.0, "transparency": 3.6, "authority": 4.2, "avg_ge_freq": 0.46663999999999994, "relative_se_rank": 2.0146341463414634, "normalized_reciprocal_se_rank": 0.043939393939393945, "reciprocal_se_rank": 0.02026699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 13, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.88, "weighted_total_content_score": 72.63157894736841, "semantic_relevance": 3.5, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.2, "accountability": 4.4, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 58, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.94, "weighted_total_content_score": 72.63157894736841, "semantic_relevance": 4.75, "factual_accuracy": 4.25, "freshness": 5.0, "objectivity_tone": 4.75, "layout_ad_density": 3.8, "accountability": 3.0, "transparency": 3.2, "authority": 3.0, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 0.9041666666666668, "normalized_reciprocal_se_rank": 0.3102874902874903, "reciprocal_se_rank": 0.08426811053024645, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "claude", "query_id": 11, "query_type": "VACOS", "num_sources": 2, "unweighted_mean_score": 3.625, "weighted_total_content_score": 72.63157894736841, "semantic_relevance": 4.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 3.0, "accountability": 3.0, "transparency": 3.5, "authority": 3.5, "avg_ge_freq": 0.66665, "relative_se_rank": 1.2560975609756098, "normalized_reciprocal_se_rank": 0.32659932659932656, "reciprocal_se_rank": 0.08818770226537216, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "exa", "query_id": 1, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.675, "weighted_total_content_score": 72.63157894736841, "semantic_relevance": 2.6, "factual_accuracy": 4.2, "freshness": 4.2, "objectivity_tone": 3.4, "layout_ad_density": 2.6, "accountability": 4.0, "transparency": 4.2, "authority": 4.2, "avg_ge_freq": 1.0, "relative_se_rank": 1.7955555555555556, "normalized_reciprocal_se_rank": 0.11082251082251082, "reciprocal_se_rank": 0.0363384188626907, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "exa", "query_id": 71, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.725, "weighted_total_content_score": 72.63157894736841, "semantic_relevance": 2.4, "factual_accuracy": 3.0, "freshness": 3.2, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 4.6, "transparency": 4.8, "authority": 4.8, "avg_ge_freq": 1.0, "relative_se_rank": 2.1729729729729725, "normalized_reciprocal_se_rank": 0.1583838383838384, "reciprocal_se_rank": 0.047766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gpt-4o", "query_id": 57, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 3.75, "weighted_total_content_score": 72.63157894736841, "semantic_relevance": 2.0, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 3.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 68, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 3.75, "weighted_total_content_score": 72.63157894736841, "semantic_relevance": 3.0, "factual_accuracy": 3.0, "freshness": 4.0, "objectivity_tone": 3.0, "layout_ad_density": 3.5, "accountability": 4.5, "transparency": 5.0, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 52, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 3.5, "weighted_total_content_score": 72.63157894736841, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 2.0, "objectivity_tone": 4.0, "layout_ad_density": 2.0, "accountability": 4.0, "transparency": 3.0, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 59, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.675, "weighted_total_content_score": 72.42105263157895, "semantic_relevance": 2.4, "factual_accuracy": 4.0, "freshness": 3.8, "objectivity_tone": 3.6, "layout_ad_density": 3.0, "accountability": 3.8, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 54, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.625, "weighted_total_content_score": 72.42105263157895, "semantic_relevance": 3.2, "factual_accuracy": 3.4, "freshness": 4.0, "objectivity_tone": 4.2, "layout_ad_density": 3.8, "accountability": 3.4, "transparency": 3.6, "authority": 3.4, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 1.269387755102041, "normalized_reciprocal_se_rank": 0.19822467095194368, "reciprocal_se_rank": 0.057340394233598116, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 84, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.675, "weighted_total_content_score": 72.42105263157893, "semantic_relevance": 3.6, "factual_accuracy": 3.2, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 2.6, "accountability": 4.0, "transparency": 4.0, "authority": 3.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 91, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.675, "weighted_total_content_score": 72.42105263157893, "semantic_relevance": 3.8, "factual_accuracy": 3.4, "freshness": 4.4, "objectivity_tone": 2.8, "layout_ad_density": 2.6, "accountability": 3.8, "transparency": 4.0, "authority": 4.6, "avg_ge_freq": 0.3333, "relative_se_rank": 2.7777777777777777, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 28, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.6785714285714284, "weighted_total_content_score": 72.42105263157893, "semantic_relevance": 4.6, "factual_accuracy": 3.8, "freshness": 3.6, "objectivity_tone": 2.8, "layout_ad_density": 3.5, "accountability": 3.8, "transparency": 3.8, "authority": 3.6, "avg_ge_freq": 0.93334, "relative_se_rank": 0.9555555555555555, "normalized_reciprocal_se_rank": 0.32192837465564744, "reciprocal_se_rank": 0.08706531332744924, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "claude", "query_id": 13, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.65, "weighted_total_content_score": 72.42105263157893, "semantic_relevance": 3.2, "factual_accuracy": 3.6, "freshness": 4.8, "objectivity_tone": 3.6, "layout_ad_density": 2.6, "accountability": 4.4, "transparency": 3.8, "authority": 3.2, "avg_ge_freq": 0.86668, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 41, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.625, "weighted_total_content_score": 72.36842105263159, "semantic_relevance": 2.75, "factual_accuracy": 4.0, "freshness": 3.25, "objectivity_tone": 4.0, "layout_ad_density": 3.25, "accountability": 3.25, "transparency": 4.0, "authority": 4.5, "avg_ge_freq": 0.583325, "relative_se_rank": 4.545454545454546, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 63, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 3.6875, "weighted_total_content_score": 72.36842105263158, "semantic_relevance": 2.5, "factual_accuracy": 3.25, "freshness": 3.25, "objectivity_tone": 4.0, "layout_ad_density": 4.25, "accountability": 3.75, "transparency": 4.25, "authority": 4.25, "avg_ge_freq": 0.75, "relative_se_rank": 4.761904761904762, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 14, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 3.625, "weighted_total_content_score": 72.36842105263158, "semantic_relevance": 3.25, "factual_accuracy": 3.75, "freshness": 4.0, "objectivity_tone": 3.75, "layout_ad_density": 4.25, "accountability": 2.5, "transparency": 3.5, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "deepseek-reasoning-gensee", "query_id": 72, "query_type": "Pinocchios", "num_sources": 3, "unweighted_mean_score": 3.7083333333333335, "weighted_total_content_score": 72.28070175438596, "semantic_relevance": 3.0, "factual_accuracy": 3.3333333333333335, "freshness": 4.333333333333333, "objectivity_tone": 3.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.333333333333333, "authority": 3.6666666666666665, "avg_ge_freq": 1.0, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 20, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.625, "weighted_total_content_score": 72.28070175438596, "semantic_relevance": 3.0, "factual_accuracy": 4.0, "freshness": 3.0, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 3.6666666666666665, "accountability": 3.3333333333333335, "transparency": 4.0, "authority": 4.333333333333333, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 86, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.625, "weighted_total_content_score": 72.21052631578947, "semantic_relevance": 4.2, "factual_accuracy": 3.4, "freshness": 4.6, "objectivity_tone": 3.0, "layout_ad_density": 2.8, "accountability": 3.6, "transparency": 4.0, "authority": 3.4, "avg_ge_freq": 0.80002, "relative_se_rank": 1.3166666666666669, "normalized_reciprocal_se_rank": 0.14733044733044734, "reciprocal_se_rank": 0.04511095700416089, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "tavily", "query_id": 21, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.6, "weighted_total_content_score": 72.21052631578947, "semantic_relevance": 4.0, "factual_accuracy": 3.6, "freshness": 3.8, "objectivity_tone": 3.4, "layout_ad_density": 2.8, "accountability": 4.2, "transparency": 3.2, "authority": 3.8, "avg_ge_freq": 1.0, "relative_se_rank": 0.4791666666666667, "normalized_reciprocal_se_rank": 0.5367912895185623, "reciprocal_se_rank": 0.1386949943260623, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "gensee", "query_id": 54, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.5625, "weighted_total_content_score": 72.10526315789474, "semantic_relevance": 3.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 2.0, "accountability": 3.0, "transparency": 3.5, "authority": 3.5, "avg_ge_freq": 0.66665, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 72, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 3.65625, "weighted_total_content_score": 72.10526315789473, "semantic_relevance": 2.5, "factual_accuracy": 3.5, "freshness": 3.5, "objectivity_tone": 4.0, "layout_ad_density": 3.5, "accountability": 4.0, "transparency": 4.25, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 1.2621951219512195, "normalized_reciprocal_se_rank": 0.3018278018278018, "reciprocal_se_rank": 0.08223532131299122, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 41, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.6, "weighted_total_content_score": 72.0, "semantic_relevance": 3.2, "factual_accuracy": 3.8, "freshness": 3.6, "objectivity_tone": 3.8, "layout_ad_density": 4.0, "accountability": 3.0, "transparency": 3.8, "authority": 3.6, "avg_ge_freq": 0.66668, "relative_se_rank": 4.545454545454546, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 36, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 3.5833333333333335, "weighted_total_content_score": 71.9298245614035, "semantic_relevance": 3.6666666666666665, "factual_accuracy": 4.0, "freshness": 3.6666666666666665, "objectivity_tone": 3.3333333333333335, "layout_ad_density": 3.6666666666666665, "accountability": 3.3333333333333335, "transparency": 3.3333333333333335, "authority": 3.6666666666666665, "avg_ge_freq": 1.0, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 92, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.65, "weighted_total_content_score": 71.78947368421053, "semantic_relevance": 2.6, "factual_accuracy": 3.6, "freshness": 4.4, "objectivity_tone": 3.6, "layout_ad_density": 2.8, "accountability": 4.6, "transparency": 3.8, "authority": 3.8, "avg_ge_freq": 1.0, "relative_se_rank": 0.5510204081632654, "normalized_reciprocal_se_rank": 0.293534916262189, "reciprocal_se_rank": 0.08024261337368133, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 95, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.725, "weighted_total_content_score": 71.78947368421052, "semantic_relevance": 2.8, "factual_accuracy": 2.6, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 3.6, "accountability": 4.0, "transparency": 4.4, "authority": 4.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 73, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.6, "weighted_total_content_score": 71.78947368421052, "semantic_relevance": 3.8, "factual_accuracy": 3.6, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 3.2, "accountability": 3.6, "transparency": 3.4, "authority": 3.0, "avg_ge_freq": 0.8, "relative_se_rank": 1.2625, "normalized_reciprocal_se_rank": 0.3583838383838384, "reciprocal_se_rank": 0.0958252427184466, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gpt-4o", "query_id": 85, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.65, "weighted_total_content_score": 71.78947368421052, "semantic_relevance": 3.6, "factual_accuracy": 3.4, "freshness": 5.0, "objectivity_tone": 2.8, "layout_ad_density": 3.6, "accountability": 3.6, "transparency": 3.8, "authority": 3.4, "avg_ge_freq": 0.6, "relative_se_rank": 1.021276595744681, "normalized_reciprocal_se_rank": 0.23759259259259263, "reciprocal_se_rank": 0.06680016181229773, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 14, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.6, "weighted_total_content_score": 71.57894736842107, "semantic_relevance": 3.6, "factual_accuracy": 3.6, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 2.2, "accountability": 3.8, "transparency": 4.0, "authority": 3.4, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-chat-gensee", "query_id": 11, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.1, "weighted_total_content_score": 71.57894736842107, "semantic_relevance": 4.25, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.75, "layout_ad_density": 2.5, "accountability": 3.6, "transparency": 4.4, "authority": 4.6, "avg_ge_freq": 1.0, "relative_se_rank": 1.9609756097560975, "normalized_reciprocal_se_rank": 0.1583838383838384, "reciprocal_se_rank": 0.047766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 11, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.625, "weighted_total_content_score": 71.57894736842105, "semantic_relevance": 3.3333333333333335, "factual_accuracy": 3.3333333333333335, "freshness": 5.0, "objectivity_tone": 3.3333333333333335, "layout_ad_density": 2.6666666666666665, "accountability": 4.333333333333333, "transparency": 3.3333333333333335, "authority": 3.6666666666666665, "avg_ge_freq": 0.3333, "relative_se_rank": 1.008130081300813, "normalized_reciprocal_se_rank": 0.19325530436641547, "reciprocal_se_rank": 0.05614629886474546, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 48, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 3.5833333333333335, "weighted_total_content_score": 71.57894736842105, "semantic_relevance": 2.6666666666666665, "factual_accuracy": 4.0, "freshness": 3.6666666666666665, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 3.6666666666666665, "transparency": 3.3333333333333335, "authority": 3.3333333333333335, "avg_ge_freq": 0.3333, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 16, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.7083333333333335, "weighted_total_content_score": 71.57894736842105, "semantic_relevance": 2.6666666666666665, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 4.333333333333333, "accountability": 4.0, "transparency": 3.6666666666666665, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 59, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.675, "weighted_total_content_score": 71.57894736842104, "semantic_relevance": 1.6, "factual_accuracy": 3.2, "freshness": 5.0, "objectivity_tone": 4.4, "layout_ad_density": 3.8, "accountability": 3.0, "transparency": 4.4, "authority": 4.0, "avg_ge_freq": 0.93334, "relative_se_rank": 1.4418604651162792, "normalized_reciprocal_se_rank": 0.22596877869605145, "reciprocal_se_rank": 0.06400706090026478, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "exa", "query_id": 21, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.525, "weighted_total_content_score": 71.57894736842104, "semantic_relevance": 4.2, "factual_accuracy": 3.8, "freshness": 2.8, "objectivity_tone": 3.6, "layout_ad_density": 2.4, "accountability": 3.8, "transparency": 3.6, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.8833333333333334, "normalized_reciprocal_se_rank": 0.4196632996632997, "reciprocal_se_rank": 0.11055016181229774, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "exa", "query_id": 95, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.6, "weighted_total_content_score": 71.57894736842104, "semantic_relevance": 3.8, "factual_accuracy": 3.2, "freshness": 4.4, "objectivity_tone": 3.4, "layout_ad_density": 2.2, "accountability": 4.0, "transparency": 4.0, "authority": 3.8, "avg_ge_freq": 1.0, "relative_se_rank": 0.8097560975609757, "normalized_reciprocal_se_rank": 0.28455799828348843, "reciprocal_se_rank": 0.07808553842248873, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "google-search", "query_id": 83, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 3.65625, "weighted_total_content_score": 71.57894736842104, "semantic_relevance": 2.5, "factual_accuracy": 4.0, "freshness": 4.75, "objectivity_tone": 3.0, "layout_ad_density": 3.5, "accountability": 3.5, "transparency": 4.25, "authority": 3.75, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "tavily", "query_id": 43, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.625, "weighted_total_content_score": 71.57894736842104, "semantic_relevance": 1.75, "factual_accuracy": 4.0, "freshness": 4.25, "objectivity_tone": 4.25, "layout_ad_density": 3.25, "accountability": 3.75, "transparency": 3.5, "authority": 4.25, "avg_ge_freq": 1.0, "relative_se_rank": 0.78125, "normalized_reciprocal_se_rank": 0.16163357715081855, "reciprocal_se_rank": 0.04854787412119184, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 58, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.55, "weighted_total_content_score": 71.36842105263159, "semantic_relevance": 4.2, "factual_accuracy": 3.4, "freshness": 4.4, "objectivity_tone": 3.4, "layout_ad_density": 3.6, "accountability": 2.8, "transparency": 3.2, "authority": 3.4, "avg_ge_freq": 1.0, "relative_se_rank": 1.0875000000000001, "normalized_reciprocal_se_rank": 0.21122861265718412, "reciprocal_se_rank": 0.060465127798692286, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "gensee", "query_id": 58, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.46875, "weighted_total_content_score": 71.3157894736842, "semantic_relevance": 4.25, "factual_accuracy": 4.0, "freshness": 2.75, "objectivity_tone": 4.0, "layout_ad_density": 3.25, "accountability": 3.5, "transparency": 3.0, "authority": 3.0, "avg_ge_freq": 0.49999999999999994, "relative_se_rank": 1.2447916666666667, "normalized_reciprocal_se_rank": 0.2138888888888889, "reciprocal_se_rank": 0.061104368932038834, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "gpt-4o", "query_id": 39, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 3.5625, "weighted_total_content_score": 71.3157894736842, "semantic_relevance": 3.75, "factual_accuracy": 3.5, "freshness": 4.0, "objectivity_tone": 3.5, "layout_ad_density": 2.5, "accountability": 3.25, "transparency": 3.75, "authority": 4.25, "avg_ge_freq": 0.3333, "relative_se_rank": 2.0833333333333335, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 43, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.59375, "weighted_total_content_score": 71.3157894736842, "semantic_relevance": 3.25, "factual_accuracy": 3.5, "freshness": 4.0, "objectivity_tone": 3.5, "layout_ad_density": 4.0, "accountability": 3.25, "transparency": 3.5, "authority": 3.75, "avg_ge_freq": 0.5833499999999999, "relative_se_rank": 1.1875, "normalized_reciprocal_se_rank": 0.2745791245791246, "reciprocal_se_rank": 0.07568770226537216, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "tavily", "query_id": 4, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 3.625, "weighted_total_content_score": 71.3157894736842, "semantic_relevance": 3.5, "factual_accuracy": 3.5, "freshness": 5.0, "objectivity_tone": 2.75, "layout_ad_density": 2.25, "accountability": 3.75, "transparency": 4.0, "authority": 4.25, "avg_ge_freq": 1.0, "relative_se_rank": 2.0121951219512195, "normalized_reciprocal_se_rank": 0.0214263850627487, "reciprocal_se_rank": 0.014857310973815825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "google-search", "query_id": 53, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.625, "weighted_total_content_score": 71.15789473684211, "semantic_relevance": 2.2, "factual_accuracy": 3.6, "freshness": 4.4, "objectivity_tone": 3.8, "layout_ad_density": 4.4, "accountability": 3.4, "transparency": 3.6, "authority": 3.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 79, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 3.675, "weighted_total_content_score": 71.05263157894737, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 2.5, "objectivity_tone": 5.0, "layout_ad_density": 3.5, "accountability": 4.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 88, "query_type": "QuoraQuestions", "num_sources": 2, "unweighted_mean_score": 3.625, "weighted_total_content_score": 71.05263157894737, "semantic_relevance": 3.0, "factual_accuracy": 3.5, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 3.5, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 0.5930232558139534, "normalized_reciprocal_se_rank": 0.34719746484452363, "reciprocal_se_rank": 0.09313725490196079, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "gensee", "query_id": 51, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.46875, "weighted_total_content_score": 71.05263157894737, "semantic_relevance": 3.75, "factual_accuracy": 4.25, "freshness": 3.25, "objectivity_tone": 4.0, "layout_ad_density": 3.75, "accountability": 2.5, "transparency": 3.0, "authority": 3.25, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 11, "query_type": "VACOS", "num_sources": 2, "unweighted_mean_score": 3.5625, "weighted_total_content_score": 71.05263157894737, "semantic_relevance": 3.5, "factual_accuracy": 3.5, "freshness": 3.5, "objectivity_tone": 3.5, "layout_ad_density": 2.5, "accountability": 4.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 15, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 3.5625, "weighted_total_content_score": 71.05263157894736, "semantic_relevance": 3.5, "factual_accuracy": 4.0, "freshness": 4.0, "objectivity_tone": 3.0, "layout_ad_density": 3.75, "accountability": 3.0, "transparency": 3.5, "authority": 3.75, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 18, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.5, "weighted_total_content_score": 70.87719298245614, "semantic_relevance": 4.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 3.3333333333333335, "layout_ad_density": 1.6666666666666667, "accountability": 3.3333333333333335, "transparency": 3.3333333333333335, "authority": 3.3333333333333335, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 40, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 3.7083333333333335, "weighted_total_content_score": 70.87719298245614, "semantic_relevance": 3.0, "factual_accuracy": 3.3333333333333335, "freshness": 5.0, "objectivity_tone": 1.6666666666666667, "layout_ad_density": 4.333333333333333, "accountability": 4.333333333333333, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-4o", "query_id": 4, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.5833333333333335, "weighted_total_content_score": 70.87719298245612, "semantic_relevance": 2.6666666666666665, "factual_accuracy": 3.6666666666666665, "freshness": 5.0, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 2.6666666666666665, "accountability": 2.6666666666666665, "transparency": 4.333333333333333, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 23, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 3.75, "weighted_total_content_score": 70.78947368421052, "semantic_relevance": 2.75, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 1.5, "layout_ad_density": 4.25, "accountability": 4.25, "transparency": 4.75, "authority": 4.5, "avg_ge_freq": 0.41664999999999996, "relative_se_rank": 1.61875, "normalized_reciprocal_se_rank": 0.044203944203944204, "reciprocal_se_rank": 0.02033055940822931, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 39, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.625, "weighted_total_content_score": 70.73684210526315, "semantic_relevance": 2.4, "factual_accuracy": 3.4, "freshness": 4.8, "objectivity_tone": 3.4, "layout_ad_density": 3.6, "accountability": 3.2, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 1.8625000000000003, "normalized_reciprocal_se_rank": 0.008565656565656566, "reciprocal_se_rank": 0.011766990291262134, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 73, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.55, "weighted_total_content_score": 70.73684210526315, "semantic_relevance": 3.8, "factual_accuracy": 3.4, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 3.4, "accountability": 3.6, "transparency": 3.4, "authority": 2.6, "avg_ge_freq": 0.66666, "relative_se_rank": 1.2625, "normalized_reciprocal_se_rank": 0.3583838383838384, "reciprocal_se_rank": 0.0958252427184466, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "gpt-4o", "query_id": 41, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.5, "weighted_total_content_score": 70.52631578947368, "semantic_relevance": 3.0, "factual_accuracy": 4.0, "freshness": 2.5, "objectivity_tone": 4.0, "layout_ad_density": 4.5, "accountability": 2.5, "transparency": 3.5, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 4.545454545454546, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 66, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 3.71875, "weighted_total_content_score": 70.52631578947367, "semantic_relevance": 2.25, "factual_accuracy": 2.75, "freshness": 4.5, "objectivity_tone": 2.5, "layout_ad_density": 4.25, "accountability": 4.25, "transparency": 4.75, "authority": 4.5, "avg_ge_freq": 0.583325, "relative_se_rank": 1.25, "normalized_reciprocal_se_rank": 0.3885281385281385, "reciprocal_se_rank": 0.10306865464632455, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "gensee", "query_id": 11, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.475, "weighted_total_content_score": 70.52631578947367, "semantic_relevance": 4.2, "factual_accuracy": 4.0, "freshness": 3.6, "objectivity_tone": 3.2, "layout_ad_density": 2.2, "accountability": 3.6, "transparency": 3.4, "authority": 3.6, "avg_ge_freq": 0.53332, "relative_se_rank": 1.4878048780487805, "normalized_reciprocal_se_rank": 0.28902356902356907, "reciprocal_se_rank": 0.07915857605177994, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "tavily", "query_id": 7, "query_type": "VACOS", "num_sources": 2, "unweighted_mean_score": 3.5625, "weighted_total_content_score": 70.52631578947367, "semantic_relevance": 4.0, "factual_accuracy": 3.5, "freshness": 5.0, "objectivity_tone": 2.5, "layout_ad_density": 2.0, "accountability": 2.0, "transparency": 4.5, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.857142857142857, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 90, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.575, "weighted_total_content_score": 70.3157894736842, "semantic_relevance": 3.4, "factual_accuracy": 3.6, "freshness": 4.4, "objectivity_tone": 2.6, "layout_ad_density": 3.2, "accountability": 3.2, "transparency": 4.2, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.8, "normalized_reciprocal_se_rank": 0.19385730319063652, "reciprocal_se_rank": 0.05629095392202188, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "deepseek-chat-gensee", "query_id": 51, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 4.491071428571429, "weighted_total_content_score": 70.3157894736842, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 4.5, "objectivity_tone": 4.0, "layout_ad_density": 4.333333333333333, "accountability": 4.25, "transparency": 4.25, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 90, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.55, "weighted_total_content_score": 70.3157894736842, "semantic_relevance": 4.0, "factual_accuracy": 3.4, "freshness": 5.0, "objectivity_tone": 2.6, "layout_ad_density": 2.4, "accountability": 3.4, "transparency": 4.0, "authority": 3.6, "avg_ge_freq": 1.0, "relative_se_rank": 0.6355555555555557, "normalized_reciprocal_se_rank": 0.2737392369012922, "reciprocal_se_rank": 0.07548588459521341, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 42, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.5, "weighted_total_content_score": 70.26315789473684, "semantic_relevance": 2.75, "factual_accuracy": 3.25, "freshness": 3.75, "objectivity_tone": 4.75, "layout_ad_density": 3.75, "accountability": 2.75, "transparency": 3.25, "authority": 3.75, "avg_ge_freq": 0.75, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 49, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.55, "weighted_total_content_score": 70.10526315789474, "semantic_relevance": 2.4, "factual_accuracy": 3.2, "freshness": 3.0, "objectivity_tone": 4.2, "layout_ad_density": 4.8, "accountability": 3.6, "transparency": 3.4, "authority": 3.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 14, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.375, "weighted_total_content_score": 70.10526315789473, "semantic_relevance": 4.75, "factual_accuracy": 4.75, "freshness": 5.0, "objectivity_tone": 3.75, "layout_ad_density": 4.0, "accountability": 3.75, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.93334, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 95, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.525, "weighted_total_content_score": 70.10526315789473, "semantic_relevance": 3.6, "factual_accuracy": 3.4, "freshness": 4.2, "objectivity_tone": 3.2, "layout_ad_density": 2.2, "accountability": 3.8, "transparency": 4.0, "authority": 3.8, "avg_ge_freq": 0.93334, "relative_se_rank": 2.0390243902439025, "normalized_reciprocal_se_rank": 0.031553631553631556, "reciprocal_se_rank": 0.017290799815071655, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 88, "query_type": "QuoraQuestions", "num_sources": 2, "unweighted_mean_score": 3.5625, "weighted_total_content_score": 70.0, "semantic_relevance": 3.0, "factual_accuracy": 3.5, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 2.0, "accountability": 4.5, "transparency": 3.5, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 21, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.575, "weighted_total_content_score": 69.89473684210526, "semantic_relevance": 3.6, "factual_accuracy": 2.8, "freshness": 4.6, "objectivity_tone": 2.8, "layout_ad_density": 3.0, "accountability": 4.4, "transparency": 3.6, "authority": 3.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "exa", "query_id": 63, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.6, "weighted_total_content_score": 69.89473684210525, "semantic_relevance": 1.8, "factual_accuracy": 3.2, "freshness": 3.2, "objectivity_tone": 3.8, "layout_ad_density": 3.4, "accountability": 4.6, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": 1.0, "relative_se_rank": 4.761904761904762, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 53, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 3.625, "weighted_total_content_score": 69.82456140350877, "semantic_relevance": 1.3333333333333333, "factual_accuracy": 3.3333333333333335, "freshness": 5.0, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 3.3333333333333335, "accountability": 3.6666666666666665, "transparency": 4.666666666666667, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 47, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 3.625, "weighted_total_content_score": 69.82456140350877, "semantic_relevance": 1.3333333333333333, "factual_accuracy": 3.3333333333333335, "freshness": 2.6666666666666665, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "tavily", "query_id": 88, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 3.5833333333333335, "weighted_total_content_score": 69.82456140350877, "semantic_relevance": 2.3333333333333335, "factual_accuracy": 3.6666666666666665, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 2.0, "accountability": 4.333333333333333, "transparency": 4.0, "authority": 4.333333333333333, "avg_ge_freq": 1.0, "relative_se_rank": 0.4108527131782946, "normalized_reciprocal_se_rank": 0.4954380405360797, "reciprocal_se_rank": 0.12875816993464054, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "google-search", "query_id": 81, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 3.53125, "weighted_total_content_score": 69.73684210526315, "semantic_relevance": 2.75, "factual_accuracy": 4.0, "freshness": 4.0, "objectivity_tone": 3.0, "layout_ad_density": 3.25, "accountability": 3.75, "transparency": 3.5, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 97, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.8049999999999997, "weighted_total_content_score": 69.6842105263158, "semantic_relevance": 4.25, "factual_accuracy": 3.75, "freshness": 4.4, "objectivity_tone": 3.75, "layout_ad_density": 3.2, "accountability": 3.8, "transparency": 4.0, "authority": 3.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gensee", "query_id": 7, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.4, "weighted_total_content_score": 69.68421052631578, "semantic_relevance": 4.6, "factual_accuracy": 4.2, "freshness": 3.6, "objectivity_tone": 3.0, "layout_ad_density": 2.0, "accountability": 3.2, "transparency": 3.2, "authority": 3.4, "avg_ge_freq": 0.3333, "relative_se_rank": 2.857142857142857, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 13, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.525, "weighted_total_content_score": 69.47368421052632, "semantic_relevance": 3.2, "factual_accuracy": 3.2, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 2.4, "accountability": 4.6, "transparency": 3.6, "authority": 3.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 76, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.55, "weighted_total_content_score": 69.47368421052632, "semantic_relevance": 2.4, "factual_accuracy": 3.0, "freshness": 3.8, "objectivity_tone": 3.8, "layout_ad_density": 3.2, "accountability": 4.4, "transparency": 4.0, "authority": 3.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 61, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 3.5, "weighted_total_content_score": 69.47368421052632, "semantic_relevance": 3.5, "factual_accuracy": 3.25, "freshness": 2.25, "objectivity_tone": 3.25, "layout_ad_density": 3.0, "accountability": 4.5, "transparency": 4.25, "authority": 4.0, "avg_ge_freq": 0.75, "relative_se_rank": 0.7111111111111111, "normalized_reciprocal_se_rank": 0.32910927456382005, "reciprocal_se_rank": 0.08879082082965578, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 34, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 4.46875, "weighted_total_content_score": 69.47368421052632, "semantic_relevance": 1.75, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.25, "layout_ad_density": 5.0, "accountability": 5.0, "transparency": 4.75, "authority": 5.0, "avg_ge_freq": 0.66666, "relative_se_rank": 2.0, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 88, "query_type": "QuoraQuestions", "num_sources": 1, "unweighted_mean_score": 3.625, "weighted_total_content_score": 69.47368421052632, "semantic_relevance": 2.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 2.0, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.5348837209302325, "normalized_reciprocal_se_rank": 0.11965811965811966, "reciprocal_se_rank": 0.038461538461538464, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "claude", "query_id": 49, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 3.375, "weighted_total_content_score": 69.47368421052632, "semantic_relevance": 5.0, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 2.0, "transparency": 3.0, "authority": 2.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.1739130434782608, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-reasoning-gensee", "query_id": 62, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.40625, "weighted_total_content_score": 69.47368421052632, "semantic_relevance": 4.0, "factual_accuracy": 4.0, "freshness": 4.25, "objectivity_tone": 4.0, "layout_ad_density": 4.5, "accountability": 4.75, "transparency": 4.75, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 59, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.575, "weighted_total_content_score": 69.47368421052632, "semantic_relevance": 1.8, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.2, "accountability": 2.8, "transparency": 4.2, "authority": 3.6, "avg_ge_freq": 0.6, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 58, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.375, "weighted_total_content_score": 69.47368421052632, "semantic_relevance": 4.2, "factual_accuracy": 3.8, "freshness": 4.0, "objectivity_tone": 4.0, "layout_ad_density": 3.4, "accountability": 2.6, "transparency": 2.4, "authority": 2.6, "avg_ge_freq": 1.0, "relative_se_rank": 0.2541666666666667, "normalized_reciprocal_se_rank": 0.35882551000198065, "reciprocal_se_rank": 0.09593137254901961, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 3, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.525, "weighted_total_content_score": 69.26315789473684, "semantic_relevance": 2.6, "factual_accuracy": 3.6, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 2.4, "accountability": 3.0, "transparency": 3.8, "authority": 4.6, "avg_ge_freq": 0.3333, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-reasoning-tavily", "query_id": 91, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 3.53125, "weighted_total_content_score": 69.21052631578948, "semantic_relevance": 3.5, "factual_accuracy": 3.5, "freshness": 5.0, "objectivity_tone": 2.25, "layout_ad_density": 3.5, "accountability": 3.25, "transparency": 3.5, "authority": 3.75, "avg_ge_freq": 1.0, "relative_se_rank": 2.1249999999999996, "normalized_reciprocal_se_rank": 0.10549943883277216, "reciprocal_se_rank": 0.035059331175836025, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 39, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.6, "weighted_total_content_score": 69.05263157894737, "semantic_relevance": 3.0, "factual_accuracy": 2.4, "freshness": 5.0, "objectivity_tone": 2.6, "layout_ad_density": 2.8, "accountability": 4.0, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 63, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.475, "weighted_total_content_score": 69.05263157894736, "semantic_relevance": 3.0, "factual_accuracy": 3.8, "freshness": 2.0, "objectivity_tone": 3.2, "layout_ad_density": 3.4, "accountability": 4.4, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 10, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.45, "weighted_total_content_score": 69.05263157894736, "semantic_relevance": 3.6, "factual_accuracy": 3.8, "freshness": 3.8, "objectivity_tone": 3.0, "layout_ad_density": 2.6, "accountability": 3.4, "transparency": 3.6, "authority": 3.8, "avg_ge_freq": 0.3333, "relative_se_rank": 2.1739130434782608, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 83, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.425, "weighted_total_content_score": 68.84210526315789, "semantic_relevance": 2.0, "factual_accuracy": 4.2, "freshness": 4.4, "objectivity_tone": 4.4, "layout_ad_density": 4.0, "accountability": 3.2, "transparency": 2.2, "authority": 3.0, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 7, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.425, "weighted_total_content_score": 68.63157894736841, "semantic_relevance": 3.0, "factual_accuracy": 3.8, "freshness": 4.6, "objectivity_tone": 3.6, "layout_ad_density": 2.2, "accountability": 2.6, "transparency": 4.0, "authority": 3.6, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 2.857142857142857, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 34, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.625, "weighted_total_content_score": 68.63157894736841, "semantic_relevance": 2.2, "factual_accuracy": 2.4, "freshness": 4.8, "objectivity_tone": 2.6, "layout_ad_density": 4.4, "accountability": 3.6, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 1.6480000000000001, "normalized_reciprocal_se_rank": 0.04740740740740741, "reciprocal_se_rank": 0.02110032362459547, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 11, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.375, "weighted_total_content_score": 68.42105263157895, "semantic_relevance": 3.6666666666666665, "factual_accuracy": 3.3333333333333335, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.0, "accountability": 2.3333333333333335, "transparency": 3.3333333333333335, "authority": 3.3333333333333335, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 84, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.5, "weighted_total_content_score": 68.42105263157895, "semantic_relevance": 3.4, "factual_accuracy": 2.8, "freshness": 5.0, "objectivity_tone": 2.8, "layout_ad_density": 2.2, "accountability": 3.8, "transparency": 4.2, "authority": 3.8, "avg_ge_freq": 0.3333, "relative_se_rank": 1.4863636363636368, "normalized_reciprocal_se_rank": 0.13475413475413475, "reciprocal_se_rank": 0.04208897898218286, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 76, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.55, "weighted_total_content_score": 68.42105263157895, "semantic_relevance": 3.2, "factual_accuracy": 3.0, "freshness": 3.8, "objectivity_tone": 2.0, "layout_ad_density": 4.6, "accountability": 3.4, "transparency": 4.0, "authority": 4.4, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 1.9666666666666668, "normalized_reciprocal_se_rank": 0.043939393939393945, "reciprocal_se_rank": 0.02026699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "exa", "query_id": 44, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.3125, "weighted_total_content_score": 68.42105263157895, "semantic_relevance": 4.25, "factual_accuracy": 4.0, "freshness": 3.5, "objectivity_tone": 3.75, "layout_ad_density": 2.0, "accountability": 3.25, "transparency": 3.0, "authority": 2.75, "avg_ge_freq": 1.0, "relative_se_rank": 0.6686046511627908, "normalized_reciprocal_se_rank": 0.3974186307519641, "reciprocal_se_rank": 0.10520496224379719, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "gensee", "query_id": 56, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 3.375, "weighted_total_content_score": 68.42105263157895, "semantic_relevance": 2.0, "factual_accuracy": 5.0, "freshness": 3.0, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 3.0, "transparency": 3.0, "authority": 3.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 82, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 3.4375, "weighted_total_content_score": 68.42105263157895, "semantic_relevance": 3.0, "factual_accuracy": 3.75, "freshness": 4.0, "objectivity_tone": 3.25, "layout_ad_density": 3.25, "accountability": 3.5, "transparency": 3.25, "authority": 3.5, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-4o", "query_id": 7, "query_type": "VACOS", "num_sources": 1, "unweighted_mean_score": 3.5, "weighted_total_content_score": 68.42105263157895, "semantic_relevance": 1.0, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 2.0, "accountability": 3.0, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": 0.6667, "relative_se_rank": 2.857142857142857, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 58, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 3.25, "weighted_total_content_score": 68.42105263157895, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 3.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 2.0, "transparency": 2.0, "authority": 3.0, "avg_ge_freq": 0.3333, "relative_se_rank": 0.041666666666666664, "normalized_reciprocal_se_rank": 0.791919191919192, "reciprocal_se_rank": 0.2, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 99, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.45, "weighted_total_content_score": 68.21052631578948, "semantic_relevance": 3.4, "factual_accuracy": 3.4, "freshness": 4.8, "objectivity_tone": 2.8, "layout_ad_density": 3.0, "accountability": 3.8, "transparency": 3.4, "authority": 3.0, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 43, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.45, "weighted_total_content_score": 68.21052631578948, "semantic_relevance": 2.2, "factual_accuracy": 3.6, "freshness": 3.8, "objectivity_tone": 3.8, "layout_ad_density": 3.8, "accountability": 3.4, "transparency": 3.6, "authority": 3.4, "avg_ge_freq": 0.6, "relative_se_rank": 1.6708333333333336, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "exa", "query_id": 88, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.475, "weighted_total_content_score": 68.21052631578947, "semantic_relevance": 2.6, "factual_accuracy": 3.6, "freshness": 3.2, "objectivity_tone": 3.0, "layout_ad_density": 2.6, "accountability": 4.0, "transparency": 4.2, "authority": 4.6, "avg_ge_freq": 1.0, "relative_se_rank": 1.0558139534883721, "normalized_reciprocal_se_rank": 0.31423569023569026, "reciprocal_se_rank": 0.0852168284789644, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "google-search", "query_id": 42, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.40625, "weighted_total_content_score": 68.1578947368421, "semantic_relevance": 2.0, "factual_accuracy": 4.25, "freshness": 4.25, "objectivity_tone": 4.0, "layout_ad_density": 4.25, "accountability": 2.75, "transparency": 2.5, "authority": 3.25, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 58, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.655, "weighted_total_content_score": 68.0, "semantic_relevance": 4.25, "factual_accuracy": 4.0, "freshness": 4.2, "objectivity_tone": 4.5, "layout_ad_density": 3.8, "accountability": 3.0, "transparency": 3.0, "authority": 3.0, "avg_ge_freq": 0.73334, "relative_se_rank": 0.8375, "normalized_reciprocal_se_rank": 0.16153164296021438, "reciprocal_se_rank": 0.04852338022587675, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 44, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.475, "weighted_total_content_score": 67.99999999999999, "semantic_relevance": 2.0, "factual_accuracy": 3.4, "freshness": 4.8, "objectivity_tone": 3.6, "layout_ad_density": 2.4, "accountability": 4.0, "transparency": 3.6, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "exa", "query_id": 49, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.5, "weighted_total_content_score": 67.89473684210526, "semantic_relevance": 1.5, "factual_accuracy": 2.5, "freshness": 3.5, "objectivity_tone": 4.5, "layout_ad_density": 4.0, "accountability": 4.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.16304347826086957, "normalized_reciprocal_se_rank": 0.4451178451178451, "reciprocal_se_rank": 0.11666666666666667, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "exa", "query_id": 53, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.4375, "weighted_total_content_score": 67.89473684210526, "semantic_relevance": 1.75, "factual_accuracy": 3.5, "freshness": 4.25, "objectivity_tone": 4.25, "layout_ad_density": 3.0, "accountability": 3.0, "transparency": 3.75, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 93, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.425, "weighted_total_content_score": 67.78947368421052, "semantic_relevance": 3.2, "factual_accuracy": 3.2, "freshness": 3.0, "objectivity_tone": 3.2, "layout_ad_density": 3.4, "accountability": 3.8, "transparency": 3.8, "authority": 3.8, "avg_ge_freq": 0.3333, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 56, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.55, "weighted_total_content_score": 67.78947368421052, "semantic_relevance": 1.2, "factual_accuracy": 2.0, "freshness": 4.2, "objectivity_tone": 4.4, "layout_ad_density": 4.2, "accountability": 3.4, "transparency": 4.4, "authority": 4.6, "avg_ge_freq": 1.0, "relative_se_rank": 1.327659574468085, "normalized_reciprocal_se_rank": 0.17111111111111113, "reciprocal_se_rank": 0.050825242718446594, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 55, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 3.5, "weighted_total_content_score": 67.71929824561404, "semantic_relevance": 1.6666666666666667, "factual_accuracy": 3.6666666666666665, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 3.3333333333333335, "accountability": 3.6666666666666665, "transparency": 4.333333333333333, "authority": 3.3333333333333335, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-chat-tavily", "query_id": 72, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 3.34375, "weighted_total_content_score": 67.63157894736841, "semantic_relevance": 3.25, "factual_accuracy": 3.75, "freshness": 3.5, "objectivity_tone": 3.75, "layout_ad_density": 2.75, "accountability": 3.25, "transparency": 3.25, "authority": 3.25, "avg_ge_freq": 1.0, "relative_se_rank": 1.8475609756097562, "normalized_reciprocal_se_rank": 0.16329966329966328, "reciprocal_se_rank": 0.048948220064724914, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 89, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.425, "weighted_total_content_score": 67.57894736842105, "semantic_relevance": 2.8, "factual_accuracy": 3.2, "freshness": 3.0, "objectivity_tone": 3.4, "layout_ad_density": 3.6, "accountability": 2.8, "transparency": 4.4, "authority": 4.2, "avg_ge_freq": 0.66666, "relative_se_rank": 1.9951219512195124, "normalized_reciprocal_se_rank": 0.06127946127946128, "reciprocal_se_rank": 0.0244336569579288, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 52, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.45, "weighted_total_content_score": 67.57894736842104, "semantic_relevance": 1.6, "factual_accuracy": 3.6, "freshness": 5.0, "objectivity_tone": 3.8, "layout_ad_density": 2.8, "accountability": 3.6, "transparency": 3.6, "authority": 3.6, "avg_ge_freq": 0.46665999999999996, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 54, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.375, "weighted_total_content_score": 67.57894736842104, "semantic_relevance": 3.4, "factual_accuracy": 3.6, "freshness": 4.0, "objectivity_tone": 3.2, "layout_ad_density": 2.8, "accountability": 2.8, "transparency": 3.6, "authority": 3.6, "avg_ge_freq": 0.86668, "relative_se_rank": 0.14693877551020407, "normalized_reciprocal_se_rank": 0.4250780789818223, "reciprocal_se_rank": 0.11185128596893304, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 87, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 3.4583333333333335, "weighted_total_content_score": 67.36842105263158, "semantic_relevance": 1.6666666666666667, "factual_accuracy": 3.6666666666666665, "freshness": 2.3333333333333335, "objectivity_tone": 3.3333333333333335, "layout_ad_density": 4.666666666666667, "accountability": 3.6666666666666665, "transparency": 4.333333333333333, "authority": 4.0, "avg_ge_freq": 0.8889, "relative_se_rank": 1.983739837398374, "normalized_reciprocal_se_rank": 0.016046994770399028, "reciprocal_se_rank": 0.013564690490945394, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "gensee", "query_id": 44, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 3.25, "weighted_total_content_score": 67.36842105263158, "semantic_relevance": 5.0, "factual_accuracy": 4.0, "freshness": 4.0, "objectivity_tone": 3.0, "layout_ad_density": 2.3333333333333335, "accountability": 3.0, "transparency": 2.3333333333333335, "authority": 2.3333333333333335, "avg_ge_freq": 0.4444333333333333, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gensee", "query_id": 53, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 3.3333333333333335, "weighted_total_content_score": 67.36842105263158, "semantic_relevance": 2.6666666666666665, "factual_accuracy": 4.0, "freshness": 3.0, "objectivity_tone": 4.0, "layout_ad_density": 4.333333333333333, "accountability": 2.6666666666666665, "transparency": 3.0, "authority": 3.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 47, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 3.375, "weighted_total_content_score": 67.36842105263158, "semantic_relevance": 2.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.0, "accountability": 3.0, "transparency": 4.0, "authority": 3.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 85, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.45, "weighted_total_content_score": 67.15789473684211, "semantic_relevance": 3.2, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 2.4, "layout_ad_density": 3.2, "accountability": 3.6, "transparency": 3.8, "authority": 3.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 88, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.425, "weighted_total_content_score": 67.1578947368421, "semantic_relevance": 2.8, "factual_accuracy": 3.4, "freshness": 5.0, "objectivity_tone": 2.8, "layout_ad_density": 3.2, "accountability": 3.8, "transparency": 3.2, "authority": 3.2, "avg_ge_freq": 0.3333, "relative_se_rank": 1.6325581395348838, "normalized_reciprocal_se_rank": 0.13887898593780945, "reciprocal_se_rank": 0.043080144679230915, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 87, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.8299999999999996, "weighted_total_content_score": 66.94736842105263, "semantic_relevance": 1.75, "factual_accuracy": 3.75, "freshness": 3.6, "objectivity_tone": 4.0, "layout_ad_density": 3.6, "accountability": 4.2, "transparency": 4.2, "authority": 4.8, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.024390243902439, "normalized_reciprocal_se_rank": 0.038159371492704826, "reciprocal_se_rank": 0.018878101402373244, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 83, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.425, "weighted_total_content_score": 66.94736842105263, "semantic_relevance": 2.2, "factual_accuracy": 3.6, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 2.2, "accountability": 3.4, "transparency": 4.2, "authority": 3.8, "avg_ge_freq": 0.93334, "relative_se_rank": 1.7822222222222224, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "gensee", "query_id": 83, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.425, "weighted_total_content_score": 66.94736842105263, "semantic_relevance": 1.8, "factual_accuracy": 4.0, "freshness": 4.8, "objectivity_tone": 3.0, "layout_ad_density": 2.6, "accountability": 3.2, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 1.7822222222222224, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 71, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.45, "weighted_total_content_score": 66.73684210526315, "semantic_relevance": 1.6, "factual_accuracy": 3.0, "freshness": 2.6, "objectivity_tone": 3.6, "layout_ad_density": 3.4, "accountability": 4.6, "transparency": 4.4, "authority": 4.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 92, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.375, "weighted_total_content_score": 66.73684210526315, "semantic_relevance": 3.0, "factual_accuracy": 3.4, "freshness": 4.6, "objectivity_tone": 3.0, "layout_ad_density": 2.4, "accountability": 4.0, "transparency": 3.4, "authority": 3.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 52, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 3.7023809523809526, "weighted_total_content_score": 66.66666666666667, "semantic_relevance": 1.0, "factual_accuracy": 1.6666666666666667, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 5.0, "accountability": 3.6666666666666665, "transparency": 4.666666666666667, "authority": 5.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "google-search", "query_id": 56, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.5, "weighted_total_content_score": 66.57894736842105, "semantic_relevance": 1.0, "factual_accuracy": 2.25, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.5, "accountability": 3.5, "transparency": 4.0, "authority": 3.75, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "exa", "query_id": 56, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.53125, "weighted_total_content_score": 66.57894736842104, "semantic_relevance": 1.0, "factual_accuracy": 1.5, "freshness": 4.5, "objectivity_tone": 4.25, "layout_ad_density": 3.75, "accountability": 3.75, "transparency": 4.5, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.6329787234042552, "normalized_reciprocal_se_rank": 0.09393939393939395, "reciprocal_se_rank": 0.03228155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 63, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.8549999999999995, "weighted_total_content_score": 66.52631578947367, "semantic_relevance": 2.25, "factual_accuracy": 2.25, "freshness": 4.4, "objectivity_tone": 4.0, "layout_ad_density": 3.8, "accountability": 4.2, "transparency": 4.6, "authority": 4.4, "avg_ge_freq": 0.53332, "relative_se_rank": 4.761904761904762, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 97, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.9799999999999995, "weighted_total_content_score": 66.52631578947367, "semantic_relevance": 4.25, "factual_accuracy": 4.5, "freshness": 4.75, "objectivity_tone": 4.25, "layout_ad_density": 3.0, "accountability": 4.0, "transparency": 3.5, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.055, "normalized_reciprocal_se_rank": 0.051370851370851366, "reciprocal_se_rank": 0.02205270457697642, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 56, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.34375, "weighted_total_content_score": 66.3157894736842, "semantic_relevance": 1.5, "factual_accuracy": 3.0, "freshness": 3.75, "objectivity_tone": 5.0, "layout_ad_density": 3.5, "accountability": 3.0, "transparency": 3.5, "authority": 3.5, "avg_ge_freq": 0.833325, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 49, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.3928571428571432, "weighted_total_content_score": 66.3157894736842, "semantic_relevance": 3.8, "factual_accuracy": 3.4, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 3.3333333333333335, "accountability": 2.6, "transparency": 2.6, "authority": 2.2, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.1739130434782608, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 11, "query_type": "VACOS", "num_sources": 2, "unweighted_mean_score": 3.375, "weighted_total_content_score": 66.3157894736842, "semantic_relevance": 2.5, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 3.5, "layout_ad_density": 2.5, "accountability": 3.0, "transparency": 3.5, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.2926829268292683, "normalized_reciprocal_se_rank": 0.21099887766554432, "reciprocal_se_rank": 0.060409924487594385, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "gensee", "query_id": 41, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 3.25, "weighted_total_content_score": 66.3157894736842, "semantic_relevance": 3.0, "factual_accuracy": 4.0, "freshness": 3.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 2.0, "transparency": 3.0, "authority": 4.0, "avg_ge_freq": 0.3333, "relative_se_rank": 4.545454545454546, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 82, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.35, "weighted_total_content_score": 66.3157894736842, "semantic_relevance": 2.8, "factual_accuracy": 3.6, "freshness": 4.4, "objectivity_tone": 3.0, "layout_ad_density": 2.6, "accountability": 4.0, "transparency": 3.0, "authority": 3.4, "avg_ge_freq": 1.0, "relative_se_rank": 1.390909090909091, "normalized_reciprocal_se_rank": 0.2612794612794612, "reciprocal_se_rank": 0.07249190938511327, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 17, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.325, "weighted_total_content_score": 66.31578947368419, "semantic_relevance": 3.6, "factual_accuracy": 3.2, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 2.4, "accountability": 3.4, "transparency": 3.0, "authority": 3.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 60, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.275, "weighted_total_content_score": 66.10526315789474, "semantic_relevance": 1.8, "factual_accuracy": 4.0, "freshness": 4.4, "objectivity_tone": 4.6, "layout_ad_density": 4.0, "accountability": 2.6, "transparency": 2.2, "authority": 2.6, "avg_ge_freq": 0.6, "relative_se_rank": 1.3083333333333336, "normalized_reciprocal_se_rank": 0.182010582010582, "reciprocal_se_rank": 0.05344429033749422, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "exa", "query_id": 45, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.275, "weighted_total_content_score": 66.10526315789474, "semantic_relevance": 2.0, "factual_accuracy": 3.8, "freshness": 5.0, "objectivity_tone": 4.6, "layout_ad_density": 3.4, "accountability": 2.2, "transparency": 3.0, "authority": 2.2, "avg_ge_freq": 1.0, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 95, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.325, "weighted_total_content_score": 66.10526315789473, "semantic_relevance": 3.2, "factual_accuracy": 3.4, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 2.4, "accountability": 3.4, "transparency": 3.4, "authority": 2.8, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 2.068292682926829, "normalized_reciprocal_se_rank": 0.022745978301533857, "reciprocal_se_rank": 0.015174397698669542, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 27, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.4, "weighted_total_content_score": 66.10526315789473, "semantic_relevance": 2.8, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 2.6, "layout_ad_density": 3.2, "accountability": 3.2, "transparency": 3.8, "authority": 3.6, "avg_ge_freq": 0.3333, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 95, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.325, "weighted_total_content_score": 66.10526315789473, "semantic_relevance": 3.0, "factual_accuracy": 3.4, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 3.2, "accountability": 3.0, "transparency": 3.0, "authority": 2.8, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 17, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.3, "weighted_total_content_score": 65.89473684210526, "semantic_relevance": 3.4, "factual_accuracy": 3.6, "freshness": 5.0, "objectivity_tone": 2.8, "layout_ad_density": 2.4, "accountability": 3.6, "transparency": 2.8, "authority": 2.8, "avg_ge_freq": 0.93334, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 20, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.475, "weighted_total_content_score": 65.89473684210526, "semantic_relevance": 1.6, "factual_accuracy": 2.2, "freshness": 5.0, "objectivity_tone": 3.2, "layout_ad_density": 3.0, "accountability": 3.2, "transparency": 4.6, "authority": 5.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 49, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.1875, "weighted_total_content_score": 65.78947368421052, "semantic_relevance": 4.5, "factual_accuracy": 3.5, "freshness": 3.0, "objectivity_tone": 3.5, "layout_ad_density": 3.0, "accountability": 2.5, "transparency": 3.0, "authority": 2.5, "avg_ge_freq": 0.5, "relative_se_rank": 2.1739130434782608, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 81, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.333333333333333, "weighted_total_content_score": 65.78947368421052, "semantic_relevance": 4.666666666666667, "factual_accuracy": 4.666666666666667, "freshness": 4.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 4.333333333333333, "accountability": 4.0, "transparency": 4.0, "authority": 4.333333333333333, "avg_ge_freq": 0.5833499999999999, "relative_se_rank": 1.8353658536585367, "normalized_reciprocal_se_rank": 0.25, "reciprocal_se_rank": 0.06978155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "tavily", "query_id": 48, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.28125, "weighted_total_content_score": 65.78947368421052, "semantic_relevance": 2.0, "factual_accuracy": 4.0, "freshness": 2.75, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 2.75, "transparency": 3.25, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 0.6420454545454546, "normalized_reciprocal_se_rank": 0.4073272406605739, "reciprocal_se_rank": 0.10758591462474958, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "gensee", "query_id": 59, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.325, "weighted_total_content_score": 65.6842105263158, "semantic_relevance": 2.0, "factual_accuracy": 3.6, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 3.4, "accountability": 2.8, "transparency": 3.0, "authority": 3.2, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 2.3255813953488373, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 82, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.325, "weighted_total_content_score": 65.68421052631578, "semantic_relevance": 2.8, "factual_accuracy": 3.2, "freshness": 4.6, "objectivity_tone": 3.2, "layout_ad_density": 3.0, "accountability": 3.0, "transparency": 3.6, "authority": 3.2, "avg_ge_freq": 0.8666600000000001, "relative_se_rank": 1.4045454545454548, "normalized_reciprocal_se_rank": 0.21503928170594833, "reciprocal_se_rank": 0.061380798274002155, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "exa", "query_id": 11, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.3333333333333335, "weighted_total_content_score": 65.6140350877193, "semantic_relevance": 3.0, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 2.6666666666666665, "accountability": 3.0, "transparency": 3.6666666666666665, "authority": 3.3333333333333335, "avg_ge_freq": 1.0, "relative_se_rank": 1.6422764227642277, "normalized_reciprocal_se_rank": 0.263973063973064, "reciprocal_se_rank": 0.07313915857605179, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "google-search", "query_id": 69, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 3.28125, "weighted_total_content_score": 65.52631578947368, "semantic_relevance": 3.25, "factual_accuracy": 3.5, "freshness": 2.0, "objectivity_tone": 3.0, "layout_ad_density": 3.25, "accountability": 4.0, "transparency": 3.5, "authority": 3.75, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 41, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.35, "weighted_total_content_score": 65.47368421052632, "semantic_relevance": 1.8, "factual_accuracy": 3.2, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 3.6, "accountability": 3.2, "transparency": 3.2, "authority": 3.2, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 9, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 4.04, "weighted_total_content_score": 65.47368421052632, "semantic_relevance": 3.0, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 3.75, "layout_ad_density": 3.0, "accountability": 4.25, "transparency": 4.5, "authority": 4.5, "avg_ge_freq": 0.3333, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 92, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 3.3125, "weighted_total_content_score": 65.26315789473685, "semantic_relevance": 3.0, "factual_accuracy": 3.0, "freshness": 3.25, "objectivity_tone": 3.0, "layout_ad_density": 3.5, "accountability": 3.75, "transparency": 3.75, "authority": 3.25, "avg_ge_freq": 1.0, "relative_se_rank": 0.7346938775510204, "normalized_reciprocal_se_rank": 0.16931994772903866, "reciprocal_se_rank": 0.05039484180867676, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 7, "query_type": "VACOS", "num_sources": 1, "unweighted_mean_score": 3.25, "weighted_total_content_score": 65.26315789473684, "semantic_relevance": 3.0, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 2.0, "transparency": 3.0, "authority": 3.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.857142857142857, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 41, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 3.25, "weighted_total_content_score": 65.26315789473684, "semantic_relevance": 2.0, "factual_accuracy": 4.0, "freshness": 2.0, "objectivity_tone": 4.0, "layout_ad_density": 2.0, "accountability": 4.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 0.6667, "relative_se_rank": 4.545454545454546, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-chat-tavily", "query_id": 2, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.25, "weighted_total_content_score": 65.26315789473684, "semantic_relevance": 3.3333333333333335, "factual_accuracy": 3.6666666666666665, "freshness": 3.6666666666666665, "objectivity_tone": 3.0, "layout_ad_density": 2.0, "accountability": 3.6666666666666665, "transparency": 3.3333333333333335, "authority": 3.3333333333333335, "avg_ge_freq": 1.0, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 52, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.4375, "weighted_total_content_score": 65.26315789473684, "semantic_relevance": 1.25, "factual_accuracy": 2.0, "freshness": 4.5, "objectivity_tone": 3.75, "layout_ad_density": 4.0, "accountability": 3.75, "transparency": 4.25, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.5714285714285714, "normalized_reciprocal_se_rank": 0.47544893378226716, "reciprocal_se_rank": 0.12395496224379719, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "tavily", "query_id": 20, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 3.15625, "weighted_total_content_score": 65.26315789473684, "semantic_relevance": 3.75, "factual_accuracy": 3.75, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 1.25, "accountability": 2.25, "transparency": 2.5, "authority": 2.75, "avg_ge_freq": 1.0, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 45, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.225, "weighted_total_content_score": 65.05263157894737, "semantic_relevance": 2.0, "factual_accuracy": 3.8, "freshness": 4.6, "objectivity_tone": 4.4, "layout_ad_density": 3.0, "accountability": 2.2, "transparency": 2.8, "authority": 3.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 61, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.325, "weighted_total_content_score": 65.05263157894737, "semantic_relevance": 3.2, "factual_accuracy": 2.8, "freshness": 3.8, "objectivity_tone": 2.6, "layout_ad_density": 3.2, "accountability": 4.2, "transparency": 3.4, "authority": 3.4, "avg_ge_freq": 0.66666, "relative_se_rank": 1.4266666666666667, "normalized_reciprocal_se_rank": 0.1881359044995409, "reciprocal_se_rank": 0.0549161518093557, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "exa", "query_id": 7, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.275, "weighted_total_content_score": 65.05263157894737, "semantic_relevance": 2.0, "factual_accuracy": 3.4, "freshness": 4.4, "objectivity_tone": 4.0, "layout_ad_density": 2.6, "accountability": 2.4, "transparency": 4.2, "authority": 3.2, "avg_ge_freq": 1.0, "relative_se_rank": 2.857142857142857, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 49, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.15, "weighted_total_content_score": 64.84210526315789, "semantic_relevance": 3.8, "factual_accuracy": 3.4, "freshness": 3.0, "objectivity_tone": 4.0, "layout_ad_density": 3.4, "accountability": 2.8, "transparency": 2.6, "authority": 2.2, "avg_ge_freq": 0.66666, "relative_se_rank": 2.1739130434782608, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 90, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.3, "weighted_total_content_score": 64.84210526315789, "semantic_relevance": 3.6, "factual_accuracy": 2.8, "freshness": 5.0, "objectivity_tone": 2.4, "layout_ad_density": 2.6, "accountability": 3.0, "transparency": 4.0, "authority": 3.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.5866666666666667, "normalized_reciprocal_se_rank": 0.30691153963881235, "reciprocal_se_rank": 0.08345689908796705, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 48, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.3125, "weighted_total_content_score": 64.73684210526315, "semantic_relevance": 1.5, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.5, "accountability": 4.0, "transparency": 3.5, "authority": 3.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.272727272727273, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 46, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.225, "weighted_total_content_score": 64.63157894736841, "semantic_relevance": 3.4, "factual_accuracy": 3.0, "freshness": 4.2, "objectivity_tone": 3.4, "layout_ad_density": 2.6, "accountability": 3.0, "transparency": 3.0, "authority": 3.2, "avg_ge_freq": 1.0, "relative_se_rank": 0.5466666666666666, "normalized_reciprocal_se_rank": 0.3722783389450056, "reciprocal_se_rank": 0.09916396979503775, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 40, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 3.40625, "weighted_total_content_score": 64.47368421052632, "semantic_relevance": 2.5, "factual_accuracy": 2.5, "freshness": 5.0, "objectivity_tone": 1.75, "layout_ad_density": 3.25, "accountability": 4.25, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.4, "normalized_reciprocal_se_rank": 0.15713957759412306, "reciprocal_se_rank": 0.0474680052956752, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "exa", "query_id": 85, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 3.3125, "weighted_total_content_score": 64.47368421052632, "semantic_relevance": 3.5, "factual_accuracy": 2.75, "freshness": 5.0, "objectivity_tone": 2.0, "layout_ad_density": 2.75, "accountability": 4.0, "transparency": 3.75, "authority": 2.75, "avg_ge_freq": 1.0, "relative_se_rank": 1.1861702127659575, "normalized_reciprocal_se_rank": 0.12769855416914241, "reciprocal_se_rank": 0.04039358461831334, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 61, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.3, "weighted_total_content_score": 64.42105263157895, "semantic_relevance": 3.2, "factual_accuracy": 2.6, "freshness": 3.2, "objectivity_tone": 2.6, "layout_ad_density": 3.6, "accountability": 4.0, "transparency": 3.6, "authority": 3.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "deepseek-reasoning-gensee", "query_id": 82, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 4.0, "weighted_total_content_score": 64.42105263157893, "semantic_relevance": 4.25, "factual_accuracy": 4.5, "freshness": 4.75, "objectivity_tone": 3.75, "layout_ad_density": 3.25, "accountability": 4.25, "transparency": 3.75, "authority": 3.5, "avg_ge_freq": 1.0, "relative_se_rank": 1.8545454545454547, "normalized_reciprocal_se_rank": 0.06758494031221304, "reciprocal_se_rank": 0.025948808473080322, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 49, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.21875, "weighted_total_content_score": 64.21052631578948, "semantic_relevance": 2.5, "factual_accuracy": 3.0, "freshness": 2.75, "objectivity_tone": 4.0, "layout_ad_density": 3.5, "accountability": 3.0, "transparency": 3.5, "authority": 3.5, "avg_ge_freq": 0.916675, "relative_se_rank": 1.6467391304347825, "normalized_reciprocal_se_rank": 0.16329966329966328, "reciprocal_se_rank": 0.048948220064724914, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 42, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 3.25, "weighted_total_content_score": 64.21052631578947, "semantic_relevance": 1.3333333333333333, "factual_accuracy": 3.0, "freshness": 3.0, "objectivity_tone": 4.666666666666667, "layout_ad_density": 3.0, "accountability": 3.6666666666666665, "transparency": 3.6666666666666665, "authority": 3.6666666666666665, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "exa", "query_id": 91, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.3, "weighted_total_content_score": 64.21052631578947, "semantic_relevance": 4.0, "factual_accuracy": 2.0, "freshness": 5.0, "objectivity_tone": 2.2, "layout_ad_density": 2.2, "accountability": 3.6, "transparency": 3.8, "authority": 3.6, "avg_ge_freq": 1.0, "relative_se_rank": 2.7777777777777777, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 44, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.25, "weighted_total_content_score": 64.21052631578947, "semantic_relevance": 2.0, "factual_accuracy": 3.5, "freshness": 3.75, "objectivity_tone": 3.5, "layout_ad_density": 3.0, "accountability": 3.5, "transparency": 3.25, "authority": 3.5, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "tavily", "query_id": 44, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.125, "weighted_total_content_score": 63.94736842105263, "semantic_relevance": 3.25, "factual_accuracy": 3.25, "freshness": 3.5, "objectivity_tone": 4.25, "layout_ad_density": 2.5, "accountability": 2.75, "transparency": 2.5, "authority": 3.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.36046511627906974, "normalized_reciprocal_se_rank": 0.2987863451631567, "reciprocal_se_rank": 0.08150448585231193, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "tavily", "query_id": 52, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.59375, "weighted_total_content_score": 63.94736842105262, "semantic_relevance": 1.5, "factual_accuracy": 2.75, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 3.75, "accountability": 4.0, "transparency": 4.333333333333333, "authority": 4.666666666666667, "avg_ge_freq": 1.0, "relative_se_rank": 1.5612244897959184, "normalized_reciprocal_se_rank": 0.10549943883277216, "reciprocal_se_rank": 0.035059331175836025, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 43, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.225, "weighted_total_content_score": 63.78947368421052, "semantic_relevance": 2.4, "factual_accuracy": 3.4, "freshness": 4.4, "objectivity_tone": 3.2, "layout_ad_density": 3.2, "accountability": 3.0, "transparency": 3.2, "authority": 3.0, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 1.7791666666666668, "normalized_reciprocal_se_rank": 0.019663299663299664, "reciprocal_se_rank": 0.0144336569579288, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 91, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.25, "weighted_total_content_score": 63.78947368421052, "semantic_relevance": 3.8, "factual_accuracy": 2.4, "freshness": 5.0, "objectivity_tone": 2.4, "layout_ad_density": 2.6, "accountability": 3.4, "transparency": 3.6, "authority": 2.8, "avg_ge_freq": 0.86668, "relative_se_rank": 2.7777777777777777, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 73, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.25, "weighted_total_content_score": 63.78947368421052, "semantic_relevance": 3.2, "factual_accuracy": 2.8, "freshness": 2.8, "objectivity_tone": 2.6, "layout_ad_density": 3.2, "accountability": 4.2, "transparency": 3.8, "authority": 3.4, "avg_ge_freq": 1.0, "relative_se_rank": 0.8875, "normalized_reciprocal_se_rank": 0.41432789432789435, "reciprocal_se_rank": 0.10926811053024646, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 60, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.21875, "weighted_total_content_score": 63.68421052631578, "semantic_relevance": 1.75, "factual_accuracy": 3.5, "freshness": 4.0, "objectivity_tone": 3.75, "layout_ad_density": 4.5, "accountability": 2.75, "transparency": 2.5, "authority": 3.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "tavily", "query_id": 54, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.21875, "weighted_total_content_score": 63.68421052631578, "semantic_relevance": 2.25, "factual_accuracy": 3.25, "freshness": 4.0, "objectivity_tone": 3.5, "layout_ad_density": 3.0, "accountability": 3.0, "transparency": 3.25, "authority": 3.5, "avg_ge_freq": 1.0, "relative_se_rank": 1.3724489795918366, "normalized_reciprocal_se_rank": 0.04354822737175678, "reciprocal_se_rank": 0.020172996383019226, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "deepseek-chat-gensee", "query_id": 91, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.25, "weighted_total_content_score": 63.578947368421055, "semantic_relevance": 3.4, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 2.0, "layout_ad_density": 2.6, "accountability": 3.2, "transparency": 3.6, "authority": 3.2, "avg_ge_freq": 1.0, "relative_se_rank": 2.7777777777777777, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 44, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 3.1666666666666665, "weighted_total_content_score": 63.508771929824555, "semantic_relevance": 3.6666666666666665, "factual_accuracy": 3.0, "freshness": 4.0, "objectivity_tone": 3.0, "layout_ad_density": 2.3333333333333335, "accountability": 3.0, "transparency": 3.0, "authority": 3.3333333333333335, "avg_ge_freq": 0.6666666666666666, "relative_se_rank": 0.9767441860465116, "normalized_reciprocal_se_rank": 0.1875111826802648, "reciprocal_se_rank": 0.054766036614917996, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 47, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.275, "weighted_total_content_score": 63.368421052631575, "semantic_relevance": 1.2, "factual_accuracy": 2.8, "freshness": 4.4, "objectivity_tone": 3.8, "layout_ad_density": 2.4, "accountability": 3.6, "transparency": 4.2, "authority": 3.8, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 78, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 4.03125, "weighted_total_content_score": 63.368421052631575, "semantic_relevance": 3.0, "factual_accuracy": 3.75, "freshness": 4.25, "objectivity_tone": 4.0, "layout_ad_density": 3.25, "accountability": 5.0, "transparency": 4.75, "authority": 4.25, "avg_ge_freq": 0.39997999999999995, "relative_se_rank": 1.7106382978723402, "normalized_reciprocal_se_rank": 0.1583838383838384, "reciprocal_se_rank": 0.047766990291262135, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 63, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 3.1875, "weighted_total_content_score": 63.15789473684211, "semantic_relevance": 2.5, "factual_accuracy": 3.0, "freshness": 2.25, "objectivity_tone": 3.5, "layout_ad_density": 3.0, "accountability": 4.25, "transparency": 3.5, "authority": 3.5, "avg_ge_freq": 0.75, "relative_se_rank": 4.761904761904762, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 81, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.675, "weighted_total_content_score": 63.1578947368421, "semantic_relevance": 1.8, "factual_accuracy": 3.2, "freshness": 5.0, "objectivity_tone": 4.2, "layout_ad_density": 4.0, "accountability": 3.5, "transparency": 3.25, "authority": 3.5, "avg_ge_freq": 0.53332, "relative_se_rank": 1.95609756097561, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "google-search", "query_id": 87, "query_type": "QuoraQuestions", "num_sources": 2, "unweighted_mean_score": 3.1875, "weighted_total_content_score": 63.1578947368421, "semantic_relevance": 2.0, "factual_accuracy": 3.5, "freshness": 3.5, "objectivity_tone": 3.5, "layout_ad_density": 3.0, "accountability": 3.5, "transparency": 3.5, "authority": 3.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gpt-4o", "query_id": 91, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 3.2083333333333335, "weighted_total_content_score": 63.1578947368421, "semantic_relevance": 4.0, "factual_accuracy": 2.6666666666666665, "freshness": 5.0, "objectivity_tone": 2.0, "layout_ad_density": 2.3333333333333335, "accountability": 3.0, "transparency": 3.6666666666666665, "authority": 3.0, "avg_ge_freq": 0.5555666666666667, "relative_se_rank": 2.7777777777777772, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 91, "query_type": "QuoraQuestions", "num_sources": 2, "unweighted_mean_score": 3.25, "weighted_total_content_score": 63.157894736842096, "semantic_relevance": 3.0, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 2.0, "layout_ad_density": 3.0, "accountability": 3.5, "transparency": 3.5, "authority": 3.0, "avg_ge_freq": 0.83335, "relative_se_rank": 1.8333333333333333, "normalized_reciprocal_se_rank": 0.03924963924963925, "reciprocal_se_rank": 0.01914008321775312, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "google-search", "query_id": 88, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 3.21875, "weighted_total_content_score": 62.89473684210526, "semantic_relevance": 1.75, "factual_accuracy": 3.75, "freshness": 3.75, "objectivity_tone": 2.75, "layout_ad_density": 3.0, "accountability": 3.5, "transparency": 3.5, "authority": 3.75, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 3, "query_type": "VACOS", "num_sources": 2, "unweighted_mean_score": 4.025, "weighted_total_content_score": 62.63157894736842, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.5, "accountability": 4.0, "transparency": 3.5, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.1931818181818183, "normalized_reciprocal_se_rank": 0.23989898989898992, "reciprocal_se_rank": 0.06735436893203883, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "claude", "query_id": 74, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 3.125, "weighted_total_content_score": 62.63157894736841, "semantic_relevance": 3.5, "factual_accuracy": 3.0, "freshness": 2.5, "objectivity_tone": 3.0, "layout_ad_density": 3.0, "accountability": 3.5, "transparency": 3.5, "authority": 3.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-chat-tavily", "query_id": 91, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.2, "weighted_total_content_score": 62.52631578947368, "semantic_relevance": 3.4, "factual_accuracy": 2.8, "freshness": 5.0, "objectivity_tone": 2.0, "layout_ad_density": 3.0, "accountability": 3.0, "transparency": 3.2, "authority": 3.2, "avg_ge_freq": 1.0, "relative_se_rank": 2.255555555555555, "normalized_reciprocal_se_rank": 0.08439955106621773, "reciprocal_se_rank": 0.029989212513484353, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "tavily", "query_id": 23, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 3.2916666666666665, "weighted_total_content_score": 62.456140350877185, "semantic_relevance": 3.0, "factual_accuracy": 2.3333333333333335, "freshness": 5.0, "objectivity_tone": 1.3333333333333333, "layout_ad_density": 3.3333333333333335, "accountability": 4.666666666666667, "transparency": 3.3333333333333335, "authority": 3.3333333333333335, "avg_ge_freq": 1.0, "relative_se_rank": 1.8916666666666666, "normalized_reciprocal_se_rank": 0.03277216610549944, "reciprocal_se_rank": 0.017583603020496223, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 47, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.25, "weighted_total_content_score": 62.315789473684205, "semantic_relevance": 1.2, "factual_accuracy": 2.4, "freshness": 5.0, "objectivity_tone": 3.6, "layout_ad_density": 3.4, "accountability": 3.8, "transparency": 3.4, "authority": 3.2, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 60, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.175, "weighted_total_content_score": 62.315789473684205, "semantic_relevance": 1.8, "factual_accuracy": 3.4, "freshness": 4.0, "objectivity_tone": 3.2, "layout_ad_density": 3.4, "accountability": 3.0, "transparency": 3.2, "authority": 3.4, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 2.0833333333333335, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 91, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 3.15625, "weighted_total_content_score": 62.10526315789474, "semantic_relevance": 3.25, "factual_accuracy": 3.25, "freshness": 4.25, "objectivity_tone": 2.0, "layout_ad_density": 3.5, "accountability": 3.0, "transparency": 3.25, "authority": 2.75, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "claude", "query_id": 89, "query_type": "QuoraQuestions", "num_sources": 1, "unweighted_mean_score": 3.125, "weighted_total_content_score": 62.10526315789473, "semantic_relevance": 4.0, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 2.0, "layout_ad_density": 2.0, "accountability": 4.0, "transparency": 3.0, "authority": 2.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 59, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.1875, "weighted_total_content_score": 62.10526315789473, "semantic_relevance": 1.25, "factual_accuracy": 2.5, "freshness": 3.75, "objectivity_tone": 4.25, "layout_ad_density": 3.25, "accountability": 3.0, "transparency": 4.0, "authority": 3.5, "avg_ge_freq": 1.0, "relative_se_rank": 1.2034883720930234, "normalized_reciprocal_se_rank": 0.31792929292929295, "reciprocal_se_rank": 0.08610436893203884, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 65, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 3.1875, "weighted_total_content_score": 62.105263157894726, "semantic_relevance": 3.0, "factual_accuracy": 2.5, "freshness": 3.5, "objectivity_tone": 2.5, "layout_ad_density": 4.0, "accountability": 2.5, "transparency": 3.5, "authority": 4.0, "avg_ge_freq": 0.83335, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 54, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.125, "weighted_total_content_score": 61.84210526315789, "semantic_relevance": 2.25, "factual_accuracy": 3.0, "freshness": 3.25, "objectivity_tone": 3.5, "layout_ad_density": 2.5, "accountability": 3.25, "transparency": 3.5, "authority": 3.75, "avg_ge_freq": 1.0, "relative_se_rank": 1.0969387755102042, "normalized_reciprocal_se_rank": 0.17842056932966027, "reciprocal_se_rank": 0.05258164165931156, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 52, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 3.125, "weighted_total_content_score": 61.754385964912274, "semantic_relevance": 2.3333333333333335, "factual_accuracy": 3.0, "freshness": 4.0, "objectivity_tone": 3.3333333333333335, "layout_ad_density": 3.0, "accountability": 2.6666666666666665, "transparency": 3.0, "authority": 3.6666666666666665, "avg_ge_freq": 1.0, "relative_se_rank": 1.0816326530612246, "normalized_reciprocal_se_rank": 0.05861269238878347, "reciprocal_se_rank": 0.02379285569536302, "percentage_ge_sources_not_in_se_sources": 33.333333333333336, "percentage_ge_sources_in_se_sources": 66.66666666666666 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 69, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.225, "weighted_total_content_score": 61.684210526315795, "semantic_relevance": 1.0, "factual_accuracy": 3.4, "freshness": 4.2, "objectivity_tone": 2.6, "layout_ad_density": 3.4, "accountability": 3.2, "transparency": 4.4, "authority": 3.6, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 6.666666666666667, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 59, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.175, "weighted_total_content_score": 61.684210526315795, "semantic_relevance": 1.4, "factual_accuracy": 2.8, "freshness": 4.2, "objectivity_tone": 3.6, "layout_ad_density": 3.0, "accountability": 2.6, "transparency": 4.2, "authority": 3.6, "avg_ge_freq": 1.0, "relative_se_rank": 0.586046511627907, "normalized_reciprocal_se_rank": 0.3732992260264988, "reciprocal_se_rank": 0.099409280040348, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 45, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.05, "weighted_total_content_score": 61.473684210526315, "semantic_relevance": 2.0, "factual_accuracy": 3.8, "freshness": 4.4, "objectivity_tone": 3.8, "layout_ad_density": 2.8, "accountability": 2.2, "transparency": 3.0, "authority": 2.4, "avg_ge_freq": 1.0, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 47, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.125, "weighted_total_content_score": 61.473684210526315, "semantic_relevance": 1.8, "factual_accuracy": 2.8, "freshness": 3.4, "objectivity_tone": 3.8, "layout_ad_density": 3.4, "accountability": 2.8, "transparency": 3.8, "authority": 3.2, "avg_ge_freq": 0.53332, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 85, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 3.125, "weighted_total_content_score": 61.315789473684205, "semantic_relevance": 3.25, "factual_accuracy": 2.5, "freshness": 5.0, "objectivity_tone": 2.5, "layout_ad_density": 2.5, "accountability": 3.5, "transparency": 3.0, "authority": 2.75, "avg_ge_freq": 0.49999999999999994, "relative_se_rank": 1.6329787234042552, "normalized_reciprocal_se_rank": 0.09393939393939395, "reciprocal_se_rank": 0.03228155339805825, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "gensee", "query_id": 97, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.0, "weighted_total_content_score": 61.315789473684205, "semantic_relevance": 5.0, "factual_accuracy": 4.666666666666667, "freshness": 4.333333333333333, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 3.3333333333333335, "transparency": 3.6666666666666665, "authority": 4.0, "avg_ge_freq": 0.5833499999999999, "relative_se_rank": 0.89375, "normalized_reciprocal_se_rank": 0.361217437533227, "reciprocal_se_rank": 0.09650613183444046, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "tavily", "query_id": 83, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.125, "weighted_total_content_score": 61.26315789473684, "semantic_relevance": 2.2, "factual_accuracy": 3.4, "freshness": 4.6, "objectivity_tone": 2.6, "layout_ad_density": 2.2, "accountability": 3.2, "transparency": 3.2, "authority": 3.6, "avg_ge_freq": 1.0, "relative_se_rank": 0.9555555555555557, "normalized_reciprocal_se_rank": 0.382010582010582, "reciprocal_se_rank": 0.10150254276467867, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "deepseek-reasoning-tavily", "query_id": 81, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 4.625, "weighted_total_content_score": 61.05263157894736, "semantic_relevance": 4.5, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 3.5, "layout_ad_density": 5.0, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.6341463414634145, "normalized_reciprocal_se_rank": 0.3333333333333333, "reciprocal_se_rank": 0.08980582524271845, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "gpt-4o", "query_id": 47, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 3.25, "weighted_total_content_score": 61.05263157894736, "semantic_relevance": 1.0, "factual_accuracy": 1.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 5.0, "transparency": 4.0, "authority": 3.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 87, "query_type": "QuoraQuestions", "num_sources": 2, "unweighted_mean_score": 3.0, "weighted_total_content_score": 61.05263157894736, "semantic_relevance": 4.0, "factual_accuracy": 3.0, "freshness": 2.0, "objectivity_tone": 3.0, "layout_ad_density": 3.5, "accountability": 3.5, "transparency": 2.5, "authority": 2.5, "avg_ge_freq": 0.5, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-chat-gensee", "query_id": 12, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.0, "weighted_total_content_score": 60.78947368421053, "semantic_relevance": 5.0, "factual_accuracy": 4.333333333333333, "freshness": 4.333333333333333, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 3.3333333333333335, "accountability": 2.6666666666666665, "transparency": 4.333333333333333, "authority": 4.333333333333333, "avg_ge_freq": 1.0, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 42, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.09375, "weighted_total_content_score": 60.526315789473685, "semantic_relevance": 1.75, "factual_accuracy": 3.0, "freshness": 3.5, "objectivity_tone": 3.25, "layout_ad_density": 2.75, "accountability": 3.25, "transparency": 3.75, "authority": 3.5, "avg_ge_freq": 0.75, "relative_se_rank": 1.5918367346938775, "normalized_reciprocal_se_rank": 0.05925925925925926, "reciprocal_se_rank": 0.023948220064724916, "percentage_ge_sources_not_in_se_sources": 75.0, "percentage_ge_sources_in_se_sources": 25.0 }, { "model_name": "exa", "query_id": 43, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.0, "weighted_total_content_score": 60.526315789473685, "semantic_relevance": 2.25, "factual_accuracy": 3.25, "freshness": 3.5, "objectivity_tone": 4.0, "layout_ad_density": 2.5, "accountability": 3.0, "transparency": 2.75, "authority": 2.75, "avg_ge_freq": 1.0, "relative_se_rank": 1.1458333333333335, "normalized_reciprocal_se_rank": 0.14886363636363636, "reciprocal_se_rank": 0.045479368932038834, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 41, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.3600000000000003, "weighted_total_content_score": 60.421052631578945, "semantic_relevance": 2.25, "factual_accuracy": 3.75, "freshness": 4.4, "objectivity_tone": 4.25, "layout_ad_density": 3.6, "accountability": 2.4, "transparency": 3.0, "authority": 3.0, "avg_ge_freq": 0.73334, "relative_se_rank": 4.545454545454546, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 85, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.4200000000000004, "weighted_total_content_score": 60.421052631578945, "semantic_relevance": 2.0, "factual_accuracy": 3.25, "freshness": 5.0, "objectivity_tone": 3.5, "layout_ad_density": 2.8, "accountability": 3.6, "transparency": 3.6, "authority": 3.2, "avg_ge_freq": 0.3333, "relative_se_rank": 1.7148936170212763, "normalized_reciprocal_se_rank": 0.1306397306397306, "reciprocal_se_rank": 0.04110032362459547, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 66, "query_type": "Pinocchios", "num_sources": 4, "unweighted_mean_score": 3.1875, "weighted_total_content_score": 60.263157894736835, "semantic_relevance": 2.0, "factual_accuracy": 2.0, "freshness": 3.0, "objectivity_tone": 2.25, "layout_ad_density": 4.0, "accountability": 4.25, "transparency": 4.5, "authority": 3.5, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 90, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 3.075, "weighted_total_content_score": 60.21052631578948, "semantic_relevance": 3.2, "factual_accuracy": 2.6, "freshness": 5.0, "objectivity_tone": 2.2, "layout_ad_density": 1.8, "accountability": 2.8, "transparency": 3.8, "authority": 3.2, "avg_ge_freq": 0.5333399999999999, "relative_se_rank": 0.37333333333333335, "normalized_reciprocal_se_rank": 0.23152989876697544, "reciprocal_se_rank": 0.06534334946099651, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "claude", "query_id": 91, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 3.0416666666666665, "weighted_total_content_score": 60.0, "semantic_relevance": 3.6666666666666665, "factual_accuracy": 2.6666666666666665, "freshness": 5.0, "objectivity_tone": 2.0, "layout_ad_density": 2.0, "accountability": 3.0, "transparency": 3.0, "authority": 3.0, "avg_ge_freq": 0.7777666666666666, "relative_se_rank": 1.9074074074074074, "normalized_reciprocal_se_rank": 0.14066591844369622, "reciprocal_se_rank": 0.04350952894642215, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 45, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.2950000000000004, "weighted_total_content_score": 59.78947368421052, "semantic_relevance": 2.0, "factual_accuracy": 4.0, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 3.0, "accountability": 2.2, "transparency": 3.2, "authority": 2.4, "avg_ge_freq": 1.0, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 7, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 2.9583333333333335, "weighted_total_content_score": 59.29824561403509, "semantic_relevance": 3.6666666666666665, "factual_accuracy": 3.0, "freshness": 3.6666666666666665, "objectivity_tone": 2.3333333333333335, "layout_ad_density": 2.0, "accountability": 3.6666666666666665, "transparency": 3.3333333333333335, "authority": 2.0, "avg_ge_freq": 0.6667, "relative_se_rank": 2.857142857142857, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "google-search", "query_id": 46, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 3.125, "weighted_total_content_score": 58.94736842105263, "semantic_relevance": 1.0, "factual_accuracy": 1.0, "freshness": 3.5, "objectivity_tone": 4.0, "layout_ad_density": 4.0, "accountability": 3.5, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "grok-4.1-fast-non-reasoning", "query_id": 45, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 2.875, "weighted_total_content_score": 58.94736842105263, "semantic_relevance": 2.0, "factual_accuracy": 4.0, "freshness": 2.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 3.0, "transparency": 2.0, "authority": 3.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-chat-tavily", "query_id": 51, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 4.375, "weighted_total_content_score": 58.59649122807017, "semantic_relevance": 4.5, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 4.5, "accountability": 4.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 47, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 2.9375, "weighted_total_content_score": 58.421052631578945, "semantic_relevance": 1.5, "factual_accuracy": 3.0, "freshness": 2.0, "objectivity_tone": 4.0, "layout_ad_density": 3.0, "accountability": 3.0, "transparency": 3.5, "authority": 3.5, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-reasoning-tavily", "query_id": 12, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 3.8333333333333335, "weighted_total_content_score": 58.421052631578945, "semantic_relevance": 4.333333333333333, "factual_accuracy": 4.666666666666667, "freshness": 5.0, "objectivity_tone": 3.6666666666666665, "layout_ad_density": 1.6666666666666667, "accountability": 2.0, "transparency": 4.333333333333333, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 90, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 2.925, "weighted_total_content_score": 57.684210526315795, "semantic_relevance": 3.4, "factual_accuracy": 2.6, "freshness": 4.4, "objectivity_tone": 2.0, "layout_ad_density": 2.0, "accountability": 2.6, "transparency": 4.0, "authority": 2.4, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "gensee", "query_id": 91, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 2.90625, "weighted_total_content_score": 57.631578947368425, "semantic_relevance": 3.5, "factual_accuracy": 3.0, "freshness": 4.25, "objectivity_tone": 1.75, "layout_ad_density": 2.0, "accountability": 2.75, "transparency": 3.25, "authority": 2.75, "avg_ge_freq": 0.49999999999999994, "relative_se_rank": 2.7777777777777777, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 45, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.0, "weighted_total_content_score": 57.63157894736842, "semantic_relevance": 1.5, "factual_accuracy": 2.25, "freshness": 4.25, "objectivity_tone": 3.0, "layout_ad_density": 3.75, "accountability": 3.0, "transparency": 3.0, "authority": 3.25, "avg_ge_freq": 0.833325, "relative_se_rank": 1.112244897959184, "normalized_reciprocal_se_rank": 0.15441124532033623, "reciprocal_se_rank": 0.04681241089008079, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 56, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 3.1875, "weighted_total_content_score": 57.368421052631575, "semantic_relevance": 1.25, "factual_accuracy": 2.5, "freshness": 4.0, "objectivity_tone": 4.75, "layout_ad_density": 3.25, "accountability": 3.6666666666666665, "transparency": 3.6666666666666665, "authority": 3.6666666666666665, "avg_ge_freq": 0.5833499999999999, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 23, "query_type": "DebateQA", "num_sources": 5, "unweighted_mean_score": 3.65625, "weighted_total_content_score": 56.8421052631579, "semantic_relevance": 3.25, "factual_accuracy": 3.25, "freshness": 4.75, "objectivity_tone": 2.5, "layout_ad_density": 3.75, "accountability": 3.75, "transparency": 3.75, "authority": 4.25, "avg_ge_freq": 0.3333, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-5", "query_id": 7, "query_type": "VACOS", "num_sources": 1, "unweighted_mean_score": 2.875, "weighted_total_content_score": 56.84210526315789, "semantic_relevance": 2.0, "factual_accuracy": 2.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 2.0, "accountability": 2.0, "transparency": 4.0, "authority": 2.0, "avg_ge_freq": 0.6667, "relative_se_rank": 2.857142857142857, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-reasoning-gensee", "query_id": 91, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 2.9166666666666665, "weighted_total_content_score": 56.49122807017543, "semantic_relevance": 2.6666666666666665, "factual_accuracy": 2.6666666666666665, "freshness": 5.0, "objectivity_tone": 1.6666666666666667, "layout_ad_density": 2.3333333333333335, "accountability": 3.0, "transparency": 3.0, "authority": 3.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.7777777777777772, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 69, "query_type": "Pinocchios", "num_sources": 5, "unweighted_mean_score": 3.6071428571428568, "weighted_total_content_score": 56.421052631578945, "semantic_relevance": 2.5, "factual_accuracy": 3.75, "freshness": 2.75, "objectivity_tone": 4.25, "layout_ad_density": 3.25, "accountability": 3.5, "transparency": 3.75, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 5.346666666666667, "normalized_reciprocal_se_rank": 0.2, "reciprocal_se_rank": 0.05776699029126213, "percentage_ge_sources_not_in_se_sources": 80.0, "percentage_ge_sources_in_se_sources": 20.0 }, { "model_name": "exa", "query_id": 41, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 2.8, "weighted_total_content_score": 56.0, "semantic_relevance": 1.8, "factual_accuracy": 2.8, "freshness": 3.6, "objectivity_tone": 3.8, "layout_ad_density": 2.8, "accountability": 2.2, "transparency": 2.6, "authority": 2.8, "avg_ge_freq": 1.0, "relative_se_rank": 4.545454545454546, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 65, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 3.125, "weighted_total_content_score": 55.78947368421052, "semantic_relevance": 1.0, "factual_accuracy": 1.0, "freshness": 5.0, "objectivity_tone": 1.0, "layout_ad_density": 5.0, "accountability": 4.0, "transparency": 4.0, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 49, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 2.75, "weighted_total_content_score": 54.52631578947368, "semantic_relevance": 2.2, "factual_accuracy": 2.2, "freshness": 4.0, "objectivity_tone": 3.4, "layout_ad_density": 3.4, "accountability": 2.4, "transparency": 1.8, "authority": 2.6, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "tavily", "query_id": 53, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 2.8, "weighted_total_content_score": 54.52631578947368, "semantic_relevance": 2.0, "factual_accuracy": 2.4, "freshness": 4.4, "objectivity_tone": 2.6, "layout_ad_density": 1.8, "accountability": 3.0, "transparency": 3.0, "authority": 3.2, "avg_ge_freq": 1.0, "relative_se_rank": 0.897872340425532, "normalized_reciprocal_se_rank": 0.36516594516594514, "reciprocal_se_rank": 0.09745492371705963, "percentage_ge_sources_not_in_se_sources": 40.0, "percentage_ge_sources_in_se_sources": 60.0 }, { "model_name": "Perplexity-Sonar-Pro", "query_id": 56, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 2.75, "weighted_total_content_score": 54.38596491228069, "semantic_relevance": 1.6666666666666667, "factual_accuracy": 3.0, "freshness": 3.6666666666666665, "objectivity_tone": 3.0, "layout_ad_density": 1.6666666666666667, "accountability": 2.0, "transparency": 3.3333333333333335, "authority": 3.6666666666666665, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "exa", "query_id": 47, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 2.78125, "weighted_total_content_score": 53.94736842105263, "semantic_relevance": 1.5, "factual_accuracy": 2.0, "freshness": 4.25, "objectivity_tone": 3.25, "layout_ad_density": 2.25, "accountability": 2.25, "transparency": 3.5, "authority": 3.25, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "Gemini-2.5-Flash-Preview", "query_id": 91, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 2.75, "weighted_total_content_score": 53.89473684210526, "semantic_relevance": 3.0, "factual_accuracy": 2.4, "freshness": 3.6, "objectivity_tone": 1.8, "layout_ad_density": 2.0, "accountability": 3.2, "transparency": 3.0, "authority": 3.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 42, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 2.75, "weighted_total_content_score": 53.68421052631579, "semantic_relevance": 2.0, "factual_accuracy": 2.5, "freshness": 5.0, "objectivity_tone": 2.5, "layout_ad_density": 2.5, "accountability": 2.0, "transparency": 2.5, "authority": 3.0, "avg_ge_freq": 0.66665, "relative_se_rank": 2.0408163265306123, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "gpt-4o", "query_id": 66, "query_type": "Pinocchios", "num_sources": 1, "unweighted_mean_score": 3.0, "weighted_total_content_score": 53.68421052631579, "semantic_relevance": 1.0, "factual_accuracy": 1.0, "freshness": 5.0, "objectivity_tone": 1.0, "layout_ad_density": 5.0, "accountability": 2.0, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.024390243902439025, "normalized_reciprocal_se_rank": 1.0, "reciprocal_se_rank": 0.25, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 90, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 2.995, "weighted_total_content_score": 53.68421052631578, "semantic_relevance": 3.0, "factual_accuracy": 2.75, "freshness": 5.0, "objectivity_tone": 2.0, "layout_ad_density": 1.8, "accountability": 2.6, "transparency": 4.0, "authority": 2.8, "avg_ge_freq": 0.80002, "relative_se_rank": 0.6088888888888889, "normalized_reciprocal_se_rank": 0.2841694253458959, "reciprocal_se_rank": 0.07799216774088276, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "Gemini-3-Pro-Preview", "query_id": 44, "query_type": "HotpotQA", "num_sources": 3, "unweighted_mean_score": 2.7083333333333335, "weighted_total_content_score": 52.63157894736842, "semantic_relevance": 2.3333333333333335, "factual_accuracy": 2.0, "freshness": 5.0, "objectivity_tone": 2.3333333333333335, "layout_ad_density": 2.3333333333333335, "accountability": 2.6666666666666665, "transparency": 2.3333333333333335, "authority": 2.6666666666666665, "avg_ge_freq": 0.8889, "relative_se_rank": 1.798449612403101, "normalized_reciprocal_se_rank": 0.026166426166426168, "reciprocal_se_rank": 0.015996301433194637, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "tavily", "query_id": 41, "query_type": "HotpotQA", "num_sources": 5, "unweighted_mean_score": 3.25, "weighted_total_content_score": 52.42105263157894, "semantic_relevance": 2.0, "factual_accuracy": 3.75, "freshness": 4.25, "objectivity_tone": 4.5, "layout_ad_density": 3.25, "accountability": 2.5, "transparency": 2.5, "authority": 3.25, "avg_ge_freq": 1.0, "relative_se_rank": 4.545454545454546, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 85, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 2.725, "weighted_total_content_score": 52.0, "semantic_relevance": 2.2, "factual_accuracy": 2.0, "freshness": 3.8, "objectivity_tone": 1.6, "layout_ad_density": 3.0, "accountability": 3.0, "transparency": 3.2, "authority": 3.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.5148936170212765, "normalized_reciprocal_se_rank": 0.40632792572186516, "reciprocal_se_rank": 0.10734578797685594, "percentage_ge_sources_not_in_se_sources": 20.0, "percentage_ge_sources_in_se_sources": 80.0 }, { "model_name": "claude", "query_id": 47, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 2.5625, "weighted_total_content_score": 51.578947368421055, "semantic_relevance": 1.5, "factual_accuracy": 2.5, "freshness": 2.0, "objectivity_tone": 4.0, "layout_ad_density": 2.0, "accountability": 2.0, "transparency": 3.0, "authority": 3.5, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 91, "query_type": "QuoraQuestions", "num_sources": 5, "unweighted_mean_score": 2.575, "weighted_total_content_score": 49.473684210526315, "semantic_relevance": 2.2, "factual_accuracy": 2.0, "freshness": 4.4, "objectivity_tone": 1.6, "layout_ad_density": 2.2, "accountability": 3.0, "transparency": 3.2, "authority": 2.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.9444444444444442, "normalized_reciprocal_se_rank": 0.09402774792845714, "reciprocal_se_rank": 0.03230278408960499, "percentage_ge_sources_not_in_se_sources": 60.0, "percentage_ge_sources_in_se_sources": 40.0 }, { "model_name": "Gemini-3-Flash-Preview", "query_id": 83, "query_type": "QuoraQuestions", "num_sources": 2, "unweighted_mean_score": 2.5625, "weighted_total_content_score": 47.89473684210526, "semantic_relevance": 1.0, "factual_accuracy": 1.5, "freshness": 5.0, "objectivity_tone": 2.0, "layout_ad_density": 3.0, "accountability": 2.5, "transparency": 3.5, "authority": 2.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-reasoning-gensee", "query_id": 51, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.625, "weighted_total_content_score": 46.84210526315789, "semantic_relevance": 5.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 5.0, "accountability": 3.0, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 22, "query_type": "DebateQA", "num_sources": 4, "unweighted_mean_score": 4.6875, "weighted_total_content_score": 46.57894736842105, "semantic_relevance": 4.0, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 5.0, "layout_ad_density": 4.0, "accountability": 5.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 0.916675, "relative_se_rank": 2.380952380952381, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 29, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 3.5625, "weighted_total_content_score": 45.96491228070175, "semantic_relevance": 2.5, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 3.0, "layout_ad_density": 4.0, "accountability": 3.5, "transparency": 3.5, "authority": 4.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.2222222222222223, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-reasoning-tavily", "query_id": 51, "query_type": "HotpotQA", "num_sources": 2, "unweighted_mean_score": 4.5, "weighted_total_content_score": 44.73684210526316, "semantic_relevance": 4.0, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.0, "layout_ad_density": 5.0, "accountability": 3.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 94, "query_type": "QuoraQuestions", "num_sources": 3, "unweighted_mean_score": 3.375, "weighted_total_content_score": 44.21052631578947, "semantic_relevance": 3.5, "factual_accuracy": 3.0, "freshness": 5.0, "objectivity_tone": 2.5, "layout_ad_density": 2.0, "accountability": 4.5, "transparency": 3.5, "authority": 3.0, "avg_ge_freq": 0.3333, "relative_se_rank": 0.375, "normalized_reciprocal_se_rank": 0.46807625595504393, "reciprocal_se_rank": 0.1221833721833722, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "google-search", "query_id": 7, "query_type": "VACOS", "num_sources": 2, "unweighted_mean_score": 2.25, "weighted_total_content_score": 43.68421052631578, "semantic_relevance": 1.0, "factual_accuracy": 2.0, "freshness": 3.0, "objectivity_tone": 2.5, "layout_ad_density": 3.0, "accountability": 2.5, "transparency": 2.0, "authority": 2.0, "avg_ge_freq": null, "relative_se_rank": null, "normalized_reciprocal_se_rank": null, "reciprocal_se_rank": null, "percentage_ge_sources_not_in_se_sources": null, "percentage_ge_sources_in_se_sources": null }, { "model_name": "deepseek-chat-tavily", "query_id": 12, "query_type": "VACOS", "num_sources": 4, "unweighted_mean_score": 4.25, "weighted_total_content_score": 43.1578947368421, "semantic_relevance": 4.5, "factual_accuracy": 5.0, "freshness": 5.0, "objectivity_tone": 4.5, "layout_ad_density": 2.0, "accountability": 3.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "deepseek-chat-tavily", "query_id": 81, "query_type": "QuoraQuestions", "num_sources": 4, "unweighted_mean_score": 4.3125, "weighted_total_content_score": 42.63157894736842, "semantic_relevance": 4.0, "factual_accuracy": 4.5, "freshness": 3.5, "objectivity_tone": 3.5, "layout_ad_density": 5.0, "accountability": 4.0, "transparency": 5.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 0.7134146341463414, "normalized_reciprocal_se_rank": 0.42542950876284213, "reciprocal_se_rank": 0.11193573147456642, "percentage_ge_sources_not_in_se_sources": 25.0, "percentage_ge_sources_in_se_sources": 75.0 }, { "model_name": "gpt-4o", "query_id": 53, "query_type": "HotpotQA", "num_sources": 1, "unweighted_mean_score": 2.0, "weighted_total_content_score": 41.05263157894736, "semantic_relevance": 1.0, "factual_accuracy": 1.0, "freshness": 1.0, "objectivity_tone": 5.0, "layout_ad_density": 1.0, "accountability": 2.0, "transparency": 2.0, "authority": 3.0, "avg_ge_freq": 0.3333, "relative_se_rank": 2.127659574468085, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 87, "query_type": "QuoraQuestions", "num_sources": 1, "unweighted_mean_score": 2.0, "weighted_total_content_score": 40.0, "semantic_relevance": 2.0, "factual_accuracy": 2.0, "freshness": 1.0, "objectivity_tone": 2.0, "layout_ad_density": 3.0, "accountability": 2.0, "transparency": 2.0, "authority": 2.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.0731707317073171, "normalized_reciprocal_se_rank": 0.04814098431119708, "reciprocal_se_rank": 0.02127659574468085, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 }, { "model_name": "deepseek-chat-tavily", "query_id": 11, "query_type": "VACOS", "num_sources": 3, "unweighted_mean_score": 3.0, "weighted_total_content_score": 38.24561403508772, "semantic_relevance": 2.0, "factual_accuracy": 2.0, "freshness": 5.0, "objectivity_tone": 2.5, "layout_ad_density": 2.0, "accountability": 2.0, "transparency": 4.0, "authority": 4.5, "avg_ge_freq": 1.0, "relative_se_rank": 2.4390243902439024, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "claude", "query_id": 39, "query_type": "DebateQA", "num_sources": 3, "unweighted_mean_score": 2.9375, "weighted_total_content_score": 37.89473684210526, "semantic_relevance": 2.5, "factual_accuracy": 2.5, "freshness": 3.0, "objectivity_tone": 2.0, "layout_ad_density": 4.0, "accountability": 2.5, "transparency": 3.5, "authority": 3.5, "avg_ge_freq": 1.0, "relative_se_rank": 1.4305555555555556, "normalized_reciprocal_se_rank": 0.14066591844369622, "reciprocal_se_rank": 0.04350952894642215, "percentage_ge_sources_not_in_se_sources": 66.66666666666667, "percentage_ge_sources_in_se_sources": 33.33333333333333 }, { "model_name": "deepseek-reasoning-tavily", "query_id": 61, "query_type": "Pinocchios", "num_sources": 2, "unweighted_mean_score": 3.5, "weighted_total_content_score": 35.26315789473684, "semantic_relevance": 4.0, "factual_accuracy": 4.0, "freshness": 2.0, "objectivity_tone": 3.0, "layout_ad_density": 4.0, "accountability": 2.0, "transparency": 4.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 1.1666666666666667, "normalized_reciprocal_se_rank": 0.23989898989898992, "reciprocal_se_rank": 0.06735436893203883, "percentage_ge_sources_not_in_se_sources": 50.0, "percentage_ge_sources_in_se_sources": 50.0 }, { "model_name": "deepseek-reasoning-gensee", "query_id": 12, "query_type": "VACOS", "num_sources": 5, "unweighted_mean_score": 3.75, "weighted_total_content_score": 30.31578947368421, "semantic_relevance": 5.0, "factual_accuracy": 4.5, "freshness": 5.0, "objectivity_tone": 2.5, "layout_ad_density": 2.0, "accountability": 3.0, "transparency": 3.0, "authority": 5.0, "avg_ge_freq": 1.0, "relative_se_rank": 2.5, "normalized_reciprocal_se_rank": 0.0, "reciprocal_se_rank": 0.009708737864077669, "percentage_ge_sources_not_in_se_sources": 100.0, "percentage_ge_sources_in_se_sources": 0.0 }, { "model_name": "tavily", "query_id": 45, "query_type": "HotpotQA", "num_sources": 4, "unweighted_mean_score": 2.2916666666666665, "weighted_total_content_score": 27.105263157894736, "semantic_relevance": 1.5, "factual_accuracy": 1.5, "freshness": 4.0, "objectivity_tone": 2.5, "layout_ad_density": 2.5, "accountability": 2.6666666666666665, "transparency": 1.6666666666666667, "authority": 1.6666666666666667, "avg_ge_freq": 1.0, "relative_se_rank": 0.16326530612244897, "normalized_reciprocal_se_rank": 0.45578865578865585, "reciprocal_se_rank": 0.11923076923076922, "percentage_ge_sources_not_in_se_sources": 0.0, "percentage_ge_sources_in_se_sources": 100.0 } ] }