ResearchIT / scripts /expanded_eval_results.json
siddhm11
Phase 6.5: Pipeline telemetry, search UX fixes, latency profiling
ec67b2f
[
{
"band": "A",
"query": "attention is all you need",
"description": "Landmark transformer paper by Vaswani et al.",
"rewrite": "Transformer self-attention mechanisms Vaswani et al",
"latency_ms": 4127.867400005925,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "1706.03762",
"title": "Attention Is All You Need",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "1912.11959",
"title": "Is Attention All What You Need? -- An Empirical Investigation on Convolution-Based Active Memory and Self-Attention",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2404.01183",
"title": "Positioning is All You Need",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2502.05383",
"title": "Is attention all you need to solve the correlated electron problem?",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2405.06478",
"title": "Attention is all they need: Cognitive science and the (techno)political economy of attention in humans and machines",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2304.04556",
"title": "Attention: Marginal Probability is All You Need?",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2501.05730",
"title": "Element-wise Attention Is All You Need",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "1906.02792",
"title": "Attention is all you need for Videos: Self-attention based Video Summarization using Universal Transformers",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2408.02692",
"title": "Attention is all you need for an improved CNN-based flash flood susceptibility modeling. The case of the ungauged Rheraya watershed, Morocco",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2501.09166",
"title": "Attention is All You Need Until You Need Retention",
"category": "?"
}
],
"expected_id": "1706.03762",
"expected_found": true,
"expected_rank": 1,
"topic_diversity": 0
},
{
"band": "A",
"query": "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding",
"description": "Full BERT title \u2014 should be exact #1",
"rewrite": null,
"latency_ms": 1612.3626999906264,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "1810.04805",
"title": "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2210.12440",
"title": "Spectrum-BERT: Pre-training of Deep Bidirectional Transformers for Spectral Classification of Chinese Liquors",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2106.08254",
"title": "BEiT: BERT Pre-Training of Image Transformers",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "1909.04120",
"title": "Span Selection Pre-training for Question Answering",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2401.15861",
"title": "BPDec: Unveiling the Potential of Masked Language Modeling Decoder in BERT pretraining",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "1909.10351",
"title": "TinyBERT: Distilling BERT for Natural Language Understanding",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2011.07208",
"title": "Utilizing Bidirectional Encoder Representations from Transformers for Answer Selection",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "1905.05583",
"title": "How to Fine-Tune BERT for Text Classification?",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "1906.08237",
"title": "XLNet: Generalized Autoregressive Pretraining for Language Understanding",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2112.07571",
"title": "Epigenomic language models powered by Cerebras",
"category": "?"
}
],
"expected_id": "1810.04805",
"expected_found": true,
"expected_rank": 1,
"topic_diversity": 0
},
{
"band": "A",
"query": "Deep Residual Learning for Image Recognition",
"description": "ResNet \u2014 the most-cited CV paper",
"rewrite": "Deep Residual Learning Image Recognition ConvNets ResNet",
"latency_ms": 2775.4920000443235,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "1512.03385",
"title": "Deep Residual Learning for Image Recognition",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "1604.01335",
"title": "Deep Cross Residual Learning for Multitask Visual Recognition",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "1805.00325",
"title": "Study of Residual Networks for Image Recognition",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2505.01632",
"title": "Transfer Learning-Based Deep Residual Learning for Speech Recognition in Clean and Noisy Environments",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2004.04989",
"title": "Improved Residual Networks for Image and Video Recognition",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "1604.08671",
"title": "Deep Edge Guided Recurrent Residual Learning for Image Super-Resolution",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "1707.08866",
"title": "Deep Residual Learning for Weakly-Supervised Relation Extraction",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "1706.08807",
"title": "Recurrent Residual Learning for Action Recognition",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2211.12320",
"title": "A Cross-Residual Learning for Image Recognition",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2107.05318",
"title": "R3L: Connecting Deep Reinforcement Learning to Recurrent Neural Networks for Image Denoising via Residual Recovery",
"category": "?"
}
],
"expected_id": "1512.03385",
"expected_found": true,
"expected_rank": 1,
"topic_diversity": 0
},
{
"band": "F",
"query": "how do transformers work in NLP",
"description": "Newcomer asking about transformer basics",
"rewrite": "Transformer architecture self-attention mechanisms NLP deep learning sequence modeling",
"latency_ms": 2065.812500019092,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2406.16893",
"title": "A Survey on Transformers in NLP with Focus on Efficiency",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "1910.06764",
"title": "Stabilizing Transformers for Reinforcement Learning",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2204.05673",
"title": "What do Toothbrushes do in the Kitchen? How Transformers Think our World is Structured",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2410.13732",
"title": "Reducing the Transformer Architecture to a Minimum",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2306.07303",
"title": "A Comprehensive Survey on Applications of Transformers for Deep Learning Tasks",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2202.07856",
"title": "The NLP Task Effectiveness of Long-Range Transformers",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2310.10930",
"title": "Enhanced Transformer Architecture for Natural Language Processing",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "1907.00570",
"title": "Do Transformer Attention Heads Provide Transparency in Abstractive Summarization?",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "1906.06755",
"title": "Theoretical Limitations of Self-Attention in Neural Sequence Models",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2105.08050",
"title": "Pay Attention to MLPs",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "F",
"query": "what is reinforcement learning from human feedback",
"description": "Beginner asking about RLHF \u2014 should surface Ouyang/InstructGPT/Christiano",
"rewrite": null,
"latency_ms": 1188.2260999991558,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2312.14925",
"title": "A Survey of Reinforcement Learning from Human Feedback",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2504.12501",
"title": "Reinforcement Learning from Human Feedback",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2504.14732",
"title": "Reinforcement Learning from Multi-level and Episodic Human Feedback",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2502.05434",
"title": "Sample-Efficient Reinforcement Learning from Human Feedback via Information-Directed Sampling",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2211.11602",
"title": "Improving Multimodal Interactive Agents with Reinforcement Learning from Human Feedback",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2410.02504",
"title": "Dual Active Learning for Reinforcement Learning from Human Feedback",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2303.02891",
"title": "Perspectives on the Social Impacts of Reinforcement Learning with Human Feedback",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2310.13639",
"title": "Contrastive Preference Learning: Learning from Human Feedback without RL",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "1707.07402",
"title": "Reinforcement Learning for Bandit Neural Machine Translation with Simulated Human Feedback",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "1805.10627",
"title": "Reliability and Learnability of Human Bandit Feedback for Sequence-to-Sequence Reinforcement Learning",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "F",
"query": "explain how neural networks learn",
"description": "Very basic \u2014 should return foundational/survey papers",
"rewrite": "deep learning neural networks backpropagation optimization stochastic gradient descent",
"latency_ms": 2098.3753000036813,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2204.07291",
"title": "The training response law explains how deep neural networks learn",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "1901.07538",
"title": "Unsupervised Learning of Neural Networks to Explain Neural Networks (extended abstract)",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "1805.07468",
"title": "Unsupervised Learning of Neural Networks to Explain Neural Networks",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2310.09838",
"title": "Explaining How a Neural Network Play the Go Game and Let People Learn",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2307.02129",
"title": "How Deep Neural Networks Learn Compositional Data: The Random Hierarchy Model",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2303.14937",
"title": "LEURN: Learning Explainable Univariate Rules with Neural Networks",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "1902.08160",
"title": "Topology of Learning in Artificial Neural Networks",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "1502.04042",
"title": "Abstract Learning via Demodulation in a Deep Neural Network",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "1911.05640",
"title": "Neural Network Processing Neural Networks: An efficient way to learn higher order functions",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2206.01717",
"title": "A Theoretical Analysis on Feature Learning in Neural Networks: Emergence from Inputs and Advantage over Fixed Features",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "F",
"query": "what are diffusion models and how do they generate images",
"description": "Beginner asking about DDPM/Stable Diffusion family",
"rewrite": null,
"latency_ms": 1032.1455000084825,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2302.09378",
"title": "Modelos Generativos basados en Mecanismos de Difusi\\'on",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2302.01394",
"title": "Understanding and contextualising diffusion models",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2305.19066",
"title": "Nested Diffusion Processes for Anytime Image Generation",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2209.10948",
"title": "Implementing and Experimenting with Diffusion Models for Text-to-Image Generation",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2411.07449",
"title": "Tracing the Roots: Leveraging Temporal Dynamics in Diffusion Trajectories for Origin Attribution",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2303.05275",
"title": "Detecting Images Generated by Diffusers",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2212.00235",
"title": "VIDM: Video Implicit Diffusion Models",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2301.13188",
"title": "Extracting Training Data from Diffusion Models",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2306.08103",
"title": "Generating Images with 3D Annotations Using Diffusion Models",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2306.05182",
"title": "Interactive Fashion Content Generation Using LLMs and Latent Diffusion Models",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "F",
"query": "how does GPT-4 work",
"description": "Newcomer asking about GPT-4 \u2014 should surface the technical report",
"rewrite": "GPT-4 architecture transformer large language model multimodal processing",
"latency_ms": 2233.0756000010297,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2305.03195",
"title": "Gpt-4: A Review on Advancements and Opportunities in Natural Language Processing",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2303.08774",
"title": "GPT-4 Technical Report",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2304.10592",
"title": "MiniGPT-4: Enhancing Vision-Language Understanding with Advanced Large Language Models",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2411.17976",
"title": "The importance of visual modelling languages in generative software engineering",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2503.23601",
"title": "Exploring GPT-4 for Robotic Agent Strategy with Real-Time State Feedback and a Reactive Behaviour Framework",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2409.15981",
"title": "GPT-4 as a Homework Tutor can Improve Student Engagement and Learning Outcomes",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2402.17396",
"title": "Benchmarking GPT-4 on Algorithmic Problems: A Systematic Evaluation of Prompting Strategies",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2310.11458",
"title": "GPT-4 as an interface between researchers and computational software: improving usability and reproducibility",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2305.04160",
"title": "X-LLM: Bootstrapping Advanced Large Language Models by Treating Multi-Modalities as Foreign Languages",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2311.15732",
"title": "GPT4Vis: What Can GPT-4 Do for Zero-shot Visual Recognition?",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "G",
"query": "contrastive learning for self-supervised visual representations",
"description": "Should return SimCLR, MoCo, BYOL, DINO etc.",
"rewrite": "Contrastive learning self-supervised visual representation learning CNNs",
"latency_ms": 1839.8786999750882,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2202.03968",
"title": "Self-supervised Contrastive Learning for Cross-domain Hyperspectral Image Representation",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2301.12541",
"title": "Supervised and Contrastive Self-Supervised In-Domain Representation Learning for Dense Prediction Problems in Remote Sensing",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2210.03163",
"title": "Brief Introduction to Contrastive Learning Pretext Tasks for Visual Representation",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2204.13386",
"title": "Self-supervised Contrastive Learning for Audio-Visual Action Recognition",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2208.04278",
"title": "Self-Supervised Contrastive Representation Learning for 3D Mesh Segmentation",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2401.12024",
"title": "Multimodal Visual-Tactile Representation Learning through Self-Supervised Contrastive Pre-Training",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2112.05760",
"title": "Learning Representations with Contrastive Self-Supervised Learning for Histopathology Applications",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2103.13517",
"title": "A Broad Study on the Transferability of Visual Representations with Contrastive Learning",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2106.15788",
"title": "Exploring Localization for Self-supervised Fine-grained Contrastive Learning",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2105.05682",
"title": "Multi-Scale Contrastive Siamese Networks for Self-Supervised Graph Representation Learning",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "G",
"query": "knowledge distillation from large language models to smaller ones",
"description": "Distillation pipeline \u2014 DistilBERT, TinyBERT, knowledge distillation surveys",
"rewrite": null,
"latency_ms": 1139.147799985949,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2306.08543",
"title": "MiniLLM: Knowledge Distillation of Large Language Models",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2504.13825",
"title": "Feature Alignment and Representation Transfer in Knowledge Distillation for Large Language Models",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2504.14366",
"title": "Empirical Evaluation of Knowledge Distillation from Transformers to Subquadratic Language Models",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2412.19449",
"title": "Feature Alignment-Based Knowledge Distillation for Efficient Compression of Large Language Models",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2403.06414",
"title": "Evolving Knowledge Distillation with Large Language Models and Active Learning",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2406.17328",
"title": "Dual-Space Knowledge Distillation for Large Language Models",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2002.00733",
"title": "Generation-Distillation for Efficient Natural Language Understanding in Low-Data Settings",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2402.04616",
"title": "Beyond Answers: Transferring Reasoning Capabilities to Smaller LLMs Using Multi-Teacher Knowledge Distillation",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2505.15442",
"title": "On the Generalization vs Fidelity Paradox in Knowledge Distillation",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2409.12500",
"title": "LLMR: Knowledge Distillation with a Large Language Model-Induced Reward",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "G",
"query": "graph neural networks for molecular property prediction",
"description": "GNN + chemistry \u2014 SchNet, DimeNet, MPNN papers",
"rewrite": null,
"latency_ms": 1286.9488000287674,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2208.04852",
"title": "Graph neural networks for the prediction of molecular structure-property relationships",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "1909.00259",
"title": "Gated Graph Recursive Neural Networks for Molecular Property Prediction",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2008.12187",
"title": "Graph Neural Network Architecture Search for Molecular Property Prediction",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2005.13607",
"title": "Multi-View Graph Neural Networks for Molecular Property Prediction",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2410.12156",
"title": "FragNet: A Graph Neural Network for Molecular Property Prediction with Four Layers of Interpretability",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2410.11323",
"title": "KA-GNN: Kolmogorov-Arnold Graph Neural Networks for Molecular Property Prediction",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2104.06046",
"title": "Which Hyperparameters to Optimise? An Investigation of Evolutionary Hyperparameter Optimisation in Graph Neural Network For Molecular Property Prediction",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2106.15529",
"title": "On Graph Neural Network Ensembles for Large-Scale Molecular Property Prediction",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2012.01981",
"title": "Advanced Graph and Sequence Neural Networks for Molecular Property Prediction and Drug Discovery",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2405.05205",
"title": "Hybrid Quantum Graph Neural Network for Molecular Property Prediction",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "G",
"query": "efficient inference for large language models quantization pruning",
"description": "LLM compression \u2014 GPTQ, AWQ, SparseGPT, pruning surveys",
"rewrite": null,
"latency_ms": 1113.6418000096455,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2402.09748",
"title": "Model Compression and Efficient Inference for Large Language Models: A Survey",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2410.17170",
"title": "Self-calibration for Language Model Quantization and Pruning",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2412.11629",
"title": "QPruner: Probabilistic Decision Quantization for Structured Pruning in Large Language Models",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2412.11494",
"title": "FTP: A Fine-grained Token-wise Pruner for Large Language Models via Token Routing",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2402.02834",
"title": "Shortened LLaMA: Depth Pruning for Large Language Models with Comparison of Retraining Methods",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2309.09507",
"title": "Pruning Large Language Models via Accuracy Predictor",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2102.11289",
"title": "Ps and Qs: Quantization-aware pruning for efficient low latency neural network inference",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2502.15796",
"title": "Pruning as a Defense: Reducing Memorization in Large Language Models",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2502.04348",
"title": "Prompt-based Depth Pruning of Large Language Models",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2503.15369",
"title": "EfficientLLaVA:Generalizable Auto-Pruning for Large Vision-language Models",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "G",
"query": "causal inference in observational studies with machine learning",
"description": "Causal ML \u2014 double ML, causal forests, CATE estimation",
"rewrite": null,
"latency_ms": 967.5587000092492,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2202.12813",
"title": "Causal discovery for observational sciences using supervised machine learning",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2002.02770",
"title": "A Survey on Causal Inference",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2011.12508",
"title": "Causal inference using deep neural networks",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2101.00878",
"title": "The Value Added of Machine Learning to Causal Inference: Evidence from Revisited Studies",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2009.10982",
"title": "An Introduction to Proximal Causal Learning",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2012.06865",
"title": "From controlled to undisciplined data: estimating causal effects in the era of data science using a potential outcome framework",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "1805.06826",
"title": "The Blessings of Multiple Causes",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "1910.09648",
"title": "Causal bootstrapping",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2009.13472",
"title": "Targeted VAE: Variational and Targeted Learning for Causal Inference",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2406.02584",
"title": "A Scoping Review of Earth Observation and Machine Learning for Causal Inference: Implications for the Geography of Poverty",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "G",
"query": "multi-task learning with shared representations",
"description": "MTL surveys, hard/soft parameter sharing, task relationships",
"rewrite": "Multi-task learning shared representations transfer learning joint optimization",
"latency_ms": 1785.949699988123,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "1603.02185",
"title": "Distributed Multi-Task Learning with Shared Representation",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "1603.02041",
"title": "Learning Shared Representations in Multi-task Reinforcement Learning",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2102.06177",
"title": "Multi-Task Reinforcement Learning with Context-based Representations",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2206.12441",
"title": "Joint Representation Training in Sequential Tasks with Shared Structure",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2202.10066",
"title": "Multi-task Representation Learning with Stochastic Linear Bandits",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2409.16651",
"title": "Learning Representation for Multitask learning through Self Supervised Auxiliary learning",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "1804.08139",
"title": "Same Representation, Different Attentions: Shareable Sentence Representation Learning from Multiple Tasks",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2304.01354",
"title": "Functional Knowledge Transfer with Self-supervised Representation Learning",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "1812.00500",
"title": "Multi-task Learning of Hierarchical Vision-Language Representation",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2309.16921",
"title": "YOLOR-Based Multi-Task Learning",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "H",
"query": "how to fine-tune a pre-trained language model for classification",
"description": "Practical fine-tuning \u2014 ULMFiT, how-to-fine-tune-BERT papers",
"rewrite": null,
"latency_ms": 1168.5913000255823,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2004.11493",
"title": "UHH-LT at SemEval-2020 Task 12: Fine-Tuning of Pre-Trained Transformer Networks for Offensive Language Detection",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2305.19249",
"title": "Preserving Pre-trained Features Helps Calibrate Fine-tuned Language Models",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2305.00350",
"title": "POUF: Prompt-oriented unsupervised fine-tuning for large pre-trained models",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2305.18324",
"title": "Regex-augmented Domain Transfer Topic Classification based on a Pre-trained Language Model: An application in Financial Domain",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "1907.05338",
"title": "To Tune or Not To Tune? How About the Best of Both Worlds?",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "1910.11959",
"title": "FineText: Text Classification via Attention-based Language Model Fine-tuning",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2004.03829",
"title": "Exploring Versatile Generative Language Model Via Parameter-Efficient Transfer Learning",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2004.14218",
"title": "Exploring Fine-tuning Techniques for Pre-trained Cross-lingual Models via Continual Learning",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2105.12192",
"title": "NukeLM: Pre-Trained and Fine-Tuned Language Models for the Nuclear and Energy Domains",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2210.16771",
"title": "Parameter-Efficient Tuning Makes a Good Classification Head",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "H",
"query": "implementing attention mechanism from scratch",
"description": "Implementation-level detail \u2014 attention tutorials, scaled dot product",
"rewrite": "Self-Attention Mechanism Implementation Transformer Architecture",
"latency_ms": 1706.720200018026,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2503.15758",
"title": "ATTENTION2D: Communication Efficient Distributed Self-Attention Mechanism",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2204.13154",
"title": "Attention Mechanism in Neural Networks: Where it Comes and Where it Goes",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2502.19214",
"title": "A Hybrid Transformer Architecture with a Quantized Self-Attention Mechanism Applied to Molecular Generation",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "1811.05544",
"title": "An Introductory Survey on Attention Mechanisms in NLP Problems",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "1803.02155",
"title": "Self-Attention with Relative Position Representations",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2103.05103",
"title": "Image Captioning using Multiple Transformers for Self-Attention Mechanism",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2102.03902",
"title": "Nystr\\\"omformer: A Nystr\\\"om-Based Algorithm for Approximating Self-Attention",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2108.05340",
"title": "Person Re-identification via Attention Pyramid",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2006.04768",
"title": "Linformer: Self-Attention with Linear Complexity",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2004.11207",
"title": "Self-Attention Attribution: Interpreting Information Interactions Inside Transformer",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "H",
"query": "best practices for training stable diffusion models",
"description": "Practical SD training \u2014 latent diffusion, classifier-free guidance",
"rewrite": null,
"latency_ms": 955.2253999863751,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2302.00670",
"title": "Stable Target Field for Reduced Variance Score Estimation in Diffusion Models",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2502.05625",
"title": "Training-Free Constrained Generation With Stable Diffusion Models",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2306.01984",
"title": "DYffusion: A Dynamics-informed Diffusion Model for Spatiotemporal Forecasting",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2411.14430",
"title": "Stable Flow: Vital Layers for Training-Free Image Editing",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2303.16509",
"title": "HoloDiffusion: Training a 3D Diffusion Model using 2D Images",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2312.02696",
"title": "Analyzing and Improving the Training Dynamics of Diffusion Models",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2402.07802",
"title": "Towards a mathematical theory for consistency training in diffusion models",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2403.05056",
"title": "Stealing Stable Diffusion Prior for Robust Monocular Depth Estimation",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2310.13545",
"title": "ScaleLong: Towards More Stable Training of Diffusion Model via Scaling Network Long Skip Connection",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2407.01014",
"title": "An Expectation-Maximization Algorithm for Training Clean Diffusion Models from Corrupted Observations",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "H",
"query": "building a retrieval augmented generation system",
"description": "RAG \u2014 should surface the Lewis et al. RAG paper, REALM, etc.",
"rewrite": "retrieval-augmented generation RAG language models knowledge retrieval architectures",
"latency_ms": 1782.7779999934137,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2409.13707",
"title": "Retrieval Augmented Generation-Based Incident Resolution Recommendation System for IT Support",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2410.15944",
"title": "Developing Retrieval Augmented Generation (RAG) based LLM Systems from PDFs: An Experience Report",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2503.18016",
"title": "Retrieval Augmented Generation and Understanding in Vision: A Survey and New Outlook",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2503.10677",
"title": "A Survey on Knowledge-Oriented Retrieval-Augmented Generation",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2403.00820",
"title": "Retrieval Augmented Generation Systems: Automatic Dataset Creation, Evaluation and Boolean Agent Setup",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2402.19473",
"title": "Retrieval-Augmented Generation for AI-Generated Content: A Survey",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2406.13249",
"title": "R^2AG: Incorporating Retrieval Information into Retrieval Augmented Generation",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2501.15915",
"title": "Parametric Retrieval Augmented Generation",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2407.13193",
"title": "Retrieval-Augmented Generation for Natural Language Processing: A Survey",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2401.15884",
"title": "Corrective Retrieval Augmented Generation",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "H",
"query": "how to do distributed training with PyTorch across GPUs",
"description": "Distributed training \u2014 ZeRO, Megatron, FSDP, DeepSpeed papers",
"rewrite": null,
"latency_ms": 967.3073999583721,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2006.15704",
"title": "PyTorch Distributed: Experiences on Accelerating Data Parallel Training",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2101.07956",
"title": "PyTorch-Direct: Enabling GPU Centric Data Access for Very Large Graph Neural Network Training with Irregular Accesses",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2302.09406",
"title": "{\\ae}net-PyTorch: a GPU-supported implementation for machine learning atomic potentials training",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2309.06497",
"title": "A Distributed Data-Parallel PyTorch Implementation of the Distributed Shampoo Optimizer for Training Neural Networks At-Scale",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2103.13262",
"title": "FastMoE: A Fast Mixture-of-Expert Training System",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2410.06511",
"title": "TorchTitan: One-stop PyTorch native solution for production ready LLM pre-training",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2206.06356",
"title": "Modern Distributed Data-Parallel Large-Scale Pre-training Strategies For NLP models",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "1811.05233",
"title": "Massively Distributed SGD: ImageNet/ResNet-50 Training in a Flash",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2412.06041",
"title": "PyPOD-GP: Using PyTorch for Accelerated Chip-Level Thermal Simulation of the GPU",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2304.14226",
"title": "TorchBench: Benchmarking PyTorch with High API Surface Coverage",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "I",
"query": "transformer vs CNN for image classification",
"description": "ViT vs ResNet/EfficientNet \u2014 should surface comparison papers",
"rewrite": "Transformer CNN image classification comparison Vision Transformer ViT",
"latency_ms": 1903.2504000351764,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2406.03478",
"title": "Convolutional Neural Networks and Vision Transformers for Fashion MNIST Classification: A Literature Review",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2504.08481",
"title": "A Hybrid Fully Convolutional CNN-Transformer Model for Inherently Interpretable Medical Image Classification",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2108.09038",
"title": "Is it Time to Replace CNNs with Transformers for Medical Images?",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2108.09147",
"title": "Convolutional Neural Network (CNN) vs Vision Transformer (ViT) for Digital Holography",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2112.00965",
"title": "Vision Pair Learning: An Efficient Training Framework for Image Classification",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2406.00237",
"title": "A Comparative Study of CNN, ResNet, and Vision Transformers for Multi-Classification of Chest Diseases",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2110.14731",
"title": "Vision Transformer for Classification of Breast Ultrasound Images",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2409.12026",
"title": "On Vision Transformers for Classification Tasks in Side-Scan Sonar Imagery",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2103.05940",
"title": "TransMed: Transformers Advance Multi-modal Medical Image Classification",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2108.03414",
"title": "Vision Transformer for femur fracture classification",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "I",
"query": "survey of large language models",
"description": "LLM surveys \u2014 Zhao et al. survey, Minaee survey",
"rewrite": "large language models survey LLMs BERT transformer architectures evaluation benchmarks",
"latency_ms": 2101.066200004425,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2411.11072",
"title": "Multilingual Large Language Models: A Systematic Survey",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2406.10833",
"title": "A Comprehensive Survey of Scientific Large Language Models and Their Applications in Scientific Discovery",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2309.15025",
"title": "Large Language Model Alignment: A Survey",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2311.05876",
"title": "Trends in Integration of Knowledge and Large Language Models: A Survey and Taxonomy of Methods, Benchmarks, and Applications",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2404.15777",
"title": "A Comprehensive Survey on Evaluating Large Language Model Applications in the Medical Industry",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2412.03220",
"title": "Survey of different Large Language Model Architectures: Trends, Benchmarks, and Challenges",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2505.15957",
"title": "Towards Holistic Evaluation of Large Audio-Language Models: A Comprehensive Survey",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2405.19323",
"title": "Are Large Language Models Chameleons? An Attempt to Simulate Social Surveys",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2309.17447",
"title": "A Large Language Model Approach to Educational Survey Feedback Analysis",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2310.19736",
"title": "Evaluating Large Language Models: A Comprehensive Survey",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "I",
"query": "comparison of object detection architectures YOLO vs DETR",
"description": "YOLO family vs transformer-based detection",
"rewrite": null,
"latency_ms": 1010.0513999932446,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2504.13099",
"title": "RF-DETR Object Detection vs YOLOv12 : A Study of Transformer-based and CNN-based Architectures for Single-Class and Multi-Class Greenfruit Detection in Complex Orchard Environments Under Label Ambiguity",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2211.06588",
"title": "DEYO: DETR with YOLO for Step-by-Step Object Detection",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2304.08069",
"title": "DETRs Beat YOLOs on Real-time Object Detection",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2309.11851",
"title": "DEYOv3: DETR with YOLO for Real-time Object Detection",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2402.16370",
"title": "DEYO: DETR with YOLO for End-to-End Object Detection",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2504.18586",
"title": "A Decade of You Only Look Once (YOLO) for Object Detection",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2406.03459",
"title": "LW-DETR: A Transformer Replacement to YOLO for Real-Time Detection",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2312.12314",
"title": "First qualitative observations on deep learning vision model YOLO and DETR for automated driving in Austria",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2310.08772",
"title": "Investigating the Robustness and Properties of Detection Transformers (DETR) Toward Difficult Images",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2502.12524",
"title": "YOLOv12: Attention-Centric Real-Time Object Detectors",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "I",
"query": "GAN vs diffusion models for image generation",
"description": "Generative model comparison \u2014 StyleGAN, DDPM, score matching",
"rewrite": null,
"latency_ms": 889.1186999971978,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2410.17966",
"title": "A Wavelet Diffusion GAN for Image Super-Resolution",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2403.19645",
"title": "GANTASTIC: GAN-based Transfer of Interpretable Directions for Disentangled Image Editing in Text-to-Image Diffusion Models",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2307.08702",
"title": "Diffusion Models Beat GANs on Image Classification",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2105.05233",
"title": "Diffusion Models Beat GANs on Image Synthesis",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2405.04356",
"title": "Diffusion-driven GAN Inversion for Multi-Modal Face Image Generation",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2209.11047",
"title": "MIDMs: Matching Interleaved Diffusion Models for Exemplar-based Image Translation",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2302.08908",
"title": "LayoutDiffuse: Adapting Foundational Diffusion Models for Layout-to-Image Generation",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2503.23127",
"title": "Galaxy Imaging with Generative Models: Insights from a Two-Models Framework",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2301.03396",
"title": "Diffused Heads: Diffusion Models Beat GANs on Talking-Face Generation",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2211.16152",
"title": "Wavelet Diffusion Models are fast and scalable Image Generators",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "I",
"query": "review of federated learning privacy methods",
"description": "FL surveys \u2014 McMahan, differential privacy in FL",
"rewrite": "Federated learning privacy preservation methods differential privacy homomorphism",
"latency_ms": 1966.5430000168271,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2302.04208",
"title": "Exploratory Analysis of Federated Learning Methods with Differential Privacy on MIMIC-III",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2109.13012",
"title": "Federated Deep Learning with Bayesian Privacy",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2102.05883",
"title": "Privacy-Preserving Self-Taught Federated Learning for Heterogeneous Data",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2408.15621",
"title": "Convergent Differential Privacy Analysis for General Federated Learning: the $f$-DP Perspective",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2402.00342",
"title": "Survey of Privacy Threats and Countermeasures in Federated Learning",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2206.03396",
"title": "Group privacy for personalized federated learning",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2311.10832",
"title": "Exploring Machine Learning Models for Federated Learning: A Review of Approaches, Performance, and Limitations",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2405.03636",
"title": "The Federation Strikes Back: A Survey of Federated Learning Privacy Attacks, Defenses, Applications, and Policy Landscape",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2007.00914",
"title": "Federated Learning and Differential Privacy: Software tools analysis, the Sherpa.ai FL framework and methodological guidelines for preserving data privacy",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2501.14453",
"title": "Optimal Strategies for Federated Learning Maintaining Client Privacy",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "J",
"query": "mixture of experts models scaling",
"description": "MoE \u2014 Switch Transformer, Mixtral, GShard",
"rewrite": "Mixture of Experts MoE deep learning scalable architectures sparse gating",
"latency_ms": 1488.4431999525987,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2502.05172",
"title": "Joint MoE Scaling Laws: Mixture of Experts Can Be Memory Efficient",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2303.07226",
"title": "Scaling Vision-Language Models with Sparse Mixture of Experts",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2310.07188",
"title": "Adaptive Gating in Mixture-of-Experts based Language Models",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2402.07871",
"title": "Scaling Laws for Fine-Grained Mixture of Experts",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2401.17118",
"title": "Explainable data-driven modeling via mixture of experts: towards effective blending of grey and black-box models",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2407.06204",
"title": "A Survey on Mixture of Experts in Large Language Models",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2404.02852",
"title": "Toward Inference-optimal Mixture-of-Expert Large Language Models",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2503.00634",
"title": "Efficiently Editing Mixture-of-Experts Models with Compressed Experts",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2410.09039",
"title": "Semi-Supervised Learning of Noisy Mixture of Experts Models",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2404.08985",
"title": "T-REX: Mixture-of-Rank-One-Experts with Semantic-aware Intuition for Multi-task Large Language Model Finetuning",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "J",
"query": "test-time compute scaling for reasoning",
"description": "New paradigm \u2014 o1-style reasoning, tree search at inference",
"rewrite": "test-time compute efficient reasoning scalable inference methods",
"latency_ms": 2332.4564999784343,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2504.00869",
"title": "m1: Unleash the Potential of Test-Time Scaling for Medical Reasoning with Large Language Models",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2502.05171",
"title": "Scaling up Test-Time Compute with Latent Reasoning: A Recurrent Depth Approach",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2502.18080",
"title": "Towards Thinking-Optimal Scaling of Test-Time Compute for LLM Reasoning",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2504.02890",
"title": "Scaling Test-time Compute for Low-resource Languages: Multilingual Reasoning in LLMs",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2505.14733",
"title": "The Energy Cost of Reasoning: Analyzing Energy Usage in LLMs with Test-time Compute",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2505.19187",
"title": "LIMOPro: Reasoning Refinement for Efficient and Effective Test-time Scaling",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2503.19877",
"title": "Scaling Evaluation-time Compute with Reasoning Models as Process Evaluators",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2504.01317",
"title": "Adaptive Rectification Sampling for Test-Time Compute Scaling",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2501.19393",
"title": "s1: Simple test-time scaling",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2502.13962",
"title": "Is That Your Final Answer? Test-Time Scaling Improves Selective Question Answering",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "J",
"query": "multimodal large language models vision and text",
"description": "GPT-4V, LLaVA, Flamingo, multimodal LLMs",
"rewrite": null,
"latency_ms": 977.2495999932289,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2411.14279",
"title": "Looking Beyond Text: Reducing Language bias in Large Vision-Language Models via Multimodal Dual-Attention and Soft-Image Guidance",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2311.17647",
"title": "Text as Images: Can Multimodal Large Language Models Follow Printed Instructions in Pixels?",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2410.01744",
"title": "Leopard: A Vision Language Model For Text-Rich Multi-Image Tasks",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2412.02104",
"title": "Explainable and Interpretable Multimodal Large Language Models: A Comprehensive Survey",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2407.10996",
"title": "Visualization Literacy of Multimodal Large Language Models: A Comparative Study",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2411.09273",
"title": "Cross-Modal Consistency in Multimodal Large Language Models",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2402.01345",
"title": "Skip \\n: A Simple Method to Reduce Hallucination in Large Vision-Language Models",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2502.04395",
"title": "Time-VLM: Exploring Multimodal Vision-Language Models for Augmented Time Series Forecasting",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2403.08773",
"title": "Veagle: Advancements in Multimodal Representation Learning",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2407.04681",
"title": "Rethinking Visual Prompting for Multimodal Large Language Models with External Knowledge",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "J",
"query": "state space models as alternative to transformers",
"description": "S4, Mamba, H3 \u2014 structured state space models",
"rewrite": null,
"latency_ms": 970.7190000335686,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2404.09516",
"title": "State Space Model for New-Generation Network Alternative to Transformers: A Survey",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2404.16112",
"title": "Mamba-360: Survey of State Space Models as Transformer Alternative for Long Sequence Modelling: Methods, Applications, and Challenges",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2402.01032",
"title": "Repeat After Me: Transformers are Better than State Space Models at Copying",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2501.16237",
"title": "Application of Structured State Space Models to High energy physics with locality-sensitive hashing",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2411.15685",
"title": "State-Space Large Audio Language Models",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2404.08819",
"title": "The Illusion of State in State-Space Models",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2412.14354",
"title": "State Space Models are Strong Text Rerankers",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2405.17394",
"title": "The Expressive Capacity of State Space Models: A Formal Language Perspective",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2302.14120",
"title": "Diagonal State Space Augmented Transformers for Speech Recognition",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2407.05489",
"title": "How Effective are State Space Models for Machine Translation?",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "J",
"query": "constitutional AI and AI safety alignment techniques",
"description": "Anthropic constitutional AI, RLHF alternatives, safety",
"rewrite": null,
"latency_ms": 925.6349000497721,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2502.15861",
"title": "C3AI: Crafting and Evaluating Constitutions for Constitutional AI",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2501.17112",
"title": "Decoding Human Preferences in Alignment: An Improved Approach to Inverse Constitutional AI",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2503.05748",
"title": "Alignment, Agency and Autonomy in Frontier AI: A Systems Engineering Perspective",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2503.17365",
"title": "How Effective Is Constitutional AI in Small LLMs? A Study on DeepSeek-R1 and Its Peers",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2502.16691",
"title": "Toward Responsible Federated Large Language Models: Leveraging a Safety Filter and Constitutional AI",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2411.04127",
"title": "Combining Theory of Mind and Kindness for Self-Supervised Human-AI Alignment",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2311.08706",
"title": "Aligned: A Platform-based Process for Alignment",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2101.06060",
"title": "The Challenge of Value Alignment: from Fairer Algorithms to AI Safety",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2403.18341",
"title": "IterAlign: Iterative Constitutional Alignment of Large Language Models",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2410.17271",
"title": "Rules, Cases, and Reasoning: Positivist Legal Theory as a Framework for Pluralistic AI Alignment",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "J",
"query": "sparse attention mechanisms for long context",
"description": "Longformer, BigBird, sparse transformers for 100K+ context",
"rewrite": "sparse attention transformers long-range dependencies contextualization",
"latency_ms": 1455.3994999732822,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2502.20766",
"title": "FlexPrefill: A Context-Aware Sparse Attention Mechanism for Efficient Long-Sequence Inference",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2502.12216",
"title": "Tactic: Adaptive Sparse Attention with Clustering and Distribution Fitting for Long-Context LLMs",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2502.01659",
"title": "Longer Attention Span: Increasing Transformer Context Length with Sparse Graph Processing Techniques",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2504.16795",
"title": "Random Long-Context Access for Mamba via Hardware-aligned Hierarchical Sparse Attention",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2109.01915",
"title": "Sparse Spatial Attention Network for Semantic Segmentation",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2412.06198",
"title": "SparseAccelerate: Efficient Long-Context Inference for Mid-Range GPUs",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2406.05678",
"title": "SinkLoRA: Enhanced Efficiency and Chat Capabilities for Long-Context Large Language Models",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2009.06097",
"title": "Cluster-Former: Clustering-based Sparse Transformer for Long-Range Dependency Encoding",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2406.15486",
"title": "SampleAttention: Near-Lossless Acceleration of Long Context LLM Inference with Adaptive Structured Sparse Attention",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2406.16747",
"title": "Sparser is Faster and Less is More: Efficient Sparse Attention for Long-Range Transformers",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "K",
"query": "deep learning for protein structure prediction",
"description": "AlphaFold, ESMFold, protein language models",
"rewrite": "deep learning protein structure prediction AlphaFold convolutional neural networks",
"latency_ms": 1523.7058000057004,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2503.13522",
"title": "Advanced Deep Learning Methods for Protein Structure Prediction and Design",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2504.01490",
"title": "Deep Learning-Driven Protein Structure Prediction and Design: Key Model Developments by Nobel Laureates and Multi-Domain Applications",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2107.12243",
"title": "Protein-RNA interaction prediction with deep learning: Structure matters",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2211.14939",
"title": "Applying Deep Reinforcement Learning to the HP Model for Protein Structure Prediction",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2012.09741",
"title": "On the performance of deep learning for numerical optimization: an application to protein structure prediction",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2002.05643",
"title": "DeepSurf: A surface-based deep learning approach for the prediction of ligand binding sites on proteins",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2412.20329",
"title": "Protein Structure Prediction in the 3D HP Model Using Deep Reinforcement Learning",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "1610.07187",
"title": "Learning Deep Architectures for Interaction Prediction in Structure-based Virtual Screening",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2009.00133",
"title": "Unsupervised and Supervised Structure Learning for Protein Contact Prediction",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "1910.02845",
"title": "Combining docking pose rank and structure with deep learning improves protein-ligand binding mode prediction",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "K",
"query": "natural language processing for legal document analysis",
"description": "Legal NLP \u2014 contract analysis, legal BERT, court opinion mining",
"rewrite": null,
"latency_ms": 1012.3439999879338,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2204.07182",
"title": "Analysing similarities between legal court documents using natural language processing approaches based on Transformers",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2110.15709",
"title": "LegalNLP -- Natural Language Processing methods for the Brazilian Legal Language",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2503.12100",
"title": "Large Language Models in Legislative Content Analysis: A Dataset from the Polish Parliament",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "1311.6063",
"title": "NILE: Fast Natural Language Processing for Electronic Health Records",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2404.06751",
"title": "Leveraging open-source models for legal language modeling and analysis: a case study on the Indian constitution",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2203.08244",
"title": "Toward Improving Attentive Neural Networks in Legal Text Processing",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2410.21306",
"title": "Natural Language Processing for the Legal Domain: A Survey of Tasks, Datasets, Models, and Challenges",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2309.05500",
"title": "NeCo@ALQAC 2023: Legal Domain Knowledge Acquisition for Low-Resource Languages through Data Enrichment",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "1806.03688",
"title": "LexNLP: Natural language processing and information extraction for legal and regulatory texts",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "1709.05563",
"title": "Data Innovation for International Development: An overview of natural language processing for qualitative data analysis",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "K",
"query": "machine learning for climate change prediction",
"description": "Climate ML \u2014 weather forecasting, carbon modeling",
"rewrite": "climate change prediction machine learning modeling uncertainty quantification",
"latency_ms": 2138.445299991872,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "1806.11037",
"title": "Using machine learning to parameterize moist convection: potential for modeling of climate, climate change and extreme events",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2112.08440",
"title": "Climate-Invariant Machine Learning",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2311.09413",
"title": "Leveraging machine learning to enhance climate models: a review",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2503.08163",
"title": "XAI4Extremes: An interpretable machine learning framework for understanding extreme-weather precursors under climate change",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2310.09162",
"title": "Quantum Machine Learning in Climate Change and Sustainability: a Review",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2207.07390",
"title": "Machine learning applications for weather and climate need greater focus on extremes",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "1906.05433",
"title": "Tackling Climate Change with Machine Learning",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2310.03180",
"title": "Accessing Convective Hazards Frequency Shift with Climate Change using Physics-Informed Machine Learning",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2107.07997",
"title": "Uncertainty Prediction for Machine Learning Models of Material Properties",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "1303.2739",
"title": "Machine Learning for Bioclimatic Modelling",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "K",
"query": "using transformers for time series forecasting",
"description": "Time series transformers \u2014 Informer, Autoformer, PatchTST",
"rewrite": "Transformer time series forecasting Prophet LSTM Seq2Seq",
"latency_ms": 1477.9143999912776,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2207.05397",
"title": "Dateformer: Time-modeling Transformer for Longer-term Series Forecasting",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2206.04038",
"title": "Scaleformer: Iterative Multi-scale Refining Transformers for Time Series Forecasting",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2402.06638",
"title": "Transformers with Attentive Federated Aggregation for Time Series Stock Forecasting",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2501.02735",
"title": "Sequence Complementor: Complementing Transformers For Time Series Forecasting with Learnable Sequences",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2411.01419",
"title": "PSformer: Parameter-efficient Transformer with Segment Attention for Time Series Forecasting",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2209.03945",
"title": "W-Transformers : A Wavelet-based Transformer Framework for Univariate Time Series Forecasting",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2210.17393",
"title": "Probabilistic Decomposition Transformer for Time Series Forecasting",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2503.09791",
"title": "Minimal Time Series Transformer",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2408.02279",
"title": "DRFormer: Multi-Scale Transformer Utilizing Diverse Receptive Fields for Long Time-Series Forecasting",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2310.01232",
"title": "Modality-aware Transformer for Financial Time series Forecasting",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "K",
"query": "reinforcement learning for robotics manipulation",
"description": "RL + robotics \u2014 sim-to-real transfer, dexterous manipulation",
"rewrite": "reinforcement learning robotics manipulation control policy optimization deep learning",
"latency_ms": 1636.6290000150912,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2102.04148",
"title": "Deep Reinforcement Learning for the Control of Robotic Manipulation: A Focussed Mini-Review",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "1701.08878",
"title": "Deep Reinforcement Learning for Robotic Manipulation-The state of the art",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "1610.00633",
"title": "Deep Reinforcement Learning for Robotic Manipulation with Asynchronous Off-Policy Updates",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "1803.10371",
"title": "Reinforcement learning for non-prehensile manipulation: Transfer from simulation to physical system",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "1910.07294",
"title": "Reinforcement Learning for Robotic Manipulation using Simulated Locomotion Demonstrations",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2107.13356",
"title": "Value-Based Reinforcement Learning for Continuous Control Robotic Manipulation in Multi-Task Sparse Reward Settings",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2210.08126",
"title": "Geometric Reinforcement Learning For Robotic Manipulation",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2311.11287",
"title": "Tactile Active Inference Reinforcement Learning for Efficient Robotic Manipulation Skill Acquisition",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "1704.03073",
"title": "Data-efficient Deep Reinforcement Learning for Dexterous Manipulation",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2304.06055",
"title": "Exploiting Symmetry and Heuristic Demonstrations in Off-policy Reinforcement Learning for Robotic Manipulation",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "L",
"query": "AI ethics",
"description": "Very broad \u2014 should return survey-level papers on AI ethics/fairness/bias",
"rewrite": "AI ethics fairness accountability transparency explainability bias mitigation",
"latency_ms": 1728.8928999914788,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2302.12149",
"title": "Beyond Bias and Compliance: Towards Individual Agency and Plurality of Ethics in AI",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2403.14681",
"title": "AI Ethics: A Bibliometric Analysis, Critical Issues, and Key Gaps",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2109.07906",
"title": "Ethics of AI: A Systematic Literature Review of Principles and Challenges",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2304.08275",
"title": "Implementing Responsible AI: Tensions and Trade-Offs Between Ethics Aspects",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2207.01493",
"title": "AI Ethics: An Empirical Study on the Views of Practitioners and Lawmakers",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2307.10057",
"title": "Ethics in the Age of AI: An Analysis of AI Practitioners' Awareness and Challenges",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2503.18842",
"title": "Three Kinds of AI Ethics",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2206.09514",
"title": "Ethics in AI through the Practitioner's View: A Grounded Theory Literature Review",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2403.05551",
"title": "A Bibliometric View of AI Ethics Development",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2501.08497",
"title": "Addressing Intersectionality, Explainability, and Ethics in AI-Driven Diagnostics: A Rebuttal and Call for Transdiciplinary Action",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "L",
"query": "embedding",
"description": "Single word \u2014 highly ambiguous. Word2Vec? Sentence embeddings? Image embeddings?",
"rewrite": "word embeddings neural language models representation learning",
"latency_ms": 1689.5794000010937,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "1607.06532",
"title": "Novel Word Embedding and Translation-based Language Modeling for Extractive Speech Summarization",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "1608.05852",
"title": "Learning Word Embeddings from Intrinsic and Extrinsic Views",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "1611.05962",
"title": "Word and Document Embeddings based on Neural Network Approaches",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "1502.05746",
"title": "Binary Embedding: Fundamental Limits and Fast Algorithm",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "1701.05291",
"title": "Heterogeneous Information Network Embedding for Meta Path based Proximity",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2309.11824",
"title": "Word Embedding with Neural Probabilistic Prior",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "1412.6448",
"title": "Embedding Word Similarity with Neural Machine Translation",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "1705.03556",
"title": "Relevance-based Word Embedding",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "1910.03505",
"title": "Investigating the Effectiveness of Representations Based on Word-Embeddings in Active Learning for Labelling Text Datasets",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "1705.07368",
"title": "Mixed Membership Word Embeddings for Computational Social Science",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "L",
"query": "language model",
"description": "Broad \u2014 should return influential LM papers or surveys",
"rewrite": "neural language models NLP transformer architectures deep learning",
"latency_ms": 1597.907799994573,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "1606.00499",
"title": "Generalizing and Hybridizing Count-based and Neural Language Models",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2205.01398",
"title": "Neural language models for network configuration: Opportunities and reality check",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "1502.01446",
"title": "Beyond Word-based Language Model in Statistical Machine Translation",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "1909.04985",
"title": "Learning Dynamic Author Representations with Temporal Language Models",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2412.03220",
"title": "Survey of different Large Language Model Architectures: Trends, Benchmarks, and Challenges",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "1801.08771",
"title": "Modeling of languages for tensor manipulation",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2111.00610",
"title": "Towards Language Modelling in the Speech Domain Using Sub-word Linguistic Units",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2205.05128",
"title": "Human Language Modeling",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2109.08270",
"title": "Language Models as a Knowledge Source for Cognitive Agents",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2110.10470",
"title": "Interpreting Deep Learning Models in Natural Language Processing: A Review",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "L",
"query": "generate images from text",
"description": "Casual \u2014 should surface DALL-E, Stable Diffusion, Imagen",
"rewrite": "text-to-image synthesis generative models diffusion transformers",
"latency_ms": 1434.7260000067763,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "1806.11302",
"title": "Generate the corresponding Image from Text Description using Modified GAN-CLS Algorithm",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2309.00810",
"title": "RenAIssance: A Survey into AI Text-to-Image Generation in the Era of Large Model",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2209.10948",
"title": "Implementing and Experimenting with Diffusion Models for Text-to-Image Generation",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "1910.04056",
"title": "Text-to-Image Synthesis Based on Machine Generated Captions",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2303.07909",
"title": "Text-to-image Diffusion Models in Generative AI: A Survey",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2407.06079",
"title": "Layered Diffusion Model for One-Shot High Resolution Text-to-Image Synthesis",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "1904.01310",
"title": "DM-GAN: Dynamic Memory Generative Adversarial Networks for Text-to-Image Synthesis",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2505.10046",
"title": "Exploring the Deep Fusion of Large Language Models and Diffusion Transformers for Text-to-Image Synthesis",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "1612.03242",
"title": "StackGAN: Text to Photo-realistic Image Synthesis with Stacked Generative Adversarial Networks",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2407.06642",
"title": "Powerful and Flexible: Personalized Text-to-Image Generation via Reinforcement Learning",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "L",
"query": "make AI more safe",
"description": "Very casual \u2014 should surface alignment/safety papers",
"rewrite": "AI safety robustness adversarial attacks fairness transparency explainability",
"latency_ms": 1531.0878999880515,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2504.16110",
"title": "Security-First AI: Foundations for Robust and Trustworthy Systems",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2405.06624",
"title": "Towards Guaranteed Safe AI: A Framework for Ensuring Robust and Reliable AI Systems",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2403.10462",
"title": "Safety Cases: How to Justify the Safety of Advanced AI Systems",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2309.12321",
"title": "A Case for AI Safety via Law",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2409.07878",
"title": "Mapping Technical Safety Research at AI Companies: A literature review and incentives analysis",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2408.12935",
"title": "Trustworthy, Responsible, and Safe AI: A Comprehensive Architectural Framework for AI Safety with Challenges and Mitigations",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2411.18526",
"title": "NeuroAI for AI Safety",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2201.10436",
"title": "Safe AI -- How is this Possible?",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2502.09288",
"title": "AI Safety for Everyone",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2410.21572",
"title": "Safety cases for frontier AI",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "M",
"query": "improvements to the original transformer architecture",
"description": "Post-Vaswani improvements \u2014 Reformer, Performer, ALiBi, RoPE",
"rewrite": "Transformer-XL BERT RoBERTa DistilBERT efficient attention mechanisms",
"latency_ms": 1451.1854000156745,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2410.23182",
"title": "ProTransformer: Robustify Transformers via Plug-and-Play Paradigm",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2305.14768",
"title": "Dual Path Transformer with Partition Attention",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2503.18565",
"title": "Distil-xLSTM: Learning Attention Mechanisms through Recurrent Structures",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2503.15983",
"title": "InhibiDistilbert: Knowledge Distillation for a ReLU and Addition-based Transformer",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2501.06098",
"title": "ELFATT: Efficient Linear Fast Attention for Vision Transformers",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2311.12678",
"title": "Interpretation of the Transformer and Improvement of the Extractor",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2004.05150",
"title": "Longformer: The Long-Document Transformer",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2503.16726",
"title": "EDiT: Efficient Diffusion Transformers with Linear Compressed Attention",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "1906.05714",
"title": "A Multiscale Visualization of Attention in the Transformer Model",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2210.11909",
"title": "Boosting vision transformers for image retrieval",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "M",
"query": "papers that cite ResNet and extend residual connections",
"description": "ResNet extensions \u2014 DenseNet, ResNeXt, WideResNet, SE-Net",
"rewrite": null,
"latency_ms": 859.7527000238188,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "1904.01367",
"title": "Why ResNet Works? Residuals Generalize",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "1805.07477",
"title": "Norm-Preservation: Why Residual Networks Can Become Extremely Deep?",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "1910.09599",
"title": "On the space-time expressivity of ResNets",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "1707.08866",
"title": "Deep Residual Learning for Weakly-Supervised Relation Extraction",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "1804.10123",
"title": "IamNN: Iterative and Adaptive Mobile Neural Network for Efficient Image Classification",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "1905.10944",
"title": "Identity Connections in Residual Nets Improve Noise Stability",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "1611.01186",
"title": "Demystifying ResNet",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "1805.09585",
"title": "Residual Networks as Geodesic Flows of Diffeomorphisms",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2109.12556",
"title": "Frequency Disentangled Residual Network",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2004.04989",
"title": "Improved Residual Networks for Image and Video Recognition",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "M",
"query": "alternatives to RLHF for aligning language models",
"description": "DPO, SPIN, KTO \u2014 methods that bypass reward modeling",
"rewrite": null,
"latency_ms": 1012.4404999660328,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2406.15567",
"title": "SAIL: Self-Improving Efficient Online Alignment of Large Language Models",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2302.05206",
"title": "The Wisdom of Hindsight Makes Language Models Better Instruction Followers",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2305.06176",
"title": "Fine-tuning Language Models with Generative Adversarial Reward Modelling",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2408.14874",
"title": "Inverse-Q*: Token Level Reinforcement Learning for Aligning Large Language Models Without Preference Data",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2406.05587",
"title": "Creativity Has Left the Chat: The Price of Debiasing Language Models",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2407.04181",
"title": "Orchestrating LLMs with Different Personalizations",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2401.11458",
"title": "Linear Alignment: A Closed-form Solution for Aligning Human Preferences without Tuning and Feedback",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2403.16649",
"title": "CLHA: A Simple yet Effective Contrastive Learning Framework for Human Alignment",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2405.17956",
"title": "Unified Preference Optimization: Language Model Alignment Beyond the Preference Frontier",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "2306.02231",
"title": "Fine-Tuning Language Models with Advantage-Induced Policy Alignment",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
},
{
"band": "M",
"query": "BERT variants for low resource languages",
"description": "mBERT, XLM-R, AfricanBERT, ArabBERT \u2014 multilingual BERT variants",
"rewrite": "Multilingual BERT low-resource languages adaptation transfer learning",
"latency_ms": 1428.8215999840759,
"n_results": 10,
"results": [
{
"rank": 1,
"arxiv_id": "2012.02462",
"title": "Fine-tuning BERT for Low-Resource Natural Language Understanding via Active Learning",
"category": "?"
},
{
"rank": 2,
"arxiv_id": "2402.14408",
"title": "Transferring BERT Capabilities from High-Resource to Low-Resource Languages Using Vocabulary Matching",
"category": "?"
},
{
"rank": 3,
"arxiv_id": "2204.05814",
"title": "MuCoT: Multilingual Contrastive Training for Question-Answering in Low-resource Languages",
"category": "?"
},
{
"rank": 4,
"arxiv_id": "2004.13640",
"title": "Extending Multilingual BERT to Low-Resource Languages",
"category": "?"
},
{
"rank": 5,
"arxiv_id": "2005.09093",
"title": "Are All Languages Created Equal in Multilingual BERT?",
"category": "?"
},
{
"rank": 6,
"arxiv_id": "2012.15562",
"title": "UNKs Everywhere: Adapting Multilingual Language Models to New Scripts",
"category": "?"
},
{
"rank": 7,
"arxiv_id": "2409.10965",
"title": "Cross-lingual transfer of multilingual models on low resource African Languages",
"category": "?"
},
{
"rank": 8,
"arxiv_id": "2212.12510",
"title": "MicroBERT: Effective Training of Low-resource Monolingual BERTs through Parameter Reduction and Multitask Learning",
"category": "?"
},
{
"rank": 9,
"arxiv_id": "2408.03172",
"title": "Leveraging Parameter Efficient Training Methods for Low Resource Text Classification: A Case Study in Marathi",
"category": "?"
},
{
"rank": 10,
"arxiv_id": "1907.00409",
"title": "Evaluating Language Model Finetuning Techniques for Low-resource Languages",
"category": "?"
}
],
"expected_id": null,
"expected_found": null,
"expected_rank": null,
"topic_diversity": 0
}
]