[
{
"arxiv_id": "2406.11931",
"title": "DeepSeek-Coder-V2: Breaking the Barrier of Closed-Source Models in Code Intelligence",
"domain": "aiml",
"categories": ["cs.CL", "cs.SE", "cs.AI"],
"summary": "Open-source code LLM matching GPT-4 Turbo on coding benchmarks with MoE architecture."
},
{
"arxiv_id": "2403.03507",
"title": "GaLore: Memory-Efficient LLM Training by Gradient Low-Rank Projection",
"domain": "aiml",
"categories": ["cs.LG", "cs.CL"],
"summary": "Reduces memory usage for LLM training via gradient projection, enabling 7B training on consumer GPUs."
},
{
"arxiv_id": "2402.08268",
"title": "World Model on Million-Length Video and Language with RingAttention",
"domain": "aiml",
"categories": ["cs.CV", "cs.CL", "cs.LG"],
"summary": "Trains world models on million-token video sequences using ring attention for long context."
},
{
"arxiv_id": "2403.03206",
"title": "The Claude 3 Model Family",
"domain": "aiml",
"categories": ["cs.CL", "cs.AI"],
"summary": "Multimodal LLM family with strong vision capabilities and extended context windows."
},
{
"arxiv_id": "2402.17177",
"title": "Sora: A Review on Background, Technology, Limitations, and Opportunities",
"domain": "aiml",
"categories": ["cs.CV", "cs.AI"],
"summary": "Analysis of video generation model capabilities, architecture, and limitations."
},
{
"arxiv_id": "2401.04081",
"title": "MoE-Mamba: Efficient Selective State Space Models with Mixture of Experts",
"domain": "aiml",
"categories": ["cs.LG", "cs.CL"],
"summary": "Combines Mamba state-space model with mixture-of-experts for efficient scaling."
},
{
"arxiv_id": "2403.09629",
"title": "Quiet-STaR: Language Models Can Teach Themselves to Think Before Speaking",
"domain": "aiml",
"categories": ["cs.CL", "cs.AI", "cs.LG"],
"summary": "Self-taught reasoning where LLMs learn to generate internal rationale tokens."
},
{
"arxiv_id": "2402.00838",
"title": "OLMo: Accelerating the Science of Language Models",
"domain": "aiml",
"categories": ["cs.CL", "cs.AI"],
"summary": "Fully open-source LLM with released weights, code, data, and training logs."
},
{
"arxiv_id": "2403.20329",
"title": "ReALM: Reference Resolution As Language Modeling",
"domain": "aiml",
"categories": ["cs.CL", "cs.AI"],
"summary": "Resolves onscreen and conversational references using LLMs for device agents."
},
{
"arxiv_id": "2402.19427",
"title": "Griffin: Mixing Gated Linear Recurrences with Local Attention for Efficient Language Models",
"domain": "aiml",
"categories": ["cs.LG", "cs.CL"],
"summary": "Hybrid architecture combining gated linear RNNs with local attention, matching transformer quality."
},
{
"arxiv_id": "2210.17323",
"title": "GPTQ: Accurate Post-Training Quantization for Generative Pre-trained Transformers",
"domain": "aiml",
"categories": ["cs.LG", "cs.CL"],
"summary": "One-shot post-training quantization method that compresses LLM weights to 3-4 bits each with minimal accuracy loss."
},
{
"arxiv_id": "2403.06634",
"title": "Stealing Part of a Production Language Model",
"domain": "security",
"categories": ["cs.CR", "cs.LG", "cs.AI"],
"summary": "Extracts internal architecture details from production LLM APIs through crafted queries."
},
{
"arxiv_id": "2402.06132",
"title": "SoK: Where's the Bug? A Study of Bug Localization Tools",
"domain": "security",
"categories": ["cs.CR", "cs.SE"],
"summary": "Systematizes bug localization approaches and evaluates 23 tools on real-world CVEs."
},
{
"arxiv_id": "2401.16727",
"title": "A Survey of Side-Channel Attacks on Intel SGX",
"domain": "security",
"categories": ["cs.CR"],
"summary": "Comprehensive analysis of side-channel attacks targeting Intel SGX enclaves."
},
{
"arxiv_id": "2403.02783",
"title": "SyzVegas: Beating Kernel Fuzzing Odds with Reinforcement Learning",
"domain": "security",
"categories": ["cs.CR", "cs.SE"],
"summary": "RL-guided kernel fuzzer that outperforms Syzkaller in bug discovery rate."
},
{
"arxiv_id": "2402.15483",
"title": "BSIMM: An Empirical Study of 130 Software Security Programs",
"domain": "security",
"categories": ["cs.CR", "cs.SE"],
"summary": "Large-scale study of enterprise security maturity across 130 organizations."
},
{
"arxiv_id": "2403.14469",
"title": "Reverse Engineering eBPF Programs: Challenges and Approaches",
"domain": "security",
"categories": ["cs.CR", "cs.OS"],
"summary": "Novel techniques for reverse engineering eBPF bytecode in Linux kernel security."
},
{
"arxiv_id": "2401.09577",
"title": "WiFi-Based Keystroke Inference Attack Using Adversarial CSI Perturbation",
"domain": "security",
"categories": ["cs.CR", "cs.NI"],
"summary": "Exploits WiFi channel state information to infer keystrokes from nearby devices."
},
{
"arxiv_id": "2402.08787",
"title": "Binary Code Similarity Detection via Graph Neural Networks",
"domain": "security",
"categories": ["cs.CR", "cs.SE", "cs.LG"],
"summary": "GNN-based approach to detect similar binary functions across compilers and architectures."
},
{
"arxiv_id": "2403.01218",
"title": "Practical Exploitation of DNS Rebinding in IoT Devices",
"domain": "security",
"categories": ["cs.CR", "cs.NI"],
"summary": "Demonstrates DNS rebinding attacks against 15 popular IoT devices in home networks."
},
{
"arxiv_id": "2401.15491",
"title": "GPU.zip: Side Channel Attacks on GPU-Based Graphical Data Compression",
"domain": "security",
"categories": ["cs.CR"],
"summary": "First cross-origin pixel-stealing attack through GPU hardware data compression."
},
{
"arxiv_id": "2402.03367",
"title": "CryptoFuzz: Fully Automated Testing of Cryptographic API Misuse",
"domain": "security",
"categories": ["cs.CR", "cs.SE"],
"summary": "Automated fuzzer detecting cryptographic API misuse patterns in Java applications."
},
{
"arxiv_id": "2403.08946",
"title": "Video Generation Models as World Simulators",
"domain": "aiml",
"categories": ["cs.CV", "cs.AI", "cs.LG"],
"summary": "Explores how video generation models learn physical world dynamics as implicit simulators."
},
{
"arxiv_id": "2402.05929",
"title": "V-JEPA: Video Joint Embedding Predictive Architecture",
"domain": "aiml",
"categories": ["cs.CV", "cs.LG"],
"summary": "Self-supervised video representation learning that predicts in latent space rather than pixel space."
},
{
"arxiv_id": "2401.10020",
"title": "AlphaGeometry: Solving Olympiad Geometry without Human Demonstrations",
"domain": "aiml",
"categories": ["cs.AI", "cs.LG"],
"summary": "AI system solving IMO-level geometry problems through neurosymbolic reasoning."
},
{
"arxiv_id": "2403.03163",
"title": "Design2Code: How Far Are We From Automating Front-End Engineering?",
"domain": "aiml",
"categories": ["cs.CV", "cs.CL", "cs.SE"],
"summary": "Benchmarks multimodal LLMs on converting visual designs to functional HTML/CSS code."
},
{
"arxiv_id": "2402.14905",
"title": "YOLOv9: Learning What You Want to Learn Using Programmable Gradient Information",
"domain": "aiml",
"categories": ["cs.CV"],
"summary": "New YOLO architecture using programmable gradient information for better object detection."
},
{
"arxiv_id": "2401.04468",
"title": "MagicVideo-V2: Multi-Stage High-Aesthetic Video Generation",
"domain": "aiml",
"categories": ["cs.CV", "cs.AI"],
"summary": "Multi-stage video generation pipeline producing high-quality aesthetic videos from text."
},
{
"arxiv_id": "2402.04494",
"title": "Grandmaster-Level Chess Without Search",
"domain": "aiml",
"categories": ["cs.AI", "cs.LG"],
"summary": "Transformer achieving grandmaster chess play through pure pattern recognition without tree search."
},
{
"arxiv_id": "2405.15793",
"title": "SWE-agent: Agent-Computer Interfaces Enable Automated Software Engineering",
"domain": "aiml",
"categories": ["cs.SE", "cs.CL", "cs.AI"],
"summary": "LLM agent that autonomously fixes GitHub issues by interacting with code repositories."
}
]