[ { "arxiv_id": "2401.04088", "title": "DeepSeek-Coder-V2: Breaking the Barrier of Closed-Source Models in Code Intelligence", "domain": "aiml", "categories": ["cs.CL", "cs.SE", "cs.AI"], "summary": "Open-source code LLM matching GPT-4 Turbo on coding benchmarks with MoE architecture." }, { "arxiv_id": "2403.05530", "title": "GaLore: Memory-Efficient LLM Training by Gradient Low-Rank Projection", "domain": "aiml", "categories": ["cs.LG", "cs.CL"], "summary": "Reduces memory usage for LLM training via gradient projection, enabling 7B training on consumer GPUs." }, { "arxiv_id": "2402.13616", "title": "World Model on Million-Length Video and Language with RingAttention", "domain": "aiml", "categories": ["cs.CV", "cs.CL", "cs.LG"], "summary": "Trains world models on million-token video sequences using ring attention for long context." }, { "arxiv_id": "2403.03206", "title": "The Claude 3 Model Family", "domain": "aiml", "categories": ["cs.CL", "cs.AI"], "summary": "Multimodal LLM family with strong vision capabilities and extended context windows." }, { "arxiv_id": "2402.17764", "title": "Sora: A Review on Background, Technology, Limitations, and Opportunities", "domain": "aiml", "categories": ["cs.CV", "cs.AI"], "summary": "Analysis of video generation model capabilities, architecture, and limitations." }, { "arxiv_id": "2401.02954", "title": "MoE-Mamba: Efficient Selective State Space Models with Mixture of Experts", "domain": "aiml", "categories": ["cs.LG", "cs.CL"], "summary": "Combines Mamba state-space model with mixture-of-experts for efficient scaling." }, { "arxiv_id": "2403.09611", "title": "Quiet-STaR: Language Models Can Teach Themselves to Think Before Speaking", "domain": "aiml", "categories": ["cs.CL", "cs.AI", "cs.LG"], "summary": "Self-taught reasoning where LLMs learn to generate internal rationale tokens." }, { "arxiv_id": "2402.01032", "title": "OLMo: Accelerating the Science of Language Models", "domain": "aiml", "categories": ["cs.CL", "cs.AI"], "summary": "Fully open-source LLM with released weights, code, data, and training logs." }, { "arxiv_id": "2403.14608", "title": "ReALM: Reference Resolution As Language Modeling", "domain": "aiml", "categories": ["cs.CL", "cs.AI"], "summary": "Resolves onscreen and conversational references using LLMs for device agents." }, { "arxiv_id": "2402.14261", "title": "Griffin: Mixing Gated Linear Recurrences with Local Attention for Efficient Language Models", "domain": "aiml", "categories": ["cs.LG", "cs.CL"], "summary": "Hybrid architecture combining gated linear RNNs with local attention, matching transformer quality." }, { "arxiv_id": "2401.14196", "title": "GPTQ: Accurate Post-Training Quantization for Generative Pre-trained Transformers", "domain": "aiml", "categories": ["cs.LG", "cs.CL"], "summary": "One-shot quantization method reducing LLM size to 3-4 bits with minimal accuracy loss." }, { "arxiv_id": "2403.07691", "title": "Stealing Part of a Production Language Model", "domain": "security", "categories": ["cs.CR", "cs.LG", "cs.AI"], "summary": "Extracts internal architecture details from production LLM APIs through crafted queries." }, { "arxiv_id": "2402.06132", "title": "SoK: Where's the Bug? A Study of Bug Localization Tools", "domain": "security", "categories": ["cs.CR", "cs.SE"], "summary": "Systematizes bug localization approaches and evaluates 23 tools on real-world CVEs." }, { "arxiv_id": "2401.16727", "title": "A Survey of Side-Channel Attacks on Intel SGX", "domain": "security", "categories": ["cs.CR"], "summary": "Comprehensive analysis of side-channel attacks targeting Intel SGX enclaves." }, { "arxiv_id": "2403.02783", "title": "SyzVegas: Beating Kernel Fuzzing Odds with Reinforcement Learning", "domain": "security", "categories": ["cs.CR", "cs.SE"], "summary": "RL-guided kernel fuzzer that outperforms Syzkaller in bug discovery rate." }, { "arxiv_id": "2402.15483", "title": "BSIMM: An Empirical Study of 130 Software Security Programs", "domain": "security", "categories": ["cs.CR", "cs.SE"], "summary": "Large-scale study of enterprise security maturity across 130 organizations." }, { "arxiv_id": "2403.14469", "title": "Reverse Engineering eBPF Programs: Challenges and Approaches", "domain": "security", "categories": ["cs.CR", "cs.OS"], "summary": "Novel techniques for reverse engineering eBPF bytecode in Linux kernel security." }, { "arxiv_id": "2401.09577", "title": "WiFi-Based Keystroke Inference Attack Using Adversarial CSI Perturbation", "domain": "security", "categories": ["cs.CR", "cs.NI"], "summary": "Exploits WiFi channel state information to infer keystrokes from nearby devices." }, { "arxiv_id": "2402.08787", "title": "Binary Code Similarity Detection via Graph Neural Networks", "domain": "security", "categories": ["cs.CR", "cs.SE", "cs.LG"], "summary": "GNN-based approach to detect similar binary functions across compilers and architectures." }, { "arxiv_id": "2403.01218", "title": "Practical Exploitation of DNS Rebinding in IoT Devices", "domain": "security", "categories": ["cs.CR", "cs.NI"], "summary": "Demonstrates DNS rebinding attacks against 15 popular IoT devices in home networks." }, { "arxiv_id": "2401.15491", "title": "GPU.zip: Side Channel Attacks on GPU-Based Graphical Data Compression", "domain": "security", "categories": ["cs.CR"], "summary": "First cross-origin pixel-stealing attack through GPU hardware data compression." }, { "arxiv_id": "2402.03367", "title": "CryptoFuzz: Fully Automated Testing of Cryptographic API Misuse", "domain": "security", "categories": ["cs.CR", "cs.SE"], "summary": "Automated fuzzer detecting cryptographic API misuse patterns in Java applications." }, { "arxiv_id": "2403.08946", "title": "Video Generation Models as World Simulators", "domain": "aiml", "categories": ["cs.CV", "cs.AI", "cs.LG"], "summary": "Explores how video generation models learn physical world dynamics as implicit simulators." }, { "arxiv_id": "2402.05929", "title": "V-JEPA: Video Joint Embedding Predictive Architecture", "domain": "aiml", "categories": ["cs.CV", "cs.LG"], "summary": "Self-supervised video representation learning that predicts in latent space rather than pixel space." }, { "arxiv_id": "2401.10020", "title": "AlphaGeometry: Solving Olympiad Geometry without Human Demonstrations", "domain": "aiml", "categories": ["cs.AI", "cs.LG"], "summary": "AI system solving IMO-level geometry problems through neurosymbolic reasoning." }, { "arxiv_id": "2403.04132", "title": "Design2Code: How Far Are We From Automating Front-End Engineering?", "domain": "aiml", "categories": ["cs.CV", "cs.CL", "cs.SE"], "summary": "Benchmarks multimodal LLMs on converting visual designs to functional HTML/CSS code." }, { "arxiv_id": "2402.14905", "title": "YOLOv9: Learning What You Want to Learn Using Programmable Gradient Information", "domain": "aiml", "categories": ["cs.CV"], "summary": "New YOLO architecture using programmable gradient information for better object detection." }, { "arxiv_id": "2401.06066", "title": "MagicVideo-V2: Multi-Stage High-Aesthetic Video Generation", "domain": "aiml", "categories": ["cs.CV", "cs.AI"], "summary": "Multi-stage video generation pipeline producing high-quality aesthetic videos from text." }, { "arxiv_id": "2402.01680", "title": "Grandmaster-Level Chess Without Search", "domain": "aiml", "categories": ["cs.AI", "cs.LG"], "summary": "Transformer achieving grandmaster chess play through pure pattern recognition without tree search." }, { "arxiv_id": "2403.04706", "title": "SWE-agent: Agent-Computer Interfaces Enable Automated Software Engineering", "domain": "aiml", "categories": ["cs.SE", "cs.CL", "cs.AI"], "summary": "LLM agent that autonomously fixes GitHub issues by interacting with code repositories." } ]