|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Label vocabulary: every "label" value used by the entries of ML_EXAMPLES is
# one of these strings. Kept as a tuple so the order is fixed and the set
# cannot be mutated in place.
LABELS = (
    "contribution",
    "method",
    "result",
    "limitation",
    "motivation",
    "related_work",
)
|
|
|
|
|
def _paper_examples(paper_id, paper_title, year, labeled_sentences):
    """Expand one paper's (label, sentence) pairs into flat example dicts.

    Every returned dict is a fresh object with the keys, in order:
    "sentence", "paper_id", "paper_title", "year", "label".
    """
    return [
        {
            "sentence": sentence,
            "paper_id": paper_id,
            "paper_title": paper_title,
            "year": year,
            "label": label,
        }
        for label, sentence in labeled_sentences
    ]


# Labeled example sentences, grouped by the paper they are attributed to.
# Each entry is a flat dict carrying the sentence, the paper's id/title/year,
# and a single label string.
ML_EXAMPLES = [
    *_paper_examples(
        "vaswani-2017-attention",
        "Attention Is All You Need",
        2017,
        [
            ("contribution", "We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely."),
            ("method", "The Transformer follows an encoder-decoder structure using stacked self-attention and point-wise fully connected layers for both the encoder and decoder."),
            ("result", "Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results by over 2 BLEU."),
        ],
    ),
    *_paper_examples(
        "devlin-2018-bert",
        "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding",
        2018,
        [
            ("method", "BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers."),
            ("result", "BERT advances the state of the art for eleven NLP tasks, pushing the GLUE score to 80.5 percent and SQuAD v1.1 F1 to 93.2."),
        ],
    ),
    *_paper_examples(
        "brown-2020-gpt3",
        "Language Models are Few-Shot Learners",
        2020,
        [
            ("contribution", "Scaling up language models greatly improves task-agnostic, few-shot performance, sometimes reaching competitiveness with prior fine-tuning approaches."),
            ("method", "We train GPT-3, an autoregressive language model with 175 billion parameters, 10x more than any previous non-sparse language model."),
            ("limitation", "GPT-3 still has notable weaknesses in text synthesis and several NLP tasks, particularly those requiring reasoning over long passages."),
        ],
    ),
    *_paper_examples(
        "he-2015-resnet",
        "Deep Residual Learning for Image Recognition",
        2015,
        [
            ("motivation", "Deeper neural networks are more difficult to train, and simply stacking more layers eventually degrades accuracy rather than improving it."),
            ("contribution", "We present a residual learning framework to ease the training of networks that are substantially deeper than those used previously."),
            ("result", "An ensemble of these residual nets achieves 3.57 percent error on the ImageNet test set."),
        ],
    ),
    *_paper_examples(
        "silver-2016-alphago",
        "Mastering the game of Go with deep neural networks and tree search",
        2016,
        [
            ("contribution", "We introduce a new approach to computer Go using value networks to evaluate board positions and policy networks to select moves."),
            ("result", "AlphaGo defeated the European champion Fan Hui by five games to zero, the first time a computer program has defeated a human professional on a full board."),
        ],
    ),
    *_paper_examples(
        "radford-2021-clip",
        "Learning Transferable Visual Models From Natural Language Supervision",
        2021,
        [
            ("motivation", "Learning directly from raw text about images is a promising alternative which leverages a much broader source of supervision."),
            ("method", "We demonstrate that predicting which caption goes with which image is an efficient and scalable way to learn image representations from scratch."),
            ("result", "CLIP matches the accuracy of the original ResNet-50 on ImageNet zero-shot without using any of the 1.28 million original labeled training examples."),
        ],
    ),
    *_paper_examples(
        "hu-2021-lora",
        "LoRA: Low-Rank Adaptation of Large Language Models",
        2021,
        [
            ("motivation", "Fine-tuning large pretrained models is often infeasible because it requires storing and deploying a separate set of parameters for every downstream task."),
            ("method", "LoRA freezes pretrained model weights and injects trainable rank decomposition matrices into each Transformer layer, reducing trainable parameters by up to 10000x."),
        ],
    ),
    *_paper_examples(
        "touvron-2023-llama",
        "LLaMA: Open and Efficient Foundation Language Models",
        2023,
        [
            ("contribution", "We introduce LLaMA, a collection of foundation language models ranging from 7B to 65B parameters, trained on trillions of tokens using only publicly available datasets."),
            ("result", "LLaMA-13B outperforms GPT-3 on most benchmarks despite being more than 10x smaller."),
        ],
    ),
]
|
|
|
|
|
|
|
|
|
|
|
def search_examples(query, examples=None):
    """Return the examples whose sentence or paper title contains *query*.

    Matching is a plain substring test. Both sides are normalized with
    ``str.casefold`` so the comparison is caseless for non-ASCII text as
    well (``str.lower`` leaves e.g. "ß" untouched, so "STRASSE" would not
    match "Straße").

    Args:
        query: Text to look for. ``None`` and other falsy values are treated
            as an empty query; surrounding whitespace is ignored.
        examples: Optional iterable of example dicts to search, each with
            string values under ``"sentence"`` and ``"paper_title"``.
            Defaults to the module-level ``ML_EXAMPLES``.

    Returns:
        A new list holding the matching example dicts in their original
        order. Empty when the query is empty or whitespace-only.
    """
    needle = (query or "").casefold().strip()
    if not needle:
        return []
    # Resolve the default lazily so an explicit corpus never touches the global.
    corpus = ML_EXAMPLES if examples is None else examples
    return [
        entry
        for entry in corpus
        if needle in entry["sentence"].casefold()
        or needle in entry["paper_title"].casefold()
    ]
|
|
|
|
|
def get_paper_info(paper_id, examples=None):
    """Return summary metadata for one paper, or ``None`` if it is unknown.

    Args:
        paper_id: Value compared for equality against each example's
            ``"paper_id"``.
        examples: Optional iterable of example dicts (each with
            ``"paper_id"``, ``"paper_title"`` and ``"year"`` keys) to scan.
            Defaults to the module-level ``ML_EXAMPLES``.

    Returns:
        ``None`` when no example carries ``paper_id``; otherwise a dict with
        ``"paper_id"``, ``"title"``, ``"year"`` and ``"sentence_count"``.
        Title and year are taken from the first matching example;
        ``"sentence_count"`` is the number of matching examples.
    """
    # Resolve the default lazily so an explicit corpus never touches the global.
    corpus = ML_EXAMPLES if examples is None else examples
    matches = [entry for entry in corpus if entry["paper_id"] == paper_id]
    if not matches:
        return None
    first = matches[0]
    return {
        "paper_id": paper_id,
        "title": first["paper_title"],
        "year": first["year"],
        "sentence_count": len(matches),
    }
|
|
|
|
|
def list_papers(examples=None):
    """Return one summary dict per unique paper, sorted by ascending year.

    Args:
        examples: Optional iterable of example dicts (each with
            ``"paper_id"``, ``"paper_title"`` and ``"year"`` keys) to
            summarize. Defaults to the module-level ``ML_EXAMPLES``.

    Returns:
        A list of dicts with ``"paper_id"``, ``"title"``, ``"year"`` and
        ``"sentence_count"``. Title and year come from the first example seen
        for each paper. Papers that share a year keep the order in which they
        first appear, because the sort is stable and the dict preserves
        insertion order.
    """
    # Resolve the default lazily so an explicit corpus never touches the global.
    corpus = ML_EXAMPLES if examples is None else examples
    papers = {}
    for entry in corpus:
        pid = entry["paper_id"]
        if pid not in papers:
            papers[pid] = {
                "paper_id": pid,
                "title": entry["paper_title"],
                "year": entry["year"],
                "sentence_count": 0,
            }
        papers[pid]["sentence_count"] += 1
    return sorted(papers.values(), key=lambda paper: paper["year"])
|
|
|