# ============================================================================
# examples.py — built-in labeled ML paper sentences
# ============================================================================
#
# PURPOSE
# -------
# A tiny dataset of labeled sentences drawn from well-known machine learning
# papers. Used in three places in the demo:
#
#   1. As TOOLS the agent can call (search, lookup, list) — see tools.py
#   2. As a DATA SOURCE students can load as context — see app.py
#   3. As the reference vocabulary for the CLASSIFY mode — see agent.py
#
# The same dataset feeds all three, so students can ask the same question
# three different ways and compare the approaches side-by-side in the
# Results tab.
#
# SCHEMA — each entry is a dict with exactly five keys:
#   sentence     (str)  the actual text
#   paper_id     (str)  stable slug "author-year-keyword"
#   paper_title  (str)  human-readable title
#   year         (int)  publication year
#   label        (str)  one of LABELS below
# ============================================================================

# Closed vocabulary for classification. Keep this short — six labels is
# enough to be interesting and few enough that students can remember them.
LABELS = (
    "contribution",  # the paper's main claim ("we propose...")
    "method",        # how the approach works
    "result",        # a numerical or benchmark result
    "limitation",    # a weakness or failure mode the paper admits
    "motivation",    # why the problem matters
    "related_work",  # a reference to prior work
)


# Every entry carries the same five keys: sentence, paper_id, paper_title,
# year, label. Entries for one paper are kept adjacent and share identical
# paper_id / paper_title / year values; the helpers below rely on that when
# they read a paper's metadata from its first entry.
ML_EXAMPLES = [
    # Attention Is All You Need (Vaswani 2017)
    {
        "sentence": "We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely.",
        "paper_id": "vaswani-2017-attention",
        "paper_title": "Attention Is All You Need",
        "year": 2017,
        "label": "contribution",
    },
    {
        "sentence": "The Transformer follows an encoder-decoder structure using stacked self-attention and point-wise fully connected layers for both the encoder and decoder.",
        "paper_id": "vaswani-2017-attention",
        "paper_title": "Attention Is All You Need",
        "year": 2017,
        "label": "method",
    },
    {
        "sentence": "Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results by over 2 BLEU.",
        "paper_id": "vaswani-2017-attention",
        "paper_title": "Attention Is All You Need",
        "year": 2017,
        "label": "result",
    },

    # BERT (Devlin 2018)
    {
        "sentence": "BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers.",
        "paper_id": "devlin-2018-bert",
        "paper_title": "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding",
        "year": 2018,
        "label": "method",
    },
    {
        "sentence": "BERT advances the state of the art for eleven NLP tasks, pushing the GLUE score to 80.5 percent and SQuAD v1.1 F1 to 93.2.",
        "paper_id": "devlin-2018-bert",
        "paper_title": "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding",
        "year": 2018,
        "label": "result",
    },

    # GPT-3 (Brown 2020)
    {
        "sentence": "Scaling up language models greatly improves task-agnostic, few-shot performance, sometimes reaching competitiveness with prior fine-tuning approaches.",
        "paper_id": "brown-2020-gpt3",
        "paper_title": "Language Models are Few-Shot Learners",
        "year": 2020,
        "label": "contribution",
    },
    {
        "sentence": "We train GPT-3, an autoregressive language model with 175 billion parameters, 10x more than any previous non-sparse language model.",
        "paper_id": "brown-2020-gpt3",
        "paper_title": "Language Models are Few-Shot Learners",
        "year": 2020,
        "label": "method",
    },
    {
        "sentence": "GPT-3 still has notable weaknesses in text synthesis and several NLP tasks, particularly those requiring reasoning over long passages.",
        "paper_id": "brown-2020-gpt3",
        "paper_title": "Language Models are Few-Shot Learners",
        "year": 2020,
        "label": "limitation",
    },

    # ResNet (He 2015)
    {
        "sentence": "Deeper neural networks are more difficult to train, and simply stacking more layers eventually degrades accuracy rather than improving it.",
        "paper_id": "he-2015-resnet",
        "paper_title": "Deep Residual Learning for Image Recognition",
        "year": 2015,
        "label": "motivation",
    },
    {
        "sentence": "We present a residual learning framework to ease the training of networks that are substantially deeper than those used previously.",
        "paper_id": "he-2015-resnet",
        "paper_title": "Deep Residual Learning for Image Recognition",
        "year": 2015,
        "label": "contribution",
    },
    {
        "sentence": "An ensemble of these residual nets achieves 3.57 percent error on the ImageNet test set.",
        "paper_id": "he-2015-resnet",
        "paper_title": "Deep Residual Learning for Image Recognition",
        "year": 2015,
        "label": "result",
    },

    # AlphaGo (Silver 2016)
    {
        "sentence": "We introduce a new approach to computer Go using value networks to evaluate board positions and policy networks to select moves.",
        "paper_id": "silver-2016-alphago",
        "paper_title": "Mastering the game of Go with deep neural networks and tree search",
        "year": 2016,
        "label": "contribution",
    },
    {
        "sentence": "AlphaGo defeated the European champion Fan Hui by five games to zero, the first time a computer program has defeated a human professional on a full board.",
        "paper_id": "silver-2016-alphago",
        "paper_title": "Mastering the game of Go with deep neural networks and tree search",
        "year": 2016,
        "label": "result",
    },

    # CLIP (Radford 2021)
    {
        "sentence": "Learning directly from raw text about images is a promising alternative which leverages a much broader source of supervision.",
        "paper_id": "radford-2021-clip",
        "paper_title": "Learning Transferable Visual Models From Natural Language Supervision",
        "year": 2021,
        "label": "motivation",
    },
    {
        "sentence": "We demonstrate that predicting which caption goes with which image is an efficient and scalable way to learn image representations from scratch.",
        "paper_id": "radford-2021-clip",
        "paper_title": "Learning Transferable Visual Models From Natural Language Supervision",
        "year": 2021,
        "label": "method",
    },
    {
        "sentence": "CLIP matches the accuracy of the original ResNet-50 on ImageNet zero-shot without using any of the 1.28 million original labeled training examples.",
        "paper_id": "radford-2021-clip",
        "paper_title": "Learning Transferable Visual Models From Natural Language Supervision",
        "year": 2021,
        "label": "result",
    },

    # LoRA (Hu 2021)
    {
        "sentence": "Fine-tuning large pretrained models is often infeasible because it requires storing and deploying a separate set of parameters for every downstream task.",
        "paper_id": "hu-2021-lora",
        "paper_title": "LoRA: Low-Rank Adaptation of Large Language Models",
        "year": 2021,
        "label": "motivation",
    },
    {
        "sentence": "LoRA freezes pretrained model weights and injects trainable rank decomposition matrices into each Transformer layer, reducing trainable parameters by up to 10000x.",
        "paper_id": "hu-2021-lora",
        "paper_title": "LoRA: Low-Rank Adaptation of Large Language Models",
        "year": 2021,
        "label": "method",
    },

    # LLaMA (Touvron 2023)
    {
        "sentence": "We introduce LLaMA, a collection of foundation language models ranging from 7B to 65B parameters, trained on trillions of tokens using only publicly available datasets.",
        "paper_id": "touvron-2023-llama",
        "paper_title": "LLaMA: Open and Efficient Foundation Language Models",
        "year": 2023,
        "label": "contribution",
    },
    {
        "sentence": "LLaMA-13B outperforms GPT-3 on most benchmarks despite being more than 10x smaller.",
        "paper_id": "touvron-2023-llama",
        "paper_title": "LLaMA: Open and Efficient Foundation Language Models",
        "year": 2023,
        "label": "result",
    },
]


# ----------------------------------------------------------------
# Helper functions — used by tools.py and by run_classify in agent.py
# ----------------------------------------------------------------

def search_examples(query):
    """Naive case-insensitive text match across sentence and paper title.

    Args:
        query: Text to look for. Leading/trailing whitespace is ignored.
            ``None``, an empty string, or any other falsy value means
            "no query". Truthy non-string values (e.g. the int ``2014``)
            are converted with ``str()`` instead of raising.

    Returns:
        A list of the ML_EXAMPLES entries whose sentence or paper title
        contains the query as a substring, in dataset order; an empty list
        when the query is blank. The returned dicts are the same objects
        stored in ML_EXAMPLES, so callers should treat them as read-only.
    """
    # `query or ""` keeps every falsy input mapping to "no query"; str()
    # then tolerates truthy non-string arguments, which previously raised
    # AttributeError on .lower().
    needle = str(query or "").lower().strip()
    if not needle:
        return []
    return [
        e
        for e in ML_EXAMPLES
        if needle in e["sentence"].lower() or needle in e["paper_title"].lower()
    ]


def get_paper_info(paper_id):
    """Return paper metadata (title, year, sentence count) for a given paper_id.

    Args:
        paper_id: Slug to look up; compared for exact equality against each
            entry's "paper_id".

    Returns:
        A dict with keys "paper_id", "title", "year" and "sentence_count",
        or ``None`` when no entry has that paper_id. Title and year are
        taken from the first matching entry.
    """
    matches = [e for e in ML_EXAMPLES if e["paper_id"] == paper_id]
    if not matches:
        return None
    first = matches[0]
    return {
        "paper_id": paper_id,
        "title": first["paper_title"],
        "year": first["year"],
        "sentence_count": len(matches),
    }


def list_papers():
    """Return one dict per unique paper, sorted by year.

    Returns:
        A list of dicts with keys "paper_id", "title", "year" and
        "sentence_count". Papers with the same year keep the order in which
        they first appear in ML_EXAMPLES, because ``sorted`` is stable.
    """
    papers = {}
    for e in ML_EXAMPLES:
        pid = e["paper_id"]
        if pid not in papers:
            # First sentence seen for this paper: record its metadata once.
            papers[pid] = {
                "paper_id": pid,
                "title": e["paper_title"],
                "year": e["year"],
                "sentence_count": 0,
            }
        papers[pid]["sentence_count"] += 1
    return sorted(papers.values(), key=lambda p: p["year"])