{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "toc_visible": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "![read_agent_teaser](https://read-agent.github.io/img/teaser.png)" ], "metadata": { "id": "1iqyV7VcsiXT" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "MYOnCMh83ZRE" }, "outputs": [], "source": [ "!wget https://github.com/nyu-mll/quality/raw/main/data/v1.0.1/QuALITY.v1.0.1.htmlstripped.dev\n", "import re, time, datetime, json, string, copy" ] }, { "cell_type": "code", "source": [ "# @title Using OpenAI GPT model (DO NOT run the next cell if using GPT)\n", "!pip3 install openai\n", "import openai\n", "\n", "key = 'YOUR API KEY' #@param {type: \"string\"}\n", "gpt_client = openai.OpenAI(api_key=key)\n", "model_type = 'gpt'\n", "\n", "def query_gpt_model(\n", " prompt: str,\n", " lm: str = 'gpt-3.5-turbo-1106',\n", " temperature: float = 0.0,\n", " max_decode_steps: int = 512,\n", " seconds_to_reset_tokens: float = 30.0,\n", ") -> str:\n", " while True:\n", " try:\n", " raw_response = gpt_client.chat.completions.with_raw_response.create(\n", " model=lm,\n", " max_tokens=max_decode_steps,\n", " temperature=temperature,\n", " messages=[\n", " {'role': 'user', 'content': prompt},\n", " ]\n", " )\n", " completion = raw_response.parse()\n", " return completion.choices[0].message.content\n", " except openai.RateLimitError as e:\n", " print(f'{datetime.datetime.now()}: query_gpt_model: RateLimitError {e.message}: {e}')\n", " time.sleep(seconds_to_reset_tokens)\n", " except openai.APIError as e:\n", " print(f'{datetime.datetime.now()}: query_gpt_model: APIError {e.message}: {e}')\n", " print(f'{datetime.datetime.now()}: query_gpt_model: Retrying after 5 seconds...')\n", " time.sleep(5)" ], "metadata": { "id": "oz0kOxYJ4n3e", "cellView": "form" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# @title Using Google Gemini model (DO NOT run this if using GPT)\n", "!pip3 install -q -U google-generativeai\n", "import google.generativeai as genai\n", "\n", "key = 'YOUR API KEY' #@param {type: \"string\"}\n", "\n", "genai.configure(api_key=key)\n", "model = genai.GenerativeModel('gemini-pro')\n", "model_type = 'gemini'\n", "\n", "def query_gemini_model(\n", " prompt: str,\n", " retries: int = 10,\n", ") -> str:\n", " while True and retries > 0:\n", " try:\n", " response = model.generate_content(prompt)\n", " text_response = response.text.replace(\"**\", \"\")\n", " return text_response\n", " except Exception as e:\n", " print(f'{datetime.datetime.now()}: query_gemini_model: Error: {e}')\n", " print(f'{datetime.datetime.now()}: query_gemini_model: Retrying after 5 seconds...')\n", " retries -= 1\n", " time.sleep(5)" ], "metadata": { "cellView": "form", "id": "YcP_tIpZKNFY" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "def query_model(prompt):\n", " if model_type == \"gpt\":\n", " return query_gpt_model(prompt)\n", " elif model_type == \"gemini\":\n", " return query_gemini_model(prompt)" ], "metadata": { "id": "pYm2GsBGEvAI" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "#@title Load a QuALITY example\n", "\n", "# Fields that are straight text copies from raw example to processed example.\n", "_ONE2ONE_FIELDS = (\n", " 'article',\n", " 'article_id',\n", " 'set_unique_id',\n", " 'writer_id',\n", " 'source',\n", " 'title',\n", " 'topic',\n", " 'url',\n", " 'writer_id',\n", " 'author',\n", ")\n", "\n", "quality_dev = []\n", "\n", "with open('QuALITY.v1.0.1.htmlstripped.dev', 'r') as f:\n", " for line in f.readlines():\n", " j = json.loads(line)\n", " fields = {k: j[k] for k in _ONE2ONE_FIELDS}\n", " fields.update({\n", " 'questions': [q['question'] for q in j['questions']],\n", " 'question_ids': [q['question_unique_id'] for q in j['questions']],\n", " 'difficults': [q['difficult'] for q in j['questions']],\n", " 'options': [q['options'] for q in j['questions']],\n", " })\n", "\n", " fields.update({\n", " 'gold_labels': [q['gold_label'] for q in j['questions']],\n", " 'writer_labels': [q['writer_label'] for q in j['questions']],\n", " })\n", "\n", " quality_dev.append(fields)\n", "\n", "example = quality_dev[13]" ], "metadata": { "id": "1B70Rqg97aXu", "cellView": "form" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "#@title Helper functions\n", "\n", "all_lowercase_letters = string.ascii_lowercase # \"abcd...xyz\"\n", "bracketed_lowercase_letters_set = set(\n", " [f\"({l})\" for l in all_lowercase_letters]\n", ") # {\"(a)\", ...}\n", "bracketed_uppercase_letters_set = set(\n", " [f\"({l.upper()})\" for l in all_lowercase_letters]\n", ") # {\"(a)\", ...}\n", "\n", "choices = ['(A)', '(B)', '(C)', '(D)']\n", "\n", "def get_index_from_symbol(answer):\n", " \"\"\"Get the index from the letter symbols A, B, C, D, to extract answer texts.\n", "\n", " Args:\n", " answer (str): the string of answer like \"(B)\".\n", "\n", " Returns:\n", " index (int): how far the given choice is from \"a\", like 1 for answer \"(B)\".\n", " \"\"\"\n", " answer = str(answer).lower()\n", " # extract the choice letter from within bracket\n", " if answer in bracketed_lowercase_letters_set:\n", " answer = re.findall(r\"\\(.*?\\)\", answer)[0][1]\n", " index = ord(answer) - ord(\"a\")\n", " return index\n", "\n", "def count_words(text):\n", " \"\"\"Simple word counting.\"\"\"\n", " return len(text.split())\n", "\n", "def quality_gutenberg_parser(raw_article):\n", " \"\"\"Parse Gutenberg articles in the QuALITY dataset.\"\"\"\n", " lines = []\n", " previous_line = None\n", " for i, line in enumerate(raw_article.split('\\n')):\n", " line = line.strip()\n", " original_line = line\n", " if line == '':\n", " if previous_line == '':\n", " line = '\\n'\n", " else:\n", " previous_line = original_line\n", " continue\n", " previous_line = original_line\n", " lines.append(line)\n", " return ' '.join(lines)" ], "metadata": { "id": "nQsb3n6pOlz2", "cellView": "form" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "#@title ReadAgent (1) Episode Pagination\n", "\n", "prompt_pagination_template = \"\"\"\n", "You are given a passage that is taken from a larger text (article, book, ...) and some numbered labels between the paragraphs in the passage.\n", "Numbered label are in angeled brackets. For example, if the label number is 19, it shows as <19> in text.\n", "Please choose one label that it is natural to break reading.\n", "Such point can be scene transition, end of a dialogue, end of an argument, narrative transition, etc.\n", "Please answer the break point label and explain.\n", "For example, if <57> is a good point to break, answer with \\\"Break point: <57>\\n Because ...\\\"\n", "\n", "Passage:\n", "\n", "{0}\n", "{1}\n", "{2}\n", "\n", "\"\"\"\n", "\n", "def parse_pause_point(text):\n", " text = text.strip(\"Break point: \")\n", " if text[0] != '<':\n", " return None\n", " for i, c in enumerate(text):\n", " if c == '>':\n", " if text[1:i].isnumeric():\n", " return int(text[1:i])\n", " else:\n", " return None\n", " return None\n", "\n", "\n", "def quality_pagination(example,\n", " word_limit=600,\n", " start_threshold=280,\n", " max_retires=10,\n", " verbose=True,\n", " allow_fallback_to_last=True):\n", " article = example['article']\n", " title = example['title']\n", " print(f\"[Pagination][Article {title}]\")\n", " paragraphs = quality_gutenberg_parser(article).split('\\n')\n", "\n", " i = 0\n", " pages = []\n", " while i < len(paragraphs):\n", " preceding = \"\" if i == 0 else \"...\\n\" + '\\n'.join(pages[-1])\n", " passage = [paragraphs[i]]\n", " wcount = count_words(paragraphs[i])\n", " j = i + 1\n", " while wcount < word_limit and j < len(paragraphs):\n", " wcount += count_words(paragraphs[j])\n", " if wcount >= start_threshold:\n", " passage.append(f\"<{j}>\")\n", " passage.append(paragraphs[j])\n", " j += 1\n", " passage.append(f\"<{j}>\")\n", " end_tag = \"\" if j == len(paragraphs) else paragraphs[j] + \"\\n...\"\n", "\n", " pause_point = None\n", " if wcount < 350:\n", " pause_point = len(paragraphs)\n", " else:\n", " prompt = prompt_pagination_template.format(preceding, '\\n'.join(passage), end_tag)\n", " response = query_model(prompt=prompt).strip()\n", " pause_point = parse_pause_point(response)\n", " if pause_point and (pause_point <= i or pause_point > j):\n", " print(f\"prompt:\\n{prompt},\\nresponse:\\n{response}\\n\")\n", " print(f\"i:{i} j:{j} pause_point:{pause_point}\")\n", " pause_point = None\n", " if pause_point is None:\n", " if allow_fallback_to_last:\n", " pause_point = j\n", " else:\n", " raise ValueError(f\"prompt:\\n{prompt},\\nresponse:\\n{response}\\n\")\n", "\n", " page = paragraphs[i:pause_point]\n", " pages.append(page)\n", " if verbose:\n", " print(f\"Paragraph {i}-{pause_point-1}\", page)\n", " i = pause_point\n", " print(f\"[Pagination] Done with {len(pages)} pages\")\n", " return pages\n", "\n", "pages = quality_pagination(example)" ], "metadata": { "id": "BfFkEQKx0u9U" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "#@title ReadAgent (2) Memory Gisting\n", "\n", "prompt_shorten_template = \"\"\"\n", "Please shorten the following passage.\n", "Just give me a shortened version. DO NOT explain your reason.\n", "\n", "Passage:\n", "{}\n", "\n", "\"\"\"\n", "\n", "def quality_gisting(example, pages, word_limit=600, start_threshold=280, verbose=True):\n", " article = example['article']\n", " title = example['title']\n", " word_count = count_words(article)\n", " print(f\"[Gisting][Article {title}], {word_count} words\")\n", "\n", " shortened_pages = []\n", " for i, page in enumerate(pages):\n", " prompt = prompt_shorten_template.format('\\n'.join(page))\n", " response = query_model(prompt)\n", " shortened_text = response.strip()\n", " shortened_pages.append(shortened_text)\n", " if verbose:\n", " print(\"[gist] page {}:\".format(i), shortened_text, flush=True)\n", " shortened_article = '\\n'.join(shortened_pages)\n", " gist_word_count = count_words(shortened_article)\n", " if verbose:\n", " print(\"Shortened article:\\n\", shortened_article, flush=True)\n", " output = copy.deepcopy(example)\n", " output.update({'title': title, 'word_count': word_count, 'gist_word_count': gist_word_count, 'shortened_pages': shortened_pages, 'pages': pages})\n", " if verbose:\n", " print(f\"compression rate {round(100.0 - gist_word_count/word_count*100, 2)}% ({gist_word_count}/{word_count})\")\n", " return output\n", "example_with_gists = quality_gisting(example, pages)" ], "metadata": { "id": "DLBolKnkS_9y" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "#@title ReadAgent (3) Look-Up\n", "\n", "prompt_lookup_template = \"\"\"\n", "The following text is what you remembered from reading an article and a multiple choice question related to it.\n", "You may read 1 to 6 page(s) of the article again to refresh your memory to prepare yourselve for the question.\n", "Please respond with which page(s) you would like to read.\n", "For example, if your only need to read Page 8, respond with \\\"I want to look up Page [8] to ...\\\";\n", "if your would like to read Page 7 and 12, respond with \\\"I want to look up Page [7, 12] to ...\\\";\n", "if your would like to read Page 2, 3, 7, 15 and 18, respond with \\\"I want to look up Page [2, 3, 7, 15, 18] to ...\\\".\n", "if your would like to read Page 3, 4, 5, 12, 13 and 16, respond with \\\"I want to look up Page [3, 3, 4, 12, 13, 16] to ...\\\".\n", "DO NOT select more pages if you don't need to.\n", "DO NOT answer the question yet.\n", "\n", "Text:\n", "{}\n", "\n", "Question:\n", "{}\n", "{}\n", "\n", "Take a deep breath and tell me: Which page(s) would you like to read again?\n", "\"\"\"\n", "\n", "prompt_answer_template = \"\"\"\n", "Read the following article and answer a multiple choice question.\n", "For example, if (C) is correct, answer with \\\"Answer: (C) ...\\\"\n", "\n", "Article:\n", "{}\n", "\n", "Question:\n", "{}\n", "{}\n", "\n", "\"\"\"\n", "\n", "def quality_parallel_lookup(example, verbose=True):\n", " preprocessed_pages = example['pages']\n", " article = example['article']\n", " title = example['title']\n", " word_count = example['word_count']\n", " gist_word_count = example['gist_word_count']\n", " pages = example['pages']\n", " shortened_pages = example['shortened_pages']\n", " questions = example['questions']\n", " options = example['options']\n", " gold_labels = example['gold_labels'] # numerical [1, 2, 3, 4]\n", "\n", " print(f\"[Look-Up][Article {title}] {word_count} words\")\n", "\n", " model_choices = []\n", " lookup_page_ids = []\n", "\n", " shortened_pages_pidx = []\n", " for i, shortened_text in enumerate(shortened_pages):\n", " shortened_pages_pidx.append(\"\\n\".format(i) + shortened_text)\n", " shortened_article = '\\n'.join(shortened_pages_pidx)\n", "\n", " expanded_gist_word_counts = []\n", " for i, label in enumerate(gold_labels):\n", " # only test the first question for demo\n", " if i != 1:\n", " continue\n", " q = questions[i]\n", " print(\"question: \", q)\n", " options_i = [f\"{ol} {o}\" for ol, o in zip(choices, options[i])]\n", " print(\"options: \", \"\\n\".join(options_i))\n", " prompt_lookup = prompt_lookup_template.format(shortened_article, q, '\\n'.join(options_i))\n", "\n", " page_ids = []\n", "\n", " response = query_model(prompt=prompt_lookup).strip()\n", "\n", " try: start = response.index('[')\n", " except ValueError: start = len(response)\n", " try: end = response.index(']')\n", " except ValueError: end = 0\n", " if start < end:\n", " page_ids_str = response[start+1:end].split(',')\n", " page_ids = []\n", " for p in page_ids_str:\n", " if p.strip().isnumeric():\n", " page_id = int(p)\n", " if page_id < 0 or page_id >= len(pages):\n", " print(\"Skip invalid page number: \", page_id, flush=True)\n", " else:\n", " page_ids.append(page_id)\n", "\n", " if verbose:\n", " print(\"Model chose to look up page {}\".format(page_ids))\n", "\n", " # Memory expansion after look-up, replacing the target shortened page with the original page\n", " expanded_shortened_pages = shortened_pages[:]\n", " if len(page_ids) > 0:\n", " for page_id in page_ids:\n", " expanded_shortened_pages[page_id] = '\\n'.join(pages[page_id])\n", "\n", " expanded_shortened_article = '\\n'.join(expanded_shortened_pages)\n", " expanded_gist_word_count = count_words(expanded_shortened_article)\n", " if verbose:\n", " print(\"Expanded shortened article:\\n\", expanded_shortened_article, flush=True)\n", " prompt_answer = prompt_answer_template.format(expanded_shortened_article, q, '\\n'.join(options_i))\n", "\n", " # If the response doesn't follow the template, retry\n", " model_choice = None\n", " response = query_model(prompt=prompt_answer)\n", " response = response.strip()\n", " for j, choice in enumerate(choices):\n", " if response.startswith(f\"Answer: {choice}\") or response.startswith(f\"Answer: {choice[1]}\"):\n", " model_choice = j+1\n", " break\n", " is_correct = 1 if model_choice == label else 0\n", " print(f\"question: {q}\")\n", " print(f\"reference answer: {choices[label]}, model prediction: {choices[model_choice]}, is_correct: {is_correct}\")\n", " print(f\"compression rate {round(100.0 - gist_word_count/word_count*100, 2)}% ({gist_word_count}/{word_count})\")\n", " print(f\"compression rate after look-up {round(100.0 - expanded_gist_word_count/word_count*100, 2)}% ({expanded_gist_word_count}/{word_count})\")\n", "\n", "quality_parallel_lookup(example_with_gists)" ], "metadata": { "id": "8YKNTyDsXNIn" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "#Prompts that we used in the paper\n", "\n", "In the following we show the prompts that were used for the QuALTIY, QMSum, NarrativeQA datasets with the PaLM 2-L model. While there are slight differences in prompt design, most of these are not due to optimizing prompts for specific datasets but rather a results of that each author wrote the prompts independently." ], "metadata": { "id": "Gn8fjomx7iRz" } }, { "cell_type": "code", "source": [ "# @title The prompts we used for QuALITY with PaLM 2-L\n", "\n", "\n", "# Pagination\n", "pagination_prompt_template = \"\"\"\n", "You are given a passage that is taken from a larger text (article, book, ...) and some numbered labels between the paragraphs in the passage.\n", "Numbered label are in angeled brackets. For example, if the label number is 19, it shows as <19> in text.\n", "Please choose one label that it is natural to break reading.\n", "Such point can be scene transition, end of a dialogue, end of an argument, narrative transition, etc.\n", "Please answer the break point label and explain.\n", "For example, if <57> is a good point to break, answer with \\\"Break point: <57>\\n Because ...\\\"\n", "\n", "Passage:\n", "\n", "{passage_text}\n", "{end_tag}\n", "\n", "\"\"\"\n", "# passage_text: a chunk of text.\n", "# end_tag: a string, whose value is \"\" if the text is at the end of the article, and otherwise \"\\n...\".\n", "\n", "\n", "\n", "# Gisting\n", "gisting_prompt_template = \"\"\"\n", "Please shorten the following passage.\n", "Just give me a shortened version. DO NOT explain your reason.\n", "\n", "Passage:\n", "{page_text}\n", "\n", "\"\"\"\n", "# page_text: a page of text\n", "\n", "\n", "\n", "# Parallel Look-up (ReadAgent-P, up to 5 pages)\n", "parallel_lookup_prompt_template = \"\"\"\n", "The following text is what you remembered from reading an article and a multiple choice question related to it.\n", "You may read 1 to 5 page(s) of the article again to refresh your memory to prepare yourselve for the question.\n", "Please respond with which page(s) you would like to read again.\n", "For example, if your would like to only read Page 8, respond with \\\"I want to look up Page [8] to ...\\\";\n", "if your would like to read Page 7 and 12, respond with \\\"I want to look up Page [7, 12] to ...\\\";\n", "if your would like to read Page 2, 3, 7, 15 and 18, respond with \\\"I want to look up Page [2, 3, 7, 15, 18] to ...\\\".\n", "DO NOT select more pages if you don't need to.\n", "DO NOT answer the question yet.\n", "\n", "Text:\n", "{concatenated_gists}\n", "\n", "Question:\n", "{question}\n", "{options}\n", "\n", "Take a deep breath and tell me: Which page(s) would you like to read again?\n", "\"\"\"\n", "# concatenated_gists: concatenated gists\n", "# question: a question\n", "# options: multiple-choice options\n", "\n", "\n", "\n", "# Sequential Look-up (ReadAgent-S, up to 5 pages)\n", "sequential_lookup_prompt_template = \"\"\"\n", "The following text is what you remember from reading an article, followed by a question about the article.\n", "You may read multiple pages of the article again to refresh your memory and prepare to answer the question.\n", "Each page that you re-read can significantly improve your chance of answering the question correctly.\n", "Please specify a SINGLE page you would like to read again or say \"STOP\".\n", "To read a page again, respond with \"Page $PAGE_NUM\", replacing $PAGE_NUM with the target page number.\n", "You can only specify a SINGLE page in your response at this time.\n", "DO NOT select more pages if you don't need to.\n", "To stop, simply say \"STOP\".\n", "DO NOT answer the question in your response.\n", "\n", "Text:\n", "{concatenated_gists}\n", "End of text.\n", "\n", "Pages re-read already (DO NOT ask to read them again):\n", "{past_page_numbers}\n", "\n", "Question:\n", "{question}\n", "{options}\n", "\n", "Specify a SINGLE page to read again, or say STOP:\n", "\"\"\"\n", "# concatenated_gists: concatenated gists\n", "# past_page_numbers: page numbers that have already been retrieved\n", "# question: a question\n", "# options: options\n", "\n", "\n", "\n", "# Response/Answer\n", "answer_prompt_template = \"\"\"\n", "Read the following article and answer a multiple choice question.\n", "For example, if (C) is correct, answer with \\\"Answer: (C) ...\\\"\n", "\n", "Article:\n", "{concatenated_pages_and_gists}\n", "\n", "Question:\n", "{question}\n", "{options}\n", "\n", "\"\"\"\n", "# concatenated_pages_and_gists: concatenated raw pages and gists\n", "# question: a question\n", "# options: options" ], "metadata": { "id": "PGvYRIpO3J3Y" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# @title The prompts we used for QMSum with PaLM 2-L\n", "\n", "\n", "# Pagination\n", "pagination_prompt_template = \"\"\"\n", "You are given a passage that is taken from a larger meeting transcript.\n", "There are some numbered labels between the paragraphs (like <0>) in the passage.\n", "Please choose one label at a natural transition in the passage.\n", "For example, the label can be at the end of a dialogue, the end of an argument, a change in the topic being discussed, etc.\n", "Please respond with the label and explain your choice.\n", "For example, if <57> is a natural transition, answer with \"Label: <57>\\n Because ...\"\n", "\n", "Passage:\n", "\n", "{preceding_text}\n", "{passage_text}\n", "{end_tag}\n", "\n", "\"\"\"\n", "# preceding_text: a fraction of previous context\n", "# passage_text: a chunk of text.\n", "# end_tag: a string, whose value is \"\" if the text is at the end of the article, and otherwise \"\\n...\".\n", "\n", "\n", "\n", "# Gisting\n", "gisting_prompt_template = \"\"\"\n", "Please shorten the following passage.\n", "Just give a shortened version. DO NOT explain your reasoning.\n", "\n", "Passage:\n", "{page_text}\n", "\n", "\"\"\"\n", "# page_text: a page of text\n", "\n", "\n", "\n", "# Parallel Look-up (ReadAgent-P, up to 2 pages)\n", "parallel_lookup_prompt_template = \"\"\"\n", "The following text is what you remember from reading a meeting transcript, followed by a question about the transcript.\n", "You may read 1 or 2 pages of the transcript again to refresh your memory to prepare to answer the question.\n", "Please respond with which page(s) you would like to read.\n", "For example, if your would only like to read Page 8, respond with \"I want to look up Page [8] ...\"\n", "If you would like to read Page 7 and 12, respond with \"I want to look up Page [7, 12] ...\".\n", "Only select as many pages as you need, but no more than 2 pages.\n", "Don't answer the question yet.\n", "\n", "Text:\n", "{text}\n", "End of text.\n", "\n", "Question:\n", "{question}\n", "\n", "Which page(s) would you like to look up?\n", "\"\"\"\n", "# concatenated_gists: Concatenated gists\n", "# question: a question\n", "\n", "\n", "\n", "# Sequential Look-up (ReadAgent-S)\n", "sequential_lookup_prompt_template = \"\"\"\n", "The following text is what you remember from reading a meeting transcript, followed by a question about the transcript.\n", "You may read multiple pages of the transcript again to refresh your memory and prepare to answer the question.\n", "Each page that you re-read can significantly improve your chance of answering the question correctly.\n", "Please specify a SINGLE page you would like to read again or say \"STOP\".\n", "To read a page again, respond with \"Page $PAGE_NUM\", replacing $PAGE_NUM with the target page number.\n", "You can only specify a SINGLE page in your response at this time.\n", "DO NOT select more pages if you don't need to.\n", "To stop, simply say \"STOP\".\n", "DO NOT answer the question in your response.\n", "\n", "Text:\n", "{concatenated_gists}\n", "End of text.\n", "\n", "Pages re-read already (DO NOT ask to read them again):\n", "{past_page_numbers}\n", "\n", "Question:\n", "{question}\n", "\n", "Specify a SINGLE page to read again, or say STOP:\n", "\"\"\"\n", "# concatenated_gists: concatenated gists\n", "# past_page_numbers: page numbers that have already been retrieved\n", "# question: a question\n", "\n", "\n", "\n", "# Response/Answer\n", "answer_prompt_template = \"\"\"\n", "Read the question and text below and then answer the question.\n", "\n", "Question:\n", "{question}\n", "\n", "Text:\n", "{concatenated_pages_and_gists}\n", "End of Text.\n", "\n", "Answer the question based on the above passage and retrieved pages. Your answer should be short and concise.\n", "\"\"\"\n", "# question: a question\n", "# concatenated_pages_and_gists: concatenated raw pages and gists" ], "metadata": { "id": "Ya48p13EhhhI" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# @title The prompts we used for NarrativeQA - Gutenburg with PaLM 2-L\n", "\n", "\n", "# Pagination\n", "pagination_prompt_template = \"\"\"\n", "You are given a passage that is taken from a larger text (article, book, ...) and some numbered labels between the paragraphs in the passage.\n", "Numbered label are in angeled brackets. For example, if the label number is 19, it shows as <19> in text.\n", "Please choose one label that marks a major section break point.\n", "Such points can be the beginning/end of a book, beginning/end of a chapter, end of a content table, a scene transition, end of a dialogue, etc.\n", "If a point is chosen for the beginning of a book/chapter/etc and there is a title of the new book/chapter/etc, the break point must be chosen at a position right before the section number and title, not after.\n", "\n", "Please answer the break point label and explain.\n", "For example, if <57> is a good point to break, answer with \\\"Breakpoint: <57> ...\\\"\n", "\n", "Text:\n", "\n", "{preceding_text}\n", "{passage_text}\n", "{end_tag}\n", "\n", "\"\"\"\n", "# preceding_text: a fraction of previous context\n", "# passage_text: a chunk of text.\n", "# end_tag: a string, whose value is \"\" if the text is at the end of the article, and otherwise \"\\n...\".\n", "\n", "\n", "\n", "# Gisting\n", "gisting_prompt_template = \"\"\"\n", "Please shorten the following passage.\n", "Just give me a shortened version. DO NOT explain your reason.\n", "\n", "Passage:\n", "{page_text}\n", "\n", "\"\"\"\n", "# page_text: a page of text\n", "\n", "\n", "\n", "# Parallel Look-up (ReadAgent-P, up to 2 pages)\n", "parallel_lookup_prompt_template = \"\"\"\n", "The following text is what you remembered from reading an article and a question related to it.\n", "You may read 1 or 2 page(s) of the article again to refresh your memory to prepare yourselve for the question.\n", "Please respond with which page(s) you would like to read in the order of importance, beginning with the most important page number.\n", "For example, if your only need to read Page 8, respond with \\\"I want to look up Page [8] to ...\\\";\n", "if your would like to read Page 12 and 7, respond with \\\"I want to look up Page [12, 7] to ...\\\";\n", "DO NOT select more pages if you don't need to.\n", "You don't need to answer the question yet.\n", "\n", "Text:\n", "{concatenated_gists}\n", "\n", "Question:\n", "{question}\n", "\n", "\"\"\"\n", "# concatenated_gists: Concatenated gists\n", "# question: a question\n", "\n", "\n", "\n", "# Sequential Look-up (ReadAgent-S)\n", "sequential_lookup_prompt_template = \"\"\"\n", "The following text is what you remember from reading a meeting transcript, followed by a question about the transcript.\n", "You may read multiple pages of the transcript again to refresh your memory and prepare to answer the question.\n", "Each page that you re-read can significantly improve your chance of answering the question correctly.\n", "Please specify a SINGLE page you would like to read again or say \"STOP\".\n", "To read a page again, respond with \"Page $PAGE_NUM\", replacing $PAGE_NUM with the target page number.\n", "You can only specify a SINGLE page in your response at this time.\n", "DO NOT select more pages if you don't need to.\n", "To stop, simply say \"STOP\".\n", "DO NOT answer the question in your response.\n", "\n", "Text:\n", "{concatenated_gists}\n", "End of text.\n", "\n", "Pages re-read already (DO NOT ask to read them again):\n", "{past_page_numbers}\n", "\n", "Question:\n", "{question}\n", "\n", "Specify a SINGLE page to read again, or say STOP:\n", "\"\"\"\n", "# concatenated_gists: concatenated gists\n", "# past_page_numbers: page numbers that have already been retrieved\n", "# question: a question\n", "\n", "\n", "\n", "# Response/Answer\n", "answer_prompt_template = \"\"\"\n", "{concatenated_pages_and_gists}\n", "\n", "Question:\n", "{question}\n", "\n", "Answer the question based on the above passage and retrieved pages. Your answer should be short and concise.\n", "\"\"\"\n", "# concatenated_pages_and_gists: concatenated raw pages and gists\n", "# question: a question" ], "metadata": { "id": "U210HZJuP4_u" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# @title The prompts we used for NarrativeQA - Movie Scripts with PaLM 2-L\n", "\n", "\n", "# Pagination\n", "pagination_prompt_template = \"\"\"\n", "You are given a movie script and some numbered labels between the lines in the script.\n", "Numbered label are in angeled brackets.\n", "Please choose one label that it is natural to break reading. The label should be between <{start}> and <{end}>.\n", "Such point can be scene transition, end of a dialogue, end of an argument, narrative transition, etc.\n", "The answer should end with \"The break point is: \", where the break point number is between angeled brackets.\n", "\n", "Script:\n", "\n", "{passage_text}\n", "\"\"\"\n", "# passage_text: a chunk of text.\n", "\n", "\n", "\n", "# Gisting\n", "gisting_prompt_template = \"\"\"\n", "Please shorten the following passage. The shortened passage should be in 128 tokens. Please refer to people with their full names whenever possible.\n", "Just give me a shortened version. DO NOT explain your reason. If there is no meaning information in the passage, output \"I don't have enough information to shorten the passage.\"\n", "\n", "Passage:\n", "{page_text}\n", "\n", "\"\"\"\n", "# page_text: a page of text\n", "\n", "\n", "\n", "# Parallel Look-up (ReadAgent-P, up to 2 pages)\n", "parallel_lookup_prompt_template = \"\"\"\n", "The following text includes a summary of each page in a movie script, followed by a question about the script.\n", "\n", "Summary:\n", "{concatenated_gists}\n", "\n", "Question:\n", "{question}\n", "\n", "Based on the summary of each page, you may read the full details of 1 to 2 page(s) to obtain more information to answer the question.\n", "Please respond with which page(s) you would like to read.\n", "For example, if you only need to read Page X_0, the answer should end with \\\"I want to look up Page [X_0]\\\";\n", "if you would like to read Page X_0 and X_1, the answer should end with \\\"I want to look up Page [X_0, X_1]\\\".\n", "X_i above is a page index between 0 and {end}. DO NOT select more pages if you don't need to.\n", "You don't need to answer the question yet.\n", "\"\"\"\n", "# concatenated_gists: Concatenated gists\n", "# question: a question\n", "\n", "\n", "\n", "# Sequential Look-up (ReadAgent-S)\n", "sequential_lookup_prompt_template = \"\"\"\n", "The following text includes a summary of each page in a movie script, followed by a question about the script, and the previous answer based on the summary and already re-read pages.\n", "\n", "Summary:\n", "{concatenated_gists}\n", "\n", "Pages re-read already (DO NOT ask to read them again):\n", "{past_page_numbers}\n", "\n", "Question:\n", "{question}\n", "\n", "Previous Answer:\n", "{previous_answer}\n", "\n", "Based on the summary of each page, you may read the full details of multiple pages to obtain more information to answer the question.\n", "To read a page again, respond with \"I want to look up Page $PAGE_NUM\", replacing $PAGE_NUM with the target page number.\n", "PAGE_NUM is a page index between 0 and {end}, excluding {pages_reread}.\n", "You can only specify a SINGLE page in your response at this time. The page should not be in the re-read pages.\n", "To stop, simply say \"STOP\".\n", "DO NOT answer the question in your response.\n", "You don't need to answer the question yet.\n", "\"\"\"\n", "# concatenated_gists: concatenated gists\n", "# past_page_numbers: (only after the first query) page numbers that have already been retrieved\n", "# question: a question\n", "# previous_answer: the previous answer given by the model with gists and previously retrieved raw pages\n", "\n", "\n", "\n", "# Response/Answer\n", "answer_prompt_template = \"\"\"\n", "{concatenated_pages_and_gists}\n", "\n", "Question:\n", "{question}\n", "\n", "Answer the question based on the above passage and retrieved pages. Your answer should be short and concise.\n", "\"\"\"\n", "# concatenated_pages_and_gists: concatenated raw pages and gists\n", "# question: a question" ], "metadata": { "id": "K1pw1NPasHPC" }, "execution_count": null, "outputs": [] } ] }