Spaces:

jerpint
/

buster-dev

Runtime error

App Files Files Community

jerpint committed on Feb 28, 2023

Commit

5b7d0e6

unverified ·

1 Parent(s): 71e7dd8

Fix formatting issues (#56)

Browse files

* Rename formatter
* override response without sources
* Add gradio formatter
* add factory for responses
* Fix circular imports

Files changed (11) hide show

buster/apps/gradio_app.ipynb +8 -6
buster/apps/slackbot.py +5 -8
buster/chatbot.py +21 -17
buster/formatter/__init__.py +16 -5
buster/formatter/base.py +16 -8
buster/formatter/factory.py +22 -0
buster/formatter/gradio.py +28 -0
buster/formatter/html.py +4 -4
buster/formatter/markdown.py +3 -12
buster/formatter/slack.py +3 -12
pyproject.toml +1 -1

buster/apps/gradio_app.ipynb CHANGED Viewed

@@ -9,6 +9,9 @@
    },
    "outputs": [],
    "source": [
     "import gradio as gr\n",
     "\n",
     "from buster.chatbot import Chatbot, ChatbotConfig\n",
@@ -24,9 +27,8 @@
     "        \"engine\": \"text-davinci-003\",\n",
     "        \"max_tokens\": 500,\n",
     "    },\n",
-    "    separator=\"<br>\",\n",
-    "    link_format=\"markdown\",\n",
-    "    text_after_response=\"I'm a bot 🤖 trained to answer huggingface 🤗 transformers questions. My answers aren't always perfect.\",\n",
     "    text_before_prompt=\"\"\"You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.\n",
     "Make sure to format your answers in Markdown format, including code block and snippets.\n",
     "Do not include any links to urls or hyperlinks in your answers.\n",
@@ -109,7 +111,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -123,11 +125,11 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.12 (main, Apr  5 2022, 01:52:34) \n[Clang 12.0.0 ]"
   },
   "vscode": {
    "interpreter": {
-    "hash": "5abee959af829406add33dbeab4b81a0c8afd11a2faef151b217e9aebad2d8c1"
    }
   }
  },

    },
    "outputs": [],
    "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
     "import gradio as gr\n",
     "\n",
     "from buster.chatbot import Chatbot, ChatbotConfig\n",
     "        \"engine\": \"text-davinci-003\",\n",
     "        \"max_tokens\": 500,\n",
     "    },\n",
+    "    link_format=\"gradio\",\n",
+    "    response_footnote=\"I'm a bot 🤖 trained to answer huggingface 🤗 transformers questions. My answers aren't always perfect.\",\n",
     "    text_before_prompt=\"\"\"You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.\n",
     "Make sure to format your answers in Markdown format, including code block and snippets.\n",
     "Do not include any links to urls or hyperlinks in your answers.\n",
  ],
  "metadata": {
   "kernelspec": {
+   "display_name": "buster",
    "language": "python",
    "name": "python3"
   },
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
+   "version": "3.10.9"
   },
   "vscode": {
    "interpreter": {
+    "hash": "bfa91706490f6a3314a87f4853806d905e46027cd889e58fcad4739e8600f624"
    }
   }
  },

buster/apps/slackbot.py CHANGED Viewed

@@ -26,8 +26,8 @@ mila_doc_cfg = ChatbotConfig(
         "max_tokens": 200,
     },
     separator="\n",
-    link_format="slack",
-    text_after_response="""I'm a bot 🤖 and not always perfect.
     For more info, view the full documentation here (https://docs.mila.quebec/) or contact support@mila.quebec
     """,
     text_before_prompt="""
@@ -62,8 +62,7 @@ orion_cfg = ChatbotConfig(
         "max_tokens": 200,
     },
     separator="\n",
-    link_format="slack",
-    text_after_response="I'm a bot 🤖 and not always perfect.",
     text_before_prompt="""You are a slack chatbot assistant answering technical questions about orion, a hyperparameter optimization library written in python.
     Make sure to format your answers in Markdown format, including code block and snippets.
     Do not include any links to urls or hyperlinks in your answers.
@@ -95,8 +94,7 @@ pytorch_cfg = ChatbotConfig(
         "max_tokens": 500,
     },
     separator="\n",
-    link_format="slack",
-    text_after_response="I'm a bot 🤖 and not always perfect.",
     text_before_prompt="""You are a slack chatbot assistant answering technical questions about pytorch, a library to train neural networks written in python.
     Make sure to format your answers in Markdown format, including code block and snippets.
     Do not include any links to urls or hyperlinks in your answers.
@@ -128,8 +126,7 @@ hf_transformers_cfg = ChatbotConfig(
         "max_tokens": 500,
     },
     separator="\n",
-    link_format="slack",
-    text_after_response="I'm a bot 🤖 and not always perfect.",
     text_before_prompt="""You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.
     Make sure to format your answers in Markdown format, including code block and snippets.
     Do not include any links to urls or hyperlinks in your answers.

         "max_tokens": 200,
     },
     separator="\n",
+    response_format="slack",
+    response_footnote="""I'm a bot 🤖 and not always perfect.
     For more info, view the full documentation here (https://docs.mila.quebec/) or contact support@mila.quebec
     """,
     text_before_prompt="""
         "max_tokens": 200,
     },
     separator="\n",
+    response_format="slack",
     text_before_prompt="""You are a slack chatbot assistant answering technical questions about orion, a hyperparameter optimization library written in python.
     Make sure to format your answers in Markdown format, including code block and snippets.
     Do not include any links to urls or hyperlinks in your answers.
         "max_tokens": 500,
     },
     separator="\n",
+    response_format="slack",
     text_before_prompt="""You are a slack chatbot assistant answering technical questions about pytorch, a library to train neural networks written in python.
     Make sure to format your answers in Markdown format, including code block and snippets.
     Do not include any links to urls or hyperlinks in your answers.
         "max_tokens": 500,
     },
     separator="\n",
+    response_format="slack",
     text_before_prompt="""You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.
     Make sure to format your answers in Markdown format, including code block and snippets.
     Do not include any links to urls or hyperlinks in your answers.

buster/chatbot.py CHANGED Viewed

@@ -10,11 +10,12 @@ import promptlayer
 from openai.embeddings_utils import cosine_similarity, get_embedding
 from buster.documents import get_documents_manager_from_extension
-from buster.formatter import Formatter, HTMLFormatter, MarkdownFormatter, SlackFormatter
-from buster.formatter.base import Response, Source
-FORMATTERS = {"text": Formatter, "slack": SlackFormatter, "html": HTMLFormatter, "markdown": MarkdownFormatter}
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -41,10 +42,10 @@ class ChatbotConfig:
     max_words: maximum number of words the retrieved documents can be. Will truncate otherwise.
     completion_kwargs: kwargs for the OpenAI.Completion() method
     separator: the separator to use, can be either "\n" or <p> depending on rendering.
-    link_format: the type of format to render links with, e.g. slack or markdown
     unknown_prompt: Prompt to use to generate the "I don't know" embedding to compare to.
     text_before_prompt: Text to prompt GPT with before the user prompt, but after the documentation.
-    text_after_response: Generic response to add the the chatbot's reply.
     """
     documents_file: str = "buster/data/document_embeddings.tar.gz"
@@ -65,11 +66,11 @@ class ChatbotConfig:
         }
     )
     separator: str = "\n"
-    link_format: str = "slack"
     unknown_prompt: str = "I Don't know how to answer your question."
     text_before_documents: str = "You are a chatbot answering questions.\n"
     text_before_prompt: str = "Answer the following question:\n"
-    text_after_response: str = "I'm a chatbot, bleep bloop."
 class Chatbot:
@@ -78,6 +79,12 @@ class Chatbot:
         self.cfg = cfg
         self._init_documents()
         self._init_unk_embedding()
     def _init_documents(self):
         filepath = self.cfg.documents_file
@@ -183,10 +190,12 @@ class Chatbot:
             )
             if relevant:
                 sources = (
-                    Source(dct["name"], dct["url"], dct["similarity"])
                     for dct in matched_documents.to_dict(orient="records")
                 )
             else:
                 sources = tuple()
         return response, sources
@@ -211,16 +220,11 @@ class Chatbot:
         # Likely that the answer is meaningful, add the top sources
         return score < unk_threshold
-    def process_input(self, question: str, formatter: Formatter = None) -> str:
         """
         Main function to process the input question and generate a formatted output.
         """
-        if formatter is None and self.cfg.link_format not in FORMATTERS:
-            raise ValueError(f"Unknown link format {self.cfg.link_format}")
-        elif formatter is None:
-            formatter = FORMATTERS[self.cfg.link_format]()
         logger.info(f"User Question:\n{question}")
         # We make sure there is always a newline at the end of the question to avoid completing the question.
@@ -241,4 +245,4 @@ class Chatbot:
         )
         response, sources = self.generate_response(prompt, matched_documents, self.cfg.unknown_prompt)
-        return formatter(response, sources)

 from openai.embeddings_utils import cosine_similarity, get_embedding
 from buster.documents import get_documents_manager_from_extension
+from buster.formatter import (
+    Response,
+    ResponseFormatter,
+    Source,
+    response_formatter_factory,
+)
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
     max_words: maximum number of words the retrieved documents can be. Will truncate otherwise.
     completion_kwargs: kwargs for the OpenAI.Completion() method
     separator: the separator to use, can be either "\n" or <p> depending on rendering.
+    response_format: the type of format to render links with, e.g. slack or markdown
     unknown_prompt: Prompt to use to generate the "I don't know" embedding to compare to.
     text_before_prompt: Text to prompt GPT with before the user prompt, but after the documentation.
+    response_footnote: Generic response to add to the chatbot's reply.
     """
     documents_file: str = "buster/data/document_embeddings.tar.gz"
         }
     )
     separator: str = "\n"
+    response_format: str = "slack"
     unknown_prompt: str = "I Don't know how to answer your question."
     text_before_documents: str = "You are a chatbot answering questions.\n"
     text_before_prompt: str = "Answer the following question:\n"
+    response_footnote: str = "I'm a bot 🤖 and not always perfect."
 class Chatbot:
         self.cfg = cfg
         self._init_documents()
         self._init_unk_embedding()
+        self._init_response_formatter()
+    def _init_response_formatter(self):
+        self.response_formatter = response_formatter_factory(
+            format=self.cfg.response_format, response_footnote=self.cfg.response_footnote
+        )
     def _init_documents(self):
         filepath = self.cfg.documents_file
             )
             if relevant:
                 sources = (
+                    Source(dct["source"], dct["url"], dct["similarity"])
                     for dct in matched_documents.to_dict(orient="records")
                 )
             else:
+                # Override the answer with a generic unknown prompt, without sources.
+                response = Response(text=self.cfg.unknown_prompt)
                 sources = tuple()
         return response, sources
         # Likely that the answer is meaningful, add the top sources
         return score < unk_threshold
+    def process_input(self, question: str, formatter: ResponseFormatter = None) -> str:
         """
         Main function to process the input question and generate a formatted output.
         """
         logger.info(f"User Question:\n{question}")
         # We make sure there is always a newline at the end of the question to avoid completing the question.
         )
         response, sources = self.generate_response(prompt, matched_documents, self.cfg.unknown_prompt)
+        return self.response_formatter(response, sources)

buster/formatter/__init__.py CHANGED Viewed

@@ -1,6 +1,17 @@
-from .base import Formatter
-from .html import HTMLFormatter
-from .markdown import MarkdownFormatter
-from .slack import SlackFormatter
-__all__ = [Formatter, HTMLFormatter, MarkdownFormatter, SlackFormatter]

+from .base import Response, ResponseFormatter, Source
+from .factory import response_formatter_factory
+from .gradio import GradioResponseFormatter
+from .html import HTMLResponseFormatter
+from .markdown import MarkdownResponseFormatter
+from .slack import SlackResponseFormatter
+__all__ = [
+    Source,
+    Response,
+    ResponseFormatter,
+    HTMLResponseFormatter,
+    MarkdownResponseFormatter,
+    SlackResponseFormatter,
+    GradioResponseFormatter,
+    response_formatter_factory,
+]

buster/formatter/base.py CHANGED Viewed

@@ -4,7 +4,7 @@ from typing import Iterable, NamedTuple
 # Should be from the `documents` module.
 class Source(NamedTuple):
-    name: str
     url: str
     question_similarity: float
     # TODO Add answer similarity.
@@ -20,12 +20,18 @@ class Response:
 @dataclass
-class Formatter:
     source_template: str = "{source.name} (relevance: {source.question_similarity:2.3f})"
-    error_msg_template: str = "Something went wrong: {response.error_msg}"
     error_fallback_template: str = "Something went very wrong."
-    sourced_answer_template: str = "{response.text}\n\nSources:\n{sources}\n\nBut what do I know, I'm a chatbot."
-    unsourced_answer_template: str = "{response.text}\n\nBut what do I know, I'm a chatbot."
     def source_item(self, source: Source) -> str:
         """Format a single source item."""
@@ -48,10 +54,12 @@ class Formatter:
     def answer(self, response: Response, sources: Iterable[Source]) -> str:
         """Format an answer and its sources."""
         sources_list = self.sources_list(sources)
-        if not sources_list:
-            return self.sourced_answer_template.format(response=response, sources=sources_list)
-        return self.unsourced_answer_template.format(response=response)
     def __call__(self, response: Response, sources: Iterable[Source]) -> str:
         """Format an answer and its sources, or an error message."""

 # Should be from the `documents` module.
 class Source(NamedTuple):
+    source: str
     url: str
     question_similarity: float
     # TODO Add answer similarity.
 @dataclass
+class ResponseFormatter:
+    response_footnote: str
     source_template: str = "{source.name} (relevance: {source.question_similarity:2.3f})"
+    error_msg_template: str = """Something went wrong:\n{response.error_msg}"""
     error_fallback_template: str = "Something went very wrong."
+    sourced_answer_template: str = (
+        """{response.text}\n\n"""
+        """📝 Here are the sources I used to answer your question:\n"""
+        """{sources}\n\n"""
+        """{footnote}"""
+    )
+    unsourced_answer_template: str = "{response.text}\n\n{footnote}"
     def source_item(self, source: Source) -> str:
         """Format a single source item."""
     def answer(self, response: Response, sources: Iterable[Source]) -> str:
         """Format an answer and its sources."""
         sources_list = self.sources_list(sources)
+        if sources_list:
+            return self.sourced_answer_template.format(
+                response=response, sources=sources_list, footnote=self.response_footnote
+            )
+        return self.unsourced_answer_template.format(response=response, footnote=self.response_footnote)
     def __call__(self, response: Response, sources: Iterable[Source]) -> str:
         """Format an answer and its sources, or an error message."""

buster/formatter/factory.py ADDED Viewed

	@@ -0,0 +1,22 @@

+import logging
+import buster.formatter as F
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+def response_formatter_factory(format: str, **kwargs):
+    logger.info(f"Using formatter: {format}")
+    if format == "text":
+        return F.ResponseFormatter(**kwargs)
+    elif format == "slack":
+        return F.SlackResponseFormatter(**kwargs)
+    elif format == "HTML":
+        return F.HTMLResponseFormatter(**kwargs)
+    elif format == "gradio":
+        return F.GradioResponseFormatter(**kwargs)
+    elif format == "markdown":
+        return F.MarkdownResponseFormatter(**kwargs)
+    else:
+        raise ValueError(f"Undefined {format=}")

buster/formatter/gradio.py ADDED Viewed

	@@ -0,0 +1,28 @@

+from dataclasses import dataclass
+from typing import Iterable
+from buster.formatter import ResponseFormatter, Source
+@dataclass
+class GradioResponseFormatter(ResponseFormatter):
+    """Format the answer for gradio chat interface."""
+    error_msg_template: str = """Something went wrong:<br>{response.error_msg}"""
+    error_fallback_template: str = "Something went very wrong."
+    sourced_answer_template: str = (
+        """{response.text}<br><br>"""
+        """📝 Here are the sources I used to answer your question:<br>"""
+        """{sources}<br><br>"""
+        """{footnote}"""
+    )
+    unsourced_answer_template: str = "{response.text}<br><br>{footnote}"
+    source_template: str = """[🔗 {source.source}]({source.url}), relevance: {source.question_similarity:2.3f}"""
+    def sources_list(self, sources: Iterable[Source]) -> str | None:
+        """Format sources into a list."""
+        items = [self.source_item(source) for source in sources]
+        if not items:
+            return None  # No list needed.
+        return "<br>".join(items)

buster/formatter/html.py CHANGED Viewed

@@ -2,14 +2,14 @@ import html
 from dataclasses import dataclass
 from typing import Iterable
-from buster.formatter.base import Formatter, Response, Source
 @dataclass
-class HTMLFormatter(Formatter):
     """Format the answer in HTML."""
-    source_template: str = """<li><a href='{source.url}'>🔗 {source.name}</a></li>"""
     error_msg_template: str = """<div class="error">Something went wrong:\n<p>{response.error_msg}</p></div>"""
     error_fallback_template: str = """<div class="error">Something went very wrong.</div>"""
     sourced_answer_template: str = (
@@ -37,5 +37,5 @@ class HTMLFormatter(Formatter):
             response.error,
             html.escape(response.error_msg) if response.error_msg else response.error_msg,
         )
-        sources = (Source(html.escape(source.name), source.url, source.question_similarity) for source in sources)
         return super().__call__(response, sources)

 from dataclasses import dataclass
 from typing import Iterable
+from buster.formatter.base import Response, ResponseFormatter, Source
 @dataclass
+class HTMLResponseFormatter(ResponseFormatter):
     """Format the answer in HTML."""
+    source_template: str = """<li><a href='{source.url}'>🔗 {source.source}</a></li>"""
     error_msg_template: str = """<div class="error">Something went wrong:\n<p>{response.error_msg}</p></div>"""
     error_fallback_template: str = """<div class="error">Something went very wrong.</div>"""
     sourced_answer_template: str = (
             response.error,
             html.escape(response.error_msg) if response.error_msg else response.error_msg,
         )
+        sources = (Source(html.escape(source.source), source.url, source.question_similarity) for source in sources)
         return super().__call__(response, sources)

buster/formatter/markdown.py CHANGED Viewed

@@ -1,23 +1,14 @@
 from dataclasses import dataclass
 from typing import Iterable
-from buster.formatter.base import Formatter, Source
 @dataclass
-class MarkdownFormatter(Formatter):
     """Format the answer in markdown."""
-    source_template: str = """[🔗 {source.name}]({source.url}), relevance: {source.question_similarity:2.3f}"""
-    error_msg_template: str = """Something went wrong:\n{response.error_msg}"""
-    error_fallback_template: str = """Something went very wrong."""
-    sourced_answer_template: str = (
-        """{response.text}\n\n"""
-        """📝 Here are the sources I used to answer your question:\n"""
-        """{sources}\n\n"""
-        """I'm a chatbot, bleep bloop."""
-    )
-    unsourced_answer_template: str = """{response.text}\n\nI'm a chatbot, bleep bloop."""
     def sources_list(self, sources: Iterable[Source]) -> str | None:
         """Format sources into a list."""

 from dataclasses import dataclass
 from typing import Iterable
+from buster.formatter.base import ResponseFormatter, Source
 @dataclass
+class MarkdownResponseFormatter(ResponseFormatter):
     """Format the answer in markdown."""
+    source_template: str = """[🔗 {source.source}]({source.url}), relevance: {source.question_similarity:2.3f}"""
     def sources_list(self, sources: Iterable[Source]) -> str | None:
         """Format sources into a list."""

buster/formatter/slack.py CHANGED Viewed

@@ -1,23 +1,14 @@
 from dataclasses import dataclass
 from typing import Iterable
-from buster.formatter.base import Formatter, Source
 @dataclass
-class SlackFormatter(Formatter):
     """Format the answer for Slack."""
-    source_template: str = """<{source.url}|🔗 {source.name}>, relevance: {source.question_similarity:2.3f}"""
-    error_msg_template: str = """Something went wrong:\n{response.error_msg}"""
-    error_fallback_template: str = """Something went very wrong."""
-    sourced_answer_template: str = (
-        """{response.text}\n\n"""
-        """📝 Here are the sources I used to answer your question:\n"""
-        """{sources}\n\n"""
-        """I'm a chatbot, bleep bloop."""
-    )
-    unsourced_answer_template: str = """{response.text}\n\nI'm a chatbot, bleep bloop."""
     def sources_list(self, sources: Iterable[Source]) -> str | None:
         """Format sources into a list."""

 from dataclasses import dataclass
 from typing import Iterable
+from buster.formatter import ResponseFormatter, Source
 @dataclass
+class SlackResponseFormatter(ResponseFormatter):
     """Format the answer for Slack."""
+    source_template: str = """<{source.url}|🔗 {source.source}>, relevance: {source.question_similarity:2.3f}"""
     def sources_list(self, sources: Iterable[Source]) -> str | None:
         """Format sources into a list."""

pyproject.toml CHANGED Viewed

@@ -7,7 +7,7 @@ name = "buster"
 version = "0.0.1"
 description = "buster the bot for the mila cluster"
 readme = "README.md"
-requires-python = ">=3.8"
 dynamic = ["dependencies"]
 [tool.setuptools.dynamic]

 version = "0.0.1"
 description = "buster the bot for the mila cluster"
 readme = "README.md"
+requires-python = ">=3.10"
 dynamic = ["dependencies"]
 [tool.setuptools.dynamic]