drmjh commited on
Commit
617da91
·
1 Parent(s): 8c50ce7

initial commit for testing

Browse files
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ __pycache__
2
+ .venv
3
+ wandb
app.py ADDED
@@ -0,0 +1,361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Chatbot App for Cognitive Debriefing Interview
3
+
4
+ Author: Dr Musashi Hinck
5
+
6
+ Version Log:
7
+ - 02.04.24: Initial demo with passed values from Qualtrics survey
8
+ - 07.04.24: Added configurations for survey edition
9
+
10
+ Notes:
11
+ - Need to call Request from start state
12
+ - Example URL: localhost:7860/?user=123&session=456&questionid=0&response=0
13
+
14
+
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import os
20
+ import json
21
+ import logging
22
+ import gradio as gr
23
+ from uuid import uuid4
24
+ from typing import Generator, Any
25
+
26
+ from pathlib import Path
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+ from utils import (
31
+ PromptTemplate,
32
+ convert_gradio_to_openai,
33
+ initialize_client,
34
+ seed_azure_key,
35
+ upload_azure,
36
+ record_chat,
37
+ )
38
+
39
+
40
# %% Initialize common assets (module level — shared by every Gradio session)
if os.environ.get("AZURE_ENDPOINT") is None:  # Set Azure credentials from local files
    seed_azure_key()
client = initialize_client()  # Shared across sessions
# Maps the `questionid` query param to a question JSON filename under assets/questions/
question_mapping: dict[str, str] = json.loads(Path("assets/question_mapping.json").read_text())
45
+
46
+
47
+ # %% (functions)
48
+
49
+ # Initialization
50
+ # - Record user and session id
51
+ # - Record question and response
52
+ # - Build system message
53
+ # - Build initial message
54
+ # - Wrapper - start_survey
55
+
56
+
57
def initialize_interview(request: gr.Request) -> tuple[str, str, str, str, str]:
    """
    Parse the incoming request's query parameters and build per-session state.

    Read: gr.Request carrying ?user=&session=&questionid=&response= params
    (e.g. localhost:7860/?user=123&session=456&questionid=0&response=0).
    Returns: (user_id, session_id, question_wording, initial_message, system_message).
    Raises: KeyError if `questionid` is not in question_mapping; ValueError /
    IndexError if `response` is not a valid index into the question's choices.
    """
    # Parse identity params; defaults allow running outside Qualtrics for testing
    request_params = request.query_params
    user_id: str = request_params.get("user", "testUser")
    session_id: str = request_params.get("session", "testSession")
    logger.info(f"User: {user_id} (Session: {session_id})")

    # Resolve the question wording and the respondent's chosen answer text
    question_id: str = request_params.get("questionid", "0")
    response_id: str = request_params.get("response", "0")
    question_data: dict = json.loads(Path(f"./assets/questions/{question_mapping[question_id]}").read_text())
    question_wording: str = question_data["question"]
    # Bug fix: "choices" is a JSON array, not a string — annotation corrected.
    question_choices: list[str] = question_data["choices"]
    response_text: str = question_choices[int(response_id)]
    logger.info(f"Question: {question_wording} ({response_text})")

    # Fill the prompt templates with the question/response context
    initial_message: str = PromptTemplate.from_file("assets/initial_message.txt").format(surveyQuestion=question_wording)
    system_message: str = PromptTemplate.from_file("assets/system_message.txt").format(surveyQuestion=question_wording, responseVal=response_text)
    logger.info(f"Initial message: {initial_message}")
    logger.info(f"System message: {system_message}")

    # Return all per-session values (stored into gr.State components)
    return (
        user_id,
        session_id,
        question_wording,
        initial_message,
        system_message
    )
91
+
92
def initialize_interface(initial_message: str) -> tuple:
    """
    Switch the UI from its idle state into interactive chat mode.

    Read: initial_message (first bot turn used to seed the chat display).
    Returns replacement components, in order:
        instruction_text -> cleared
        chat_display     -> seeded with the initial bot message
        chat_input       -> visible + interactive, submit-hint placeholder
        chat_submit      -> visible + interactive
        start_button     -> hidden
    """
    cleared_instructions = gr.Markdown("")
    seeded_display = gr.Chatbot(
        value=[[None, initial_message]],
        elem_id="chatDisplay",
        show_label=False,
        visible=True,
    )
    live_input = gr.Textbox(
        placeholder="Type response here. Hit `Enter` or click the arrow to submit.",
        visible=True,
        interactive=True,
        show_label=False,
        scale=10,
    )
    live_submit = gr.Button(
        "",
        variant="primary",
        interactive=True,
        icon="./arrow_icon.svg",
        visible=True,
    )
    hidden_start = gr.Button("Start Interview", visible=False, variant="primary")
    return (cleared_instructions, seeded_display, live_input, live_submit, hidden_start)
126
+
127
+
128
+ # Interaction
129
+ # - User message
130
+ # - Bot message
131
+ # - Check if interview finished
132
+ # - Record interaction (local log)
133
+
134
+
135
+ def user_message(
136
+ message: str, chat_history: list[list[str | None]]
137
+ ) -> tuple[str, list[list[str | None]]]:
138
+ "Display user message immediately"
139
+ return "", chat_history + [[message, None]]
140
+
141
+
142
def bot_message(
    chat_history: list[list[str | None]],
    system_message: str,
    model_args: dict | None = None,
) -> Generator[Any, Any, Any]:
    """Streams response from OpenAI API to chat interface.

    :param chat_history: gradio-format history; last entry is the pending user turn.
    :param system_message: system prompt prepended to the request.
    :param model_args: extra kwargs for chat.completions.create. Bug fix: the
        original used a mutable dict literal as the default argument (shared
        across calls); default to None and build it per call instead.
    :yields: chat_history with the assistant slot progressively filled.
    """
    if model_args is None:
        model_args = {"model": "gpt-4o-default", "temperature": 0.0}
    # Prep messages: history minus the pending turn, wrapped with system + user
    user_msg = chat_history[-1][0]
    messages = convert_gradio_to_openai(chat_history[:-1])
    messages = (
        [{"role": "system", "content": system_message}]
        + messages
        + [{"role": "user", "content": user_msg}]
    )
    # API call (uses the module-level shared client)
    response = client.chat.completions.create(
        messages=messages, stream=True, **model_args
    )
    # Accumulate streamed deltas into the assistant slot, yielding each update
    chat_history[-1][1] = ""
    for chunk in response:
        delta = chunk.choices[0].delta.content
        if delta:
            chat_history[-1][1] += delta
        yield chat_history
167
+
168
+
169
def log_interaction(
    chat_history: list[list[str | None]],
    session_id: str,
) -> None:
    """Record the most recent user/bot message pair to the local chat log.

    NOTE(review): utils.record_chat is declared as
    record_chat(logger, session, role, record) — these 3-argument calls will
    raise TypeError at runtime unless record_chat's signature is adjusted to
    match; verify against utils.py.
    """
    record_chat(session_id, "user", chat_history[-1][0])
    record_chat(session_id, "bot", chat_history[-1][1])
176
+
177
+
178
def interview_end_check(
    chat_history: list[list[str | None]],
    limit: int = 20,
    end_of_interview: str = "<end_interview>",
) -> tuple[list[list[str | None]], gr.Button]:
    """
    Checks if the interview has completed using two conditions:
    1. The last bot message contains `end_of_interview` (default: "<end_interview>")
    2. Conversation length has reached `limit` (default: 20)

    If either condition is met, the "Save and Exit" button is displayed.
    (Doc fix: the docstring previously said the default limit was 10; it is 20.)
    The end-of-interview token is stripped from the displayed message.
    """
    flag = len(chat_history) >= limit
    # Robustness fix: the assistant slot can still be None (e.g. a failed
    # stream); the original did `end_of_interview in chat_history[-1][1]`
    # unconditionally, which raises TypeError on None.
    last_bot = chat_history[-1][1] if chat_history else None
    if last_bot and end_of_interview in last_bot:
        chat_history[-1][1] = last_bot.replace(end_of_interview, "")
        flag = True
    button = gr.Button("Save and Exit", visible=flag, variant="stop")
    return chat_history, button
198
+
199
+
200
+ # Completion
201
+ # - Create completion code
202
+ # - Append to message history
203
+ # - Display completion code
204
+
205
+
206
def generate_completion_code() -> str:
    """Return a fresh random completion code (a canonical 36-character UUID4)."""
    return f"{uuid4()}"
208
+
209
+
210
def upload_interview(
    session_id: str,
    chat_history: list[list[str | None]],
) -> None:
    """Upload the full chat history to Azure blob storage.

    Thin wrapper over utils.upload_azure; the blob is named after
    `session_id`. Network I/O only — no return value.
    """
    upload_azure(session_id, chat_history)
216
+
217
+
218
def end_interview(
    session_id: str,
    chat_history: list[list[str | None]],
) -> list[list[str | None]]:
    """Append a completion-code message to the chat and upload the transcript."""
    code = generate_completion_code()
    completion_message = (
        f"Thank you for participating.\n\n"
        f"Your completion code is: {code}\n\n"
        "Please now return to the Qualtrics survey "
        "and paste this code into the completion "
        "code box."
    )
    chat_history.append([None, completion_message])
    upload_interview(session_id, chat_history)
    return chat_history
233
+
234
+
235
# LAYOUT
with gr.Blocks(theme="sudeepshouche/minimalist") as demo:
    # Header and instructions
    gr.Markdown("# SurveyGPT Interview")
    instructionText = gr.Markdown(
        "Use this chat interface to talk to SurveyGPT.\n"
        "To start, click 'Start Interview' and follow the instructions.\n\n"
        "You can type your answer into the box below and hit 'Enter' or click the arrow to submit.\n\n"
        "The interview will end either after 2 minutes, or if the chatbot decides the interview is done.\n"
        "At this point, you will see a 'Save and Exit' button. Click this to save your responses and receive a completion code."
    )
    # Initialize empty hidden values. gr.State is per-session, so these hold
    # the values produced by initialize_interview for each visitor.
    userId = gr.State()
    sessionId = gr.State()
    questionWording = gr.State()
    initialMessage = gr.State()
    systemMessage = gr.State()
    modelArgs = gr.State(value={"model": "gpt-4o-default", "temperature": 0.0})

    # Chat app (display, input, submit button). Input/submit start hidden and
    # non-interactive; initialize_interface swaps in live versions on start.
    startButton = gr.Button("Start Interview", visible=True, variant="primary")
    chatDisplay = gr.Chatbot(
        value=None,
        elem_id="chatDisplay",
        show_label=False,
        visible=True,
    )
    with gr.Row():  # Interaction
        chatInput = gr.Textbox(
            placeholder="Click 'Start Interview' to begin.",
            visible=False,
            interactive=False,
            show_label=False,
            scale=10,
        )
        chatSubmit = gr.Button(
            "",
            variant="primary",
            visible=False,
            interactive=False,
            icon="./arrow_icon.svg",
        )
    exitButton = gr.Button("Save and Exit", visible=False, variant="stop")
    # Footer
    disclaimer = gr.HTML(
        """
        <div
            style='font-size: 1em;
            font-style: italic;
            position: fixed;
            left: 50%;
            bottom: 20px;
            transform: translate(-50%, -50%);
            margin: 0 auto;
            '
        >{}</div>
        """.format(
            "Statements by the chatbot may contain factual inaccuracies."
        )
    )

    # INTERACTIONS
    # Initialization: parse query params into session state, then flip the UI
    startButton.click(
        initialize_interview,  # Reads in request params
        inputs=None,
        outputs=[
            userId,
            sessionId,
            questionWording,
            initialMessage,
            systemMessage,
        ],
    ).then(
        initialize_interface,  # Changes interface to interactive mode
        inputs=[initialMessage],
        outputs=[
            instructionText,
            chatDisplay,
            chatInput,
            chatSubmit,
            startButton,
        ],
    )
    # Chat interaction chain: echo user msg -> stream bot reply -> log -> end-check
    # "Enter" key path
    chatInput.submit(
        user_message,
        inputs=[chatInput, chatDisplay],
        outputs=[chatInput, chatDisplay],
        queue=False,
    ).then(
        bot_message,
        inputs=[chatDisplay, systemMessage, modelArgs],
        outputs=[chatDisplay],
    ).then(
        log_interaction,
        inputs=[chatDisplay, sessionId],
    ).then(
        interview_end_check, inputs=[chatDisplay], outputs=[chatDisplay, exitButton]
    )

    # Button path (identical chain to the Enter-key path above)
    chatSubmit.click(
        user_message,
        inputs=[chatInput, chatDisplay],
        outputs=[chatInput, chatDisplay],
        queue=False,
    ).then(
        bot_message,
        inputs=[chatDisplay, systemMessage, modelArgs],
        outputs=[chatDisplay],
    ).then(
        log_interaction,
        inputs=[chatDisplay, sessionId],
    ).then(
        interview_end_check, inputs=[chatDisplay], outputs=[chatDisplay, exitButton]
    )

    # Save-and-exit: append the completion code and upload the transcript
    exitButton.click(
        end_interview, inputs=[sessionId, chatDisplay], outputs=[chatDisplay]
    )
358
+
359
+
360
if __name__ == "__main__":
    # Launch without auth; pass auth=auth_no_user to re-enable password gating.
    demo.launch()
arrow_icon.svg ADDED
assets/arrow_icon.svg ADDED
assets/config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "model_args" : {
3
+ "model": "gpt-4o-default",
4
+ "temperature": 0.0
5
+ }
6
+ }
assets/initial_message.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Hello! My name is SurveyGPT, a conversational AI designed to help improve survey research.
2
+
3
+ In the survey you were asked the following question:
4
+
5
+ {surveyQuestion}
6
+
7
+ What did you think was meant by the question?
assets/question_mapping.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "0": "partisanship.json",
3
+ "1": "ideology.json"
4
+ }
assets/questions/partisanship.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "question": "Generally speaking, do you think of yourself as a Republican, a Democrat, an independent, or something else?",
3
+ "choices": [
4
+ "Republican",
5
+ "Democrat",
6
+ "Independent",
7
+ "Something else"
8
+ ]
9
+ }
assets/system_message.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are an AI designed to help researchers validate questions before they field their surveys by pre-testing interviews with human respondents.
2
+ The person you are speaking with is a participant in a survey.
3
+
4
+ Your job is to conduct a Cognitive Debriefing interview with the respondent.
5
+
6
+ This interview consists of two parts.
7
+
8
+ - In the first part, ask "In the survey you were asked the following question:\n{surveyQuestion}\n\nWhat did you think was meant by the question?"
9
+ - In the second part, ask "You answered {responseVal} to the question. What did you mean by that?"
10
+
11
+ In both parts:
12
+
13
+ - After each answer, ask follow-up questions designed to expand and clarify responses.
14
+ - Move on to the next part once there is a satisfactory amount of information to conduct analyses of differences in how questions are understood.
15
+ - Do not lead the respondent to a particular answer or suggest answers to the respondent; your goal is to provide an informative transcript that the researcher can review afterwards to help revise their question wording so that new respondents will have the same understanding as intended by the researcher.
16
+
17
+ At the end of the survey, thank the respondent for participating and include the special <end_interview> token in the response to end the interview.
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ openai
3
+ wandb
4
+ azure-storage-blob
5
+ azure-identity
6
+ debugpy
utils.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import datetime
4
+ import json
5
+ import os
6
+ from configparser import ConfigParser
7
+ from pathlib import Path
8
+ from string import Formatter
9
+
10
+ import openai
11
+ from azure.storage.blob import BlobClient
12
+
13
+
14
# Logging util
def get_current_timestamp() -> str:
    """Return the current local time as an ISO-8601 formatted string."""
    now = datetime.datetime.now()
    return now.isoformat()
17
+
18
+
19
class ChatLoggerHandler:
    """Shared logging handler for chat logs. Runs common to all Gradio sessions.

    Writes one JSON object per line (JSONL) to ``<logdir>/<session>.jsonl``.
    """

    def __init__(self, logdir: str = "./logs") -> None:
        # Directory is created lazily on first record(), so constructing the
        # handler has no filesystem side effects.
        self.logdir: Path = Path(logdir)

    def record(self, session: str, role: str, record: str) -> None:
        """Append one chat message to the session's JSONL log file.

        Bug fix: the original used Path.write_text, which truncates the file
        on every call — only the most recent message survived. Open in append
        mode instead, and ensure the log directory exists.
        """
        log_entry = {
            "metadata": {"session": session, "timestamp": get_current_timestamp()},
            "record": {"role": role, "message": record},
        }
        self.logdir.mkdir(parents=True, exist_ok=True)
        with open(self.logdir / f"{session}.jsonl", "a", encoding="utf-8") as fh:
            fh.write(json.dumps(log_entry) + "\n")
31
+
32
+
33
# NOTE(review): app.py calls record_chat(session_id, role, message) with three
# positional arguments, but this function was declared
# record_chat(logger, session, role, record), which raised TypeError at
# runtime. The logger is now an optional trailing argument backed by a shared
# module-level handler, matching the existing call sites.
_default_chat_logger = ChatLoggerHandler()


def record_chat(
    session: str, role: str, record: str, logger: ChatLoggerHandler | None = None
) -> None:
    """Record one chat message via `logger` (defaults to the shared handler)."""
    handler = logger if logger is not None else _default_chat_logger
    handler.record(session, role, record)
37
+
38
+
39
# General Class
class PromptTemplate(str):
    """More robust string formatter. Takes a template string and parses out the
    named placeholder variables so .format() can validate its arguments."""

    def __init__(self, template: str) -> None:
        self.template: str = template
        self.variables: list[str] = self.parse_template()

    def parse_template(self) -> list[str]:
        "Returns the placeholder variable names found in the template."
        return [
            fn for _, fn, _, _ in Formatter().parse(self.template) if fn is not None
        ]

    def format(self, *args, **kwargs) -> str:
        """
        Formats the template string with the given arguments.
        Provides slightly more informative error handling than str.format.

        :param args: Positional arguments for unnamed placeholders, or a single
            dict mapping variable names to values.
        :param kwargs: Keyword arguments for named placeholders.
        :return: Formatted string.
        :raises ValueError: if arguments do not match template variables.
        """
        # Handle the single-dict positional argument FIRST. (Bug fix: the
        # original checked len(args) against len(self.variables) before this
        # case, so a dict supplying a multi-variable template was rejected
        # with "Number of arguments does not match" before it was unpacked.)
        if len(args) == 1 and isinstance(args[0], dict):
            arg_dict = args[0]
            if set(arg_dict) != set(self.variables):
                raise ValueError("Dictionary keys do not match template variables.")
            return self.template.format(**arg_dict)

        # Keyword arguments must cover exactly the template variables
        if kwargs and set(kwargs) != set(self.variables):
            raise ValueError("Keyword arguments do not match template variables.")

        # Positional argument count must match the number of template variables
        if args and len(args) != len(self.variables):
            raise ValueError(
                "Number of arguments does not match the number of template variables."
            )

        # No arguments at all is only valid for a variable-free template
        if not args and not kwargs and self.variables:
            raise ValueError("No arguments provided, but template expects variables.")

        try:
            return self.template.format(*args, **kwargs)
        except KeyError as e:
            # Chain the original KeyError for easier debugging (fix: `from e`)
            raise ValueError(f"Missing a keyword argument: {e}") from e

    @classmethod
    def from_file(cls, file_path: str) -> "PromptTemplate":
        "Alternate constructor: load the template from a UTF-8 text file."
        with open(file_path, encoding="utf-8") as file:
            template_content = file.read()
        return cls(template_content)

    def dump_prompt(self, file_path: str) -> None:
        "Write the raw template text to a file."
        # (Fix: removed the redundant file.close() — the context manager
        # already closes the file.)
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(self.template)
100
+
101
+
102
+ def convert_gradio_to_openai(
103
+ chat_history: list[list[str | None]],
104
+ ) -> list[dict[str, str]]:
105
+ "Converts gradio chat format -> openai chat request format"
106
+ messages = []
107
+ for pair in chat_history: # [(user), (assistant)]
108
+ for i, role in enumerate(["user", "assistant"]):
109
+ if not ((pair[i] is None) or (pair[i] == "")):
110
+ messages += [{"role": role, "content": pair[i]}]
111
+ return messages
112
+
113
+
114
+ def convert_openai_to_gradio(
115
+ messages: list[dict[str, str]]
116
+ ) -> list[list[str, str | None]]:
117
+ "Converts openai chat request format -> gradio chat format"
118
+ chat_history = []
119
+ if messages[0]["role"] != "user":
120
+ messages.insert(0, {"role": "user", "content": None})
121
+ for i in range(0, len(messages), 2):
122
+ chat_history.append([messages[i]["content"], messages[i + 1]["content"]])
123
+ return chat_history
124
+
125
+
126
def seed_azure_key(cfg: str = "~/.cfg/openai.cfg") -> None:
    """Populate the AZURE_ENDPOINT / AZURE_SECRET env vars from a local INI file.

    :param cfg: path to an INI file with an [AZURE] section containing
        ``endpoint`` and ``key`` entries. ``~`` is expanded.
    :raises ValueError: if the file cannot be read or lacks the expected keys.
    """
    config = ConfigParser()
    # Bug fix: ConfigParser.read silently skips unreadable/missing files and
    # returns the list of files it parsed — it does not raise — so the
    # original bare `except:` never fired. Check the return value instead.
    parsed = config.read(Path(cfg).expanduser())
    if not parsed:
        raise ValueError(f"Could not read config file at: {cfg}.")
    try:
        os.environ["AZURE_ENDPOINT"] = config["AZURE"]["endpoint"]
        os.environ["AZURE_SECRET"] = config["AZURE"]["key"]
    except KeyError as e:
        raise ValueError(f"Config file {cfg} is missing expected AZURE entry: {e}") from e
134
+
135
+
136
def initialize_client() -> openai.AzureOpenAI:
    """Create the shared Azure OpenAI client from environment variables.

    Bug fix: the return annotation said openai.AsyncClient, but a synchronous
    AzureOpenAI client is constructed and returned.

    :raises KeyError: if AZURE_ENDPOINT / AZURE_SECRET are unset.
    """
    client = openai.AzureOpenAI(
        azure_endpoint=os.environ["AZURE_ENDPOINT"],
        api_key=os.environ["AZURE_SECRET"],
        api_version="2023-05-15",
    )
    return client
143
+
144
+
145
def auth_no_user(username, password):
    """Gradio auth callback: accept any username, checking only the password
    against the GRADIO_PASSWORD environment variable (empty-string default)."""
    expected = os.getenv("GRADIO_PASSWORD", "")
    return password == expected
150
+
151
+
152
def upload_azure(conversation_id: str, chat_history) -> None:
    """Upload a chat transcript to Azure Blob Storage as JSON lines.

    One blob per conversation, named after `conversation_id`. Requires the
    AZURE_CONN_STR and AZURE_CONTAINER_NAME environment variables; if either
    is unset, BlobClient.from_connection_string will fail (no local guard).
    """
    # Get blob client
    conn_str = os.getenv("AZURE_CONN_STR")
    container_name = os.getenv("AZURE_CONTAINER_NAME")
    blob_name = conversation_id
    blob_client = BlobClient.from_connection_string(conn_str, container_name, blob_name)

    # Convert chat_history to json lines (OpenAI-style role/content records)
    records = convert_gradio_to_openai(chat_history)
    records_text = "\n".join([json.dumps(record) for record in records])
    # NOTE(review): blob_type="AppendBlob" combined with overwrite=True looks
    # inconsistent — confirm the intended semantics against the Azure SDK docs.
    blob_client.upload_blob(records_text, blob_type="AppendBlob", overwrite=True)
+ blob_client.upload_blob(records_text, blob_type="AppendBlob", overwrite=True)