Spaces:

sevdeawesome
/

safetybot

Runtime error

App Files Files Community

sevdeawesome committed on Oct 3, 2023

Commit

cddd748

1 Parent(s): a95ca0c

add file

Browse files

Files changed (1) hide show

app2.py.deprocated +195 -0

app2.py.deprocated ADDED Viewed

	@@ -0,0 +1,195 @@

+'''
+CONFIG AND IMPORTS
+'''
+from config import default_config
+from types import SimpleNamespace
+import gradio as gr
+import os, random
+from pathlib import Path
+import tiktoken
+from getpass import getpass
+from rich.markdown import Markdown
+import openai
+import wandb
+from pprint import pprint
+from wandb.integration.openai import autolog
+from langchain.text_splitter import MarkdownHeaderTextSplitter
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.vectorstores import Chroma
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_random_exponential, # for exponential backoff
+)
+if os.getenv("OPENAI_API_KEY") is None:
+  if any(['VSCODE' in x for x in os.environ.keys()]):
+    print('Please enter password in the VS Code prompt at the top of your VS Code window!')
+  os.environ["OPENAI_API_KEY"] = getpass("Paste your OpenAI key from: https://platform.openai.com/account/api-keys\n")
+  openai.api_key = os.getenv("OPENAI_API_KEY", "")
+assert os.getenv("OPENAI_API_KEY", "").startswith("sk-"), "This doesn't look like a valid OpenAI API key"
+print("OpenAI API key configured")
+def find_nearest_neighbor(argument="", max_args_in_output=3):
+    '''
+        INPUT:
+            argument (string)
+        RETURN the nearest neighbor(s) in vectorDB to argument as string
+    '''
+    md = ""
+    print(argument)
+    directory_path = "../../safety_docs"
+    for filename in os.listdir(directory_path):
+        if filename.endswith(".md"):
+            with open(os.path.join(directory_path, filename), 'r') as file:
+                content = file.read()
+                md = md + content
+    markdown_document = md
+    headers_to_split_on = [
+        ("#", "Header 1"),
+        ("##", "Header 2"),
+        ("###", "Header 3"),
+    ]
+    markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
+    md_header_splits = markdown_splitter.split_text(markdown_document)
+    embeddings = OpenAIEmbeddings()
+    db = Chroma.from_documents(md_header_splits, embeddings)
+    retriever = db.as_retriever(search_kwargs=dict(k=11))
+    docs = retriever.get_relevant_documents(argument)
+    output = "" # output to return, a list of common args
+    seen = set() # which documents have been added to output
+    count = 0 # count how many embeddings have been added to output
+    for doc in docs:
+        if doc.metadata["Header 1"] not in seen:
+            output = output + doc.metadata["Header 1"] + '\n'
+            count = count + 1
+        seen.add(doc.metadata["Header 1"])
+        if count >= max_args_in_output:
+            break
+    return output
+def get_gpt_response(argument, user_prompt, system_prompt=default_config.system_prompt, model=default_config.model_name, n=1, max_tokens=200):
+    '''
+    INPUT:
+    Argument
+    user_prompt
+    system_prompt
+    model
+    '''
+    @retry(wait=wait_random_exponential(min=1, max=3), stop=stop_after_attempt(1))
+    def completion_with_backoff(**kwargs):
+        return openai.ChatCompletion.create(**kwargs)
+    messages=[
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ]
+    responses = completion_with_backoff(
+        model=model,
+        messages=messages,
+        n = n,
+        max_tokens=max_tokens
+        )
+    for response in responses.choices:
+        generation = response.message.content
+        return generation
+def greet(argument):
+    nearest_neighbor = find_nearest_neighbor(argument)
+    user_prompt = default_config.user_prompt_1 + argument + default_config.user_prompt_2
+    # response = get_gpt_response(argument, user_prompt)
+    response = "chatbot response here"
+    return "Hello " + "\n We think your argument matches common arguments in our database, is it one of these?:\n " + nearest_neighbor + "\n\n\n ------------------------- \n\n\n Lengthy response: \n" + response
+demo = gr.Interface(
+    fn=greet,
+    inputs=gr.Textbox(lines=2, placeholder="Anything past 200 tokens (roughly 200 words) will be cutoff. Please enter <=1 paragraph"),
+    outputs="text"
+)
+# demo.queue(max_size=20)
+demo.launch()
+def find_nearest_neighbor(argument=""):
+    '''
+        INPUT:
+            argument (string)
+        RETURN the nearest neighbor(s) in vectorDB to argument as string
+    '''
+    md = ""
+    directory_path = "../../safety_docs"
+    for filename in os.listdir(directory_path):
+        if filename.endswith(".md"):
+            with open(os.path.join(directory_path, filename), 'r') as file:
+                content = file.read()
+                md = md + content
+    markdown_document = md
+    headers_to_split_on = [
+        ("#", "Header 1"),
+        ("##", "Header 2"),
+        ("###", "Header 3"),
+    ]
+    markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
+    md_header_splits = markdown_splitter.split_text(markdown_document)
+    embeddings = OpenAIEmbeddings()
+    db = Chroma.from_documents(md_header_splits, embeddings)
+    retriever = db.as_retriever(search_kwargs=dict(k=11))
+    docs = retriever.get_relevant_documents(argument)
+    # return the content of the nearest neighbor document
+    return docs[0].metadata["Header 1"]