sevdeawesome committed on
Commit
cddd748
·
1 Parent(s): a95ca0c
Files changed (1) hide show
  1. app2.py.deprocated +195 -0
app2.py.deprocated ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+
3
+ CONFIG AND IMPORTS
4
+
5
+ '''
6
+ from config import default_config
7
+
8
+ from types import SimpleNamespace
9
+ import gradio as gr
10
+ import os, random
11
+ from pathlib import Path
12
+ import tiktoken
13
+ from getpass import getpass
14
+ from rich.markdown import Markdown
15
+
16
+ import openai
17
+ import wandb
18
+ from pprint import pprint
19
+ from wandb.integration.openai import autolog
20
+ from langchain.text_splitter import MarkdownHeaderTextSplitter
21
+
22
+
23
+ from langchain.embeddings import OpenAIEmbeddings
24
+ from langchain.vectorstores import Chroma
25
+
26
+
27
+
28
+ from tenacity import (
29
+ retry,
30
+ stop_after_attempt,
31
+ wait_random_exponential, # for exponential backoff
32
+ )
33
+
34
+
35
+
36
+
37
# Make sure an OpenAI API key is available before anything below talks to the API.
# When the environment does not provide one, ask for it interactively.
if os.getenv("OPENAI_API_KEY") is None:
    # In VS Code the getpass prompt appears at the top of the window, so tell the user where to look.
    if any("VSCODE" in name for name in os.environ):
        print('Please enter password in the VS Code prompt at the top of your VS Code window!')
    os.environ["OPENAI_API_KEY"] = getpass("Paste your OpenAI key from: https://platform.openai.com/account/api-keys\n")

# Assign the key to the client unconditionally so both paths (key already
# exported / key just typed in) end up configured the same way.
openai.api_key = os.getenv("OPENAI_API_KEY", "")

# Explicit raise instead of `assert`: assertions are removed when Python runs with -O,
# which would silently skip this sanity check.
if not openai.api_key.startswith("sk-"):
    raise ValueError("This doesn't look like a valid OpenAI API key")
print("OpenAI API key configured")
45
+
46
+
47
+
48
+
49
+
50
+
51
def find_nearest_neighbor(argument="", max_args_in_output=3):
    '''
    Look up the markdown sections in ../../safety_docs that are closest to `argument`.

    Every *.md file in the directory is read, split on #/##/### headers, embedded
    with OpenAIEmbeddings into a Chroma store, and queried for the 11 nearest chunks.

    INPUT:
        argument (string)         text to search for
        max_args_in_output (int)  maximum number of distinct "Header 1" titles to return

    RETURN the distinct "Header 1" titles of the nearest chunks, most relevant first,
    each followed by a newline, as one string. Returns "" when there is nothing to
    search, nothing matched, or max_args_in_output is not positive.
    '''
    print(argument)
    directory_path = "../../safety_docs"

    # os.listdir returns names in arbitrary order; sort so the corpus is assembled
    # the same way on every run.
    file_texts = []
    for filename in sorted(os.listdir(directory_path)):
        if filename.endswith(".md"):
            # Explicit encoding so reading does not depend on the platform default
            # (assumes the documents are UTF-8 -- TODO confirm).
            with open(os.path.join(directory_path, filename), 'r', encoding="utf-8") as file:
                file_texts.append(file.read())

    # Join with a newline: if a file does not end with one, the next file's first
    # header would otherwise be glued onto the previous line and not be split on.
    markdown_document = "\n".join(file_texts)

    headers_to_split_on = [
        ("#", "Header 1"),
        ("##", "Header 2"),
        ("###", "Header 3"),
    ]

    markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
    md_header_splits = markdown_splitter.split_text(markdown_document)
    if not md_header_splits:
        # Nothing to embed or search.
        return ""

    embeddings = OpenAIEmbeddings()
    db = Chroma.from_documents(md_header_splits, embeddings)

    retriever = db.as_retriever(search_kwargs=dict(k=11))
    docs = retriever.get_relevant_documents(argument)

    # Collect distinct level-1 headers in relevance order.
    headers = []
    for doc in docs:
        if len(headers) >= max_args_in_output:
            break
        # Chunks that precede any level-1 header carry no "Header 1" entry; skip them
        # instead of failing with KeyError.
        header = doc.metadata.get("Header 1")
        if header is None or header in headers:
            continue
        headers.append(header)

    return "".join(header + '\n' for header in headers)
99
+
100
+
101
+
102
def get_gpt_response(argument, user_prompt, system_prompt=default_config.system_prompt, model=default_config.model_name, n=1, max_tokens=200):
    '''
    Send one system + user message pair to the OpenAI chat completion API and
    return the text of the reply.

    INPUT:
        argument       not used by this function; kept so existing callers keep working
        user_prompt    content of the "user" message
        system_prompt  content of the "system" message
        model          model name passed to the API
        n              number of completions requested from the API
        max_tokens     max_tokens value passed to the API

    RETURN the message content of the first returned choice (string), or "" if
    the API returned no choices.

    Once all attempts have failed, tenacity raises its RetryError.
    '''

    # Three attempts with randomized exponential backoff. The previous
    # stop_after_attempt(1) gave up after the very first failure, so the
    # backoff configured here never actually ran.
    @retry(wait=wait_random_exponential(min=1, max=3), stop=stop_after_attempt(3))
    def completion_with_backoff(**kwargs):
        return openai.ChatCompletion.create(**kwargs)

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    responses = completion_with_backoff(
        model=model,
        messages=messages,
        n=n,
        max_tokens=max_tokens,
    )

    choices = responses.choices
    if not choices:
        return ""
    # Only a single string is returned even when n > 1.
    return choices[0].message.content
128
+
129
+
130
def greet(argument):
    '''
    Build the text shown in the UI for one submitted argument.

    INPUT:
        argument (string)

    RETURN a greeting, the result of find_nearest_neighbor(argument), a divider,
    and the lengthy response (currently a fixed placeholder string)
    '''
    matches = find_nearest_neighbor(argument)

    # The prompt is still assembled, but the GPT call that would consume it is switched off.
    prompt = default_config.user_prompt_1 + argument + default_config.user_prompt_2
    # lengthy_reply = get_gpt_response(argument, prompt)
    lengthy_reply = "chatbot response here"

    pieces = [
        "Hello ",
        "\n We think your argument matches common arguments in our database, is it one of these?:\n ",
        matches,
        "\n\n\n ------------------------- \n\n\n Lengthy response: \n",
        lengthy_reply,
    ]
    return "".join(pieces)
136
+
137
+
138
# Gradio UI: a two-line text box whose content is passed to greet(); the returned
# string is displayed as plain text.
argument_box = gr.Textbox(
    lines=2,
    placeholder="Anything past 200 tokens (roughly 200 words) will be cutoff. Please enter <=1 paragraph",
)
demo = gr.Interface(fn=greet, inputs=argument_box, outputs="text")

# Request queueing is currently switched off.
# demo.queue(max_size=20)
demo.launch()
146
+
147
+
148
+
149
+
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+
159
+
160
def find_nearest_neighbor(argument=""):
    '''
    Return the "Header 1" title of the markdown section in ../../safety_docs that
    is closest to `argument`.

    NOTE: this rebinds the name find_nearest_neighbor that is already defined
    earlier in this file (the earlier version also takes max_args_in_output).

    INPUT:
        argument (string)

    RETURN the "Header 1" title of the most relevant retrieved chunk that has one,
    as a string; "" when there is nothing to search or nothing matched.
    '''
    directory_path = "../../safety_docs"

    # os.listdir returns names in arbitrary order; sort so the corpus is assembled
    # the same way on every run.
    file_texts = []
    for filename in sorted(os.listdir(directory_path)):
        if filename.endswith(".md"):
            # Explicit encoding so reading does not depend on the platform default
            # (assumes the documents are UTF-8 -- TODO confirm).
            with open(os.path.join(directory_path, filename), 'r', encoding="utf-8") as file:
                file_texts.append(file.read())

    # Join with a newline: if a file does not end with one, the next file's first
    # header would otherwise be glued onto the previous line and not be split on.
    markdown_document = "\n".join(file_texts)

    headers_to_split_on = [
        ("#", "Header 1"),
        ("##", "Header 2"),
        ("###", "Header 3"),
    ]

    markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
    md_header_splits = markdown_splitter.split_text(markdown_document)
    if not md_header_splits:
        # Nothing to embed or search.
        return ""

    embeddings = OpenAIEmbeddings()
    db = Chroma.from_documents(md_header_splits, embeddings)

    retriever = db.as_retriever(search_kwargs=dict(k=11))
    docs = retriever.get_relevant_documents(argument)

    # Walk the results in relevance order. An empty result list, or chunks that sit
    # outside any level-1 header, no longer raise IndexError / KeyError.
    for doc in docs:
        header = doc.metadata.get("Header 1")
        if header is not None:
            return header
    return ""