Michelangiolo committed on
Commit
d7e74cc
·
1 Parent(s): b5ed8b3

first push

Browse files
Files changed (1) hide show
  1. app.py +276 -0
app.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+
3
def chatgpt3_question(context, question):
    """Ask gpt-3.5-turbo to answer ``question`` based on ``context``.

    The API key is read from the ``OPENAI_API_KEY`` environment variable
    rather than being embedded in the source, so the secret never ends up in
    version control.

    Args:
        context: Text the model should base its answer on.
        question: The user's question.

    Returns:
        The message content of the first choice in the API response.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is not set.
        requests.HTTPError: If the API answers with an error status.
    """
    import os  # local import keeps this block self-contained

    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY environment variable is not set")

    url = "https://api.openai.com/v1/chat/completions"

    prompt = f"""
    based on this context: {context}
    answer this use question: {question}
    """

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    data = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": prompt}],
    }

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(url, headers=headers, json=data, timeout=60)
    # Fail with the HTTP error instead of an opaque KeyError on 'choices'.
    response.raise_for_status()

    return response.json()['choices'][0]['message']['content']
26
+
27
+ import os
28
+ import requests
29
+ import pandas as pd
30
+
31
def split_paragraph(text, keyword):
    """Split ``text`` into chunks that each start with a ``keyword`` sentence.

    The text is cut on ``'.'``. A sentence that contains ``keyword``
    (compared case-insensitively) opens a new chunk; a sentence that does not
    is appended to the current chunk, joined with ``'. '``. If the first
    sentence lacks the keyword it is prefixed with ``'the <keyword>: '`` so
    that every chunk mentions the keyword.

    Args:
        text: Paragraph to split.
        keyword: Phrase that marks the beginning of a chunk.

    Returns:
        A list of stripped chunk strings; empty when ``text`` is blank.
    """
    # A blank paragraph would otherwise produce a lone 'the <keyword>:' chunk.
    if not text.strip():
        return []

    # Lower-case both sides; previously only the sentence was lowered, so a
    # keyword containing uppercase letters could never match.
    needle = keyword.lower()
    sentences = [part.strip() for part in text.split('.')]
    starts_chunk = [needle in sentence.lower() for sentence in sentences]

    # Guarantee the first sentence opens a chunk and mentions the keyword.
    if not starts_chunk[0]:
        sentences[0] = f'the {keyword}: ' + sentences[0]
        starts_chunk[0] = True

    chunks = []
    for sentence, is_start in zip(sentences, starts_chunk):
        if is_start:
            chunks.append(sentence)
        else:
            # The empty piece after a trailing '.' lands here and restores
            # the final period of the last chunk.
            chunks[-1] += '. ' + sentence

    return [chunk.strip() for chunk in chunks]
69
+
70
def context_dict2context_list(context_dict):
    """Flatten a keyword -> text mapping into a list of context chunks.

    Each text is cut into paragraphs on blank lines, remaining newlines are
    removed, and every paragraph is passed to ``split_paragraph`` with the
    mapping key as the keyword. The resulting chunks are collected in order.
    """
    chunks = []
    for keyword, raw_text in context_dict.items():
        paragraphs = [
            piece.replace('\n', '').strip()
            for piece in raw_text.split('\n\n')
        ]
        for paragraph in paragraphs:
            chunks.extend(split_paragraph(paragraph, keyword))
    return chunks
83
+
84
def list2vec(list1):
    """Embed every string in ``list1`` with ``text-embedding-ada-002``.

    The API key is read from the ``OPENAI_API_KEY`` environment variable
    rather than being embedded in the source.

    Args:
        list1: List of strings to embed in a single API request.

    Returns:
        A list with one embedding per entry of the response's ``data`` array;
        an empty list when ``list1`` is empty (no request is made).

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is not set.
        requests.HTTPError: If the API answers with an error status.
    """
    import os  # local import keeps this block self-contained

    # Nothing to embed: skip the network round trip.
    if len(list1) == 0:
        return []

    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY environment variable is not set")

    headers = {
        'Content-Type': 'application/json',
        'Authorization': 'Bearer ' + api_key,
    }

    json_data = {
        'input': list1,
        'model': 'text-embedding-ada-002',
    }

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(
        'https://api.openai.com/v1/embeddings',
        headers=headers,
        json=json_data,
        timeout=60,
    )
    # Fail with the HTTP error instead of an opaque KeyError on 'data'.
    response.raise_for_status()

    return [item['embedding'] for item in response.json()['data']]
102
+
103
def text2vec(query):
    """Embed a single ``query`` string with ``text-embedding-ada-002``.

    The API key is read from the ``OPENAI_API_KEY`` environment variable
    rather than being embedded in the source.

    Args:
        query: Text to embed.

    Returns:
        The embedding of the first entry in the response's ``data`` array.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is not set.
        requests.HTTPError: If the API answers with an error status.
    """
    import os  # local import keeps this block self-contained

    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY environment variable is not set")

    headers = {
        'Content-Type': 'application/json',
        'Authorization': 'Bearer ' + api_key,
    }

    json_data = {
        'input': query,
        'model': 'text-embedding-ada-002',
    }

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(
        'https://api.openai.com/v1/embeddings',
        headers=headers,
        json=json_data,
        timeout=60,
    )
    # Fail with the HTTP error instead of an opaque KeyError on 'data'.
    response.raise_for_status()

    return response.json()['data'][0]['embedding']  #len=1536 #pricing=0.0004
117
+
118
# Knowledge base of the chatbot: each key is a keyword and each value is a
# free-text paragraph about it. context_dict2context_list() passes the key to
# split_paragraph() as the phrase that marks the start of a new chunk.
context_dict = {
    "goliath" :
    """
    Goliath operates in Japan. Goliath builds AI Recommendation Systems for Matching Platforms using the latest technology.
    """
    ,
    "company" :
    """
    Our company builds AI Recommendation Systems for Matching Platforms using the latest technology. Our company is estabilished and operates in Japan. Our company uses the AWS Cloud to manage Servers. Our company can use GPT3 as well. Our company also builds GPT3-based chatbots.
    """
    ,
    "price" :
    """
    The price of a recommendation system depends on the amount of complexity that is required to build, as well as the volume of customers. Reach us to get a quotation. The price of a chatbot depends by its intended usage and complexity, contact us for a quotation.
    """
    ,
    "recommendation system" :
    """
    If your company wants to recommend products to customers, we can build a recommendation system for you. GPT3 can be used to build recommendation systems by using embeddings, mapping choices in a mathematical space. Once the recommendation system has been built, we will manage it in the future as well. Recommendation system could also be built for startups, though they will be in smaller size. We use AWS OpenSearch to host recommendation system.
    """
    ,
    "a matching platform" :
    """
    A matching platform is a business with thousands of users, who could be customers, individuals or companies, who are interacting with one another. For example dating apps, ecommerce platforms, or job recruiting platforms.
    """
}
144
+
145
+ import pandas as pd
146
+ from sentence_transformers import SentenceTransformer, util
147
+
148
# Prepare the context: flatten context_dict into a list of text chunks.
context_list = context_dict2context_list(context_dict)

# Add individual sentences that are not derived from context_dict.
context_list += [
    'We can also use GPT3, if requested',
    'Our email is ma@goliath.jp',
    'You can contact us at ma@goliath.jp'
]

# Build the context table: one row per chunk, holding the text and its
# embedding. list2vec() is called here at module import, so importing this
# file performs a network request.
df = pd.DataFrame([context_list, list2vec(context_list)]).T
df.columns = ['description', 'text_vector_']
df['description'] = df['description'].apply(lambda x : x.strip())
# Bare expression with no effect in a script; presumably a notebook leftover.
df
163
+
164
# Preset question -> answer pairs. qa() returns one of these answers directly
# when a query is similar enough to one of the questions.
qa_list = {
    'how long does it take to build a recommendation system?' : 'Usually, from a few weeks to one month',
    'how long does it take to build one' : 'Usually, from a few weeks to one month',
    'how many people are working for goliath?' : '5 people',
    'how many people are working for you?' : '5 people',
    'how much does it cost?' : 'The price depends by its intended usage and complexity, contact us for a quotation.',
    'do you use GPT3 API?' : 'yes, we can',
    'do you use GPT3?' : 'yes, we can',
    'do you use GPT4?' : 'yes, we can',
    'so you build chatbots' : 'yes, we built state-of-the art chatbots with GPT3 technology'
}
# One row per preset pair: the dict keys become the 'question' column
# (via reset_index) and the values the 'answer' column.
df_qa = pd.DataFrame([qa_list]).T.reset_index()
df_qa.columns = ['question', 'answer']
# Embed all preset questions in a single list2vec() request (network call at import).
df_qa['text_vector_'] = list2vec(df_qa['question'].values.tolist())
# Bare expression with no effect in a script; presumably a notebook leftover.
df_qa

# Copies that predict() passes to qa().
df_qa_ = df_qa.copy()
df_ = df.copy()
182
+
183
def qa(df_, df_qa_, min_qa_score, min_context_score, verbose, query):
    """Answer ``query`` from preset Q&A pairs or, failing that, from context.

    The query is embedded with ``text2vec`` and compared by cosine similarity
    with the stored vectors. If a preset question scores at least
    ``min_qa_score`` its answer is returned directly. Otherwise, if a context
    chunk scores at least ``min_context_score``, the best chunk is sent to
    ``chatgpt3_question`` together with the query.

    Neither input frame is modified: scores are attached to copies, so frames
    shared between calls are not changed by a call.

    Args:
        df_: DataFrame with ``description`` and ``text_vector_`` columns.
        df_qa_: DataFrame with ``answer`` and ``text_vector_`` columns.
        min_qa_score: Minimum similarity for a preset answer to be used.
        min_context_score: Minimum similarity for a context chunk to be used.
        verbose: When true, show the candidate rows that passed the threshold.
        query: The user's question.

    Returns:
        The answer string, or ``'impossible to give an answer'`` when nothing
        reaches the thresholds.
    """
    query_vec = text2vec(query)

    def similarity(vector):
        return float(util.cos_sim(vector, query_vec))

    def show(frame):
        # `display` only exists inside IPython/Jupyter; fall back to print so
        # verbose=True does not raise NameError in a plain script.
        try:
            display(frame)
        except NameError:
            print(frame)

    # First check whether a preset question matches the query.
    # assign() returns a new frame, leaving the caller's frame untouched.
    qa_hits = df_qa_.assign(score=df_qa_['text_vector_'].apply(similarity))
    qa_hits = qa_hits.sort_values('score', ascending=False)
    qa_hits = qa_hits[qa_hits['score'] >= min_qa_score]
    if len(qa_hits) > 0:
        if verbose:
            show(qa_hits)
        return qa_hits['answer'].iloc[0]

    # Then check whether a context chunk can be used to answer the question.
    context_hits = df_.assign(score=df_['text_vector_'].apply(similarity))
    context_hits = context_hits.sort_values('score', ascending=False)
    context_hits = context_hits[context_hits['score'] >= min_context_score]
    if len(context_hits) > 0:
        if verbose:
            show(context_hits)
        # Only the best chunk is used; the join is kept in case several
        # chunks are merged later.
        context = ' '.join(context_hits['description'].iloc[:1].tolist())
        return chatgpt3_question(context, query)

    return 'impossible to give an answer'
209
+
210
+ # print(
211
+ # qa(
212
+ # df_,
213
+ # df_qa_,
214
+ # min_qa_score=0.92,
215
+ # min_context_score=.75,
216
+ # verbose=False,
217
+ # query='What is a recommender system?'
218
+ # )
219
+ # )
220
+
221
+ import subprocess
222
+ import random
223
+ import gradio as gr
224
+ import requests
225
+
226
# Module-level placeholders. predict() takes its own `history` parameter, which
# shadows this global, and declares `history_prompt` global without using it.
history = None
history_prompt = None
228
+
229
def predict(input, history):
    """Handle one submitted question and extend the chat history.

    Args:
        input: The text the user submitted.
        history: List of ``(question, answer)`` tuples for this session; it is
            appended to in place.

    Returns:
        The updated history twice: once for the chatbot display and once for
        the stored state.
    """
    answer = qa(
        df_,
        df_qa_,
        min_qa_score=0.92,
        min_context_score=.75,
        verbose=False,
        query=input,
    )

    # Record the new exchange; the same list feeds both outputs.
    history.append((input, answer))
    return history, history
255
+
256
# Build the Gradio interface: a title, a per-session state holding the chat
# history, the chatbot display and a single-line question box.
demo = gr.Blocks()
with demo:
    gr.Markdown(
        """
        Chatbot
        """
    )
    state = gr.Variable(value=[]) # chat history starts empty
    chatbot = gr.Chatbot() #color_map=("#00ff7f", "#00d5ff")
    text = gr.Textbox(
        label="Question",
        value="What is a recommendation system?",
        placeholder="",
        max_lines=1,
    )
    # On submit: predict() receives the text and the state and returns the
    # new chatbot content and the updated state.
    text.submit(predict, [text, state], [chatbot, state])
    # Second handler on the same event clears the textbox.
    text.submit(lambda x: "", text, text)
    # btn = gr.Button(value="submit")
    # btn.click(chatbot_foo, None, [chatbot, state])

# Start the app without creating a public share link.
demo.launch(share=False)