steveagi committed on
Commit
e094b3f
·
1 Parent(s): d19b4d7

rm emb related

Browse files
Files changed (1) hide show
  1. games/game.py +0 -113
games/game.py CHANGED
@@ -127,116 +127,3 @@ class game:
127
 
128
  return res
129
 
130
-
131
- def _compute_emb(self, command: str):
132
- # always normalize text when querying the OpenAI embedding service
133
- command = command.strip().lower()
134
- request_timeout = 5
135
- return openai_get_embedding(
136
- command,
137
- engine="text-embedding-ada-002",
138
- request_timeout=request_timeout,
139
- )
140
-
141
-
142
- def init_emb(self):
143
- '''
144
- initialize embeddings
145
-
146
- 1. load pkl file that is in train
147
- 2. run against OpenAI embedding service, but skip entries in pkl
148
- '''
149
-
150
- print('initializing...')
151
- # load from the pkl file
152
- pkl_embeddings = {}
153
- try:
154
- print('loading embeddings from a file...')
155
- # file_path = f'{self.emb_path}'
156
- # print(file_path)
157
- # with open(file_path, 'rb') as f:
158
- # pkl_embeddings = pkl.load(f)
159
- files = glob.glob(f"{self.ROOT}/data/{self.PATH}/emb/*.json")
160
- pkl_embeddings = {}
161
- for f in files:
162
- with open(f, "r") as f:
163
- pkl_embeddings |= json.load(f)
164
-
165
- # copy to self.embeddings, skip possible junk
166
- for cmd in self.train:
167
- if cmd in pkl_embeddings:
168
- self.embeddings[cmd] = pkl_embeddings[cmd]
169
- except Exception as e:
170
- print(e)
171
- print(f'pkl size: {len(pkl_embeddings)}')
172
- print(f'embedding size: {len(self.embeddings)}')
173
-
174
- # query OpenAI embedding service
175
- print('creating embeddings by query OpenAI...')
176
- for cmd in self.train:
177
- if cmd in self.embeddings:
178
- # print(f' - skip {command}')
179
- continue
180
- print(f'query \'{cmd}\'...')
181
- self.embeddings[cmd] = self._compute_emb(cmd)
182
- # print(json.dumps(self.embeddings, indent=2))
183
- print(f'embedding size: {len(self.embeddings)}')
184
- print(f'train size: {len(self.train)}')
185
- assert self.embeddings.keys() == self.train.keys()
186
-
187
-
188
- def query_emb(self, command:str):
189
- command = self.special_case(command)
190
-
191
- if len(self.embeddings) == 0:
192
- # place init_emb here, not __init__,
193
- # as we can avoid network connection when started.
194
- # In this way, we can turn off VPN before starting,
195
- # and turn on VPN after started.
196
- self.init_emb()
197
- else:
198
- print('skip init')
199
- if len(self.embeddings) == 0:
200
- return self.status_failure, ['AI internal error', 'embeding is empty'], 'no desc', [['', 0]]
201
-
202
- if command is None:
203
- return self.status_failure, ["Your input is empty."], 'no desc', [['', 0]]
204
- if command.strip() == '':
205
- return self.status_failure, ["Your input is empty."], 'no desc', [['', 0]]
206
-
207
- imput_emb = self._compute_emb(command)
208
-
209
- results = []
210
- for cmd in self.embeddings:
211
- sim = openai_cosine_similarity(
212
- imput_emb,
213
- self.embeddings[cmd],
214
- )
215
- results.append([cmd, sim])
216
- results.sort(key = lambda x: x[1], reverse=True)
217
- # shorten results
218
- topk = 3 # 10
219
- results = results[:topk]
220
-
221
- top_cmd = None
222
- top_sim = 0
223
- if results:
224
- top_cmd, top_sim = results[0]
225
-
226
- if top_sim > 0.8:
227
- return [
228
- self.status_OK,
229
- self.train[top_cmd]["result"],
230
- self.train[top_cmd]['desc'],
231
- results,
232
- ]
233
- else:
234
- return [
235
- self.status_failure,
236
- [
237
- "We don't understand your request.",
238
- f"Our best guess is: '{top_cmd}'"
239
- ],
240
- 'no desc',
241
- results,
242
- ]
 
127
 
128
  return res
129