Blanca committed on
Commit 52947a8 · verified · 1 Parent(s): cbc5c4f

Update app.py

Files changed (1)
  1. app.py +6 -8
app.py CHANGED
@@ -22,7 +22,7 @@ TOKEN = os.environ.get("TOKEN", None)
 
 OWNER="Blanca"
 DATA_DATASET = f"{OWNER}/CQs-Gen_test_embeddings"
-INTERNAL_DATA_DATASET = f"{OWNER}/CQs-Gen_test_embeddings
+INTERNAL_DATA_DATASET = f"{OWNER}/CQs-Gen_test_embeddings"
 SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
 SUBMISSION_DATASET_PUBLIC = f"{OWNER}/submissions_public"
 #CONTACT_DATASET = f"{OWNER}/contact_info" # TODO: I should reactivate this
@@ -217,14 +217,12 @@ def add_new_eval(
     with open(file_path, 'r') as f:
         data = json.load(f)
     scores = []
-    for id_to_eval, line in data.items():
+    for id_to_eval, line in data.items(): # data to evaluate
         intervention_score = 0
-        for indx, intervention_id in enumerate(gold_dataset['intervention_id']):
-            if indx == 3:
-                break
+        for indx, intervention_id in enumerate(gold_dataset['intervention_id']): # references
             if id_to_eval == intervention_id:
                 references = gold_dataset['cqs']
-                reference_set = [row['cq'] for row in references[indx]]
+                reference_embeddings = [row['embedding'] for row in references[indx]]
                 # TODO: here upload the embedding that I have saved, so they can be used in similarity evaluation
 
                 #print(reference_set, flush=True)
@@ -235,8 +233,8 @@ def add_new_eval(
 
                 if METRIC == 'similarity':
                     sentence_embedding = similarity_model.encode(cq_text)
-                    reference_embedding = similarity_model.encode(reference_set) # TODO: here have the embeddings directly, do no calculate each time
-                    sims = similarity_model.similarity(sentence_embedding, reference_embedding).tolist()[0]
+                    #reference_embedding = similarity_model.encode(reference_set) # TODO: here have the embeddings directly, do no calculate each time
+                    sims = similarity_model.similarity(sentence_embedding, reference_embeddings).tolist()[0]
 
                     winner = np.argmax(sims)
                     # make sure the similarity of the winning reference sentence is at least 0.65
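The commit replaces the per-submission call to similarity_model.encode(reference_set) with embeddings that are already stored alongside each gold reference CQ (the 'embedding' field read in the diff), so only the candidate question is encoded at evaluation time. Below is a minimal sketch of that scoring step under these assumptions; the model name and the best_reference helper are illustrative and not part of app.py, while the 0.65 acceptance threshold mirrors the comment in the code.

# Minimal sketch of the updated similarity scoring (not the full add_new_eval logic).
# Assumption: reference_embeddings are the precomputed vectors loaded from the gold
# dataset (one list of floats per reference CQ); the model name below is illustrative.
import numpy as np
from sentence_transformers import SentenceTransformer

similarity_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")  # assumed model

def best_reference(cq_text, reference_embeddings, threshold=0.65):
    """Return (index, score) of the closest reference CQ, or None if below the threshold."""
    sentence_embedding = similarity_model.encode(cq_text)
    # References are NOT re-encoded here; their embeddings come straight from the dataset.
    sims = similarity_model.similarity(sentence_embedding, reference_embeddings).tolist()[0]
    winner = int(np.argmax(sims))
    return (winner, sims[winner]) if sims[winner] >= threshold else None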