Update app.py
app.py CHANGED
@@ -22,7 +22,7 @@ TOKEN = os.environ.get("TOKEN", None)
 
 OWNER="Blanca"
 DATA_DATASET = f"{OWNER}/CQs-Gen_test_embeddings"
-INTERNAL_DATA_DATASET = f"{OWNER}/CQs-Gen_test_embeddings
+INTERNAL_DATA_DATASET = f"{OWNER}/CQs-Gen_test_embeddings"
 SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
 SUBMISSION_DATASET_PUBLIC = f"{OWNER}/submissions_public"
 #CONTACT_DATASET = f"{OWNER}/contact_info" # TODO: I should reactivate this
@@ -217,14 +217,12 @@ def add_new_eval(
     with open(file_path, 'r') as f:
         data = json.load(f)
     scores = []
-    for id_to_eval, line in data.items():
+    for id_to_eval, line in data.items(): # data to evaluate
         intervention_score = 0
-        for indx, intervention_id in enumerate(gold_dataset['intervention_id']):
-            if indx == 3:
-                break
+        for indx, intervention_id in enumerate(gold_dataset['intervention_id']): # references
             if id_to_eval == intervention_id:
                 references = gold_dataset['cqs']
-
+                reference_embeddings = [row['embedding'] for row in references[indx]]
                 # TODO: here upload the embedding that I have saved, so they can be used in similarity evaluation
 
                 #print(reference_set, flush=True)
@@ -235,8 +233,8 @@ def add_new_eval(
 
                 if METRIC == 'similarity':
                     sentence_embedding = similarity_model.encode(cq_text)
-                    reference_embedding = similarity_model.encode(reference_set) # TODO: here have the embeddings directly, do not calculate each time
-                    sims = similarity_model.similarity(sentence_embedding, reference_embedding).tolist()[0]
+                    #reference_embedding = similarity_model.encode(reference_set) # TODO: here have the embeddings directly, do not calculate each time
+                    sims = similarity_model.similarity(sentence_embedding, reference_embeddings).tolist()[0]
 
                     winner = np.argmax(sims)
                     # make sure the similarity of the winning reference sentence is at least 0.65