Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -59,13 +59,21 @@ def evaluate_model_with_insights(model_name):
|
|
| 59 |
for dataset_name, dataset in datasets.items():
|
| 60 |
all_mrr, all_map, all_ndcg = [], [], []
|
| 61 |
dataset_samples = []
|
| 62 |
-
|
| 63 |
if 'candidate_document' in dataset.column_names:
|
| 64 |
grouped_data = dataset.to_pandas().groupby("query")
|
| 65 |
for query, group in grouped_data:
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
relevance_labels = group['relevance_label'].tolist()
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
scores = model.predict(pairs)
|
| 70 |
|
| 71 |
# Collecting top-5 results for display
|
|
@@ -83,8 +91,21 @@ def evaluate_model_with_insights(model_name):
|
|
| 83 |
else:
|
| 84 |
for entry in dataset:
|
| 85 |
query = entry['query']
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
pairs = [(query, doc) for doc in candidate_texts]
|
| 89 |
scores = model.predict(pairs)
|
| 90 |
|
|
@@ -100,6 +121,27 @@ def evaluate_model_with_insights(model_name):
|
|
| 100 |
all_mrr.append(mean_reciprocal_rank(relevance_labels, scores))
|
| 101 |
all_map.append(mean_average_precision(relevance_labels, scores))
|
| 102 |
all_ndcg.append(ndcg_at_k(relevance_labels, scores, k=10))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
# Metrics for this dataset
|
| 105 |
results.append({
|
|
@@ -155,4 +197,4 @@ interface = gr.Interface(
|
|
| 155 |
)
|
| 156 |
)
|
| 157 |
|
| 158 |
-
interface.launch(debug=True)
|
|
|
|
| 59 |
for dataset_name, dataset in datasets.items():
|
| 60 |
all_mrr, all_map, all_ndcg = [], [], []
|
| 61 |
dataset_samples = []
|
|
|
|
| 62 |
if 'candidate_document' in dataset.column_names:
|
| 63 |
grouped_data = dataset.to_pandas().groupby("query")
|
| 64 |
for query, group in grouped_data:
|
| 65 |
+
# Skip invalid queries
|
| 66 |
+
if query is None or not isinstance(query, str) or query.strip() == "":
|
| 67 |
+
continue
|
| 68 |
+
|
| 69 |
+
candidate_texts = group['candidate_document'].dropna().tolist()
|
| 70 |
relevance_labels = group['relevance_label'].tolist()
|
| 71 |
+
|
| 72 |
+
# Skip if there are no candidate documents, or if dropna() removed some so they no longer line up with relevance_labels
|
| 73 |
+
if not candidate_texts or len(candidate_texts) != len(relevance_labels):
|
| 74 |
+
continue
|
| 75 |
+
|
| 76 |
+
pairs = [(query, doc) for doc in candidate_texts if doc is not None and isinstance(doc, str) and doc.strip() != ""]
|
| 77 |
scores = model.predict(pairs)
|
| 78 |
|
| 79 |
# Collecting top-5 results for display
|
|
|
|
| 91 |
else:
|
| 92 |
for entry in dataset:
|
| 93 |
query = entry['query']
|
| 94 |
+
|
| 95 |
+
# Validate query and documents
|
| 96 |
+
if query is None or not isinstance(query, str) or query.strip() == "":
|
| 97 |
+
continue
|
| 98 |
+
|
| 99 |
+
candidate_texts = [
|
| 100 |
+
doc for doc in [entry.get('positive'), entry.get('negative1'), entry.get('negative2'), entry.get('negative3'), entry.get('negative4')]
|
| 101 |
+
if doc is not None and isinstance(doc, str) and doc.strip() != ""
|
| 102 |
+
]
|
| 103 |
+
relevance_labels = [1] + [0] * (len(candidate_texts) - 1)
|
| 104 |
+
|
| 105 |
+
# Skip if no valid candidate documents
|
| 106 |
+
if not candidate_texts or len(candidate_texts) != len(relevance_labels):
|
| 107 |
+
continue
|
| 108 |
+
|
| 109 |
pairs = [(query, doc) for doc in candidate_texts]
|
| 110 |
scores = model.predict(pairs)
|
| 111 |
|
|
|
|
| 121 |
all_mrr.append(mean_reciprocal_rank(relevance_labels, scores))
|
| 122 |
all_map.append(mean_average_precision(relevance_labels, scores))
|
| 123 |
all_ndcg.append(ndcg_at_k(relevance_labels, scores, k=10))
|
| 124 |
+
|
| 125 |
+
else:
|
| 126 |
+
for entry in dataset:
|
| 127 |
+
query = entry['query']
|
| 128 |
+
candidate_texts = [entry['positive'], entry['negative1'], entry['negative2'], entry['negative3'], entry['negative4']]
|
| 129 |
+
relevance_labels = [1, 0, 0, 0, 0]
|
| 130 |
+
pairs = [(query, doc) for doc in candidate_texts]
|
| 131 |
+
scores = model.predict(pairs)
|
| 132 |
+
|
| 133 |
+
# Collecting top-5 results for display
|
| 134 |
+
sorted_indices = np.argsort(scores)[::-1]
|
| 135 |
+
top_docs = [(candidate_texts[i], scores[i], relevance_labels[i]) for i in sorted_indices[:5]]
|
| 136 |
+
dataset_samples.append({
|
| 137 |
+
"Query": query,
|
| 138 |
+
"Top 5 Candidates": top_docs
|
| 139 |
+
})
|
| 140 |
+
|
| 141 |
+
# Metrics
|
| 142 |
+
all_mrr.append(mean_reciprocal_rank(relevance_labels, scores))
|
| 143 |
+
all_map.append(mean_average_precision(relevance_labels, scores))
|
| 144 |
+
all_ndcg.append(ndcg_at_k(relevance_labels, scores, k=10))
|
| 145 |
|
| 146 |
# Metrics for this dataset
|
| 147 |
results.append({
|
|
|
|
| 197 |
)
|
| 198 |
)
|
| 199 |
|
| 200 |
+
interface.launch(debug=True)
|