Spaces:
Sleeping
Sleeping
Ajaykanth Maddi committed on
Commit ·
3da3c8a
1
Parent(s): d75bc33
Code Changes - Reranking Implementation
Browse files
app.py
CHANGED
|
@@ -7,18 +7,9 @@ from datetime import datetime
|
|
| 7 |
import numpy as np
|
| 8 |
import matplotlib
|
| 9 |
|
| 10 |
-
# ==== Metrics Calculation
|
| 11 |
-
from sklearn.metrics import roc_auc_score
|
| 12 |
-
from sklearn.metrics import mean_squared_error
|
| 13 |
-
|
| 14 |
-
# === HuggingFace & Transformers ===
|
| 15 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 16 |
-
|
| 17 |
-
|
| 18 |
# === Misc ===
|
| 19 |
import json
|
| 20 |
import gradio as gr
|
| 21 |
-
from json_repair import repair_json
|
| 22 |
|
| 23 |
import datetime
|
| 24 |
|
|
@@ -114,54 +105,6 @@ def get_info_from_sample_questions(subset, question):
|
|
| 114 |
return orig_ans, y_metrics
|
| 115 |
return "No answer found.", "No metrics found."
|
| 116 |
|
| 117 |
-
def plot_subset_metrics_old(subset_name):
|
| 118 |
-
summary = ragbench_details[subset_name]["summary"]
|
| 119 |
-
|
| 120 |
-
# Create a DataFrame for plotting
|
| 121 |
-
keys = ['Entries', 'TotalDocs', 'TotalUniqueIds', 'TotalUniqueDocs', 'UniqueDocsPercent']
|
| 122 |
-
values = [summary.get(k, 0) for k in keys]
|
| 123 |
-
|
| 124 |
-
fig, ax = plt.subplots(figsize=(8, 4))
|
| 125 |
-
bars = ax.bar(keys, values, color='skyblue')
|
| 126 |
-
plt.tight_layout() # Apply tight layout after plotting
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
# ✅ Add count on top of bars
|
| 130 |
-
ax.bar_label(bars, fmt='%.0f', padding=3)
|
| 131 |
-
|
| 132 |
-
# ax.bar(keys, values, color="mediumseagreen")
|
| 133 |
-
ax.set_title(f"Metrics for Subset: {subset_name}, Domain: {summary.get('Domain')}", fontsize=14)
|
| 134 |
-
ax.set_ylabel("Value")
|
| 135 |
-
ax.grid(axis='y')
|
| 136 |
-
ax.set_xticks(range(len(keys)))
|
| 137 |
-
ax.set_xticklabels(keys, rotation=21, ha='right')
|
| 138 |
-
return fig
|
| 139 |
-
|
| 140 |
-
def plot_chunking_strategies_old(subset_name):
|
| 141 |
-
chunking_data = ragbench_details[subset_name]["chunking"]
|
| 142 |
-
|
| 143 |
-
plt.figure(figsize=(8, 4))
|
| 144 |
-
strategies = list(chunking_data.keys())
|
| 145 |
-
counts = list(chunking_data.values())
|
| 146 |
-
|
| 147 |
-
bars = plt.bar(strategies, counts, color="skyblue")
|
| 148 |
-
|
| 149 |
-
# Add value labels on top of bars
|
| 150 |
-
for bar in bars:
|
| 151 |
-
yval = bar.get_height()
|
| 152 |
-
plt.text(bar.get_x() + bar.get_width()/2, yval + 20, int(yval), ha='center', va='bottom', fontsize=10)
|
| 153 |
-
|
| 154 |
-
# plt.xlabel("Chunking Strategies")
|
| 155 |
-
plt.tight_layout() # Apply tight layout after plotting
|
| 156 |
-
|
| 157 |
-
plt.ylabel("Number of Chunks")
|
| 158 |
-
plt.title(f"Chunking Strategy Distribution - {subset_name}")
|
| 159 |
-
plt.xticks(rotation=30)
|
| 160 |
-
plt.tight_layout()
|
| 161 |
-
|
| 162 |
-
# Return plot as figure (Gradio accepts it)
|
| 163 |
-
return plt.gcf()
|
| 164 |
-
|
| 165 |
|
| 166 |
def plot_subset_metrics_old1(subset_name):
|
| 167 |
"""Generate a bar plot of key metrics for a given subset with proper title display."""
|
|
@@ -221,7 +164,6 @@ def plot_subset_metrics_old1(subset_name):
|
|
| 221 |
except Exception as e:
|
| 222 |
print(f"Plotting error: {str(e)}")
|
| 223 |
|
| 224 |
-
|
| 225 |
def plot_chunking_strategies_old1(subset_name):
|
| 226 |
"""Visualize chunking strategy distribution with enhanced formatting."""
|
| 227 |
try:
|
|
@@ -539,7 +481,7 @@ def generate_advance_report(subset_dropdown, dataset_type_dropdown, chunking_dro
|
|
| 539 |
rmEmbedName = embed_dropdown.replace("/", ":")
|
| 540 |
rmGenName = generator_dropdown.replace("/", ":")
|
| 541 |
|
| 542 |
-
fileName = f"{subset_dropdown}_{chunking_dropdown}_{rmEmbedName}_{rmGenName}_output_{datetime.datetime.now().strftime('%d-%B-%
|
| 543 |
# Save to file inside Space
|
| 544 |
with open(fileName, "w") as f:
|
| 545 |
f.write(json_str)
|
|
|
|
| 7 |
import numpy as np
|
| 8 |
import matplotlib
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
# === Misc ===
|
| 11 |
import json
|
| 12 |
import gradio as gr
|
|
|
|
| 13 |
|
| 14 |
import datetime
|
| 15 |
|
|
|
|
| 105 |
return orig_ans, y_metrics
|
| 106 |
return "No answer found.", "No metrics found."
|
| 107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
def plot_subset_metrics_old1(subset_name):
|
| 110 |
"""Generate a bar plot of key metrics for a given subset with proper title display."""
|
|
|
|
| 164 |
except Exception as e:
|
| 165 |
print(f"Plotting error: {str(e)}")
|
| 166 |
|
|
|
|
| 167 |
def plot_chunking_strategies_old1(subset_name):
|
| 168 |
"""Visualize chunking strategy distribution with enhanced formatting."""
|
| 169 |
try:
|
|
|
|
| 481 |
rmEmbedName = embed_dropdown.replace("/", ":")
|
| 482 |
rmGenName = generator_dropdown.replace("/", ":")
|
| 483 |
|
| 484 |
+
fileName = f"{subset_dropdown}_{noOfQuestions}_{chunking_dropdown}_{rmEmbedName}_{rmGenName}_output_{datetime.datetime.now().strftime('%d-%B-%H-%M')}.json"
|
| 485 |
# Save to file inside Space
|
| 486 |
with open(fileName, "w") as f:
|
| 487 |
f.write(json_str)
|