Spaces:
Build error
Build error
Upload 4 files
Browse files
- app.py +6 -5
- data_processing.py +10 -1
- evaluation.py +3 -2
- retrieval.py +2 -0
app.py
CHANGED
|
@@ -70,6 +70,7 @@ st.sidebar.title("Recent Questions")
|
|
| 70 |
recent_data = load_recent_questions()
|
| 71 |
for q in reversed(recent_data["questions"]): # Show latest first
|
| 72 |
st.sidebar.write(f"🔹 {q['question']}")
|
|
|
|
| 73 |
|
| 74 |
st.sidebar.markdown("---") # Separator
|
| 75 |
|
|
@@ -80,15 +81,15 @@ import matplotlib.pyplot as plt
|
|
| 80 |
# for visualization
|
| 81 |
st.sidebar.title("Analytics")
|
| 82 |
|
| 83 |
-
context_relevance = [q["metrics"]["context_relevance"] for q in recent_data["questions"]]
|
| 84 |
response_time = [q["metrics"]["response_time"] for q in recent_data["questions"]]
|
| 85 |
-
labels = [f"Q{i+1}" for i in range(len(
|
| 86 |
|
| 87 |
fig, ax = plt.subplots()
|
| 88 |
-
ax.plot(labels, context_relevance, marker="o", label="Context Relevance")
|
| 89 |
-
ax.plot(labels, response_time, marker="s", label="Response Time (sec)")
|
| 90 |
ax.set_xlabel("Recent Questions")
|
| 91 |
-
ax.set_ylabel("
|
| 92 |
ax.legend()
|
| 93 |
st.sidebar.pyplot(fig)
|
| 94 |
|
|
|
|
| 70 |
# Sidebar list of the stored recent questions, newest first, each followed
# by a JSON view of its metrics.
recent_data = load_recent_questions()
for q in reversed(recent_data["questions"]):  # Show latest first
    st.sidebar.write(f"🔹 {q['question']}")
    # Pass the metrics mapping itself. Wrapping it in braces, as in
    # `{q['metrics']}`, builds a set literal, and a dict is unhashable,
    # so that form raises TypeError before anything is rendered.
    st.json(q["metrics"])

st.sidebar.markdown("---")  # Separator
|
| 76 |
|
|
|
|
| 81 |
# for visualization
st.sidebar.title("Analytics")

# One response-time value per stored question, read from each entry's
# metrics["response_time"].
# NOTE(review): context relevance stays out of the chart, as it was already
# disabled here; re-enable only once every stored entry carries that metric.
response_time = [q["metrics"]["response_time"] for q in recent_data["questions"]]
labels = [f"Q{i}" for i in range(1, len(response_time) + 1)]  # Labels for X-axis

fig, ax = plt.subplots()
# Draw the series that the y-axis label describes. Without a labelled plot
# the figure is empty and ax.legend() has no entries to show.
ax.plot(labels, response_time, marker="s", label="Response Time (sec)")
ax.set_xlabel("Recent Questions")
ax.set_ylabel("Time Taken for Response")
ax.legend()
st.sidebar.pyplot(fig)
|
| 95 |
|
data_processing.py
CHANGED
|
@@ -33,6 +33,7 @@ ragbench = {}
|
|
| 33 |
index = None
|
| 34 |
chunk_docs = []
|
| 35 |
documents = []
|
|
|
|
| 36 |
|
| 37 |
# Ensure data directory exists
|
| 38 |
os.makedirs("data_local", exist_ok=True)
|
|
@@ -92,6 +93,14 @@ def load_ragbench():
|
|
| 92 |
ragbench[dataset] = load_dataset("rungalileo/ragbench", dataset)
|
| 93 |
return ragbench
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
def load_faiss(query_dataset):
|
| 96 |
global index
|
| 97 |
faiss_index_path = f"data_local/{query_dataset}_quantized.faiss"
|
|
@@ -135,7 +144,7 @@ def save_recent_question(question, metrics):
|
|
| 135 |
"question": question,
|
| 136 |
"metrics": metrics
|
| 137 |
})
|
| 138 |
-
|
| 139 |
# Keep only the last 5 questions
|
| 140 |
data["questions"] = data["questions"][-5:]
|
| 141 |
|
|
|
|
| 33 |
index = None
|
| 34 |
chunk_docs = []
|
| 35 |
documents = []
|
| 36 |
+
query_dataset_data = {}
|
| 37 |
|
| 38 |
# Ensure data directory exists
|
| 39 |
os.makedirs("data_local", exist_ok=True)
|
|
|
|
| 93 |
ragbench[dataset] = load_dataset("rungalileo/ragbench", dataset)
|
| 94 |
return ragbench
|
| 95 |
|
| 96 |
+
# Name whose data is currently held in `query_dataset_data`; None until the
# first successful load.
_loaded_query_dataset_name = None


def load_query_dataset(query_dataset):
    """Return the "rungalileo/ragbench" data for *query_dataset*, cached.

    The most recently loaded result is kept in the module-level
    ``query_dataset_data`` and reused while the same name is requested.
    Asking for a different name triggers a fresh ``load_dataset`` call, so
    the result always matches the argument instead of whatever was loaded
    first.

    Args:
        query_dataset: Value passed as the second argument to
            ``load_dataset`` (presumably the ragbench subset name, e.g.
            'hotpotqa' -- confirm against callers).

    Returns:
        Whatever ``load_dataset("rungalileo/ragbench", query_dataset)``
        returned for this name.
    """
    global query_dataset_data, _loaded_query_dataset_name

    # Reuse the cache only when it is populated AND belongs to this name.
    if query_dataset_data and _loaded_query_dataset_name == query_dataset:
        return query_dataset_data

    query_dataset_data = load_dataset("rungalileo/ragbench", query_dataset)
    _loaded_query_dataset_name = query_dataset
    return query_dataset_data
|
| 103 |
+
|
| 104 |
def load_faiss(query_dataset):
|
| 105 |
global index
|
| 106 |
faiss_index_path = f"data_local/{query_dataset}_quantized.faiss"
|
|
|
|
| 144 |
"question": question,
|
| 145 |
"metrics": metrics
|
| 146 |
})
|
| 147 |
+
|
| 148 |
# Keep only the last 5 questions
|
| 149 |
data["questions"] = data["questions"][-5:]
|
| 150 |
|
evaluation.py
CHANGED
|
@@ -4,7 +4,8 @@ from sklearn.metrics import mean_squared_error, roc_auc_score
|
|
| 4 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 5 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
|
| 7 |
-
from
|
|
|
|
| 8 |
|
| 9 |
global ground_truth_answer, ground_truth_metrics
|
| 10 |
|
|
@@ -92,7 +93,7 @@ def compute_rmse(predicted_values, ground_truth_values):
|
|
| 92 |
return np.sqrt(mean_squared_error(ground_truth_values, predicted_values))
|
| 93 |
|
| 94 |
def calculate_metrics(question, response, docs, time_taken):
|
| 95 |
-
data =
|
| 96 |
ground_truth_answer = retrieve_ground_truths(question, data) # Store the ground truth answer
|
| 97 |
|
| 98 |
# Ensure ground_truth_answer is not empty before proceeding
|
|
|
|
| 4 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 5 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
|
| 7 |
+
from retrieval import query_dataset
|
| 8 |
+
from data_processing import load_query_dataset
|
| 9 |
|
| 10 |
global ground_truth_answer, ground_truth_metrics
|
| 11 |
|
|
|
|
| 93 |
return np.sqrt(mean_squared_error(ground_truth_values, predicted_values))
|
| 94 |
|
| 95 |
def calculate_metrics(question, response, docs, time_taken):
|
| 96 |
+
data = load_query_dataset(query_dataset)
|
| 97 |
ground_truth_answer = retrieve_ground_truths(question, data) # Store the ground truth answer
|
| 98 |
|
| 99 |
# Ensure ground_truth_answer is not empty before proceeding
|
retrieval.py
CHANGED
|
@@ -9,9 +9,11 @@ from sentence_transformers import CrossEncoder
|
|
| 9 |
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
|
| 10 |
|
| 11 |
retrieved_docs = None
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
def retrieve_documents_hybrid(query, top_k=5):
|
|
|
|
| 15 |
query_dataset = find_query_dataset(query)
|
| 16 |
|
| 17 |
with open( f"data_local/{query_dataset}_chunked_docs.json", "r") as f:
|
|
|
|
| 9 |
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
|
| 10 |
|
| 11 |
retrieved_docs = None
|
| 12 |
+
query_dataset = 'hotpotqa'
|
| 13 |
|
| 14 |
|
| 15 |
def retrieve_documents_hybrid(query, top_k=5):
|
| 16 |
+
global query_dataset
|
| 17 |
query_dataset = find_query_dataset(query)
|
| 18 |
|
| 19 |
with open( f"data_local/{query_dataset}_chunked_docs.json", "r") as f:
|