Spaces:
Sleeping
Sleeping
Ajaykanth Maddi committed on
Commit ·
813913d
1
Parent(s): d167e4f
Code Changes - Reranking Implementation
Browse files
app.py
CHANGED
|
@@ -169,7 +169,7 @@ initial_plot = plot_subset_metrics(initial_subset) if initial_subset else "No da
|
|
| 169 |
def generate_advance_report(subset_dropdown, dataset_type_dropdown, chunking_dropdown,
|
| 170 |
embed_dropdown, generator_dropdown,
|
| 171 |
chunk_count, retriever_type, noOfQuestions,
|
| 172 |
-
reranking_checkbox, evaluator_dropdown):
|
| 173 |
|
| 174 |
export_data = {
|
| 175 |
"metadata": {
|
|
@@ -188,6 +188,7 @@ def generate_advance_report(subset_dropdown, dataset_type_dropdown, chunking_dro
|
|
| 188 |
"noOfQuestions": noOfQuestions,
|
| 189 |
"retriever_type": retriever_type,
|
| 190 |
"reranking": reranking_checkbox,
|
|
|
|
| 191 |
"evaluator_model": evaluator_dropdown
|
| 192 |
}
|
| 193 |
}
|
|
@@ -244,7 +245,7 @@ def generate_advance_report(subset_dropdown, dataset_type_dropdown, chunking_dro
|
|
| 244 |
def generate_file(subset_dropdown, dataset_type_dropdown,
|
| 245 |
chunking_dropdown, embed_dropdown, generator_dropdown,
|
| 246 |
chunk_count, retriever_type,
|
| 247 |
-
reranking_checkbox, evaluator_dropdown,
|
| 248 |
orig_ans_display, y_metrics_display,
|
| 249 |
gen_ans_display, y_pred_metrics_display,
|
| 250 |
chunks_retrieved_display,
|
|
@@ -268,6 +269,7 @@ def generate_file(subset_dropdown, dataset_type_dropdown,
|
|
| 268 |
"chunk_count": chunk_count,
|
| 269 |
"retriever_type": retriever_type,
|
| 270 |
"reranking": reranking_checkbox,
|
|
|
|
| 271 |
"evaluator_model": evaluator_dropdown
|
| 272 |
},
|
| 273 |
"results": {
|
|
@@ -291,7 +293,7 @@ def generate_file(subset_dropdown, dataset_type_dropdown,
|
|
| 291 |
return json_str, fileName
|
| 292 |
|
| 293 |
def run_rag_pipeline_multiple_questions(subset, chunking, embed_model, retriever, noOfQuestions, retriever_type,
|
| 294 |
-
chunk_count, reranking, evaluator):
|
| 295 |
print(f"Running RAG Pipeline for {noOfQuestions} questions in subset: {subset}")
|
| 296 |
global advanced_analysis
|
| 297 |
try:
|
|
@@ -302,6 +304,9 @@ def run_rag_pipeline_multiple_questions(subset, chunking, embed_model, retriever
|
|
| 302 |
logger.error(f"Failed to load data: {e}")
|
| 303 |
return None
|
| 304 |
|
|
|
|
|
|
|
|
|
|
| 305 |
print("Starting RAG pipeline for {noOfQuestions} questions!!!")
|
| 306 |
|
| 307 |
ragSystemObject = RAGSystem(
|
|
@@ -310,7 +315,8 @@ def run_rag_pipeline_multiple_questions(subset, chunking, embed_model, retriever
|
|
| 310 |
strategy=chunking,
|
| 311 |
chunks=[], # Not needed for loading
|
| 312 |
generator_model_name=retriever,
|
| 313 |
-
retriever_model_name=embed_model
|
|
|
|
| 314 |
)
|
| 315 |
|
| 316 |
# 3. Load or use stored vector DB
|
|
@@ -427,42 +433,6 @@ def run_rag_pipeline(subset, question, custom_question, chunking, embed_model, r
|
|
| 427 |
|
| 428 |
y_pred_metrics, evaluator_json_output = _evaluate_using_groq(context_docs, final_question, generated_answer)
|
| 429 |
|
| 430 |
-
|
| 431 |
-
# response_sentences = form_response_sentences(generated_answer)
|
| 432 |
-
|
| 433 |
-
# # print(f"\nResponse Sentences: {response_sentences}")
|
| 434 |
-
|
| 435 |
-
# print(f"Length of Response Sentences: {len(response_sentences)}")
|
| 436 |
-
# print(f"Length of Document Sentences : {len(document_sentences)}")
|
| 437 |
-
|
| 438 |
-
# y_pred_metrics = {
|
| 439 |
-
# "relevance_score": "NA",
|
| 440 |
-
# "utilization_score": "NA",
|
| 441 |
-
# "completeness_score": "NA",
|
| 442 |
-
# "adherence_score": "NA"
|
| 443 |
-
# }
|
| 444 |
-
|
| 445 |
-
# # Call evaluator with the right variables
|
| 446 |
-
# try:
|
| 447 |
-
# grok_api_key = os.environ.get("GROQ_API_KEY") # Safely loaded from HF Secrets
|
| 448 |
-
|
| 449 |
-
# evaluator = RAGEvaluator(
|
| 450 |
-
# use_groq=True,
|
| 451 |
-
# groq_api_key=grok_api_key,
|
| 452 |
-
# groq_model="llama3-70b-8192"
|
| 453 |
-
# )
|
| 454 |
-
|
| 455 |
-
# result = evaluator.evaluate(document_sentences, question, response_sentences)
|
| 456 |
-
# print(f"\nResult----\n: {result}")
|
| 457 |
-
# if result is not None:
|
| 458 |
-
# y_pred_metrics = evaluator.extract_trace_metrics_from_json(result, len(document_sentences))
|
| 459 |
-
# evaluator_json_output = json.dumps(result, indent=4)
|
| 460 |
-
# print(f"Result: {evaluator_json_output}")
|
| 461 |
-
# print(f"Metrics: {y_pred_metrics}")
|
| 462 |
-
# else:
|
| 463 |
-
# print("No result obtained for this question")
|
| 464 |
-
# except Exception as e:
|
| 465 |
-
# print(f"Exception Raised in evaluation / extract_trace_metrics_from_json. Details: {e}")
|
| 466 |
|
| 467 |
# Format as list of lists
|
| 468 |
formatted_chunks = [
|
|
@@ -609,7 +579,7 @@ with gr.Blocks(
|
|
| 609 |
subset_dropdown,
|
| 610 |
chunking_dropdown, embed_dropdown, generator_dropdown,
|
| 611 |
noOfQuestions, retriever_type, chunk_count,
|
| 612 |
-
reranking_checkbox, evaluator_dropdown
|
| 613 |
]
|
| 614 |
)
|
| 615 |
|
|
@@ -635,7 +605,7 @@ with gr.Blocks(
|
|
| 635 |
inputs=[subset_dropdown, dataset_type_dropdown,
|
| 636 |
chunking_dropdown, embed_dropdown, generator_dropdown,
|
| 637 |
chunk_count, retriever_type,
|
| 638 |
-
reranking_checkbox, evaluator_dropdown,
|
| 639 |
orig_ans_display, y_metrics_display,
|
| 640 |
gen_ans_display, y_pred_metrics_display,
|
| 641 |
chunks_retrieved_display,
|
|
@@ -649,7 +619,7 @@ with gr.Blocks(
|
|
| 649 |
inputs=[subset_dropdown, dataset_type_dropdown,
|
| 650 |
chunking_dropdown, embed_dropdown, generator_dropdown,
|
| 651 |
chunk_count, retriever_type, noOfQuestions,
|
| 652 |
-
reranking_checkbox, evaluator_dropdown
|
| 653 |
],
|
| 654 |
outputs=[json_output, download_file]
|
| 655 |
)
|
|
|
|
| 169 |
def generate_advance_report(subset_dropdown, dataset_type_dropdown, chunking_dropdown,
|
| 170 |
embed_dropdown, generator_dropdown,
|
| 171 |
chunk_count, retriever_type, noOfQuestions,
|
| 172 |
+
reranking_checkbox, reranking_dropdown, evaluator_dropdown):
|
| 173 |
|
| 174 |
export_data = {
|
| 175 |
"metadata": {
|
|
|
|
| 188 |
"noOfQuestions": noOfQuestions,
|
| 189 |
"retriever_type": retriever_type,
|
| 190 |
"reranking": reranking_checkbox,
|
| 191 |
+
"reranking_method": reranking_dropdown if reranking_checkbox else None,
|
| 192 |
"evaluator_model": evaluator_dropdown
|
| 193 |
}
|
| 194 |
}
|
|
|
|
| 245 |
def generate_file(subset_dropdown, dataset_type_dropdown,
|
| 246 |
chunking_dropdown, embed_dropdown, generator_dropdown,
|
| 247 |
chunk_count, retriever_type,
|
| 248 |
+
reranking_checkbox, reranking_dropdown, evaluator_dropdown,
|
| 249 |
orig_ans_display, y_metrics_display,
|
| 250 |
gen_ans_display, y_pred_metrics_display,
|
| 251 |
chunks_retrieved_display,
|
|
|
|
| 269 |
"chunk_count": chunk_count,
|
| 270 |
"retriever_type": retriever_type,
|
| 271 |
"reranking": reranking_checkbox,
|
| 272 |
+
"reranking_method": reranking_dropdown if reranking_checkbox else None,
|
| 273 |
"evaluator_model": evaluator_dropdown
|
| 274 |
},
|
| 275 |
"results": {
|
|
|
|
| 293 |
return json_str, fileName
|
| 294 |
|
| 295 |
def run_rag_pipeline_multiple_questions(subset, chunking, embed_model, retriever, noOfQuestions, retriever_type,
|
| 296 |
+
chunk_count, reranking, reranking_dropdown, evaluator):
|
| 297 |
print(f"Running RAG Pipeline for {noOfQuestions} questions in subset: {subset}")
|
| 298 |
global advanced_analysis
|
| 299 |
try:
|
|
|
|
| 304 |
logger.error(f"Failed to load data: {e}")
|
| 305 |
return None
|
| 306 |
|
| 307 |
+
ranking_method = reranking_dropdown if reranking else None
|
| 308 |
+
print(f"Using reranking: {reranking}, method: {ranking_method}")
|
| 309 |
+
|
| 310 |
print("Starting RAG pipeline for {noOfQuestions} questions!!!")
|
| 311 |
|
| 312 |
ragSystemObject = RAGSystem(
|
|
|
|
| 315 |
strategy=chunking,
|
| 316 |
chunks=[], # Not needed for loading
|
| 317 |
generator_model_name=retriever,
|
| 318 |
+
retriever_model_name=embed_model,
|
| 319 |
+
reranker_model_name=ranking_method
|
| 320 |
)
|
| 321 |
|
| 322 |
# 3. Load or use stored vector DB
|
|
|
|
| 433 |
|
| 434 |
y_pred_metrics, evaluator_json_output = _evaluate_using_groq(context_docs, final_question, generated_answer)
|
| 435 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 436 |
|
| 437 |
# Format as list of lists
|
| 438 |
formatted_chunks = [
|
|
|
|
| 579 |
subset_dropdown,
|
| 580 |
chunking_dropdown, embed_dropdown, generator_dropdown,
|
| 581 |
noOfQuestions, retriever_type, chunk_count,
|
| 582 |
+
reranking_checkbox, reranking_dropdown, evaluator_dropdown
|
| 583 |
]
|
| 584 |
)
|
| 585 |
|
|
|
|
| 605 |
inputs=[subset_dropdown, dataset_type_dropdown,
|
| 606 |
chunking_dropdown, embed_dropdown, generator_dropdown,
|
| 607 |
chunk_count, retriever_type,
|
| 608 |
+
reranking_checkbox, reranking_dropdown, evaluator_dropdown,
|
| 609 |
orig_ans_display, y_metrics_display,
|
| 610 |
gen_ans_display, y_pred_metrics_display,
|
| 611 |
chunks_retrieved_display,
|
|
|
|
| 619 |
inputs=[subset_dropdown, dataset_type_dropdown,
|
| 620 |
chunking_dropdown, embed_dropdown, generator_dropdown,
|
| 621 |
chunk_count, retriever_type, noOfQuestions,
|
| 622 |
+
reranking_checkbox, reranking_dropdown, evaluator_dropdown
|
| 623 |
],
|
| 624 |
outputs=[json_output, download_file]
|
| 625 |
)
|