Spaces:

davidr70
/

eval_results

Runtime error

App Files Community

davidr70 committed on May 6, 2025

Commit

0a408c8

1 Parent(s): 5cca310

Fix: take the baseline run from the source finder runs table

Browse files

Files changed (4) hide show

app.py +53 -20
data_access.py +12 -11
load_ground_truth.py +0 -0
eval_tables.py → scripts/eval_tables.py +0 -0

app.py CHANGED Viewed

@@ -1,8 +1,8 @@
 import asyncio
 import gradio as gr
 import pandas as pd
-import logging
 from data_access import get_questions, get_source_finders, get_run_ids, get_baseline_rankers, \
     get_unified_sources, get_source_text, calculate_cumulative_statistics_for_all_questions, get_metadata, \
@@ -10,6 +10,8 @@ from data_access import get_questions, get_source_finders, get_run_ids, get_base
 logger = logging.getLogger(__name__)
 # Initialize data at the module level
 questions = []
 source_finders = []
@@ -22,9 +24,11 @@ run_ids = []
 available_run_id_dict = {}
 finder_options = []
 previous_run_id = "initial_run"
 run_id_dropdown = None
 # Get all questions
 # Initialize data in a single async function
@@ -36,7 +40,6 @@ async def initialize_data():
         source_finders = await get_source_finders(conn)
         baseline_rankers = await get_baseline_rankers(conn)
-    baseline_rankers_dict = {f["name"]: f["id"] for f in baseline_rankers}
     # Convert to dictionaries for easier lookup
     questions_dict = {q["text"]: q["id"] for q in questions}
     baseline_rankers_dict = {f["name"]: f["id"] for f in baseline_rankers}
@@ -46,9 +49,32 @@ async def initialize_data():
     question_options = [q['text'] for q in questions]
     finder_options = [s["name"] for s in source_finders]
     baseline_ranker_options = [b["name"] for b in baseline_rankers]
-def update_sources_list(question_option, source_finder_id, run_id: str, baseline_ranker_id: str, evt: gr.EventData = None):
     global previous_run_id
     if evt:
         logger.info(f"event: {evt.target.elem_id}")
@@ -70,27 +96,30 @@ async def update_sources_list_async(question_option, source_finder_name, run_id,
         if type(baseline_ranker_name) == list:
             baseline_ranker_name = baseline_ranker_name[0]
-        baseline_ranker_id_int = 1 if len(baseline_ranker_name) == 0 else baseline_rankers_dict.get(baseline_ranker_name)
         if len(source_finder_name):
             finder_id_int = source_finders_dict.get(source_finder_name)
         else:
             finder_id_int = None
-        if question_option == "All questions":
             if finder_id_int:
                 if run_id is None:
                     available_run_id_dict = await get_run_ids(conn, finder_id_int)
                     run_id = list(available_run_id_dict.keys())[0]
                     previous_run_id = run_id
                 run_id_int = available_run_id_dict.get(run_id)
-                all_stats = await calculate_cumulative_statistics_for_all_questions(conn, run_id_int, baseline_ranker_id_int)
             else:
                 run_id_options = list(available_run_id_dict.keys())
                 all_stats = None
             run_id_options = list(available_run_id_dict.keys())
-            return None, all_stats, gr.Dropdown(choices=run_id_options, value=run_id), "Select Run Id and source finder to see results", ""
         # Extract question ID from selection
         question_id = questions_dict.get(question_option)
@@ -102,8 +131,6 @@ async def update_sources_list_async(question_option, source_finder_name, run_id,
         previous_run_id = run_id
         run_id_int = available_run_id_dict.get(run_id)
         source_runs = None
         stats = None
         # Get source runs data
@@ -116,7 +143,8 @@ async def update_sources_list_async(question_option, source_finder_name, run_id,
             return None, None, run_id_options, "No results found for the selected filters",
         # Format table columns
-        columns_to_display = ['sugya_id', 'in_baseline', 'baseline_rank', 'in_source_run', 'source_run_rank', 'tractate',
                               'folio', 'reason']
         df_display = df[columns_to_display] if all(col in df.columns for col in columns_to_display) else df
@@ -147,6 +175,7 @@ async def handle_row_selection_async(evt: gr.SelectData):
 def handle_row_selection(evt: gr.SelectData):
     return asyncio.run(handle_row_selection_async(evt))
 # Create Gradio app
 # Ensure we clean up when done
@@ -162,7 +191,7 @@ async def main():
                     with gr.Column(scale=1):
                         # Main content area
                         question_dropdown = gr.Dropdown(
-                            choices=["All questions"] + question_options,
                             label="Select Question",
                             value=None,
                             interactive=True,
@@ -186,7 +215,7 @@ async def main():
                         )
                     with gr.Column(scale=1):
                         run_id_dropdown = gr.Dropdown(
-                            choices=[],
                             allow_custom_value=True,
                             label="Run id for Question and source finder",
                             interactive=True,
@@ -201,7 +230,6 @@ async def main():
                 gr.Markdown(f"Total Questions: {len(questions)}")
                 gr.Markdown(f"Source Finders: {len(source_finders)}")
         with gr.Row():
             result_text = gr.Markdown("Select a question to view source runs")
         with gr.Row():
@@ -221,14 +249,15 @@ async def main():
             metadata_text = gr.TextArea(
                 label="Metadata of Source Finder for Selected Question",
                 elem_id="metadata",
-                lines = 2
             )
         with gr.Row():
             gr.Markdown("# Sources Found")
         with gr.Row():
             with gr.Column(scale=3):
                 results_table = gr.DataFrame(
-                    headers=['id', 'tractate', 'folio', 'in_baseline', 'baseline_rank', 'in_source_run', 'source_run_rank', 'source_reason', 'metadata'],
                     interactive=False
                 )
             with gr.Column(scale=1):
@@ -246,8 +275,6 @@ async def main():
             #     visible=True
             # )
         # Set up event handlers
         results_table.select(
             handle_row_selection,
@@ -255,15 +282,22 @@ async def main():
             outputs=source_text
         )
-        question_dropdown.change(
             update_sources_list,
             inputs=[question_dropdown, source_finder_dropdown, run_id_dropdown, baseline_rankers_dropdown],
             outputs=[results_table, statistics_table, run_id_dropdown, result_text, metadata_text]
         )
-        source_finder_dropdown.change(
             update_sources_list,
             inputs=[question_dropdown, source_finder_dropdown, run_id_dropdown, baseline_rankers_dropdown],
             # outputs=[run_id_dropdown, results_table, result_text, download_button]
             outputs=[results_table, statistics_table, run_id_dropdown, result_text, metadata_text]
         )
@@ -274,7 +308,6 @@ async def main():
             outputs=[results_table, statistics_table, run_id_dropdown, result_text, metadata_text]
         )
     app.queue()
     app.launch()

 import asyncio
+import logging
 import gradio as gr
 import pandas as pd
 from data_access import get_questions, get_source_finders, get_run_ids, get_baseline_rankers, \
     get_unified_sources, get_source_text, calculate_cumulative_statistics_for_all_questions, get_metadata, \
 logger = logging.getLogger(__name__)
+ALL_QUESTIONS_STR = "All questions"
 # Initialize data at the module level
 questions = []
 source_finders = []
 available_run_id_dict = {}
 finder_options = []
 previous_run_id = "initial_run"
+run_id_options = []
 run_id_dropdown = None
 # Get all questions
 # Initialize data in a single async function
         source_finders = await get_source_finders(conn)
         baseline_rankers = await get_baseline_rankers(conn)
     # Convert to dictionaries for easier lookup
     questions_dict = {q["text"]: q["id"] for q in questions}
     baseline_rankers_dict = {f["name"]: f["id"] for f in baseline_rankers}
     question_options = [q['text'] for q in questions]
     finder_options = [s["name"] for s in source_finders]
     baseline_ranker_options = [b["name"] for b in baseline_rankers]
+    update_run_ids(ALL_QUESTIONS_STR, list(source_finders_dict.keys())[0])
+def update_run_ids(question_option, source_finder_name):
+    return asyncio.run(update_run_ids_async(question_option, source_finder_name))
+async def update_run_ids_async(question_option, source_finder_name):
+    global previous_run_id, available_run_id_dict, run_id_options
+    async with get_async_connection() as conn:
+        finder_id_int = source_finders_dict.get(source_finder_name)
+        if question_option and question_option != ALL_QUESTIONS_STR:
+            question_id = questions_dict.get(question_option)
+            available_run_id_dict = await get_run_ids(conn, finder_id_int, question_id)
+        else:
+            available_run_id_dict = await get_run_ids(conn, finder_id_int)
+        run_id = list(available_run_id_dict.keys())[0]
+        previous_run_id = run_id
+        run_id_options = list(available_run_id_dict.keys())
+        return None, None, gr.Dropdown(choices=run_id_options,
+                                       value=run_id), "Select Question to see results", ""
+def update_sources_list(question_option, source_finder_id, run_id: str, baseline_ranker_id: str,
+                        evt: gr.EventData = None):
     global previous_run_id
     if evt:
         logger.info(f"event: {evt.target.elem_id}")
         if type(baseline_ranker_name) == list:
             baseline_ranker_name = baseline_ranker_name[0]
+        baseline_ranker_id_int = 1 if len(baseline_ranker_name) == 0 else baseline_rankers_dict.get(
+            baseline_ranker_name)
         if len(source_finder_name):
             finder_id_int = source_finders_dict.get(source_finder_name)
         else:
             finder_id_int = None
+        if question_option == ALL_QUESTIONS_STR:
             if finder_id_int:
                 if run_id is None:
                     available_run_id_dict = await get_run_ids(conn, finder_id_int)
                     run_id = list(available_run_id_dict.keys())[0]
                     previous_run_id = run_id
                 run_id_int = available_run_id_dict.get(run_id)
+                all_stats = await calculate_cumulative_statistics_for_all_questions(conn, run_id_int,
+                                                                                    baseline_ranker_id_int)
             else:
                 run_id_options = list(available_run_id_dict.keys())
                 all_stats = None
             run_id_options = list(available_run_id_dict.keys())
+            return None, all_stats, gr.Dropdown(choices=run_id_options,
+                                                value=run_id), "Select Run Id and source finder to see results", ""
         # Extract question ID from selection
         question_id = questions_dict.get(question_option)
         previous_run_id = run_id
         run_id_int = available_run_id_dict.get(run_id)
         source_runs = None
         stats = None
         # Get source runs data
             return None, None, run_id_options, "No results found for the selected filters",
         # Format table columns
+        columns_to_display = ['sugya_id', 'in_baseline', 'baseline_rank', 'in_source_run', 'source_run_rank',
+                              'tractate',
                               'folio', 'reason']
         df_display = df[columns_to_display] if all(col in df.columns for col in columns_to_display) else df
 def handle_row_selection(evt: gr.SelectData):
     return asyncio.run(handle_row_selection_async(evt))
 # Create Gradio app
 # Ensure we clean up when done
                     with gr.Column(scale=1):
                         # Main content area
                         question_dropdown = gr.Dropdown(
+                            choices=[ALL_QUESTIONS_STR] + question_options,
                             label="Select Question",
                             value=None,
                             interactive=True,
                         )
                     with gr.Column(scale=1):
                         run_id_dropdown = gr.Dropdown(
+                            choices=run_id_options,
                             allow_custom_value=True,
                             label="Run id for Question and source finder",
                             interactive=True,
                 gr.Markdown(f"Total Questions: {len(questions)}")
                 gr.Markdown(f"Source Finders: {len(source_finders)}")
         with gr.Row():
             result_text = gr.Markdown("Select a question to view source runs")
         with gr.Row():
             metadata_text = gr.TextArea(
                 label="Metadata of Source Finder for Selected Question",
                 elem_id="metadata",
+                lines=2
             )
         with gr.Row():
             gr.Markdown("# Sources Found")
         with gr.Row():
             with gr.Column(scale=3):
                 results_table = gr.DataFrame(
+                    headers=['id', 'tractate', 'folio', 'in_baseline', 'baseline_rank', 'in_source_run',
+                             'source_run_rank', 'source_reason', 'metadata'],
                     interactive=False
                 )
             with gr.Column(scale=1):
             #     visible=True
             # )
         # Set up event handlers
         results_table.select(
             handle_row_selection,
             outputs=source_text
         )
+        baseline_rankers_dropdown.change(
             update_sources_list,
             inputs=[question_dropdown, source_finder_dropdown, run_id_dropdown, baseline_rankers_dropdown],
             outputs=[results_table, statistics_table, run_id_dropdown, result_text, metadata_text]
         )
+        question_dropdown.change(
             update_sources_list,
             inputs=[question_dropdown, source_finder_dropdown, run_id_dropdown, baseline_rankers_dropdown],
+            outputs=[results_table, statistics_table, run_id_dropdown, result_text, metadata_text]
+        )
+        source_finder_dropdown.change(
+            update_run_ids,
+            inputs=[question_dropdown, source_finder_dropdown],
             # outputs=[run_id_dropdown, results_table, result_text, download_button]
             outputs=[results_table, statistics_table, run_id_dropdown, result_text, metadata_text]
         )
             outputs=[results_table, statistics_table, run_id_dropdown, result_text, metadata_text]
         )
     app.queue()
     app.launch()

data_access.py CHANGED Viewed

@@ -15,6 +15,7 @@ load_dotenv()
 @asynccontextmanager
 async def get_async_connection(schema="talmudexplore"):
     """Get a connection for the current request."""
     try:
         # Create a single connection without relying on a shared pool
         conn = await asyncpg.connect(
@@ -27,7 +28,8 @@ async def get_async_connection(schema="talmudexplore"):
         await conn.execute(f'SET search_path TO {schema}')
         yield conn
     finally:
-        await conn.close()
 async def get_questions(conn: asyncpg.Connection):
@@ -73,8 +75,13 @@ async def get_run_ids(conn: asyncpg.Connection, source_finder_id: int, question_
 async def get_baseline_rankers(conn: asyncpg.Connection):
-    rankers = await conn.fetch("SELECT id, ranker FROM rankers ORDER BY id")
-    return [{"id": f["id"], "name": f["ranker"]} for f in rankers]
 async def calculate_baseline_vs_source_stats_for_question(conn: asyncpg.Connection, baseline_sources , source_runs_sources):
     # for a given question_id and source_finder_id and run_id calculate the baseline vs source stats
@@ -203,14 +210,8 @@ async def get_unified_sources(conn: asyncpg.Connection, question_id: int, source
                  """
     source_runs = await conn.fetch(query_runs, question_id, source_finder_run_id)
     # Get sources from baseline_sources
-    query_baseline = """
-                     SELECT tb.tractate_chunk_id as id, bs.rank as baseline_rank, bs.tractate, bs.folio
-                     FROM baseline_sources bs
-                              join talmud_bavli tb on bs.sugya_id = tb.xml_id
-                     WHERE bs.question_id = $1
-                       AND bs.ranker_id = $2
-                     """
-    baseline_sources = await conn.fetch(query_baseline, question_id, ranker_id)
     stats_df = await calculate_baseline_vs_source_stats_for_question(conn, baseline_sources, source_runs)
     # Convert to dictionaries for easier lookup
     source_runs_dict = {s["id"]: dict(s) for s in source_runs}

 @asynccontextmanager
 async def get_async_connection(schema="talmudexplore"):
     """Get a connection for the current request."""
+    conn = None
     try:
         # Create a single connection without relying on a shared pool
         conn = await asyncpg.connect(
         await conn.execute(f'SET search_path TO {schema}')
         yield conn
     finally:
+        if conn:
+            await conn.close()
 async def get_questions(conn: asyncpg.Connection):
 async def get_baseline_rankers(conn: asyncpg.Connection):
+    query = """
+    select sfr.id, sf.source_finder_type, sfr.description from talmudexplore.source_finder_runs sfr
+    join source_finders sf on sf.id = sfr.source_finder_id
+    order by sf.id
+    """
+    rankers = await conn.fetch(query)
+    return [{"id": r["id"], "name": f"{r['source_finder_type']} : {r['description']}"} for r in rankers]
 async def calculate_baseline_vs_source_stats_for_question(conn: asyncpg.Connection, baseline_sources , source_runs_sources):
     # for a given question_id and source_finder_id and run_id calculate the baseline vs source stats
                  """
     source_runs = await conn.fetch(query_runs, question_id, source_finder_run_id)
     # Get sources from baseline_sources
+    baseline_query = query_runs.replace("source_rank", "baseline_rank")
+    baseline_sources = await conn.fetch(baseline_query, question_id, ranker_id)
     stats_df = await calculate_baseline_vs_source_stats_for_question(conn, baseline_sources, source_runs)
     # Convert to dictionaries for easier lookup
     source_runs_dict = {s["id"]: dict(s) for s in source_runs}

load_ground_truth.py DELETED Viewed

File without changes

eval_tables.py → scripts/eval_tables.py RENAMED Viewed

File without changes