Spaces:

franlucc
/

Candidates_viewer_NPR_challenge

Sleeping

App Files Files Community

franlucc committed on Mar 11, 2025

Commit

a72f911

1 Parent(s): da8939e

update

Browse files

Files changed (2) hide show

candidates.py +54 -75
output.csv +2 -2

candidates.py CHANGED Viewed

@@ -6,22 +6,13 @@ import argparse
 from pathlib import Path
 import pandas as pd
 from metrics import load_results
 import numpy as np
 import json
 import ast
 import gradio as gr
-from typing import List
-from hashlib import sha256
 import re
-def _query_format_models(models: List[str]) -> str:
-    """
-    Format model names for the SQL query `WHERE <this_model> IN <models>
-    """
-    return "('" + "','".join(["completions-"+m for m in models]) + "')"
-def _hash(text: str) -> str:
-    return sha256(bytes(text, "utf-8")).hexdigest()
 SQL_QUERY = """
 WITH AllResults AS (
@@ -39,24 +30,7 @@ SELECT prompt_id, model, completion, answer as solution, prompt
 FROM AllResults
 WHERE
     AllResults.model IN {models}
-""".format(models=_query_format_models(['r1_distill_qwen32b','r1','gemini2']))
-def print_info(db_connection):
-    tables = db_connection.execute("SHOW TABLES").fetchall()
-    # Iterate over each table and print its name and columns
-    for table in tables:
-        table_name = table[0]
-        print(f"Table: {table_name}")
-        # Get the columns for this table
-        columns = db_connection.execute(f"DESCRIBE {table_name}").fetchall()
-        # Print the column details
-        for column in columns:
-            print(f"  - {column[0]} ({column[1]})")  # column[0] is the column name, column[1] is the data type
-        print()  # Add a blank line between tables for readability
 def _parse(x):
     if isinstance(x, str):
@@ -82,27 +56,15 @@ def _concat(series: pd.Series) -> np.array:
 def check_candidates(candidates: pd.DataFrame, merged_df: pd.DataFrame):
     """
     Perform a variety of sanity checks ie:
-    - all chunks are present
     - all attempted answers are in the completion
     """
-    MANUALLY_CHECKED_SPECIAL_CASES = [
-        "4fd9a9adf162fe558cd94ab7ebcf8f42882873dca133aa1a4620572caa364c0c", # extracted as a str list, eg. `FIED, GOA`
-        "7dd4a475af16d67ed896275674d6a9b51911a3ee22aaca84411fb0a946245fa1"
-    ]
     for _,row in merged_df.iterrows():
         candidates = json.loads(row["candidates"])
         comp = row["completion"].lower()
         for c in candidates:
-            assert c.lower() in comp or \
-                    c.lower() in re.sub(r'[^a-z0-9]', '', comp) or \
-                    row["_original_completion_hash"] in MANUALLY_CHECKED_SPECIAL_CASES, \
                 json.dumps({"candidate":c, "completion":row["completion"], "hash": row["_original_completion_hash"]}, indent=4)
-    # grouped = candidates.groupby(["model","prompt_id"]).agg({"chunk_id": "unique", "num_chunks":"first"})
-    # for _,row in grouped.iterrows():
-    #     assert list(row["chunk_id"]) == range(row["num_chunks"]+1), (row["chunk_id"], row["num_chunks"])
 def launch_app(df: pd.DataFrame, share_demo: bool = False):
     # Define function to display table and toggle completion
@@ -111,16 +73,22 @@ def launch_app(df: pd.DataFrame, share_demo: bool = False):
         example = df.iloc[example_idx]
         # Function to highlight words from the candidates list
-        def highlight_words(text, candidates):
             for word in candidates:
                 # Use word boundaries to ensure we only match whole words
-                text = re.sub(rf'\b({re.escape(word)})\b', r'<mark>\1</mark>', text, flags=re.IGNORECASE)
             return text
         # Highlight words in the 'completion' column
         candidates = json.loads(example['candidates'])
         highlighted_completion = highlight_words(example['completion'], candidates)
         # Create a table with the core columns
         table_html = f"""
         <table>
@@ -130,15 +98,23 @@ def launch_app(df: pd.DataFrame, share_demo: bool = False):
             <tr><td><b>Solution</b></td><td>{example['solution']}</td></tr>
             <tr><td><b>Prompt</b></td><td>{example['prompt']}</td></tr>
             <tr><td><b>Candidates</b></td><td>{candidates}</td></tr>
         </table>
         """
-        # If the toggle is checked, show the 'completion' column with highlighted words
-        if show_completion:
             table_html += f"""
             <br><b>Completion:</b><br>
-            <p>{highlighted_completion}</p>
             """
         return table_html
@@ -148,7 +124,7 @@ def launch_app(df: pd.DataFrame, share_demo: bool = False):
         example_slider = gr.Slider(minimum=0, maximum=len(df)-1, step=1, label="Example", value=0)
         # Toggle button for showing/hiding completion
-        toggle_button = gr.Checkbox(label="Show Completion", value=False)
         with gr.Row():
             gr.HTML('<h1>Candidates Table</h1>')
@@ -158,57 +134,60 @@ def launch_app(df: pd.DataFrame, share_demo: bool = False):
         # Set interaction behavior: update the table when slider or checkbox changes
         example_slider.change(show_table, inputs=[toggle_button, example_slider], outputs=[table_output])
-        toggle_button.change(show_table, inputs=[toggle_button, example_slider], outputs=[table_output])
     # Launch the app
     demo.launch(share=share_demo)
-def main(candidates: Path, output_csv: Path, launch_gradio: bool, share_demo: bool):
     if not output_csv.exists():
         candidates = pd.read_csv(candidates.as_posix())
         conn = load_results()
         completions = conn.sql(SQL_QUERY).df()
         candidates = candidates.groupby(["model","prompt_id","solution","prompt","_original_completion_hash"]).agg({
             "candidates": "unique"
         }).reset_index()
         candidates["candidates"] = candidates["candidates"].apply(lambda x: json.dumps(list(_concat(x))))
-        completions["_original_completion_hash"] = completions["completion"].apply(_hash)
         df = candidates.merge(completions, on=["model","prompt_id","prompt","solution","_original_completion_hash"])
-        print(df, candidates, completions, sep="\n")
-        # print_info(conn)
         # check_candidates(candidates, df)
         df.to_csv(output_csv)
-        # tables = conn.execute("SHOW TABLES").fetchall()
-        # if not ("candidates", ) in tables:
-        #     # Create a table in DuckDB and insert the candidate data
-        #     conn.execute("CREATE TABLE candidates (model VARCHAR, prompt_id INTEGER, \
-        #                 prompt VARCHAR, completion VARCHAR, solution VARCHAR, candidates VARCHAR)")
-        #     # Insert the list of rows into the table
-        #     for _,row in df.iterrows():
-        #         drow = [row["model"],row["prompt_id"],row["prompt"],row["completion"],row["solution"],row["candidates"]]
-        #         conn.execute("INSERT INTO candidates VALUES (?, ?, ?, ?, ?, ?)", drow)
-        #     conn.commit()
-        #     print_info(conn)
-        #     conn.close()
     else:
         df = pd.read_csv(output_csv.as_posix())
-    print(df)
     if launch_gradio:
         launch_app(df, share_demo)
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--candidates", type=Path, help="path to .csv data containing extracted candidates", default="data.csv")
-    parser.add_argument("--output_csv", type=Path, help="path to .csv output file; will reload from here if path exists", default="output.csv")
     parser.add_argument("-gr","--launch_gradio", action="store_true")
     parser.add_argument("-s", "--share_demo", action="store_true")
     args = parser.parse_args()
-    args.launch_gradio = True
     main(**vars(args))

 from pathlib import Path
 import pandas as pd
 from metrics import load_results
+from utils import query_format_models, sha256_hash, get_completions, print_info, regex_compare
 import numpy as np
 import json
 import ast
 import gradio as gr
 import re
+from typing import List
 SQL_QUERY = """
 WITH AllResults AS (
 FROM AllResults
 WHERE
     AllResults.model IN {models}
+""".format(models=query_format_models(['r1','gemini2']))
 def _parse(x):
     if isinstance(x, str):
 def check_candidates(candidates: pd.DataFrame, merged_df: pd.DataFrame):
     """
     Perform a variety of sanity checks ie:
     - all attempted answers are in the completion
     """
     for _,row in merged_df.iterrows():
         candidates = json.loads(row["candidates"])
         comp = row["completion"].lower()
         for c in candidates:
+            assert c.lower() in comp or regex_compare(c.lower(), comp), \
                 json.dumps({"candidate":c, "completion":row["completion"], "hash": row["_original_completion_hash"]}, indent=4)
 def launch_app(df: pd.DataFrame, share_demo: bool = False):
     # Define function to display table and toggle completion
         example = df.iloc[example_idx]
         # Function to highlight words from the candidates list
+        def highlight_words(text, candidates, color="yellow"):
+            if color == "yellow":
+                pattern = r'<mark>\1</mark>'
+            else:
+                pattern = r'<mark style="background-color:{green};">\1</mark>'
             for word in candidates:
                 # Use word boundaries to ensure we only match whole words
+                text = re.sub(rf'\b({re.escape(word)})\b', r'<mark style="background-color:{color};">\1</mark>'.format(color=color), text, flags=re.IGNORECASE)
             return text
         # Highlight words in the 'completion' column
         candidates = json.loads(example['candidates'])
+        regex_candidates = json.loads(example['regex_candidates'])
         highlighted_completion = highlight_words(example['completion'], candidates)
+        highlighted_regex_completion = highlight_words(example['completion'], regex_candidates, color="green")
         # Create a table with the core columns
         table_html = f"""
         <table>
             <tr><td><b>Solution</b></td><td>{example['solution']}</td></tr>
             <tr><td><b>Prompt</b></td><td>{example['prompt']}</td></tr>
             <tr><td><b>Candidates</b></td><td>{candidates}</td></tr>
+            <tr><td><b>Regex Candidates</b></td><td>{regex_candidates}</td></tr>
         </table>
         """
+        # If the toggle is checked, show the 'completion' column with highlighted words
+        if "highlight_regex" in show_completion:
+            completion = highlighted_regex_completion
             table_html += f"""
             <br><b>Completion:</b><br>
+            <p>{completion}</p>
             """
+        if "highlight_candidates" in show_completion:
+            completion = highlighted_completion
+            table_html += f"""
+                <br><b>Completion:</b><br>
+                <p>{completion}</p>
+                """
         return table_html
         example_slider = gr.Slider(minimum=0, maximum=len(df)-1, step=1, label="Example", value=0)
         # Toggle button for showing/hiding completion
+        toggle_button = gr.CheckboxGroup(["highlight_candidates", "highlight_regex"])
         with gr.Row():
             gr.HTML('<h1>Candidates Table</h1>')
         # Set interaction behavior: update the table when slider or checkbox changes
         example_slider.change(show_table, inputs=[toggle_button, example_slider], outputs=[table_output])
+        toggle_button.input(show_table, inputs=[toggle_button, example_slider], outputs=[table_output])
     # Launch the app
     demo.launch(share=share_demo)
def _extract_candidates(row, do_regex: bool) -> str:
    """
    Try to re-extract candidates, assuming they are written between double quotes.

    Args:
        row: mapping-like record (e.g. a DataFrame row). Reads "completion"
            when `do_regex` is true; otherwise reads "candidates" and, when
            that value is missing, "generated".
        do_regex: when true, ignore any stored candidates and return every
            double-quoted span found in the completion.

    Returns:
        A JSON-encoded list of unique strings in order of first occurrence, or
        the stored "candidates" value unchanged when it is present and
        `do_regex` is false.
    """
    if do_regex:
        # pattern = r'"(.+?)"|\*(.+?)\*'
        pattern = r'"(.+?)"'
        quoted = (m.group(1) for m in re.finditer(pattern, row["completion"]))
        # dict.fromkeys de-duplicates while keeping a deterministic order.
        return json.dumps(list(dict.fromkeys(quoted)))

    candidates = row["candidates"]
    # A stored value is "missing" when it is None, a float NaN or an empty
    # list/tuple. np.isnan is only applied to floats because it raises
    # TypeError on strings and lists.
    is_nan = isinstance(candidates, float) and np.isnan(candidates)
    is_empty = isinstance(candidates, (list, tuple)) and not candidates
    if candidates is None or is_nan or is_empty:
        fallback = re.findall(r'"(\w+)"', row["generated"])
        return json.dumps(list(dict.fromkeys(fallback)))
    return candidates
def main(candidates: Path, output_csv: Path, launch_gradio: bool, share_demo: bool, do_regex: bool):
    """
    Build (or reload) the merged candidates/completions table and optionally
    launch the Gradio viewer.

    When `output_csv` does not exist, the candidates CSV is read, candidates
    are re-extracted and grouped per completion, merged with the completions
    returned by `SQL_QUERY`, and the result is written to `output_csv`.
    Otherwise the table is reloaded from `output_csv`.

    Args:
        candidates: path to the .csv data containing extracted candidates.
        output_csv: path to the .csv output file; reloaded from if it exists.
        launch_gradio: launch the viewer on the resulting table.
        share_demo: forwarded to `launch_app`.
        do_regex: accepted from the CLI but not consulted here; the
            "regex_candidates" column is always computed.
    """
    if not output_csv.exists():
        # Use a separate local name rather than rebinding the `candidates` path.
        extracted = pd.read_csv(candidates.as_posix())
        conn = load_results()
        completions = conn.sql(SQL_QUERY).df()

        extracted["candidates"] = extracted.apply(lambda row: _extract_candidates(row, False), axis=1)
        extracted = extracted.groupby(["model","prompt_id","solution","prompt","_original_completion_hash"]).agg({
            "candidates": "unique"
        }).reset_index()
        extracted["candidates"] = extracted["candidates"].apply(lambda x: json.dumps(list(_concat(x))))

        completions["_original_completion_hash"] = completions["completion"].apply(sha256_hash)
        # Per-model row counts before and after the merge, to spot dropped rows.
        print(completions["model"].value_counts())
        print(extracted["model"].value_counts())
        df = extracted.merge(completions, on=["model","prompt_id","prompt","solution","_original_completion_hash"])
        print(df["model"].value_counts())
        # check_candidates(candidates, df)
        df.to_csv(output_csv)
    else:
        df = pd.read_csv(output_csv.as_posix())

    # Computed on both paths (after the CSV is written, so the file contents
    # are unchanged): the viewer reads "regex_candidates" for every example.
    df["regex_candidates"] = df.apply(lambda row: _extract_candidates(row, True), axis=1)

    if launch_gradio:
        df = df.sort_values(by="prompt_id")
        launch_app(df, share_demo)
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
+    parser.add_argument("candidates", type=Path, help="path to .csv data containing extracted candidates",default="data.csv")
+    parser.add_argument("output_csv", type=Path, help="path to .csv output file; will reload from here if path exists",default="output.csv")
     parser.add_argument("-gr","--launch_gradio", action="store_true")
     parser.add_argument("-s", "--share_demo", action="store_true")
+    parser.add_argument("-r", "--do_regex", action="store_true")
     args = parser.parse_args()
+    args.do_regex = True
     main(**vars(args))

output.csv CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ad970dce3fb60473dcbfa707515ab67dd78b3cbcc2856feeff6fcb33c918e69
-size 18655953

 version https://git-lfs.github.com/spec/v1
+oid sha256:95c3ef7fd5b1cbcc504a14b04b7b05474eeda116a670a0441081011411eff42f
+size 6485882