franlucc committed on
Commit
a72f911
·
1 Parent(s): da8939e
Files changed (2) hide show
  1. candidates.py +54 -75
  2. output.csv +2 -2
candidates.py CHANGED
@@ -6,22 +6,13 @@ import argparse
6
  from pathlib import Path
7
  import pandas as pd
8
  from metrics import load_results
 
9
  import numpy as np
10
  import json
11
  import ast
12
  import gradio as gr
13
- from typing import List
14
- from hashlib import sha256
15
  import re
16
-
17
- def _query_format_models(models: List[str]) -> str:
18
- """
19
- Format model names for the SQL query `WHERE <this_model> IN <models>
20
- """
21
- return "('" + "','".join(["completions-"+m for m in models]) + "')"
22
-
23
- def _hash(text: str) -> str:
24
- return sha256(bytes(text, "utf-8")).hexdigest()
25
 
26
  SQL_QUERY = """
27
  WITH AllResults AS (
@@ -39,24 +30,7 @@ SELECT prompt_id, model, completion, answer as solution, prompt
39
  FROM AllResults
40
  WHERE
41
  AllResults.model IN {models}
42
- """.format(models=_query_format_models(['r1_distill_qwen32b','r1','gemini2']))
43
-
44
-
45
- def print_info(db_connection):
46
- tables = db_connection.execute("SHOW TABLES").fetchall()
47
- # Iterate over each table and print its name and columns
48
- for table in tables:
49
- table_name = table[0]
50
- print(f"Table: {table_name}")
51
-
52
- # Get the columns for this table
53
- columns = db_connection.execute(f"DESCRIBE {table_name}").fetchall()
54
-
55
- # Print the column details
56
- for column in columns:
57
- print(f" - {column[0]} ({column[1]})") # column[0] is the column name, column[1] is the data type
58
-
59
- print() # Add a blank line between tables for readability
60
 
61
  def _parse(x):
62
  if isinstance(x, str):
@@ -82,27 +56,15 @@ def _concat(series: pd.Series) -> np.array:
82
  def check_candidates(candidates: pd.DataFrame, merged_df: pd.DataFrame):
83
  """
84
  Perform a variety of sanity checks ie:
85
- - all chunks are present
86
  - all attempted answers are in the completion
87
  """
88
- MANUALLY_CHECKED_SPECIAL_CASES = [
89
- "4fd9a9adf162fe558cd94ab7ebcf8f42882873dca133aa1a4620572caa364c0c", # extracted as a str list, eg. `FIED, GOA`
90
- "7dd4a475af16d67ed896275674d6a9b51911a3ee22aaca84411fb0a946245fa1"
91
- ]
92
  for _,row in merged_df.iterrows():
93
  candidates = json.loads(row["candidates"])
94
  comp = row["completion"].lower()
95
  for c in candidates:
96
- assert c.lower() in comp or \
97
- c.lower() in re.sub(r'[^a-z0-9]', '', comp) or \
98
- row["_original_completion_hash"] in MANUALLY_CHECKED_SPECIAL_CASES, \
99
  json.dumps({"candidate":c, "completion":row["completion"], "hash": row["_original_completion_hash"]}, indent=4)
100
 
101
- # grouped = candidates.groupby(["model","prompt_id"]).agg({"chunk_id": "unique", "num_chunks":"first"})
102
- # for _,row in grouped.iterrows():
103
- # assert list(row["chunk_id"]) == range(row["num_chunks"]+1), (row["chunk_id"], row["num_chunks"])
104
-
105
-
106
  def launch_app(df: pd.DataFrame, share_demo: bool = False):
107
 
108
  # Define function to display table and toggle completion
@@ -111,16 +73,22 @@ def launch_app(df: pd.DataFrame, share_demo: bool = False):
111
  example = df.iloc[example_idx]
112
 
113
  # Function to highlight words from the candidates list
114
- def highlight_words(text, candidates):
 
 
 
 
 
115
  for word in candidates:
116
  # Use word boundaries to ensure we only match whole words
117
- text = re.sub(rf'\b({re.escape(word)})\b', r'<mark>\1</mark>', text, flags=re.IGNORECASE)
118
  return text
119
 
120
  # Highlight words in the 'completion' column
121
  candidates = json.loads(example['candidates'])
 
122
  highlighted_completion = highlight_words(example['completion'], candidates)
123
-
124
  # Create a table with the core columns
125
  table_html = f"""
126
  <table>
@@ -130,15 +98,23 @@ def launch_app(df: pd.DataFrame, share_demo: bool = False):
130
  <tr><td><b>Solution</b></td><td>{example['solution']}</td></tr>
131
  <tr><td><b>Prompt</b></td><td>{example['prompt']}</td></tr>
132
  <tr><td><b>Candidates</b></td><td>{candidates}</td></tr>
 
133
  </table>
134
  """
135
 
136
- # If the toggle is checked, show the 'completion' column with highlighted words
137
- if show_completion:
 
138
  table_html += f"""
139
  <br><b>Completion:</b><br>
140
- <p>{highlighted_completion}</p>
141
  """
 
 
 
 
 
 
142
 
143
  return table_html
144
 
@@ -148,7 +124,7 @@ def launch_app(df: pd.DataFrame, share_demo: bool = False):
148
  example_slider = gr.Slider(minimum=0, maximum=len(df)-1, step=1, label="Example", value=0)
149
 
150
  # Toggle button for showing/hiding completion
151
- toggle_button = gr.Checkbox(label="Show Completion", value=False)
152
 
153
  with gr.Row():
154
  gr.HTML('<h1>Candidates Table</h1>')
@@ -158,57 +134,60 @@ def launch_app(df: pd.DataFrame, share_demo: bool = False):
158
 
159
  # Set interaction behavior: update the table when slider or checkbox changes
160
  example_slider.change(show_table, inputs=[toggle_button, example_slider], outputs=[table_output])
161
- toggle_button.change(show_table, inputs=[toggle_button, example_slider], outputs=[table_output])
162
 
163
  # Launch the app
164
  demo.launch(share=share_demo)
165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
- def main(candidates: Path, output_csv: Path, launch_gradio: bool, share_demo: bool):
168
  if not output_csv.exists():
169
  candidates = pd.read_csv(candidates.as_posix())
170
  conn = load_results()
171
  completions = conn.sql(SQL_QUERY).df()
172
 
 
173
  candidates = candidates.groupby(["model","prompt_id","solution","prompt","_original_completion_hash"]).agg({
174
  "candidates": "unique"
175
  }).reset_index()
 
176
  candidates["candidates"] = candidates["candidates"].apply(lambda x: json.dumps(list(_concat(x))))
177
- completions["_original_completion_hash"] = completions["completion"].apply(_hash)
178
-
 
179
  df = candidates.merge(completions, on=["model","prompt_id","prompt","solution","_original_completion_hash"])
180
- print(df, candidates, completions, sep="\n")
181
- # print_info(conn)
182
  # check_candidates(candidates, df)
183
  df.to_csv(output_csv)
184
-
185
- # tables = conn.execute("SHOW TABLES").fetchall()
186
- # if not ("candidates", ) in tables:
187
- # # Create a table in DuckDB and insert the candidate data
188
- # conn.execute("CREATE TABLE candidates (model VARCHAR, prompt_id INTEGER, \
189
- # prompt VARCHAR, completion VARCHAR, solution VARCHAR, candidates VARCHAR)")
190
-
191
- # # Insert the list of rows into the table
192
- # for _,row in df.iterrows():
193
- # drow = [row["model"],row["prompt_id"],row["prompt"],row["completion"],row["solution"],row["candidates"]]
194
- # conn.execute("INSERT INTO candidates VALUES (?, ?, ?, ?, ?, ?)", drow)
195
-
196
- # conn.commit()
197
- # print_info(conn)
198
- # conn.close()
199
  else:
200
  df = pd.read_csv(output_csv.as_posix())
201
-
202
- print(df)
203
  if launch_gradio:
 
204
  launch_app(df, share_demo)
205
 
206
  if __name__ == "__main__":
207
  parser = argparse.ArgumentParser()
208
- parser.add_argument("--candidates", type=Path, help="path to .csv data containing extracted candidates", default="data.csv")
209
- parser.add_argument("--output_csv", type=Path, help="path to .csv output file; will reload from here if path exists", default="output.csv")
210
  parser.add_argument("-gr","--launch_gradio", action="store_true")
211
  parser.add_argument("-s", "--share_demo", action="store_true")
 
212
  args = parser.parse_args()
213
- args.launch_gradio = True
214
  main(**vars(args))
 
6
  from pathlib import Path
7
  import pandas as pd
8
  from metrics import load_results
9
+ from utils import query_format_models, sha256_hash, get_completions, print_info, regex_compare
10
  import numpy as np
11
  import json
12
  import ast
13
  import gradio as gr
 
 
14
  import re
15
+ from typing import List
 
 
 
 
 
 
 
 
16
 
17
  SQL_QUERY = """
18
  WITH AllResults AS (
 
30
  FROM AllResults
31
  WHERE
32
  AllResults.model IN {models}
33
+ """.format(models=query_format_models(['r1','gemini2']))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  def _parse(x):
36
  if isinstance(x, str):
 
56
def check_candidates(candidates: pd.DataFrame, merged_df: pd.DataFrame):
    """
    Sanity-check that every extracted candidate appears in its completion.

    For each row of ``merged_df`` the JSON list stored in the ``candidates``
    column is decoded, and each entry must either be a case-insensitive
    substring of the ``completion`` column or be accepted by
    ``regex_compare`` (called with the lowercased candidate and completion).

    Args:
        candidates: kept for call compatibility; this function does not read it.
        merged_df: frame providing the ``candidates`` (JSON-encoded list of
            strings), ``completion`` and ``_original_completion_hash`` columns.

    Raises:
        AssertionError: on the first candidate that matches neither way. The
            message is a JSON dump of the candidate, the original completion
            and the completion hash.
    """
    for _, row in merged_df.iterrows():
        completion = row["completion"]
        lowered = completion.lower()
        # A distinct local name keeps the `candidates` argument from being
        # rebound to a plain list inside the loop.
        for cand in json.loads(row["candidates"]):
            needle = cand.lower()
            if needle in lowered or regex_compare(needle, lowered):
                continue
            # Raised explicitly rather than via `assert`, which is compiled
            # away under `python -O` and would silently disable the check.
            raise AssertionError(
                json.dumps(
                    {
                        "candidate": cand,
                        "completion": completion,
                        "hash": row["_original_completion_hash"],
                    },
                    indent=4,
                )
            )
67
 
 
 
 
 
 
68
  def launch_app(df: pd.DataFrame, share_demo: bool = False):
69
 
70
  # Define function to display table and toggle completion
 
73
  example = df.iloc[example_idx]
74
 
75
  # Function to highlight words from the candidates list
76
def highlight_words(text, candidates, color="yellow"):
    """
    Wrap whole-word occurrences of the candidates in coloured ``<mark>`` tags.

    Matching is case-insensitive and anchored on ``\\b`` word boundaries; the
    matched text keeps its original casing.

    Args:
        text: the string to annotate.
        candidates: iterable of words/phrases to highlight; empty entries
            are ignored.
        color: CSS colour used for the ``background-color`` of each mark.

    Returns:
        ``text`` with every match wrapped in
        ``<mark style="background-color:{color};">...</mark>``; returned
        unchanged when there is nothing to highlight.
    """
    # Longest first so that, where candidates overlap, the longer phrase is
    # tried first; the secondary key keeps the order deterministic.
    words = sorted({w for w in candidates if w}, key=lambda w: (-len(w), w))
    if not words:
        return text

    # One pass over the text with a single alternation: substituting one
    # candidate at a time would let later candidates (e.g. "mark", "style")
    # match inside the tags inserted for earlier ones.
    alternation = "|".join(re.escape(w) for w in words)
    replacement = r'<mark style="background-color:{color};">\1</mark>'.format(color=color)
    return re.sub(rf'\b({alternation})\b', replacement, text, flags=re.IGNORECASE)
86
 
87
  # Highlight words in the 'completion' column
88
  candidates = json.loads(example['candidates'])
89
+ regex_candidates = json.loads(example['regex_candidates'])
90
  highlighted_completion = highlight_words(example['completion'], candidates)
91
+ highlighted_regex_completion = highlight_words(example['completion'], regex_candidates, color="green")
92
  # Create a table with the core columns
93
  table_html = f"""
94
  <table>
 
98
  <tr><td><b>Solution</b></td><td>{example['solution']}</td></tr>
99
  <tr><td><b>Prompt</b></td><td>{example['prompt']}</td></tr>
100
  <tr><td><b>Candidates</b></td><td>{candidates}</td></tr>
101
+ <tr><td><b>Regex Candidates</b></td><td>{regex_candidates}</td></tr>
102
  </table>
103
  """
104
 
105
+ # If the toggle is checked, show the 'completion' column with highlighted words
106
+ if "highlight_regex" in show_completion:
107
+ completion = highlighted_regex_completion
108
  table_html += f"""
109
  <br><b>Completion:</b><br>
110
+ <p>{completion}</p>
111
  """
112
+ if "highlight_candidates" in show_completion:
113
+ completion = highlighted_completion
114
+ table_html += f"""
115
+ <br><b>Completion:</b><br>
116
+ <p>{completion}</p>
117
+ """
118
 
119
  return table_html
120
 
 
124
  example_slider = gr.Slider(minimum=0, maximum=len(df)-1, step=1, label="Example", value=0)
125
 
126
  # Toggle button for showing/hiding completion
127
+ toggle_button = gr.CheckboxGroup(["highlight_candidates", "highlight_regex"])
128
 
129
  with gr.Row():
130
  gr.HTML('<h1>Candidates Table</h1>')
 
134
 
135
  # Set interaction behavior: update the table when slider or checkbox changes
136
  example_slider.change(show_table, inputs=[toggle_button, example_slider], outputs=[table_output])
137
+ toggle_button.input(show_table, inputs=[toggle_button, example_slider], outputs=[table_output])
138
 
139
  # Launch the app
140
  demo.launch(share=share_demo)
141
 
142
+ def _extract_candidates(row, do_regex: bool) -> str:
143
+ """
144
+ Try to re-extract candidates assuming between quotes
145
+ """
146
+ if do_regex:
147
+ # pattern = r'"(.+?)"|\*(.+?)\*'
148
+ pattern = r'"(.+?)"'
149
+ found_c = set([i.group(0)[1:-1] for i in re.finditer(pattern, row["completion"])])
150
+ return json.dumps(list(found_c))
151
+ elif np.isnan(candidates) or candidates == []:
152
+ candidates = re.findall(r'"(\w+)"', row["generated"])
153
+ return json.dumps(list(set(candidates)))
154
+ else:
155
+ return candidates
156
 
157
def main(candidates: Path, output_csv: Path, launch_gradio: bool, share_demo: bool, do_regex: bool):
    """
    Build (or reload) the merged candidates/completions table and optionally
    browse it in the Gradio app.

    Args:
        candidates: path to the .csv file containing extracted candidates.
        output_csv: path of the merged .csv; if it already exists it is
            reloaded instead of being rebuilt.
        launch_gradio: when true, sort by ``prompt_id`` and call ``launch_app``.
        share_demo: forwarded to ``launch_app``.
        do_regex: when true, add the ``regex_candidates`` column. The column
            is also added whenever ``launch_gradio`` is set, because the app
            reads it for every example.
    """
    if output_csv.exists():
        df = pd.read_csv(output_csv.as_posix())
    else:
        # Separate local name: the `candidates` parameter stays a Path.
        extracted = pd.read_csv(candidates.as_posix())
        conn = load_results()
        completions = conn.sql(SQL_QUERY).df()

        extracted["candidates"] = extracted.apply(
            lambda row: _extract_candidates(row, False), axis=1
        )
        extracted = extracted.groupby(
            ["model", "prompt_id", "solution", "prompt", "_original_completion_hash"]
        ).agg({"candidates": "unique"}).reset_index()
        extracted["candidates"] = extracted["candidates"].apply(
            lambda values: json.dumps(list(_concat(values)))
        )

        # The hash of the completion text is one of the join keys below.
        completions["_original_completion_hash"] = completions["completion"].apply(sha256_hash)

        # Per-model row counts before and after the merge, to spot dropped rows.
        print(completions["model"].value_counts())
        print(extracted["model"].value_counts())
        df = extracted.merge(
            completions,
            on=["model", "prompt_id", "prompt", "solution", "_original_completion_hash"],
        )
        print(df["model"].value_counts())
        # Sanity check currently disabled: check_candidates(extracted, df)
        df.to_csv(output_csv)

    # Computed after the branch so both the cached and the freshly built frame
    # carry the column; it is added after to_csv, so it is never written out.
    if do_regex or launch_gradio:
        df["regex_candidates"] = df.apply(lambda row: _extract_candidates(row, True), axis=1)

    if launch_gradio:
        df = df.sort_values(by="prompt_id")
        launch_app(df, share_demo)
183
 
184
  if __name__ == "__main__":
185
  parser = argparse.ArgumentParser()
186
+ parser.add_argument("candidates", type=Path, help="path to .csv data containing extracted candidates",default="data.csv")
187
+ parser.add_argument("output_csv", type=Path, help="path to .csv output file; will reload from here if path exists",default="output.csv")
188
  parser.add_argument("-gr","--launch_gradio", action="store_true")
189
  parser.add_argument("-s", "--share_demo", action="store_true")
190
+ parser.add_argument("-r", "--do_regex", action="store_true")
191
  args = parser.parse_args()
192
+ args.do_regex = True
193
  main(**vars(args))
output.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ad970dce3fb60473dcbfa707515ab67dd78b3cbcc2856feeff6fcb33c918e69
3
- size 18655953
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95c3ef7fd5b1cbcc504a14b04b7b05474eeda116a670a0441081011411eff42f
3
+ size 6485882