Spaces:
Sleeping
Sleeping
jedick
committed on
Commit
·
a4966c6
1
Parent(s):
e41c71b
Move dev files to development directory
Browse files
- collect_data.py +0 -68
- create_examples.py +0 -69
- {data → development}/alignment_fewshot.txt +0 -0
- {data → development}/alignment_heuristic.txt +0 -0
- judge_disagreements.py +0 -59
- models.py +2 -2
- test_workflows.py +0 -30
- workflows.py +0 -23
collect_data.py
DELETED
|
@@ -1,68 +0,0 @@
|
|
| 1 |
-
import time
|
| 2 |
-
import csv
|
| 3 |
-
from wiki_data_fetcher import (
|
| 4 |
-
get_previous_revisions,
|
| 5 |
-
extract_revision_info,
|
| 6 |
-
get_wikipedia_introduction,
|
| 7 |
-
)
|
| 8 |
-
|
| 9 |
-
title = []
|
| 10 |
-
revid_0, revid_10, revid_100 = [], [], []
|
| 11 |
-
ts_0, ts_10, ts_100 = [], [], []
|
| 12 |
-
intro_0, intro_10, intro_100 = [], [], []
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
if __name__ == "__main__":
|
| 16 |
-
|
| 17 |
-
# Open the file in read mode
|
| 18 |
-
with open("data/wikipedia_titles.txt", "r") as file:
|
| 19 |
-
# Iterate through each line in the file
|
| 20 |
-
for line in file:
|
| 21 |
-
# Get title from each line without trailing newline characters
|
| 22 |
-
this_title = line.strip()
|
| 23 |
-
print(this_title)
|
| 24 |
-
# Append title
|
| 25 |
-
title.append(this_title)
|
| 26 |
-
# Get info for most recent 100 revisions
|
| 27 |
-
json_data = get_previous_revisions(this_title, revisions=100)
|
| 28 |
-
# Append data for current revision
|
| 29 |
-
info_0 = extract_revision_info(json_data, 0)
|
| 30 |
-
revid_0.append(info_0["revid"])
|
| 31 |
-
ts_0.append(info_0["timestamp"])
|
| 32 |
-
intro_0.append(get_wikipedia_introduction(info_0["revid"]))
|
| 33 |
-
# Append data for 10th revision before current
|
| 34 |
-
info_10 = extract_revision_info(json_data, 10, limit_revnum=False)
|
| 35 |
-
revid_10.append(info_10["revid"])
|
| 36 |
-
ts_10.append(info_10["timestamp"])
|
| 37 |
-
intro_10.append(get_wikipedia_introduction(info_10["revid"]))
|
| 38 |
-
# Append data for 100th revision before current
|
| 39 |
-
info_100 = extract_revision_info(json_data, 100, limit_revnum=False)
|
| 40 |
-
revid_100.append(info_100["revid"])
|
| 41 |
-
ts_100.append(info_100["timestamp"])
|
| 42 |
-
intro_100.append(get_wikipedia_introduction(info_100["revid"]))
|
| 43 |
-
|
| 44 |
-
# Write the CSV in each loop in case we need to restart after an error
|
| 45 |
-
# Combine the lists
|
| 46 |
-
# fmt: off
|
| 47 |
-
export_data = zip(
|
| 48 |
-
title, revid_0, revid_10, revid_100,
|
| 49 |
-
ts_0, ts_10, ts_100, intro_0, intro_10, intro_100,
|
| 50 |
-
)
|
| 51 |
-
column_names = [
|
| 52 |
-
"title", "revid_0", "revid_10", "revid_100",
|
| 53 |
-
"ts_0", "ts_10", "ts_100",
|
| 54 |
-
"intro_0", "intro_10", "intro_100",
|
| 55 |
-
]
|
| 56 |
-
# fmt: on
|
| 57 |
-
|
| 58 |
-
with open(
|
| 59 |
-
"data/wikipedia_introductions.csv", "w", newline="", encoding="utf-8"
|
| 60 |
-
) as myfile:
|
| 61 |
-
wr = csv.writer(myfile)
|
| 62 |
-
# Write a header row
|
| 63 |
-
wr.writerow(column_names)
|
| 64 |
-
# Write the combined data rows
|
| 65 |
-
wr.writerows(export_data)
|
| 66 |
-
|
| 67 |
-
# Rate limit our API calls
|
| 68 |
-
time.sleep(5)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
create_examples.py
DELETED
|
@@ -1,69 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
from models import classifier
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
def run_classifier(row):
|
| 6 |
-
"""
|
| 7 |
-
Run the model on one row of data from 'data/wikipedia_introductions.csv'.
|
| 8 |
-
The model is run up to four times: two prompt styles (heuristic and few-shot)
|
| 9 |
-
and two revision intervals (from 10th and 100th previous revisions to current).
|
| 10 |
-
|
| 11 |
-
Usage:
|
| 12 |
-
|
| 13 |
-
df = pd.read_csv("data/wikipedia_introductions.csv")
|
| 14 |
-
row = df.iloc[38]
|
| 15 |
-
run_classifier(row)
|
| 16 |
-
"""
|
| 17 |
-
|
| 18 |
-
# Initialize output dict
|
| 19 |
-
output = {}
|
| 20 |
-
|
| 21 |
-
output["heuristic_10"] = classifier(row["intro_10"], row["intro_0"], "heuristic")
|
| 22 |
-
output["few-shot_10"] = classifier(row["intro_10"], row["intro_0"], "few-shot")
|
| 23 |
-
output["heuristic_100"] = classifier(row["intro_100"], row["intro_0"], "heuristic")
|
| 24 |
-
output["few-shot_100"] = classifier(row["intro_100"], row["intro_0"], "few-shot")
|
| 25 |
-
|
| 26 |
-
return output
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
if __name__ == "__main__":
|
| 30 |
-
|
| 31 |
-
"""
|
| 32 |
-
Run the classifier on all rows from 'data/wikipedia_introductions.csv' and save results in 'data/examples.csv'.
|
| 33 |
-
"""
|
| 34 |
-
|
| 35 |
-
# Read the data
|
| 36 |
-
df = pd.read_csv("data/wikipedia_introductions.csv")
|
| 37 |
-
|
| 38 |
-
# For reference: Find row indices with at least one missing value
|
| 39 |
-
# missing_rows = df.index[df.isnull().any(axis=1)].tolist()
|
| 40 |
-
# print("\nRow indices with missing values:", missing_rows)
|
| 41 |
-
|
| 42 |
-
# Initialize output data frame
|
| 43 |
-
df_out = None
|
| 44 |
-
|
| 45 |
-
for index, row in df.iterrows():
|
| 46 |
-
# Print the title to see progress
|
| 47 |
-
print(row["title"])
|
| 48 |
-
# Run classifier
|
| 49 |
-
output = run_classifier(row)
|
| 50 |
-
print(output)
|
| 51 |
-
# Create column names and row for data frame
|
| 52 |
-
column_names = [
|
| 53 |
-
outer_k + "_" + inner_k
|
| 54 |
-
for outer_k in output.keys()
|
| 55 |
-
for inner_k in output[outer_k].keys()
|
| 56 |
-
]
|
| 57 |
-
row_values = [
|
| 58 |
-
inner_v for outer_k in output.keys() for inner_v in output[outer_k].values()
|
| 59 |
-
]
|
| 60 |
-
# Add title to output
|
| 61 |
-
column_names = ["title"] + column_names
|
| 62 |
-
row_values = [row["title"]] + row_values
|
| 63 |
-
df_row = pd.DataFrame([row_values], columns=column_names)
|
| 64 |
-
if df_out is None:
|
| 65 |
-
df_out = df_row
|
| 66 |
-
else:
|
| 67 |
-
df_out = pd.concat([df_out, df_row])
|
| 68 |
-
# Write CSV in every loop to avoid data loss if errors occur
|
| 69 |
-
df_out.to_csv("data/examples.csv", index=False, encoding="utf-8")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{data → development}/alignment_fewshot.txt
RENAMED
|
File without changes
|
{data → development}/alignment_heuristic.txt
RENAMED
|
File without changes
|
judge_disagreements.py
DELETED
|
@@ -1,59 +0,0 @@
|
|
| 1 |
-
import sys
|
| 2 |
-
import pandas as pd
|
| 3 |
-
from models import judge
|
| 4 |
-
|
| 5 |
-
if __name__ == "__main__":
|
| 6 |
-
|
| 7 |
-
"""
|
| 8 |
-
Run the judge on all rows from 'data/disagreements_for_AI.csv' and save results in 'data/AI_judgments_unaligned.csv'.
|
| 9 |
-
"""
|
| 10 |
-
|
| 11 |
-
# Read the data
|
| 12 |
-
df = pd.read_csv("data/disagreements_for_AI.csv")
|
| 13 |
-
|
| 14 |
-
# Add empty columns for AI judgments
|
| 15 |
-
df["noteworthy"] = None
|
| 16 |
-
df["reasoning"] = None
|
| 17 |
-
|
| 18 |
-
# We run the unaligned judge unless the script is called with --aligned-fewshot or --aligned-heuristic
|
| 19 |
-
mode = "unaligned"
|
| 20 |
-
outfile = "data/AI_judgments_unaligned.csv"
|
| 21 |
-
# Check if an argument was passed
|
| 22 |
-
if len(sys.argv) > 1:
|
| 23 |
-
# sys.argv[0] is the script name, sys.argv[1] is the first argument
|
| 24 |
-
argument = sys.argv[1]
|
| 25 |
-
if argument == "--aligned-fewshot":
|
| 26 |
-
mode = "aligned-fewshot"
|
| 27 |
-
outfile = "data/AI_judgments_fewshot.csv"
|
| 28 |
-
elif argument == "--aligned-heuristic":
|
| 29 |
-
mode = "aligned-heuristic"
|
| 30 |
-
outfile = "data/AI_judgments_heuristic.csv"
|
| 31 |
-
else:
|
| 32 |
-
raise ValueError(f"Unknown argument: {argument}")
|
| 33 |
-
|
| 34 |
-
print(f"Saving judgments to {outfile}")
|
| 35 |
-
|
| 36 |
-
for index, row in df.iterrows():
|
| 37 |
-
# Change this if needed (to restart after errors)
|
| 38 |
-
if index < 0:
|
| 39 |
-
next
|
| 40 |
-
else:
|
| 41 |
-
# Print the title to see progress
|
| 42 |
-
print(row["title"])
|
| 43 |
-
# Run judge
|
| 44 |
-
try:
|
| 45 |
-
output = judge(
|
| 46 |
-
df.iloc[index]["old_revision"],
|
| 47 |
-
df.iloc[index]["new_revision"],
|
| 48 |
-
df.iloc[index]["heuristic_rationale"],
|
| 49 |
-
df.iloc[index]["few-shot_rationale"],
|
| 50 |
-
mode=mode,
|
| 51 |
-
)
|
| 52 |
-
except:
|
| 53 |
-
output = {"noteworthy": None, "reasoning": None}
|
| 54 |
-
print(output)
|
| 55 |
-
# Update data frame
|
| 56 |
-
df.at[index, "noteworthy"] = output["noteworthy"]
|
| 57 |
-
df.at[index, "reasoning"] = output["reasoning"]
|
| 58 |
-
# Write CSV in every loop to avoid data loss if errors occur
|
| 59 |
-
df.to_csv(outfile, index=False, encoding="utf-8")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models.py
CHANGED
|
@@ -99,11 +99,11 @@ def judge(old_revision, new_revision, rationale_1, rationale_2, mode="unaligned"
|
|
| 99 |
if mode == "unaligned":
|
| 100 |
alignment_text = ""
|
| 101 |
elif mode == "aligned-fewshot":
|
| 102 |
-
with open("
|
| 103 |
lines = file.readlines()
|
| 104 |
alignment_text = "".join(lines)
|
| 105 |
elif mode == "aligned-heuristic":
|
| 106 |
-
with open("
|
| 107 |
lines = file.readlines()
|
| 108 |
alignment_text = "".join(lines)
|
| 109 |
else:
|
|
|
|
| 99 |
if mode == "unaligned":
|
| 100 |
alignment_text = ""
|
| 101 |
elif mode == "aligned-fewshot":
|
| 102 |
+
with open("development/alignment_fewshot.txt", "r") as file:
|
| 103 |
lines = file.readlines()
|
| 104 |
alignment_text = "".join(lines)
|
| 105 |
elif mode == "aligned-heuristic":
|
| 106 |
+
with open("development/alignment_heuristic.txt", "r") as file:
|
| 107 |
lines = file.readlines()
|
| 108 |
alignment_text = "".join(lines)
|
| 109 |
else:
|
test_workflows.py
DELETED
|
@@ -1,30 +0,0 @@
|
|
| 1 |
-
from workflows import llm_workflow
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
def llm_workflow_logic():
|
| 5 |
-
|
| 6 |
-
old_revision = """Kaman-Kalehöyük Archaeological Museum (Turkish: Kaman-Kalehöyük Arkeoloji Müzesi) is an archaeological museum in Kaman District of Kırşehir Province in Turkey. It exhibits artifacts of seven civilizations excavated in the nearby multi-period mound Kaman-Kalehöyük. It was opened in 2010. A Japanese garden is next to the museum building.[1][2]"""
|
| 7 |
-
|
| 8 |
-
new_revision = """The Kaman-Kalehöyük Archaeological Museum (Turkish: Kaman-Kalehöyük Arkeoloji Müzesi) is an archaeological museum in Çağırkan, Kaman District, Kırşehir Province, Turkey. It exhibits artifacts of seven civilizations excavated in the nearby multi-period mound Kaman-Kalehöyük. It opened in 2010. A Japanese garden is next to the museum building.[1][2]"""
|
| 9 |
-
|
| 10 |
-
response = llm_workflow(old_revision, new_revision, "aligned-fewshot")
|
| 11 |
-
|
| 12 |
-
# The judge should respond with noteworthy: False regardless of the classifier models' responses
|
| 13 |
-
return response["judge"]["noteworthy"] is False
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
# pytest -vv test_workflows.py::test_llm_workflow
|
| 17 |
-
def test_llm_workflow():
|
| 18 |
-
"""Run LLM workflow logic up to 5 times"""
|
| 19 |
-
current_try = 0
|
| 20 |
-
max_trys = 5
|
| 21 |
-
while current_try < max_trys:
|
| 22 |
-
current_try += 1
|
| 23 |
-
result = llm_workflow_logic()
|
| 24 |
-
if result is True:
|
| 25 |
-
print(f"Try {current_try} succeeded")
|
| 26 |
-
break
|
| 27 |
-
else:
|
| 28 |
-
print(f"Try {current_try} failed")
|
| 29 |
-
# The actual test for pytest
|
| 30 |
-
assert result is True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
workflows.py
DELETED
|
@@ -1,23 +0,0 @@
|
|
| 1 |
-
from models import classifier, judge
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
def llm_workflow(old_revision, new_revision, mode="aligned-fewshot"):
|
| 5 |
-
"""
|
| 6 |
-
Run LLM workflow (input to response)
|
| 7 |
-
|
| 8 |
-
Args:
|
| 9 |
-
mode: "aligned-fewshot" for few-shot alignment or "aligned-heuristic" for heuristic alignment
|
| 10 |
-
"""
|
| 11 |
-
|
| 12 |
-
# Run classifier and judge models
|
| 13 |
-
heuristic = classifier(old_revision, new_revision, "heuristic")
|
| 14 |
-
few_shot = classifier(old_revision, new_revision, "few-shot")
|
| 15 |
-
judge_response = judge(
|
| 16 |
-
old_revision,
|
| 17 |
-
new_revision,
|
| 18 |
-
heuristic["rationale"],
|
| 19 |
-
few_shot["rationale"],
|
| 20 |
-
mode=mode,
|
| 21 |
-
)
|
| 22 |
-
|
| 23 |
-
return {"heuristic": heuristic, "few-shot": few_shot, "judge": judge_response}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|