Spaces:

jedick
/

noteworthy-differences

Sleeping

App Files Files Community

jedick committed on Dec 20, 2025

Commit

a4966c6

1 Parent(s): e41c71b

Move dev files to development directory

Browse files

Files changed (8) hide show

collect_data.py +0 -68
create_examples.py +0 -69
{data → development}/alignment_fewshot.txt +0 -0
{data → development}/alignment_heuristic.txt +0 -0
judge_disagreements.py +0 -59
models.py +2 -2
test_workflows.py +0 -30
workflows.py +0 -23

collect_data.py DELETED Viewed

@@ -1,68 +0,0 @@
-import time
-import csv
-from wiki_data_fetcher import (
-    get_previous_revisions,
-    extract_revision_info,
-    get_wikipedia_introduction,
-)
-title = []
-revid_0, revid_10, revid_100 = [], [], []
-ts_0, ts_10, ts_100 = [], [], []
-intro_0, intro_10, intro_100 = [], [], []
-if __name__ == "__main__":
-    # Open the file in read mode
-    with open("data/wikipedia_titles.txt", "r") as file:
-        # Iterate through each line in the file
-        for line in file:
-            # Get title from each line without trailing newline characters
-            this_title = line.strip()
-            print(this_title)
-            # Append title
-            title.append(this_title)
-            # Get info for most recent 100 revisions
-            json_data = get_previous_revisions(this_title, revisions=100)
-            # Append data for current revision
-            info_0 = extract_revision_info(json_data, 0)
-            revid_0.append(info_0["revid"])
-            ts_0.append(info_0["timestamp"])
-            intro_0.append(get_wikipedia_introduction(info_0["revid"]))
-            # Append data for 10th revision before current
-            info_10 = extract_revision_info(json_data, 10, limit_revnum=False)
-            revid_10.append(info_10["revid"])
-            ts_10.append(info_10["timestamp"])
-            intro_10.append(get_wikipedia_introduction(info_10["revid"]))
-            # Append data for 100th revision before current
-            info_100 = extract_revision_info(json_data, 100, limit_revnum=False)
-            revid_100.append(info_100["revid"])
-            ts_100.append(info_100["timestamp"])
-            intro_100.append(get_wikipedia_introduction(info_100["revid"]))
-            # Write the CSV in each loop in case we need to restart after an error
-            # Combine the lists
-            # fmt: off
-            export_data = zip(
-                title, revid_0, revid_10, revid_100,
-                ts_0, ts_10, ts_100, intro_0, intro_10, intro_100,
-            )
-            column_names = [
-                "title", "revid_0", "revid_10", "revid_100",
-                "ts_0", "ts_10", "ts_100",
-                "intro_0", "intro_10", "intro_100",
-            ]
-            # fmt: on
-            with open(
-                "data/wikipedia_introductions.csv", "w", newline="", encoding="utf-8"
-            ) as myfile:
-                wr = csv.writer(myfile)
-                # Write a header row
-                wr.writerow(column_names)
-                # Write the combined data rows
-                wr.writerows(export_data)
-            # Rate limit our API calls
-            time.sleep(5)

create_examples.py DELETED Viewed

@@ -1,69 +0,0 @@
-import pandas as pd
-from models import classifier
-def run_classifier(row):
-    """
-    Run the model on one row of data from 'data/wikipedia_introductions.csv'.
-    The model is run up to four times: two prompt styles (heuristic and few-shot)
-    and two revision intervals (from 10th and 100th previous revisions to current).
-    Usage:
-    df = pd.read_csv("data/wikipedia_introductions.csv")
-    row = df.iloc[38]
-    run_classifier(row)
-    """
-    # Initialize output dict
-    output = {}
-    output["heuristic_10"] = classifier(row["intro_10"], row["intro_0"], "heuristic")
-    output["few-shot_10"] = classifier(row["intro_10"], row["intro_0"], "few-shot")
-    output["heuristic_100"] = classifier(row["intro_100"], row["intro_0"], "heuristic")
-    output["few-shot_100"] = classifier(row["intro_100"], row["intro_0"], "few-shot")
-    return output
-if __name__ == "__main__":
-    """
-    Run the classifier on all rows from 'data/wikipedia_introductions.csv' and save results in 'data/examples.csv'.
-    """
-    # Read the data
-    df = pd.read_csv("data/wikipedia_introductions.csv")
-    # For reference: Find row indices with at least one missing value
-    # missing_rows = df.index[df.isnull().any(axis=1)].tolist()
-    # print("\nRow indices with missing values:", missing_rows)
-    # Initialize output data frame
-    df_out = None
-    for index, row in df.iterrows():
-        # Print the title to see progress
-        print(row["title"])
-        # Run classifier
-        output = run_classifier(row)
-        print(output)
-        # Create column names and row for data frame
-        column_names = [
-            outer_k + "_" + inner_k
-            for outer_k in output.keys()
-            for inner_k in output[outer_k].keys()
-        ]
-        row_values = [
-            inner_v for outer_k in output.keys() for inner_v in output[outer_k].values()
-        ]
-        # Add title to output
-        column_names = ["title"] + column_names
-        row_values = [row["title"]] + row_values
-        df_row = pd.DataFrame([row_values], columns=column_names)
-        if df_out is None:
-            df_out = df_row
-        else:
-            df_out = pd.concat([df_out, df_row])
-        # Write CSV in every loop to avoid data loss if errors occur
-        df_out.to_csv("data/examples.csv", index=False, encoding="utf-8")

{data → development}/alignment_fewshot.txt RENAMED Viewed

File without changes

{data → development}/alignment_heuristic.txt RENAMED Viewed

File without changes

judge_disagreements.py DELETED Viewed

@@ -1,59 +0,0 @@
-import sys
-import pandas as pd
-from models import judge
-if __name__ == "__main__":
-    """
-    Run the judge on all rows from 'data/disagreements_for_AI.csv' and save results in 'data/AI_judgments_unaligned.csv'.
-    """
-    # Read the data
-    df = pd.read_csv("data/disagreements_for_AI.csv")
-    # Add empty columns for AI judgments
-    df["noteworthy"] = None
-    df["reasoning"] = None
-    # We run the unaligned judge unless the script is called with --aligned-fewshot or --aligned-heuristic
-    mode = "unaligned"
-    outfile = "data/AI_judgments_unaligned.csv"
-    # Check if an argument was passed
-    if len(sys.argv) > 1:
-        # sys.argv[0] is the script name, sys.argv[1] is the first argument
-        argument = sys.argv[1]
-        if argument == "--aligned-fewshot":
-            mode = "aligned-fewshot"
-            outfile = "data/AI_judgments_fewshot.csv"
-        elif argument == "--aligned-heuristic":
-            mode = "aligned-heuristic"
-            outfile = "data/AI_judgments_heuristic.csv"
-        else:
-            raise ValueError(f"Unknown argument: {argument}")
-    print(f"Saving judgments to {outfile}")
-    for index, row in df.iterrows():
-        # Change this if needed (to restart after errors)
-        if index < 0:
-            next
-        else:
-            # Print the title to see progress
-            print(row["title"])
-            # Run judge
-            try:
-                output = judge(
-                    df.iloc[index]["old_revision"],
-                    df.iloc[index]["new_revision"],
-                    df.iloc[index]["heuristic_rationale"],
-                    df.iloc[index]["few-shot_rationale"],
-                    mode=mode,
-                )
-            except:
-                output = {"noteworthy": None, "reasoning": None}
-            print(output)
-            # Update data frame
-            df.at[index, "noteworthy"] = output["noteworthy"]
-            df.at[index, "reasoning"] = output["reasoning"]
-            # Write CSV in every loop to avoid data loss if errors occur
-            df.to_csv(outfile, index=False, encoding="utf-8")

models.py CHANGED Viewed

@@ -99,11 +99,11 @@ def judge(old_revision, new_revision, rationale_1, rationale_2, mode="unaligned"
     if mode == "unaligned":
         alignment_text = ""
     elif mode == "aligned-fewshot":
-        with open("data/alignment_fewshot.txt", "r") as file:
             lines = file.readlines()
             alignment_text = "".join(lines)
     elif mode == "aligned-heuristic":
-        with open("data/alignment_heuristic.txt", "r") as file:
             lines = file.readlines()
             alignment_text = "".join(lines)
     else:

     if mode == "unaligned":
         alignment_text = ""
     elif mode == "aligned-fewshot":
+        with open("development/alignment_fewshot.txt", "r") as file:
             lines = file.readlines()
             alignment_text = "".join(lines)
     elif mode == "aligned-heuristic":
+        with open("development/alignment_heuristic.txt", "r") as file:
             lines = file.readlines()
             alignment_text = "".join(lines)
     else:

test_workflows.py DELETED Viewed

@@ -1,30 +0,0 @@
-from workflows import llm_workflow
-def llm_workflow_logic():
-    old_revision = """Kaman-Kalehöyük Archaeological Museum (Turkish: Kaman-Kalehöyük Arkeoloji Müzesi) is an archaeological museum in Kaman District of Kırşehir Province in Turkey. It exhibits artifacts of seven civilizations excavated in the nearby multi-period mound Kaman-Kalehöyük. It was opened in 2010. A Japanese garden is next to the museum building.[1][2]"""
-    new_revision = """The Kaman-Kalehöyük Archaeological Museum (Turkish: Kaman-Kalehöyük Arkeoloji Müzesi) is an archaeological museum in Çağırkan, Kaman District, Kırşehir Province, Turkey. It exhibits artifacts of seven civilizations excavated in the nearby multi-period mound Kaman-Kalehöyük. It opened in 2010. A Japanese garden is next to the museum building.[1][2]"""
-    response = llm_workflow(old_revision, new_revision, "aligned-fewshot")
-    # The judge should respond with noteworthy: False regardless of the classifier models' responses
-    return response["judge"]["noteworthy"] is False
-# pytest -vv test_workflows.py::test_llm_workflow
-def test_llm_workflow():
-    """Run LLM workflow logic up to 5 times"""
-    current_try = 0
-    max_trys = 5
-    while current_try < max_trys:
-        current_try += 1
-        result = llm_workflow_logic()
-        if result is True:
-            print(f"Try {current_try} succeeded")
-            break
-        else:
-            print(f"Try {current_try} failed")
-    # The actual test for pytest
-    assert result is True

workflows.py DELETED Viewed

@@ -1,23 +0,0 @@
-from models import classifier, judge
-def llm_workflow(old_revision, new_revision, mode="aligned-fewshot"):
-    """
-    Run LLM workflow (input to response)
-    Args:
-        mode: "aligned-fewshot" for few-shot alignment or "aligned-heuristic" for heuristic alignment
-    """
-    # Run classifier and judge models
-    heuristic = classifier(old_revision, new_revision, "heuristic")
-    few_shot = classifier(old_revision, new_revision, "few-shot")
-    judge_response = judge(
-        old_revision,
-        new_revision,
-        heuristic["rationale"],
-        few_shot["rationale"],
-        mode=mode,
-    )
-    return {"heuristic": heuristic, "few-shot": few_shot, "judge": judge_response}