Spaces:
Sleeping
Sleeping
jedick
committed on
Commit
·
8ae98a8
1
Parent(s):
63e2c58
Add alignment 4
Browse files
- evaluate.py +4 -1
- production/alignment_4.txt +72 -0
- update_alignment.py +4 -4
evaluate.py
CHANGED
|
@@ -28,6 +28,7 @@ def select_round(dataset, split, round=None):
|
|
| 28 |
[None, None],
|
| 29 |
["2025-12-19T13:29:42", "2025-12-20T07:25:12"],
|
| 30 |
["2025-12-23T01:20:55", "2025-12-23T06:39:43"],
|
|
|
|
| 31 |
]
|
| 32 |
# If no round is specified, use the most recent one
|
| 33 |
if round is None:
|
|
@@ -107,8 +108,10 @@ def get_evalset(round=None):
|
|
| 107 |
index, _ = select_round(dataset, "test", round)
|
| 108 |
# Convert to DataFrame
|
| 109 |
df = dataset.to_pandas()
|
| 110 |
-
# Use only
|
| 111 |
df = df.iloc[index]
|
|
|
|
|
|
|
| 112 |
# Reset the index after subsetting
|
| 113 |
df.reset_index(drop=True, inplace=True)
|
| 114 |
# Construct y list (ground truth)
|
|
|
|
| 28 |
[None, None],
|
| 29 |
["2025-12-19T13:29:42", "2025-12-20T07:25:12"],
|
| 30 |
["2025-12-23T01:20:55", "2025-12-23T06:39:43"],
|
| 31 |
+
["2025-12-25T03:46:46", "2025-12-25T07:38:35"],
|
| 32 |
]
|
| 33 |
# If no round is specified, use the most recent one
|
| 34 |
if round is None:
|
|
|
|
| 108 |
index, _ = select_round(dataset, "test", round)
|
| 109 |
# Convert to DataFrame
|
| 110 |
df = dataset.to_pandas()
|
| 111 |
+
# Use only the examples in the selected round
|
| 112 |
df = df.iloc[index]
|
| 113 |
+
# Drop rows with None for judge_noteworthy
|
| 114 |
+
df = df.dropna(subset=["judge_noteworthy"])
|
| 115 |
# Reset the index after subsetting
|
| 116 |
df.reset_index(drop=True, inplace=True)
|
| 117 |
# Construct y list (ground truth)
|
production/alignment_4.txt
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Noteworthy Changes (True)
|
| 2 |
+
|
| 3 |
+
These changes introduce fundamentally essential, previously unstated, or critically altered information that directly impacts the core identity, primary function, or crucial biographical/historical understanding of a *person* or *work*. Such changes are *extremely rare*, representing approximately 14% of revisions according to human preferences.
|
| 4 |
+
|
| 5 |
+
* **Fundamental Biographical Details for Individuals:**
|
| 6 |
+
* Adding a specific playing position for an individual (e.g., a footballer's specific position).
|
| 7 |
+
* Adding a precise birth date for an individual (month and day, where only the year was previously stated).
|
| 8 |
+
* Adding a precise death date for an individual (month and day, where only the year was previously stated).
|
| 9 |
+
* **Fundamental Status Updates:**
|
| 10 |
+
* Updating an entity's or individual's fundamental status (e.g., a film changing from "upcoming" to "was theatrically released"; a person's role changing from 'footballer' to 'former footballer').
|
| 11 |
+
* **Core Understanding of a Work:**
|
| 12 |
+
* Adding significant contextual information about a *work's* fundamental themes or purpose (e.g., a book's specific purpose and theme).
|
| 13 |
+
|
| 14 |
+
## Not Noteworthy Changes (False)
|
| 15 |
+
|
| 16 |
+
These changes are typically minor stylistic adjustments, rephrasing, additions of easily inferable or minor contextual details, or information that, while adding depth, does not alter the fundamental understanding, core identity, or crucial biographical information of the subject. This category represents the vast majority (approximately 86%) of revisions according to human preferences.
|
| 17 |
+
|
| 18 |
+
* **Minor Stylistic, Grammar, or Formatting Adjustments:**
|
| 19 |
+
* Minor stylistic corrections, grammar adjustments, capitalization changes, or punctuation changes.
|
| 20 |
+
* Rephrasing, structural reorganization, or moving content that does not introduce new fundamental information or alter the core meaning.
|
| 21 |
+
* Fixing broken language templates or clarifying official names/abbreviations.
|
| 22 |
+
* Changes in distance formatting, units, or adding metric equivalents.
|
| 23 |
+
* Correction of units for a quantifiable trait (e.g., 'cm' to 'mm').
|
| 24 |
+
* Correcting placeholder text or minor formatting elements.
|
| 25 |
+
* Replacing templates with equivalent content, changing abbreviations, or repositioning references without altering factual content or meaning.
|
| 26 |
+
* Minor formatting adjustments (e.g., hyphens, spaces in date ranges).
|
| 27 |
+
* **Linguistic, Cultural, and Etymological Details:**
|
| 28 |
+
* Adding or removing names, scripts, or characters in indigenous, native, or other foreign languages (e.g., Hebrew script, Chinese characters, Persian name, Hungarian name) or their literal translations.
|
| 29 |
+
* Adding any form of pronunciation (e.g., IPA, romanization, Latin American Spanish pronunciation).
|
| 30 |
+
* Adding alternative official or common names for places, entities, or concepts (e.g., "Banqiao Night Market", common English name for a building, scientific binomial name in Latin, Italian title for a sculpture).
|
| 31 |
+
* Adding etymological explanations or linguistic context.
|
| 32 |
+
* Adding full romanized names when a shorter form is present.
|
| 33 |
+
* **Minor Geographical, Locational, or Identifying Specificity:**
|
| 34 |
+
* Adding minor geographical specificity (e.g., a sub-district within a district, specific river banks, broader regional context within a known area).
|
| 35 |
+
* Adding or removing specific routes, roads, or less prominent geographical identifiers (e.g., U.S. Route 50).
|
| 36 |
+
* Adding specific sub-location details (e.g., county) if the primary identifier and broader location (city, state) are still present.
|
| 37 |
+
* Adding precise dates for *events or accidents* (month and day, where only the year was previously stated).
|
| 38 |
+
* **Details about Individuals (beyond strict 'True' criteria):**
|
| 39 |
+
* Adding a single additional profession for an individual (e.g., 'comedian').
|
| 40 |
+
* Adding the number of books authored or specific editorial roles (e.g., 'founding editor').
|
| 41 |
+
* Adding a previously unstated maternal grandfather's name or a middle name if the primary name is already present.
|
| 42 |
+
* Refining terminology for ethnicity or identity (e.g., 'American Indian woman Lakota' to 'Lakota woman' and 'Indian' to 'Native').
|
| 43 |
+
* Adding biographical details such as nationality, specific physical attributes (e.g., height), or general career statistics (e.g., matches played) that do not fundamentally alter core identity.
|
| 44 |
+
* **Detailed or Expanded Information (without fundamental change):**
|
| 45 |
+
* Adding specific examples, anecdotes, or detailed explanations that deepen understanding but do not change the fundamental nature or core conclusion of a concept or definition.
|
| 46 |
+
* Adding information that is highly inferable or readily implied from existing text or context.
|
| 47 |
+
* Adding minor example devices or specific qualifications about effectiveness.
|
| 48 |
+
* Adding fundamental biological characteristics (e.g., "diurnal" activity pattern).
|
| 49 |
+
* Adding record-breaking characteristics or superlatives (e.g., "largest member of X family").
|
| 50 |
+
* Adding hydrological data (e.g., basin area) or clarifying downstream connections to other entities.
|
| 51 |
+
* Clarifying distinctions between related concepts (e.g., tax exclusion, tax deduction vs. tax exemption).
|
| 52 |
+
* Adding specific breed names or clarifying primary types for animals (e.g., "Tulim", "beef cattle breed").
|
| 53 |
+
* Expanding an institution's stated scope or mentioning a major collaborating institution.
|
| 54 |
+
* Adding information about a group's accomplishments or historical impact (e.g., specific policies, social spending, poverty reduction) if it does not fundamentally alter the primary understanding of the group's core identity.
|
| 55 |
+
* Adding details such as specific flowering times, an extra access point for a location, or specific duration details for episodes.
|
| 56 |
+
* **Removals:**
|
| 57 |
+
* Removing an adjective that describes an achievement (e.g., "award-winning").
|
| 58 |
+
* Removing a specific detail about literary output if the core biographical details remain largely intact (e.g., "never yet a novel", initial book title).
|
| 59 |
+
* Removing an alternative name, alternative spelling, or specific sub-location detail (e.g., county) if the primary identifier and broader location (city, state) are still present.
|
| 60 |
+
* Removing specific taxonomic clades if the core family classification remains present.
|
| 61 |
+
* Removing authors of a foreword or afterword.
|
| 62 |
+
* Removing specific practical, operational, or contextual information (e.g., postal services details, operational details of a railway, career length).
|
| 63 |
+
* **Corrections and Clarifications (unless they fit strict 'True' criteria):**
|
| 64 |
+
* Correction or clarification of an individual's specific political party affiliation (e.g., 'conservative' to 'Progressive Conservative').
|
| 65 |
+
* Correction or clarification of the full name of a historical political body or an event's official title (e.g., 'Constituent Assembly' to 'National Constituent Assembly', '8th World Badminton Championships' to '1993 IBF World Championships').
|
| 66 |
+
* Changing the primary name or identifier of an individual (e.g., 'Ivane I' to 'John I').
|
| 67 |
+
* More precise description of victims or adding specific significant elements of a scandal (e.g., 'underage women' to 'children and girls', 'client list').
|
| 68 |
+
* Direct factual correction of a major collaborating institution's name.
|
| 69 |
+
* **Administrative & Utility-focused additions:**
|
| 70 |
+
* Adding 'citation needed' tags or similar administrative notes.
|
| 71 |
+
* Adding related names for context that do not fundamentally alter the definition or core identity of the primary subject.
|
| 72 |
+
* Adding a passenger figure that quantifies an already established characteristic (e.g., low usage).
|
update_alignment.py
CHANGED
|
@@ -34,7 +34,7 @@ def update_alignment(round=None):
|
|
| 34 |
# This also gets the number of the most recent round if the argument is None
|
| 35 |
index, round = select_round(dataset, "train", round)
|
| 36 |
examples = df.iloc[index]
|
| 37 |
-
|
| 38 |
# Loop over rows
|
| 39 |
for index, row in examples.iterrows():
|
| 40 |
# Construct training text for this row
|
|
@@ -46,9 +46,9 @@ def update_alignment(round=None):
|
|
| 46 |
judge = f"AI Judge: {row['judge_reasoning']}"
|
| 47 |
human = f"Human feedback: {row['feedback']} ({ground_truth})."
|
| 48 |
row_text = f"{judge} {human}"
|
| 49 |
-
|
| 50 |
|
| 51 |
-
|
| 52 |
|
| 53 |
# Read the existing alignment
|
| 54 |
with open(f"production/alignment_{str(round - 1)}.txt", "r") as file:
|
|
@@ -57,7 +57,7 @@ def update_alignment(round=None):
|
|
| 57 |
|
| 58 |
# Write prompt to update alignment
|
| 59 |
prompt = update_prompt.replace("{{alignment_text}}", alignment_text).replace(
|
| 60 |
-
"{{
|
| 61 |
)
|
| 62 |
|
| 63 |
# Function to generate response
|
|
|
|
| 34 |
# This also gets the number of the most recent round if the argument is None
|
| 35 |
index, round = select_round(dataset, "train", round)
|
| 36 |
examples = df.iloc[index]
|
| 37 |
+
feedback_data = []
|
| 38 |
# Loop over rows
|
| 39 |
for index, row in examples.iterrows():
|
| 40 |
# Construct training text for this row
|
|
|
|
| 46 |
judge = f"AI Judge: {row['judge_reasoning']}"
|
| 47 |
human = f"Human feedback: {row['feedback']} ({ground_truth})."
|
| 48 |
row_text = f"{judge} {human}"
|
| 49 |
+
feedback_data.append(row_text)
|
| 50 |
|
| 51 |
+
feedback_data = "\n\n".join(feedback_data)
|
| 52 |
|
| 53 |
# Read the existing alignment
|
| 54 |
with open(f"production/alignment_{str(round - 1)}.txt", "r") as file:
|
|
|
|
| 57 |
|
| 58 |
# Write prompt to update alignment
|
| 59 |
prompt = update_prompt.replace("{{alignment_text}}", alignment_text).replace(
|
| 60 |
+
"{{feedback_data}}", feedback_data
|
| 61 |
)
|
| 62 |
|
| 63 |
# Function to generate response
|