jedick
Initial commit
48c27bb
# Noteworthy Differences:
# Classification of noteworthy differences between revisions of Wikipedia articles: an AI alignment project
# 20251114 jmd version 1
from google import genai
from google.genai import types
from pydantic import BaseModel
from dotenv import load_dotenv
import json
import os
import pandas as pd
from prompts import analyzer_prompts, judge_prompt
from retry_with_backoff import retry_with_backoff
import logfire
# Load API keys
# (python-dotenv; this runs before the clients below are created —
# presumably they read their credentials from the environment, TODO confirm)
load_dotenv()
# Setup Logfire
# Must be configured before any instrumentation is attached
logfire.configure()
# This wraps Google Gen AI client calls
# to capture prompts, responses, and metadata
logfire.instrument_google_genai()
# Initialize the Gemini LLM
# Module-level client shared by classifier() and judge(); no credentials are
# passed explicitly here
client = genai.Client()
@retry_with_backoff()
def classifier(old_revision, new_revision, prompt_style):
    """
    Decide whether the changes between two article revisions are noteworthy

    Args:
        old_revision: Old revision of article
        new_revision: New revision of article
        prompt_style: Key into analyzer_prompts selecting the prompt template

    Returns:
        Dict parsed from the model's JSON reply, with keys:
            noteworthy: True if the differences are noteworthy; False if not
            rationale: One-sentence rationale for the classification
        Both values are None when either revision is missing.
    """
    # Nothing to compare when either revision is missing (None / NaN)
    if not (pd.notna(old_revision) and pd.notna(new_revision)):
        return {"noteworthy": None, "rationale": None}

    # Pick the template for the requested style and fill in both revisions
    filled_prompt = analyzer_prompts[prompt_style]
    filled_prompt = filled_prompt.replace("{{old_revision}}", old_revision)
    filled_prompt = filled_prompt.replace("{{new_revision}}", new_revision)

    # Structured-output schema for the reply
    # (the class name is kept as "Response" because it ends up in the schema)
    class Response(BaseModel):
        noteworthy: bool
        rationale: str

    # Ask for JSON that conforms to the schema
    generation_config = types.GenerateContentConfig(
        response_mime_type="application/json",
        response_schema=Response.model_json_schema(),
    )
    reply = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=filled_prompt,
        config=generation_config,
    )

    # The reply text is a JSON document; hand it back as a dict
    return json.loads(reply.text)
@retry_with_backoff()
def judge(old_revision, new_revision, rationale_1, rationale_2, mode="unaligned"):
    """
    AI judge to settle disagreements between classification models

    Args:
        old_revision: Old revision of article
        new_revision: New revision of article
        rationale_1: Rationale provided by model 1 (i.e., heuristic prompt)
        rationale_2: Rationale provided by model 2 (i.e., few-shot prompt)
        mode: Prompt mode: unaligned, aligned-fewshot, or aligned-heuristic

    Returns:
        Dict parsed from the model's JSON reply, with keys:
            noteworthy: True if the differences are noteworthy; False if not
            reasoning: One-sentence reason for the judgment
        Both values are None when a revision or rationale is missing.

    Raises:
        ValueError: If mode is not one of the supported prompt modes
    """
    # Alignment text file for each mode (None means no alignment text)
    alignment_files = {
        "unaligned": None,
        "aligned-fewshot": "data/alignment_fewshot.txt",
        "aligned-heuristic": "data/alignment_heuristic.txt",
    }
    # Reject an unknown mode up front, before building the prompt
    if mode not in alignment_files:
        raise ValueError(f"Unknown mode: {mode}")

    # Return None for missing revisions or rationales (same convention as
    # classifier); str.replace() would raise TypeError on None / NaN
    inputs = (old_revision, new_revision, rationale_1, rationale_2)
    if not all(pd.notna(value) for value in inputs):
        return {"noteworthy": None, "reasoning": None}

    prompt = judge_prompt
    # Add article revisions to prompt
    prompt = prompt.replace("{{old_revision}}", old_revision).replace(
        "{{new_revision}}", new_revision
    )
    # Add rationales to prompt
    prompt = prompt.replace("{{model_1_rationale}}", rationale_1).replace(
        "{{model_2_rationale}}", rationale_2
    )

    # Optionally add alignment text to prompt
    alignment_path = alignment_files[mode]
    if alignment_path is None:
        alignment_text = ""
    else:
        # Explicit encoding so the text does not depend on the platform locale
        # (assumes the alignment files are stored as UTF-8 — TODO confirm)
        with open(alignment_path, "r", encoding="utf-8") as file:
            alignment_text = file.read()
    prompt = prompt.replace("{{alignment_text}}", alignment_text)

    # Define response schema
    class Response(BaseModel):
        noteworthy: bool
        reasoning: str

    # Generate response
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=prompt,
        config=types.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=Response.model_json_schema(),
        ),
    )
    return json.loads(response.text)