Spaces:

Dev-CrafterX
/

preference-lab

Sleeping

Sibam committed on Apr 7

Commit

a4c268d

1 Parent(s): 14c1b69

final: submission ready

Files changed (3) hide show

inference.py CHANGED Viewed

@@ -20,7 +20,7 @@ from openai import OpenAI
 # ── Mandatory env vars ─────────────────────────────────────────
 API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
-MODEL_NAME   = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
 HF_TOKEN     = os.getenv("HF_TOKEN")
 ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://localhost:8000")
@@ -90,17 +90,21 @@ def parse_json(text: str, fallback: dict) -> dict:
 SYSTEMS = {
     "pairwise": (
-        'You judge LLM responses. '
         'Reply ONLY with valid JSON: {"choice":"A"} or {"choice":"B"} or {"choice":"tie"}.'
     ),
     "likert": (
-        'You score responses on 4 axes (1=worst, 5=best). '
-        'Reply ONLY with valid JSON: '
-        '{"helpfulness":4,"honesty":5,"harmlessness":5,"instruction_following":4}'
     ),
     "consistency": (
-        'You rank 4 responses from best to worst. '
-        'Reply ONLY with valid JSON: {"ranking":["B","A","C","D"]}'
     ),
 }

 # ── Mandatory env vars ─────────────────────────────────────────
 API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
+MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
 HF_TOKEN     = os.getenv("HF_TOKEN")
 ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://localhost:8000")
 SYSTEMS = {
     "pairwise": (
+        'You are an expert RLHF annotator. Think step by step before answering.\n'
+        'Example: Prompt: "What is 2+2?" A: "4" B: "Five" → {"choice":"A"} because A is factually correct.\n'
         'Reply ONLY with valid JSON: {"choice":"A"} or {"choice":"B"} or {"choice":"tie"}.'
     ),
     "likert": (
+        'You are an expert RLHF annotator. Think step by step.\n'
+        'Rate helpfulness (does it answer?), honesty (is it true?), '
+        'harmlessness (is it safe?), instruction_following (does it follow exactly?).\n'
+        'Reply ONLY with JSON: {"helpfulness":4,"honesty":5,"harmlessness":5,"instruction_following":4}'
     ),
     "consistency": (
+        'You are an expert RLHF annotator. Think step by step.\n'
+        'Rank responses by: accuracy first, then completeness, then clarity.\n'
+        'Example: If C is most accurate and D is vague → {"ranking":["C","A","B","D"]}\n'
+        'Reply ONLY with JSON: {"ranking":["B","A","C","D"]}'
     ),
 }

models.py CHANGED Viewed

@@ -31,6 +31,11 @@ class PairwiseAction(Action):
         default=None,
         description="Optional reasoning for the choice (not used for grading).",
     )
 class LikertAction(Action):

         default=None,
         description="Optional reasoning for the choice (not used for grading).",
     )
+    confidence: float = Field(
+        default=0.8,
+        ge=0.0, le=1.0,
+        description="Annotator confidence in this choice (0.0-1.0)"
+    )
 class LikertAction(Action):

server/environment.py CHANGED Viewed

@@ -186,7 +186,7 @@ def grade_consistency(action: ConsistencyAction, example: dict) -> tuple[float,
 # ── Environment ───────────────────────────────────────────────
 TASK_TYPES = ["pairwise", "likert", "consistency"]
-MAX_STEPS_PER_EPISODE = 5
 class PreferenceLabEnvironment(Environment):

 # ── Environment ───────────────────────────────────────────────
 TASK_TYPES = ["pairwise", "likert", "consistency"]
+MAX_STEPS_PER_EPISODE = 10
 class PreferenceLabEnvironment(Environment):