Spaces:
Sleeping
Sleeping
Sibam committed on
Commit ·
a4c268d
1
Parent(s): 14c1b69
final: submission ready
Browse files
- inference.py +11 -7
- models.py +5 -0
- server/environment.py +1 -1
inference.py
CHANGED
|
@@ -20,7 +20,7 @@ from openai import OpenAI
|
|
| 20 |
|
| 21 |
# ── Mandatory env vars ─────────────────────────────────────────
|
| 22 |
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
|
| 23 |
-
MODEL_NAME
|
| 24 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 25 |
ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://localhost:8000")
|
| 26 |
|
|
@@ -90,17 +90,21 @@ def parse_json(text: str, fallback: dict) -> dict:
|
|
| 90 |
|
| 91 |
SYSTEMS = {
|
| 92 |
"pairwise": (
|
| 93 |
-
'You
|
|
|
|
| 94 |
'Reply ONLY with valid JSON: {"choice":"A"} or {"choice":"B"} or {"choice":"tie"}.'
|
| 95 |
),
|
| 96 |
"likert": (
|
| 97 |
-
'You
|
| 98 |
-
'
|
| 99 |
-
'
|
|
|
|
| 100 |
),
|
| 101 |
"consistency": (
|
| 102 |
-
'You
|
| 103 |
-
'
|
|
|
|
|
|
|
| 104 |
),
|
| 105 |
}
|
| 106 |
|
|
|
|
| 20 |
|
| 21 |
# ── Mandatory env vars ─────────────────────────────────────────
|
| 22 |
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
|
| 23 |
+
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
|
| 24 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 25 |
ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://localhost:8000")
|
| 26 |
|
|
|
|
| 90 |
|
| 91 |
SYSTEMS = {
|
| 92 |
"pairwise": (
|
| 93 |
+
'You are an expert RLHF annotator. Think step by step before answering.\n'
|
| 94 |
+
'Example: Prompt: "What is 2+2?" A: "4" B: "Five" → {"choice":"A"} because A is factually correct.\n'
|
| 95 |
'Reply ONLY with valid JSON: {"choice":"A"} or {"choice":"B"} or {"choice":"tie"}.'
|
| 96 |
),
|
| 97 |
"likert": (
|
| 98 |
+
'You are an expert RLHF annotator. Think step by step.\n'
|
| 99 |
+
'Rate helpfulness (does it answer?), honesty (is it true?), '
|
| 100 |
+
'harmlessness (is it safe?), instruction_following (does it follow exactly?).\n'
|
| 101 |
+
'Reply ONLY with JSON: {"helpfulness":4,"honesty":5,"harmlessness":5,"instruction_following":4}'
|
| 102 |
),
|
| 103 |
"consistency": (
|
| 104 |
+
'You are an expert RLHF annotator. Think step by step.\n'
|
| 105 |
+
'Rank responses by: accuracy first, then completeness, then clarity.\n'
|
| 106 |
+
'Example: If C is most accurate and D is vague → {"ranking":["C","A","B","D"]}\n'
|
| 107 |
+
'Reply ONLY with JSON: {"ranking":["B","A","C","D"]}'
|
| 108 |
),
|
| 109 |
}
|
| 110 |
|
models.py
CHANGED
|
@@ -31,6 +31,11 @@ class PairwiseAction(Action):
|
|
| 31 |
default=None,
|
| 32 |
description="Optional reasoning for the choice (not used for grading).",
|
| 33 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
|
| 36 |
class LikertAction(Action):
|
|
|
|
| 31 |
default=None,
|
| 32 |
description="Optional reasoning for the choice (not used for grading).",
|
| 33 |
)
|
| 34 |
+
confidence: float = Field(
|
| 35 |
+
default=0.8,
|
| 36 |
+
ge=0.0, le=1.0,
|
| 37 |
+
description="Annotator confidence in this choice (0.0-1.0)"
|
| 38 |
+
)
|
| 39 |
|
| 40 |
|
| 41 |
class LikertAction(Action):
|
server/environment.py
CHANGED
|
@@ -186,7 +186,7 @@ def grade_consistency(action: ConsistencyAction, example: dict) -> tuple[float,
|
|
| 186 |
# ── Environment ───────────────────────────────────────────────
|
| 187 |
|
| 188 |
TASK_TYPES = ["pairwise", "likert", "consistency"]
|
| 189 |
-
MAX_STEPS_PER_EPISODE =
|
| 190 |
|
| 191 |
|
| 192 |
class PreferenceLabEnvironment(Environment):
|
|
|
|
| 186 |
# ── Environment ───────────────────────────────────────────────
|
| 187 |
|
| 188 |
TASK_TYPES = ["pairwise", "likert", "consistency"]
|
| 189 |
+
MAX_STEPS_PER_EPISODE = 10
|
| 190 |
|
| 191 |
|
| 192 |
class PreferenceLabEnvironment(Environment):
|