# Jasonkim8652's picture
# Phase A: integrate LLM judge panel for hybrid scoring
# 8e08ed6 verified
"""LLM-as-a-Judge scoring for BioDesignBench Tier 2 evaluation.

Provides cross-model LLM judge panels that evaluate subjective dimensions
(approach, orchestration, feasibility, novelty, diversity) while quality
metrics remain 100% algorithmic.

Usage:

    from llm_judge import LLMJudgePanel

    panel = LLMJudgePanel(agent_model_family="anthropic", dry_run=True)
    result = panel.evaluate_sync(
        task_description="Design a binder for IL-6R",
        tool_call_log=[...],
        designed_sequences=["MKVL..."],
        algorithmic_metrics={"pLDDT": 82.5},
    )
"""
from llm_judge.aggregation import (
WEIGHT_SPLIT,
aggregate_judge_scores,
merge_algo_and_judge_scores,
split_algo_score,
)
from llm_judge.judge import LLMJudge, parse_judge_response
from llm_judge.panel import (
LLMJudgePanel,
detect_agent_family,
get_judge_models,
)
from llm_judge.rubrics import (
JUDGE_DIMENSIONS,
JUDGE_SYSTEM_PROMPT,
build_judge_prompt,
)
# Public API of the package: the names exported by ``from llm_judge import *``.
# Every entry is re-exported from one of the submodules imported above
# (aggregation, judge, panel, rubrics); keep this list in sync with those
# imports. It is kept as a plain list literal of strings.
__all__ = [
"LLMJudge",
"LLMJudgePanel",
"JUDGE_DIMENSIONS",
"JUDGE_SYSTEM_PROMPT",
"WEIGHT_SPLIT",
"aggregate_judge_scores",
"build_judge_prompt",
"detect_agent_family",
"get_judge_models",
"merge_algo_and_judge_scores",
"parse_judge_response",
"split_algo_score",
]