Eric Xu committed on
Two-model approach: 7B for setup calls, 72B for evaluation
Browse files
Setup calls (infer-spec, suggest-segments, suggest-changes, extract-filters)
use Qwen2.5-7B-Instruct (~0.5s each). Evaluation and counterfactual probes
use the main model (Qwen2.5-72B-Instruct, ~5s each) where quality matters.
- web/app.py +12 -4
web/app.py
CHANGED
|
@@ -132,6 +132,11 @@ def get_model(model=None):
|
|
| 132 |
return model or os.getenv("LLM_MODEL_NAME", "openai/gpt-4o-mini")
|
| 133 |
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
IS_SPACES = bool(os.getenv("SPACE_ID"))
|
| 136 |
|
| 137 |
|
|
@@ -329,7 +334,8 @@ class InferSpecInput(BaseModel):
|
|
| 329 |
async def infer_spec(input: InferSpecInput, request: Request):
|
| 330 |
"""Infer goal and audience from entity text."""
|
| 331 |
log.info(f"Infer spec ({request.client.host})")
|
| 332 |
-
client,
|
|
|
|
| 333 |
|
| 334 |
prompt = f"""Read this entity and infer two things:
|
| 335 |
1. What is the most likely GOAL the author has? (what outcome they want)
|
|
@@ -371,7 +377,8 @@ Be specific to THIS entity, not generic."""
|
|
| 371 |
async def suggest_changes(input: SuggestChangesInput, request: Request):
|
| 372 |
"""Generate candidate changes from evaluation concerns and goal."""
|
| 373 |
log.info(f"Suggest changes ({len(input.concerns)} concerns)")
|
| 374 |
-
client,
|
|
|
|
| 375 |
|
| 376 |
concerns_text = "\n".join(f"- {c}" for c in input.concerns[:15])
|
| 377 |
prompt = f"""Based on these evaluation results, suggest 3-5 specific, actionable changes.
|
|
@@ -414,7 +421,8 @@ Return JSON:
|
|
| 414 |
async def suggest_segments(input: SuggestSegmentsInput, request: Request):
|
| 415 |
"""Use LLM to suggest audience segments based on entity and context."""
|
| 416 |
log.info("Suggest segments")
|
| 417 |
-
client,
|
|
|
|
| 418 |
|
| 419 |
prompt = f"""Given this entity and audience context, suggest 4-5 evaluator segments.
|
| 420 |
Each segment should represent a distinct perspective that would evaluate this entity differently.
|
|
@@ -510,7 +518,7 @@ async def generate_cohort_endpoint(config: CohortConfig, request: Request):
|
|
| 510 |
|
| 511 |
# Extract structured filters from audience context
|
| 512 |
client, model = llm_from_request(request)
|
| 513 |
-
filters = extract_filters(client,
|
| 514 |
print(f"Nemotron filters from audience context: {filters}")
|
| 515 |
|
| 516 |
filtered = pl.filter_personas(ds, filters, limit=max(total * 20, 2000))
|
|
|
|
| 132 |
return model or os.getenv("LLM_MODEL_NAME", "openai/gpt-4o-mini")
|
| 133 |
|
| 134 |
|
| 135 |
+
def get_fast_model():
|
| 136 |
+
"""Smaller model for cheap setup calls (infer-spec, segments, filters, changes)."""
|
| 137 |
+
return os.getenv("LLM_FAST_MODEL", "Qwen/Qwen2.5-7B-Instruct")
|
| 138 |
+
|
| 139 |
+
|
| 140 |
IS_SPACES = bool(os.getenv("SPACE_ID"))
|
| 141 |
|
| 142 |
|
|
|
|
| 334 |
async def infer_spec(input: InferSpecInput, request: Request):
|
| 335 |
"""Infer goal and audience from entity text."""
|
| 336 |
log.info(f"Infer spec ({request.client.host})")
|
| 337 |
+
client, _ = llm_from_request(request)
|
| 338 |
+
model = get_fast_model()
|
| 339 |
|
| 340 |
prompt = f"""Read this entity and infer two things:
|
| 341 |
1. What is the most likely GOAL the author has? (what outcome they want)
|
|
|
|
| 377 |
async def suggest_changes(input: SuggestChangesInput, request: Request):
|
| 378 |
"""Generate candidate changes from evaluation concerns and goal."""
|
| 379 |
log.info(f"Suggest changes ({len(input.concerns)} concerns)")
|
| 380 |
+
client, _ = llm_from_request(request)
|
| 381 |
+
model = get_fast_model()
|
| 382 |
|
| 383 |
concerns_text = "\n".join(f"- {c}" for c in input.concerns[:15])
|
| 384 |
prompt = f"""Based on these evaluation results, suggest 3-5 specific, actionable changes.
|
|
|
|
| 421 |
async def suggest_segments(input: SuggestSegmentsInput, request: Request):
|
| 422 |
"""Use LLM to suggest audience segments based on entity and context."""
|
| 423 |
log.info("Suggest segments")
|
| 424 |
+
client, _ = llm_from_request(request)
|
| 425 |
+
model = get_fast_model()
|
| 426 |
|
| 427 |
prompt = f"""Given this entity and audience context, suggest 4-5 evaluator segments.
|
| 428 |
Each segment should represent a distinct perspective that would evaluate this entity differently.
|
|
|
|
| 518 |
|
| 519 |
# Extract structured filters from audience context
|
| 520 |
client, model = llm_from_request(request)
|
| 521 |
+
filters = extract_filters(client, get_fast_model(), config.audience_context, config.description)
|
| 522 |
print(f"Nemotron filters from audience context: {filters}")
|
| 523 |
|
| 524 |
filtered = pl.filter_personas(ds, filters, limit=max(total * 20, 2000))
|