Eric Xu committed on
Commit
c5a1119
·
unverified ·
1 Parent(s): 8a25fc1

Two-model approach: 7B for setup calls, 72B for evaluation

Browse files

Setup calls (infer-spec, suggest-segments, suggest-changes, extract-filters)
use Qwen2.5-7B-Instruct (~0.5s each). Evaluation and counterfactual probes
use the main model (Qwen2.5-72B-Instruct, ~5s each) where quality matters.

Files changed (1) hide show
  1. web/app.py +12 -4
web/app.py CHANGED
@@ -132,6 +132,11 @@ def get_model(model=None):
132
  return model or os.getenv("LLM_MODEL_NAME", "openai/gpt-4o-mini")
133
 
134
 
 
 
 
 
 
135
  IS_SPACES = bool(os.getenv("SPACE_ID"))
136
 
137
 
@@ -329,7 +334,8 @@ class InferSpecInput(BaseModel):
329
  async def infer_spec(input: InferSpecInput, request: Request):
330
  """Infer goal and audience from entity text."""
331
  log.info(f"Infer spec ({request.client.host})")
332
- client, model = llm_from_request(request)
 
333
 
334
  prompt = f"""Read this entity and infer two things:
335
  1. What is the most likely GOAL the author has? (what outcome they want)
@@ -371,7 +377,8 @@ Be specific to THIS entity, not generic."""
371
  async def suggest_changes(input: SuggestChangesInput, request: Request):
372
  """Generate candidate changes from evaluation concerns and goal."""
373
  log.info(f"Suggest changes ({len(input.concerns)} concerns)")
374
- client, model = llm_from_request(request)
 
375
 
376
  concerns_text = "\n".join(f"- {c}" for c in input.concerns[:15])
377
  prompt = f"""Based on these evaluation results, suggest 3-5 specific, actionable changes.
@@ -414,7 +421,8 @@ Return JSON:
414
  async def suggest_segments(input: SuggestSegmentsInput, request: Request):
415
  """Use LLM to suggest audience segments based on entity and context."""
416
  log.info("Suggest segments")
417
- client, model = llm_from_request(request)
 
418
 
419
  prompt = f"""Given this entity and audience context, suggest 4-5 evaluator segments.
420
  Each segment should represent a distinct perspective that would evaluate this entity differently.
@@ -510,7 +518,7 @@ async def generate_cohort_endpoint(config: CohortConfig, request: Request):
510
 
511
  # Extract structured filters from audience context
512
  client, model = llm_from_request(request)
513
- filters = extract_filters(client, model, config.audience_context, config.description)
514
  print(f"Nemotron filters from audience context: {filters}")
515
 
516
  filtered = pl.filter_personas(ds, filters, limit=max(total * 20, 2000))
 
132
  return model or os.getenv("LLM_MODEL_NAME", "openai/gpt-4o-mini")
133
 
134
 
135
+ def get_fast_model():
136
+ """Smaller model for cheap setup calls (infer-spec, segments, filters, changes)."""
137
+ return os.getenv("LLM_FAST_MODEL", "Qwen/Qwen2.5-7B-Instruct")
138
+
139
+
140
  IS_SPACES = bool(os.getenv("SPACE_ID"))
141
 
142
 
 
334
  async def infer_spec(input: InferSpecInput, request: Request):
335
  """Infer goal and audience from entity text."""
336
  log.info(f"Infer spec ({request.client.host})")
337
+ client, _ = llm_from_request(request)
338
+ model = get_fast_model()
339
 
340
  prompt = f"""Read this entity and infer two things:
341
  1. What is the most likely GOAL the author has? (what outcome they want)
 
377
  async def suggest_changes(input: SuggestChangesInput, request: Request):
378
  """Generate candidate changes from evaluation concerns and goal."""
379
  log.info(f"Suggest changes ({len(input.concerns)} concerns)")
380
+ client, _ = llm_from_request(request)
381
+ model = get_fast_model()
382
 
383
  concerns_text = "\n".join(f"- {c}" for c in input.concerns[:15])
384
  prompt = f"""Based on these evaluation results, suggest 3-5 specific, actionable changes.
 
421
  async def suggest_segments(input: SuggestSegmentsInput, request: Request):
422
  """Use LLM to suggest audience segments based on entity and context."""
423
  log.info("Suggest segments")
424
+ client, _ = llm_from_request(request)
425
+ model = get_fast_model()
426
 
427
  prompt = f"""Given this entity and audience context, suggest 4-5 evaluator segments.
428
  Each segment should represent a distinct perspective that would evaluate this entity differently.
 
518
 
519
  # Extract structured filters from audience context
520
  client, model = llm_from_request(request)
521
+ filters = extract_filters(client, get_fast_model(), config.audience_context, config.description)
522
  print(f"Nemotron filters from audience context: {filters}")
523
 
524
  filtered = pl.filter_personas(ds, filters, limit=max(total * 20, 2000))