wenjiao committed on
Commit
4802f1a
·
1 Parent(s): f5ea11f

fix init Evaluation queue count + add quant description + update overhead_factor params

Browse files
app.py CHANGED
@@ -1012,6 +1012,16 @@ with demo:
1012
  _quant_submit_evt.then(fn=refresh_queue_tables, inputs=_REFRESH_INPUTS, outputs=_REFRESH_OUTPUTS)
1013
  _eval_submit_evt.then(fn=refresh_queue_tables, inputs=_REFRESH_INPUTS, outputs=_REFRESH_OUTPUTS)
1014
 
 
 
 
 
 
 
 
 
 
 
1015
  # Checkbox filters — instantly re-filter the queue tables
1016
  my_submissions_quant_cb.change(
1017
  fn=refresh_queue_tables,
 
1012
  _quant_submit_evt.then(fn=refresh_queue_tables, inputs=_REFRESH_INPUTS, outputs=_REFRESH_OUTPUTS)
1013
  _eval_submit_evt.then(fn=refresh_queue_tables, inputs=_REFRESH_INPUTS, outputs=_REFRESH_OUTPUTS)
1014
 
1015
+ # Refresh queue tables on every page load so the accordion counts reflect
1016
+ # the current state of disk — not the (possibly stale) values captured at
1017
+ # module-load time. Without this, the labels show whatever count was true
1018
+ # when the server started, until the 60 s timer fires for the first time.
1019
+ demo.load(
1020
+ fn=refresh_queue_tables,
1021
+ inputs=_REFRESH_INPUTS,
1022
+ outputs=_REFRESH_OUTPUTS,
1023
+ )
1024
+
1025
  # Checkbox filters — instantly re-filter the queue tables
1026
  my_submissions_quant_cb.change(
1027
  fn=refresh_queue_tables,
src/app_helpers/assets.py CHANGED
@@ -431,10 +431,12 @@ SIDEBAR_HEAD = r"""
431
  quant: {
432
  title: 'Quantization',
433
  subtitle: 'Submit a source model and quantization scheme to run managed low-bit quantization jobs on the hosted pipeline',
 
434
  badges: [
435
  { icon: '🏆', text: 'Open Source Model' },
436
  { icon: '⚡', text: 'INT4 (W4A16) / MXFP4 / NVFP4' },
437
  { icon: '📊', text: 'Open Source Benchmark' },
 
438
  { icon: '🤖', text: 'Agentic AI for AutoQuant' }
439
  ]
440
  },
@@ -494,10 +496,20 @@ SIDEBAR_HEAD = r"""
494
  function syncHero(tid) {
495
  var heroTitle = document.getElementById('hero-title');
496
  var heroSubtitle = document.getElementById('hero-subtitle');
 
497
  var heroBadges = document.getElementById('hero-badges');
498
  var copy = HERO_COPY[tid] || HERO_COPY['pipeline-results'];
499
  if (heroTitle) heroTitle.textContent = copy.title;
500
  if (heroSubtitle) heroSubtitle.textContent = copy.subtitle;
 
 
 
 
 
 
 
 
 
501
  if (heroBadges && copy.badges !== undefined) {
502
  heroBadges.innerHTML = copy.badges.map(function(b) {
503
  return '<span class="hero-badge">' + b.icon + ' ' + b.text + '</span>';
 
431
  quant: {
432
  title: 'Quantization',
433
  subtitle: 'Submit a source model and quantization scheme to run managed low-bit quantization jobs on the hosted pipeline',
434
+ note: '💡 <strong>Powered by AutoRound.</strong> Find it useful? <a href="https://github.com/intel/auto-round" target="_blank" rel="noopener">Star intel/auto-round ⭐</a> on GitHub.',
435
  badges: [
436
  { icon: '🏆', text: 'Open Source Model' },
437
  { icon: '⚡', text: 'INT4 (W4A16) / MXFP4 / NVFP4' },
438
  { icon: '📊', text: 'Open Source Benchmark' },
439
+ { icon: '⭐', text: 'AutoRound' },
440
  { icon: '🤖', text: 'Agentic AI for AutoQuant' }
441
  ]
442
  },
 
496
  function syncHero(tid) {
497
  var heroTitle = document.getElementById('hero-title');
498
  var heroSubtitle = document.getElementById('hero-subtitle');
499
+ var heroNote = document.getElementById('hero-note');
500
  var heroBadges = document.getElementById('hero-badges');
501
  var copy = HERO_COPY[tid] || HERO_COPY['pipeline-results'];
502
  if (heroTitle) heroTitle.textContent = copy.title;
503
  if (heroSubtitle) heroSubtitle.textContent = copy.subtitle;
504
+ if (heroNote) {
505
+ if (copy.note) {
506
+ heroNote.innerHTML = copy.note;
507
+ heroNote.style.display = '';
508
+ } else {
509
+ heroNote.innerHTML = '';
510
+ heroNote.style.display = 'none';
511
+ }
512
+ }
513
  if (heroBadges && copy.badges !== undefined) {
514
  heroBadges.innerHTML = copy.badges.map(function(b) {
515
  return '<span class="hero-badge">' + b.icon + ' ' + b.text + '</span>';
src/display/about.py CHANGED
@@ -8,6 +8,7 @@ TITLE = """
8
  <div class="hero-text" id="hero-copy">
9
  <h1 class="hero-title" id="hero-title">Low-bit LLM Leaderboard</h1>
10
  <p class="hero-subtitle" id="hero-subtitle">Track, compare and benchmark quantized language models across 3 standard evaluation tasks</p>
 
11
  <div class="hero-badges" id="hero-badges">
12
  <span class="hero-badge">🏆 Open Source Model</span>
13
  <span class="hero-badge">⚡ INT4 (W4A16) / MXFP4 / NVFP4</span>
 
8
  <div class="hero-text" id="hero-copy">
9
  <h1 class="hero-title" id="hero-title">Low-bit LLM Leaderboard</h1>
10
  <p class="hero-subtitle" id="hero-subtitle">Track, compare and benchmark quantized language models across 3 standard evaluation tasks</p>
11
+ <p class="hero-note" id="hero-note" style="display:none;"></p>
12
  <div class="hero-badges" id="hero-badges">
13
  <span class="hero-badge">🏆 Open Source Model</span>
14
  <span class="hero-badge">⚡ INT4 (W4A16) / MXFP4 / NVFP4</span>
src/display/css_html_js.py CHANGED
@@ -567,6 +567,23 @@ body {
567
  line-height: 1.5;
568
  }
569
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
  .hero-badges {
571
  display: flex;
572
  flex-wrap: wrap;
 
567
  line-height: 1.5;
568
  }
569
 
570
+ .hero-note {
571
+ color: rgba(255, 255, 255, 0.92) !important;
572
+ font-size: 0.92rem !important;
573
+ margin: 0 0 14px 0 !important;
574
+ padding: 8px 12px;
575
+ line-height: 1.5;
576
+ background: rgba(255, 255, 255, 0.10);
577
+ border-left: 3px solid #fcd34d;
578
+ border-radius: 6px;
579
+ }
580
+
581
+ .hero-note a {
582
+ color: #fde68a !important;
583
+ text-decoration: underline;
584
+ font-weight: 600;
585
+ }
586
+
587
  .hero-badges {
588
  display: flex;
589
  flex-wrap: wrap;
src/submission/check_validity.py CHANGED
@@ -603,24 +603,24 @@ PRECISION_TO_BITS: dict[str, int] = {
603
  def estimate_weight_memory_gb(
604
  params_b: float,
605
  bits: int,
606
- overhead_factor: float = 2.2,
607
  ) -> float:
608
  """Estimate GPU VRAM (GB) required to load a model with *bits*-bit weights.
609
 
610
  The formula covers only weight storage; *overhead_factor* accounts for KV
611
- cache, activations, and framework buffers (default 2.2 = 120 % overhead).
612
 
613
  Examples
614
  --------
615
- - 7 B W4A16: 7 × 0.5 × 2.2 ≈ 7.7 GB
616
- - 13 B W4A16: 13 × 0.5 × 2.2 ≈ 14.3 GB
617
- - 70 B W4A16: 70 × 0.5 × 2.2 ≈ 77.0 GB
618
 
619
  Args:
620
  params_b: Parameter count in billions.
621
  bits: Weight quantization bits (e.g. 4 for W4A16, 8 for W8A16,
622
  16 for FP16/BF16, 32 for FP32).
623
- overhead_factor: Multiplier for non-weight memory. Default 2.2.
624
 
625
  Returns:
626
  Estimated GPU memory in GB (rounded to 2 decimal places).
 
603
  def estimate_weight_memory_gb(
604
  params_b: float,
605
  bits: int,
606
+ overhead_factor: float = 4.4,
607
  ) -> float:
608
  """Estimate GPU VRAM (GB) required to load a model with *bits*-bit weights.
609
 
610
  The formula covers only weight storage; *overhead_factor* accounts for KV
611
+ cache, activations, and framework buffers (default 4.4 = 340 % overhead).
612
 
613
  Examples
614
  --------
615
+ - 7 B W4A16: 7 × 0.5 × 4.4 ≈ 15.4 GB
616
+ - 13 B W4A16: 13 × 0.5 × 4.4 ≈ 28.6 GB
617
+ - 70 B W4A16: 70 × 0.5 × 4.4 ≈ 154.0 GB
618
 
619
  Args:
620
  params_b: Parameter count in billions.
621
  bits: Weight quantization bits (e.g. 4 for W4A16, 8 for W8A16,
622
  16 for FP16/BF16, 32 for FP32).
623
+ overhead_factor: Multiplier for non-weight memory. Default 4.4.
624
 
625
  Returns:
626
  Estimated GPU memory in GB (rounded to 2 decimal places).
src/submission/submit.py CHANGED
@@ -572,7 +572,7 @@ def add_new_eval(
572
  # ── Step 3: Estimate VRAM ────────────────────────────────────────
573
  bits_for_precision = PRECISION_TO_BITS.get(precision, 4)
574
  estimated_memory_gb = estimate_weight_memory_gb(
575
- params_b=model_params, bits=bits_for_precision, overhead_factor=2.2,
576
  )
577
 
578
 
@@ -846,7 +846,7 @@ def add_new_quant(
846
  # ── Step 4: Estimate VRAM for post-quantization evaluation ───────
847
  output_bits = scheme.bits
848
  eval_memory_gb = estimate_weight_memory_gb(
849
- params_b=model_params, bits=output_bits, overhead_factor=2.2,
850
  )
851
 
852
  eval_gpu_type, eval_gpu_nums = select_gpu_with_override(eval_memory_gb, hardware_override, gpu_count_override)
 
572
  # ── Step 3: Estimate VRAM ────────────────────────────────────────
573
  bits_for_precision = PRECISION_TO_BITS.get(precision, 4)
574
  estimated_memory_gb = estimate_weight_memory_gb(
575
+ params_b=model_params, bits=bits_for_precision, overhead_factor=4.4,
576
  )
577
 
578
 
 
846
  # ── Step 4: Estimate VRAM for post-quantization evaluation ───────
847
  output_bits = scheme.bits
848
  eval_memory_gb = estimate_weight_memory_gb(
849
+ params_b=model_params, bits=output_bits, overhead_factor=4.4,
850
  )
851
 
852
  eval_gpu_type, eval_gpu_nums = select_gpu_with_override(eval_memory_gb, hardware_override, gpu_count_override)
tests/test_submit.py CHANGED
@@ -440,7 +440,7 @@ def test_auto_eval():
440
  print(f"\n--- Step 3: VRAM estimation ---")
441
  if params_b:
442
  bits = PRECISION_TO_BITS.get(precision, 4)
443
- est_mem = estimate_weight_memory_gb(params_b, bits=bits, overhead_factor=2.2)
444
  print(f" bits: {bits}")
445
  print(f" estimated_vram: {est_mem} GB")
446
  else:
@@ -549,7 +549,7 @@ def test_auto_quant():
549
  scheme = SUPPORTED_QUANT_SCHEMES.get(quant_scheme)
550
  eval_mem = None
551
  if params_b and scheme:
552
- eval_mem = estimate_weight_memory_gb(params_b, bits=scheme.bits, overhead_factor=2.2)
553
  print(f" params_b: {params_b}")
554
  print(f" output_bits: {scheme.bits}")
555
  print(f" eval_vram: {eval_mem} GB")
 
440
  print(f"\n--- Step 3: VRAM estimation ---")
441
  if params_b:
442
  bits = PRECISION_TO_BITS.get(precision, 4)
443
+ est_mem = estimate_weight_memory_gb(params_b, bits=bits, overhead_factor=4.4)
444
  print(f" bits: {bits}")
445
  print(f" estimated_vram: {est_mem} GB")
446
  else:
 
549
  scheme = SUPPORTED_QUANT_SCHEMES.get(quant_scheme)
550
  eval_mem = None
551
  if params_b and scheme:
552
+ eval_mem = estimate_weight_memory_gb(params_b, bits=scheme.bits, overhead_factor=4.4)
553
  print(f" params_b: {params_b}")
554
  print(f" output_bits: {scheme.bits}")
555
  print(f" eval_vram: {eval_mem} GB")