kleervoyans committed on
Commit
99f56e7
·
verified ·
1 Parent(s): 875fd0b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -192
app.py CHANGED
@@ -10,8 +10,8 @@ import pandas as pd
10
  import plotly.express as px
11
  import time
12
  import difflib
13
-
14
  from typing import List, Union
 
15
  from langdetect import detect, LangDetectException
16
  from transformers import (
17
  AutoTokenizer,
@@ -20,7 +20,10 @@ from transformers import (
20
  BitsAndBytesConfig,
21
  )
22
  import evaluate
23
- from sacrebleu import corpus_bleu, sentence_bleu # Doc vs. segment BLEU
 
 
 
24
 
25
  # ────────── Global CSS ──────────
26
  st.markdown("""
@@ -46,7 +49,6 @@ def bootstrap(
46
  fn, predictions: List[str], references: List[str], sources: List[str]=None,
47
  n_resamples: int = 200, seed: int = 42
48
  ) -> List[float]:
49
- """Bootstrap metric fn over (predictions, references, [sources])."""
50
  random.seed(seed)
51
  scores = []
52
  N = len(predictions)
@@ -63,16 +65,12 @@ def bootstrap(
63
 
64
  # ────────── Model Manager ──────────
65
  class ModelManager:
66
- """
67
- Loads the best translation model (NLLB‐200 or M2M100),
68
- 8-bit if GPU available; auto-detects src_lang; dynamic tgt_lang.
69
- """
70
  def __init__(self, candidates=None, quantize=True, default_tgt=None):
71
  if quantize and not torch.cuda.is_available():
72
  logger.warning("CUDA unavailable; disabling 8-bit quantization")
73
  quantize = False
74
- self.quantize = quantize
75
- self.candidates = candidates or [
76
  "facebook/nllb-200-distilled-600M",
77
  "facebook/m2m100_418M",
78
  ]
@@ -93,16 +91,13 @@ class ModelManager:
93
  name, device_map="auto", quantization_config=bnb
94
  )
95
  else:
96
- mdl = AutoModelForSeq2SeqLM.from_pretrained(
97
- name, device_map="auto"
98
- )
99
  pipe = pipeline("translation", model=mdl, tokenizer=tok)
100
  self.model_name = name
101
  self.tokenizer = tok
102
  self.model = mdl
103
  self.pipeline = pipe
104
  self.lang_codes = list(tok.lang_code_to_id.keys())
105
- # pick default target if none
106
  if not self.default_tgt:
107
  tur = [c for c in self.lang_codes if c.lower().startswith("tr")]
108
  if not tur:
@@ -115,12 +110,8 @@ class ModelManager:
115
  last_err = e
116
  raise RuntimeError(f"No model loaded: {last_err}")
117
 
118
- def translate(
119
- self, text: Union[str, List[str]],
120
- src_lang: str = None, tgt_lang: str = None
121
- ):
122
  tgt = tgt_lang or self.default_tgt
123
- # auto-detect src
124
  if not src_lang:
125
  sample = text[0] if isinstance(text, list) else text
126
  try:
@@ -153,161 +144,57 @@ class ModelManager:
153
 
154
  # ────────── Evaluator ──────────
155
  class TranslationEvaluator:
156
- """
157
- Wraps BLEU (corpus), ChrF, TER, BERTScore, COMET (ref & ref-free), and provides CIs.
158
- """
159
  def __init__(self):
160
- # BLEU (corpus)
161
- self.bleu = evaluate.load("bleu")
162
- # ChrF :contentReference[oaicite:0]{index=0}
163
- self.chrf = evaluate.load("chrf")
164
- # TER :contentReference[oaicite:1]{index=1}
165
- self.ter = evaluate.load("ter")
166
- # BERTScore
167
  self.bertscore = evaluate.load("bertscore")
168
- # COMET (ref-based)
169
  self.comet_ref = evaluate.load("comet", model_id="unbabel/comet-mqm-qe-da")
170
- # COMET QE (ref-free) :contentReference[oaicite:2]{index=2}
171
  self.comet_qe = evaluate.load("comet", model_id="unbabel/wmt20-comet-qe-da")
172
- logger.info("Loaded BLEU, ChrF, TER, BERTScore, COMET (ref & QE)")
173
 
174
- def compute_metrics(
175
- self,
176
- sources: List[str],
177
- references: List[str],
178
- predictions: List[str],
179
- metrics: List[str],
180
- ci: bool = True
181
- ) -> dict:
182
  out = {}
183
-
184
- # -- BLEU (document-level)
185
  if "BLEU_doc" in metrics:
186
- doc_bleu = self.bleu.compute(
187
- predictions=predictions,
188
- references=[[r] for r in references]
189
- )["bleu"]
190
- out["BLEU_doc"] = float(doc_bleu)
191
-
192
- # -- BLEU (segment-level avg)
193
  if "BLEU_seg" in metrics:
194
- seg_scores = [
195
- sentence_bleu([r], p).score
196
- for p, r in zip(predictions, references)
197
- ]
198
- out["BLEU_seg"] = float(sum(seg_scores) / len(seg_scores))
199
-
200
- # -- ChrF
201
  if "ChrF" in metrics:
202
- cf = self.chrf.compute(
203
- predictions=predictions,
204
- references=[[r] for r in references]
205
- )["score"]
206
- out["ChrF"] = float(cf)
207
-
208
- # -- TER
209
  if "TER" in metrics:
210
- tr = self.ter.compute(
211
- predictions=predictions,
212
- references=[[r] for r in references],
213
- normalized=True
214
- )["score"]
215
- out["TER"] = float(tr)
216
-
217
- # -- BERTScore
218
  if "BERTScore" in metrics:
219
- bs = self.bertscore.compute(
220
- predictions=predictions,
221
- references=references,
222
- lang="xx"
223
- )["f1"]
224
- out["BERTScore"] = float(sum(bs) / len(bs)) if bs else 0.0
225
-
226
- # -- BERTurk
227
  if "BERTurk" in metrics:
228
- bt = self.bertscore.compute(
229
- predictions=predictions,
230
- references=references,
231
- lang="tr"
232
- )["f1"]
233
- out["BERTurk"] = float(sum(bt) / len(bt)) if bt else 0.0
234
-
235
- # -- COMET (ref-based)
236
  if "COMET" in metrics:
237
- cr = self.comet_ref.compute(
238
- srcs=sources, hyps=predictions, refs=references
239
- ).get("scores", 0.0)
240
- out["COMET"] = float(cr[0] if isinstance(cr, list) else cr)
241
-
242
- # -- QE (ref-free)
243
  if "QE" in metrics:
244
- cq = self.comet_qe.compute(
245
- srcs=sources, hyps=predictions
246
- ).get("scores", 0.0)
247
- out["QE"] = float(cq[0] if isinstance(cq, list) else cq)
248
-
249
- # -- Bootstrap CIs
250
  if ci:
251
- # BLEU_doc CI
252
  if "CI_BLEU_doc" in metrics:
253
- bsamp = bootstrap(
254
- lambda ps, rs: self.bleu.compute(
255
- predictions=ps,
256
- references=[[r] for r in rs]
257
- )["bleu"],
258
- predictions, references
259
- )
260
- out["CI_BLEU_doc"] = (
261
- float(np.percentile(bsamp, 2.5)),
262
- float(np.percentile(bsamp, 97.5))
263
- )
264
- # BERTScore CI
265
  if "CI_BERTScore" in metrics:
266
- bsamp = bootstrap(
267
- lambda ps, rs: sum(
268
- self.bertscore.compute(
269
- predictions=ps, references=rs, lang="xx"
270
- )["f1"]
271
- ) / len(ps),
272
- predictions, references
273
- )
274
- out["CI_BERTScore"] = (
275
- float(np.percentile(bsamp, 2.5)),
276
- float(np.percentile(bsamp, 97.5))
277
- )
278
- # COMET CI
279
  if "CI_COMET" in metrics:
280
- bsamp = bootstrap(
281
- lambda ps, rs, ss: float(
282
- self.comet_ref.compute(
283
- srcs=ss, hyps=ps, refs=rs
284
- ).get("scores", [0.0])[0]
285
- ),
286
- predictions, references, sources
287
- )
288
- out["CI_COMET"] = (
289
- float(np.percentile(bsamp, 2.5)),
290
- float(np.percentile(bsamp, 97.5))
291
- )
292
-
293
  return out
294
 
295
  # ────────── Error Categorizer ──────────
296
  class ErrorCategorizer:
297
- """
298
- Optional: classify error types via a fine-tuned text-classification model.
299
- Supply your own HF model name for real categories.
300
- """
301
- def __init__(self, model_name: str = None):
302
- if model_name:
303
- self.pipe = pipeline("text-classification", model=model_name, device=0 if torch.cuda.is_available() else -1)
304
- else:
305
- self.pipe = None
306
-
307
- def categorize(self, src: str, hyp: str):
308
- if not self.pipe:
309
- return []
310
- inp = f"SRC: {src}\nHYP: {hyp}\nError types (pick from taxonomy):"
311
  return self.pipe(inp, top_k=None)
312
 
313
  # ────────── Streamlit App ──────────
@@ -315,8 +202,7 @@ class ErrorCategorizer:
315
  def load_resources():
316
  mgr = ModelManager(quantize=True)
317
  ev = TranslationEvaluator()
318
- # set your error-classifier HF model here, or None to disable
319
- err = ErrorCategorizer(model_name="your-org/translation-error-categorizer")
320
  return mgr, ev, err
321
 
322
  def display_model_info(info: dict):
@@ -326,43 +212,29 @@ def display_model_info(info: dict):
326
  st.sidebar.write(f"• **Device:** {info['device']}")
327
  st.sidebar.write(f"• **Default tgt:** {info['default_tgt']}")
328
 
329
- def show_diff(ref: str, hyp: str):
330
  differ = difflib.HtmlDiff(tabsize=4, wrapcolumn=60)
331
- html = differ.make_table(
332
- ref.split(), hyp.split(),
333
- fromdesc="Reference", todesc="Hypothesis",
334
- context=True, numlines=1
335
- )
336
  components.html(html, height=200, scrolling=True)
337
 
338
  def main():
339
- st.set_page_config(page_title="🔀 Translate→Eval+", layout="wide")
340
- st.title("🌐 Translate → 🔠 Evaluate & Analyze")
341
- st.write("Translate from any language, choose target, eval with advanced metrics, and inspect errors.")
342
 
343
- # Sidebar
344
  with st.sidebar:
345
  st.header("Settings")
346
  mgr, ev, err = load_resources()
347
  info = mgr.get_info()
348
  display_model_info(info)
349
 
350
- tgt = st.selectbox(
351
- "Target language", info["langs"],
352
- index=info["langs"].index(info["default_tgt"])
353
- )
354
-
355
- metric_opts = [
356
- "BLEU_doc","BLEU_seg","ChrF","TER",
357
- "BERTScore","BERTurk","COMET","QE",
358
- "CI_BLEU_doc","CI_BERTScore","CI_COMET"
359
- ]
360
  metrics = st.multiselect("Metrics & CIs", metric_opts, default=["BLEU_doc","BERTScore","COMET"])
361
  batch_size = st.slider("Batch size", 1, 32, 8)
362
 
363
  tab1, tab2 = st.tabs(["Single","Batch CSV"])
364
 
365
- # ────────── Single Sentence ──────────
366
  with tab1:
367
  src = st.text_area("Source text:", height=120)
368
  ref = st.text_area("Gold reference (optional):", height=80)
@@ -372,32 +244,25 @@ def main():
372
  hyp = out[0]["translation_text"]
373
  st.markdown(f"**Hypothesis ({tgt}):** {hyp}")
374
 
375
- # metrics
376
  scores = ev.compute_metrics([src],[ref or ""],[hyp], metrics)
377
- # display
378
  sd = {}
379
  for m in metrics:
380
  v = scores.get(m)
381
- if m.startswith("CI_"):
382
- low, high = v
383
- sd[m] = f"{low:.3f} – {high:.3f}"
384
  else:
385
  sd[m] = f"{v:.4f}" if v is not None else "N/A"
386
  st.markdown("### Scores")
387
  st.table(pd.DataFrame([sd]))
388
 
389
- # diff
390
  if ref.strip():
391
  st.markdown("### Diff View")
392
  show_diff(ref, hyp)
393
-
394
- # error categories
395
  cats = err.categorize(src, hyp)
396
  if cats:
397
  st.markdown("### Error Categories")
398
  st.json(cats)
399
 
400
- # ────────── Batch CSV ──────────
401
  with tab2:
402
  uploaded = st.file_uploader("Upload CSV with `src`,`ref_tr`", type=["csv"])
403
  if uploaded:
@@ -410,36 +275,34 @@ def main():
410
  prog = st.progress(0)
411
  N = len(df)
412
  for i in range(0, N, batch_size):
413
- batch = df.iloc[i : i+batch_size]
414
  srcs, refs = batch["src"].tolist(), batch["ref_tr"].tolist()
415
  outs = mgr.translate(srcs, tgt_lang=tgt)
416
  hyps = [o["translation_text"] for o in outs]
417
- for s, r, h in zip(srcs, refs, hyps):
418
- base = {"src":s, "ref_tr":r, "hyp_tr":h}
419
  if r.strip():
420
  sc = ev.compute_metrics([s],[r],[h], metrics)
421
  for m in metrics:
422
- if m.startswith("CI_"):
423
  low, high = sc[m]
424
  base[m] = f"{low:.3f}–{high:.3f}"
425
  else:
426
- base[m] = sc[m]
427
  else:
428
  for m in metrics:
429
  base[m] = None
430
  all_rows.append(base)
431
- prog.progress(min(i+batch_size, N)/N)
432
  res_df = pd.DataFrame(all_rows)
433
 
434
  st.markdown("### Results")
435
  st.dataframe(res_df, use_container_width=True)
436
-
437
- # histograms
438
  for m in metrics:
439
  st.markdown(f"#### {m} Distribution")
440
  col = pd.to_numeric(res_df[m], errors="coerce").dropna()
441
  if col.empty:
442
- st.write("No valid data for this metric.")
443
  else:
444
  fig = px.histogram(col, x=col)
445
  st.plotly_chart(fig, use_container_width=True)
 
10
  import plotly.express as px
11
  import time
12
  import difflib
 
13
  from typing import List, Union
14
+
15
  from langdetect import detect, LangDetectException
16
  from transformers import (
17
  AutoTokenizer,
 
20
  BitsAndBytesConfig,
21
  )
22
  import evaluate
23
+ from sacrebleu import corpus_bleu, sentence_bleu
24
+
25
+ # ────────── Page Config (MUST be first) ──────────
26
+ st.set_page_config(page_title="🔀 Translate→Eval+", layout="wide")
27
 
28
  # ────────── Global CSS ──────────
29
  st.markdown("""
 
49
  fn, predictions: List[str], references: List[str], sources: List[str]=None,
50
  n_resamples: int = 200, seed: int = 42
51
  ) -> List[float]:
 
52
  random.seed(seed)
53
  scores = []
54
  N = len(predictions)
 
65
 
66
  # ────────── Model Manager ──────────
67
  class ModelManager:
 
 
 
 
68
  def __init__(self, candidates=None, quantize=True, default_tgt=None):
69
  if quantize and not torch.cuda.is_available():
70
  logger.warning("CUDA unavailable; disabling 8-bit quantization")
71
  quantize = False
72
+ self.quantize = quantize
73
+ self.candidates = candidates or [
74
  "facebook/nllb-200-distilled-600M",
75
  "facebook/m2m100_418M",
76
  ]
 
91
  name, device_map="auto", quantization_config=bnb
92
  )
93
  else:
94
+ mdl = AutoModelForSeq2SeqLM.from_pretrained(name, device_map="auto")
 
 
95
  pipe = pipeline("translation", model=mdl, tokenizer=tok)
96
  self.model_name = name
97
  self.tokenizer = tok
98
  self.model = mdl
99
  self.pipeline = pipe
100
  self.lang_codes = list(tok.lang_code_to_id.keys())
 
101
  if not self.default_tgt:
102
  tur = [c for c in self.lang_codes if c.lower().startswith("tr")]
103
  if not tur:
 
110
  last_err = e
111
  raise RuntimeError(f"No model loaded: {last_err}")
112
 
113
+ def translate(self, text: Union[str, List[str]], src_lang: str=None, tgt_lang: str=None):
 
 
 
114
  tgt = tgt_lang or self.default_tgt
 
115
  if not src_lang:
116
  sample = text[0] if isinstance(text, list) else text
117
  try:
 
144
 
145
  # ────────── Evaluator ──────────
146
  class TranslationEvaluator:
 
 
 
147
  def __init__(self):
148
+ self.bleu = evaluate.load("bleu")
149
+ self.chrf = evaluate.load("chrf")
150
+ self.ter = evaluate.load("ter")
 
 
 
 
151
  self.bertscore = evaluate.load("bertscore")
 
152
  self.comet_ref = evaluate.load("comet", model_id="unbabel/comet-mqm-qe-da")
 
153
  self.comet_qe = evaluate.load("comet", model_id="unbabel/wmt20-comet-qe-da")
154
+ logger.info("Loaded BLEU, ChrF, TER, BERTScore, COMET")
155
 
156
+ def compute_metrics(self, srcs, refs, hyps, metrics, ci=True):
 
 
 
 
 
 
 
157
  out = {}
 
 
158
  if "BLEU_doc" in metrics:
159
+ out["BLEU_doc"] = float(self.bleu.compute(predictions=hyps, references=[[r] for r in refs])["bleu"])
 
 
 
 
 
 
160
  if "BLEU_seg" in metrics:
161
+ segs = [sentence_bleu([r], p).score for p,r in zip(hyps, refs)]
162
+ out["BLEU_seg"] = float(sum(segs)/len(segs))
 
 
 
 
 
163
  if "ChrF" in metrics:
164
+ out["ChrF"] = float(self.chrf.compute(predictions=hyps, references=[[r] for r in refs])["score"])
 
 
 
 
 
 
165
  if "TER" in metrics:
166
+ out["TER"] = float(self.ter.compute(predictions=hyps, references=[[r] for r in refs], normalized=True)["score"])
 
 
 
 
 
 
 
167
  if "BERTScore" in metrics:
168
+ bs = self.bertscore.compute(predictions=hyps, references=refs, lang="xx")["f1"]
169
+ out["BERTScore"] = float(sum(bs)/len(bs)) if bs else 0.0
 
 
 
 
 
 
170
  if "BERTurk" in metrics:
171
+ bt = self.bertscore.compute(predictions=hyps, references=refs, lang="tr")["f1"]
172
+ out["BERTurk"] = float(sum(bt)/len(bt)) if bt else 0.0
 
 
 
 
 
 
173
  if "COMET" in metrics:
174
+ sc = self.comet_ref.compute(srcs=srcs, hyps=hyps, refs=refs).get("scores",0.0)
175
+ out["COMET"] = float(sc[0] if isinstance(sc,list) else sc)
 
 
 
 
176
  if "QE" in metrics:
177
+ q = self.comet_qe.compute(srcs=srcs, hyps=hyps).get("scores",0.0)
178
+ out["QE"] = float(q[0] if isinstance(q,list) else q)
 
 
 
 
179
  if ci:
 
180
  if "CI_BLEU_doc" in metrics:
181
+ bsamp = bootstrap(lambda ps,rs: self.bleu.compute(predictions=ps,references=[[r] for r in rs])["bleu"], hyps, refs)
182
+ out["CI_BLEU_doc"] = (float(np.percentile(bsamp,2.5)), float(np.percentile(bsamp,97.5)))
 
 
 
 
 
 
 
 
 
 
183
  if "CI_BERTScore" in metrics:
184
+ bsamp = bootstrap(lambda ps,rs: sum(self.bertscore.compute(predictions=ps,references=rs,lang="xx")["f1"])/len(ps), hyps, refs)
185
+ out["CI_BERTScore"] = (float(np.percentile(bsamp,2.5)), float(np.percentile(bsamp,97.5)))
 
 
 
 
 
 
 
 
 
 
 
186
  if "CI_COMET" in metrics:
187
+ bsamp = bootstrap(lambda ps,rs,ss: float(self.comet_ref.compute(srcs=ss,hyps=ps,refs=rs).get("scores",[0.0])[0]), hyps, refs, srcs)
188
+ out["CI_COMET"] = (float(np.percentile(bsamp,2.5)), float(np.percentile(bsamp,97.5)))
 
 
 
 
 
 
 
 
 
 
 
189
  return out
190
 
191
  # ────────── Error Categorizer ──────────
192
  class ErrorCategorizer:
193
+ def __init__(self, model_name=None):
194
+ self.pipe = pipeline("text-classification", model=model_name, device=0 if torch.cuda.is_available() else -1) if model_name else None
195
+ def categorize(self, src, hyp):
196
+ if not self.pipe: return []
197
+ inp = f"SRC: {src}\nHYP: {hyp}\nError types:"
 
 
 
 
 
 
 
 
 
198
  return self.pipe(inp, top_k=None)
199
 
200
  # ────────── Streamlit App ──────────
 
202
  def load_resources():
203
  mgr = ModelManager(quantize=True)
204
  ev = TranslationEvaluator()
205
+ err = ErrorCategorizer(model_name=None) # set your HF model here
 
206
  return mgr, ev, err
207
 
208
  def display_model_info(info: dict):
 
212
  st.sidebar.write(f"• **Device:** {info['device']}")
213
  st.sidebar.write(f"• **Default tgt:** {info['default_tgt']}")
214
 
215
+ def show_diff(ref, hyp):
216
  differ = difflib.HtmlDiff(tabsize=4, wrapcolumn=60)
217
+ html = differ.make_table(ref.split(), hyp.split(), fromdesc="Reference", todesc="Hypothesis", context=True, numlines=1)
 
 
 
 
218
  components.html(html, height=200, scrolling=True)
219
 
220
  def main():
221
+ # Note: set_page_config has been moved to the top!
222
+ st.title("🌐 Translate → Evaluate & Analyze")
223
+ st.write("Translate any language, choose target, eval with advanced metrics, and inspect errors.")
224
 
 
225
  with st.sidebar:
226
  st.header("Settings")
227
  mgr, ev, err = load_resources()
228
  info = mgr.get_info()
229
  display_model_info(info)
230
 
231
+ tgt = st.selectbox("Target language", info["langs"], index=info["langs"].index(info["default_tgt"]))
232
+ metric_opts = ["BLEU_doc","BLEU_seg","ChrF","TER","BERTScore","BERTurk","COMET","QE","CI_BLEU_doc","CI_BERTScore","CI_COMET"]
 
 
 
 
 
 
 
 
233
  metrics = st.multiselect("Metrics & CIs", metric_opts, default=["BLEU_doc","BERTScore","COMET"])
234
  batch_size = st.slider("Batch size", 1, 32, 8)
235
 
236
  tab1, tab2 = st.tabs(["Single","Batch CSV"])
237
 
 
238
  with tab1:
239
  src = st.text_area("Source text:", height=120)
240
  ref = st.text_area("Gold reference (optional):", height=80)
 
244
  hyp = out[0]["translation_text"]
245
  st.markdown(f"**Hypothesis ({tgt}):** {hyp}")
246
 
 
247
  scores = ev.compute_metrics([src],[ref or ""],[hyp], metrics)
 
248
  sd = {}
249
  for m in metrics:
250
  v = scores.get(m)
251
+ if m.startswith("CI_") and v:
252
+ sd[m] = f"{v[0]:.3f} – {v[1]:.3f}"
 
253
  else:
254
  sd[m] = f"{v:.4f}" if v is not None else "N/A"
255
  st.markdown("### Scores")
256
  st.table(pd.DataFrame([sd]))
257
 
 
258
  if ref.strip():
259
  st.markdown("### Diff View")
260
  show_diff(ref, hyp)
 
 
261
  cats = err.categorize(src, hyp)
262
  if cats:
263
  st.markdown("### Error Categories")
264
  st.json(cats)
265
 
 
266
  with tab2:
267
  uploaded = st.file_uploader("Upload CSV with `src`,`ref_tr`", type=["csv"])
268
  if uploaded:
 
275
  prog = st.progress(0)
276
  N = len(df)
277
  for i in range(0, N, batch_size):
278
+ batch = df.iloc[i:i+batch_size]
279
  srcs, refs = batch["src"].tolist(), batch["ref_tr"].tolist()
280
  outs = mgr.translate(srcs, tgt_lang=tgt)
281
  hyps = [o["translation_text"] for o in outs]
282
+ for s,r,h in zip(srcs,refs,hyps):
283
+ base = {"src":s,"ref_tr":r,"hyp_tr":h}
284
  if r.strip():
285
  sc = ev.compute_metrics([s],[r],[h], metrics)
286
  for m in metrics:
287
+ if m.startswith("CI_") and sc.get(m):
288
  low, high = sc[m]
289
  base[m] = f"{low:.3f}–{high:.3f}"
290
  else:
291
+ base[m] = sc.get(m)
292
  else:
293
  for m in metrics:
294
  base[m] = None
295
  all_rows.append(base)
296
+ prog.progress(min(i+batch_size,N)/N)
297
  res_df = pd.DataFrame(all_rows)
298
 
299
  st.markdown("### Results")
300
  st.dataframe(res_df, use_container_width=True)
 
 
301
  for m in metrics:
302
  st.markdown(f"#### {m} Distribution")
303
  col = pd.to_numeric(res_df[m], errors="coerce").dropna()
304
  if col.empty:
305
+ st.write("No valid data.")
306
  else:
307
  fig = px.histogram(col, x=col)
308
  st.plotly_chart(fig, use_container_width=True)