Roman190928 committed on
Commit
6cfa53e
·
verified ·
1 Parent(s): 68e0a34

Fix seed summary rendering: plain text summary instead of raw HTML widget markup

Browse files
Files changed (1) hide show
  1. app.py +25 -5
app.py CHANGED
@@ -467,6 +467,26 @@ def render_seed_widget_html(seed_urls_input: Any) -> str:
467
  )
468
 
469
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
470
  def render_tokenization_widget_html(snapshot: dict[str, Any]) -> str:
471
  tokenized_shards = int(snapshot.get("tokenized_shards", 0) or 0)
472
  tokenized_rows = int(snapshot.get("tokenized_rows", 0) or 0)
@@ -849,7 +869,7 @@ def poll_dashboard() -> tuple[str, str, str, str]:
849
 
850
 
851
  def render_seed_widget(seed_urls_input: Any) -> str:
852
- return render_seed_widget_html(seed_urls_input)
853
 
854
 
855
  def noop_event(*_args: Any) -> None:
@@ -908,9 +928,11 @@ def build_ui() -> gr.Blocks:
908
  label="Seed URL List (one URL per line)",
909
  placeholder="https://example.com",
910
  )
911
- seed_widget_html = gr.HTML(
912
  label="Seed URL Summary",
913
- value=render_seed_widget_html(default_seed_text),
 
 
914
  )
915
  token_widget_html = gr.HTML(
916
  label="Live Tokenization",
@@ -1025,7 +1047,6 @@ def build_ui() -> gr.Blocks:
1025
  fn=render_seed_widget,
1026
  inputs=[seed_urls_input],
1027
  outputs=[seed_widget_html],
1028
- js=SEED_WIDGET_JS,
1029
  queue=False,
1030
  )
1031
 
@@ -1048,7 +1069,6 @@ def build_ui() -> gr.Blocks:
1048
  fn=render_seed_widget,
1049
  inputs=[seed_urls_input],
1050
  outputs=[seed_widget_html],
1051
- js=SEED_WIDGET_JS,
1052
  queue=False,
1053
  )
1054
  demo.load(fn=poll_dashboard, inputs=[], outputs=outputs)
 
467
  )
468
 
469
 
470
def render_seed_summary_text(seed_urls_input: Any) -> str:
    """Build a plain-text summary of the configured seed URLs.

    The summary reports the number of seeds, the number of distinct
    hostnames among them, the character length of the first seed URL, and
    then lists every seed as a ``- url`` bullet (``- (none)`` when there
    are no seeds).

    Args:
        seed_urls_input: Raw seed input, passed unchanged to
            ``collect_seed_urls``.

    Returns:
        The summary lines joined with newlines.
    """
    # NOTE(review): collect_seed_urls is defined elsewhere in this file; this
    # code assumes it returns an indexable sequence of URL strings -- confirm.
    seeds = collect_seed_urls(seed_urls_input)

    domains: set[str] = set()
    for url in seeds:
        try:
            host = urlsplit(url).hostname or ""
        except ValueError:
            # urlsplit rejects malformed netlocs (e.g. an unmatched "[" in an
            # IPv6 literal). The seed is still listed below; it simply does
            # not contribute a domain, so one bad URL cannot crash the
            # summary callback.
            continue
        # Normalise case and stray leading/trailing dots so that
        # "Example.com." and "example.com" count as one domain.
        host = host.lower().strip(".")
        if host:
            domains.add(host)

    first_url_chars = len(seeds[0]) if seeds else 0

    lines = [
        f"Seeds: {len(seeds)}",
        f"Domains: {len(domains)}",
        f"First URL chars: {first_url_chars}",
        "",
        "Seed URLs:",
    ]
    if seeds:
        lines.extend(f"- {url}" for url in seeds)
    else:
        lines.append("- (none)")
    return "\n".join(lines)
488
+
489
+
490
  def render_tokenization_widget_html(snapshot: dict[str, Any]) -> str:
491
  tokenized_shards = int(snapshot.get("tokenized_shards", 0) or 0)
492
  tokenized_rows = int(snapshot.get("tokenized_rows", 0) or 0)
 
869
 
870
 
871
  def render_seed_widget(seed_urls_input: Any) -> str:
872
+ return render_seed_summary_text(seed_urls_input)
873
 
874
 
875
  def noop_event(*_args: Any) -> None:
 
928
  label="Seed URL List (one URL per line)",
929
  placeholder="https://example.com",
930
  )
931
+ seed_widget_html = gr.Textbox(
932
  label="Seed URL Summary",
933
+ value=render_seed_summary_text(default_seed_text),
934
+ lines=10,
935
+ interactive=False,
936
  )
937
  token_widget_html = gr.HTML(
938
  label="Live Tokenization",
 
1047
  fn=render_seed_widget,
1048
  inputs=[seed_urls_input],
1049
  outputs=[seed_widget_html],
 
1050
  queue=False,
1051
  )
1052
 
 
1069
  fn=render_seed_widget,
1070
  inputs=[seed_urls_input],
1071
  outputs=[seed_widget_html],
 
1072
  queue=False,
1073
  )
1074
  demo.load(fn=poll_dashboard, inputs=[], outputs=outputs)