Spaces:
Running
Running
Fix seed summary rendering: plain text summary instead of raw HTML widget markup
Browse files
app.py
CHANGED
|
@@ -467,6 +467,26 @@ def render_seed_widget_html(seed_urls_input: Any) -> str:
|
|
| 467 |
)
|
| 468 |
|
| 469 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
def render_tokenization_widget_html(snapshot: dict[str, Any]) -> str:
|
| 471 |
tokenized_shards = int(snapshot.get("tokenized_shards", 0) or 0)
|
| 472 |
tokenized_rows = int(snapshot.get("tokenized_rows", 0) or 0)
|
|
@@ -849,7 +869,7 @@ def poll_dashboard() -> tuple[str, str, str, str]:
|
|
| 849 |
|
| 850 |
|
| 851 |
def render_seed_widget(seed_urls_input: Any) -> str:
|
| 852 |
-
return
|
| 853 |
|
| 854 |
|
| 855 |
def noop_event(*_args: Any) -> None:
|
|
@@ -908,9 +928,11 @@ def build_ui() -> gr.Blocks:
|
|
| 908 |
label="Seed URL List (one URL per line)",
|
| 909 |
placeholder="https://example.com",
|
| 910 |
)
|
| 911 |
-
seed_widget_html = gr.
|
| 912 |
label="Seed URL Summary",
|
| 913 |
-
value=
|
|
|
|
|
|
|
| 914 |
)
|
| 915 |
token_widget_html = gr.HTML(
|
| 916 |
label="Live Tokenization",
|
|
@@ -1025,7 +1047,6 @@ def build_ui() -> gr.Blocks:
|
|
| 1025 |
fn=render_seed_widget,
|
| 1026 |
inputs=[seed_urls_input],
|
| 1027 |
outputs=[seed_widget_html],
|
| 1028 |
-
js=SEED_WIDGET_JS,
|
| 1029 |
queue=False,
|
| 1030 |
)
|
| 1031 |
|
|
@@ -1048,7 +1069,6 @@ def build_ui() -> gr.Blocks:
|
|
| 1048 |
fn=render_seed_widget,
|
| 1049 |
inputs=[seed_urls_input],
|
| 1050 |
outputs=[seed_widget_html],
|
| 1051 |
-
js=SEED_WIDGET_JS,
|
| 1052 |
queue=False,
|
| 1053 |
)
|
| 1054 |
demo.load(fn=poll_dashboard, inputs=[], outputs=outputs)
|
|
|
|
| 467 |
)
|
| 468 |
|
| 469 |
|
| 470 |
+
def render_seed_summary_text(seed_urls_input: Any) -> str:
    """Build a plain-text summary of the configured seed URLs.

    The summary reports the number of seeds, the number of distinct
    hostnames among them, the character length of the first seed URL, and
    then lists every seed URL as a bullet ("- (none)" when there are none).

    Args:
        seed_urls_input: Raw seed input; passed unchanged to
            ``collect_seed_urls``, which yields the seed URL sequence.

    Returns:
        The summary lines joined with newlines.
    """
    seeds = collect_seed_urls(seed_urls_input)

    domains: set[str] = set()
    for url in seeds:
        try:
            hostname = urlsplit(url).hostname
        except ValueError:
            # urlsplit raises ValueError on a malformed netloc (for example
            # an unmatched "[" in a half-typed IPv6 host). Such a seed adds
            # no domain but is still listed below, so one bad line does not
            # take down the whole summary.
            continue
        # Normalise case and surrounding dots so "Example.com." and
        # "example.com" count as one domain; skip URLs without a host.
        normalized = (hostname or "").lower().strip(".")
        if normalized:
            domains.add(normalized)

    first_url_chars = len(seeds[0]) if seeds else 0

    lines = [
        f"Seeds: {len(seeds)}",
        f"Domains: {len(domains)}",
        f"First URL chars: {first_url_chars}",
        "",
        "Seed URLs:",
    ]
    if seeds:
        lines.extend(f"- {url}" for url in seeds)
    else:
        lines.append("- (none)")
    return "\n".join(lines)
|
| 488 |
+
|
| 489 |
+
|
| 490 |
def render_tokenization_widget_html(snapshot: dict[str, Any]) -> str:
|
| 491 |
tokenized_shards = int(snapshot.get("tokenized_shards", 0) or 0)
|
| 492 |
tokenized_rows = int(snapshot.get("tokenized_rows", 0) or 0)
|
|
|
|
| 869 |
|
| 870 |
|
| 871 |
def render_seed_widget(seed_urls_input: Any) -> str:
    """Return the seed summary text for ``seed_urls_input``.

    Thin wrapper that delegates to ``render_seed_summary_text`` and returns
    its result unchanged.
    """
    summary_text = render_seed_summary_text(seed_urls_input)
    return summary_text
|
| 873 |
|
| 874 |
|
| 875 |
def noop_event(*_args: Any) -> None:
|
|
|
|
| 928 |
label="Seed URL List (one URL per line)",
|
| 929 |
placeholder="https://example.com",
|
| 930 |
)
|
| 931 |
+
seed_widget_html = gr.Textbox(
|
| 932 |
label="Seed URL Summary",
|
| 933 |
+
value=render_seed_summary_text(default_seed_text),
|
| 934 |
+
lines=10,
|
| 935 |
+
interactive=False,
|
| 936 |
)
|
| 937 |
token_widget_html = gr.HTML(
|
| 938 |
label="Live Tokenization",
|
|
|
|
| 1047 |
fn=render_seed_widget,
|
| 1048 |
inputs=[seed_urls_input],
|
| 1049 |
outputs=[seed_widget_html],
|
|
|
|
| 1050 |
queue=False,
|
| 1051 |
)
|
| 1052 |
|
|
|
|
| 1069 |
fn=render_seed_widget,
|
| 1070 |
inputs=[seed_urls_input],
|
| 1071 |
outputs=[seed_widget_html],
|
|
|
|
| 1072 |
queue=False,
|
| 1073 |
)
|
| 1074 |
demo.load(fn=poll_dashboard, inputs=[], outputs=outputs)
|