Roman190928 committed on
Commit
1271a45
·
verified ·
1 Parent(s): b18026c

UI help tooltips + simplify incomplete shard upload behavior

Browse files
Files changed (1) hide show
  1. app.py +57 -34
app.py CHANGED
@@ -267,6 +267,24 @@ APP_CSS = """
267
  font-size: 0.83rem;
268
  padding: 0.24rem 0.3rem;
269
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  """
271
 
272
  THEME_JS = """
@@ -328,6 +346,42 @@ SEED_WIDGET_JS = """
328
  }
329
  """
330
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331
 
332
  def utc_now_iso() -> str:
333
  return datetime.now(timezone.utc).isoformat(timespec="seconds")
@@ -461,7 +515,6 @@ def build_crawler_config(
461
  shard_size_rows: int,
462
  enable_hf_upload: bool,
463
  upload_incomplete_shards: bool,
464
- incomplete_shard_flush_seconds: float,
465
  hf_repo_id: str,
466
  hf_token: str,
467
  hf_private_repo: bool,
@@ -480,7 +533,6 @@ def build_crawler_config(
480
  output_dir=Path(__file__).resolve().parent / "shards",
481
  enable_hf_upload=bool(enable_hf_upload),
482
  upload_incomplete_shards=bool(upload_incomplete_shards),
483
- incomplete_shard_flush_seconds=float(incomplete_shard_flush_seconds),
484
  hf_repo_id=hf_repo_id.strip(),
485
  hf_token=hf_token.strip(),
486
  hf_private_repo=bool(hf_private_repo),
@@ -699,7 +751,6 @@ def _start_crawl(
699
  shard_size_rows: int,
700
  enable_hf_upload: bool,
701
  upload_incomplete_shards: bool,
702
- incomplete_shard_flush_seconds: float,
703
  hf_repo_id: str,
704
  hf_token: str,
705
  hf_private_repo: bool,
@@ -714,7 +765,6 @@ def _start_crawl(
714
  shard_size_rows=shard_size_rows,
715
  enable_hf_upload=enable_hf_upload,
716
  upload_incomplete_shards=upload_incomplete_shards,
717
- incomplete_shard_flush_seconds=incomplete_shard_flush_seconds,
718
  hf_repo_id=hf_repo_id,
719
  hf_token=hf_token,
720
  hf_private_repo=hf_private_repo,
@@ -737,7 +787,6 @@ def start_crawl_standard(
737
  shard_size_rows: int,
738
  enable_hf_upload: bool,
739
  upload_incomplete_shards: bool,
740
- incomplete_shard_flush_seconds: float,
741
  hf_repo_id: str,
742
  hf_token: str,
743
  hf_private_repo: bool,
@@ -752,7 +801,6 @@ def start_crawl_standard(
752
  shard_size_rows=shard_size_rows,
753
  enable_hf_upload=enable_hf_upload,
754
  upload_incomplete_shards=upload_incomplete_shards,
755
- incomplete_shard_flush_seconds=incomplete_shard_flush_seconds,
756
  hf_repo_id=hf_repo_id,
757
  hf_token=hf_token,
758
  hf_private_repo=hf_private_repo,
@@ -768,7 +816,6 @@ def start_crawl_super(
768
  shard_size_rows: int,
769
  enable_hf_upload: bool,
770
  upload_incomplete_shards: bool,
771
- incomplete_shard_flush_seconds: float,
772
  hf_repo_id: str,
773
  hf_token: str,
774
  hf_private_repo: bool,
@@ -783,7 +830,6 @@ def start_crawl_super(
783
  shard_size_rows=shard_size_rows,
784
  enable_hf_upload=enable_hf_upload,
785
  upload_incomplete_shards=upload_incomplete_shards,
786
- incomplete_shard_flush_seconds=incomplete_shard_flush_seconds,
787
  hf_repo_id=hf_repo_id,
788
  hf_token=hf_token,
789
  hf_private_repo=hf_private_repo,
@@ -807,13 +853,6 @@ def toggle_hf_fields(enable_hf_upload: bool) -> tuple[Any, Any, Any, Any, Any]:
807
  return update, update, update, update, update
808
 
809
 
810
- def toggle_incomplete_flush_field(
811
- enable_hf_upload: bool,
812
- upload_incomplete_shards: bool,
813
- ) -> Any:
814
- return gr.update(visible=bool(enable_hf_upload and upload_incomplete_shards))
815
-
816
-
817
  def build_ui() -> gr.Blocks:
818
  defaults = CrawlerConfig(
819
  seed_urls=[
@@ -931,14 +970,6 @@ def build_ui() -> gr.Blocks:
931
  value=False,
932
  visible=False,
933
  )
934
- incomplete_shard_flush_seconds = gr.Slider(
935
- label="Incomplete Upload Flush Interval (seconds)",
936
- minimum=5,
937
- maximum=300,
938
- step=1,
939
- value=int(defaults.incomplete_shard_flush_seconds),
940
- visible=False,
941
- )
942
 
943
  with gr.Row():
944
  start_button = gr.Button("Start Crawl (12 Threads)", variant="primary")
@@ -958,7 +989,6 @@ def build_ui() -> gr.Blocks:
958
  shard_size_rows,
959
  enable_hf_upload,
960
  upload_incomplete_shards,
961
- incomplete_shard_flush_seconds,
962
  hf_repo_id,
963
  hf_token,
964
  hf_private_repo,
@@ -982,16 +1012,6 @@ def build_ui() -> gr.Blocks:
982
  upload_incomplete_shards,
983
  ],
984
  )
985
- enable_hf_upload.change(
986
- toggle_incomplete_flush_field,
987
- inputs=[enable_hf_upload, upload_incomplete_shards],
988
- outputs=[incomplete_shard_flush_seconds],
989
- )
990
- upload_incomplete_shards.change(
991
- toggle_incomplete_flush_field,
992
- inputs=[enable_hf_upload, upload_incomplete_shards],
993
- outputs=[incomplete_shard_flush_seconds],
994
- )
995
 
996
  seed_urls_input.change(
997
  fn=None,
@@ -1001,6 +1021,7 @@ def build_ui() -> gr.Blocks:
1001
  )
1002
 
1003
  theme_name.change(fn=None, inputs=theme_name, outputs=[], js=THEME_JS)
 
1004
  demo.load(
1005
  fn=None,
1006
  inputs=[],
@@ -1014,6 +1035,8 @@ def build_ui() -> gr.Blocks:
1014
  js=SEED_WIDGET_JS,
1015
  )
1016
  demo.load(fn=poll_dashboard, inputs=[], outputs=outputs)
 
 
1017
 
1018
  timer = gr.Timer(value=1.0)
1019
  timer.tick(fn=poll_dashboard, inputs=[], outputs=outputs)
 
267
  font-size: 0.83rem;
268
  padding: 0.24rem 0.3rem;
269
  }
270
+
271
+ .setting-help-q {
272
+ display: inline-flex;
273
+ align-items: center;
274
+ justify-content: center;
275
+ width: 1.05rem;
276
+ height: 1.05rem;
277
+ margin-left: 0.42rem;
278
+ border: 1px solid var(--border);
279
+ border-radius: 999px;
280
+ color: var(--text-main);
281
+ background: color-mix(in srgb, var(--bg-panel) 90%, transparent);
282
+ font-size: 0.74rem;
283
+ font-weight: 700;
284
+ cursor: help;
285
+ line-height: 1;
286
+ vertical-align: middle;
287
+ }
288
  """
289
 
290
  THEME_JS = """
 
346
  }
347
  """
348
 
349
+ SETTING_HELP_JS = """
350
+ () => {
351
+ const helpByPrefix = [
352
+ ["Theme", "Switch between visual color themes."],
353
+ ["Seed URL List (one URL per line)", "Provide crawl entry points. Put one URL per line; duplicates are ignored."],
354
+ ["Shard Size Rows", "Rows written per parquet shard before a full shard is emitted."],
355
+ ["Max Links Per Page", "Maximum discovered links to enqueue from each parsed page."],
356
+ ["Request Timeout (seconds)", "HTTP request timeout per URL."],
357
+ ["Max Response Bytes", "Maximum response body bytes to read per page."],
358
+ ["Upload shards to my HF repo", "Enable direct upload of produced shards to your Hugging Face Space repo."],
359
+ ["HF Repo ID", "Target Hugging Face repo in owner/name format."],
360
+ ["HF Token (write permissions)", "Token with write access to the target repo."],
361
+ ["Private HF Repo", "Create the target repo as private if it does not exist."],
362
+ ["HF Path Prefix", "Folder path inside the repo where shards are uploaded."],
363
+ ["Upload incomplete shard buffers", "On crawl finish/stop, flush the current partial shard buffer and upload it too."],
364
+ ];
365
+
366
+ const clean = (value) => String(value || "").replace(/\\s+/g, " ").trim();
367
+ const labels = document.querySelectorAll(".gradio-container label");
368
+
369
+ for (const label of labels) {
370
+ if (label.querySelector(".setting-help-q")) continue;
371
+ const text = clean(label.textContent);
372
+ const match = helpByPrefix.find(([prefix]) => text.startsWith(prefix));
373
+ if (!match) continue;
374
+
375
+ const q = document.createElement("span");
376
+ q.className = "setting-help-q";
377
+ q.textContent = "?";
378
+ q.title = match[1];
379
+ label.appendChild(q);
380
+ }
381
+ return [];
382
+ }
383
+ """
384
+
385
 
386
  def utc_now_iso() -> str:
387
  return datetime.now(timezone.utc).isoformat(timespec="seconds")
 
515
  shard_size_rows: int,
516
  enable_hf_upload: bool,
517
  upload_incomplete_shards: bool,
 
518
  hf_repo_id: str,
519
  hf_token: str,
520
  hf_private_repo: bool,
 
533
  output_dir=Path(__file__).resolve().parent / "shards",
534
  enable_hf_upload=bool(enable_hf_upload),
535
  upload_incomplete_shards=bool(upload_incomplete_shards),
 
536
  hf_repo_id=hf_repo_id.strip(),
537
  hf_token=hf_token.strip(),
538
  hf_private_repo=bool(hf_private_repo),
 
751
  shard_size_rows: int,
752
  enable_hf_upload: bool,
753
  upload_incomplete_shards: bool,
 
754
  hf_repo_id: str,
755
  hf_token: str,
756
  hf_private_repo: bool,
 
765
  shard_size_rows=shard_size_rows,
766
  enable_hf_upload=enable_hf_upload,
767
  upload_incomplete_shards=upload_incomplete_shards,
 
768
  hf_repo_id=hf_repo_id,
769
  hf_token=hf_token,
770
  hf_private_repo=hf_private_repo,
 
787
  shard_size_rows: int,
788
  enable_hf_upload: bool,
789
  upload_incomplete_shards: bool,
 
790
  hf_repo_id: str,
791
  hf_token: str,
792
  hf_private_repo: bool,
 
801
  shard_size_rows=shard_size_rows,
802
  enable_hf_upload=enable_hf_upload,
803
  upload_incomplete_shards=upload_incomplete_shards,
 
804
  hf_repo_id=hf_repo_id,
805
  hf_token=hf_token,
806
  hf_private_repo=hf_private_repo,
 
816
  shard_size_rows: int,
817
  enable_hf_upload: bool,
818
  upload_incomplete_shards: bool,
 
819
  hf_repo_id: str,
820
  hf_token: str,
821
  hf_private_repo: bool,
 
830
  shard_size_rows=shard_size_rows,
831
  enable_hf_upload=enable_hf_upload,
832
  upload_incomplete_shards=upload_incomplete_shards,
 
833
  hf_repo_id=hf_repo_id,
834
  hf_token=hf_token,
835
  hf_private_repo=hf_private_repo,
 
853
  return update, update, update, update, update
854
 
855
 
 
 
 
 
 
 
 
856
  def build_ui() -> gr.Blocks:
857
  defaults = CrawlerConfig(
858
  seed_urls=[
 
970
  value=False,
971
  visible=False,
972
  )
 
 
 
 
 
 
 
 
973
 
974
  with gr.Row():
975
  start_button = gr.Button("Start Crawl (12 Threads)", variant="primary")
 
989
  shard_size_rows,
990
  enable_hf_upload,
991
  upload_incomplete_shards,
 
992
  hf_repo_id,
993
  hf_token,
994
  hf_private_repo,
 
1012
  upload_incomplete_shards,
1013
  ],
1014
  )
 
 
 
 
 
 
 
 
 
 
1015
 
1016
  seed_urls_input.change(
1017
  fn=None,
 
1021
  )
1022
 
1023
  theme_name.change(fn=None, inputs=theme_name, outputs=[], js=THEME_JS)
1024
+ demo.load(fn=None, inputs=[], outputs=[], js=SETTING_HELP_JS)
1025
  demo.load(
1026
  fn=None,
1027
  inputs=[],
 
1035
  js=SEED_WIDGET_JS,
1036
  )
1037
  demo.load(fn=poll_dashboard, inputs=[], outputs=outputs)
1038
+ enable_hf_upload.change(fn=None, inputs=[], outputs=[], js=SETTING_HELP_JS)
1039
+ upload_incomplete_shards.change(fn=None, inputs=[], outputs=[], js=SETTING_HELP_JS)
1040
 
1041
  timer = gr.Timer(value=1.0)
1042
  timer.tick(fn=poll_dashboard, inputs=[], outputs=outputs)