chrissoria Claude committed on
Commit
7165ab2
·
1 Parent(s): 1c1f244

Add 5 UX improvements: progress indicator, example dataset, better placeholders, dark mode, large file warning

Browse files

- Convert classify_data() to generator for real-time progress updates during classification
- Add "Try Example Dataset" button with sample survey responses
- Update category placeholder text with diverse, helpful examples
- Add dark mode support via gr.themes.Soft()
- Show warning for datasets > 1000 rows with time estimate

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show
  1. app.py +83 -21
  2. example_data.csv +5 -0
app.py CHANGED
@@ -399,6 +399,21 @@ def get_model_source(model):
399
  return "huggingface"
400
 
401
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
  def load_columns(file):
403
  if file is None:
404
  return gr.update(choices=[], value=None), "Please upload a file first"
@@ -411,9 +426,18 @@ def load_columns(file):
411
  df = pd.read_excel(file_path)
412
 
413
  columns = df.columns.tolist()
 
 
 
 
 
 
 
 
 
414
  return (
415
  gr.update(choices=columns, value=columns[0] if columns else None),
416
- f"Loaded {len(df)} rows. Select column and click Classify."
417
  )
418
  except Exception as e:
419
  return gr.update(choices=[], value=None), f"**Error:** {str(e)}"
@@ -422,15 +446,17 @@ def load_columns(file):
422
  def classify_data(spreadsheet_file, spreadsheet_column,
423
  cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10,
424
  model_tier, model, model_source_input, api_key_input):
425
- """Main classification function. Returns distribution, samples, full results, files, and status."""
426
  if not CATLLM_AVAILABLE:
427
- return None, None, None, None, "**Error:** catllm package not available"
 
428
 
429
  all_cats = [cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10]
430
  categories = [c.strip() for c in all_cats if c and c.strip()]
431
 
432
  if not categories:
433
- return None, None, None, None, "**Error:** Please enter at least one category"
 
434
 
435
  actual_model = model
436
 
@@ -440,31 +466,38 @@ def classify_data(spreadsheet_file, spreadsheet_column,
440
  if model in HF_ROUTED_MODELS:
441
  actual_api_key = os.environ.get("HF_API_KEY", "")
442
  if not actual_api_key:
443
- return None, None, None, None, "**Error:** HuggingFace API key not configured in Space secrets"
 
444
  elif "gpt" in model.lower():
445
  actual_api_key = os.environ.get("OPENAI_API_KEY", "")
446
  if not actual_api_key:
447
- return None, None, None, None, "**Error:** OpenAI API key not configured in Space secrets"
 
448
  elif "gemini" in model.lower():
449
  actual_api_key = os.environ.get("GOOGLE_API_KEY", "")
450
  if not actual_api_key:
451
- return None, None, None, None, "**Error:** Google API key not configured in Space secrets"
 
452
  elif "mistral" in model.lower():
453
  actual_api_key = os.environ.get("MISTRAL_API_KEY", "")
454
  if not actual_api_key:
455
- return None, None, None, None, "**Error:** Mistral API key not configured in Space secrets"
 
456
  elif "claude" in model.lower():
457
  actual_api_key = os.environ.get("ANTHROPIC_API_KEY", "")
458
  if not actual_api_key:
459
- return None, None, None, None, "**Error:** Anthropic API key not configured in Space secrets"
 
460
  elif "sonar" in model.lower():
461
  actual_api_key = os.environ.get("PERPLEXITY_API_KEY", "")
462
  if not actual_api_key:
463
- return None, None, None, None, "**Error:** Perplexity API key not configured in Space secrets"
 
464
  elif "grok" in model.lower():
465
  actual_api_key = os.environ.get("XAI_API_KEY", "")
466
  if not actual_api_key:
467
- return None, None, None, None, "**Error:** xAI API key not configured in Space secrets"
 
468
  else:
469
  actual_api_key = os.environ.get("HF_API_KEY", "")
470
  else:
@@ -472,7 +505,8 @@ def classify_data(spreadsheet_file, spreadsheet_column,
472
  if api_key_input and api_key_input.strip():
473
  actual_api_key = api_key_input.strip()
474
  else:
475
- return None, None, None, None, f"**Error:** Please provide your API key for {model}"
 
476
 
477
  # Use user-selected model_source, or auto-detect if "auto"
478
  if model_source_input == "auto":
@@ -482,9 +516,11 @@ def classify_data(spreadsheet_file, spreadsheet_column,
482
 
483
  try:
484
  if not spreadsheet_file:
485
- return None, None, None, None, "**Error:** Please upload a file"
 
486
  if not spreadsheet_column:
487
- return None, None, None, None, "**Error:** Please select a column to classify"
 
488
 
489
  file_path = spreadsheet_file if isinstance(spreadsheet_file, str) else spreadsheet_file.name
490
  if file_path.endswith('.csv'):
@@ -493,10 +529,14 @@ def classify_data(spreadsheet_file, spreadsheet_column,
493
  df = pd.read_excel(file_path)
494
 
495
  if spreadsheet_column not in df.columns:
496
- return None, None, None, None, f"**Error:** Column '{spreadsheet_column}' not found"
 
497
 
498
  input_data = df[spreadsheet_column].tolist()
499
 
 
 
 
500
  # Calculate data quality metrics before classification
501
  text_series = df[spreadsheet_column].dropna().astype(str)
502
  data_quality = {
@@ -507,6 +547,9 @@ def classify_data(spreadsheet_file, spreadsheet_column,
507
  'error_count': 0 # Will be updated after classification
508
  }
509
 
 
 
 
510
  # Capture timing
511
  start_time = time.time()
512
 
@@ -557,6 +600,9 @@ Provide your work in JSON format where the number belonging to each category is
557
  catllm_version = "unknown"
558
  python_version = sys.version.split()[0]
559
 
 
 
 
560
  # Generate PDF methodology report with all new data
561
  pdf_path = generate_methodology_report_pdf(
562
  categories=categories,
@@ -624,17 +670,17 @@ Provide your work in JSON format where the number belonging to each category is
624
  })
625
  sample_df = pd.DataFrame(sample_data)
626
 
627
- # Return: distribution plot (visible), samples (visible), full results (visible), files, status
628
- return (
629
  gr.update(value=distribution_fig, visible=True),
630
  gr.update(value=sample_df, visible=True),
631
  gr.update(value=result, visible=True),
632
  [csv_path, pdf_path],
633
- f"**Success!** Classified {len(input_data)} responses in {processing_time:.1f}s"
634
  )
635
 
636
  except Exception as e:
637
- return None, None, None, None, f"**Error:** {str(e)}"
638
 
639
 
640
  def add_category_field(current_count):
@@ -728,7 +774,7 @@ result.to_csv("classified_results.csv", index=False)
728
  return gr.update(value=code, visible=True)
729
 
730
 
731
- with gr.Blocks(title="CatLLM - Survey Response Classifier") as demo:
732
  gr.Image("logo.png", show_label=False, show_download_button=False, height=100, container=False)
733
  gr.Markdown("# CatLLM - Survey Response Classifier")
734
  gr.Markdown("Classify survey responses into custom categories using LLMs.")
@@ -772,6 +818,7 @@ https://github.com/chrissoria/cat-llm
772
  label="Upload Survey Data (CSV or Excel)",
773
  file_types=[".csv", ".xlsx", ".xls"]
774
  )
 
775
 
776
  spreadsheet_column = gr.Dropdown(
777
  label="Column to Classify",
@@ -781,11 +828,20 @@ https://github.com/chrissoria/cat-llm
781
 
782
  gr.Markdown("### Categories")
783
  category_inputs = []
 
 
 
 
 
 
 
 
784
  for i in range(MAX_CATEGORIES):
785
  visible = i < INITIAL_CATEGORIES
 
786
  cat_input = gr.Textbox(
787
  label=f"Category {i+1}",
788
- placeholder=f"e.g., {'Positive' if i==0 else 'Negative' if i==1 else 'Neutral'}",
789
  visible=visible
790
  )
791
  category_inputs.append(cat_input)
@@ -872,6 +928,12 @@ https://github.com/chrissoria/cat-llm
872
  outputs=[spreadsheet_column, status]
873
  )
874
 
 
 
 
 
 
 
875
  add_category_btn.click(
876
  fn=add_category_field,
877
  inputs=[category_count],
 
399
  return "huggingface"
400
 
401
 
402
+ def load_example_dataset():
403
+ """Load the example dataset for users to try the app."""
404
+ example_path = "example_data.csv"
405
+ try:
406
+ df = pd.read_csv(example_path)
407
+ columns = df.columns.tolist()
408
+ return (
409
+ example_path, # file path
410
+ gr.update(choices=columns, value=columns[0] if columns else None), # column dropdown
411
+ f"Loaded example dataset ({len(df)} rows). Select column and click Classify." # status
412
+ )
413
+ except Exception as e:
414
+ return None, gr.update(choices=[], value=None), f"**Error loading example:** {str(e)}"
415
+
416
+
417
  def load_columns(file):
418
  if file is None:
419
  return gr.update(choices=[], value=None), "Please upload a file first"
 
426
  df = pd.read_excel(file_path)
427
 
428
  columns = df.columns.tolist()
429
+ num_rows = len(df)
430
+
431
+ # Warning for large datasets
432
+ if num_rows > 1000:
433
+ est_minutes = round(num_rows * 1.5 / 60) # ~1.5 seconds per row estimate
434
+ status_msg = f"⚠️ **Large dataset** ({num_rows:,} rows). Classification may take ~{est_minutes} minutes. Select column and click Classify."
435
+ else:
436
+ status_msg = f"Loaded {num_rows:,} rows. Select column and click Classify."
437
+
438
  return (
439
  gr.update(choices=columns, value=columns[0] if columns else None),
440
+ status_msg
441
  )
442
  except Exception as e:
443
  return gr.update(choices=[], value=None), f"**Error:** {str(e)}"
 
446
  def classify_data(spreadsheet_file, spreadsheet_column,
447
  cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10,
448
  model_tier, model, model_source_input, api_key_input):
449
+ """Main classification function with progress updates. Yields status updates then final results."""
450
  if not CATLLM_AVAILABLE:
451
+ yield None, None, None, None, "**Error:** catllm package not available"
452
+ return
453
 
454
  all_cats = [cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10]
455
  categories = [c.strip() for c in all_cats if c and c.strip()]
456
 
457
  if not categories:
458
+ yield None, None, None, None, "**Error:** Please enter at least one category"
459
+ return
460
 
461
  actual_model = model
462
 
 
466
  if model in HF_ROUTED_MODELS:
467
  actual_api_key = os.environ.get("HF_API_KEY", "")
468
  if not actual_api_key:
469
+ yield None, None, None, None, "**Error:** HuggingFace API key not configured in Space secrets"
470
+ return
471
  elif "gpt" in model.lower():
472
  actual_api_key = os.environ.get("OPENAI_API_KEY", "")
473
  if not actual_api_key:
474
+ yield None, None, None, None, "**Error:** OpenAI API key not configured in Space secrets"
475
+ return
476
  elif "gemini" in model.lower():
477
  actual_api_key = os.environ.get("GOOGLE_API_KEY", "")
478
  if not actual_api_key:
479
+ yield None, None, None, None, "**Error:** Google API key not configured in Space secrets"
480
+ return
481
  elif "mistral" in model.lower():
482
  actual_api_key = os.environ.get("MISTRAL_API_KEY", "")
483
  if not actual_api_key:
484
+ yield None, None, None, None, "**Error:** Mistral API key not configured in Space secrets"
485
+ return
486
  elif "claude" in model.lower():
487
  actual_api_key = os.environ.get("ANTHROPIC_API_KEY", "")
488
  if not actual_api_key:
489
+ yield None, None, None, None, "**Error:** Anthropic API key not configured in Space secrets"
490
+ return
491
  elif "sonar" in model.lower():
492
  actual_api_key = os.environ.get("PERPLEXITY_API_KEY", "")
493
  if not actual_api_key:
494
+ yield None, None, None, None, "**Error:** Perplexity API key not configured in Space secrets"
495
+ return
496
  elif "grok" in model.lower():
497
  actual_api_key = os.environ.get("XAI_API_KEY", "")
498
  if not actual_api_key:
499
+ yield None, None, None, None, "**Error:** xAI API key not configured in Space secrets"
500
+ return
501
  else:
502
  actual_api_key = os.environ.get("HF_API_KEY", "")
503
  else:
 
505
  if api_key_input and api_key_input.strip():
506
  actual_api_key = api_key_input.strip()
507
  else:
508
+ yield None, None, None, None, f"**Error:** Please provide your API key for {model}"
509
+ return
510
 
511
  # Use user-selected model_source, or auto-detect if "auto"
512
  if model_source_input == "auto":
 
516
 
517
  try:
518
  if not spreadsheet_file:
519
+ yield None, None, None, None, "**Error:** Please upload a file"
520
+ return
521
  if not spreadsheet_column:
522
+ yield None, None, None, None, "**Error:** Please select a column to classify"
523
+ return
524
 
525
  file_path = spreadsheet_file if isinstance(spreadsheet_file, str) else spreadsheet_file.name
526
  if file_path.endswith('.csv'):
 
529
  df = pd.read_excel(file_path)
530
 
531
  if spreadsheet_column not in df.columns:
532
+ yield None, None, None, None, f"**Error:** Column '{spreadsheet_column}' not found"
533
+ return
534
 
535
  input_data = df[spreadsheet_column].tolist()
536
 
537
+ # Progress update: data loaded
538
+ yield None, None, None, None, f"⏳ **Loading data...** Found {len(input_data)} responses to classify."
539
+
540
  # Calculate data quality metrics before classification
541
  text_series = df[spreadsheet_column].dropna().astype(str)
542
  data_quality = {
 
547
  'error_count': 0 # Will be updated after classification
548
  }
549
 
550
+ # Progress update: starting classification
551
+ yield None, None, None, None, f"🔄 **Classifying {len(input_data)} responses...** This may take a moment."
552
+
553
  # Capture timing
554
  start_time = time.time()
555
 
 
600
  catllm_version = "unknown"
601
  python_version = sys.version.split()[0]
602
 
603
+ # Progress update: generating report
604
+ yield None, None, None, None, f"📄 **Generating methodology report...** Classification complete in {processing_time:.1f}s."
605
+
606
  # Generate PDF methodology report with all new data
607
  pdf_path = generate_methodology_report_pdf(
608
  categories=categories,
 
670
  })
671
  sample_df = pd.DataFrame(sample_data)
672
 
673
+ # Final yield: distribution plot (visible), samples (visible), full results (visible), files, status
674
+ yield (
675
  gr.update(value=distribution_fig, visible=True),
676
  gr.update(value=sample_df, visible=True),
677
  gr.update(value=result, visible=True),
678
  [csv_path, pdf_path],
679
+ f"✅ **Success!** Classified {len(input_data)} responses in {processing_time:.1f}s"
680
  )
681
 
682
  except Exception as e:
683
+ yield None, None, None, None, f"**Error:** {str(e)}"
684
 
685
 
686
  def add_category_field(current_count):
 
774
  return gr.update(value=code, visible=True)
775
 
776
 
777
+ with gr.Blocks(title="CatLLM - Survey Response Classifier", theme=gr.themes.Soft()) as demo:
778
  gr.Image("logo.png", show_label=False, show_download_button=False, height=100, container=False)
779
  gr.Markdown("# CatLLM - Survey Response Classifier")
780
  gr.Markdown("Classify survey responses into custom categories using LLMs.")
 
818
  label="Upload Survey Data (CSV or Excel)",
819
  file_types=[".csv", ".xlsx", ".xls"]
820
  )
821
+ example_btn = gr.Button("📋 Try Example Dataset", variant="secondary", size="sm")
822
 
823
  spreadsheet_column = gr.Dropdown(
824
  label="Column to Classify",
 
828
 
829
  gr.Markdown("### Categories")
830
  category_inputs = []
831
+ placeholder_examples = [
832
+ "e.g., Positive sentiment",
833
+ "e.g., Negative sentiment",
834
+ "e.g., Product feedback",
835
+ "e.g., Service complaint",
836
+ "e.g., Feature request",
837
+ "e.g., Custom category"
838
+ ]
839
  for i in range(MAX_CATEGORIES):
840
  visible = i < INITIAL_CATEGORIES
841
+ placeholder = placeholder_examples[i] if i < len(placeholder_examples) else "e.g., Custom category"
842
  cat_input = gr.Textbox(
843
  label=f"Category {i+1}",
844
+ placeholder=placeholder,
845
  visible=visible
846
  )
847
  category_inputs.append(cat_input)
 
928
  outputs=[spreadsheet_column, status]
929
  )
930
 
931
+ example_btn.click(
932
+ fn=load_example_dataset,
933
+ inputs=[],
934
+ outputs=[spreadsheet_file, spreadsheet_column, status]
935
+ )
936
+
937
  add_category_btn.click(
938
  fn=add_category_field,
939
  inputs=[category_count],
example_data.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Response,
2
+ I wanted to live in San Diego,
3
+ I really hated my apartment,
4
+ My grandparents needed me to live nearby ,
5
+ "Tony, my husband, got a new job at UC Berkeley",