Spaces:

sklearn-docs
/

text-feature-extraction-evaluation

Sleeping

App Files Community

dominguesm committed on Apr 27, 2023

Commit

2efc1aa

1 Parent(s): d79693f

Adicionada descrição na seleção de categoria e explicação do máximo de 2 categorias.

Browse files

Files changed (1) hide show

app.py +18 -3

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import math
 import gradio as gr
 import numpy as np
 import pandas as pd
@@ -91,7 +91,12 @@ def train_model(categories):
     )
     random_search.fit(data_train.data, data_train.target)
-    best_parameters = random_search.best_estimator_.get_params()
     test_accuracy = random_search.score(data_test.data, data_test.target)
@@ -165,6 +170,15 @@ DESCRIPTION_PART2 = [
     "[Classification of text documents using sparse features](https://scikit-learn.org/stable/auto_examples/text/plot_document_classification_20newsgroups.html#sphx-glr-auto-examples-text-plot-document-classification-20newsgroups-py) notebook.",
 ]
 AUTHOR = """
 Created by [@dominguesm](https://huggingface.co/dominguesm) based on [scikit-learn docs](https://scikit-learn.org/stable/auto_examples/model_selection/plot_grid_search_text_feature_extraction.html)
 """
@@ -181,12 +195,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
     with gr.Row():
         with gr.Column():
             gr.Markdown("""## CATEGORY SELECTION""")
             drop_categories = gr.Dropdown(
                 CATEGORIES,
                 value=["alt.atheism", "talk.religion.misc"],
                 multiselect=True,
                 label="Categories",
-                info="Select the categories you want to train on.",
                 max_choices=2,
                 interactive=True,
             )

 import math
+import json
 import gradio as gr
 import numpy as np
 import pandas as pd
     )
     random_search.fit(data_train.data, data_train.target)
+    best_parameters = json.dumps(
+        random_search.best_estimator_.get_params(),
+        indent=4,
+        sort_keys=True,
+        default=str,
+    )
     test_accuracy = random_search.score(data_test.data, data_test.target)
     "[Classification of text documents using sparse features](https://scikit-learn.org/stable/auto_examples/text/plot_document_classification_20newsgroups.html#sphx-glr-auto-examples-text-plot-document-classification-20newsgroups-py) notebook.",
 ]
+CATEGORY_SELECTION_DESCRIPTION = [
+    "The task of text classification is easier when there is little overlap between the characteristic terms ",
+    "of different topics. This is because the presence of common terms can make it difficult to distinguish between ",
+    "different topics. On the other hand, when there is little overlap between the characteristic terms of different ",
+    "topics, the task of text classification becomes easier, as the unique terms of each topic provide a solid basis ",
+    "for accurately classifying the document into its respective category. Therefore, careful selection of characteristic",
+    " terms for each topic is crucial to ensure accuracy in text classification."
+]
 AUTHOR = """
 Created by [@dominguesm](https://huggingface.co/dominguesm) based on [scikit-learn docs](https://scikit-learn.org/stable/auto_examples/model_selection/plot_grid_search_text_feature_extraction.html)
 """
     with gr.Row():
         with gr.Column():
             gr.Markdown("""## CATEGORY SELECTION""")
+            gr.Markdown("".join(CATEGORY_SELECTION_DESCRIPTION))
             drop_categories = gr.Dropdown(
                 CATEGORIES,
                 value=["alt.atheism", "talk.religion.misc"],
                 multiselect=True,
                 label="Categories",
+                info="Please select up to two categories that you want to receive training on.",
                 max_choices=2,
                 interactive=True,
             )