Commit
·
2efc1aa
1
Parent(s):
d79693f
Adicionada descrição na seleção de categorias e explicação do máximo de 2 categorias.
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import math
|
| 2 |
-
|
| 3 |
import gradio as gr
|
| 4 |
import numpy as np
|
| 5 |
import pandas as pd
|
|
@@ -91,7 +91,12 @@ def train_model(categories):
|
|
| 91 |
)
|
| 92 |
|
| 93 |
random_search.fit(data_train.data, data_train.target)
|
| 94 |
-
best_parameters =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
test_accuracy = random_search.score(data_test.data, data_test.target)
|
| 97 |
|
|
@@ -165,6 +170,15 @@ DESCRIPTION_PART2 = [
|
|
| 165 |
"[Classification of text documents using sparse features](https://scikit-learn.org/stable/auto_examples/text/plot_document_classification_20newsgroups.html#sphx-glr-auto-examples-text-plot-document-classification-20newsgroups-py) notebook.",
|
| 166 |
]
|
| 167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
AUTHOR = """
|
| 169 |
Created by [@dominguesm](https://huggingface.co/dominguesm) based on [scikit-learn docs](https://scikit-learn.org/stable/auto_examples/model_selection/plot_grid_search_text_feature_extraction.html)
|
| 170 |
"""
|
|
@@ -181,12 +195,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
|
|
| 181 |
with gr.Row():
|
| 182 |
with gr.Column():
|
| 183 |
gr.Markdown("""## CATEGORY SELECTION""")
|
|
|
|
| 184 |
drop_categories = gr.Dropdown(
|
| 185 |
CATEGORIES,
|
| 186 |
value=["alt.atheism", "talk.religion.misc"],
|
| 187 |
multiselect=True,
|
| 188 |
label="Categories",
|
| 189 |
-
info="
|
| 190 |
max_choices=2,
|
| 191 |
interactive=True,
|
| 192 |
)
|
|
|
|
| 1 |
import math
|
| 2 |
+
import json
|
| 3 |
import gradio as gr
|
| 4 |
import numpy as np
|
| 5 |
import pandas as pd
|
|
|
|
| 91 |
)
|
| 92 |
|
| 93 |
random_search.fit(data_train.data, data_train.target)
|
| 94 |
+
best_parameters = json.dumps(
|
| 95 |
+
random_search.best_estimator_.get_params(),
|
| 96 |
+
indent=4,
|
| 97 |
+
sort_keys=True,
|
| 98 |
+
default=str,
|
| 99 |
+
)
|
| 100 |
|
| 101 |
test_accuracy = random_search.score(data_test.data, data_test.target)
|
| 102 |
|
|
|
|
| 170 |
"[Classification of text documents using sparse features](https://scikit-learn.org/stable/auto_examples/text/plot_document_classification_20newsgroups.html#sphx-glr-auto-examples-text-plot-document-classification-20newsgroups-py) notebook.",
|
| 171 |
]
|
| 172 |
|
| 173 |
+
CATEGORY_SELECTION_DESCRIPTION = [
|
| 174 |
+
"The task of text classification is easier when there is little overlap between the characteristic terms ",
|
| 175 |
+
"of different topics. This is because the presence of common terms can make it difficult to distinguish between ",
|
| 176 |
+
"different topics. On the other hand, when there is little overlap between the characteristic terms of different ",
|
| 177 |
+
"topics, the task of text classification becomes easier, as the unique terms of each topic provide a solid basis ",
|
| 178 |
+
"for accurately classifying the document into its respective category. Therefore, careful selection of characteristic",
|
| 179 |
+
" terms for each topic is crucial to ensure accuracy in text classification."
|
| 180 |
+
]
|
| 181 |
+
|
| 182 |
AUTHOR = """
|
| 183 |
Created by [@dominguesm](https://huggingface.co/dominguesm) based on [scikit-learn docs](https://scikit-learn.org/stable/auto_examples/model_selection/plot_grid_search_text_feature_extraction.html)
|
| 184 |
"""
|
|
|
|
| 195 |
with gr.Row():
|
| 196 |
with gr.Column():
|
| 197 |
gr.Markdown("""## CATEGORY SELECTION""")
|
| 198 |
+
gr.Markdown("".join(CATEGORY_SELECTION_DESCRIPTION))
|
| 199 |
drop_categories = gr.Dropdown(
|
| 200 |
CATEGORIES,
|
| 201 |
value=["alt.atheism", "talk.religion.misc"],
|
| 202 |
multiselect=True,
|
| 203 |
label="Categories",
|
| 204 |
+
info="Please select up to two categories that you want to receive training on.",
|
| 205 |
max_choices=2,
|
| 206 |
interactive=True,
|
| 207 |
)
|