Spaces:
Build error
Build error
Pietro Lesci
committed on
Commit
·
8400e75
1
Parent(s):
11bd087
UI and disable preprocessing
Browse files- src/components.py +39 -35
src/components.py
CHANGED
|
@@ -7,25 +7,25 @@ from src.utils import get_col_indices
|
|
| 7 |
|
| 8 |
|
| 9 |
def form(df):
|
| 10 |
-
with st.form("
|
| 11 |
-
col1, col2 = st.columns(
|
|
|
|
|
|
|
| 12 |
with col1:
|
| 13 |
-
|
| 14 |
-
cols = [""] + df.columns.tolist()
|
| 15 |
-
text_index, label_index = get_col_indices(cols)
|
| 16 |
-
|
| 17 |
label_column = st.selectbox(
|
| 18 |
"Select label column",
|
| 19 |
cols,
|
| 20 |
index=label_index,
|
| 21 |
help="Select the column containing the labels",
|
| 22 |
)
|
|
|
|
| 23 |
text_column = st.selectbox(
|
| 24 |
"Select text column",
|
| 25 |
cols,
|
| 26 |
index=text_index,
|
| 27 |
help="Select the column containing the text",
|
| 28 |
)
|
|
|
|
| 29 |
language = st.selectbox(
|
| 30 |
"Select language",
|
| 31 |
[i.name for i in Languages],
|
|
@@ -35,41 +35,45 @@ def form(df):
|
|
| 35 |
""",
|
| 36 |
)
|
| 37 |
|
| 38 |
-
with
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
# Every form must have a submit button.
|
| 65 |
submitted = st.form_submit_button("Submit")
|
| 66 |
if submitted:
|
| 67 |
|
| 68 |
# preprocess
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
|
|
|
| 73 |
# prepare input
|
| 74 |
with st.spinner("Step 2/4: Preparing inputs"):
|
| 75 |
input_dict = input_transform(df[text_column], df[label_column])
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
def form(df):
|
| 10 |
+
with st.form("Wordify form"):
|
| 11 |
+
col1, col2, col3 = st.columns(3)
|
| 12 |
+
cols = [""] + df.columns.tolist()
|
| 13 |
+
text_index, label_index = get_col_indices(cols)
|
| 14 |
with col1:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
label_column = st.selectbox(
|
| 16 |
"Select label column",
|
| 17 |
cols,
|
| 18 |
index=label_index,
|
| 19 |
help="Select the column containing the labels",
|
| 20 |
)
|
| 21 |
+
with col2:
|
| 22 |
text_column = st.selectbox(
|
| 23 |
"Select text column",
|
| 24 |
cols,
|
| 25 |
index=text_index,
|
| 26 |
help="Select the column containing the text",
|
| 27 |
)
|
| 28 |
+
with col3:
|
| 29 |
language = st.selectbox(
|
| 30 |
"Select language",
|
| 31 |
[i.name for i in Languages],
|
|
|
|
| 35 |
""",
|
| 36 |
)
|
| 37 |
|
| 38 |
+
with st.expander("Advanced Options"):
|
| 39 |
+
disable_preprocessing = st.checkbox("Disable Preprocessing", False)
|
| 40 |
+
|
| 41 |
+
if not disable_preprocessing:
|
| 42 |
+
steps_options = list(PreprocessingPipeline.pipeline_components().keys())
|
| 43 |
+
pre_steps = st.multiselect(
|
| 44 |
+
"Select pre-lemmatization processing steps (ordered)",
|
| 45 |
+
options=steps_options,
|
| 46 |
+
default=[steps_options[i] for i in PreprocessingConfigs.DEFAULT_PRE.value],
|
| 47 |
+
format_func=lambda x: x.replace("_", " ").title(),
|
| 48 |
+
help="Select the processing steps to apply before the text is lemmatized",
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
lammatization_options = list(PreprocessingPipeline.lemmatization_component().keys())
|
| 52 |
+
lemmatization_step = st.selectbox(
|
| 53 |
+
"Select lemmatization",
|
| 54 |
+
options=lammatization_options,
|
| 55 |
+
index=PreprocessingConfigs.DEFAULT_LEMMA.value,
|
| 56 |
+
help="Select lemmatization procedure",
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
post_steps = st.multiselect(
|
| 60 |
+
"Select post-lemmatization processing steps (ordered)",
|
| 61 |
+
options=steps_options,
|
| 62 |
+
default=[steps_options[i] for i in PreprocessingConfigs.DEFAULT_POST.value],
|
| 63 |
+
format_func=lambda x: x.replace("_", " ").title(),
|
| 64 |
+
help="Select the processing steps to apply after the text is lemmatized",
|
| 65 |
+
)
|
| 66 |
|
| 67 |
# Every form must have a submit button.
|
| 68 |
submitted = st.form_submit_button("Submit")
|
| 69 |
if submitted:
|
| 70 |
|
| 71 |
# preprocess
|
| 72 |
+
if not disable_preprocessing:
|
| 73 |
+
with st.spinner("Step 1/4: Preprocessing text"):
|
| 74 |
+
pipe = PreprocessingPipeline(language, pre_steps, lemmatization_step, post_steps)
|
| 75 |
+
df = pipe.vaex_process(df, text_column)
|
| 76 |
+
|
| 77 |
# prepare input
|
| 78 |
with st.spinner("Step 2/4: Preparing inputs"):
|
| 79 |
input_dict = input_transform(df[text_column], df[label_column])
|