Spaces:
Build error
Build error
Pietro Lesci
committed on
Commit
·
c718eb8
1
Parent(s):
e8a4a19
update
Browse files
- src/configs.py +1 -1
- src/pages/home.py +7 -1
- src/preprocessing.py +1 -1
src/configs.py
CHANGED
|
@@ -33,4 +33,4 @@ class Languages(Enum):
|
|
| 33 |
class SupportedFiles(Enum):
|
| 34 |
xlsx = (lambda x: pd.read_excel(x, dtype=str),)
|
| 35 |
csv = (lambda x: pd.read_csv(x, dtype=str),)
|
| 36 |
-
parquet = (lambda x: pd.read_parquet(x
|
|
|
|
| 33 |
class SupportedFiles(Enum):
|
| 34 |
xlsx = (lambda x: pd.read_excel(x, dtype=str),)
|
| 35 |
csv = (lambda x: pd.read_csv(x, dtype=str),)
|
| 36 |
+
parquet = (lambda x: pd.read_parquet(x),)
|
src/pages/home.py
CHANGED
|
@@ -108,7 +108,7 @@ def write(session, uploaded_file):
|
|
| 108 |
pre_steps = pre_steps_elem.multiselect(
|
| 109 |
"Select pre-lemmatization preprocessing steps (ordered)",
|
| 110 |
options=steps_options,
|
| 111 |
-
default=steps_options
|
| 112 |
format_func=lambda x: x.replace("_", " ").title(),
|
| 113 |
key=session.run_id,
|
| 114 |
)
|
|
@@ -146,6 +146,8 @@ def write(session, uploaded_file):
|
|
| 146 |
post_steps=post_steps,
|
| 147 |
)
|
| 148 |
|
|
|
|
|
|
|
| 149 |
# ==== 3. PROVIDE FEEDBACK ON OPTIONS ==== #
|
| 150 |
if show_sample and not (label_column and text_column):
|
| 151 |
st.warning("Please select `label` and `text` columns")
|
|
@@ -155,6 +157,8 @@ def write(session, uploaded_file):
|
|
| 155 |
sample_data[f"preprocessed_{text_column}"] = preprocessing_pipeline(
|
| 156 |
sample_data[text_column]
|
| 157 |
).values
|
|
|
|
|
|
|
| 158 |
st.table(
|
| 159 |
sample_data.loc[
|
| 160 |
:, [label_column, text_column, f"preprocessed_{text_column}"]
|
|
@@ -174,6 +178,8 @@ def write(session, uploaded_file):
|
|
| 174 |
data[text_column]
|
| 175 |
).values
|
| 176 |
|
|
|
|
|
|
|
| 177 |
inputs = encode(data[f"preprocessed_{text_column}"], data[label_column])
|
| 178 |
session.posdf, session.negdf = wordifier(**inputs)
|
| 179 |
st.success("Wordified!")
|
|
|
|
| 108 |
pre_steps = pre_steps_elem.multiselect(
|
| 109 |
"Select pre-lemmatization preprocessing steps (ordered)",
|
| 110 |
options=steps_options,
|
| 111 |
+
default=steps_options,
|
| 112 |
format_func=lambda x: x.replace("_", " ").title(),
|
| 113 |
key=session.run_id,
|
| 114 |
)
|
|
|
|
| 146 |
post_steps=post_steps,
|
| 147 |
)
|
| 148 |
|
| 149 |
+
print(preprocessing_pipeline.pre_steps)
|
| 150 |
+
|
| 151 |
# ==== 3. PROVIDE FEEDBACK ON OPTIONS ==== #
|
| 152 |
if show_sample and not (label_column and text_column):
|
| 153 |
st.warning("Please select `label` and `text` columns")
|
|
|
|
| 157 |
sample_data[f"preprocessed_{text_column}"] = preprocessing_pipeline(
|
| 158 |
sample_data[text_column]
|
| 159 |
).values
|
| 160 |
+
|
| 161 |
+
print(sample_data)
|
| 162 |
st.table(
|
| 163 |
sample_data.loc[
|
| 164 |
:, [label_column, text_column, f"preprocessed_{text_column}"]
|
|
|
|
| 178 |
data[text_column]
|
| 179 |
).values
|
| 180 |
|
| 181 |
+
print(data.head())
|
| 182 |
+
|
| 183 |
inputs = encode(data[f"preprocessed_{text_column}"], data[label_column])
|
| 184 |
session.posdf, session.negdf = wordifier(**inputs)
|
| 185 |
st.success("Wordified!")
|
src/preprocessing.py
CHANGED
|
@@ -115,7 +115,7 @@ class Lemmatizer:
|
|
| 115 |
elif remove_stop and not lemmatization:
|
| 116 |
|
| 117 |
def lemmatizer_fn(doc: spacy.tokens.doc.Doc) -> str:
|
| 118 |
-
return " ".join([t for t in doc if not t.is_stop])
|
| 119 |
|
| 120 |
elif lemmatization and not remove_stop:
|
| 121 |
|
|
|
|
| 115 |
elif remove_stop and not lemmatization:
|
| 116 |
|
| 117 |
def lemmatizer_fn(doc: spacy.tokens.doc.Doc) -> str:
|
| 118 |
+
return " ".join([t.text for t in doc if not t.is_stop])
|
| 119 |
|
| 120 |
elif lemmatization and not remove_stop:
|
| 121 |
|