Spaces:
Sleeping
Sleeping
Commit
·
bd9233d
1
Parent(s):
efbae10
add
Browse files
app.py
CHANGED
|
@@ -25,12 +25,11 @@ ARTICLE = r"""<center>
|
|
| 25 |
</center>"""
|
| 26 |
|
| 27 |
|
| 28 |
-
|
| 29 |
-
def load_data(file_path):
|
| 30 |
"""Load dataset (keep only 500 rows for efficiency)"""
|
| 31 |
-
data = pd.read_csv(
|
| 32 |
assert "text" in data.columns, "The data must have a column named 'text'"
|
| 33 |
-
return data
|
| 34 |
|
| 35 |
|
| 36 |
def run_nlp_processing(data):
|
|
@@ -147,7 +146,7 @@ with blocks:
|
|
| 147 |
topics_text = gr.Textbox(label="Topics", lines=50)
|
| 148 |
gr.Markdown(ARTICLE)
|
| 149 |
# event listeners
|
| 150 |
-
in_file = in_file.upload(inputs=in_file
|
| 151 |
# submit_button.click(inputs=in_data, outputs=out_dataset, fn=run_bert_tokenization)
|
| 152 |
# out_dataset.change(inputs=out_dataset, outputs=embedding_plot, fn=run_bertopic)
|
| 153 |
|
|
|
|
| 25 |
</center>"""
|
| 26 |
|
| 27 |
|
| 28 |
+
def load_data(fileobj):
    """Load the uploaded CSV and return only its ``text`` column.

    At most the first 500 rows are read to keep downstream processing
    cheap, and malformed lines are skipped instead of aborting the load.

    Args:
        fileobj: Anything ``pandas.read_csv`` accepts as its first
            argument (a path or a readable file-like object).

    Returns:
        A single-column DataFrame containing just the ``text`` column.

    Raises:
        ValueError: If the CSV has no column named ``text``.
    """
    data = pd.read_csv(fileobj, on_bad_lines='skip', nrows=500)
    # Raise explicitly rather than `assert`: assertions are stripped under
    # `python -O`, which would let a bad upload surface later as a KeyError.
    if "text" not in data.columns:
        raise ValueError("The data must have a column named 'text'")
    return data[['text']]
|
| 33 |
|
| 34 |
|
| 35 |
def run_nlp_processing(data):
|
|
|
|
| 146 |
topics_text = gr.Textbox(label="Topics", lines=50)
|
| 147 |
gr.Markdown(ARTICLE)
|
| 148 |
# event listeners
|
| 149 |
+
in_file = in_file.upload(inputs=in_file, outputs=in_data, fn=load_data)
|
| 150 |
# submit_button.click(inputs=in_data, outputs=out_dataset, fn=run_bert_tokenization)
|
| 151 |
# out_dataset.change(inputs=out_dataset, outputs=embedding_plot, fn=run_bertopic)
|
| 152 |
|