Spaces:
Runtime error
Runtime error
Commit ·
cfa89f0
1
Parent(s): 294751b
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,6 +3,7 @@ import pandas as pd
|
|
| 3 |
import numpy as np
|
| 4 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
|
| 5 |
# summary function - test for single gradio function interface
|
|
|
|
| 6 |
def bulk_function(filename):
|
| 7 |
# Create class for data preparation
|
| 8 |
class SimpleDataset:
|
|
@@ -20,18 +21,25 @@ def bulk_function(filename):
|
|
| 20 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 21 |
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
| 22 |
trainer = Trainer(model=model)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# read file lines
|
| 25 |
with open(filename.name, "r") as f:
|
| 26 |
lines = f.readlines()
|
| 27 |
# expects CSV rows shaped like "<index>,<text>" (e.g. a pandas "Unnamed: 0" index column); keeps only the field after the last comma of each line
|
| 28 |
lines_s = [item.split("\n")[0].split(",")[-1] for item in lines]
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# Tokenize texts and create prediction data set
|
| 31 |
tokenized_texts = tokenizer(lines_s,truncation=True,padding=True)
|
| 32 |
pred_dataset = SimpleDataset(tokenized_texts)
|
| 33 |
|
| 34 |
-
# Run predictions
|
| 35 |
predictions = trainer.predict(pred_dataset)
|
| 36 |
|
| 37 |
# Transform predictions to labels
|
|
@@ -65,11 +73,8 @@ def bulk_function(filename):
|
|
| 65 |
df = pd.DataFrame(list(zip(lines_s,preds,labels,scores, anger, disgust, fear, joy, neutral, sadness, surprise)), columns=['text','pred','label','score', 'anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise'])
|
| 66 |
|
| 67 |
# save results to csv
|
| 68 |
-
YOUR_FILENAME = "
|
| 69 |
df.to_csv(YOUR_FILENAME)
|
| 70 |
|
| 71 |
# return dataframe for space output
|
| 72 |
-
return df
|
| 73 |
-
# launch space
|
| 74 |
-
gr.Interface(bulk_function, [gr.inputs.File(file_count="single", type="file", label="str", optional=False),], "dataframe",
|
| 75 |
-
).launch()
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
|
| 5 |
# summary function - test for single gradio function interface
|
| 6 |
+
# summary function - test for single gradio function interface
|
| 7 |
def bulk_function(filename):
|
| 8 |
# Create class for data preparation
|
| 9 |
class SimpleDataset:
|
|
|
|
| 21 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 22 |
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
| 23 |
trainer = Trainer(model=model)
|
| 24 |
+
print(filename, type(filename))
|
| 25 |
+
print(filename.name)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
|
| 29 |
# read file lines
|
| 30 |
with open(filename.name, "r") as f:
|
| 31 |
lines = f.readlines()
|
| 32 |
# expects CSV rows shaped like "<index>,<text>" (e.g. a pandas "Unnamed: 0" index column); keeps only the field after the last comma of each line
|
| 33 |
lines_s = [item.split("\n")[0].split(",")[-1] for item in lines]
|
| 34 |
+
print(lines_s)
|
| 35 |
+
print(filename)
|
| 36 |
+
|
| 37 |
|
| 38 |
# Tokenize texts and create prediction data set
|
| 39 |
tokenized_texts = tokenizer(lines_s,truncation=True,padding=True)
|
| 40 |
pred_dataset = SimpleDataset(tokenized_texts)
|
| 41 |
|
| 42 |
+
# Run predictions -> predict whole df
|
| 43 |
predictions = trainer.predict(pred_dataset)
|
| 44 |
|
| 45 |
# Transform predictions to labels
|
|
|
|
| 73 |
df = pd.DataFrame(list(zip(lines_s,preds,labels,scores, anger, disgust, fear, joy, neutral, sadness, surprise)), columns=['text','pred','label','score', 'anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise'])
|
| 74 |
|
| 75 |
# save results to csv
|
| 76 |
+
YOUR_FILENAME = filename.name.split(".")[0] + "_emotion_predictions" + ".csv" # name your output file
|
| 77 |
df.to_csv(YOUR_FILENAME)
|
| 78 |
|
| 79 |
# return dataframe for space output
|
| 80 |
+
return df
|
|
|
|
|
|
|
|
|