Update app.py
Browse files
app.py
CHANGED
|
@@ -22,11 +22,10 @@ emotion_labels = ["admiration", "amusement", "anger", "annoyance", "approval",
|
|
| 22 |
"neutral"]
|
| 23 |
|
| 24 |
# Function to classify emotions in batches
|
| 25 |
-
def classify_emotions_in_batches(texts, batch_size=64):
|
| 26 |
results = []
|
| 27 |
start_time = time.time()
|
| 28 |
-
|
| 29 |
-
for i in range(0, num_batches * batch_size, batch_size):
|
| 30 |
batch = texts[i:i+batch_size]
|
| 31 |
inputs = tokenizer(batch, return_tensors="pt", truncation=True, padding=True).to(device)
|
| 32 |
with torch.no_grad():
|
|
@@ -39,7 +38,9 @@ def classify_emotions_in_batches(texts, batch_size=64):
|
|
| 39 |
batch_time = time.time() - start_time
|
| 40 |
st.write(f"Processed batch {i//batch_size + 1} of {num_batches} in {batch_time:.2f} seconds")
|
| 41 |
start_time = time.time()
|
| 42 |
-
|
|
|
|
|
|
|
| 43 |
|
| 44 |
# Streamlit interface
|
| 45 |
st.title("Enron Emails Emotion Analysis")
|
|
@@ -54,8 +55,10 @@ if st.button("Run Inference"):
|
|
| 54 |
# Apply emotion classification to the email content
|
| 55 |
with st.spinner('Running inference...'):
|
| 56 |
email_texts = enron_data['body'].tolist()
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
-
|
| 60 |
-
enron_data.to_csv("enron_emails_with_emotions.csv", index=False)
|
| 61 |
-
st.success("Inference completed and results saved!")
|
|
|
|
| 22 |
"neutral"]
|
| 23 |
|
| 24 |
# Function to classify emotions in batches
|
| 25 |
+
def classify_emotions_in_batches(texts, batch_size=64, num_batches=20):
|
| 26 |
results = []
|
| 27 |
start_time = time.time()
|
| 28 |
+
for i in range(0, min(num_batches * batch_size, len(texts)), batch_size):
|
|
|
|
| 29 |
batch = texts[i:i+batch_size]
|
| 30 |
inputs = tokenizer(batch, return_tensors="pt", truncation=True, padding=True).to(device)
|
| 31 |
with torch.no_grad():
|
|
|
|
| 38 |
batch_time = time.time() - start_time
|
| 39 |
st.write(f"Processed batch {i//batch_size + 1} of {num_batches} in {batch_time:.2f} seconds")
|
| 40 |
start_time = time.time()
|
| 41 |
+
|
| 42 |
+
# Ensure results length matches the processed texts length
|
| 43 |
+
return results[:min(num_batches * batch_size, len(texts))]
|
| 44 |
|
| 45 |
# Streamlit interface
|
| 46 |
st.title("Enron Emails Emotion Analysis")
|
|
|
|
| 55 |
# Apply emotion classification to the email content
|
| 56 |
with st.spinner('Running inference...'):
|
| 57 |
email_texts = enron_data['body'].tolist()
|
| 58 |
+
results = classify_emotions_in_batches(email_texts, batch_size=64)
|
| 59 |
+
|
| 60 |
+
# Add results to the DataFrame and save
|
| 61 |
+
enron_data = enron_data.iloc[:len(results)].copy()
|
| 62 |
+
enron_data['emotion'] = results
|
| 63 |
|
| 64 |
+
# Save the results to a CSV file
|
|
|
|
|
|