Spaces:

umang018
/

pg3

Sleeping

umang018 committed on Jul 23, 2024

Commit

a3dbcca

verified ·

1 Parent(s): ba95aa8

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -22,11 +22,10 @@ emotion_labels = ["admiration", "amusement", "anger", "annoyance", "approval",
                   "neutral"]
 # Function to classify emotions in batches
-def classify_emotions_in_batches(texts, batch_size=64):
     results = []
     start_time = time.time()
-    num_batches = min(20, (len(texts) + batch_size - 1) // batch_size)  # Calculate the number of batches to run (up to 20)
-    for i in range(0, num_batches * batch_size, batch_size):
         batch = texts[i:i+batch_size]
         inputs = tokenizer(batch, return_tensors="pt", truncation=True, padding=True).to(device)
         with torch.no_grad():
@@ -39,7 +38,9 @@ def classify_emotions_in_batches(texts, batch_size=64):
         batch_time = time.time() - start_time
         st.write(f"Processed batch {i//batch_size + 1} of {num_batches} in {batch_time:.2f} seconds")
         start_time = time.time()
-    return results
 # Streamlit interface
 st.title("Enron Emails Emotion Analysis")
@@ -54,8 +55,10 @@ if st.button("Run Inference"):
     # Apply emotion classification to the email content
     with st.spinner('Running inference...'):
         email_texts = enron_data['body'].tolist()
-        enron_data['emotion'] = classify_emotions_in_batches(email_texts, batch_size=64)
-    # Save the results to a CSV file
-    enron_data.to_csv("enron_emails_with_emotions.csv", index=False)
-    st.success("Inference completed and results saved!")

                   "neutral"]
 # Function to classify emotions in batches
+def classify_emotions_in_batches(texts, batch_size=64, num_batches=20):
     results = []
     start_time = time.time()
+    for i in range(0, min(num_batches * batch_size, len(texts)), batch_size):
         batch = texts[i:i+batch_size]
         inputs = tokenizer(batch, return_tensors="pt", truncation=True, padding=True).to(device)
         with torch.no_grad():
         batch_time = time.time() - start_time
         st.write(f"Processed batch {i//batch_size + 1} of {num_batches} in {batch_time:.2f} seconds")
         start_time = time.time()
+    # Ensure results length matches the processed texts length
+    return results[:min(num_batches * batch_size, len(texts))]
 # Streamlit interface
 st.title("Enron Emails Emotion Analysis")
     # Apply emotion classification to the email content
     with st.spinner('Running inference...'):
         email_texts = enron_data['body'].tolist()
+        results = classify_emotions_in_batches(email_texts, batch_size=64)
+        # Add results to the DataFrame and save
+        enron_data = enron_data.iloc[:len(results)].copy()
+        enron_data['emotion'] = results
+        # Save the results to a CSV f