Spaces:
Runtime error
Runtime error
Update eda.py
Browse files
eda.py
CHANGED
|
@@ -21,6 +21,14 @@ def process_review(review):
|
|
| 21 |
review = re.sub(r'[^a-z\s]', '', review) # Remove non-alphabetical characters
|
| 22 |
return review
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
def display_eda(data):
|
| 25 |
# Derive the 'sentiment' column from 'rating' if it doesn't exist
|
| 26 |
if 'sentiment' not in data.columns:
|
|
@@ -42,11 +50,16 @@ def display_eda(data):
|
|
| 42 |
|
| 43 |
# Word cloud for each sentiment
|
| 44 |
st.subheader("Word Clouds for Sentiments")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
sentiments = data['sentiment'].unique()
|
| 46 |
for sentiment in sentiments:
|
| 47 |
st.write(f"Word Cloud for {sentiment}")
|
| 48 |
subset = data[data['sentiment'] == sentiment]
|
| 49 |
-
text = " ".join(process_review(review) for review in subset[
|
| 50 |
wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
|
| 51 |
plt.figure()
|
| 52 |
plt.imshow(wordcloud, interpolation="bilinear")
|
|
|
|
| 21 |
review = re.sub(r'[^a-z\s]', '', review) # Remove non-alphabetical characters
|
| 22 |
return review
|
| 23 |
|
| 24 |
+
def find_review_column(data):
    """Identify the column of *data* that holds the review text.

    Candidate names are tried in priority order: 'review', 'text',
    'comment', 'message', 'description'. An exact match always wins. If no
    candidate matches exactly, string labels are compared again ignoring
    case and surrounding whitespace, so headers such as 'Review' or
    ' Text ' are still recognised.

    Args:
        data: Object exposing a ``columns`` attribute that supports ``in``
            and iteration over its labels (e.g. a pandas DataFrame).

    Returns:
        The matching label exactly as it appears in ``data.columns`` (so it
        can be used directly to index ``data``), or None when no candidate
        matches.
    """
    potential_columns = ['review', 'text', 'comment', 'message', 'description']

    # Pass 1: exact match, in candidate priority order.
    for col in potential_columns:
        if col in data.columns:
            return col

    # Pass 2: tolerant match. Map each normalised string label to the first
    # original label that produced it; non-string labels cannot match.
    normalized = {}
    for label in data.columns:
        if isinstance(label, str):
            normalized.setdefault(label.strip().casefold(), label)

    for col in potential_columns:
        if col in normalized:
            return normalized[col]
    return None
|
| 31 |
+
|
| 32 |
def display_eda(data):
|
| 33 |
# Derive the 'sentiment' column from 'rating' if it doesn't exist
|
| 34 |
if 'sentiment' not in data.columns:
|
|
|
|
| 50 |
|
| 51 |
# Word cloud for each sentiment
|
| 52 |
st.subheader("Word Clouds for Sentiments")
|
| 53 |
+
review_column = find_review_column(data)
|
| 54 |
+
if not review_column:
|
| 55 |
+
st.error("Couldn't find a column with reviews. Please check the dataset.")
|
| 56 |
+
return
|
| 57 |
+
|
| 58 |
sentiments = data['sentiment'].unique()
|
| 59 |
for sentiment in sentiments:
|
| 60 |
st.write(f"Word Cloud for {sentiment}")
|
| 61 |
subset = data[data['sentiment'] == sentiment]
|
| 62 |
+
text = " ".join(process_review(review) for review in subset[review_column])
|
| 63 |
wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
|
| 64 |
plt.figure()
|
| 65 |
plt.imshow(wordcloud, interpolation="bilinear")
|