Spaces:
Runtime error
Runtime error
Update eda.py
Browse files
eda.py
CHANGED
|
@@ -21,14 +21,6 @@ def process_review(review):
|
|
| 21 |
review = re.sub(r'[^a-z\s]', '', review) # Remove non-alphabetical characters
|
| 22 |
return review
|
| 23 |
|
| 24 |
-
def find_review_column(data):
|
| 25 |
-
"""Attempt to identify the review column from the dataset."""
|
| 26 |
-
potential_columns = ['review', 'text', 'comment', 'message', 'description']
|
| 27 |
-
for col in potential_columns:
|
| 28 |
-
if col in data.columns:
|
| 29 |
-
return col
|
| 30 |
-
return None
|
| 31 |
-
|
| 32 |
def display_eda(data):
|
| 33 |
# Derive the 'sentiment' column from 'rating' if it doesn't exist
|
| 34 |
if 'sentiment' not in data.columns:
|
|
@@ -50,16 +42,11 @@ def display_eda(data):
|
|
| 50 |
|
| 51 |
# Word cloud for each sentiment
|
| 52 |
st.subheader("Word Clouds for Sentiments")
|
| 53 |
-
review_column = find_review_column(data)
|
| 54 |
-
if not review_column:
|
| 55 |
-
st.error("Couldn't find a column with reviews. Please check the dataset.")
|
| 56 |
-
return
|
| 57 |
-
|
| 58 |
sentiments = data['sentiment'].unique()
|
| 59 |
for sentiment in sentiments:
|
| 60 |
st.write(f"Word Cloud for {sentiment}")
|
| 61 |
subset = data[data['sentiment'] == sentiment]
|
| 62 |
-
text = " ".join(process_review(review) for review in subset[review_column])
|
| 63 |
wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
|
| 64 |
plt.figure()
|
| 65 |
plt.imshow(wordcloud, interpolation="bilinear")
|
|
|
|
| 21 |
review = re.sub(r'[^a-z\s]', '', review) # Remove non-alphabetical characters
|
| 22 |
return review
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
def display_eda(data):
|
| 25 |
# Derive the 'sentiment' column from 'rating' if it doesn't exist
|
| 26 |
if 'sentiment' not in data.columns:
|
|
|
|
| 42 |
|
| 43 |
# Word cloud for each sentiment
|
| 44 |
st.subheader("Word Clouds for Sentiments")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
sentiments = data['sentiment'].unique()
|
| 46 |
for sentiment in sentiments:
|
| 47 |
st.write(f"Word Cloud for {sentiment}")
|
| 48 |
subset = data[data['sentiment'] == sentiment]
|
| 49 |
+
text = " ".join(process_review(review) for review in subset['review_description'])
|
| 50 |
wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
|
| 51 |
plt.figure()
|
| 52 |
plt.imshow(wordcloud, interpolation="bilinear")
|