Kurkur99 committed on
Commit
a0e0a21
·
1 Parent(s): 0a2d8ac

Update eda.py

Browse files
Files changed (1) hide show
  1. eda.py +14 -1
eda.py CHANGED
@@ -21,6 +21,14 @@ def process_review(review):
21
  review = re.sub(r'[^a-z\s]', '', review) # Remove non-alphabetical characters
22
  return review
23
 
 
 
 
 
 
 
 
 
24
  def display_eda(data):
25
  # Derive the 'sentiment' column from 'rating' if it doesn't exist
26
  if 'sentiment' not in data.columns:
@@ -42,11 +50,16 @@ def display_eda(data):
42
 
43
  # Word cloud for each sentiment
44
  st.subheader("Word Clouds for Sentiments")
 
 
 
 
 
45
  sentiments = data['sentiment'].unique()
46
  for sentiment in sentiments:
47
  st.write(f"Word Cloud for {sentiment}")
48
  subset = data[data['sentiment'] == sentiment]
49
- text = " ".join(process_review(review) for review in subset['review'])
50
  wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
51
  plt.figure()
52
  plt.imshow(wordcloud, interpolation="bilinear")
 
21
  review = re.sub(r'[^a-z\s]', '', review) # Remove non-alphabetical characters
22
  return review
23
 
24
+ def find_review_column(data):
25
+ """Attempt to identify the review column from the dataset."""
26
+ potential_columns = ['review', 'text', 'comment', 'message', 'description']
27
+ for col in potential_columns:
28
+ if col in data.columns:
29
+ return col
30
+ return None
31
+
32
  def display_eda(data):
33
  # Derive the 'sentiment' column from 'rating' if it doesn't exist
34
  if 'sentiment' not in data.columns:
 
50
 
51
  # Word cloud for each sentiment
52
  st.subheader("Word Clouds for Sentiments")
53
+ review_column = find_review_column(data)
54
+ if not review_column:
55
+ st.error("Couldn't find a column with reviews. Please check the dataset.")
56
+ return
57
+
58
  sentiments = data['sentiment'].unique()
59
  for sentiment in sentiments:
60
  st.write(f"Word Cloud for {sentiment}")
61
  subset = data[data['sentiment'] == sentiment]
62
+ text = " ".join(process_review(review) for review in subset[review_column])
63
  wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
64
  plt.figure()
65
  plt.imshow(wordcloud, interpolation="bilinear")