FarhinSadia committed on
Commit
47dd793
·
1 Parent(s): 3b875a3

fixed the sentiment analysis

Browse files
Files changed (1) hide show
  1. src/data_processor.py +10 -4
src/data_processor.py CHANGED
@@ -5,7 +5,7 @@ import re
5
  from textblob import TextBlob
6
  import numpy as np
7
  import json
8
- import streamlit as st
9
 
10
  try:
11
  import openai
@@ -25,7 +25,6 @@ class DataProcessor:
25
  def __init__(self, openai_api_key=None):
26
  self.processed_data = None
27
 
28
-
29
  if NLTK_AVAILABLE:
30
  try:
31
  self.sia = SentimentIntensityAnalyzer()
@@ -34,7 +33,6 @@ class DataProcessor:
34
  else:
35
  self.sia = None
36
 
37
-
38
  self.use_gpt = False
39
  if openai_api_key and OPENAI_AVAILABLE:
40
  openai.api_key = openai_api_key
@@ -186,7 +184,7 @@ class DataProcessor:
186
 
187
  text_lower = str(text).lower()
188
 
189
- if '?' in text_lower or any(phrase in text_lower for phrase in ['how do', 'what is', 'when', 'where', 'can i', 'could you']):
190
  return 'Inquiry', 'Contains questions or information seeking'
191
  elif any(word in text_lower for word in ['complaint', 'problem', 'issue', 'error', 'failed', 'not working', 'terrible', 'worst']):
192
  return 'Complaint', 'Contains complaint or problem description'
@@ -223,6 +221,14 @@ class DataProcessor:
223
  df[['emotion', 'emotion_keywords']] = df['text'].apply(lambda x: pd.Series(self.detect_emotion(x)))
224
  df[['category', 'category_reason']] = df['text'].apply(lambda x: pd.Series(self.categorize_post(x)))
225
 
 
 
 
 
 
 
 
 
226
  df['viral_score'] = 0
227
  if 'likes' in df.columns:
228
  df['viral_score'] += df['likes'].fillna(0)
 
5
  from textblob import TextBlob
6
  import numpy as np
7
  import json
8
+ import streamlit as st
9
 
10
  try:
11
  import openai
 
25
  def __init__(self, openai_api_key=None):
26
  self.processed_data = None
27
 
 
28
  if NLTK_AVAILABLE:
29
  try:
30
  self.sia = SentimentIntensityAnalyzer()
 
33
  else:
34
  self.sia = None
35
 
 
36
  self.use_gpt = False
37
  if openai_api_key and OPENAI_AVAILABLE:
38
  openai.api_key = openai_api_key
 
184
 
185
  text_lower = str(text).lower()
186
 
187
+ if '?' in text_lower or any(phrase in text_lower for phrase in ['how do', 'what is', 'when', 'where', 'can i', 'could you', 'explain']):
188
  return 'Inquiry', 'Contains questions or information seeking'
189
  elif any(word in text_lower for word in ['complaint', 'problem', 'issue', 'error', 'failed', 'not working', 'terrible', 'worst']):
190
  return 'Complaint', 'Contains complaint or problem description'
 
221
  df[['emotion', 'emotion_keywords']] = df['text'].apply(lambda x: pd.Series(self.detect_emotion(x)))
222
  df[['category', 'category_reason']] = df['text'].apply(lambda x: pd.Series(self.categorize_post(x)))
223
 
224
+ # --- START OF FIX ---
225
+ # This is the new, crucial part.
226
+ # It corrects the sentiment for any post that was categorized as an 'Inquiry'.
227
+ # This fixes the issue where questions were incorrectly marked as 'Positive'.
228
+ df.loc[df['category'] == 'Inquiry', 'sentiment'] = 'Neutral'
229
+ df.loc[df['category'] == 'Inquiry', 'polarity'] = 0.0
230
+ # --- END OF FIX ---
231
+
232
  df['viral_score'] = 0
233
  if 'likes' in df.columns:
234
  df['viral_score'] += df['likes'].fillna(0)