Spaces:
Sleeping
Sleeping
Commit
·
47dd793
1
Parent(s):
3b875a3
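Fixed the sentiment analysis: posts categorized as 'Inquiry' are now set to 'Neutral' sentiment with polarity 0.0, and 'explain' is added to the inquiry phrases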
Browse files
- src/data_processor.py +10 -4
src/data_processor.py
CHANGED
|
@@ -5,7 +5,7 @@ import re
|
|
| 5 |
from textblob import TextBlob
|
| 6 |
import numpy as np
|
| 7 |
import json
|
| 8 |
-
import streamlit as st
|
| 9 |
|
| 10 |
try:
|
| 11 |
import openai
|
|
@@ -25,7 +25,6 @@ class DataProcessor:
|
|
| 25 |
def __init__(self, openai_api_key=None):
|
| 26 |
self.processed_data = None
|
| 27 |
|
| 28 |
-
|
| 29 |
if NLTK_AVAILABLE:
|
| 30 |
try:
|
| 31 |
self.sia = SentimentIntensityAnalyzer()
|
|
@@ -34,7 +33,6 @@ class DataProcessor:
|
|
| 34 |
else:
|
| 35 |
self.sia = None
|
| 36 |
|
| 37 |
-
|
| 38 |
self.use_gpt = False
|
| 39 |
if openai_api_key and OPENAI_AVAILABLE:
|
| 40 |
openai.api_key = openai_api_key
|
|
@@ -186,7 +184,7 @@ class DataProcessor:
|
|
| 186 |
|
| 187 |
text_lower = str(text).lower()
|
| 188 |
|
| 189 |
-
if '?' in text_lower or any(phrase in text_lower for phrase in ['how do', 'what is', 'when', 'where', 'can i', 'could you']):
|
| 190 |
return 'Inquiry', 'Contains questions or information seeking'
|
| 191 |
elif any(word in text_lower for word in ['complaint', 'problem', 'issue', 'error', 'failed', 'not working', 'terrible', 'worst']):
|
| 192 |
return 'Complaint', 'Contains complaint or problem description'
|
|
@@ -223,6 +221,14 @@ class DataProcessor:
|
|
| 223 |
df[['emotion', 'emotion_keywords']] = df['text'].apply(lambda x: pd.Series(self.detect_emotion(x)))
|
| 224 |
df[['category', 'category_reason']] = df['text'].apply(lambda x: pd.Series(self.categorize_post(x)))
|
| 225 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
df['viral_score'] = 0
|
| 227 |
if 'likes' in df.columns:
|
| 228 |
df['viral_score'] += df['likes'].fillna(0)
|
|
|
|
| 5 |
from textblob import TextBlob
|
| 6 |
import numpy as np
|
| 7 |
import json
|
| 8 |
+
import streamlit as st
|
| 9 |
|
| 10 |
try:
|
| 11 |
import openai
|
|
|
|
| 25 |
def __init__(self, openai_api_key=None):
|
| 26 |
self.processed_data = None
|
| 27 |
|
|
|
|
| 28 |
if NLTK_AVAILABLE:
|
| 29 |
try:
|
| 30 |
self.sia = SentimentIntensityAnalyzer()
|
|
|
|
| 33 |
else:
|
| 34 |
self.sia = None
|
| 35 |
|
|
|
|
| 36 |
self.use_gpt = False
|
| 37 |
if openai_api_key and OPENAI_AVAILABLE:
|
| 38 |
openai.api_key = openai_api_key
|
|
|
|
| 184 |
|
| 185 |
text_lower = str(text).lower()
|
| 186 |
|
| 187 |
+
if '?' in text_lower or any(phrase in text_lower for phrase in ['how do', 'what is', 'when', 'where', 'can i', 'could you', 'explain']):
|
| 188 |
return 'Inquiry', 'Contains questions or information seeking'
|
| 189 |
elif any(word in text_lower for word in ['complaint', 'problem', 'issue', 'error', 'failed', 'not working', 'terrible', 'worst']):
|
| 190 |
return 'Complaint', 'Contains complaint or problem description'
|
|
|
|
| 221 |
df[['emotion', 'emotion_keywords']] = df['text'].apply(lambda x: pd.Series(self.detect_emotion(x)))
|
| 222 |
df[['category', 'category_reason']] = df['text'].apply(lambda x: pd.Series(self.categorize_post(x)))
|
| 223 |
|
| 224 |
+
# --- START OF FIX ---
|
| 225 |
+
# This is the new, crucial part.
|
| 226 |
+
# It corrects the sentiment for any post that was categorized as an 'Inquiry'.
|
| 227 |
+
# This fixes the issue where questions were incorrectly marked as 'Positive'.
|
| 228 |
+
df.loc[df['category'] == 'Inquiry', 'sentiment'] = 'Neutral'
|
| 229 |
+
df.loc[df['category'] == 'Inquiry', 'polarity'] = 0.0
|
| 230 |
+
# --- END OF FIX ---
|
| 231 |
+
|
| 232 |
df['viral_score'] = 0
|
| 233 |
if 'likes' in df.columns:
|
| 234 |
df['viral_score'] += df['likes'].fillna(0)
|