Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,42 +16,60 @@ from googleapiclient.discovery import build
|
|
| 16 |
import warnings
|
| 17 |
warnings.filterwarnings('ignore')
|
| 18 |
|
|
|
|
| 19 |
np.random.seed(42)
|
| 20 |
tf.random.set_seed(42)
|
| 21 |
|
|
|
|
| 22 |
st.set_page_config(page_title="Sentiment Pulse", layout="wide")
|
| 23 |
st.markdown("<h1 style='text-align: center; color: #7B68EE;'>Sentiment Pulse: Multi-Platform Analysis</h1>", unsafe_allow_html=True)
|
| 24 |
|
|
|
|
| 25 |
REDDIT_CLIENT_ID = "S7pTXhj5JDFGDb3-_zrJEA"
|
| 26 |
REDDIT_CLIENT_SECRET = "QP3NYN4lrAKVLrBamzLGrpFywiVg8w"
|
| 27 |
REDDIT_USER_AGENT = "SoundaryaR_Bot/1.0"
|
| 28 |
YOUTUBE_API_KEY = "AIzaSyAChqXPaiNE9hKhApkgjgonzdgiCCOo"
|
| 29 |
|
|
|
|
| 30 |
reddit = praw.Reddit(client_id=REDDIT_CLIENT_ID, client_secret=REDDIT_CLIENT_SECRET, user_agent=REDDIT_USER_AGENT)
|
| 31 |
youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)
|
|
|
|
|
|
|
| 32 |
bert_classifier = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
|
| 33 |
vader_analyzer = SentimentIntensityAnalyzer()
|
| 34 |
|
| 35 |
@st.cache_data
|
| 36 |
def load_twitter_data():
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
def fetch_reddit_data(keyword):
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
def fetch_youtube_data(keyword):
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
def get_bert_sentiment(text):
|
| 57 |
try:
|
|
@@ -70,9 +88,11 @@ def combined_sentiment(text):
|
|
| 70 |
avg_score = (bert_score + abs(vader_score)) / 2
|
| 71 |
return 1 if avg_score > 0.5 else 0, avg_score
|
| 72 |
|
|
|
|
| 73 |
st.sidebar.title("Keyword Search")
|
| 74 |
keyword = st.sidebar.text_input("Enter a keyword (e.g., 'happy')", value="happy")
|
| 75 |
|
|
|
|
| 76 |
twitter_df = load_twitter_data()
|
| 77 |
twitter_filtered = twitter_df[twitter_df['text'].str.contains(keyword, case=False, na=False)]
|
| 78 |
reddit_df = fetch_reddit_data(keyword)
|
|
@@ -103,6 +123,7 @@ else:
|
|
| 103 |
st.warning(f"Not enough {platform} data for prediction.")
|
| 104 |
fig, ax = plt.subplots()
|
| 105 |
ax.plot(daily_sentiment['date'], daily_sentiment['combined_score'], label='Historical')
|
|
|
|
| 106 |
st.pyplot(fig)
|
| 107 |
continue
|
| 108 |
|
|
@@ -159,4 +180,4 @@ else:
|
|
| 159 |
explainer = shap.Explainer(lr_model, X_lr)
|
| 160 |
shap_values = explainer(X_lr)
|
| 161 |
shap.plots.beeswarm(shap_values, show=False)
|
| 162 |
-
st.pyplot(plt.gcf())
|
|
|
|
import warnings
warnings.filterwarnings('ignore')

# Seed NumPy and TensorFlow so model runs are reproducible.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Streamlit page configuration
st.set_page_config(page_title="Sentiment Pulse", layout="wide")
st.markdown("<h1 style='text-align: center; color: #7B68EE;'>Sentiment Pulse: Multi-Platform Analysis</h1>", unsafe_allow_html=True)

# API credentials.
# SECURITY NOTE(review): these secrets were hard-coded in the source. They now
# come from the environment, falling back to the original values so existing
# deployments keep working — rotate the keys and drop the fallbacks before
# publishing this code.
import os
REDDIT_CLIENT_ID = os.environ.get("REDDIT_CLIENT_ID", "S7pTXhj5JDFGDb3-_zrJEA")
REDDIT_CLIENT_SECRET = os.environ.get("REDDIT_CLIENT_SECRET", "QP3NYN4lrAKVLrBamzLGrpFywiVg8w")
REDDIT_USER_AGENT = os.environ.get("REDDIT_USER_AGENT", "SoundaryaR_Bot/1.0")
YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY", "AIzaSyAChqXPaiNE9hKhApkgjgonzdgiCCOo")

# Initialize the Reddit and YouTube API clients.
reddit = praw.Reddit(client_id=REDDIT_CLIENT_ID, client_secret=REDDIT_CLIENT_SECRET, user_agent=REDDIT_USER_AGENT)
youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)

# Sentiment models: DistilBERT (transformer) and VADER (lexicon-based).
bert_classifier = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
vader_analyzer = SentimentIntensityAnalyzer()
|
| 40 |
|
| 41 |
@st.cache_data
def load_twitter_data():
    """Load the Sentiment140-style Twitter CSV and return a 10k-row sample.

    Returns:
        DataFrame with columns ['sentiment', 'id', 'date', 'query', 'user',
        'text']; empty DataFrame (with an error shown in the UI) when the
        CSV file is missing.
    """
    try:
        df = pd.read_csv("twitter_dataset.csv", encoding='latin-1',
                         names=['sentiment', 'id', 'date', 'query', 'user', 'text'])
        df['date'] = pd.to_datetime(df['date'], errors='coerce')
        # Sentiment140 labels: 0 = negative, 4 = positive.
        df['sentiment'] = df['sentiment'].map({0: 'negative', 4: 'positive'})
        # Guard against files with fewer than 10k rows — sample(n) raises
        # ValueError when n exceeds the population size.
        return df.sample(min(10000, len(df)), random_state=42)
    except FileNotFoundError:
        st.error("twitter_dataset.csv not found. Please ensure the file is in the working directory.")
        return pd.DataFrame()
|
| 52 |
|
| 53 |
def fetch_reddit_data(keyword):
|
| 54 |
+
try:
|
| 55 |
+
subreddit = reddit.subreddit("all")
|
| 56 |
+
posts = subreddit.search(keyword, limit=100)
|
| 57 |
+
return pd.DataFrame([{'date': datetime.fromtimestamp(post.created_utc), 'text': post.title + " " + post.selftext} for post in posts])
|
| 58 |
+
except Exception as e:
|
| 59 |
+
st.error(f"Error fetching Reddit data: {e}")
|
| 60 |
+
return pd.DataFrame()
|
| 61 |
|
| 62 |
def fetch_youtube_data(keyword):
|
| 63 |
+
try:
|
| 64 |
+
request = youtube.search().list(q=keyword, part="snippet", maxResults=50, type="video")
|
| 65 |
+
response = request.execute()
|
| 66 |
+
return pd.DataFrame([{
|
| 67 |
+
'date': datetime.strptime(item['snippet']['publishedAt'], "%Y-%m-%dT%H:%M:%SZ"),
|
| 68 |
+
'text': item['snippet']['title'] + " " + item['snippet']['description']
|
| 69 |
+
} for item in response['items']])
|
| 70 |
+
except Exception as e:
|
| 71 |
+
st.error(f"Error fetching YouTube data: {e}")
|
| 72 |
+
return pd.DataFrame()
|
| 73 |
|
| 74 |
def get_bert_sentiment(text):
|
| 75 |
try:
|
|
|
|
| 88 |
avg_score = (bert_score + abs(vader_score)) / 2
|
| 89 |
return 1 if avg_score > 0.5 else 0, avg_score
|
| 90 |
|
| 91 |
+
# Sidebar for keyword input
|
| 92 |
st.sidebar.title("Keyword Search")
|
| 93 |
keyword = st.sidebar.text_input("Enter a keyword (e.g., 'happy')", value="happy")
|
| 94 |
|
| 95 |
+
# Load and filter data
|
| 96 |
twitter_df = load_twitter_data()
|
| 97 |
twitter_filtered = twitter_df[twitter_df['text'].str.contains(keyword, case=False, na=False)]
|
| 98 |
reddit_df = fetch_reddit_data(keyword)
|
|
|
|
| 123 |
st.warning(f"Not enough {platform} data for prediction.")
|
| 124 |
fig, ax = plt.subplots()
|
| 125 |
ax.plot(daily_sentiment['date'], daily_sentiment['combined_score'], label='Historical')
|
| 126 |
+
ax.legend()
|
| 127 |
st.pyplot(fig)
|
| 128 |
continue
|
| 129 |
|
|
|
|
| 180 |
explainer = shap.Explainer(lr_model, X_lr)
|
| 181 |
shap_values = explainer(X_lr)
|
| 182 |
shap.plots.beeswarm(shap_values, show=False)
|
| 183 |
+
st.pyplot(plt.gcf())
|