"""Streamlit app that scores user text with three sentiment back-ends.

The back-ends are VADER (lexicon based), the default Hugging Face
``sentiment-analysis`` pipeline, and a toy TF-IDF + linear-regression model
fitted on a handful of hard-coded samples.
"""

import os

import streamlit as st
import praw
import googleapiclient.discovery
import joblib
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import pipeline

# Load environment variables (each is None when the variable is unset)
REDDIT_CLIENT_ID = os.getenv("REDDIT_CLIENT_ID")
REDDIT_CLIENT_SECRET = os.getenv("REDDIT_CLIENT_SECRET")
REDDIT_USER_AGENT = os.getenv("REDDIT_USER_AGENT")
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")


# Authenticate Reddit
def authenticate_reddit():
    """Return a ``praw.Reddit`` client built from the REDDIT_* settings."""
    return praw.Reddit(
        client_id=REDDIT_CLIENT_ID,
        client_secret=REDDIT_CLIENT_SECRET,
        user_agent=REDDIT_USER_AGENT,
    )


# Authenticate YouTube
def authenticate_youtube():
    """Return a YouTube Data API v3 client keyed with YOUTUBE_API_KEY."""
    return googleapiclient.discovery.build(
        "youtube", "v3", developerKey=YOUTUBE_API_KEY
    )


# VADER Sentiment Analysis
vader = SentimentIntensityAnalyzer()


def get_vader_sentiment(text):
    """Return VADER's compound score for *text*.

    The compound score ranges from -1 (negative) to +1 (positive).
    """
    scores = vader.polarity_scores(text)
    return scores['compound']


# BERT Sentiment Analysis
@st.cache_resource
def _load_bert_pipeline():
    """Build the default sentiment pipeline, cached across script reruns.

    Streamlit re-executes this script on every widget interaction; without
    the cache the model would be loaded again on each click.
    """
    return pipeline("sentiment-analysis")


bert_sentiment = _load_bert_pipeline()


def get_bert_sentiment(text):
    """Return ``(label, score)`` from the sentiment pipeline for *text*."""
    # truncation=True keeps inputs longer than the model's maximum sequence
    # length from raising inside the pipeline.
    result = bert_sentiment(text, truncation=True)[0]
    return result['label'], result['score']


# Regression Sentiment Analysis
vectorizer = TfidfVectorizer()
regressor = LinearRegression()


def train_regression_model():
    """Fit the TF-IDF vectorizer and regressor, then persist both.

    The fitted pair is written to ``sentiment_model.pkl`` in the current
    working directory, overwriting any existing file.
    """
    sample_data = [
        ("I love this!", 1.0),
        ("This is amazing", 0.9),
        ("It's okay", 0.5),
        ("Not great", 0.3),
        ("I hate this", 0.1),
    ]
    texts, scores = zip(*sample_data)
    X = vectorizer.fit_transform(texts)
    regressor.fit(X, scores)
    joblib.dump((vectorizer, regressor), "sentiment_model.pkl")


# Retrained on every script run; cheap because the sample set is tiny.
train_regression_model()


# Predict with Regression Model
def get_regression_sentiment(text):
    """Return the regression model's predicted score for *text*.

    The model pair is reloaded from ``sentiment_model.pkl`` on each call.
    """
    # NOTE(review): joblib.load unpickles the file, so it must only ever read
    # the artifact this app wrote itself, never an untrusted upload.
    # Distinct local names avoid shadowing the module-level objects.
    loaded_vectorizer, loaded_regressor = joblib.load("sentiment_model.pkl")
    features = loaded_vectorizer.transform([text])
    return loaded_regressor.predict(features)[0]


# Streamlit UI
st.title("Sentiment Analysis App")
user_input = st.text_area("Enter text for sentiment analysis")

if st.button("Analyze"):
    if not user_input or not user_input.strip():
        # Scoring an empty string yields meaningless numbers; ask for input.
        st.warning("Please enter some text to analyze.")
    else:
        vader_score = get_vader_sentiment(user_input)
        bert_label, bert_score = get_bert_sentiment(user_input)
        regression_score = get_regression_sentiment(user_input)

        st.write(f"**VADER Sentiment Score:** {vader_score}")
        st.write(f"**BERT Sentiment:** {bert_label} ({bert_score:.2f})")
        st.write(f"**Regression Sentiment Score:** {regression_score:.2f}")