Spaces:
Build error
Build error
| import gradio as gr | |
| import pandas as pd | |
| from nltk.sentiment import SentimentIntensityAnalyzer | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| import torch | |
| import requests | |
| import re | |
| import sentence_transformers | |
| from sentence_transformers import SentenceTransformer | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import nltk | |
| from nltk.tokenize import word_tokenize | |
| from nltk import pos_tag, ne_chunk | |
| from nltk.tree import Tree | |
| from googleapiclient.discovery import build | |
| import emoji | |
| from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer | |
# Fetch the NLTK resources this script requests at import time.
for _nltk_resource in (
    'vader_lexicon',
    'punkt',
    'averaged_perceptron_tagger',
    'maxent_ne_chunker',
    'words',
):
    nltk.download(_nltk_resource)

# Shared sentiment analyzer used by sentiment_scores().
# NOTE: SentimentIntensityAnalyzer is imported twice at the top of the file
# (from nltk.sentiment, then from vaderSentiment); the later import wins, so
# this instance is the vaderSentiment implementation.
sia = SentimentIntensityAnalyzer()
# Pick the compute device first: CUDA when torch reports it, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Sarcasm classifier: tokenizer and sequence-classification model are loaded
# from the same checkpoint, and the model is moved onto the chosen device.
_SARCASM_CHECKPOINT = "jkhan447/sarcasm-detection-Bert-base-uncased"
sarcasm_tokenizer = AutoTokenizer.from_pretrained(_SARCASM_CHECKPOINT)
sarcasm_model = AutoModelForSequenceClassification.from_pretrained(_SARCASM_CHECKPOINT)
sarcasm_model.to(device)

# Sentence-embedding model (MiniLM) loaded once at start-up.
sentence_transformer_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
import os  # only needed here, to read the API key from the environment

# YouTube Data API key used by get_video_details() and get_comments().
# Prefer the YOUTUBE_API_KEY environment variable (e.g. a Space secret).
# SECURITY: the literal below is a credential committed to source control and
# is kept only so existing deployments keep working; it should be rotated and
# the fallback removed once the environment variable is configured.
api_key = os.environ.get("YOUTUBE_API_KEY") or "AIzaSyDOw_v-T58ATLOmQjF00k5Mjha6VPQ-TAk"
# Patterns are tried in order; the first one is the original ``v=`` query
# parameter match, so every URL that was recognised before still yields the
# same id. The remaining patterns add short links and path-style URLs.
_VIDEO_ID_PATTERNS = (
    r"v=([a-zA-Z0-9_-]{11})",
    r"youtu\.be/([a-zA-Z0-9_-]{11})",
    r"youtube(?:-nocookie)?\.com/(?:embed|shorts|live)/([a-zA-Z0-9_-]{11})",
)


def extract_video_id(url):
    """Extract the 11-character YouTube video id from a URL.

    Recognises ``...watch?v=<id>``, ``youtu.be/<id>`` and
    ``youtube.com/{embed,shorts,live}/<id>`` forms.

    Args:
        url: The URL text entered by the user.

    Returns:
        The video id string, or ``None`` when ``url`` is not a string or no
        supported pattern matches.
    """
    if not isinstance(url, str):
        return None
    for pattern in _VIDEO_ID_PATTERNS:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
def get_video_details(video_id):
    """Fetch the title and category id of a video from the YouTube Data API.

    Args:
        video_id: The YouTube video id to look up.

    Returns:
        A ``(title, category_id)`` tuple taken from the first item's
        ``snippet``, or ``(None, None)`` when the response contains no items
        (unknown video, or an error payload without an ``items`` key).
    """
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/videos",
        # Let requests build and URL-encode the query string.
        params={"part": "snippet", "id": video_id, "key": api_key},
        # Avoid hanging the UI forever on a stalled connection.
        timeout=10,
    ).json()
    # Error payloads carry no "items" key; treat them like "not found"
    # instead of raising KeyError.
    items = response.get("items")
    if not items:
        return None, None
    snippet = items[0]["snippet"]
    return snippet["title"], snippet["categoryId"]
def get_comments(video_id):
    """Fetch up to 100 top-level comments for a video, ordered by relevance.

    Args:
        video_id: The YouTube video id whose comment threads are requested.

    Returns:
        A list with the ``textOriginal`` of each top-level comment. The list
        is empty when the response has no ``items`` key (for example an API
        error payload), which the caller reports as "No comments found."
    """
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/commentThreads",
        # Let requests build and URL-encode the query string.
        params={
            "part": "snippet",
            "videoId": video_id,
            "key": api_key,
            "maxResults": 100,
            "order": "relevance",
        },
        # Avoid hanging the UI forever on a stalled connection.
        timeout=10,
    ).json()
    # .get() with a default avoids a KeyError on payloads without "items".
    return [
        item["snippet"]["topLevelComment"]["snippet"]["textOriginal"]
        for item in response.get("items", [])
    ]
def sentiment_scores(comment_text):
    """Return the analyzer's ``compound`` polarity score for one comment.

    The score comes from the module-level ``sia`` analyzer's
    ``polarity_scores`` result.
    """
    return sia.polarity_scores(comment_text)['compound']
def detect_sarcasm_batch(comments):
    """Score a batch of comments with the sarcasm classifier.

    Args:
        comments: A list of comment strings.

    Returns:
        A list of floats, one per comment, holding the softmax probability of
        class index 1. This code treats index 1 as the "sarcastic" class;
        the actual label mapping depends on the checkpoint — TODO confirm.
        An empty input returns an empty list without invoking the model.
    """
    # The tokenizer is not meant to be called with an empty batch.
    if not comments:
        return []
    inputs = sarcasm_tokenizer(
        comments, return_tensors="pt", truncation=True, padding=True
    ).to(device)
    # Inference only: no gradients needed.
    with torch.no_grad():
        logits = sarcasm_model(**inputs).logits
    probabilities = torch.nn.functional.softmax(logits, dim=-1)
    return probabilities[:, 1].tolist()
def get_sentiment_label(row):
    """Map one comment row to a category-aware sentiment label.

    Args:
        row: A mapping with ``'polarity'``, ``'sarcasm_score'`` and
            ``'category'`` entries (e.g. a DataFrame row).

    Returns:
        ``"Sarcastic"`` when the sarcasm score exceeds 0.5. Otherwise a
        positive label for polarity above 0.05, a negative label for polarity
        below -0.05, and ``"Neutral"`` in between. The positive/negative
        wording depends on the category, with a generic fallback.
    """
    score = row['polarity']
    sarcasm = row['sarcasm_score']
    video_category = row['category']

    if sarcasm > 0.5:
        return "Sarcastic"

    # (positive label, negative label) per category with custom wording.
    wording = {
        "Comedy": ("Funny/Enjoyable", "Unfunny/Criticism"),
        "Education": ("Helpful/Informative", "Confusing/Criticism"),
        "Music": ("Enjoyed", "Criticism/Disliked"),
        "Entertainment": ("Entertained", "Bored/Criticism"),
    }
    positive_label, negative_label = wording.get(
        video_category, ("Positive", "Negative")
    )

    if score > 0.05:
        return positive_label
    if score < -0.05:
        return negative_label
    return "Neutral"
def extract_keywords(comments_for_video_df):
    """Plot the top keywords found in the comments.

    Fits a ``TfidfVectorizer`` (English stop words removed, at most 20
    features) on the ``'comment_text'`` column and draws a horizontal bar
    chart of each kept term against its ``idf_`` weight.

    Args:
        comments_for_video_df: DataFrame with a ``'comment_text'`` column.

    Returns:
        The matplotlib figure, or ``None`` when the vectorizer raises
        ``ValueError`` while fitting (e.g. an empty vocabulary because the
        comments contain only stop words).
    """
    # The original also encoded every comment with the sentence-transformer
    # model and discarded the result; that unused work has been dropped.
    tfidf = TfidfVectorizer(stop_words='english', max_features=20)
    try:
        tfidf.fit(comments_for_video_df['comment_text'])
    except ValueError:
        # No usable vocabulary: nothing to plot.
        return None

    keyword_importance_df = pd.DataFrame({
        'keyword': tfidf.get_feature_names_out(),
        # NOTE: this is the inverse-document-frequency weight of each term;
        # the axis label below calls it "TF-IDF Importance".
        'importance': tfidf.idf_,
    })

    plt.figure(figsize=(10, 6))
    # hue + legend=False matches the barplot call in
    # plot_sentiment_distribution and avoids seaborn's palette-without-hue
    # deprecation warning.
    sns.barplot(
        y='keyword',
        x='importance',
        hue='keyword',
        data=keyword_importance_df,
        palette='pastel',
        legend=False,
    )
    plt.title('Top Keywords in Comments')
    plt.xlabel('TF-IDF Importance')
    plt.ylabel('Keyword')
    plt.tight_layout()
    return plt.gcf()
def analyze_video_sentiment(video_url):
    """Run the full comment-analysis pipeline for one YouTube URL.

    Steps: resolve the video id, fetch title/category and comments, score
    each comment for polarity and sarcasm, label it, and build the keyword
    plot.

    Args:
        video_url: URL text entered by the user.

    Returns:
        A 5-tuple ``(all_comments_df, top_10_df, video_title, category,
        keyword_plot)``. When the URL is invalid or no comments are found,
        the first element is a one-row DataFrame with an ``"Error"`` column
        and the other four elements are ``None``.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        return pd.DataFrame({"Error": ["Invalid YouTube URL."]}), None, None, None, None

    video_title, category_id = get_video_details(video_id)
    # YouTube category id -> display name.
    category_names = {
        "1": "Film & Animation", "2": "Autos & Vehicles", "10": "Music", "15": "Pets & Animals",
        "17": "Sports", "18": "Short Movies", "19": "Travel & Events", "20": "Gaming",
        "21": "Videoblogging", "22": "People & Blogs", "23": "Comedy", "24": "Entertainment",
        "25": "News & Politics", "26": "Howto & Style", "27": "Education", "28": "Science & Technology",
        "29": "Nonprofits & Activism", "30": "Movies", "31": "Anime/Animation", "32": "Action/Adventure",
        "33": "Classics", "34": "Comedy", "35": "Documentary", "36": "Drama", "37": "Family",
        "38": "Foreign", "39": "Horror", "40": "Sci-Fi/Fantasy", "41": "Thriller", "42": "Shorts",
        "43": "Shows", "44": "Trailers",
    }
    category = category_names.get(category_id, "Unknown Category")

    comments = get_comments(video_id)
    if not comments:
        return pd.DataFrame({"Error": ["No comments found."]}), None, None, None, None

    frame = pd.DataFrame(comments, columns=["comment_text"])
    frame['polarity'] = frame['comment_text'].apply(sentiment_scores)

    # Score sarcasm in fixed-size batches rather than all comments at once.
    chunk = 32
    texts = frame['comment_text'].tolist()
    sarcasm_values = []
    for start in range(0, len(texts), chunk):
        sarcasm_values.extend(detect_sarcasm_batch(texts[start:start + chunk]))
    frame['sarcasm_score'] = sarcasm_values

    # Every comment of the video shares the video's category.
    frame['category'] = category
    frame['Prominent sentiment'] = frame.apply(get_sentiment_label, axis=1)

    keyword_plot = extract_keywords(frame)

    # All comments are analysed; only the first ten rows (API relevance
    # order) are returned for display.
    first_ten = frame[['comment_text', 'Prominent sentiment']].head(10)
    return frame, first_ten, video_title, category, keyword_plot
def plot_sentiment_distribution(df):
    """Draw a bar chart of how many comments carry each sentiment label.

    Args:
        df: DataFrame expected to have a ``'Prominent sentiment'`` column.

    Returns:
        The matplotlib figure, or ``None`` when the column is absent.
    """
    if 'Prominent sentiment' not in df.columns:
        return None

    counts = (
        df['Prominent sentiment']
        .value_counts()
        .rename_axis('Sentiment')
        .reset_index(name='Comment Count')
    )

    figure = plt.figure(figsize=(10, 6))
    sns.barplot(
        x='Sentiment',
        y='Comment Count',
        hue='Sentiment',
        data=counts,
        palette="pastel",
        legend=False,
    )
    plt.title('Number of Comments by Sentiment', fontsize=14)
    plt.xlabel('Sentiment', fontsize=12)
    plt.ylabel('Number of Comments', fontsize=12)
    plt.xticks(rotation=45)
    plt.tight_layout()
    return figure
def plot_sarcasm_vs_polarity(df):
    """Scatter-plot each comment's polarity against its sarcasm score.

    Points are coloured by ``'Prominent sentiment'`` when that column is
    present; otherwise the plot is drawn without a hue instead of failing on
    the missing column.

    Args:
        df: DataFrame expected to have ``'polarity'`` and ``'sarcasm_score'``
            columns.

    Returns:
        The matplotlib figure, or ``None`` when either required column is
        absent.
    """
    if 'polarity' not in df.columns or 'sarcasm_score' not in df.columns:
        return None

    # Only colour by label when the label column actually exists.
    hue_column = 'Prominent sentiment' if 'Prominent sentiment' in df.columns else None

    plt.figure(figsize=(10, 6))
    sns.scatterplot(
        x='polarity',
        y='sarcasm_score',
        hue=hue_column,
        data=df,
        # A palette is only meaningful together with a hue variable.
        palette="pastel" if hue_column else None,
    )
    plt.title('Polarity vs. Sarcasm Score', fontsize=14)
    plt.xlabel('Polarity Score', fontsize=12)
    plt.ylabel('Sarcasm Score', fontsize=12)
    plt.tight_layout()
    return plt.gcf()
def gradio_interface(video_url):
    """Gradio callback: analyse a video URL and build all UI outputs.

    Args:
        video_url: URL text from the input textbox.

    Returns:
        A 6-tuple matching the click handler's outputs: ``(comments table,
        sentiment plot, sarcasm plot, keyword plot, insights markdown,
        video-details markdown)``. On failure the table is the one-row
        ``"Error"`` DataFrame from ``analyze_video_sentiment``, the plots and
        video details are ``None``, and the insights text includes the error
        message.
    """
    full_df, df, video_title, category, keyword_plot = analyze_video_sentiment(video_url)

    if not category:
        # On failure the error frame is the FIRST element (full_df); the
        # second element (df) is None. Returning df here, as before, meant
        # the error message never reached the UI.
        message = "No insights available."
        if full_df is not None and "Error" in full_df.columns:
            message = f"{message} {full_df['Error'].iloc[0]}"
        return full_df, None, None, None, message, None

    sentiment_plot = plot_sentiment_distribution(full_df)
    sarcasm_plot = plot_sarcasm_vs_polarity(full_df)
    insights = f"**Title:** {video_title}\n\n**Category:** {category}"
    # The same markdown is shown in both the insights and video-details boxes.
    return df, sentiment_plot, sarcasm_plot, keyword_plot, insights, insights
# Custom CSS for the button, the two markdown boxes and the page body.
# It is injected through a gr.HTML component at the end of the layout.
custom_css = """
<style>
#analyze-btn {
background-color: #4CAF50; /* Green */
color: white;
border: none;
padding: 10px 24px;
text-align: center;
text-decoration: none;
display: inline-block;
font-size: 16px;
border-radius: 8px;
cursor: pointer;
}
#insights-box {
color: #FFD700;
font-weight: bold;
}
#video-details-box {
color: #1E90FF;
font-weight: bold;
}
body {
background-color: #1f1f1f;
color: #e0e0e0;
}
</style>
"""

# NOTE(review): the source this was recovered from had lost its indentation,
# so which components sit inside the Row / Accordion is reconstructed —
# confirm against the intended layout.
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    # Page header.
    gr.Markdown(
        "\n"
        "# 🎥 YouTube Sentiment Analysis\n"
        "Enter a YouTube video URL below to analyze the comments for sentiment and sarcasm\n"
    )

    # Input row: URL textbox and the trigger button.
    with gr.Row():
        video_input = gr.Textbox(
            label="YouTube Video URL",
            placeholder="Enter a YouTube video URL here...",
        )
        analyze_button = gr.Button("Analyze", variant="primary", elem_id="analyze-btn")

    video_details = gr.Markdown(label="Video Details", elem_id="video-details-box")

    # Collapsed by default; holds the table of the first ten comments.
    with gr.Accordion("Top 10 Comments", open=False):
        comment_text = gr.Dataframe(label="Top 10 Comments", interactive=False)

    sentiment_graph = gr.Plot(label="Sentiment Distribution")
    sarcasm_graph = gr.Plot(label="Sarcasm vs Polarity")
    keyword_graph = gr.Plot(label="Top Keywords")
    insights_box = gr.Markdown(label="Insights", elem_id="insights-box")

    # Output order must match the tuple returned by gradio_interface().
    result_components = [
        comment_text,
        sentiment_graph,
        sarcasm_graph,
        keyword_graph,
        insights_box,
        video_details,
    ]
    analyze_button.click(gradio_interface, inputs=video_input, outputs=result_components)

    gr.HTML(custom_css)

demo.launch(debug=True)