File size: 2,296 Bytes
73dbe48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd
import numpy as np
import googleapiclient.discovery

# Initialize the tokenizer and model once at import time so every request
# reuses the same loaded weights.
# nlptown's multilingual BERT predicts a 1-5 star rating (5 classes) —
# sentiment_score below maps argmax(logits) to that 1-5 scale.
tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")

# Fetch every top-level comment on a YouTube video, following pagination.
def get_comments(youtube, **kwargs):
    """Return the display text of all top-level comments for a video.

    *kwargs* are passed straight through to ``commentThreads().list``;
    ``pageToken`` is injected here to walk each results page in turn.
    """
    texts = []
    response = youtube.commentThreads().list(**kwargs).execute()

    while response:
        texts.extend(
            item['snippet']['topLevelComment']['snippet']['textDisplay']
            for item in response['items']
        )
        # Absent nextPageToken means this was the last page.
        token = response.get('nextPageToken')
        if token is None:
            break
        kwargs['pageToken'] = token
        response = youtube.commentThreads().list(**kwargs).execute()

    return texts

# Function to get sentiment score
def sentiment_score(comment):
    """Return a 1-5 star sentiment rating for *comment*.

    Fix: truncate at the tokenizer level (512 tokens, the BERT limit) —
    the previous call could feed the model an over-long sequence, since
    callers can only slice characters, not tokens. Inference also runs
    under ``torch.no_grad()`` to skip autograd bookkeeping.
    """
    with torch.no_grad():
        input_ids = tokenizer.encode(
            comment, return_tensors='pt', truncation=True, max_length=512
        )
        logits = model(input_ids).logits
    # Model classes are 0-indexed; star ratings are 1-5.
    return torch.argmax(logits).item() + 1

# Gradio function to analyze video comments
def analyze_video(video_id, api_key):
    """Fetch up to 100 comments for *video_id* and tally their sentiment.

    Parameters:
        video_id: YouTube video ID to analyze.
        api_key: YouTube Data API v3 key used to build the client.

    Returns:
        dict mapping star rating (1-5) to the number of comments with that
        rating, or the string "No comments found." when there are none.
    """
    youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
    comments = get_comments(youtube, part="snippet", videoId=video_id, textFormat="plainText")

    if not comments:
        return "No comments found."

    # Cap at 100 comments to bound model-inference time per request.
    # (Fix: build the DataFrame from the list directly — the former
    # np.array() round-trip added nothing.)
    df = pd.DataFrame({'comments': comments[:100]})

    # Slice to 512 *characters* as a cheap guard against huge comments.
    # NOTE(review): character count is not token count — the model's real
    # limit is 512 tokens; confirm sentiment_score truncates at the
    # tokenizer level as well.
    df['sentiment'] = df['comments'].apply(lambda text: sentiment_score(text[:512]))

    # Key = sentiment score (1-5), value = number of comments with it.
    return df['sentiment'].value_counts().sort_index().to_dict()

# Creating the Gradio Interface
# Two text inputs (video ID and API key) are passed positionally to
# analyze_video; its return value — the rating->count dict, or the
# "No comments found." string — is rendered by the JSON output component.
gr_interface = gr.Interface(
    fn=analyze_video,
    inputs=[gr.Textbox(label="YouTube Video ID"), gr.Textbox(label="YouTube API Key")],
    outputs="json",
    title="YouTube Comment Sentiment Analysis",
    description="Input a YouTube video ID and your API key to analyze the sentiment of the comments."
)

# Launch the Gradio app (blocks and serves the web UI).
gr_interface.launch()