|
|
import gradio as gr |
|
|
import requests |
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
import torch |
|
|
import matplotlib.pyplot as plt |
|
|
import pandas as pd |
|
|
from io import BytesIO |
|
|
import base64 |
|
|
import re |
|
|
|
|
|
|
|
|
# Hugging Face Hub ID of the Indonesian sentiment-classification model.
model_name = "hasbigani/indobertsentiment"


# Load the tokenizer and classifier once at import time so every Gradio
# request reuses the same in-memory model (first run downloads the weights).
tokenizer = AutoTokenizer.from_pretrained(model_name)


model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
|
|
|
|
|
|
|
def clean_text(text):
    """Normalize a raw comment: drop URLs, punctuation and digits, lowercase.

    Returns the cleaned string; whitespace between words is preserved.
    """
    # Order matters: URLs must be removed first, otherwise stripping
    # punctuation would mangle them into plain words.
    for pattern in (r'http\S+|www\S+', r'[^\w\s]', r'\d+'):
        text = re.sub(pattern, '', text)
    return text.lower()
|
|
|
|
|
|
|
|
def extract_video_id(url):
    """Return the 11-character YouTube video ID found in *url*, or None.

    Handles both the long form (``...watch?v=<id>``) and the short
    ``youtu.be/<id>`` form.
    """
    # The redundant function-local `import re` was removed; the module-level
    # import already provides it.
    match = re.search(r"(?:v=|youtu\.be/)([\w-]{11})", url)
    return match.group(1) if match else None
|
|
|
|
|
|
|
|
def get_youtube_comments(url, max_comments=100,
                         api_key="AIzaSyCsgA_lFc6rQTHiHWWDikYQDEHU8rtbygU"):
    """Fetch up to *max_comments* top-level comments for a YouTube video.

    Parameters:
        url: any YouTube video URL accepted by extract_video_id().
        max_comments: cap on the number of comments returned.
        api_key: YouTube Data API v3 key. SECURITY: the default keeps the
            previously hard-coded key for backward compatibility, but it
            should be moved to configuration/environment, not source code.

    Returns a list of comment strings (empty on any failure — the function
    is deliberately best-effort and never raises for network problems).
    """
    video_id = extract_video_id(url)
    if not video_id:
        return []

    comments = []
    next_page_token = ""
    while len(comments) < max_comments:
        try:
            # params= lets requests do proper URL encoding; the timeout
            # keeps a stalled API call from hanging the Gradio worker.
            response = requests.get(
                "https://www.googleapis.com/youtube/v3/commentThreads",
                params={
                    "part": "snippet",
                    "videoId": video_id,
                    "key": api_key,
                    "textFormat": "plainText",
                    "maxResults": 100,
                    "pageToken": next_page_token,
                },
                timeout=10,
            )
        except requests.RequestException:
            break
        if response.status_code != 200:
            break
        data = response.json()
        for item in data.get("items", []):
            comments.append(
                item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            )
            if len(comments) >= max_comments:
                break
        next_page_token = data.get("nextPageToken", "")
        if not next_page_token:
            break
    return comments
|
|
|
|
|
|
|
|
def classify_sentiment(comments):
    """Classify each comment with the module-level IndoBERT model.

    Returns a list of (cleaned_comment, label, confidence) tuples, where
    label is one of "Negative" / "Neutral" / "Positive".
    """
    id2label = {0: "Negative", 1: "Neutral", 2: "Positive"}
    predictions = []

    for raw in comments:
        cleaned = clean_text(raw)
        encoded = tokenizer(cleaned, return_tensors="pt",
                            truncation=True, padding=True)
        # Inference only — no gradients needed.
        with torch.no_grad():
            logits = model(**encoded).logits
        probabilities = torch.nn.functional.softmax(logits, dim=1)
        best_idx = torch.argmax(probabilities, dim=1).item()
        confidence = torch.max(probabilities).item()
        predictions.append((cleaned, id2label[best_idx], confidence))

    return predictions
|
|
|
|
|
|
|
|
def generate_visualization(results):
    """Render pie + bar charts of the IndoBERT label distribution.

    Parameters:
        results: list of (comment, label, confidence) tuples as produced
            by classify_sentiment().

    Returns an HTML ``<img>`` tag with the chart embedded as a base64 PNG.
    """
    df = pd.DataFrame(results, columns=["Comment", "IndoBERT", "Confidence"])

    # Fixed label order keeps the color mapping stable:
    # Positive->green, Neutral->yellow, Negative->red.
    labels = ["Positive", "Neutral", "Negative"]
    colors = ["green", "yellow", "red"]
    counts = df["IndoBERT"].value_counts().reindex(labels, fill_value=0)

    # Use the object-oriented API (fig.savefig / plt.close(fig)) instead of
    # pyplot's implicit "current figure" state: Gradio can serve concurrent
    # requests, and the global pyplot state is not safe to share — a bare
    # plt.close() could also close another request's figure and leak this one.
    fig, axs = plt.subplots(1, 2, figsize=(18, 5))
    axs[0].pie(counts, labels=counts.index, autopct='%1.1f%%', colors=colors)
    axs[0].set_title("IndoBERT Sentiment Distribution")
    axs[1].bar(labels, counts.values, color=colors)
    axs[1].set_title("Sentiment Comparison (Bar)")

    buf = BytesIO()
    fig.tight_layout()
    fig.savefig(buf, format="png")
    plt.close(fig)
    buf.seek(0)
    encoded = base64.b64encode(buf.read()).decode("utf-8")
    return f"<img src='data:image/png;base64,{encoded}'/>"
|
|
|
|
|
|
|
|
def analyze_sentiment(url, jumlah):
    """Gradio callback: fetch comments from *url*, classify, and chart them.

    Returns (DataFrame of comments + labels, HTML chart). When no comments
    are found, returns an empty DataFrame and an Indonesian notice string.
    """
    fetched = get_youtube_comments(url, max_comments=jumlah)
    if not fetched:
        return pd.DataFrame(), "Tidak ada komentar ditemukan"

    classified = classify_sentiment(fetched)
    table = pd.DataFrame(classified,
                         columns=["Komentar", "IndoBERT", "Confidence"])
    return table, generate_visualization(classified)
|
|
|
|
|
# Build and launch the web UI. Inputs: a YouTube URL plus a slider for how
# many comments to fetch; outputs: a preview table and an HTML chart.
demo = gr.Interface(
    fn=analyze_sentiment,
    inputs=[
        gr.Text(label="URL Video YouTube"),
        gr.Slider(10, 200, value=50, step=10,
                  label="Jumlah komentar yang dianalisis"),
    ],
    outputs=[
        gr.Dataframe(label="Preview Komentar dan Sentimen"),
        gr.HTML(label="Visualisasi Sentimen"),
    ],
    title="Analisis Komentar YouTube 🇮🇩 dengan IndoBERT",
    description="Masukkan URL YouTube dan sistem akan menarik komentar dan menganalisisnya menggunakan model IndoBERT.",
)
demo.launch()
|
|
|