# Source: Hugging Face Space "testing" — app.py by hasbigani
# (commit 46269ce, verified). Header converted to comments so the
# file is valid Python.
# Standard library
import base64
import os
import re
from io import BytesIO

# Third-party
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import requests
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Model currently in use: hasbigani/indobertsentiment — an IndoBERT
# sequence-classification checkpoint (3 labels, mapped to
# Negative/Neutral/Positive further down in classify_sentiment).
# NOTE: loading happens at import time and downloads weights on first run.
model_name = "hasbigani/indobertsentiment"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Text-cleaning helper applied to every comment before tokenization.
def clean_text(text):
    """Normalize a raw comment string.

    Strips URLs, then any character that is neither a word character nor
    whitespace (punctuation, emoji), then digit runs, and finally
    lowercases what remains.
    """
    # Substitutions run in this exact order; each match is deleted.
    removal_patterns = (
        r'http\S+|www\S+',  # URLs
        r'[^\w\s]',         # punctuation / emoji / other non-word symbols
        r'\d+',             # digit runs
    )
    for pattern in removal_patterns:
        text = re.sub(pattern, '', text)
    return text.lower()
# Extract the 11-character YouTube video ID from a URL.
def extract_video_id(url):
    """Return the 11-char video ID from a YouTube URL, or None if absent.

    Recognizes watch URLs (?v=...), youtu.be short links, and — as a
    backward-compatible extension — /shorts/ and /embed/ paths.
    (Also removed a redundant function-local `import re`; `re` is already
    imported at module level.)
    """
    match = re.search(r"(?:v=|youtu\.be/|shorts/|embed/)([\w-]{11})", url)
    return match.group(1) if match else None
# Fetch top-level comments for a YouTube video via the Data API v3.
def get_youtube_comments(url, max_comments=100, api_key=None):
    """Return up to `max_comments` top-level comment strings.

    Args:
        url: any YouTube URL understood by extract_video_id.
        max_comments: upper bound on comments collected.
        api_key: YouTube Data API key; when None, read from the
            YOUTUBE_API_KEY environment variable, falling back to the
            legacy embedded key for backward compatibility.

    Returns:
        A list of plain-text comments; empty when the URL is invalid,
        the request fails, or the API returns a non-200 status.
    """
    video_id = extract_video_id(url)
    if not video_id:
        return []
    if api_key is None:
        # SECURITY: the embedded fallback key is committed to a public repo
        # and should be revoked/rotated; set YOUTUBE_API_KEY instead.
        api_key = os.environ.get(
            "YOUTUBE_API_KEY", "AIzaSyCsgA_lFc6rQTHiHWWDikYQDEHU8rtbygU"
        )
    comments = []
    next_page_token = ""
    while len(comments) < max_comments:
        api_url = (
            "https://www.googleapis.com/youtube/v3/commentThreads"
            f"?part=snippet&videoId={video_id}&key={api_key}"
            f"&textFormat=plainText&maxResults=100&pageToken={next_page_token}"
        )
        try:
            # Timeout so a stalled API call cannot hang the Gradio request.
            response = requests.get(api_url, timeout=10)
        except requests.RequestException:
            break  # best-effort: return whatever we collected so far
        if response.status_code != 200:
            break
        data = response.json()
        for item in data.get("items", []):
            comments.append(
                item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            )
            if len(comments) >= max_comments:
                break
        next_page_token = data.get("nextPageToken", "")
        if not next_page_token:
            break
    return comments
# Run IndoBERT sentiment classification over a list of raw comments.
def classify_sentiment(comments):
    """Classify each comment's sentiment with the IndoBERT model.

    Returns a list of (cleaned_comment, label, confidence) tuples, where
    label is "Negative"/"Neutral"/"Positive" (model indices 0/1/2) and
    confidence is the softmax probability of the predicted class.
    """
    label_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
    results = []
    for raw_comment in comments:
        cleaned = clean_text(raw_comment)  # same preprocessing as before tokenizing
        encoded = tokenizer(cleaned, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            logits = model(**encoded).logits
        probabilities = torch.nn.functional.softmax(logits, dim=1)
        predicted_index = int(probabilities.argmax(dim=1).item())
        confidence = float(probabilities.max().item())
        results.append((cleaned, label_map[predicted_index], confidence))
    return results
# Render sentiment-distribution charts and return them as an inline <img> tag.
def generate_visualization(results):
    """Build a pie + bar chart of IndoBERT sentiment counts.

    Args:
        results: iterable of (comment, label, confidence) tuples, as
            produced by classify_sentiment.

    Returns:
        An HTML <img> string embedding the chart as a base64 PNG.
    """
    df = pd.DataFrame(results, columns=["Comment", "IndoBERT", "Confidence"])
    # Fixed label order so colors stay aligned: green/yellow/red.
    order = ["Positive", "Neutral", "Negative"]
    colors = ["green", "yellow", "red"]
    counts = df["IndoBERT"].value_counts().reindex(order, fill_value=0)

    fig, axs = plt.subplots(1, 2, figsize=(18, 5))
    axs[0].pie(counts, labels=counts.index, autopct='%1.1f%%', colors=colors)
    axs[0].set_title("IndoBERT Sentiment Distribution")
    axs[1].bar(order, counts.values, color=colors)
    axs[1].set_title("Sentiment Comparison (Bar)")

    buf = BytesIO()
    # Operate on the figure object instead of pyplot's global "current
    # figure": concurrent Gradio requests could otherwise clobber or close
    # each other's figures. try/finally guarantees the figure is released
    # even if rendering raises, preventing a memory leak.
    try:
        fig.tight_layout()
        fig.savefig(buf, format="png")
    finally:
        plt.close(fig)
    buf.seek(0)
    encoded = base64.b64encode(buf.read()).decode("utf-8")
    return f"<img src='data:image/png;base64,{encoded}'/>"
# Orchestrator wired to the Gradio interface: fetch, classify, visualize.
def analyze_sentiment(url, jumlah):
    """Fetch up to `jumlah` comments from the video at `url`, classify
    them, and return a (DataFrame, chart-HTML) pair for Gradio."""
    comments = get_youtube_comments(url, max_comments=jumlah)
    if not comments:
        # No comments found (invalid URL, API failure, or none available).
        return pd.DataFrame(), "Tidak ada komentar ditemukan"
    classified = classify_sentiment(comments)
    table = pd.DataFrame(classified, columns=["Komentar", "IndoBERT", "Confidence"])
    return table, generate_visualization(classified)
# Build and launch the Gradio web UI. launch() blocks and serves the app;
# this runs at import time of app.py (presumably a Hugging Face Space —
# confirm against the deployment setup). UI labels are intentionally in
# Indonesian and must stay as-is.
gr.Interface(
fn=analyze_sentiment,
inputs=[
# YouTube video URL to analyze.
gr.Text(label="URL Video YouTube"),
# Number of comments to fetch: 10-200 in steps of 10, default 50.
gr.Slider(10, 200, value=50, step=10, label="Jumlah komentar yang dianalisis")
],
outputs=[
# Table of (comment, label, confidence) rows from analyze_sentiment.
gr.Dataframe(label="Preview Komentar dan Sentimen"),
# Base64-embedded chart image produced by generate_visualization.
gr.HTML(label="Visualisasi Sentimen")
],
title="Analisis Komentar YouTube 🇮🇩 dengan IndoBERT",
description="Masukkan URL YouTube dan sistem akan menarik komentar dan menganalisisnya menggunakan model IndoBERT."
).launch()