Spaces:

hasbigani
/

testing

Sleeping

App Files Files Community

hasbigani committed on Jul 26, 2025

Commit

7cfa7d7

verified ·

1 Parent(s): 7112c24

Update app.py

Browse files

Files changed (1) hide show

app.py +128 -28

app.py CHANGED Viewed

@@ -1,36 +1,136 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
-# Ganti dengan nama repository model kamu
 model_name = "hasbigani/indobertsentiment"
-# Load model & tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSequenceClassification.from_pretrained(model_name)
-# Label mapping (ubah sesuai label modelmu)
-label_map = {
-    0: "Negatif",
-    1: "Netral",
-    2: "Positif"
-}
-# Fungsi prediksi
-def predict_sentiment(text):
-    inputs = tokenizer([text], padding=True, truncation=True, return_tensors="pt")
-    with torch.no_grad():
-        outputs = model(**inputs)
-        pred = torch.argmax(outputs.logits, dim=-1).item()
-    return label_map[pred]
-# Gradio interface
-iface = gr.Interface(
-    fn=predict_sentiment,
-    inputs=gr.Textbox(lines=3, placeholder="Tulis teks di sini..."),
-    outputs=gr.Label(),
-    title="Demo Sentimen IndoBERT",
-    description="Masukkan kalimat berbahasa Indonesia untuk menguji model sentimen yang sudah diupload di Hugging Face."
-)
-iface.launch()

 import gradio as gr
+import requests
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
+import matplotlib.pyplot as plt
+import pandas as pd
+from io import BytesIO
+import base64
+import re
+from PIL import Image
+from io import BytesIO
+# Model currently in use: hasbigani/indobertsentiment.
+# The tokenizer and model are loaded once at module import and are the
+# module-level objects that classify_sentiment() reads.
 model_name = "hasbigani/indobertsentiment"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSequenceClassification.from_pretrained(model_name)
+# Text-cleaning helper
+def clean_text(text):
+    """Normalise a raw comment string before it is tokenised.
+
+    Applied in order: strip URLs, strip every character that is neither a
+    word character nor whitespace (punctuation, emoji), strip digit runs,
+    then lower-case the result. Whitespace is left untouched, so removed
+    tokens may leave consecutive spaces behind.
+
+    Args:
+        text: The raw comment text.
+
+    Returns:
+        The cleaned, lower-cased text.
+    """
+    # URLs go first: once punctuation is stripped they can no longer be
+    # recognised as URLs.
+    for pattern in (r'http\S+|www\S+', r'[^\w\s]', r'\d+'):
+        text = re.sub(pattern, '', text)
+    return text.lower()
+# Extract the video ID from a YouTube URL
+def extract_video_id(url):
+    """Return the 11-character video ID found in ``url``, or ``None``.
+
+    Recognised forms are ``...v=<id>``, ``youtu.be/<id>``, ``/shorts/<id>``,
+    ``/embed/<id>`` and ``/live/<id>``.
+
+    Args:
+        url: A YouTube URL. ``None``, non-string and empty values are
+            accepted and yield ``None`` instead of raising.
+
+    Returns:
+        The video ID string, or ``None`` when no ID can be found.
+    """
+    # re.search raises TypeError on None / non-string input.
+    if not isinstance(url, str) or not url:
+        return None
+    match = re.search(
+        r"(?:v=|youtu\.be/|/shorts/|/embed/|/live/)([\w-]{11})", url
+    )
+    return match.group(1) if match else None
+# Fetch top-level comments for a YouTube video
+def get_youtube_comments(url, max_comments=100):
+    """Return up to ``max_comments`` top-level comment texts for a video.
+
+    The video ID is taken from ``url`` with ``extract_video_id`` and the
+    YouTube Data API v3 ``commentThreads`` endpoint is paged through until
+    enough comments are collected or no further page token is returned.
+
+    The API key is read from the ``YOUTUBE_API_KEY`` environment variable
+    rather than being embedded in the source: a key committed to a
+    repository is readable by anyone with access to it and should be
+    treated as leaked and rotated.
+
+    Args:
+        url: YouTube video URL.
+        max_comments: Upper bound on the number of comments returned.
+
+    Returns:
+        A list of comment strings. It is empty when no video ID can be
+        extracted or no API key is configured, and may be shorter than
+        requested when a request fails or no more pages are available.
+    """
+    import logging
+    import os
+
+    video_id = extract_video_id(url)
+    if not video_id:
+        return []
+    api_key = os.environ.get("YOUTUBE_API_KEY")
+    if not api_key:
+        logging.getLogger(__name__).warning(
+            "YOUTUBE_API_KEY is not set; cannot fetch YouTube comments"
+        )
+        return []
+    endpoint = "https://www.googleapis.com/youtube/v3/commentThreads"
+    comments = []
+    next_page_token = ""
+    while len(comments) < max_comments:
+        # Let requests build and escape the query string.
+        params = {
+            "part": "snippet",
+            "videoId": video_id,
+            "key": api_key,
+            "textFormat": "plainText",
+            "maxResults": 100,
+        }
+        if next_page_token:
+            params["pageToken"] = next_page_token
+        try:
+            # A timeout keeps a stalled connection from hanging the request.
+            response = requests.get(endpoint, params=params, timeout=10)
+        except requests.RequestException:
+            # Best effort, same as a non-200 answer: keep what we have.
+            break
+        if response.status_code != 200:
+            break
+        data = response.json()
+        for item in data.get("items", []):
+            comment = item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
+            comments.append(comment)
+            if len(comments) >= max_comments:
+                break
+        next_page_token = data.get("nextPageToken", "")
+        if not next_page_token:
+            break
+    return comments
+# Classify comment sentiment with the IndoBERT model
+def classify_sentiment(comments):
+    """Label each comment as Negative, Neutral or Positive.
+
+    Every comment is cleaned with ``clean_text`` and then run through the
+    module-level ``tokenizer`` and ``model`` one at a time.
+
+    Args:
+        comments: Iterable of raw comment strings.
+
+    Returns:
+        A list of ``(cleaned_comment, label, confidence)`` tuples, where
+        ``confidence`` is the highest softmax probability for that comment.
+        Note that the cleaned text, not the raw comment, is returned.
+    """
+    label_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
+    # Clean everything up front, before any model call.
+    cleaned_comments = [clean_text(raw) for raw in comments]
+    scored = []
+    for cleaned in cleaned_comments:
+        encoded = tokenizer(cleaned, return_tensors="pt", truncation=True, padding=True)
+        # Inference only: no gradients needed.
+        with torch.no_grad():
+            logits = model(**encoded).logits
+        probs = torch.nn.functional.softmax(logits, dim=1)
+        class_index = torch.argmax(probs, dim=1).item()
+        top_probability = torch.max(probs).item()
+        scored.append((cleaned, label_map[class_index], top_probability))
+    return scored
+# Build the sentiment charts
+def generate_visualization(results):
+    """Render a pie chart and a bar chart of the label distribution.
+
+    Args:
+        results: Sequence of ``(comment, label, confidence)`` tuples.
+
+    Returns:
+        An HTML ``<img>`` tag whose source is the PNG figure encoded as a
+        base64 data URI.
+    """
+    categories = ["Positive", "Neutral", "Negative"]
+    palette = ["green", "yellow", "red"]
+    df = pd.DataFrame(results, columns=["Comment", "IndoBERT", "Confidence"])
+    # reindex pins the category order and fills absent labels with 0.
+    indo_counts = df["IndoBERT"].value_counts().reindex(categories, fill_value=0)
+    fig, (pie_ax, bar_ax) = plt.subplots(1, 2, figsize=(18, 5))
+    pie_ax.pie(indo_counts, labels=indo_counts.index, autopct='%1.1f%%', colors=palette)
+    pie_ax.set_title("IndoBERT Sentiment Distribution")
+    bar_ax.bar(categories, indo_counts.values, color=palette)
+    bar_ax.set_title("Sentiment Comparison (Bar)")
+    png_buffer = BytesIO()
+    plt.tight_layout()
+    plt.savefig(png_buffer, format="png")
+    encoded = base64.b64encode(png_buffer.getvalue()).decode("utf-8")
+    # Close the figure so repeated calls do not accumulate open figures.
+    plt.close()
+    return f"<img src='data:image/png;base64,{encoded}'/>"
+# Build the thumbnail URL for a YouTube video
+def get_thumbnail(url):
+    """Return the thumbnail image URL for the video in ``url``.
+
+    Args:
+        url: YouTube video URL.
+
+    Returns:
+        The ``img.youtube.com`` thumbnail URL, or ``None`` when
+        ``extract_video_id`` finds no video ID.
+    """
+    video_id = extract_video_id(url)
+    return f"https://img.youtube.com/vi/{video_id}/0.jpg" if video_id else None
+# Main entry point for the sentiment analysis
+def analyze_sentiment(url, jumlah):
+    """Fetch comments for a video, classify them and build the UI outputs.
+
+    Args:
+        url: YouTube video URL.
+        jumlah: Maximum number of comments to analyse.
+
+    Returns:
+        A ``(dataframe, chart_html, thumbnail_url)`` tuple. When no comments
+        are fetched the tuple is an empty DataFrame, a "no comments found"
+        message and ``None``.
+    """
+    fetched = get_youtube_comments(url, max_comments=jumlah)
+    if fetched:
+        scored = classify_sentiment(fetched)
+        table = pd.DataFrame(scored, columns=["Komentar", "IndoBERT", "Confidence"])
+        return table, generate_visualization(scored), get_thumbnail(url)
+    return pd.DataFrame(), "Tidak ada komentar ditemukan", None
+# Build and launch the Gradio UI: a video URL and a comment count go in;
+# a results table, the chart HTML and the video thumbnail come out.
+gr.Interface(
+    fn=analyze_sentiment,
+    inputs=[
+        gr.Text(label="URL Video YouTube"),
+        gr.Slider(10, 200, value=50, step=10, label="Jumlah komentar yang dianalisis")
+    ],
+    outputs=[
+        gr.Dataframe(label="Preview Komentar dan Sentimen"),
+        gr.HTML(label="Visualisasi Sentimen"),
+        # No type= argument: "url" is not one of the documented values
+        # ("numpy", "pil", "filepath"), and an output Image accepts the
+        # URL string that analyze_sentiment returns.
+        gr.Image(label="Thumbnail Video YouTube")
+    ],
+    title="Analisis Komentar YouTube 🇮🇩 dengan IndoBERT",
+    description="Masukkan URL YouTube dan sistem akan menarik komentar dan menganalisisnya menggunakan model IndoBERT."
+).launch()