| import streamlit as st |
| import pandas as pd |
| import numpy as np |
| import re |
| import io |
| import time |
| import requests |
| import matplotlib.pyplot as plt |
| import seaborn as sns |
| from datetime import datetime, timezone |
| from textblob import TextBlob |
| from scipy.stats import pearsonr |
| import nltk |
| from nltk.corpus import stopwords |
| from nltk.sentiment.vader import SentimentIntensityAnalyzer |
| from transformers import pipeline |
| import os |
| import streamlit.components.v1 as components |
|
|
| from langdetect import detect, DetectorFactory |
| DetectorFactory.seed = 0 |
|
|
| |
| |
| |
# Absolute directory of this script, so bundled assets resolve no matter
# which working directory the app is launched from.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Illustrations for the single-sentence page and the batch page.
img_hero = os.path.join(BASE_DIR, "bitcoin1.gif")
img_batch = os.path.join(BASE_DIR, "bitcoin2.gif")
|
|
| |
| |
| |
# Page chrome: wide layout with the sidebar collapsed by default.
st.set_page_config(
    page_title="Bitcoin Volatility Sentiment",
    # Bitcoin sign (U+20BF); written as an escape so a wrong file encoding
    # cannot garble it.
    page_icon="\u20bf",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# First run of a session: start on the single-sentence test page.
if 'page' not in st.session_state:
    st.session_state.page = "uji_kalimat"
|
|
| |
| |
| |
| st.markdown(""" |
| <style> |
| /* -- Google Fonts -- */ |
| @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap'); |
| |
| /* -- Reset Streamlit chrome -- */ |
| #MainMenu, footer, header { visibility: hidden; } |
| .block-container { |
| padding-top: 1rem !important; |
| padding-bottom: 0 !important; |
| max-width: 100% !important; |
| } |
| |
| html, body, [class*="css"] { |
| font-family: 'Inter', -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif !important; |
| color: #202630 !important; |
| } |
| |
| .stApp { |
| background-color: #FAFAFA !important; |
| } |
| |
| /* -- Custom Scrollbar -- */ |
| ::-webkit-scrollbar { width: 6px; } |
| ::-webkit-scrollbar-track { background: transparent; } |
| ::-webkit-scrollbar-thumb { background: #eaecef; border-radius: 3px; } |
| ::-webkit-scrollbar-thumb:hover { background: #10b981; } |
| |
| /* -- NAVBAR WRAPPER -- */ |
| .vbc-logo { |
| font-weight: 700; |
| font-size: 1.25rem; |
| color: #0f172a !important; |
| display: flex; |
| align-items: center; |
| gap: 10px; |
| } |
| .vbc-logo-icon { |
| background: #10b981; |
| color: white; |
| width: 36px; |
| height: 36px; |
| border-radius: 8px; |
| display: inline-flex; |
| align-items: center; |
| justify-content: center; |
| font-size: 1.1rem; |
| font-weight: 800; |
| } |
| |
| /* -- HERO SECTION (Uji Kalimat) -- */ |
| .hero-wrap { |
| background: #FAFAFA; |
| min-height: auto; |
| display: flex; |
| align-items: center; |
| position: relative; |
| overflow: hidden; |
| padding: 0.5rem 3rem 2rem 3rem; |
| } |
| |
| .hero-badge { |
| display: inline-block; |
| background: #e6fff1; |
| color: #1aa64a; |
| font-size: 0.75rem; |
| font-weight: 600; |
| padding: 6px 12px; |
| border-radius: 4px; |
| margin-bottom: 1.5rem; |
| } |
| .hero-title { |
| font-size: 3rem; |
| font-weight: 800; |
| line-height: 1.2; |
| color: #0f172a !important; |
| margin: 0 0 1rem; |
| } |
| .hero-title span { |
| color: #10b981; |
| } |
| .hero-sub { |
| font-size: 1rem; |
| color: #64748b !important; |
| max-width: 500px; |
| line-height: 1.6; |
| margin-bottom: 2rem; |
| } |
| .hero-card { |
| background: #FFFFFF; |
| border: 1px solid #e2e8f0; |
| border-radius: 50px; |
| padding: 10px 20px; |
| display: inline-flex; |
| align-items: center; |
| gap: 10px; |
| margin-bottom: 2rem; |
| box-shadow: 0 2px 4px rgba(0,0,0,0.02); |
| } |
| .hero-card-dot { |
| width: 8px; height: 8px; |
| border-radius: 50%; |
| background: #10b981; |
| flex-shrink: 0; |
| } |
| .hero-card p { |
| margin: 0; |
| font-size: 0.85rem; |
| color: #334155 !important; |
| font-weight: 600; |
| } |
| |
| /* -- BATCH SECTION -- */ |
| .batch-wrap { |
| background: #FAFAFA; |
| min-height: auto; |
| padding: 0.5rem 3rem 2rem 3rem; |
| } |
| .batch-eyebrow { |
| font-size: 0.85rem; |
| font-weight: 700; |
| color: #10b981 !important; |
| margin-bottom: 0.5rem; |
| } |
| .batch-title { |
| font-size: 2.5rem; |
| font-weight: 800; |
| color: #0f172a !important; |
| line-height: 1.2; |
| margin-bottom: 1rem; |
| } |
| .batch-sub { |
| font-size: 1rem; |
| color: #64748b !important; |
| max-width: 480px; |
| line-height: 1.6; |
| margin-bottom: 2rem; |
| } |
| |
| /* -- RESULT / DASHBOARD SECTION -- */ |
| .result-wrap { |
| background: #FFFFFF; |
| padding: 2rem 3rem 3rem 3rem; |
| border: 1px solid #e2e8f0; |
| border-radius: 16px; |
| box-shadow: 0 4px 6px rgba(0,0,0,0.01); |
| margin-bottom: 3rem; |
| } |
| .section-label { |
| font-size: 0.85rem; |
| font-weight: 700; |
| color: #10b981 !important; |
| margin-bottom: 0.5rem; |
| } |
| .section-title { |
| font-size: 1.5rem; |
| font-weight: 800; |
| color: #0f172a !important; |
| margin-bottom: 1.5rem; |
| } |
| |
| /* -- METRIC CARDS -- */ |
| div[data-testid="stMetric"] { |
| background: #FFFFFF; |
| border: 1px solid #e2e8f0; |
| border-radius: 12px; |
| padding: 1rem 1.2rem !important; |
| box-shadow: 0 2px 4px rgba(0,0,0,0.02); |
| } |
| div[data-testid="stMetricLabel"] > div { |
| color: #64748b !important; |
| font-size: 0.85rem !important; |
| font-weight: 600 !important; |
| } |
| div[data-testid="stMetricValue"] > div { |
| color: #0f172a !important; |
| font-weight: 800 !important; |
| font-size: 1.8rem !important; |
| } |
| |
| /* -- BUTTONS -- */ |
| div[data-testid="stButton"] > button { |
| font-weight: 600 !important; |
| font-size: 0.9rem !important; |
| border-radius: 50px !important; |
| padding: 0.5rem 1.2rem !important; |
| height: 42px !important; |
| transition: all 0.2s ease-in-out !important; |
| } |
| div[data-testid="stButton"] > button:focus:not(:active) { |
| box-shadow: none !important; |
| } |
| |
| /* Primary CTA */ |
| .btn-primary div[data-testid="stButton"] > button { |
| background: #10b981 !important; |
| color: #FFFFFF !important; |
| border: none !important; |
| } |
| .btn-primary div[data-testid="stButton"] > button:hover { |
| background: #059669 !important; |
| } |
| |
| /* Secondary outline */ |
| .btn-outline-white div[data-testid="stButton"] > button { |
| background: #FFFFFF !important; |
| color: #0f172a !important; |
| border: 1px solid #e2e8f0 !important; |
| } |
| .btn-outline-white div[data-testid="stButton"] > button:hover { |
| border-color: #10b981 !important; |
| color: #10b981 !important; |
| } |
| |
| /* Active nav */ |
| .btn-orange div[data-testid="stButton"] > button { |
| background: #e6fff1 !important; |
| color: #10b981 !important; |
| border: none !important; |
| } |
| |
| /* Ghost nav - inactive */ |
| .btn-ghost div[data-testid="stButton"] > button { |
| background: transparent !important; |
| color: #64748b !important; |
| border: 1px solid transparent !important; |
| } |
| .btn-ghost div[data-testid="stButton"] > button:hover { |
| color: #0f172a !important; |
| background: #f1f5f9 !important; |
| } |
| |
| /* -- TEXT INPUT / TEXTAREA -- */ |
| .stTextArea textarea { |
| background-color: #FFFFFF !important; |
| color: #0f172a !important; |
| border: 1px solid #e2e8f0 !important; |
| border-radius: 12px !important; |
| font-size: 0.95rem !important; |
| padding: 0.8rem 1rem !important; |
| transition: border-color 0.2s !important; |
| } |
| .stTextArea textarea:focus { |
| border-color: #10b981 !important; |
| box-shadow: 0 0 0 1px #10b981 !important; |
| } |
| .stTextArea label { |
| color: #334155 !important; |
| font-size: 0.85rem !important; |
| font-weight: 600 !important; |
| } |
| |
| /* -- DATA TABLE -- */ |
| div[data-testid="stDataFrame"] { |
| border: 1px solid #e2e8f0 !important; |
| border-radius: 12px !important; |
| } |
| |
| /* -- FILE UPLOADER -- */ |
| div[data-testid="stFileUploader"] { |
| border: 1px dashed #cbd5e1 !important; |
| border-radius: 12px !important; |
| background: #FFFFFF !important; |
| padding: 1.5rem !important; |
| } |
| div[data-testid="stFileUploader"]:hover { |
| border-color: #10b981 !important; |
| } |
| |
| /* -- EXPANDER -- */ |
| div[data-testid="stExpander"] { |
| border: 1px solid #e2e8f0 !important; |
| border-radius: 12px !important; |
| background: #FFFFFF !important; |
| } |
| |
| /* -- DOWNLOAD BUTTON -- */ |
| div[data-testid="stDownloadButton"] > button { |
| background: #FFFFFF !important; |
| color: #0f172a !important; |
| border-radius: 50px !important; |
| font-weight: 600 !important; |
| border: 1px solid #e2e8f0 !important; |
| padding: 0.5rem 1.2rem !important; |
| transition: all 0.2s !important; |
| } |
| div[data-testid="stDownloadButton"] > button:hover { |
| border-color: #10b981 !important; |
| color: #10b981 !important; |
| } |
| |
| /* -- DIVIDER -- */ |
| .vbc-divider { |
| border: none; |
| border-top: 1px solid #e2e8f0; |
| margin: 2rem 0; |
| } |
| |
| </style> |
| """, unsafe_allow_html=True) |
|
|
| |
| |
| |
def scroll_to_target(target_id):
    """Smooth-scroll the parent page to the element whose id is ``target_id``.

    Renders a zero-size HTML component whose script looks the element up in
    ``window.parent.document`` and calls ``scrollIntoView`` on it. Nothing
    happens when no element with that id exists.

    Args:
        target_id: DOM id of the anchor element to scroll to.
    """
    import json  # local import: only needed to build the JS string literal

    # json.dumps produces a quoted, escaped JS string literal, so quotes or
    # backslashes in the id cannot break out of the script. "</" is split so
    # the id cannot close the surrounding <script> element either.
    js_id = json.dumps(str(target_id)).replace("</", "<\\/")
    js_code = f"""
    <script>
    var target = window.parent.document.getElementById({js_id});
    if(target) {{
        target.scrollIntoView({{behavior: 'smooth', block: 'start'}});
    }}
    </script>
    """
    components.html(js_code, height=0, width=0)
|
|
| |
| |
| |
def set_page(page_name):
    """Store ``page_name`` as the active page in the Streamlit session state.

    Args:
        page_name: Page key; the script checks for ``"uji_kalimat"`` and
            ``"analisis_batch"``.
    """
    st.session_state.page = page_name
|
|
# Navbar row: logo on the left, a spacer, then the two page buttons.
col_logo, col_space, col_btn1, col_btn2 = st.columns(
    [5, 3, 2, 2], vertical_alignment="center"
)

with col_logo:
    # &#8383; is the Bitcoin sign as an HTML entity, immune to file-encoding
    # problems.
    st.markdown("""
    <div class="vbc-logo" style="padding-left: 2rem;">
    <span class="vbc-logo-icon">&#8383;</span>
    Bitcoin Volatility Sentiment
    </div>
    """, unsafe_allow_html=True)


def _render_nav_button(column, label, page_name, key):
    """Draw one navbar button in ``column``; switch to ``page_name`` on click."""
    # The active page gets the highlighted style, the other one the ghost style.
    is_active = st.session_state.page == page_name
    css_class = "btn-orange" if is_active else "btn-ghost"
    with column:
        # NOTE(review): each st.markdown call is presumably rendered as its
        # own element, so this opening <div> may not actually wrap the
        # button - verify the .btn-* CSS selectors match in the browser.
        st.markdown(f'<div class="{css_class}">', unsafe_allow_html=True)
        if st.button(label, use_container_width=True, key=key):
            set_page(page_name)
            st.rerun()
        st.markdown('</div>', unsafe_allow_html=True)


_render_nav_button(col_btn1, "Uji Kalimat", "uji_kalimat", "nav_uji")
_render_nav_button(col_btn2, "Analisis Batch", "analisis_batch", "nav_batch")

st.markdown("<hr style='margin-top: 0.5rem; margin-bottom: 0.5rem; border: none; border-bottom: 1px solid #e2e8f0;'>", unsafe_allow_html=True)
|
|
| |
| |
| |
@st.cache_resource
def download_nltk_resources():
    """Download the NLTK data packages this app requests.

    Decorated with ``st.cache_resource`` so the downloads are not repeated on
    every script rerun.

    Returns:
        list[str]: Names of the packages whose download reported failure;
        empty when every download succeeded. Existing callers that ignore
        the return value are unaffected.
    """
    packages = ("stopwords", "vader_lexicon", "punkt", "omw-1.4")
    # nltk.download() signals failure through its boolean return value rather
    # than by raising, so collect the failures instead of discarding them.
    return [name for name in packages if not nltk.download(name, quiet=True)]
|
|
# Make sure the NLTK corpora are present before reading the stop-word list.
download_nltk_resources()
# English stop words as a set, for fast membership tests in clean_text().
stop_words = set(stopwords.words('english'))
|
|
@st.cache_resource
def load_all_models():
    """Build the VADER analyzer and the three Hugging Face sentiment pipelines.

    Decorated with ``st.cache_resource`` so the models are constructed once
    and reused across script reruns.

    Returns:
        tuple: ``(vader, bertweet, roberta, roberta_large)`` in that order.
    """
    def _sentiment_pipeline(model_name, max_length):
        # Same settings for every model; only the checkpoint and the
        # truncation length differ.
        # NOTE(review): device=-1 is presumably "run on CPU" - confirm
        # against the transformers pipeline documentation.
        return pipeline(
            "sentiment-analysis",
            model=model_name,
            device=-1,
            truncation=True,
            max_length=max_length,
        )

    vader = SentimentIntensityAnalyzer()
    bertweet = _sentiment_pipeline("finiteautomata/bertweet-base-sentiment-analysis", 128)
    roberta = _sentiment_pipeline("cardiffnlp/twitter-roberta-base-sentiment", 512)
    roberta_large = _sentiment_pipeline("siebert/sentiment-roberta-large-english", 512)
    return vader, bertweet, roberta, roberta_large
|
|
# Show a spinner while the (cached) models are being loaded.
with st.spinner('Mempersiapkan model AI...'):
    vader, bertweet, roberta, roberta_large = load_all_models()
|
|
|
|
| |
| |
| |
# Compiled once at import time; applied in this order by clean_text().
_CLEAN_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"http\S+",   # URLs
        r"@\w+",      # @mentions
        r"#\w+",      # hashtags (the whole tag, not just the '#')
        r"[^\w\s]",   # remaining punctuation / symbols
    )
)


def clean_text(text, stopword_set=None):
    """Normalise a tweet for sentiment scoring.

    Lower-cases the text, removes URLs, @mentions, hashtags and punctuation,
    then drops stop words and joins the remaining tokens with single spaces.

    Args:
        text: Raw tweet; any object is accepted and converted with ``str()``.
        stopword_set: Optional collection of tokens to drop. Defaults to the
            module-level ``stop_words`` set.

    Returns:
        str: The cleaned text (may be empty).
    """
    # NOTE(review): NLTK's English stop-word list presumably contains
    # negations such as "not"/"no"; dropping them can change the sentiment
    # the models see - confirm this is intended.
    if stopword_set is None:
        stopword_set = stop_words

    cleaned = str(text).lower()
    for pattern in _CLEAN_PATTERNS:
        cleaned = pattern.sub("", cleaned)
    return " ".join(tok for tok in cleaned.split() if tok not in stopword_set)
|
|
def classify_tb(score, threshold=0.05):
    """Turn a polarity score into a lower-case sentiment label.

    Args:
        score: Numeric polarity score.
        threshold: Half-width of the neutral band; defaults to 0.05.

    Returns:
        str: ``'positive'`` when ``score > threshold``, ``'negative'`` when
        ``score < -threshold``, otherwise ``'neutral'`` (this includes the
        boundary values and NaN, for which both comparisons are false).
    """
    if score > threshold:
        return 'positive'
    if score < -threshold:
        return 'negative'
    return 'neutral'
|
|
def map_roberta(label):
    """Translate a RoBERTa pipeline label into positive/neutral/negative.

    Accepts the ``LABEL_0``/``LABEL_1``/``LABEL_2`` ids in any letter case,
    and also the plain words ``negative``/``neutral``/``positive`` so a
    checkpoint that emits textual labels is not silently mapped to neutral.

    Args:
        label: Label string produced by the pipeline.

    Returns:
        str: ``"negative"``, ``"neutral"`` or ``"positive"``; ``"neutral"``
        for anything unrecognised.
    """
    mapping = {
        "label_0": "negative",
        "label_1": "neutral",
        "label_2": "positive",
        "negative": "negative",
        "neutral": "neutral",
        "positive": "positive",
    }
    return mapping.get(str(label).strip().lower(), "neutral")
|
|
def map_bertweet(label):
    """Translate a BERTweet pipeline label (pos/neu/neg) into a full word.

    Matching is case-insensitive. Non-string input is converted with
    ``str()`` instead of raising, so a missing label degrades to neutral.

    Args:
        label: Label produced by the pipeline.

    Returns:
        str: ``"positive"``, ``"neutral"`` or ``"negative"``; ``"neutral"``
        for anything unrecognised.
    """
    mapping = {"pos": "positive", "neu": "neutral", "neg": "negative"}
    return mapping.get(str(label).strip().lower(), "neutral")
|
|
def get_daily_label(score, threshold=0.05):
    """Turn a daily mean sentiment score into a capitalised label.

    Args:
        score: Numeric daily sentiment score.
        threshold: Half-width of the neutral band; defaults to 0.05.

    Returns:
        str: ``'Positive'`` when ``score > threshold``, ``'Negative'`` when
        ``score < -threshold``, otherwise ``'Neutral'`` (this includes the
        boundary values and NaN, for which both comparisons are false).
    """
    if score > threshold:
        return 'Positive'
    if score < -threshold:
        return 'Negative'
    return 'Neutral'
|
|
|
|
| |
| |
| |
# ---------------------------------------------------------------------------
# Page bodies: "uji_kalimat" (single sentence) and "analisis_batch" (TXT upload)
# ---------------------------------------------------------------------------

# Column keys of the five sentiment models, in table/plot order.
_MODEL_KEYS = ["vader", "textblob", "bertweet", "roberta", "roberta_large"]
_VALID_LABELS = ("positive", "negative", "neutral")
_XLSX_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

# Colours and icon used by the result cards on the single-sentence page.
# NOTE(review): the original icon glyphs were unreadable (mis-encoded); the
# arrows below are a best guess - confirm against the intended design.
_BADGE_STYLES = {
    "positive": {"bg": "#e6fff1", "fg": "#10b981", "border": "#10b981", "icon": "↑"},
    "negative": {"bg": "#fef1f2", "fg": "#f43f5e", "border": "#f43f5e", "icon": "↓"},
    "neutral": {"bg": "#f1f5f9", "fg": "#64748b", "border": "#cbd5e1", "icon": "→"},
}


def _safe_label(predict, text):
    """Run one model callable; fall back to "neutral" on failure or odd output."""
    try:
        label = predict(text)
    except Exception:
        # Best effort: one failing model must not abort the whole analysis.
        # `except Exception` (not a bare except) lets Streamlit's own
        # stop/rerun control exceptions and KeyboardInterrupt pass through.
        return "neutral"
    return label if label in _VALID_LABELS else "neutral"


def _predict_labels(text):
    """Return ``{model key: label}`` for the five models on cleaned ``text``."""
    predictors = {
        "vader": lambda t: classify_tb(vader.polarity_scores(t)['compound']),
        "textblob": lambda t: classify_tb(TextBlob(t).sentiment.polarity),
        "bertweet": lambda t: map_bertweet(bertweet(t)[0]['label']),
        "roberta": lambda t: map_roberta(roberta(t)[0]['label']),
        "roberta_large": lambda t: roberta_large(t)[0]['label'].lower(),
    }
    return {key: _safe_label(fn, text) for key, fn in predictors.items()}


def _is_english(text):
    """Return True/False from langdetect, or None when detection itself fails."""
    try:
        return detect(text) == 'en'
    except Exception:
        return None


def _iter_tweet_blocks(raw_bytes):
    """Yield ``(meta_line, tweet_text)`` for each blank-line separated block."""
    # utf-8-sig drops a leading BOM; errors="replace" keeps a file in another
    # encoding from crashing the run.
    content = raw_bytes.decode("utf-8-sig", errors="replace").replace("\r\n", "\n").strip()
    for block in content.split("\n\n"):
        parts = block.strip().split("\n", 1)
        if len(parts) == 2:
            yield parts[0], parts[1]


def _parse_meta(meta):
    """Split a ``username | date`` header line.

    Returns ``(username, "YYYY-MM-DD")``, or None when the separator is
    missing or the date is not a valid ``%Y-%m-%d`` prefix (such rows would
    otherwise crash the later date arithmetic).
    """
    username, sep, date_val = meta.rpartition(" | ")
    if not sep:
        return None
    short_date = date_val.strip()[:10]
    try:
        datetime.strptime(short_date, "%Y-%m-%d")
    except ValueError:
        return None
    return username.strip(), short_date


def _fetch_daily_btc_prices(target_dates):
    """Fetch BTC/USD prices from CoinGecko and aggregate them per day.

    The requested range is padded by one day on each side so the first target
    date has a previous day to compute its return from.

    Args:
        target_dates: Sorted list of ``"YYYY-MM-DD"`` strings.

    Returns:
        DataFrame with ``date``, ``price`` (daily mean), ``pct_change`` and
        ``log_return`` restricted to ``target_dates``; or None when the API
        response was unusable (an error has then already been shown).
    """
    one_day = 86400

    def _utc_midnight(day_str):
        parsed = datetime.strptime(day_str, "%Y-%m-%d").replace(tzinfo=timezone.utc)
        return int(parsed.timestamp())

    start_unix = _utc_midnight(target_dates[0]) - one_day
    end_unix = _utc_midnight(target_dates[-1]) + one_day

    url = "https://api.coingecko.com/api/v3/coins/bitcoin/market_chart/range"
    params = {"vs_currency": "usd", "from": start_unix, "to": end_unix}
    headers = {"accept": "application/json", "User-Agent": "Mozilla/5.0"}

    time.sleep(2)  # short pause before calling the public API (kept from the original)
    # A timeout keeps a stalled connection from hanging the app forever.
    res = requests.get(url, params=params, headers=headers, timeout=30)

    if res.status_code != 200:
        st.error(f"API Error {res.status_code}: {res.text}")
        return None

    data_json = res.json()
    if "prices" not in data_json:
        st.error("Data harga tidak ditemukan di respons API.")
        return None

    df_price = pd.DataFrame(data_json["prices"], columns=["timestamp", "price"])
    df_price["date"] = pd.to_datetime(df_price["timestamp"], unit="ms").dt.date
    df_price = df_price.groupby("date")["price"].mean().reset_index()
    df_price["pct_change"] = df_price["price"].pct_change() * 100
    df_price["log_return"] = np.log(df_price["price"] / df_price["price"].shift(1))
    df_price = df_price.dropna()
    return df_price[df_price["date"].isin(pd.to_datetime(target_dates).date)]


def _render_batch_dashboard(df, df_price):
    """Render tables, downloads, correlation test, plots and conclusion."""
    st.markdown("<hr class='vbc-divider'>", unsafe_allow_html=True)

    st.markdown("🗣️ Data Sentimen")
    raw_display_cols = ["date", "raw_tweet"] + _MODEL_KEYS
    st.dataframe(df[raw_display_cols], use_container_width=True, hide_index=True)

    # Labels -> numeric scores, then the daily mean per model.
    sentiment_map = {"positive": 1, "neutral": 0, "negative": -1}
    df_score = df.copy()
    for col in _MODEL_KEYS:
        df_score[col] = df_score[col].map(sentiment_map)

    df_sentiment_daily = df_score.groupby("date")[_MODEL_KEYS].mean().reset_index()
    df_sentiment_daily["date"] = pd.to_datetime(df_sentiment_daily["date"]).dt.date

    daily_display_cols = ["date"]
    for col in _MODEL_KEYS:
        df_sentiment_daily[f"{col}_label"] = df_sentiment_daily[col].apply(get_daily_label)
        daily_display_cols.extend([col, f"{col}_label"])

    st.markdown("₿ Data Harga & Volatilitas Bitcoin")
    st.dataframe(df_price[["date", "price", "pct_change", "log_return"]], use_container_width=True, hide_index=True)

    df_merged = pd.merge(df_price, df_sentiment_daily, on="date", how="inner")

    st.markdown("🗂️ Data Final")
    final_display_cols = ["date", "price", "pct_change", "log_return"] + [c for c in daily_display_cols if c != "date"]
    st.dataframe(df_merged[final_display_cols], use_container_width=True, hide_index=True)

    # NOTE(review): clicking a download button reruns the script with
    # analyze_batch_btn False, so this dashboard presumably disappears -
    # consider caching the results in st.session_state.
    col_dl1, col_dl2, _ = st.columns([1, 1, 3])
    csv_data = df_merged.to_csv(index=False).encode('utf-8')
    col_dl1.download_button("📥 Unduh CSV", data=csv_data, file_name="sentiment_volatility.csv", mime="text/csv", use_container_width=True)

    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
        df_merged.to_excel(writer, index=False)
    col_dl2.download_button("📥 Unduh Excel", data=buffer.getvalue(), file_name="sentiment_volatility.xlsx", mime=_XLSX_MIME, use_container_width=True)

    # pearsonr needs at least two observations; stop with a clear message
    # instead of raising (which used to surface as an "API" error).
    if len(df_merged) < 2:
        st.warning("⚠️ Uji korelasi membutuhkan minimal 2 hari data yang memiliki harga dan sentimen.")
        return

    st.markdown("<hr class='vbc-divider'>", unsafe_allow_html=True)

    st.subheader("🔬 Uji Korelasi Pearson")
    st.caption("Menganalisis hubungan statistik antara skor sentimen harian dan volatilitas log-return BTC.")

    corr_data = []
    raw_corr_results = []  # only methods with a defined (non-NaN) correlation
    for method in _MODEL_KEYS:
        corr, pval = pearsonr(df_merged["log_return"], df_merged[method])
        if np.isnan(corr):
            # Constant input: the correlation is undefined.
            arah, sig = "-", "Tidak Terdefinisi"
        else:
            arah = "Positif" if corr > 0 else "Negatif"
            sig = "Signifikan" if pval < 0.05 else "Tidak Signifikan"
            raw_corr_results.append({"metode": method.upper(), "r": corr, "p": pval})
        corr_data.append({"Metode": method.upper(), "r (Korelasi)": f"{corr:.4f}", "Arah": arah, "p-value": f"{pval:.4f}", "Status": sig})

    st.table(pd.DataFrame(corr_data))

    st.subheader("🔵 Pola Distribusi Scatter Plot")
    cols = st.columns(3)
    for idx2, method in enumerate(_MODEL_KEYS):
        with cols[idx2 % 3]:
            fig_s, ax_s = plt.subplots(figsize=(5, 4))
            sns.regplot(data=df_merged, x=method, y="log_return", ax=ax_s,
                        scatter_kws={"s": 40, "color": "#10b981", "alpha": 0.5},
                        line_kws={"color": "#0f172a", "linewidth": 2})
            ax_s.set_title(f"{method.upper()}", fontweight='bold')
            ax_s.set_xlabel("Sentimen Score")
            ax_s.set_ylabel("Log Return")
            fig_s.tight_layout()
            st.pyplot(fig_s)
            plt.close(fig_s)  # release the figure; reruns would otherwise accumulate them

    st.subheader("📈 Trend Analisis: Sentiment vs BTC Volatility")
    fig_line, ax_line = plt.subplots(figsize=(14, 6))
    ax_line.plot(df_merged["date"], df_merged["log_return"], label="BTC Log Return", color="#f7931a", linewidth=3)
    colors = ["#3B82F6", "#10B981", "#EC4899", "#14B8A6", "#6366F1"]
    # Order kept from the original so each model keeps its colour.
    trend_order = ["vader", "textblob", "roberta", "roberta_large", "bertweet"]
    for method, color in zip(trend_order, colors):
        ax_line.plot(df_merged["date"], df_merged[method], label=f"Sentiment: {method.upper()}", color=color, linewidth=1.5, linestyle="--", alpha=0.8)
    ax_line.set_title("Pergerakan Sentimen vs Log Return Bitcoin", fontsize=14, pad=15, fontweight='bold')
    ax_line.set_xlabel("Tanggal", fontsize=11)
    ax_line.set_ylabel("Nilai Metrik", fontsize=11)
    ax_line.legend(loc='upper left', bbox_to_anchor=(1, 1), frameon=True)
    fig_line.tight_layout()
    st.pyplot(fig_line)
    plt.close(fig_line)

    st.markdown("<hr class='vbc-divider'>", unsafe_allow_html=True)
    st.subheader("📝 Kesimpulan")

    date_max = df_merged.loc[df_merged["log_return"].idxmax(), "date"]
    date_min = df_merged.loc[df_merged["log_return"].idxmin(), "date"]
    sig_models = [r["metode"] for r in raw_corr_results if r["p"] < 0.05]

    st.write(f"Puncak lonjakan positif (*max log return*) terjadi pada **{date_max}**, sedangkan penurunan ekstrem terjadi pada **{date_min}**.")

    if sig_models:
        # Strongest = largest |r| over all methods with a defined correlation.
        strongest = max(raw_corr_results, key=lambda x: abs(x["r"]))
        arah_text = "berbanding lurus (positif)" if strongest["r"] > 0 else "berbanding terbalik (negatif)"
        st.success(f"""
        **Hipotesis Diterima (H1):** Ditemukan korelasi linier yang signifikan pada metode **{', '.join(sig_models)}** (*p-value* < 0.05).
        Metode dengan pemetaan respons pasar terkuat adalah **{strongest['metode']}**, dengan sifat hubungan **{arah_text}**.
        """)
    else:
        st.warning("""
        **Hipotesis Ditolak (H0 Diterima):** Tidak ditemukan bukti empiris korelasi linier yang signifikan (seluruh *p-value* >= 0.05).
        Volatilitas harga cenderung dipengaruhi oleh faktor teknikal/fundamental di luar sentimen X.
        """)


if st.session_state.page == "uji_kalimat":

    st.markdown('<div class="hero-wrap">', unsafe_allow_html=True)

    col_text, col_img = st.columns([1.1, 1], gap="large")

    with col_text:
        st.markdown("""
        <div class="hero-badge">Website ini bukanlah alat prediksi harga Bitcoin real time, melainkan instrumen untuk melakukan analisis sentimen publik secara batch</div>
        <h1 class="hero-title">
        Bitcoin Volatility<br>
        <span>vs Public</span> Sentiment
        </h1>
        <p class="hero-sub">
        Analisis Volatilitas Harga Bitcoin Terhadap Sentimen Publik
        Pada Platform X Berbasis Python.
        </p>
        <div class="hero-card">
        <div class="hero-card-dot"></div>
        <p><b>Peneliti:</b> Arya Galuh Saputra &middot; H1D022022</p>
        </div>
        """, unsafe_allow_html=True)

        user_input = st.text_area(
            "Masukkan Tweet (Bahasa Inggris):",
            "Great, Bitcoin just crashed another 10% today.",
            height=120
        )

        st.markdown("<br>", unsafe_allow_html=True)

        col_btn1, col_btn2 = st.columns([1.6, 1])
        with col_btn1:
            st.markdown('<div class="btn-primary">', unsafe_allow_html=True)
            analyze_btn = st.button("Proses Uji Kalimat", use_container_width=True)
            st.markdown('</div>', unsafe_allow_html=True)

    with col_img:
        st.markdown("<div style='margin-top: 4rem;'></div>", unsafe_allow_html=True)
        try:
            st.image(img_hero, use_container_width=True)
        except Exception:
            # Image missing or unreadable: show a placeholder instead.
            st.markdown("""
            <div style="background:#f5f5f5;border:1px dashed #cbd5e1;
            border-radius:12px;height:320px;display:flex;align-items:center;
            justify-content:center;color:#64748b;font-size:0.9rem;
            text-align:center;padding:2rem;">
            🖼️ Gambar Tidak Ditemukan<br>Pastikan file <code>bitcoin1.gif</code> ada di direktori
            </div>""", unsafe_allow_html=True)

    st.markdown('</div>', unsafe_allow_html=True)

    # Anchor that scroll_to_target() jumps to once results are shown.
    st.markdown('<div id="target-uji-kalimat"></div>', unsafe_allow_html=True)

    if analyze_btn:
        scroll_to_target("target-uji-kalimat")

        col_space_left, col_center_output, col_space_right = st.columns([1, 4, 1])

        with col_center_output:
            st.markdown("""
            <div class="result-wrap" style="padding-bottom: 2rem; margin-bottom: 1.5rem;">
            <p class="section-label">Output Analisis</p>
            <p class="section-title" style="margin-bottom: 0;">Hasil Deteksi Sentimen</p>
            </div>
            """, unsafe_allow_html=True)

            if not user_input.strip():
                # Nothing to analyse; avoid running five models on empty text.
                st.warning("⚠️ Silakan masukkan kalimat terlebih dahulu.")
            else:
                # Only warn on a definite non-English result; a detection
                # failure (None) is ignored, as before.
                if _is_english(user_input) is False:
                    st.warning("⚠️ Teks sepertinya bukan bahasa Inggris. Hasil prediksi mungkin memiliki bias.")

                text = clean_text(user_input)

                with st.spinner("Mengekstraksi sentimen dengan 5 Model..."):
                    labels = _predict_labels(text)

                results = [
                    ("VADER", labels["vader"]),
                    ("TextBlob", labels["textblob"]),
                    ("BERTweet", labels["bertweet"]),
                    ("RoBERTa Base", labels["roberta"]),
                    ("RoBERTa Large", labels["roberta_large"]),
                ]

                col_a, col_b = st.columns(2)

                # Cards alternate between the left and right column.
                for i, (method, label) in enumerate(results):
                    style = _BADGE_STYLES[label]
                    with (col_a if i % 2 == 0 else col_b):
                        st.markdown(f"""
                        <div style="background:#FAFAFA;border:1px solid #e2e8f0;border-left:4px solid {style['border']};
                        border-radius:12px;padding:1rem 1.2rem;margin-bottom:1rem;
                        display:flex;align-items:center;justify-content:space-between;
                        box-shadow:0 2px 4px rgba(0,0,0,0.02);">
                        <div>
                        <div style="font-weight:600;font-size:0.75rem;color:#64748b;margin-bottom:4px;">{method}</div>
                        <div style="font-weight:800;font-size:1.05rem;color:#0f172a;">{label.capitalize()}</div>
                        </div>
                        <div style="background:{style['bg']};color:{style['fg']};font-size:0.75rem;font-weight:700;
                        padding:6px 12px;border-radius:50px;">
                        {style['icon']} {label.upper()}
                        </div>
                        </div>
                        """, unsafe_allow_html=True)

elif st.session_state.page == "analisis_batch":

    # Light chart theme matching the page palette.
    plt.style.use('default')
    sns.set_theme(style="whitegrid", rc={
        "axes.facecolor": "#FFFFFF",
        "figure.facecolor": "#FAFAFA",
        "axes.edgecolor": "#e2e8f0",
        "text.color": "#0f172a",
        "xtick.color": "#64748b",
        "ytick.color": "#64748b",
        "grid.color": "#f1f5f9",
    })

    st.markdown('<div class="batch-wrap">', unsafe_allow_html=True)

    col_upload, col_img_b = st.columns([1.4, 1], gap="large")

    with col_upload:
        st.markdown("""
        <p class="batch-eyebrow">Analisis Batch Processing</p>
        <h2 class="batch-title">Volatilitas Harga Bitcoin Vs Sentimen Publik<br>Korelasi Multi-Metode Analisis Sentimen</h2>
        <p class="batch-sub">
        Unggah file tweets (.txt) untuk diekstraksi dan
        dianalisis terhadap volatilitas harga Bitcoin.
        </p>""", unsafe_allow_html=True)

        tweet_files = st.file_uploader(
            "Pilih file Tweet (.txt)",
            type=['txt'],
            accept_multiple_files=True
        )

        with st.expander("Format TXT yang Didukung"):
            st.code(
                "username | 2024-03-01 14:00:00\n"
                "Isi tweet baris pertama di sini\n\n"
                "username2 | 2024-03-01 15:30:00\n"
                "Isi tweet baris kedua di sini",
                language="text"
            )

        st.markdown("<br>", unsafe_allow_html=True)
        st.markdown('<div class="btn-primary">', unsafe_allow_html=True)
        analyze_batch_btn = st.button("Eksekusi Analisis", key="batch_btn", use_container_width=False)
        st.markdown('</div>', unsafe_allow_html=True)

    with col_img_b:
        st.markdown("<div style='margin-top: 4rem;'></div>", unsafe_allow_html=True)
        try:
            st.image(img_batch, use_container_width=True)
        except Exception:
            # Image missing or unreadable: show a placeholder instead.
            st.markdown("""
            <div style="background:#f5f5f5;border:1px dashed #cbd5e1;
            border-radius:12px;height:280px;display:flex;align-items:center;
            justify-content:center;color:#64748b;font-size:0.9rem;
            text-align:center;padding:2rem;">
            🖼️ Gambar Tidak Ditemukan<br>Pastikan file <code>bitcoin2.gif</code> ada di direktori
            </div>""", unsafe_allow_html=True)

    st.markdown('</div>', unsafe_allow_html=True)

    # Anchor that scroll_to_target() jumps to once results are shown.
    st.markdown('<div id="target-analisis-batch"></div>', unsafe_allow_html=True)

    if tweet_files and analyze_batch_btn:
        scroll_to_target("target-analisis-batch")

        col_b_space1, col_b_content, col_b_space2 = st.columns([1, 8, 1])

        with col_b_content:
            st.markdown("""
            <div class="result-wrap" style="padding-bottom: 2rem; margin-bottom: 1.5rem;">
            <p class="section-label">Hasil Pemrosesan</p>
            <p class="section-title" style="margin-bottom: 0;">Dashboard Analisis</p>
            </div>
            """, unsafe_allow_html=True)

            # Process files in a stable, name-sorted order.
            tweet_files = sorted(tweet_files, key=lambda x: x.name)

            data = []

            with st.status("🔄 Memproses data sentimen...", expanded=True) as status:
                progress_bar = st.progress(0, text="Mengekstrak sentimen dari data...")

                total_tweets_uploaded = 0
                total_tweets_skipped = 0

                for idx, file in enumerate(tweet_files):
                    for meta, text_raw in _iter_tweet_blocks(file.getvalue()):
                        # Non-English text and failed detections are skipped.
                        if not _is_english(text_raw):
                            total_tweets_skipped += 1
                            continue

                        # Blocks without a valid "user | YYYY-MM-DD" header
                        # are dropped like other malformed blocks.
                        parsed_meta = _parse_meta(meta)
                        if parsed_meta is None:
                            continue
                        username, short_date = parsed_meta

                        text = clean_text(text_raw)
                        labels = _predict_labels(text)

                        data.append({
                            "date": short_date, "raw_tweet": text_raw.strip(), "cleaned_tweet": text,
                            "vader": labels["vader"], "textblob": labels["textblob"], "bertweet": labels["bertweet"],
                            "roberta": labels["roberta"], "roberta_large": labels["roberta_large"],
                        })
                        total_tweets_uploaded += 1

                    progress_bar.progress((idx + 1) / len(tweet_files),
                                          text=f"Memproses file {idx+1} dari {len(tweet_files)}")
                status.update(label="✅ Pemrosesan sentimen teks selesai!", state="complete", expanded=False)

            df = pd.DataFrame(data)

            if df.empty:
                st.error("❌ Data kosong. Pastikan format TXT benar dan tweet berbahasa Inggris.")
            else:
                col_m1, col_m2, col_m3 = st.columns(3)
                col_m1.metric("Tweet Diproses", f"{total_tweets_uploaded}", border=True)
                col_m2.metric("Tweet Diabaikan (Non-EN)", f"{total_tweets_skipped}", border=True)
                col_m3.metric("Model", "5 Model", border=True)

                target_dates = sorted(df['date'].unique())

                try:
                    with st.spinner("📡 Mengambil data harga Bitcoin dari CoinGecko API..."):
                        df_price = _fetch_daily_btc_prices(target_dates)

                    if df_price is None:
                        pass  # the fetch helper has already shown the error
                    elif df_price.empty:
                        st.warning("⚠️ Data Harga API kosong. Pastikan rentang tanggal di .txt sesuai (yyyy-mm-dd).")
                    else:
                        _render_batch_dashboard(df, df_price)
                except Exception as e:
                    # Top-level boundary: report instead of crashing the page.
                    st.error(f"⚠️ Terjadi kesalahan saat mengambil atau memproses data API CoinGecko: {e}")

    elif analyze_batch_btn and not tweet_files:
        st.warning("⚠️ Silakan unggah minimal satu file .txt terlebih dahulu.")