import streamlit as st
import pandas as pd
import numpy as np
import re
import io
import time
import requests
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timezone
from textblob import TextBlob
from scipy.stats import pearsonr
import nltk
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from transformers import pipeline
import os
import streamlit.components.v1 as components
from langdetect import detect, DetectorFactory

# Fixed seed so langdetect produces reproducible language guesses across reruns.
DetectorFactory.seed = 0

# ==============================
# ABSOLUTE IMAGE PATHS
# ==============================
# Resolve images relative to this file so the app works regardless of CWD.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
img_hero = os.path.join(BASE_DIR, "bitcoin1.gif")
img_batch = os.path.join(BASE_DIR, "bitcoin2.gif")

# ==============================
# PAGE CONFIG & NAVIGATION STATE
# ==============================
st.set_page_config(
    page_title="Bitcoin Volatility Sentiment",
    page_icon="₿",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# Default landing page is the single-sentence sentiment tester.
if 'page' not in st.session_state:
    st.session_state.page = "uji_kalimat"

# ==============================
# GLOBAL CSS
# ==============================
# NOTE(review): the CSS payload was stripped from the provided source;
# the original <style> block must be restored here — TODO confirm.
st.markdown("""
""", unsafe_allow_html=True)


# ==============================
# AUTO-SCROLL HELPER
# ==============================
def scroll_to_target(target_id):
    """Inject an invisible HTML component whose JS scrolls to *target_id*.

    NOTE(review): the JS payload was stripped from the provided source;
    presumably it calls scrollIntoView on the element with id=target_id —
    TODO confirm against the original file.
    """
    js_code = f"""
    """
    components.html(js_code, height=0, width=0)


# ==============================
# HEADER / NAVBAR
# ==============================
def set_page(page_name):
    """Switch the active page stored in Streamlit session state."""
    st.session_state.page = page_name


col_logo, col_space, col_btn1, col_btn2 = st.columns(
    [5, 3, 2, 2], vertical_alignment="center"
)

with col_logo:
    # NOTE(review): logo markup stripped from the provided source — TODO restore.
    st.markdown("""
""", unsafe_allow_html=True)

with col_btn1:
    is_uji = st.session_state.page == "uji_kalimat"
    css_class = "btn-orange" if is_uji else "btn-ghost"
    # Wrapper div gives the nav button its active/ghost style.
    # NOTE(review): markup reconstructed from a stripped f-string — TODO confirm.
    st.markdown(f'<div class="{css_class}">', unsafe_allow_html=True)
    if st.button("Uji Kalimat", use_container_width=True, key="nav_uji"):
        set_page("uji_kalimat")
        st.rerun()
    st.markdown('</div>', unsafe_allow_html=True)

with col_btn2:
    is_batch = st.session_state.page == "analisis_batch"
    css_class = "btn-orange" if is_batch else "btn-ghost"
    # NOTE(review): markup reconstructed from a stripped f-string — TODO confirm.
    st.markdown(f'<div class="{css_class}">', unsafe_allow_html=True)
    if st.button("Analisis Batch", use_container_width=True, key="nav_batch"):
        set_page("analisis_batch")
        st.rerun()
    st.markdown('</div>', unsafe_allow_html=True)

# Divider under the navbar (markup stripped in the provided source).
st.markdown("""
""", unsafe_allow_html=True)


# ==============================
# DOWNLOAD RESOURCES & LOAD MODELS
# ==============================
@st.cache_resource
def download_nltk_resources():
    """Fetch the required NLTK corpora/lexicons once per server process."""
    for resource in ('stopwords', 'vader_lexicon', 'punkt', 'omw-1.4'):
        nltk.download(resource, quiet=True)


download_nltk_resources()
stop_words = set(stopwords.words('english'))


@st.cache_resource
def load_all_models():
    """Build the sentiment analyzers once and cache them for every session.

    Returns:
        Tuple of (VADER analyzer, BERTweet pipeline, RoBERTa-base pipeline,
        RoBERTa-large pipeline), all on CPU (device=-1) with truncation on.
    """
    vader = SentimentIntensityAnalyzer()
    bertweet = pipeline(
        "sentiment-analysis",
        model="finiteautomata/bertweet-base-sentiment-analysis",
        device=-1, truncation=True, max_length=128,
    )
    roberta = pipeline(
        "sentiment-analysis",
        model="cardiffnlp/twitter-roberta-base-sentiment",
        device=-1, truncation=True, max_length=512,
    )
    roberta_large = pipeline(
        "sentiment-analysis",
        model="siebert/sentiment-roberta-large-english",
        device=-1, truncation=True, max_length=512,
    )
    return vader, bertweet, roberta, roberta_large


with st.spinner('Mempersiapkan model AI...'):
    vader, bertweet, roberta, roberta_large = load_all_models()


# ==============================
# TEXT CLEANING & LABEL MAPPING
# ==============================
def clean_text(text):
    """Lowercase, strip URLs/mentions/hashtags/punctuation, drop stopwords."""
    lowered = str(text).lower()
    for pattern in (r"http\S+", r"@\w+", r"#\w+", r"[^\w\s]"):
        lowered = re.sub(pattern, "", lowered)
    kept = [tok for tok in lowered.split() if tok not in stop_words]
    return " ".join(kept)


def classify_tb(score):
    """Map a TextBlob polarity score onto the shared 3-class labels (±0.05)."""
    if score > 0.05:
        return 'positive'
    if score < -0.05:
        return 'negative'
    return 'neutral'


def map_roberta(label):
    """Translate cardiffnlp's LABEL_0/1/2 outputs to readable labels."""
    return {"LABEL_0": "negative", "LABEL_1": "neutral", "LABEL_2": "positive"}.get(label, "neutral")


def map_bertweet(label):
    """Translate BERTweet's POS/NEU/NEG outputs to readable labels."""
    return {"pos": "positive", "neu": "neutral", "neg": "negative"}.get(label.lower(), "neutral")


def get_daily_label(score):
    """Label a daily mean sentiment score with the same ±0.05 thresholds."""
    if score > 0.05:
        return 'Positive'
    if score < -0.05:
        return 'Negative'
    return 'Neutral'
# ==============================================================================
# PAGE 1 — SINGLE-SENTENCE TEST ("uji_kalimat")
# ==============================================================================
# NOTE(review): several st.markdown HTML payloads were stripped from the
# provided source. Text content is preserved verbatim; wrapper markup is left
# empty and must be restored from the original file — TODO confirm.
if st.session_state.page == "uji_kalimat":
    st.markdown('', unsafe_allow_html=True)  # stripped wrapper markup

    col_text, col_img = st.columns([1.1, 1], gap="large")

    with col_text:
        # Hero copy (HTML tags stripped in source; text preserved verbatim).
        st.markdown("""
Website ini bukanlah alat prediksi harga Bitcoin real time, melainkan instrumen untuk melakukan analisis sentimen publik secara batch

Bitcoin Volatility
vs Public Sentiment

Analisis Volatilitas Harga Bitcoin Terhadap Sentimen Publik Pada Platform X Berbasis Python.

Peneliti: Arya Galuh Saputra  ·  H1D022022

""", unsafe_allow_html=True)

        user_input = st.text_area(
            "Masukkan Tweet (Bahasa Inggris):",
            "Great, Bitcoin just crashed another 10% today.",
            height=120,
        )
        st.markdown("", unsafe_allow_html=True)  # spacer (markup stripped)

        col_btn1, col_btn2 = st.columns([1.6, 1])
        with col_btn1:
            st.markdown('', unsafe_allow_html=True)  # stripped wrapper markup
            analyze_btn = st.button("Proses Uji Kalimat", use_container_width=True)
            st.markdown('', unsafe_allow_html=True)

    with col_img:
        st.markdown("", unsafe_allow_html=True)
        try:
            st.image(img_hero, use_container_width=True)
        except Exception:
            # Fallback card shown when bitcoin1.gif is missing on disk.
            st.markdown("""
🖼️ Gambar Tidak Ditemukan
Pastikan file bitcoin1.gif ada di direktori
""", unsafe_allow_html=True)
        st.markdown('', unsafe_allow_html=True)

    st.markdown('', unsafe_allow_html=True)

    if analyze_btn:
        scroll_to_target("target-uji-kalimat")
        col_space_left, col_center_output, col_space_right = st.columns([1, 4, 1])

        with col_center_output:
            st.markdown("""
Output Analisis

Hasil Deteksi Sentimen

""", unsafe_allow_html=True)

            # Warn (but still analyze) when the input doesn't look English —
            # all five models are English-only.
            try:
                if detect(user_input) != 'en':
                    st.warning("⚠️ Teks sepertinya bukan bahasa Inggris. Hasil prediksi mungkin memiliki bias.")
            except Exception:  # FIX: was a bare except (also caught SystemExit/KeyboardInterrupt)
                pass

            text = clean_text(user_input)

            with st.spinner("Mengekstraksi sentimen dengan 5 Model..."):
                time.sleep(0.5)  # cosmetic pause so the spinner is visible

                # Each model is best-effort: any failure falls back to "neutral".
                try:
                    # FIX: compute the compound score once (was computed twice per branch).
                    compound = vader.polarity_scores(text)['compound']
                    v_label = ("positive" if compound > 0.05
                               else ("negative" if compound < -0.05 else "neutral"))
                except Exception:
                    v_label = "neutral"
                try:
                    t_label = classify_tb(TextBlob(text).sentiment.polarity)
                except Exception:
                    t_label = "neutral"
                try:
                    b_label = map_bertweet(bertweet(text)[0]['label'])
                except Exception:
                    b_label = "neutral"
                try:
                    r_label = map_roberta(roberta(text)[0]['label'])
                except Exception:
                    r_label = "neutral"
                try:
                    rl_label = roberta_large(text)[0]['label'].lower()
                except Exception:
                    rl_label = "neutral"

            def badge_color(label):
                """Card background color per sentiment class."""
                return {"positive": "#e6fff1", "negative": "#fef1f2", "neutral": "#f1f5f9"}[label]

            def badge_text_color(label):
                """Accent/text color per sentiment class."""
                return {"positive": "#10b981", "negative": "#f43f5e", "neutral": "#64748b"}[label]

            results = [
                ("VADER", v_label),
                ("TextBlob", t_label),
                ("BERTweet", b_label),
                ("RoBERTa Base", r_label),
                ("RoBERTa Large", rl_label),
            ]

            # Render the five verdicts as cards, alternating across two columns.
            col_a, col_b = st.columns(2)
            for i, (method, label) in enumerate(results):
                col = col_a if i % 2 == 0 else col_b
                bg = badge_color(label)       # feeds the (stripped) card markup
                tc = badge_text_color(label)  # feeds the (stripped) card markup
                icon = "↗" if label == "positive" else ("↘" if label == "negative" else "→")
                with col:
                    # NOTE(review): card HTML stripped in source; text preserved.
                    st.markdown(f"""
{method}
{label.capitalize()}
{icon} {label.upper()}
""", unsafe_allow_html=True)

# ==============================================================================
# PAGE 2 — BATCH ANALYSIS ("analisis_batch")
# ==============================================================================
elif st.session_state.page == "analisis_batch":
    # Light plotting theme shared by all charts on this page.
    plt.style.use('default')
    sns.set_theme(style="whitegrid", rc={
        "axes.facecolor": "#FFFFFF",
        "figure.facecolor": "#FAFAFA",
        "axes.edgecolor": "#e2e8f0",
        "text.color": "#0f172a",
        "xtick.color": "#64748b",
        "ytick.color": "#64748b",
        "grid.color": "#f1f5f9",
    })

    st.markdown('', unsafe_allow_html=True)  # stripped wrapper markup

    col_upload, col_img_b = st.columns([1.4, 1], gap="large")

    with col_upload:
        st.markdown("""
Analisis Batch Processing

Volatilitas Harga Bitcoin Vs Sentimen Publik
Kolerasi Multi-Metode Analisis Sentimen

Unggah file tweets (.txt) untuk diekstraksi dan dianalisis terhadap volatilitas harga Bitcoin.

""", unsafe_allow_html=True)

        tweet_files = st.file_uploader(
            "Pilih file Tweet (.txt)", type=['txt'], accept_multiple_files=True
        )
        with st.expander("Format TXT yang Didukung"):
            st.code(
                "username | 2024-03-01 14:00:00\n"
                "Isi tweet baris pertama di sini\n\n"
                "username2 | 2024-03-01 15:30:00\n"
                "Isi tweet baris kedua di sini",
                language="text",
            )
        st.markdown("", unsafe_allow_html=True)
        st.markdown('', unsafe_allow_html=True)
        analyze_batch_btn = st.button("Eksekusi Analisis", key="batch_btn", use_container_width=False)
        st.markdown('', unsafe_allow_html=True)

    with col_img_b:
        st.markdown("", unsafe_allow_html=True)
        try:
            st.image(img_batch, use_container_width=True)
        except Exception:
            st.markdown("""
🖼️ Gambar Tidak Ditemukan
Pastikan file bitcoin2.gif ada di direktori
""", unsafe_allow_html=True)
        st.markdown('', unsafe_allow_html=True)

    st.markdown('', unsafe_allow_html=True)

    if tweet_files and analyze_batch_btn:
        scroll_to_target("target-analisis-batch")
        col_b_space1, col_b_content, col_b_space2 = st.columns([1, 8, 1])

        with col_b_content:
            st.markdown("""
Hasil Pemrosesan

Dashboard Analisis

""", unsafe_allow_html=True)

            # Stable file order so results are reproducible across reruns.
            tweet_files = sorted(tweet_files, key=lambda x: x.name)
            data = []

            with st.status("🔄 Memproses data sentimen...", expanded=True) as status:
                progress_bar = st.progress(0, text="Mengekstrak sentimen dari data...")
                total_tweets_uploaded = 0
                total_tweets_skipped = 0

                for idx, file in enumerate(tweet_files):
                    # Tweets are blank-line separated; each record is
                    # "username | timestamp" on line 1 and the text after it.
                    content = file.getvalue().decode("utf-8").replace("\r\n", "\n").strip()
                    tweets = content.split("\n\n")
                    for tweet in tweets:
                        parts = tweet.strip().split("\n", 1)
                        if len(parts) != 2:
                            continue  # malformed record — skip silently
                        meta, text_raw = parts

                        # Keep English tweets only; anything undetectable is skipped.
                        # FIX: removed redundant per-tweet DetectorFactory.seed = 0
                        # (the seed is already fixed once at import time).
                        try:
                            lang = detect(text_raw)
                            if lang != 'en':
                                total_tweets_skipped += 1
                                continue
                        except Exception:  # FIX: was a bare except
                            total_tweets_skipped += 1
                            continue

                        username, date_val = meta.split(" | ") if " | " in meta else ("unknown", "unknown")
                        short_date = date_val[:10]  # yyyy-mm-dd prefix of the timestamp
                        text = clean_text(text_raw)

                        # Five best-effort classifications; failures → "neutral".
                        try:
                            v_score = vader.polarity_scores(text)['compound']
                            vader_label = ("positive" if v_score > 0.05
                                           else ("negative" if v_score < -0.05 else "neutral"))
                        except Exception:
                            vader_label = "neutral"
                        try:
                            tb_label = classify_tb(TextBlob(text).sentiment.polarity)
                        except Exception:
                            tb_label = "neutral"
                        try:
                            bertweet_label = map_bertweet(bertweet(text)[0]['label'])
                        except Exception:
                            bertweet_label = "neutral"
                        try:
                            roberta_label = map_roberta(roberta(text)[0]['label'])
                        except Exception:
                            roberta_label = "neutral"
                        try:
                            roberta_large_label = roberta_large(text)[0]['label'].lower()
                        except Exception:
                            roberta_large_label = "neutral"

                        data.append({
                            "date": short_date,
                            "raw_tweet": text_raw.strip(),
                            "cleaned_tweet": text,
                            "vader": vader_label,
                            "textblob": tb_label,
                            "bertweet": bertweet_label,
                            "roberta": roberta_label,
                            "roberta_large": roberta_large_label,
                        })
                        total_tweets_uploaded += 1

                    progress_bar.progress(
                        (idx + 1) / len(tweet_files),
                        text=f"Memproses file {idx+1} dari {len(tweet_files)}",
                    )

                status.update(label="✅ Pemrosesan sentimen teks selesai!", state="complete", expanded=False)

            df = pd.DataFrame(data)
            if df.empty:
                st.error("❌ Data kosong. Pastikan format TXT benar dan tweet berbahasa Inggris.")
            else:
                col_m1, col_m2, col_m3 = st.columns(3)
                col_m1.metric("Tweet Diproses", f"{total_tweets_uploaded}", border=True)
                col_m2.metric("Tweet Diabaikan (Non-EN)", f"{total_tweets_skipped}", border=True)
                col_m3.metric("Model", "5 Model", border=True)

                # Price window: one day of padding on each side of the tweet range.
                target_dates = sorted(df['date'].unique())
                start_unix = int(datetime.strptime(target_dates[0], "%Y-%m-%d")
                                 .replace(tzinfo=timezone.utc).timestamp()) - 86400
                end_unix = int(datetime.strptime(target_dates[-1], "%Y-%m-%d")
                               .replace(tzinfo=timezone.utc).timestamp()) + 86400

                with st.spinner("📡 Mengambil data harga Bitcoin dari CoinGecko API..."):
                    url = "https://api.coingecko.com/api/v3/coins/bitcoin/market_chart/range"
                    params = {"vs_currency": "usd", "from": start_unix, "to": end_unix}
                    headers = {"accept": "application/json", "User-Agent": "Mozilla/5.0"}
                    try:
                        time.sleep(2)  # crude rate-limit courtesy delay for CoinGecko
                        res = requests.get(url, params=params, headers=headers)
                        if res.status_code != 200:
                            st.error(f"API Error {res.status_code}: {res.text}")
                        else:
                            data_json = res.json()
                            if "prices" not in data_json:
                                st.error("Data harga tidak ditemukan di respons API.")
                            else:
                                # Daily mean price → pct change and log returns.
                                prices = data_json["prices"]
                                df_price = pd.DataFrame(prices, columns=["timestamp", "price"])
                                df_price["date"] = pd.to_datetime(df_price["timestamp"], unit="ms").dt.date
                                df_price = df_price.groupby("date")["price"].mean().reset_index()
                                df_price["pct_change"] = df_price["price"].pct_change() * 100
                                df_price["log_return"] = np.log(df_price["price"] / df_price["price"].shift(1))
                                df_price.dropna(inplace=True)
                                # Keep only the dates that actually have tweets.
                                df_price = df_price[df_price["date"].isin(pd.to_datetime(target_dates).date)]

                                if df_price.empty:
                                    st.warning("⚠️ Data Harga API kosong. Pastikan rentang tanggal di .txt sesuai (yyyy-mm-dd).")
                                else:
                                    st.markdown("", unsafe_allow_html=True)
                                    st.markdown("🗣️ Data Sentimen")
                                    raw_display_cols = ["date", "raw_tweet", "vader", "textblob",
                                                        "bertweet", "roberta", "roberta_large"]
                                    st.dataframe(df[raw_display_cols], use_container_width=True, hide_index=True)

                                    # Labels → numeric scores, averaged per day per model.
                                    sentiment_map = {"positive": 1, "neutral": 0, "negative": -1}
                                    df_score = df.copy()
                                    models = ["vader", "textblob", "bertweet", "roberta", "roberta_large"]
                                    for col in models:
                                        df_score[col] = df_score[col].map(sentiment_map)
                                    df_sentiment_daily = df_score.groupby("date")[models].mean().reset_index()
                                    df_sentiment_daily["date"] = pd.to_datetime(df_sentiment_daily["date"]).dt.date
                                    for col in models:
                                        df_sentiment_daily[f"{col}_label"] = df_sentiment_daily[col].apply(get_daily_label)
                                    daily_display_cols = ["date"]
                                    for col in models:
                                        daily_display_cols.extend([col, f"{col}_label"])

                                    st.markdown("₿ Data Harga & Volatilitas Bitcoin")
                                    st.dataframe(df_price[["date", "price", "pct_change", "log_return"]],
                                                 use_container_width=True, hide_index=True)

                                    # Inner join: only days with both price and sentiment.
                                    df_merged = pd.merge(df_price, df_sentiment_daily, on="date", how="inner")
                                    st.markdown("🗂️ Data Final")
                                    final_display_cols = (["date", "price", "pct_change", "log_return"]
                                                          + [c for c in daily_display_cols if c != "date"])
                                    st.dataframe(df_merged[final_display_cols],
                                                 use_container_width=True, hide_index=True)

                                    # Downloads: CSV and XLSX of the merged table.
                                    col_dl1, col_dl2, _ = st.columns([1, 1, 3])
                                    csv_data = df_merged.to_csv(index=False).encode('utf-8')
                                    col_dl1.download_button("📥 Unduh CSV", data=csv_data,
                                                            file_name="sentiment_volatility.csv",
                                                            mime="text/csv", use_container_width=True)
                                    buffer = io.BytesIO()
                                    with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
                                        df_merged.to_excel(writer, index=False)
                                    # FIX: correct MIME type for .xlsx (was legacy
                                    # "application/vnd.ms-excel", which is the .xls type).
                                    col_dl2.download_button(
                                        "📥 Unduh Excel", data=buffer.getvalue(),
                                        file_name="sentiment_volatility.xlsx",
                                        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                                        use_container_width=True,
                                    )
                                    st.markdown("", unsafe_allow_html=True)

                                    # ---- Pearson correlation per model ----
                                    st.subheader("🔬 Uji Korelasi Pearson")
                                    st.caption("Menganalisis hubungan statistik antara skor sentimen harian dan volatilitas log-return BTC.")
                                    corr_data = []
                                    raw_corr_results = []
                                    for method in models:
                                        corr, pval = pearsonr(df_merged["log_return"], df_merged[method])
                                        arah = "Positif" if corr > 0 else "Negatif"
                                        sig = "Signifikan" if pval < 0.05 else "Tidak Signifikan"
                                        corr_data.append({"Metode": method.upper(),
                                                          "r (Korelasi)": f"{corr:.4f}",
                                                          "Arah": arah,
                                                          "p-value": f"{pval:.4f}",
                                                          "Status": sig})
                                        raw_corr_results.append({"metode": method.upper(), "r": corr, "p": pval})
                                    st.table(pd.DataFrame(corr_data))

                                    # ---- Scatter plots (3 per row) ----
                                    st.subheader("🔵 Pola Distribusi Scatter Plot")
                                    cols = st.columns(3)
                                    for idx2, method in enumerate(models):
                                        with cols[idx2 % 3]:
                                            fig_s, ax_s = plt.subplots(figsize=(5, 4))
                                            sns.regplot(data=df_merged, x=method, y="log_return", ax=ax_s,
                                                        scatter_kws={"s": 40, "color": "#10b981", "alpha": 0.5},
                                                        line_kws={"color": "#0f172a", "linewidth": 2})
                                            ax_s.set_title(f"{method.upper()}", fontweight='bold')
                                            ax_s.set_xlabel("Sentimen Score")
                                            ax_s.set_ylabel("Log Return")
                                            plt.tight_layout()
                                            st.pyplot(fig_s)

                                    # ---- Trend line chart ----
                                    st.subheader("📈 Trend Analisis: Sentiment vs BTC Volatility")
                                    fig_line, ax_line = plt.subplots(figsize=(14, 6))
                                    ax_line.plot(df_merged["date"], df_merged["log_return"],
                                                 label="BTC Log Return", color="#f7931a", linewidth=3)
                                    colors = ["#3B82F6", "#10B981", "#EC4899", "#14B8A6", "#6366F1"]
                                    for i, method in enumerate(["vader", "textblob", "roberta",
                                                                "roberta_large", "bertweet"]):
                                        ax_line.plot(df_merged["date"], df_merged[method],
                                                     label=f"Sentiment: {method.upper()}",
                                                     color=colors[i], linewidth=1.5,
                                                     linestyle="--", alpha=0.8)
                                    ax_line.set_title("Pergerakan Sentimen vs Log Return Bitcoin",
                                                      fontsize=14, pad=15, fontweight='bold')
                                    ax_line.set_xlabel("Tanggal", fontsize=11)
                                    ax_line.set_ylabel("Nilai Metrik", fontsize=11)
                                    ax_line.legend(loc='upper left', bbox_to_anchor=(1, 1), frameon=True)
                                    plt.tight_layout()
                                    st.pyplot(fig_line)

                                    # ---- Conclusion ----
                                    st.markdown("", unsafe_allow_html=True)
                                    st.subheader("📝 Kesimpulan")
                                    max_idx = df_merged["log_return"].idxmax()
                                    min_idx = df_merged["log_return"].idxmin()
                                    date_max = df_merged.loc[max_idx, "date"]
                                    date_min = df_merged.loc[min_idx, "date"]
                                    sig_models = [r["metode"] for r in raw_corr_results if r["p"] < 0.05]
                                    strongest = max(raw_corr_results, key=lambda x: abs(x["r"]))
                                    arah_text = ("berbanding lurus (positif)" if strongest["r"] > 0
                                                 else "berbanding terbalik (negatif)")
                                    st.write(f"Puncak lonjakan positif (*max log return*) terjadi pada **{date_max}**, sedangkan penurunan ekstrem terjadi pada **{date_min}**.")
                                    if sig_models:
                                        st.success(f"""
**Hipotesis Diterima (H1):** Ditemukan korelasi linier yang signifikan pada metode **{', '.join(sig_models)}** (*p-value* < 0.05).
Metode dengan pemetaan respons pasar terkuat adalah **{strongest['metode']}**, dengan sifat hubungan **{arah_text}**.
""")
                                    else:
                                        st.warning("""
**Hipotesis Ditolak (H0 Diterima):** Tidak ditemukan bukti empiris korelasi linier yang signifikan (seluruh *p-value* >= 0.05).
Volatilitas harga cenderung dipengaruhi oleh faktor teknikal/fundamental di luar sentimen X.
""")
                    except Exception as e:
                        st.error(f"⚠️ Terjadi kesalahan saat mengambil atau memproses data API CoinGecko: {e}")

    elif analyze_batch_btn and not tweet_files:
        st.warning("⚠️ Silakan unggah minimal satu file .txt terlebih dahulu.")