import streamlit as st
import pandas as pd
import numpy as np
import re
import io
import time
import requests
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timezone
from textblob import TextBlob
from scipy.stats import pearsonr
import nltk
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from transformers import pipeline
import os
import streamlit.components.v1 as components
from langdetect import detect, DetectorFactory
# langdetect is non-deterministic by default; a fixed seed makes results stable.
DetectorFactory.seed = 0
# ==============================
# ABSOLUTE IMAGE ASSET PATHS
# ==============================
# Resolve asset paths relative to this file so the app works from any CWD.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
img_hero = os.path.join(BASE_DIR, "bitcoin1.gif")
img_batch = os.path.join(BASE_DIR, "bitcoin2.gif")
# ==============================
# PAGE CONFIG & NAVIGATION STATE
# ==============================
st.set_page_config(
page_title="Bitcoin Volatility Sentiment",
page_icon="₿",
layout="wide",
initial_sidebar_state="collapsed"
)
# Two-"page" app toggled via session state; default is the single-sentence page.
if 'page' not in st.session_state:
st.session_state.page = "uji_kalimat"
# ==============================
# GLOBAL CSS
# ==============================
# NOTE(review): the stylesheet content of this markdown block appears to have
# been stripped during extraction — restore the original <style> payload.
st.markdown("""
""", unsafe_allow_html=True)
# ==============================
# AUTO-SCROLL HELPER
# ==============================
def scroll_to_target(target_id):
# Inject an invisible (0x0) HTML component whose JavaScript scrolls the
# browser viewport to the element with id `target_id`.
# NOTE(review): the JS payload appears to have been stripped from this
# f-string — it should interpolate {target_id}; restore before deploying.
js_code = f"""
"""
components.html(js_code, height=0, width=0)
# ==============================
# HEADER / NAVBAR
# ==============================
def set_page(page_name):
# Navigation callback: remember which page to render on the next rerun.
st.session_state.page = page_name
# Logo on the left, the two page-switch buttons on the right.
col_logo, col_space, col_btn1, col_btn2 = st.columns([5, 3, 2, 2], vertical_alignment="center")
with col_logo:
st.markdown("""
₿
Bitcoin Volatility Sentiment
""", unsafe_allow_html=True)
with col_btn1:
# Highlight the active page's button (orange) vs. the ghost style.
# NOTE(review): the wrapper-div HTML of the st.markdown calls below appears
# stripped — css_class is effectively unused until it is restored.
is_uji = st.session_state.page == "uji_kalimat"
css_class = "btn-orange" if is_uji else "btn-ghost"
st.markdown(f'', unsafe_allow_html=True)
if st.button("Uji Kalimat", use_container_width=True, key="nav_uji"):
set_page("uji_kalimat"); st.rerun()
st.markdown('
', unsafe_allow_html=True)
with col_btn2:
is_batch = st.session_state.page == "analisis_batch"
css_class = "btn-orange" if is_batch else "btn-ghost"
st.markdown(f'', unsafe_allow_html=True)
if st.button("Analisis Batch", use_container_width=True, key="nav_batch"):
set_page("analisis_batch"); st.rerun()
st.markdown('
', unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)
# ==============================
# DOWNLOAD RESOURCES & LOAD MODELS
# ==============================
@st.cache_resource
def download_nltk_resources():
# Fetch the NLTK corpora once per server process (Streamlit caches this
# call, so reruns do not re-download).
nltk.download('stopwords', quiet=True)
nltk.download('vader_lexicon', quiet=True)
nltk.download('punkt', quiet=True)
nltk.download('omw-1.4', quiet=True)
download_nltk_resources()
# English stop-word set shared by clean_text().
stop_words = set(stopwords.words('english'))
@st.cache_resource
def load_all_models():
# Build the four analyzer objects once and share them across reruns.
# device=-1 pins the three transformer pipelines to CPU; truncation caps
# inputs at each model's max token length.
vader = SentimentIntensityAnalyzer()
bertweet = pipeline("sentiment-analysis", model="finiteautomata/bertweet-base-sentiment-analysis", device=-1, truncation=True, max_length=128)
roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment", device=-1, truncation=True, max_length=512)
roberta_large = pipeline("sentiment-analysis", model="siebert/sentiment-roberta-large-english", device=-1, truncation=True, max_length=512)
return vader, bertweet, roberta, roberta_large
# First load downloads model weights, hence the spinner.
with st.spinner('Mempersiapkan model AI...'):
vader, bertweet, roberta, roberta_large = load_all_models()
# ==============================
# TEXT CLEANING & LABEL MAPPING HELPERS
# ==============================
def clean_text(text):
    """Normalize a raw tweet for the sentiment models.

    Lowercases the text, removes URLs, @mentions, #hashtags and
    punctuation, then drops English stop words. Returns the remaining
    tokens joined by single spaces.
    """
    lowered = str(text).lower()
    # Strip noise in order: URLs, mentions, hashtags, leftover punctuation.
    for pattern in (r"http\S+", r"@\w+", r"#\w+", r"[^\w\s]"):
        lowered = re.sub(pattern, "", lowered)
    # `stop_words` is the module-level NLTK English stop-word set.
    kept = [tok for tok in lowered.split() if tok not in stop_words]
    return " ".join(kept)
def classify_tb(score):
    """Map a TextBlob polarity score to a lowercase sentiment label.

    Scores above 0.05 are 'positive', below -0.05 'negative',
    anything in between (inclusive) 'neutral'.
    """
    if score > 0.05:
        return 'positive'
    elif score < -0.05:
        return 'negative'
    else:
        return 'neutral'
def map_roberta(label):
    """Translate cardiffnlp RoBERTa output labels (LABEL_0/1/2) into
    'negative'/'neutral'/'positive'; unknown labels fall back to 'neutral'."""
    mapping = {
        "LABEL_0": "negative",
        "LABEL_1": "neutral",
        "LABEL_2": "positive",
    }
    return mapping.get(label, "neutral")
def map_bertweet(label):
    """Translate BERTweet labels (POS/NEU/NEG, any letter case) into full
    sentiment words; unrecognized labels fall back to 'neutral'."""
    key = label.lower()
    if key == "pos":
        return "positive"
    if key == "neg":
        return "negative"
    # "neu" and anything unexpected both map to neutral.
    return "neutral"
def get_daily_label(score):
    """Label a daily mean sentiment score with a capitalized display label.

    Same thresholds as classify_tb (+/-0.05), but capitalized for the
    daily-aggregate tables.
    """
    if score > 0.05:
        return 'Positive'
    if score < -0.05:
        return 'Negative'
    return 'Neutral'
# ==============================================================================
# PAGE 1 — SINGLE-SENTENCE TEST ("UJI KALIMAT")
# ==============================================================================
# Hero section: project description, a text area for one English tweet, and a
# button that triggers the 5-model sentiment comparison rendered further below.
# NOTE(review): the HTML payloads of several st.markdown(...) calls on this
# page appear to have been stripped during extraction — restore them.
if st.session_state.page == "uji_kalimat":
st.markdown('', unsafe_allow_html=True)
col_text, col_img = st.columns([1.1, 1], gap="large")
with col_text:
st.markdown("""
Website ini bukanlah alat prediksi harga Bitcoin real time, melainkan instrumen untuk melakukan analisis sentimen publik secara batch
Bitcoin Volatility
vs Public Sentiment
Analisis Volatilitas Harga Bitcoin Terhadap Sentimen Publik
Pada Platform X Berbasis Python.
Peneliti: Arya Galuh Saputra · H1D022022
""", unsafe_allow_html=True)
# Default text is a sarcastic example — useful for showing model disagreement.
user_input = st.text_area(
"Masukkan Tweet (Bahasa Inggris):",
"Great, Bitcoin just crashed another 10% today.",
height=120
)
st.markdown("
", unsafe_allow_html=True)
col_btn1, col_btn2 = st.columns([1.6, 1])
with col_btn1:
st.markdown('
', unsafe_allow_html=True)
analyze_btn = st.button("Proses Uji Kalimat", use_container_width=True)
st.markdown('
', unsafe_allow_html=True)
with col_img:
st.markdown("
", unsafe_allow_html=True)
# Fall back to a placeholder message if the GIF asset is missing.
try:
st.image(img_hero, use_container_width=True)
except Exception:
st.markdown("""
🖼️ Gambar Tidak Ditemukan
Pastikan file bitcoin1.gif ada di direktori
""", unsafe_allow_html=True)
st.markdown('
', unsafe_allow_html=True)
st.markdown('', unsafe_allow_html=True)
# Render the five-model comparison once the button is pressed.
if analyze_btn:
scroll_to_target("target-uji-kalimat")
col_space_left, col_center_output, col_space_right = st.columns([1, 4, 1])
with col_center_output:
st.markdown("""
Output Analisis
Hasil Deteksi Sentimen
""", unsafe_allow_html=True)
# Warn (but do not block) when the input is probably not English — all
# five models are English-only.
# NOTE(review): the bare `except:` silently hides langdetect failures;
# prefer catching LangDetectException explicitly.
try:
if detect(user_input) != 'en':
st.warning("⚠️ Teks sepertinya bukan bahasa Inggris. Hasil prediksi mungkin memiliki bias.")
except:
pass
text = clean_text(user_input)
with st.spinner("Mengekstraksi sentimen dengan 5 Model..."):
time.sleep(0.5)
# Each model falls back to "neutral" on any failure.
# NOTE(review): VADER's compound score is computed twice on the next
# line — compute it once and reuse; the bare excepts also mask bugs.
try: v_label = "positive" if vader.polarity_scores(text)['compound'] > 0.05 else ("negative" if vader.polarity_scores(text)['compound'] < -0.05 else "neutral")
except: v_label = "neutral"
try: t_label = classify_tb(TextBlob(text).sentiment.polarity)
except: t_label = "neutral"
try: b_label = map_bertweet(bertweet(text)[0]['label'])
except: b_label = "neutral"
try: r_label = map_roberta(roberta(text)[0]['label'])
except: r_label = "neutral"
try: rl_label = roberta_large(text)[0]['label'].lower()
except: rl_label = "neutral"
# Card styling: background and accent color per sentiment label.
def badge_color(label):
return {"positive": "#e6fff1", "negative": "#fef1f2", "neutral": "#f1f5f9"}[label]
def badge_text_color(label):
return {"positive": "#10b981", "negative": "#f43f5e", "neutral": "#64748b"}[label]
results = [
("VADER", v_label),
("TextBlob", t_label),
("BERTweet", b_label),
("RoBERTa Base", r_label),
("RoBERTa Large", rl_label),
]
# Lay the five result cards out across two alternating columns.
col_a, col_b = st.columns(2)
for i, (method, label) in enumerate(results):
col = col_a if i % 2 == 0 else col_b
bg = badge_color(label)
tc = badge_text_color(label)
icon = "↗" if label == "positive" else ("↘" if label == "negative" else "→")
with col:
st.markdown(f"""
{method}
{label.capitalize()}
{icon} {label.upper()}
""", unsafe_allow_html=True)
# ==============================================================================
# PAGE 2 — BATCH ANALYSIS
# ==============================================================================
elif st.session_state.page == "analisis_batch":
# Light chart theme matching the app's slate/white palette.
plt.style.use('default')
sns.set_theme(style="whitegrid", rc={
"axes.facecolor": "#FFFFFF",
"figure.facecolor": "#FAFAFA",
"axes.edgecolor": "#e2e8f0",
"text.color": "#0f172a",
"xtick.color": "#64748b",
"ytick.color": "#64748b",
"grid.color": "#f1f5f9",
})
st.markdown('', unsafe_allow_html=True)
col_upload, col_img_b = st.columns([1.4, 1], gap="large")
with col_upload:
st.markdown("""
Analisis Batch Processing
Volatilitas Harga Bitcoin Vs Sentimen Publik
Kolerasi Multi-Metode Analisis Sentimen
Unggah file tweets (.txt) untuk diekstraksi dan
dianalisis terhadap volatilitas harga Bitcoin.
""", unsafe_allow_html=True)
# Each .txt holds tweets separated by blank lines; a tweet is a
# "username | timestamp" header line followed by the tweet text.
tweet_files = st.file_uploader(
"Pilih file Tweet (.txt)",
type=['txt'],
accept_multiple_files=True
)
with st.expander("Format TXT yang Didukung"):
st.code(
"username | 2024-03-01 14:00:00\n"
"Isi tweet baris pertama di sini\n\n"
"username2 | 2024-03-01 15:30:00\n"
"Isi tweet baris kedua di sini",
language="text"
)
st.markdown("
", unsafe_allow_html=True)
st.markdown('
', unsafe_allow_html=True)
analyze_batch_btn = st.button("Eksekusi Analisis", key="batch_btn", use_container_width=False)
st.markdown('
', unsafe_allow_html=True)
with col_img_b:
st.markdown("
", unsafe_allow_html=True)
# Fall back to a placeholder message if the GIF asset is missing.
try:
st.image(img_batch, use_container_width=True)
except Exception:
st.markdown("""
🖼️ Gambar Tidak Ditemukan
Pastikan file bitcoin2.gif ada di direktori
""", unsafe_allow_html=True)
st.markdown('
', unsafe_allow_html=True)
st.markdown('', unsafe_allow_html=True)
# Run the batch pipeline only when files are uploaded AND the button is pressed.
if tweet_files and analyze_batch_btn:
scroll_to_target("target-analisis-batch")
col_b_space1, col_b_content, col_b_space2 = st.columns([1, 8, 1])
with col_b_content:
st.markdown("""
Hasil Pemrosesan
Dashboard Analisis
""", unsafe_allow_html=True)
# Process files in a stable (name) order so output is reproducible.
tweet_files = sorted(tweet_files, key=lambda x: x.name)
data = []
with st.status("🔄 Memproses data sentimen...", expanded=True) as status:
progress_bar = st.progress(0, text="Mengekstrak sentimen dari data...")
total_tweets_uploaded = 0
total_tweets_skipped = 0
for idx, file in enumerate(tweet_files):
# Normalize CRLF line endings; tweets are separated by blank lines.
content = file.getvalue().decode("utf-8").replace("\r\n", "\n").strip()
tweets = content.split("\n\n")
for tweet in tweets:
# First line is the "username | timestamp" header; the rest is text.
parts = tweet.strip().split("\n", 1)
if len(parts) != 2: continue
meta, text_raw = parts
# Keep only tweets detected as English; count the rest as skipped.
# NOTE(review): re-seeding DetectorFactory here is redundant (already
# seeded at import time) and the bare except hides real detect errors.
try:
DetectorFactory.seed = 0
lang = detect(text_raw)
if lang != 'en':
total_tweets_skipped += 1
continue
except:
total_tweets_skipped += 1
continue
username, date_val = meta.split(" | ") if " | " in meta else ("unknown", "unknown")
# Daily aggregation key: the yyyy-mm-dd prefix of the timestamp.
short_date = date_val[:10]
text = clean_text(text_raw)
# Score with all five models; any model failure falls back to "neutral".
try: v_score = vader.polarity_scores(text)['compound']; vader_label = "positive" if v_score > 0.05 else ("negative" if v_score < -0.05 else "neutral")
except: vader_label = "neutral"
try: tb_label = classify_tb(TextBlob(text).sentiment.polarity)
except: tb_label = "neutral"
try: bertweet_label = map_bertweet(bertweet(text)[0]['label'])
except: bertweet_label = "neutral"
try: roberta_label = map_roberta(roberta(text)[0]['label'])
except: roberta_label = "neutral"
try: roberta_large_label = roberta_large(text)[0]['label'].lower()
except: roberta_large_label = "neutral"
data.append({
"date": short_date, "raw_tweet": text_raw.strip(), "cleaned_tweet": text,
"vader": vader_label, "textblob": tb_label, "bertweet": bertweet_label,
"roberta": roberta_label, "roberta_large": roberta_large_label,
})
total_tweets_uploaded += 1
# Progress advances per file, not per tweet.
progress_bar.progress((idx + 1) / len(tweet_files),
text=f"Memproses file {idx+1} dari {len(tweet_files)}")
status.update(label="✅ Pemrosesan sentimen teks selesai!", state="complete", expanded=False)
df = pd.DataFrame(data)
if df.empty:
st.error("❌ Data kosong. Pastikan format TXT benar dan tweet berbahasa Inggris.")
else:
# Summary metrics for the processed batch.
col_m1, col_m2, col_m3 = st.columns(3)
col_m1.metric("Tweet Diproses", f"{total_tweets_uploaded}", border=True)
col_m2.metric("Tweet Diabaikan (Non-EN)", f"{total_tweets_skipped}", border=True)
col_m3.metric("Model", "5 Model", border=True)
# Price window: tweet date range padded by one day on each side
# (unix seconds, UTC) for the CoinGecko range endpoint.
target_dates = sorted(df['date'].unique())
start_unix = int(datetime.strptime(target_dates[0], "%Y-%m-%d").replace(tzinfo=timezone.utc).timestamp()) - 86400
end_unix = int(datetime.strptime(target_dates[-1], "%Y-%m-%d").replace(tzinfo=timezone.utc).timestamp()) + 86400
with st.spinner("📡 Mengambil data harga Bitcoin dari CoinGecko API..."):
url = "https://api.coingecko.com/api/v3/coins/bitcoin/market_chart/range"
params = {"vs_currency": "usd", "from": start_unix, "to": end_unix}
headers = {"accept": "application/json", "User-Agent": "Mozilla/5.0"}
try:
# Short pause to stay within CoinGecko's public rate limit.
time.sleep(2)
res = requests.get(url, params=params, headers=headers)
if res.status_code != 200:
st.error(f"API Error {res.status_code}: {res.text}")
else:
data_json = res.json()
if "prices" not in data_json:
st.error("Data harga tidak ditemukan di respons API.")
else:
# Average intraday points into one price per calendar day, then
# derive daily % change and log returns (volatility proxy).
prices = data_json["prices"]
df_price = pd.DataFrame(prices, columns=["timestamp", "price"])
df_price["date"] = pd.to_datetime(df_price["timestamp"], unit="ms").dt.date
df_price = df_price.groupby("date")["price"].mean().reset_index()
df_price["pct_change"] = df_price["price"].pct_change() * 100
df_price["log_return"] = np.log(df_price["price"] / df_price["price"].shift(1))
df_price.dropna(inplace=True)
# Keep only the days that actually appear in the tweet data.
df_price = df_price[df_price["date"].isin(pd.to_datetime(target_dates).date)]
if df_price.empty:
st.warning("⚠️ Data Harga API kosong. Pastikan rentang tanggal di .txt sesuai (yyyy-mm-dd).")
else:
st.markdown("
", unsafe_allow_html=True)
st.markdown("🗣️ Data Sentimen")
raw_display_cols = ["date","raw_tweet","vader","textblob","bertweet","roberta","roberta_large"]
st.dataframe(df[raw_display_cols], use_container_width=True, hide_index=True)
# Encode labels numerically (-1/0/1) and average per day per model.
sentiment_map = {"positive": 1, "neutral": 0, "negative": -1}
df_score = df.copy()
for col in ["vader","textblob","bertweet","roberta","roberta_large"]:
df_score[col] = df_score[col].map(sentiment_map)
models = ["vader","textblob","bertweet","roberta","roberta_large"]
df_sentiment_daily = df_score.groupby("date")[models].mean().reset_index()
df_sentiment_daily["date"] = pd.to_datetime(df_sentiment_daily["date"]).dt.date
for col in models:
df_sentiment_daily[f"{col}_label"] = df_sentiment_daily[col].apply(get_daily_label)
daily_display_cols = ["date"]
for col in models:
daily_display_cols.extend([col, f"{col}_label"])
st.markdown("₿ Data Harga & Volatilitas Bitcoin")
st.dataframe(df_price[["date","price","pct_change","log_return"]], use_container_width=True, hide_index=True)
# Inner join: only days present in BOTH price and sentiment data survive.
df_merged = pd.merge(df_price, df_sentiment_daily, on="date", how="inner")
st.markdown("🗂️ Data Final")
final_display_cols = ["date","price","pct_change","log_return"] + [c for c in daily_display_cols if c != "date"]
st.dataframe(df_merged[final_display_cols], use_container_width=True, hide_index=True)
col_dl1, col_dl2, _ = st.columns([1, 1, 3])
csv_data = df_merged.to_csv(index=False).encode('utf-8')
col_dl1.download_button("📥 Unduh CSV", data=csv_data, file_name="sentiment_volatility.csv", mime="text/csv", use_container_width=True)
buffer = io.BytesIO()
with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
df_merged.to_excel(writer, index=False)
# NOTE(review): for .xlsx the standard MIME type is
# application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,
# not the legacy application/vnd.ms-excel used below.
col_dl2.download_button("📥 Unduh Excel", data=buffer.getvalue(), file_name="sentiment_volatility.xlsx", mime="application/vnd.ms-excel", use_container_width=True)
st.markdown("
", unsafe_allow_html=True)
# Pearson correlation: daily sentiment score vs. BTC log return, per model.
st.subheader("🔬 Uji Korelasi Pearson")
st.caption("Menganalisis hubungan statistik antara skor sentimen harian dan volatilitas log-return BTC.")
corr_data = []
raw_corr_results = []
# NOTE(review): pearsonr requires at least 2 merged rows; with fewer days
# of data this raises and is only caught by the broad handler below.
for method in ["vader","textblob","bertweet","roberta","roberta_large"]:
corr, pval = pearsonr(df_merged["log_return"], df_merged[method])
arah = "Positif" if corr > 0 else "Negatif"
sig = "Signifikan" if pval < 0.05 else "Tidak Signifikan"
corr_data.append({"Metode": method.upper(), "r (Korelasi)": f"{corr:.4f}", "Arah": arah, "p-value": f"{pval:.4f}", "Status": sig})
raw_corr_results.append({"metode": method.upper(), "r": corr, "p": pval})
st.table(pd.DataFrame(corr_data))
# Scatter plots with a fitted regression line, laid out three per row.
st.subheader("🔵 Pola Distribusi Scatter Plot")
cols = st.columns(3)
for idx2, method in enumerate(["vader","textblob","bertweet","roberta","roberta_large"]):
with cols[idx2 % 3]:
fig_s, ax_s = plt.subplots(figsize=(5, 4))
sns.regplot(data=df_merged, x=method, y="log_return", ax=ax_s,
scatter_kws={"s": 40, "color": "#10b981", "alpha": 0.5},
line_kws={"color": "#0f172a", "linewidth": 2})
ax_s.set_title(f"{method.upper()}", fontweight='bold')
ax_s.set_xlabel("Sentimen Score")
ax_s.set_ylabel("Log Return")
plt.tight_layout()
st.pyplot(fig_s)
# Time-series overlay: log return (solid) vs. each model's daily score (dashed).
st.subheader("📈 Trend Analisis: Sentiment vs BTC Volatility")
fig_line, ax_line = plt.subplots(figsize=(14, 6))
ax_line.plot(df_merged["date"], df_merged["log_return"], label="BTC Log Return", color="#f7931a", linewidth=3)
colors = ["#3B82F6","#10B981","#EC4899","#14B8A6","#6366F1"]
for i, method in enumerate(["vader","textblob","roberta","roberta_large","bertweet"]):
ax_line.plot(df_merged["date"], df_merged[method], label=f"Sentiment: {method.upper()}", color=colors[i], linewidth=1.5, linestyle="--", alpha=0.8)
ax_line.set_title("Pergerakan Sentimen vs Log Return Bitcoin", fontsize=14, pad=15, fontweight='bold')
ax_line.set_xlabel("Tanggal", fontsize=11)
ax_line.set_ylabel("Nilai Metrik", fontsize=11)
ax_line.legend(loc='upper left', bbox_to_anchor=(1, 1), frameon=True)
plt.tight_layout()
st.pyplot(fig_line)
# Conclusion: return extremes, significant models, and hypothesis verdict.
st.markdown("
", unsafe_allow_html=True)
st.subheader("📝 Kesimpulan")
max_idx = df_merged["log_return"].idxmax()
min_idx = df_merged["log_return"].idxmin()
date_max = df_merged.loc[max_idx, "date"]
date_min = df_merged.loc[min_idx, "date"]
# Models whose correlation with log return is significant at alpha = 0.05.
sig_models = [r["metode"] for r in raw_corr_results if r["p"] < 0.05]
strongest = max(raw_corr_results, key=lambda x: abs(x["r"]))
arah_text = "berbanding lurus (positif)" if strongest["r"] > 0 else "berbanding terbalik (negatif)"
st.write(f"Puncak lonjakan positif (*max log return*) terjadi pada **{date_max}**, sedangkan penurunan ekstrem terjadi pada **{date_min}**.")
if sig_models:
st.success(f"""
**Hipotesis Diterima (H1):** Ditemukan korelasi linier yang signifikan pada metode **{', '.join(sig_models)}** (*p-value* < 0.05).
Metode dengan pemetaan respons pasar terkuat adalah **{strongest['metode']}**, dengan sifat hubungan **{arah_text}**.
""")
else:
st.warning("""
**Hipotesis Ditolak (H0 Diterima):** Tidak ditemukan bukti empiris korelasi linier yang signifikan (seluruh *p-value* >= 0.05).
Volatilitas harga cenderung dipengaruhi oleh faktor teknikal/fundamental di luar sentimen X.
""")
# Single broad guard for the entire CoinGecko fetch/processing pipeline above.
except Exception as e:
st.error(f"⚠️ Terjadi kesalahan saat mengambil atau memproses data API CoinGecko: {e}")
# Button pressed without any uploaded file: prompt the user to upload first.
elif analyze_batch_btn and not tweet_files:
st.warning("⚠️ Silakan unggah minimal satu file .txt terlebih dahulu.")