# BTS / src/streamlit_app.py
# (Repository page header captured with the file: "Varriety's picture",
#  "Update src/streamlit_app.py", commit b613c5c, verified.)
import streamlit as st
import pandas as pd
import numpy as np
import re
import io
import time
import requests
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timezone
from textblob import TextBlob
from scipy.stats import pearsonr
import nltk
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from transformers import pipeline
import os
import streamlit.components.v1 as components
from langdetect import detect, DetectorFactory
DetectorFactory.seed = 0
# ==============================
# ABSOLUTE IMAGE PATHS
# ==============================
# Resolve the GIFs relative to this file so they are found regardless of the
# working directory the app is launched from.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
img_hero = os.path.join(BASE_DIR, "bitcoin1.gif")
img_batch = os.path.join(BASE_DIR, "bitcoin2.gif")

# ==============================
# PAGE CONFIGURATION & NAVIGATION STATE
# ==============================
st.set_page_config(
    page_title="Bitcoin Volatility Sentiment",
    # The Bitcoin sign; the previous literal was the mis-decoded (mojibake)
    # form of this same character.
    page_icon="₿",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# First visit in a session: start on the single-sentence test page.
if 'page' not in st.session_state:
    st.session_state.page = "uji_kalimat"
# ==============================
# GLOBAL CSS
# ==============================
# One stylesheet injected at the top of every run. The rules are unchanged;
# only the decorative characters inside the CSS comments were repaired from
# their mis-decoded (mojibake) form.
st.markdown("""
<style>
/* ── Google Fonts ── */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
/* ── Reset Streamlit chrome ── */
#MainMenu, footer, header { visibility: hidden; }
.block-container {
padding-top: 1rem !important;
padding-bottom: 0 !important;
max-width: 100% !important;
}
html, body, [class*="css"] {
font-family: 'Inter', -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif !important;
color: #202630 !important;
}
.stApp {
background-color: #FAFAFA !important;
}
/* ── Custom Scrollbar ── */
::-webkit-scrollbar { width: 6px; }
::-webkit-scrollbar-track { background: transparent; }
::-webkit-scrollbar-thumb { background: #eaecef; border-radius: 3px; }
::-webkit-scrollbar-thumb:hover { background: #10b981; }
/* ── NAVBAR WRAPPER ── */
.vbc-logo {
font-weight: 700;
font-size: 1.25rem;
color: #0f172a !important;
display: flex;
align-items: center;
gap: 10px;
}
.vbc-logo-icon {
background: #10b981;
color: white;
width: 36px;
height: 36px;
border-radius: 8px;
display: inline-flex;
align-items: center;
justify-content: center;
font-size: 1.1rem;
font-weight: 800;
}
/* ── HERO SECTION (Uji Kalimat) ── */
.hero-wrap {
background: #FAFAFA;
min-height: auto;
display: flex;
align-items: center;
position: relative;
overflow: hidden;
padding: 0.5rem 3rem 2rem 3rem;
}
.hero-badge {
display: inline-block;
background: #e6fff1;
color: #1aa64a;
font-size: 0.75rem;
font-weight: 600;
padding: 6px 12px;
border-radius: 4px;
margin-bottom: 1.5rem;
}
.hero-title {
font-size: 3rem;
font-weight: 800;
line-height: 1.2;
color: #0f172a !important;
margin: 0 0 1rem;
}
.hero-title span {
color: #10b981;
}
.hero-sub {
font-size: 1rem;
color: #64748b !important;
max-width: 500px;
line-height: 1.6;
margin-bottom: 2rem;
}
.hero-card {
background: #FFFFFF;
border: 1px solid #e2e8f0;
border-radius: 50px;
padding: 10px 20px;
display: inline-flex;
align-items: center;
gap: 10px;
margin-bottom: 2rem;
box-shadow: 0 2px 4px rgba(0,0,0,0.02);
}
.hero-card-dot {
width: 8px; height: 8px;
border-radius: 50%;
background: #10b981;
flex-shrink: 0;
}
.hero-card p {
margin: 0;
font-size: 0.85rem;
color: #334155 !important;
font-weight: 600;
}
/* ── BATCH SECTION ── */
.batch-wrap {
background: #FAFAFA;
min-height: auto;
padding: 0.5rem 3rem 2rem 3rem;
}
.batch-eyebrow {
font-size: 0.85rem;
font-weight: 700;
color: #10b981 !important;
margin-bottom: 0.5rem;
}
.batch-title {
font-size: 2.5rem;
font-weight: 800;
color: #0f172a !important;
line-height: 1.2;
margin-bottom: 1rem;
}
.batch-sub {
font-size: 1rem;
color: #64748b !important;
max-width: 480px;
line-height: 1.6;
margin-bottom: 2rem;
}
/* ── RESULT / DASHBOARD SECTION ── */
.result-wrap {
background: #FFFFFF;
padding: 2rem 3rem 3rem 3rem;
border: 1px solid #e2e8f0;
border-radius: 16px;
box-shadow: 0 4px 6px rgba(0,0,0,0.01);
margin-bottom: 3rem;
}
.section-label {
font-size: 0.85rem;
font-weight: 700;
color: #10b981 !important;
margin-bottom: 0.5rem;
}
.section-title {
font-size: 1.5rem;
font-weight: 800;
color: #0f172a !important;
margin-bottom: 1.5rem;
}
/* ── METRIC CARDS ── */
div[data-testid="stMetric"] {
background: #FFFFFF;
border: 1px solid #e2e8f0;
border-radius: 12px;
padding: 1rem 1.2rem !important;
box-shadow: 0 2px 4px rgba(0,0,0,0.02);
}
div[data-testid="stMetricLabel"] > div {
color: #64748b !important;
font-size: 0.85rem !important;
font-weight: 600 !important;
}
div[data-testid="stMetricValue"] > div {
color: #0f172a !important;
font-weight: 800 !important;
font-size: 1.8rem !important;
}
/* ── BUTTONS ── */
div[data-testid="stButton"] > button {
font-weight: 600 !important;
font-size: 0.9rem !important;
border-radius: 50px !important;
padding: 0.5rem 1.2rem !important;
height: 42px !important;
transition: all 0.2s ease-in-out !important;
}
div[data-testid="stButton"] > button:focus:not(:active) {
box-shadow: none !important;
}
/* Primary CTA */
.btn-primary div[data-testid="stButton"] > button {
background: #10b981 !important;
color: #FFFFFF !important;
border: none !important;
}
.btn-primary div[data-testid="stButton"] > button:hover {
background: #059669 !important;
}
/* Secondary outline */
.btn-outline-white div[data-testid="stButton"] > button {
background: #FFFFFF !important;
color: #0f172a !important;
border: 1px solid #e2e8f0 !important;
}
.btn-outline-white div[data-testid="stButton"] > button:hover {
border-color: #10b981 !important;
color: #10b981 !important;
}
/* Active nav */
.btn-orange div[data-testid="stButton"] > button {
background: #e6fff1 !important;
color: #10b981 !important;
border: none !important;
}
/* Ghost nav — inactive */
.btn-ghost div[data-testid="stButton"] > button {
background: transparent !important;
color: #64748b !important;
border: 1px solid transparent !important;
}
.btn-ghost div[data-testid="stButton"] > button:hover {
color: #0f172a !important;
background: #f1f5f9 !important;
}
/* ── TEXT INPUT / TEXTAREA ── */
.stTextArea textarea {
background-color: #FFFFFF !important;
color: #0f172a !important;
border: 1px solid #e2e8f0 !important;
border-radius: 12px !important;
font-size: 0.95rem !important;
padding: 0.8rem 1rem !important;
transition: border-color 0.2s !important;
}
.stTextArea textarea:focus {
border-color: #10b981 !important;
box-shadow: 0 0 0 1px #10b981 !important;
}
.stTextArea label {
color: #334155 !important;
font-size: 0.85rem !important;
font-weight: 600 !important;
}
/* ── DATA TABLE ── */
div[data-testid="stDataFrame"] {
border: 1px solid #e2e8f0 !important;
border-radius: 12px !important;
}
/* ── FILE UPLOADER ── */
div[data-testid="stFileUploader"] {
border: 1px dashed #cbd5e1 !important;
border-radius: 12px !important;
background: #FFFFFF !important;
padding: 1.5rem !important;
}
div[data-testid="stFileUploader"]:hover {
border-color: #10b981 !important;
}
/* ── EXPANDER ── */
div[data-testid="stExpander"] {
border: 1px solid #e2e8f0 !important;
border-radius: 12px !important;
background: #FFFFFF !important;
}
/* ── DOWNLOAD BUTTON ── */
div[data-testid="stDownloadButton"] > button {
background: #FFFFFF !important;
color: #0f172a !important;
border-radius: 50px !important;
font-weight: 600 !important;
border: 1px solid #e2e8f0 !important;
padding: 0.5rem 1.2rem !important;
transition: all 0.2s !important;
}
div[data-testid="stDownloadButton"] > button:hover {
border-color: #10b981 !important;
color: #10b981 !important;
}
/* ── DIVIDER ── */
.vbc-divider {
border: none;
border-top: 1px solid #e2e8f0;
margin: 2rem 0;
}
</style>
""", unsafe_allow_html=True)
# ==============================
# AUTO-SCROLL HELPER
# ==============================
def scroll_to_target(target_id):
    """Smooth-scroll the parent page to the element whose DOM id is ``target_id``.

    The script is rendered through a zero-sized HTML component, so it reaches
    the host page via ``window.parent``. Nothing happens when no element with
    that id exists.
    """
    script = (
        "\n<script>\n"
        f"var target = window.parent.document.getElementById('{target_id}');\n"
        "if(target) {\n"
        "target.scrollIntoView({behavior: 'smooth', block: 'start'});\n"
        "}\n"
        "</script>\n"
    )
    components.html(script, height=0, width=0)
# ==============================
# HEADER / NAVBAR
# ==============================
def set_page(page_name):
    """Store ``page_name`` as the active page in the Streamlit session state."""
    st.session_state["page"] = page_name
# Top bar: logo on the left, one navigation button per page on the right.
col_logo, col_space, col_btn1, col_btn2 = st.columns([5, 3, 2, 2], vertical_alignment="center")

with col_logo:
    # The logo glyph is the Bitcoin sign; the previous literal was its
    # mis-decoded (mojibake) form.
    st.markdown("""
<div class="vbc-logo" style="padding-left: 2rem;">
<span class="vbc-logo-icon">₿</span>
Bitcoin Volatility Sentiment
</div>
""", unsafe_allow_html=True)

# (column, button label, page id, widget key) — rendered left to right.
_nav_items = (
    (col_btn1, "Uji Kalimat", "uji_kalimat", "nav_uji"),
    (col_btn2, "Analisis Batch", "analisis_batch", "nav_batch"),
)
for _nav_col, _nav_label, _nav_page, _nav_key in _nav_items:
    with _nav_col:
        # Highlight the button of the page currently shown.
        css_class = "btn-orange" if st.session_state.page == _nav_page else "btn-ghost"
        st.markdown(f'<div class="{css_class}">', unsafe_allow_html=True)
        if st.button(_nav_label, use_container_width=True, key=_nav_key):
            set_page(_nav_page)
            # Rerun immediately so the newly selected page renders in this pass.
            st.rerun()
        st.markdown('</div>', unsafe_allow_html=True)

st.markdown("<hr style='margin-top: 0.5rem; margin-bottom: 0.5rem; border: none; border-bottom: 1px solid #e2e8f0;'>", unsafe_allow_html=True)
# ==============================
# DOWNLOAD RESOURCES & LOAD MODELS
# ==============================
@st.cache_resource
def download_nltk_resources():
    """Download the NLTK data packages this app asks for (quietly).

    Wrapped in ``st.cache_resource`` so the downloads are not repeated on
    every script rerun.
    """
    for package in ('stopwords', 'vader_lexicon', 'punkt', 'omw-1.4'):
        nltk.download(package, quiet=True)


download_nltk_resources()

# English stop words, used by clean_text() to drop filler tokens.
stop_words = set(stopwords.words('english'))
@st.cache_resource
def load_all_models():
    """Instantiate every sentiment model once and cache them across reruns.

    Returns:
        A 4-tuple ``(vader, bertweet, roberta, roberta_large)``: the VADER
        analyzer followed by three Hugging Face sentiment pipelines created
        with ``device=-1`` and truncation enabled.
    """
    def _sentiment_pipeline(model_name, max_length):
        return pipeline(
            "sentiment-analysis",
            model=model_name,
            device=-1,
            truncation=True,
            max_length=max_length,
        )

    analyzer = SentimentIntensityAnalyzer()
    return (
        analyzer,
        _sentiment_pipeline("finiteautomata/bertweet-base-sentiment-analysis", 128),
        _sentiment_pipeline("cardiffnlp/twitter-roberta-base-sentiment", 512),
        _sentiment_pipeline("siebert/sentiment-roberta-large-english", 512),
    )


with st.spinner('Mempersiapkan model AI...'):
    vader, bertweet, roberta, roberta_large = load_all_models()
# ==============================
# TEXT CLEANING & LABEL MAPPING
# ==============================
# Compiled once at import time; clean_text() runs for every tweet in a batch.
_URL_RE = re.compile(r"http\S+")
_MENTION_RE = re.compile(r"@\w+")
_HASHTAG_RE = re.compile(r"#\w+")
_NON_WORD_RE = re.compile(r"[^\w\s]")


def clean_text(text, stopword_set=None):
    """Normalize a tweet for sentiment scoring.

    Lower-cases the text, removes URLs, @mentions and #hashtags, strips every
    remaining character that is neither a word character nor whitespace, and
    drops stop words.

    Args:
        text: Raw tweet; any object is accepted and converted with ``str()``.
        stopword_set: Optional collection of lower-case words to drop. When
            omitted, the module-level ``stop_words`` set is used, which is the
            previous behavior.

    Returns:
        The surviving tokens joined by single spaces (possibly ``""``).
    """
    if stopword_set is None:
        stopword_set = stop_words

    cleaned = str(text).lower()
    # Order matters: URLs go first so "@"/"#" inside a link are not treated
    # as mentions or hashtags.
    for pattern in (_URL_RE, _MENTION_RE, _HASHTAG_RE, _NON_WORD_RE):
        cleaned = pattern.sub("", cleaned)

    return " ".join(word for word in cleaned.split() if word not in stopword_set)
def classify_tb(score):
    """Map a polarity score to a lower-case sentiment label.

    Scores above 0.05 are ``'positive'``, scores below -0.05 are
    ``'negative'``, and everything else is ``'neutral'``.
    """
    if score > 0.05:
        label = 'positive'
    elif score < -0.05:
        label = 'negative'
    else:
        label = 'neutral'
    return label
def map_roberta(label):
    """Translate a ``LABEL_0``/``LABEL_1``/``LABEL_2`` id to a sentiment name.

    Any other label is reported as ``"neutral"``.
    """
    names_by_id = {
        "LABEL_0": "negative",
        "LABEL_1": "neutral",
        "LABEL_2": "positive",
    }
    return names_by_id[label] if label in names_by_id else "neutral"
def map_bertweet(label):
    """Translate a ``pos``/``neu``/``neg`` label (any letter case) to a sentiment name.

    Any other label is reported as ``"neutral"``.
    """
    names_by_code = {
        "pos": "positive",
        "neu": "neutral",
        "neg": "negative",
    }
    code = label.lower()
    return names_by_code[code] if code in names_by_code else "neutral"
def get_daily_label(score):
    """Map a daily mean sentiment score to a capitalized label.

    Scores above 0.05 are ``'Positive'``, scores below -0.05 are
    ``'Negative'``, and everything else is ``'Neutral'``.
    """
    if score > 0.05:
        return 'Positive'
    if score < -0.05:
        return 'Negative'
    return 'Neutral'
# ==============================================================================
# PAGE 1 — SINGLE-SENTENCE TEST ("Uji Kalimat")
# ==============================================================================
# Presentation of one result card per sentiment label: badge background,
# badge text color, left-border accent color and trend arrow.
_SENTIMENT_BADGES = {
    "positive": {"bg": "#e6fff1", "fg": "#10b981", "accent": "#10b981", "icon": "↗"},
    "negative": {"bg": "#fef1f2", "fg": "#f43f5e", "accent": "#f43f5e", "icon": "↘"},
    "neutral": {"bg": "#f1f5f9", "fg": "#64748b", "accent": "#cbd5e1", "icon": "→"},
}


def _predict_label(predict):
    """Run one model callable and return a label the result cards can render.

    A failing model must not take the whole page down, so any exception is
    reported as ``"neutral"``; so is a label without a badge definition,
    which previously raised ``KeyError`` while rendering.
    """
    try:
        label = predict()
    except Exception:
        return "neutral"
    return label if label in _SENTIMENT_BADGES else "neutral"


if st.session_state.page == "uji_kalimat":
    st.markdown('<div class="hero-wrap">', unsafe_allow_html=True)
    col_text, col_img = st.columns([1.1, 1], gap="large")

    with col_text:
        st.markdown("""
<div class="hero-badge">Website ini bukanlah alat prediksi harga Bitcoin real time, melainkan instrumen untuk melakukan analisis sentimen publik secara batch</div>
<h1 class="hero-title">
Bitcoin Volatility<br>
<span>vs Public</span> Sentiment
</h1>
<p class="hero-sub">
Analisis Volatilitas Harga Bitcoin Terhadap Sentimen Publik
Pada Platform X Berbasis Python.
</p>
<div class="hero-card">
<div class="hero-card-dot"></div>
<p><b>Peneliti:</b> Arya Galuh Saputra &nbsp;·&nbsp; H1D022022</p>
</div>
""", unsafe_allow_html=True)
        user_input = st.text_area(
            "Masukkan Tweet (Bahasa Inggris):",
            "Great, Bitcoin just crashed another 10% today.",
            height=120,
        )
        st.markdown("<br>", unsafe_allow_html=True)
        col_btn1, col_btn2 = st.columns([1.6, 1])
        with col_btn1:
            st.markdown('<div class="btn-primary">', unsafe_allow_html=True)
            analyze_btn = st.button("Proses Uji Kalimat", use_container_width=True)
            st.markdown('</div>', unsafe_allow_html=True)

    with col_img:
        st.markdown("<div style='margin-top: 4rem;'></div>", unsafe_allow_html=True)
        try:
            st.image(img_hero, use_container_width=True)
        except Exception:
            # Missing or unreadable image: show a placeholder instead of failing.
            st.markdown("""
<div style="background:#f5f5f5;border:1px dashed #cbd5e1;
border-radius:12px;height:320px;display:flex;align-items:center;
justify-content:center;color:#64748b;font-size:0.9rem;
text-align:center;padding:2rem;">
🖼️ Gambar Tidak Ditemukan<br>Pastikan file <code>bitcoin1.gif</code> ada di direktori
</div>""", unsafe_allow_html=True)

    st.markdown('</div>', unsafe_allow_html=True)
    # Anchor that scroll_to_target() jumps to once results are rendered.
    st.markdown('<div id="target-uji-kalimat"></div>', unsafe_allow_html=True)

    if analyze_btn:
        scroll_to_target("target-uji-kalimat")
        col_space_left, col_center_output, col_space_right = st.columns([1, 4, 1])
        with col_center_output:
            st.markdown("""
<div class="result-wrap" style="padding-bottom: 2rem; margin-bottom: 1.5rem;">
<p class="section-label">Output Analisis</p>
<p class="section-title" style="margin-bottom: 0;">Hasil Deteksi Sentimen</p>
</div>
""", unsafe_allow_html=True)

            if not user_input or not user_input.strip():
                # Nothing to analyze; do not run five models on an empty string.
                st.warning("⚠️ Silakan masukkan teks tweet terlebih dahulu.")
            else:
                try:
                    is_english = detect(user_input) == 'en'
                except Exception:
                    # Detection failed; as before, the analysis continues
                    # without showing a language warning.
                    is_english = True
                if not is_english:
                    st.warning("⚠️ Teks sepertinya bukan bahasa Inggris. Hasil prediksi mungkin memiliki bias.")

                text = clean_text(user_input)
                with st.spinner("Mengekstraksi sentimen dengan 5 Model..."):
                    # Short pause kept from the original flow.
                    time.sleep(0.5)
                    results = [
                        # VADER's compound score uses the same +/-0.05 cut-offs
                        # as classify_tb, and is now computed only once.
                        ("VADER", _predict_label(lambda: classify_tb(vader.polarity_scores(text)['compound']))),
                        ("TextBlob", _predict_label(lambda: classify_tb(TextBlob(text).sentiment.polarity))),
                        ("BERTweet", _predict_label(lambda: map_bertweet(bertweet(text)[0]['label']))),
                        ("RoBERTa Base", _predict_label(lambda: map_roberta(roberta(text)[0]['label']))),
                        ("RoBERTa Large", _predict_label(lambda: roberta_large(text)[0]['label'].lower())),
                    ]

                # Two-column grid, filled left-right then top-down.
                col_a, col_b = st.columns(2)
                for i, (method, label) in enumerate(results):
                    badge = _SENTIMENT_BADGES[label]
                    with (col_a if i % 2 == 0 else col_b):
                        st.markdown(f"""
<div style="background:#FAFAFA;border:1px solid #e2e8f0;border-left:4px solid {badge['accent']};
border-radius:12px;padding:1rem 1.2rem;margin-bottom:1rem;
display:flex;align-items:center;justify-content:space-between;
box-shadow:0 2px 4px rgba(0,0,0,0.02);">
<div>
<div style="font-weight:600;font-size:0.75rem;color:#64748b;margin-bottom:4px;">{method}</div>
<div style="font-weight:800;font-size:1.05rem;color:#0f172a;">{label.capitalize()}</div>
</div>
<div style="background:{badge['bg']};color:{badge['fg']};font-size:0.75rem;font-weight:700;
padding:6px 12px;border-radius:50px;">
{badge['icon']} {label.upper()}
</div>
</div>
""", unsafe_allow_html=True)
# ==============================================================================
# PAGE 2 — BATCH ANALYSIS ("Analisis Batch")
# ==============================================================================
_SENTIMENT_MODELS = ["vader", "textblob", "bertweet", "roberta", "roberta_large"]
_SENTIMENT_SCORES = {"positive": 1, "neutral": 0, "negative": -1}
_COINGECKO_RANGE_URL = "https://api.coingecko.com/api/v3/coins/bitcoin/market_chart/range"
_COINGECKO_TIMEOUT_S = 30  # without a timeout a stalled connection hangs the app
_SECONDS_PER_DAY = 86400
# Correct media type for .xlsx (the legacy "application/vnd.ms-excel" is .xls).
_XLSX_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"


def _iter_tweet_blocks(content):
    """Yield ``(meta_line, tweet_text)`` pairs from an uploaded TXT payload.

    Records are separated by a blank line; the first line of a record is the
    metadata and the rest is the tweet. Records without a second line are
    skipped.
    """
    for block in content.replace("\r\n", "\n").strip().split("\n\n"):
        parts = block.strip().split("\n", 1)
        if len(parts) == 2:
            yield parts[0], parts[1]


def _parse_tweet_date(meta):
    """Return the ``YYYY-MM-DD`` date from a ``username | date time`` line.

    Returns ``None`` when the separator is missing or the first ten
    characters after it are not a valid date. Splitting from the right keeps
    usernames that themselves contain " | " from breaking the parse.
    """
    if " | " not in meta:
        return None
    date_text = meta.rsplit(" | ", 1)[1].strip()[:10]
    try:
        return datetime.strptime(date_text, "%Y-%m-%d").strftime("%Y-%m-%d")
    except ValueError:
        return None


def _safe_label(predict):
    """Run one model callable; report ``"neutral"`` when it raises."""
    try:
        return predict()
    except Exception:
        return "neutral"


def _label_tweet(text):
    """Return the five per-model sentiment labels for one cleaned tweet."""
    return {
        # VADER's compound score uses the same +/-0.05 cut-offs as classify_tb.
        "vader": _safe_label(lambda: classify_tb(vader.polarity_scores(text)['compound'])),
        "textblob": _safe_label(lambda: classify_tb(TextBlob(text).sentiment.polarity)),
        "bertweet": _safe_label(lambda: map_bertweet(bertweet(text)[0]['label'])),
        "roberta": _safe_label(lambda: map_roberta(roberta(text)[0]['label'])),
        "roberta_large": _safe_label(lambda: roberta_large(text)[0]['label'].lower()),
    }


def _utc_midnight_unix(date_text):
    """Return the Unix timestamp of 00:00 UTC on a ``YYYY-MM-DD`` date."""
    return int(datetime.strptime(date_text, "%Y-%m-%d").replace(tzinfo=timezone.utc).timestamp())


def _fetch_daily_prices(start_unix, end_unix):
    """Download BTC/USD prices and aggregate them to one row per day.

    Returns:
        A DataFrame with ``date``, ``price`` (daily mean), ``pct_change`` and
        ``log_return`` (the first day is dropped because it has no previous
        day), or ``None`` after the API problem was reported with ``st.error``.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    params = {"vs_currency": "usd", "from": start_unix, "to": end_unix}
    headers = {"accept": "application/json", "User-Agent": "Mozilla/5.0"}
    # NOTE(review): delay kept from the original; presumably a courtesy pause
    # for the public API's rate limit — confirm whether it is still needed.
    time.sleep(2)
    res = requests.get(_COINGECKO_RANGE_URL, params=params, headers=headers, timeout=_COINGECKO_TIMEOUT_S)
    if res.status_code != 200:
        st.error(f"API Error {res.status_code}: {res.text}")
        return None
    data_json = res.json()
    if "prices" not in data_json:
        st.error("Data harga tidak ditemukan di respons API.")
        return None

    df_price = pd.DataFrame(data_json["prices"], columns=["timestamp", "price"])
    df_price["date"] = pd.to_datetime(df_price["timestamp"], unit="ms").dt.date
    df_price = df_price.groupby("date")["price"].mean().reset_index()
    df_price["pct_change"] = df_price["price"].pct_change() * 100
    df_price["log_return"] = np.log(df_price["price"] / df_price["price"].shift(1))
    return df_price.dropna()


def _daily_sentiment(df):
    """Average the numeric per-tweet scores per day and attach daily labels."""
    df_score = df.copy()
    for col in _SENTIMENT_MODELS:
        df_score[col] = df_score[col].map(_SENTIMENT_SCORES)
    daily = df_score.groupby("date")[_SENTIMENT_MODELS].mean().reset_index()
    daily["date"] = pd.to_datetime(daily["date"]).dt.date
    for col in _SENTIMENT_MODELS:
        daily[f"{col}_label"] = daily[col].apply(get_daily_label)
    return daily


def _render_dashboard(df, processed, skipped_non_english):
    """Render metrics, tables, downloads, correlation tests, charts and conclusion.

    ``df`` holds one row per processed tweet with a validated ``date`` column
    and one label column per model.
    """
    col_m1, col_m2, col_m3 = st.columns(3)
    col_m1.metric("Tweet Diproses", f"{processed}", border=True)
    col_m2.metric("Tweet Diabaikan (Non-EN)", f"{skipped_non_english}", border=True)
    col_m3.metric("Model", "5 Model", border=True)

    target_dates = sorted(df['date'].unique())
    # One extra day on each side: the day before is needed to compute the
    # first return, the day after covers the whole last date.
    start_unix = _utc_midnight_unix(target_dates[0]) - _SECONDS_PER_DAY
    end_unix = _utc_midnight_unix(target_dates[-1]) + _SECONDS_PER_DAY

    try:
        with st.spinner("📡 Mengambil data harga Bitcoin dari CoinGecko API..."):
            df_price = _fetch_daily_prices(start_unix, end_unix)
        if df_price is None:
            return
        df_price = df_price[df_price["date"].isin(pd.to_datetime(target_dates).date)]
        if df_price.empty:
            st.warning("⚠️ Data Harga API kosong. Pastikan rentang tanggal di .txt sesuai (yyyy-mm-dd).")
            return

        # ---- Tables -------------------------------------------------------
        st.markdown("<hr class='vbc-divider'>", unsafe_allow_html=True)
        st.markdown("🗣️ Data Sentimen")
        st.dataframe(df[["date", "raw_tweet", *_SENTIMENT_MODELS]], use_container_width=True, hide_index=True)

        df_sentiment_daily = _daily_sentiment(df)

        st.markdown("₿ Data Harga & Volatilitas Bitcoin")
        st.dataframe(df_price[["date", "price", "pct_change", "log_return"]], use_container_width=True, hide_index=True)

        df_merged = pd.merge(df_price, df_sentiment_daily, on="date", how="inner")
        st.markdown("🗂️ Data Final")
        final_display_cols = ["date", "price", "pct_change", "log_return"]
        for col in _SENTIMENT_MODELS:
            final_display_cols.extend([col, f"{col}_label"])
        st.dataframe(df_merged[final_display_cols], use_container_width=True, hide_index=True)

        # ---- Downloads ----------------------------------------------------
        # NOTE(review): a download click triggers a script rerun in which the
        # analysis button reads False, so this dashboard probably disappears;
        # consider keeping the results in st.session_state.
        col_dl1, col_dl2, _ = st.columns([1, 1, 3])
        csv_data = df_merged.to_csv(index=False).encode('utf-8')
        col_dl1.download_button("📥 Unduh CSV", data=csv_data, file_name="sentiment_volatility.csv", mime="text/csv", use_container_width=True)
        buffer = io.BytesIO()
        with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
            df_merged.to_excel(writer, index=False)
        col_dl2.download_button("📥 Unduh Excel", data=buffer.getvalue(), file_name="sentiment_volatility.xlsx", mime=_XLSX_MIME, use_container_width=True)
        st.markdown("<hr class='vbc-divider'>", unsafe_allow_html=True)

        if len(df_merged) < 2:
            # pearsonr needs at least two observations; say so instead of
            # surfacing its ValueError as an API failure.
            st.warning("⚠️ Uji korelasi membutuhkan minimal 2 hari data gabungan. Tambahkan tweet dari tanggal lain.")
            return

        # ---- Pearson correlation -------------------------------------------
        st.subheader("🔬 Uji Korelasi Pearson")
        st.caption("Menganalisis hubungan statistik antara skor sentimen harian dan volatilitas log-return BTC.")
        corr_data = []
        raw_corr_results = []
        for method in _SENTIMENT_MODELS:
            corr, pval = pearsonr(df_merged["log_return"], df_merged[method])
            if np.isnan(corr):
                # Constant input (e.g. identical sentiment every day): r is
                # undefined, so do not label it as a negative relationship.
                arah, sig = "-", "Tidak Terdefinisi"
            else:
                arah = "Positif" if corr > 0 else "Negatif"
                sig = "Signifikan" if pval < 0.05 else "Tidak Signifikan"
            corr_data.append({"Metode": method.upper(), "r (Korelasi)": f"{corr:.4f}", "Arah": arah, "p-value": f"{pval:.4f}", "Status": sig})
            raw_corr_results.append({"metode": method.upper(), "r": corr, "p": pval})
        st.table(pd.DataFrame(corr_data))

        # ---- Scatter plots ---------------------------------------------------
        st.subheader("🔵 Pola Distribusi Scatter Plot")
        cols = st.columns(3)
        for idx2, method in enumerate(_SENTIMENT_MODELS):
            with cols[idx2 % 3]:
                fig_s, ax_s = plt.subplots(figsize=(5, 4))
                sns.regplot(data=df_merged, x=method, y="log_return", ax=ax_s,
                            scatter_kws={"s": 40, "color": "#10b981", "alpha": 0.5},
                            line_kws={"color": "#0f172a", "linewidth": 2})
                ax_s.set_title(f"{method.upper()}", fontweight='bold')
                ax_s.set_xlabel("Sentimen Score")
                ax_s.set_ylabel("Log Return")
                fig_s.tight_layout()
                st.pyplot(fig_s)
                # Figures stay registered in pyplot until closed; without
                # this they accumulate across reruns.
                plt.close(fig_s)

        # ---- Line chart --------------------------------------------------------
        st.subheader("📈 Trend Analisis: Sentiment vs BTC Volatility")
        fig_line, ax_line = plt.subplots(figsize=(14, 6))
        ax_line.plot(df_merged["date"], df_merged["log_return"], label="BTC Log Return", color="#f7931a", linewidth=3)
        colors = ["#3B82F6", "#10B981", "#EC4899", "#14B8A6", "#6366F1"]
        for i, method in enumerate(["vader", "textblob", "roberta", "roberta_large", "bertweet"]):
            ax_line.plot(df_merged["date"], df_merged[method], label=f"Sentiment: {method.upper()}", color=colors[i], linewidth=1.5, linestyle="--", alpha=0.8)
        ax_line.set_title("Pergerakan Sentimen vs Log Return Bitcoin", fontsize=14, pad=15, fontweight='bold')
        ax_line.set_xlabel("Tanggal", fontsize=11)
        ax_line.set_ylabel("Nilai Metrik", fontsize=11)
        ax_line.legend(loc='upper left', bbox_to_anchor=(1, 1), frameon=True)
        fig_line.tight_layout()
        st.pyplot(fig_line)
        plt.close(fig_line)

        # ---- Conclusion ----------------------------------------------------------
        st.markdown("<hr class='vbc-divider'>", unsafe_allow_html=True)
        st.subheader("📝 Kesimpulan")
        date_max = df_merged.loc[df_merged["log_return"].idxmax(), "date"]
        date_min = df_merged.loc[df_merged["log_return"].idxmin(), "date"]
        st.write(f"Puncak lonjakan positif (*max log return*) terjadi pada **{date_max}**, sedangkan penurunan ekstrem terjadi pada **{date_min}**.")

        # A NaN p-value compares False, so undefined correlations are excluded.
        significant = [r for r in raw_corr_results if r["p"] < 0.05]
        if significant:
            sig_models = [r["metode"] for r in significant]
            # Pick the strongest among the significant methods only; naming a
            # non-significant method here would contradict the statement.
            strongest = max(significant, key=lambda r: abs(r["r"]))
            arah_text = "berbanding lurus (positif)" if strongest["r"] > 0 else "berbanding terbalik (negatif)"
            st.success(f"""
**Hipotesis Diterima (H1):** Ditemukan korelasi linier yang signifikan pada metode **{', '.join(sig_models)}** (*p-value* < 0.05).
Metode dengan pemetaan respons pasar terkuat adalah **{strongest['metode']}**, dengan sifat hubungan **{arah_text}**.
""")
        else:
            st.warning("""
**Hipotesis Ditolak (H0 Diterima):** Tidak ditemukan bukti empiris korelasi linier yang signifikan (seluruh *p-value* >= 0.05).
Volatilitas harga cenderung dipengaruhi oleh faktor teknikal/fundamental di luar sentimen X.
""")
    except Exception as e:
        # Top-level boundary for the dashboard: report instead of crashing.
        st.error(f"⚠️ Terjadi kesalahan saat mengambil atau memproses data API CoinGecko: {e}")


# A plain ``if`` is equivalent to the former ``elif``: the page id is only
# changed by the navbar, which reruns the script immediately.
if st.session_state.page == "analisis_batch":
    plt.style.use('default')
    sns.set_theme(style="whitegrid", rc={
        "axes.facecolor": "#FFFFFF",
        "figure.facecolor": "#FAFAFA",
        "axes.edgecolor": "#e2e8f0",
        "text.color": "#0f172a",
        "xtick.color": "#64748b",
        "ytick.color": "#64748b",
        "grid.color": "#f1f5f9",
    })
    st.markdown('<div class="batch-wrap">', unsafe_allow_html=True)
    col_upload, col_img_b = st.columns([1.4, 1], gap="large")

    with col_upload:
        st.markdown("""
<p class="batch-eyebrow">Analisis Batch Processing</p>
<h2 class="batch-title">Volatilitas Harga Bitcoin Vs Sentimen Publik<br>Korelasi Multi-Metode Analisis Sentimen</h2>
<p class="batch-sub">
Unggah file tweets (.txt) untuk diekstraksi dan
dianalisis terhadap volatilitas harga Bitcoin.
</p>""", unsafe_allow_html=True)
        tweet_files = st.file_uploader(
            "Pilih file Tweet (.txt)",
            type=['txt'],
            accept_multiple_files=True,
        )
        with st.expander("Format TXT yang Didukung"):
            st.code(
                "username | 2024-03-01 14:00:00\n"
                "Isi tweet baris pertama di sini\n\n"
                "username2 | 2024-03-01 15:30:00\n"
                "Isi tweet baris kedua di sini",
                language="text",
            )
        st.markdown("<br>", unsafe_allow_html=True)
        st.markdown('<div class="btn-primary">', unsafe_allow_html=True)
        analyze_batch_btn = st.button("Eksekusi Analisis", key="batch_btn", use_container_width=False)
        st.markdown('</div>', unsafe_allow_html=True)

    with col_img_b:
        st.markdown("<div style='margin-top: 4rem;'></div>", unsafe_allow_html=True)
        try:
            st.image(img_batch, use_container_width=True)
        except Exception:
            # Missing or unreadable image: show a placeholder instead of failing.
            st.markdown("""
<div style="background:#f5f5f5;border:1px dashed #cbd5e1;
border-radius:12px;height:280px;display:flex;align-items:center;
justify-content:center;color:#64748b;font-size:0.9rem;
text-align:center;padding:2rem;">
🖼️ Gambar Tidak Ditemukan<br>Pastikan file <code>bitcoin2.gif</code> ada di direktori
</div>""", unsafe_allow_html=True)

    st.markdown('</div>', unsafe_allow_html=True)
    # Anchor that scroll_to_target() jumps to once results are rendered.
    st.markdown('<div id="target-analisis-batch"></div>', unsafe_allow_html=True)

    if tweet_files and analyze_batch_btn:
        scroll_to_target("target-analisis-batch")
        col_b_space1, col_b_content, col_b_space2 = st.columns([1, 8, 1])
        with col_b_content:
            st.markdown("""
<div class="result-wrap" style="padding-bottom: 2rem; margin-bottom: 1.5rem;">
<p class="section-label">Hasil Pemrosesan</p>
<p class="section-title" style="margin-bottom: 0;">Dashboard Analisis</p>
</div>
""", unsafe_allow_html=True)

            # Process files in name order so the output is reproducible.
            tweet_files = sorted(tweet_files, key=lambda x: x.name)
            data = []
            total_tweets_skipped = 0   # non-English or undetectable language
            total_tweets_bad_date = 0  # metadata line without a usable date
            with st.status("🔄 Memproses data sentimen...", expanded=True) as status:
                progress_bar = st.progress(0, text="Mengekstrak sentimen dari data...")
                for idx, file in enumerate(tweet_files):
                    # "utf-8-sig" drops a BOM; errors="replace" keeps one bad
                    # byte from aborting the whole batch.
                    content = file.getvalue().decode("utf-8-sig", errors="replace")
                    for meta, text_raw in _iter_tweet_blocks(content):
                        try:
                            is_english = detect(text_raw) == 'en'
                        except Exception:
                            # Undetectable language counts as non-English.
                            is_english = False
                        if not is_english:
                            total_tweets_skipped += 1
                            continue

                        short_date = _parse_tweet_date(meta)
                        if short_date is None:
                            # Previously stored as "unknown", which crashed
                            # the date-range computation further down.
                            total_tweets_bad_date += 1
                            continue

                        text = clean_text(text_raw)
                        data.append({
                            "date": short_date,
                            "raw_tweet": text_raw.strip(),
                            "cleaned_tweet": text,
                            **_label_tweet(text),
                        })
                    progress_bar.progress((idx + 1) / len(tweet_files),
                                          text=f"Memproses file {idx+1} dari {len(tweet_files)}")
                status.update(label="✅ Pemrosesan sentimen teks selesai!", state="complete", expanded=False)

            if total_tweets_bad_date:
                st.caption(f"{total_tweets_bad_date} tweet dilewati karena baris metadata tidak memuat tanggal berformat yyyy-mm-dd.")

            df = pd.DataFrame(data)
            if df.empty:
                st.error("❌ Data kosong. Pastikan format TXT benar dan tweet berbahasa Inggris.")
            else:
                _render_dashboard(df, len(data), total_tweets_skipped)
    elif analyze_batch_btn and not tweet_files:
        st.warning("⚠️ Silakan unggah minimal satu file .txt terlebih dahulu.")