Update src/streamlit_app.py
Browse files: src/streamlit_app.py (+25 -25)
src/streamlit_app.py
CHANGED
|
@@ -14,7 +14,6 @@ import nltk
|
|
| 14 |
from nltk.corpus import stopwords
|
| 15 |
from nltk.sentiment.vader import SentimentIntensityAnalyzer
|
| 16 |
from transformers import pipeline
|
| 17 |
-
from tqdm import tqdm
|
| 18 |
|
| 19 |
# ==============================
|
| 20 |
# KONFIGURASI HALAMAN
|
|
@@ -35,16 +34,17 @@ download_nltk_resources()
|
|
| 35 |
stop_words = set(stopwords.words('english'))
|
| 36 |
|
| 37 |
# ==============================
|
| 38 |
-
# LOAD MODELS
|
| 39 |
# ==============================
|
| 40 |
@st.cache_resource
|
| 41 |
def load_all_models():
|
| 42 |
-
with st.spinner('
|
| 43 |
vader = SentimentIntensityAnalyzer()
|
| 44 |
-
bertweet = pipeline("sentiment-analysis", model="finiteautomata/bertweet-base-sentiment-analysis", device=-1)
|
| 45 |
-
roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment", device=-1)
|
| 46 |
-
roberta_large = pipeline("sentiment-analysis", model="siebert/sentiment-roberta-large-english", device=-1)
|
| 47 |
return vader, bertweet, roberta, roberta_large
|
|
|
|
| 48 |
vader, bertweet, roberta, roberta_large = load_all_models()
|
| 49 |
|
| 50 |
# ==============================
|
|
@@ -102,13 +102,14 @@ with tab1:
|
|
| 102 |
try: t_label = classify_tb(TextBlob(text).sentiment.polarity)
|
| 103 |
except: t_label = "neutral"
|
| 104 |
|
| 105 |
-
|
|
|
|
| 106 |
except: b_label = "neutral"
|
| 107 |
|
| 108 |
-
try: r_label = map_roberta(roberta(text
|
| 109 |
except: r_label = "neutral"
|
| 110 |
|
| 111 |
-
try: rl_label = roberta_large(text
|
| 112 |
except: rl_label = "neutral"
|
| 113 |
|
| 114 |
data_test = {
|
|
@@ -135,7 +136,8 @@ with tab2:
|
|
| 135 |
|
| 136 |
total_tweets_uploaded = 0
|
| 137 |
for idx, file in enumerate(tweet_files):
|
| 138 |
-
|
|
|
|
| 139 |
tweets = content.split("\n\n")
|
| 140 |
|
| 141 |
for tweet in tweets:
|
|
@@ -157,13 +159,14 @@ with tab2:
|
|
| 157 |
try: tb_label = classify_tb(TextBlob(text).sentiment.polarity)
|
| 158 |
except: tb_label = "neutral"
|
| 159 |
|
| 160 |
-
|
|
|
|
| 161 |
except: bertweet_label = "neutral"
|
| 162 |
|
| 163 |
-
try: roberta_label = map_roberta(roberta(text
|
| 164 |
except: roberta_label = "neutral"
|
| 165 |
|
| 166 |
-
try: roberta_large_label = roberta_large(text
|
| 167 |
except: roberta_large_label = "neutral"
|
| 168 |
|
| 169 |
data.append({
|
|
@@ -188,19 +191,16 @@ with tab2:
|
|
| 188 |
st.success(f"Berhasil mengekstrak {total_tweets_uploaded} tweets!")
|
| 189 |
|
| 190 |
target_dates = sorted(df['date'].unique())
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
delta_days = (today_date - oldest_date).days + 2
|
| 195 |
-
fetch_days = max(7, delta_days)
|
| 196 |
|
| 197 |
-
st.write(f"Mencari data harga Bitcoin dari CoinGecko API
|
| 198 |
|
| 199 |
-
url = "https://api.coingecko.com/api/v3/coins/bitcoin/market_chart"
|
| 200 |
params = {
|
| 201 |
"vs_currency": "usd",
|
| 202 |
-
"
|
| 203 |
-
"
|
| 204 |
}
|
| 205 |
headers = {"accept": "application/json", "User-Agent": "Mozilla/5.0"}
|
| 206 |
|
|
@@ -383,10 +383,10 @@ with tab2:
|
|
| 383 |
arah_text = "berbanding lurus (positif)" if strongest_model["r"] > 0 else "berbanding terbalik (negatif)"
|
| 384 |
|
| 385 |
# ==============================
|
| 386 |
-
# KESIMPULAN & PEMBAHASAN AKHIR
|
| 387 |
# ==============================
|
| 388 |
st.write("---")
|
| 389 |
-
st.header("📝
|
| 390 |
|
| 391 |
# 1. Pembahasan
|
| 392 |
st.subheader("1. Pembahasan Analisis")
|
|
@@ -414,7 +414,7 @@ with tab2:
|
|
| 414 |
st.write(f"- **Metode {res['metode']}:** Menghasilkan nilai korelasi *r* sebesar **{res['r']:.4f}** dengan *p-value* **{res['p']:.4f}**. Arah garis tren {arah_garis}, menandakan bahwa kekuatan hubungan antara sentimen opini publik dan volatilitas harga masuk ke dalam kategori **{kategori}**.")
|
| 415 |
|
| 416 |
# 2. Kesimpulan
|
| 417 |
-
st.subheader("2. Kesimpulan
|
| 418 |
|
| 419 |
if len(sig_models) > 0:
|
| 420 |
st.success(f"""
|
|
|
|
| 14 |
from nltk.corpus import stopwords
|
| 15 |
from nltk.sentiment.vader import SentimentIntensityAnalyzer
|
| 16 |
from transformers import pipeline
|
|
|
|
| 17 |
|
| 18 |
# ==============================
|
| 19 |
# KONFIGURASI HALAMAN
|
|
|
|
| 34 |
stop_words = set(stopwords.words('english'))
|
| 35 |
|
| 36 |
# ==============================
|
| 37 |
+
# LOAD MODELS (DIPERBAIKI)
|
| 38 |
# ==============================
|
| 39 |
@st.cache_resource
def load_all_models():
    """Instantiate every sentiment analyzer used by the app.

    The function is wrapped in ``st.cache_resource``; a spinner is displayed
    while the analyzers are being constructed.

    Returns:
        tuple: ``(vader, bertweet, roberta, roberta_large)`` -- a VADER
        ``SentimentIntensityAnalyzer`` followed by three Hugging Face
        ``"sentiment-analysis"`` pipelines, in that order.
    """

    def _sentiment_pipeline(checkpoint, token_limit):
        # All three pipelines share the same task, device and truncation
        # settings; only the checkpoint and the max_length differ.
        return pipeline(
            "sentiment-analysis",
            model=checkpoint,
            device=-1,
            truncation=True,
            max_length=token_limit,
        )

    with st.spinner('...'):
        # Construction order is kept: VADER first, then the three pipelines.
        vader = SentimentIntensityAnalyzer()
        bertweet = _sentiment_pipeline(
            "finiteautomata/bertweet-base-sentiment-analysis", 128
        )
        roberta = _sentiment_pipeline(
            "cardiffnlp/twitter-roberta-base-sentiment", 512
        )
        roberta_large = _sentiment_pipeline(
            "siebert/sentiment-roberta-large-english", 512
        )
        return vader, bertweet, roberta, roberta_large
|
| 47 |
+
|
| 48 |
vader, bertweet, roberta, roberta_large = load_all_models()
|
| 49 |
|
| 50 |
# ==============================
|
|
|
|
| 102 |
try: t_label = classify_tb(TextBlob(text).sentiment.polarity)
|
| 103 |
except: t_label = "neutral"
|
| 104 |
|
| 105 |
+
# Slicing [:128] dan [:512] dihapus karena sudah di-handle oleh pipeline truncation
|
| 106 |
+
try: b_label = map_bertweet(bertweet(text)[0]['label'])
|
| 107 |
except: b_label = "neutral"
|
| 108 |
|
| 109 |
+
try: r_label = map_roberta(roberta(text)[0]['label'])
|
| 110 |
except: r_label = "neutral"
|
| 111 |
|
| 112 |
+
try: rl_label = roberta_large(text)[0]['label'].lower()
|
| 113 |
except: rl_label = "neutral"
|
| 114 |
|
| 115 |
data_test = {
|
|
|
|
| 136 |
|
| 137 |
total_tweets_uploaded = 0
|
| 138 |
for idx, file in enumerate(tweet_files):
|
| 139 |
+
# Perbaikan: replace("\r\n", "\n") memastikan format enter dibaca utuh
|
| 140 |
+
content = file.getvalue().decode("utf-8").replace("\r\n", "\n").strip()
|
| 141 |
tweets = content.split("\n\n")
|
| 142 |
|
| 143 |
for tweet in tweets:
|
|
|
|
| 159 |
try: tb_label = classify_tb(TextBlob(text).sentiment.polarity)
|
| 160 |
except: tb_label = "neutral"
|
| 161 |
|
| 162 |
+
# Slicing [:128] dan [:512] dihapus
|
| 163 |
+
try: bertweet_label = map_bertweet(bertweet(text)[0]['label'])
|
| 164 |
except: bertweet_label = "neutral"
|
| 165 |
|
| 166 |
+
try: roberta_label = map_roberta(roberta(text)[0]['label'])
|
| 167 |
except: roberta_label = "neutral"
|
| 168 |
|
| 169 |
+
try: roberta_large_label = roberta_large(text)[0]['label'].lower()
|
| 170 |
except: roberta_large_label = "neutral"
|
| 171 |
|
| 172 |
data.append({
|
|
|
|
| 191 |
st.success(f"Berhasil mengekstrak {total_tweets_uploaded} tweets!")
|
| 192 |
|
| 193 |
target_dates = sorted(df['date'].unique())
|
| 194 |
+
start_unix = int(datetime.strptime(target_dates[0], "%Y-%m-%d").timestamp()) - 86400
|
| 195 |
+
end_unix = int(datetime.strptime(target_dates[-1], "%Y-%m-%d").timestamp()) + 86400
|
|
|
|
|
|
|
|
|
|
| 196 |
|
| 197 |
+
st.write(f"Mencari data harga Bitcoin dari CoinGecko API menggunakan rentang Unix Timestamp...")
|
| 198 |
|
| 199 |
+
url = "https://api.coingecko.com/api/v3/coins/bitcoin/market_chart/range"
|
| 200 |
params = {
|
| 201 |
"vs_currency": "usd",
|
| 202 |
+
"from": start_unix,
|
| 203 |
+
"to": end_unix
|
| 204 |
}
|
| 205 |
headers = {"accept": "application/json", "User-Agent": "Mozilla/5.0"}
|
| 206 |
|
|
|
|
| 383 |
arah_text = "berbanding lurus (positif)" if strongest_model["r"] > 0 else "berbanding terbalik (negatif)"
|
| 384 |
|
| 385 |
# ==============================
|
| 386 |
+
# KESIMPULAN & PEMBAHASAN AKHIR
|
| 387 |
# ==============================
|
| 388 |
st.write("---")
|
| 389 |
+
st.header("📝 Hasil, Pembahasan, dan Kesimpulan")
|
| 390 |
|
| 391 |
# 1. Pembahasan
|
| 392 |
st.subheader("1. Pembahasan Analisis")
|
|
|
|
| 414 |
st.write(f"- **Metode {res['metode']}:** Menghasilkan nilai korelasi *r* sebesar **{res['r']:.4f}** dengan *p-value* **{res['p']:.4f}**. Arah garis tren {arah_garis}, menandakan bahwa kekuatan hubungan antara sentimen opini publik dan volatilitas harga masuk ke dalam kategori **{kategori}**.")
|
| 415 |
|
| 416 |
# 2. Kesimpulan
|
| 417 |
+
st.subheader("2. Kesimpulan")
|
| 418 |
|
| 419 |
if len(sig_models) > 0:
|
| 420 |
st.success(f"""
|