# CryptoNews3 / app.py
# Mrttbn's picture
# Update app.py
# 23a49ca verified
import gradio as gr
import feedparser
import pandas as pd
import numpy as np
import faiss
import matplotlib.pyplot as plt
import re
from collections import Counter
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch
import warnings
warnings.filterwarnings('ignore')
# Module-level state shared by the Gradio callbacks below.
# The three model handles are filled in by initialize_models(); the news
# table, the FAISS index and the title embeddings are filled in by fetch_news().
model = sentiment_analyzer = summarizer = None
df = index = embeddings = None
def initialize_models():
    """Load the embedding, FinBERT sentiment and summarization models.

    Each model is loaded only if its module-level handle is still ``None``,
    so calling this function again does not reload anything that is already
    in place. All pipelines are created on the CPU (``device=-1``).

    Returns:
        str: A success message, or an error message containing the text of
        the exception raised while loading.
    """
    global model, sentiment_analyzer, summarizer

    finbert_checkpoint = "ProsusAI/finbert"
    cpu_device = -1  # use 0 instead to run on the first GPU

    try:
        # Sentence embedding model used for the semantic search index.
        if model is None:
            model = SentenceTransformer('all-MiniLM-L6-v2')

        # FinBERT sentiment classifier (tokenizer is loaded before the weights).
        if sentiment_analyzer is None:
            finbert_tokenizer = AutoTokenizer.from_pretrained(finbert_checkpoint)
            finbert_weights = AutoModelForSequenceClassification.from_pretrained(finbert_checkpoint)
            sentiment_analyzer = pipeline(
                "sentiment-analysis",
                model=finbert_weights,
                tokenizer=finbert_tokenizer,
                device=cpu_device,
            )

        # Summarization model.
        # NOTE(review): no other function in this file reads ``summarizer``.
        if summarizer is None:
            summarizer = pipeline(
                "summarization",
                model="facebook/bart-large-cnn",
                device=cpu_device,
            )
    except Exception as exc:
        return f"❌ Model yükleme hatası: {str(exc)}"
    else:
        return "✅ FinBERT ve özetleme modelleri başarıyla yüklendi!"
def fetch_news():
    """Collect news from the RSS feeds, score sentiment and build the search index.

    Up to 30 entries are read from each feed, entries are de-duplicated by
    title, every title is classified with the pipeline held in
    ``sentiment_analyzer`` and the titles are embedded with ``model`` and
    stored in a FAISS ``IndexFlatL2``.

    The globals ``df``, ``embeddings`` and ``index`` are replaced together and
    only after every step has succeeded, so the other callbacks never see a
    table that is out of sync with the index. When nothing can be collected
    the previous state is left untouched.

    Returns:
        tuple: ``(status, preview)``. ``status`` is a multi-line status string;
        ``preview`` holds the first 10 rows (title, sentiment label, published
        date) or is ``None`` when the models are not loaded or no entry was
        collected.
    """
    global df, index, embeddings

    # Without the models every title would silently fall back to "neutral"
    # and the embedding step would then fail on ``None``.
    if model is None or sentiment_analyzer is None:
        return "⚠️ Önce modelleri başlatmalısınız!", None

    RSS_URLS = [
        "https://cointelegraph.com/rss",
        "https://cryptonews.com/news/feed",
        "https://www.coindesk.com/arc/outboundfeeds/rss/",
        "https://tr.investing.com/rss/302.rss"
    ]
    max_per_feed = 30

    all_entries = []
    status_messages = []
    for url in RSS_URLS:
        host = url.split('/')[2]
        try:
            entries = feedparser.parse(url).entries[:max_per_feed]
            all_entries.extend(
                {
                    "title": entry.get("title", ""),
                    "link": entry.get("link", ""),
                    "summary": entry.get("summary", ""),
                    "published": entry.get("published", ""),
                }
                for entry in entries
            )
            status_messages.append(f"✓ {host} - {len(entries)} haber")
        except Exception:
            # Best effort: one broken feed must not abort the whole collection.
            status_messages.append(f"✗ {host} - Hata")

    status = "\n".join(status_messages)

    # An empty DataFrame has no "title" column, so de-duplicating or selecting
    # columns below would raise KeyError.
    if not all_entries:
        return status + "\n\n⚠️ Hiçbir haber toplanamadı!", None

    news = pd.DataFrame(all_entries).drop_duplicates(subset="title").reset_index(drop=True)

    def analyze_sentiment_finbert(text):
        """Return ``(label, score)`` for one title; neutral/0.5 if the model call fails."""
        try:
            # Cut to the first 512 characters to keep the model input short.
            result = sentiment_analyzer(text[:512])[0]
            # FinBERT labels: positive, negative, neutral
            return result['label'].lower(), result['score']
        except Exception:
            return "neutral", 0.5

    sentiments = [analyze_sentiment_finbert(title) for title in news["title"]]
    news["sentiment_label"] = [label for label, _ in sentiments]
    news["sentiment_score"] = [score for _, score in sentiments]

    # Build the FAISS index over the title embeddings (FAISS expects float32).
    vectors = np.ascontiguousarray(model.encode(news["title"].tolist()), dtype='float32')
    new_index = faiss.IndexFlatL2(vectors.shape[1])
    new_index.add(vectors)

    # Publish the new state in one step.
    df, embeddings, index = news, vectors, new_index

    status += f"\n\n✅ Toplam {len(df)} benzersiz haber toplandı ve FinBERT ile analiz edildi!"
    return status, df[["title", "sentiment_label", "published"]].head(10)
def generate_news_summary():
    """Build the overall summary of the collected news.

    Produces a Markdown report (sentiment distribution, the five most frequent
    capitalised terms in the titles, the top positive and negative headlines),
    a two-panel figure (sentiment pie chart and term-frequency bar chart) and
    a small statistics table.

    ``sentiment_score`` is the classifier's confidence in its predicted label,
    so the "most positive" / "most negative" lists are taken from the rows
    labelled ``positive`` / ``negative`` respectively, ranked by that score.

    Returns:
        tuple: ``(markdown_text, figure, table)``. When no news has been
        collected, or when an error occurs, the first element is a message
        and the other two are ``None``.
    """
    global df
    if df is None or len(df) == 0:
        return "⚠️ Önce haberleri toplamalısınız!", None, None

    try:
        # Sentiment statistics
        sentiment_counts = df["sentiment_label"].value_counts()
        total_news = len(df)
        positive_count = int(sentiment_counts.get('positive', 0))
        negative_count = int(sentiment_counts.get('negative', 0))
        neutral_count = int(sentiment_counts.get('neutral', 0))
        positive_pct = (positive_count / total_news) * 100
        negative_pct = (negative_count / total_news) * 100
        neutral_pct = (neutral_count / total_news) * 100

        # Overall mood: a label must hold a strict majority, otherwise neutral.
        if positive_pct > 50:
            overall_sentiment = "📈 POZITIF"
            sentiment_emoji = "🟢"
        elif negative_pct > 50:
            overall_sentiment = "📉 NEGATIF"
            sentiment_emoji = "🔴"
        else:
            overall_sentiment = "➡️ NÖTR"
            sentiment_emoji = "🟡"

        # Most mentioned terms: capitalised words minus generic stop words.
        all_titles = " ".join(df["title"].tolist())
        potential_coins = re.findall(r'\b[A-Z][a-zA-Z]+\b', all_titles)
        stop_words = {
            "Coin", "Price", "Market", "News", "Today", "Crypto", "Token",
            "The", "This", "That", "What", "When", "Where", "How", "Why",
            "And", "But", "For", "From", "With", "About", "After", "Before"
        }
        filtered_tokens = [t for t in potential_coins if t not in stop_words]
        trending = Counter(filtered_tokens).most_common(5)

        # Top headlines per label. Ranking the whole table by score would mix
        # labels, because the score is a confidence and not a polarity.
        score_cols = ["title", "sentiment_score"]
        top_positive = df[df["sentiment_label"] == "positive"].nlargest(3, "sentiment_score")[score_cols]
        top_negative = df[df["sentiment_label"] == "negative"].nlargest(3, "sentiment_score")[score_cols]

        # Report text
        summary_text = f"""
# 🌐 GÜNCEL KRİPTO HABER ÖZETİ
## {sentiment_emoji} Genel Durum: {overall_sentiment}
### 📊 Sentiment Dağılımı
- **Toplam Haber:** {total_news}
- 🟢 **Pozitif:** {positive_count} (%{positive_pct:.1f})
- 🔴 **Negatif:** {negative_count} (%{negative_pct:.1f})
- 🟡 **Nötr:** {neutral_count} (%{neutral_pct:.1f})
### 🔥 En Çok Bahsedilen Konular
"""
        for i, (term, count) in enumerate(trending, 1):
            summary_text += f"{i}. **{term}** - {count} kez bahsedildi\n"
        summary_text += "\n### ⭐ En Pozitif Haberler\n"
        for _, row in top_positive.iterrows():
            summary_text += f"- {row['title'][:100]}... (Skor: {row['sentiment_score']:.3f})\n"
        summary_text += "\n### ⚠️ En Negatif Haberler\n"
        for _, row in top_negative.iterrows():
            summary_text += f"- {row['title'][:100]}... (Skor: {row['sentiment_score']:.3f})\n"

        # Charts
        fig, axes = plt.subplots(1, 2, figsize=(14, 5))
        try:
            # Sentiment distribution
            ax1 = axes[0]
            color_map = {
                'positive': '#2ecc71',
                'negative': '#e74c3c',
                'neutral': '#95a5a6'
            }
            colors = [color_map.get(x, '#95a5a6') for x in sentiment_counts.index]
            ax1.pie(sentiment_counts.values, labels=sentiment_counts.index, autopct='%1.1f%%',
                    colors=colors, startangle=90, textprops={'fontsize': 12, 'fontweight': 'bold'})
            ax1.set_title('Genel Sentiment Dağılımı', fontweight='bold', fontsize=14)

            # Term frequencies
            ax2 = axes[1]
            if trending:
                terms, counts = zip(*trending)
                ax2.barh(terms, counts, color='#3498db', alpha=0.7)
                ax2.set_xlabel('Frekans', fontweight='bold', fontsize=11)
                ax2.set_title('En Popüler 5 Konu', fontweight='bold', fontsize=14)
                ax2.invert_yaxis()
                ax2.grid(axis='x', alpha=0.3)
            fig.tight_layout()
        finally:
            # Unregister the figure from pyplot so repeated clicks do not
            # accumulate open figures; the Figure object itself stays usable.
            plt.close(fig)

        # Statistics table
        summary_table = pd.DataFrame({
            'Metric': ['Toplam Haber', 'Pozitif', 'Negatif', 'Nötr', 'Genel Durum'],
            'Value': [total_news, positive_count, negative_count, neutral_count, overall_sentiment]
        })
        return summary_text, fig, summary_table
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"❌ Özet oluşturma hatası: {str(e)}", None, None
def search_similar_news(query, top_k=5):
    """Semantic search over the collected news titles.

    The query is embedded with ``model`` and matched against the FAISS
    ``index``; the matching rows of ``df`` are returned as a table.

    Args:
        query: Free-text search query. Blank queries are rejected.
        top_k: Maximum number of results. Converted with ``int()`` before use
            and capped at the number of rows in ``df``.

    Returns:
        tuple: ``(message, results)`` where ``results`` is a DataFrame with
        the columns ``Başlık``, ``Sentiment``, ``Skor`` and ``Link``, or
        ``None`` when no search could be run.
    """
    global df, index, model
    if df is None or index is None:
        return "⚠️ Önce haberleri toplamalısınız!", None

    query = (query or "").strip()
    if not query:
        return "⚠️ Lütfen bir arama sorgusu girin!", None

    try:
        k = max(1, min(int(top_k), len(df)))
        q_embedding = model.encode([query])
        _, indices = index.search(q_embedding.astype('float32'), k=k)

        results = []
        for idx in indices[0]:
            # FAISS pads missing neighbours with -1, which ``iloc`` would
            # interpret as "last row"; skip anything outside the table.
            if idx < 0 or idx >= len(df):
                continue
            news = df.iloc[idx]
            results.append({
                "Başlık": news['title'],
                "Sentiment": news['sentiment_label'],
                "Skor": f"{news['sentiment_score']:.3f}",
                "Link": news['link']
            })

        results_df = pd.DataFrame(results)
        message = f"🔎 '{query}' için {len(results)} sonuç bulundu"
        return message, results_df
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"❌ Hata: {str(e)}", None
def analyze_coin_sentiment(coin_name):
    """Sentiment analysis restricted to the news whose title mentions a coin.

    Titles are matched case-insensitively against ``coin_name`` as a plain
    substring (not a regular expression), so input such as ``"("`` or
    ``"C++"`` is safe.

    Args:
        coin_name: Text to look for in the titles. Blank input is rejected.

    Returns:
        tuple: ``(report, figure, table)``. ``report`` is Markdown text,
        ``figure`` holds a bar chart and a pie chart of the label counts and
        ``table`` holds up to 10 matching rows. When there is nothing to
        analyse, ``report`` is a message and the other two are ``None``.
    """
    global df
    if df is None:
        return "⚠️ Önce haberleri toplamalısınız!", None, None

    coin_name = (coin_name or "").strip()
    if not coin_name:
        # An empty pattern would match every title.
        return "⚠️ Lütfen bir coin adı girin!", None, None

    mask = df["title"].str.contains(coin_name, case=False, na=False, regex=False)
    filtered = df[mask]
    if len(filtered) == 0:
        return f"⚠️ '{coin_name}' hakkında haber bulunamadı.", None, None

    # Sentiment distribution
    sentiment_dist = filtered["sentiment_label"].value_counts()

    # Charts
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
    try:
        # Colour per FinBERT label
        color_map = {
            'positive': '#2ecc71',
            'negative': '#e74c3c',
            'neutral': '#95a5a6'
        }
        colors = [color_map.get(x, '#95a5a6') for x in sentiment_dist.index]

        # Bar chart
        ax1.bar(sentiment_dist.index, sentiment_dist.values, color=colors, alpha=0.7)
        ax1.set_title(f'{coin_name} - Sentiment Dağılımı (FinBERT)', fontweight='bold')
        ax1.set_ylabel('Haber Sayısı')
        ax1.grid(axis='y', alpha=0.3)

        # Pie chart
        ax2.pie(sentiment_dist.values, labels=sentiment_dist.index, autopct='%1.1f%%',
                colors=colors, startangle=90)
        ax2.set_title(f'{coin_name} - Yüzdelik Dağılım', fontweight='bold')
        fig.tight_layout()
    finally:
        # Unregister the figure from pyplot so repeated clicks do not
        # accumulate open figures; the Figure object itself stays usable.
        plt.close(fig)

    # Text report
    total = len(filtered)
    report = f"""
📊 **{coin_name.upper()} ANALİZ RAPORU (FinBERT)**
📰 Toplam Haber: {total}
💭 **Sentiment Dağılımı:**
"""
    for label, count in sentiment_dist.items():
        percentage = (count / total) * 100
        report += f"\n• {label.capitalize()}: {count} haber (%{percentage:.1f})"

    # Most positive headlines: the score is the confidence in the predicted
    # label, so only rows labelled "positive" qualify.
    positive_rows = filtered[filtered["sentiment_label"] == "positive"]
    top_positive = positive_rows.nlargest(3, "sentiment_score")[["title", "sentiment_score"]]
    report += "\n\n⭐ **En Pozitif 3 Haber:**\n"
    for _, row in top_positive.iterrows():
        report += f"\n• {row['title'][:80]}...\n Skor: {row['sentiment_score']:.3f}"

    # Table
    table_data = filtered[["title", "sentiment_label", "sentiment_score", "link"]].head(10)
    return report, fig, table_data
def get_trending_topics():
    """Find the ten most frequent capitalised terms in the news titles.

    Terms are words that start with an upper-case ASCII letter, minus a fixed
    list of generic stop words.

    Returns:
        tuple: ``(message, figure, table)``. ``figure`` is a horizontal bar
        chart and ``table`` has the columns ``Terim`` and ``Frekans``. When no
        news has been collected or no term is found, ``message`` explains why
        and the other two are ``None``.
    """
    global df
    if df is None or len(df) == 0:
        return "⚠️ Önce haberleri toplamalısınız!", None, None

    all_titles = " ".join(df["title"].tolist())
    potential_coins = re.findall(r'\b[A-Z][a-zA-Z]+\b', all_titles)
    stop_words = {
        "Coin", "Price", "Market", "News", "Today", "Crypto", "Token",
        "The", "This", "That", "What", "When", "Where", "How", "Why",
        "And", "But", "For", "From", "With", "About", "After", "Before"
    }
    filtered_tokens = [t for t in potential_coins if t not in stop_words]
    trending = Counter(filtered_tokens).most_common(10)

    # Unpacking ``zip(*trending)`` and indexing ``trending[0]`` both fail on
    # an empty list, so stop here when nothing matched.
    if not trending:
        return "⚠️ Trend terimi bulunamadı.", None, None

    # Chart
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        terms, counts = zip(*trending)
        ax.barh(terms, counts, color='#3498db', alpha=0.7)
        ax.set_xlabel('Frekans', fontweight='bold')
        ax.set_title('En Çok Bahsedilen 10 Terim', fontweight='bold', fontsize=14)
        ax.invert_yaxis()
        ax.grid(axis='x', alpha=0.3)
        fig.tight_layout()
    finally:
        # Unregister the figure from pyplot so repeated clicks do not
        # accumulate open figures; the Figure object itself stays usable.
        plt.close(fig)

    # Table
    trend_df = pd.DataFrame(trending, columns=["Terim", "Frekans"])
    top_term, top_count = trending[0]
    message = f"🔥 En popüler terim: **{top_term}** ({top_count} kez bahsedildi)"
    return message, fig, trend_df
def create_overview_chart():
    """Draw a 2x2 overview figure of the collected news.

    Panels: label counts (bar), confidence-score histogram per label, the
    five most frequent source hosts (taken from each row's link) and the
    label shares (pie).

    Returns:
        matplotlib.figure.Figure: The overview figure.

    Raises:
        gr.Error: When no news has been collected yet. The only output of
            this callback is a plot component, which cannot display a text
            message, so the message is surfaced through Gradio's error
            mechanism instead of being returned.
    """
    from urllib.parse import urlparse

    global df
    if df is None or len(df) == 0:
        raise gr.Error("⚠️ Önce haberleri toplamalısınız!")

    def source_of(link):
        """Return the host part of ``link`` or 'Unknown' when it has none."""
        host = urlparse(link).netloc if isinstance(link, str) else ""
        return host or 'Unknown'

    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    try:
        fig.suptitle('Kripto Haber Analizi - Genel Bakış (FinBERT)', fontsize=16, fontweight='bold')

        # Colour per label
        color_map = {
            'positive': '#2ecc71',
            'negative': '#e74c3c',
            'neutral': '#95a5a6'
        }

        # 1. Overall sentiment counts
        ax1 = axes[0, 0]
        sentiment_counts = df["sentiment_label"].value_counts()
        colors = [color_map.get(x, '#95a5a6') for x in sentiment_counts.index]
        ax1.bar(sentiment_counts.index, sentiment_counts.values, color=colors, alpha=0.7)
        ax1.set_title('Tüm Haberlerde Sentiment', fontweight='bold')
        ax1.set_ylabel('Haber Sayısı')
        ax1.grid(axis='y', alpha=0.3)

        # 2. Score distribution per label
        ax2 = axes[0, 1]
        positive_scores = df[df["sentiment_label"] == "positive"]["sentiment_score"]
        negative_scores = df[df["sentiment_label"] == "negative"]["sentiment_score"]
        neutral_scores = df[df["sentiment_label"] == "neutral"]["sentiment_score"]
        ax2.hist([positive_scores, negative_scores, neutral_scores], bins=15,
                 label=['Positive', 'Negative', 'Neutral'],
                 color=['#2ecc71', '#e74c3c', '#95a5a6'], alpha=0.6)
        ax2.set_title('Sentiment Skor Dağılımı', fontweight='bold')
        ax2.set_xlabel('Güven Skoru')
        ax2.legend()
        ax2.grid(axis='y', alpha=0.3)

        # 3. Source distribution (host of each link)
        ax3 = axes[1, 0]
        sources = df["link"].apply(source_of)
        source_counts = sources.value_counts().head(5)
        ax3.barh(source_counts.index, source_counts.values, color='#9b59b6', alpha=0.7)
        ax3.set_title('En Çok Haber Kaynağı', fontweight='bold')
        ax3.set_xlabel('Haber Sayısı')
        ax3.invert_yaxis()

        # 4. Label shares
        ax4 = axes[1, 1]
        ax4.pie(sentiment_counts.values, labels=sentiment_counts.index, autopct='%1.1f%%',
                colors=colors, startangle=90)
        ax4.set_title('Sentiment Yüzdesi', fontweight='bold')
        fig.tight_layout()
    finally:
        # Unregister the figure from pyplot so repeated clicks do not
        # accumulate open figures; the Figure object itself stays usable.
        plt.close(fig)
    return fig
# Gradio interface: one Blocks app with a tab per feature. Each button is
# wired to one of the callbacks defined above.
with gr.Blocks(theme=gr.themes.Soft(), title="Kripto Haber Analiz Platformu - FinBERT") as app:
    # Introductory text shown above the tabs
    gr.Markdown("""
# 🚀 Kripto Haber Analiz Platformu (FinBERT)
Bu uygulama kripto para haberlerini toplar ve **FinBERT** modeli ile finansal sentiment analizi yapar.
### 📋 Kullanım Adımları:
1. **"FinBERT Modelini Başlat"** butonuna tıklayın
2. **"Haberleri Topla ve Analiz Et"** butonuna tıklayın
3. **"Genel Özet"** sekmesinden haberlerin özetini görün
4. Diğer sekmeleri kullanarak detaylı analiz yapın
### 🎯 FinBERT Nedir?
FinBERT, finansal metinler üzerinde eğitilmiş özel bir BERT modelidir.
Kripto ve finans haberlerinde daha doğru sentiment analizi sağlar.
""")
    # Start tab: load the models, then collect and analyse the news
    with gr.Tab("🏠 Başlangıç"):
        gr.Markdown("### Sistemi Başlatın")
        init_btn = gr.Button("🔧 FinBERT Modelini Başlat (İlk adım)", variant="primary", size="lg")
        init_output = gr.Textbox(label="Durum", lines=2)
        gr.Markdown("---")
        fetch_btn = gr.Button("📰 Haberleri Topla ve Analiz Et (İkinci adım)", variant="primary", size="lg")
        fetch_output = gr.Textbox(label="Toplama Durumu", lines=10)
        fetch_table = gr.Dataframe(label="İlk 10 Haber")
        init_btn.click(initialize_models, outputs=init_output)
        fetch_btn.click(fetch_news, outputs=[fetch_output, fetch_table])
    # Summary tab: report text, charts and statistics from generate_news_summary
    with gr.Tab("📰 Genel Özet"):
        gr.Markdown("### Tüm Haberlerin Genel Özeti")
        gr.Markdown("Toplanan tüm haberlerin sentiment analizi, en önemli haberler ve trend konuları.")
        summary_btn = gr.Button("📰 Genel Özet Oluştur", variant="primary", size="lg")
        summary_text = gr.Markdown(label="Özet Rapor")
        summary_chart = gr.Plot(label="Özet Grafikler")
        summary_table = gr.Dataframe(label="İstatistikler")
        summary_btn.click(generate_news_summary, outputs=[summary_text, summary_chart, summary_table])
    # Search tab: query text and result count feed search_similar_news
    with gr.Tab("🔍 Haber Arama"):
        gr.Markdown("### Semantik Arama ile Benzer Haberleri Bulun")
        with gr.Row():
            search_input = gr.Textbox(label="Arama Sorgusu", placeholder="Örn: Bitcoin, Ethereum, NFT...")
            search_k = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Sonuç Sayısı")
        search_btn = gr.Button("🔎 Ara", variant="primary")
        search_status = gr.Textbox(label="Durum")
        search_results = gr.Dataframe(label="Sonuçlar")
        search_btn.click(search_similar_news, inputs=[search_input, search_k],
                         outputs=[search_status, search_results])
    # Coin tab: per-coin report, charts and table from analyze_coin_sentiment
    with gr.Tab("📊 Coin Analizi"):
        gr.Markdown("### Belirli Bir Coin için Detaylı Sentiment Analizi (FinBERT)")
        coin_input = gr.Textbox(label="Coin Adı", placeholder="Örn: Bitcoin, Ethereum, Solana...")
        analyze_btn = gr.Button("📊 Analiz Et", variant="primary")
        coin_report = gr.Markdown(label="Rapor")
        coin_chart = gr.Plot(label="Grafikler")
        coin_table = gr.Dataframe(label="Detaylı Haberler")
        analyze_btn.click(analyze_coin_sentiment, inputs=coin_input,
                          outputs=[coin_report, coin_chart, coin_table])
    # Trend tab: most mentioned terms from get_trending_topics
    with gr.Tab("🔥 Trend Analizi"):
        gr.Markdown("### En Çok Bahsedilen Konular")
        trend_btn = gr.Button("🔥 Trendleri Göster", variant="primary")
        trend_status = gr.Markdown(label="Özet")
        trend_chart = gr.Plot(label="Trend Grafiği")
        trend_table = gr.Dataframe(label="Trend Tablosu")
        trend_btn.click(get_trending_topics, outputs=[trend_status, trend_chart, trend_table])
    # Overview tab: single plot output fed by create_overview_chart
    with gr.Tab("📈 Genel Bakış"):
        gr.Markdown("### Tüm Verilerin Genel Görünümü")
        overview_btn = gr.Button("📈 Genel İstatistikleri Göster", variant="primary")
        overview_chart = gr.Plot(label="Genel Grafikler")
        overview_btn.click(create_overview_chart, outputs=overview_chart)
    # Footer text: tips, technologies used and the meaning of the FinBERT labels
    gr.Markdown("""
---
### 💡 İpuçları:
- FinBERT ilk çalıştırmada yüklendiği için biraz zaman alabilir
- Haberler RSS feedlerinden gerçek zamanlı olarak çekilir
- **Genel Özet** sekmesi tüm haberlerin analizini sunar
- FinBERT finansal metinler için özelleştirilmiştir
- FAISS kullanarak semantik benzerlik hesaplanır
### 🛠️ Kullanılan Teknolojiler:
- **FinBERT (ProsusAI/finbert)**: Finansal sentiment analizi
- **Sentence Transformers**: Semantik embedding
- **FAISS**: Hızlı benzerlik araması
- **Gradio**: Kullanıcı arayüzü
### 📊 FinBERT Çıktıları:
- **Positive**: Pozitif finansal sentiment
- **Negative**: Negatif finansal sentiment
- **Neutral**: Nötr finansal sentiment
""")
# Start the Gradio app only when this file is run as a script
if __name__ == "__main__":
    app.launch()