File size: 6,322 Bytes
384f5e4 bfd4ab7 7c4d402 febef4d 2c8cb06 80a6db2 c120109 2c8cb06 80a6db2 2c8cb06 82d33ff 2c8cb06 5bcb8da 717fe8c 04a8794 7c4d402 717fe8c 2c8cb06 4f7bc9c 531e3fa 0a4103c bfd4ab7 0a4103c 82d33ff 0a4103c 82d33ff 0a4103c 717fe8c 0a4103c 2c8cb06 0a4103c 8b3a49e 0a4103c 2c8cb06 c120109 14ae745 c120109 2c8cb06 c120109 ebf358a d84d5f9 ebf358a d10e98d 7c4d402 2c8cb06 7c4d402 2c8cb06 7c4d402 2c8cb06 ebf358a 04a8794 d10e98d 242b668 5a80960 2c8cb06 c120109 5bcb8da 2c8cb06 82d33ff 2c8cb06 5bcb8da c120109 82d33ff 2c8cb06 c120109 2c8cb06 c120109 82d33ff 2c8cb06 5bcb8da 82d33ff 4f7bc9c c120109 4f7bc9c c120109 82d33ff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import streamlit as st
import requests
import openai
import os
from datasets import load_dataset
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier # Incremental Learning์ ์ ํฉํ ๋ชจ๋ธ
from sklearn.metrics import classification_report, accuracy_score
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
# Configure the Streamlit page (title, icon, wide layout) — must run before other st.* calls
st.set_page_config(page_title="์ ์น์ ์ฑํฅ ๋ถ์ ๋ฐ ๋ฐ๋ ๊ด์ ์์ฑ", page_icon="๐ฐ", layout="wide")
# OpenAI API key comes from the environment; will be None if OPENAI_API_KEY is unset
openai.api_key = os.getenv("OPENAI_API_KEY")
# Load the political-tweets dataset from the Hugging Face hub (cached by Streamlit).
@st.cache_data
def load_huggingface_data():
    """Fetch and return the jacobvs/PoliticalTweets dataset from the HF hub."""
    return load_dataset("jacobvs/PoliticalTweets")
# Fetch recent news articles for a keyword from the Naver News search API.
def fetch_naver_news(query, display=15):
    """Return a list of news item dicts for *query* from the Naver News API.

    Args:
        query: Search keyword.
        display: Number of articles to request (default 15).

    Returns:
        List of item dicts (title/description/link/...), or [] on any failure.
    """
    # NOTE(review): these credentials were hard-coded in source — they are
    # effectively leaked and should be rotated; env vars take precedence.
    client_id = os.getenv("NAVER_CLIENT_ID", "I_8koTJh3R5l4wLurQbG")
    client_secret = os.getenv("NAVER_CLIENT_SECRET", "W5oWYlAgur")
    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret,
    }
    params = {
        "query": query,
        "display": display,
        "start": 1,
        "sort": "date",  # newest first
    }
    try:
        # Timeout keeps the Streamlit app from hanging on a stalled connection.
        response = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.RequestException:
        st.error("๋ด์ค ๋ฐ์ดํฐ๋ฅผ ๋ถ๋ฌ์ค๋ ๋ฐ ์คํจํ์ต๋๋ค.")
        return []
    if response.status_code == 200:
        # .get() guards against a well-formed response missing 'items'.
        return response.json().get('items', [])
    st.error("๋ด์ค ๋ฐ์ดํฐ๋ฅผ ๋ถ๋ฌ์ค๋ ๋ฐ ์คํจํ์ต๋๋ค.")
    return []
# Merge the Hugging Face training split with freshly fetched Naver articles.
def combine_datasets(huggingface_data, naver_data):
    """Concatenate texts and labels from both sources.

    Naver items (title + description) get the placeholder label "NEUTRAL";
    Hugging Face rows keep their original 'party' labels.
    Returns a (texts, labels) pair of lists.
    """
    naver_texts = [f"{entry['title']}. {entry['description']}" for entry in naver_data]
    naver_labels = ["NEUTRAL" for _ in naver_texts]
    train_split = huggingface_data['train']
    combined_texts = train_split['text'] + naver_texts
    combined_labels = train_split['party'] + naver_labels
    return combined_texts, combined_labels
# Restore a saved model/vectorizer pair from disk, or build fresh ones.
def initialize_model():
    """Return a (model, vectorizer) pair.

    Loads both pickles when they exist on disk; otherwise constructs a new
    SGD classifier and TF-IDF vectorizer with the project's defaults.
    """
    have_saved = (
        os.path.exists("incremental_model.pkl")
        and os.path.exists("tfidf_vectorizer.pkl")
    )
    if have_saved:
        return joblib.load("incremental_model.pkl"), joblib.load("tfidf_vectorizer.pkl")
    # Fresh setup: log-loss SGD supports partial_fit for incremental learning.
    model = SGDClassifier(loss='log_loss', max_iter=5, tol=None)
    vectorizer = TfidfVectorizer(max_features=1000, stop_words="english")
    return model, vectorizer
# Run one incremental training pass and persist the artifacts.
def incremental_training(texts, labels, model, vectorizer):
    """Incrementally train *model* on *texts*/*labels*; save model + vectorizer.

    Labels map Democrat->0, Republican->1, anything else->2.

    Fix: the original called ``fit_transform`` unconditionally, re-fitting the
    vectorizer on every call. Re-fitting rebuilds the vocabulary, so feature
    columns no longer match what a model previously loaded from disk learned,
    silently corrupting the "incremental" training. Fit only on first use;
    afterwards, just transform.
    """
    if hasattr(vectorizer, 'vocabulary_'):
        # Vectorizer was already fitted (e.g. loaded from disk): keep its vocabulary.
        X = vectorizer.transform(texts)
    else:
        X = vectorizer.fit_transform(texts)
    y = [0 if label == "Democrat" else 1 if label == "Republican" else 2 for label in labels]
    model.partial_fit(X, y, classes=[0, 1, 2])  # incremental learning step
    # Persist both artifacts so the next session resumes from this state.
    joblib.dump(model, "incremental_model.pkl")
    joblib.dump(vectorizer, "tfidf_vectorizer.pkl")
    return model, vectorizer
# Generate an opposing-viewpoint article with GPT-4 via the OpenAI chat API.
def generate_article_gpt4(prompt):
    """Return GPT-4's completion for *prompt*, or an error string on failure.

    NOTE(review): uses the legacy ``openai.ChatCompletion`` interface, which
    assumes an openai package < 1.0 — confirm the pinned version.
    """
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant that generates articles."},
        {"role": "user", "content": prompt},
    ]
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=chat_messages,
            max_tokens=1024,
            temperature=0.7,
        )
        return response['choices'][0]['message']['content']
    except Exception as e:
        # Best-effort: surface the failure as text instead of crashing the UI.
        return f"Error generating text: {e}"
# --- Streamlit application entry point -------------------------------------
# Flat script: Streamlit re-runs everything below top-to-bottom on interaction.
st.title("๐ฐ ์ ์น์ ์ฑํฅ ๋ถ์ ๋ฐ ๋ฐ๋ ๊ด์ ์์ฑ ๋๊ตฌ")
st.markdown("๋ค์ด๋ฒ ๋ด์ค์ ํ๊น
ํ์ด์ค ๋ฐ์ดํฐ๋ฅผ ํ์ฉํ์ฌ ๋ด์ค ์ฑํฅ์ ๋ถ์ํ๊ณ , ๋ฐ๋ ๊ด์ ์ ์์ฑํฉ๋๋ค.")
# Load the (cached) Hugging Face dataset once per session
huggingface_data = load_huggingface_data()
query = st.text_input("๋ค์ด๋ฒ ๋ด์ค์์ ๊ฒ์ํ ํค์๋๋ฅผ ์
๋ ฅํ์ธ์", value="์ ์น")
# Button 1: combine both data sources and run one incremental training pass
if st.button("๋ฐ์ดํฐ ๊ฒฐํฉ ๋ฐ ํ์ต"):
    texts, labels = combine_datasets(huggingface_data, fetch_naver_news(query))
    model, vectorizer = initialize_model()
    model, vectorizer = incremental_training(texts, labels, model, vectorizer)
    # Evaluation below scores the model on its own training data —
    # NOTE(review): this is an in-sample accuracy, so treat it as optimistic.
    X_test = vectorizer.transform(texts)
    y_test = [0 if label == "Democrat" else 1 if label == "Republican" else 2 for label in labels]
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    st.write(f"๋ชจ๋ธ ์ ํ๋: {accuracy:.2f}")
    st.text("๋ถ๋ฅ ๋ฆฌํฌํธ:")
    st.text(classification_report(y_test, y_pred, target_names=["Democrat", "Republican", "NEUTRAL"]))
    st.success("๋ชจ๋ธ์ด ์๋ก์ด ๋ฐ์ดํฐ๋ก ์ถ๊ฐ ํ์ต๋์์ต๋๋ค.")
# Button 2: classify fetched news and generate an opposing-viewpoint article
# NOTE(review): if no saved pickles exist, initialize_model() returns an
# UNFITTED model/vectorizer and the transform/predict calls below will raise —
# this path assumes "๋ฐ์ดํฐ ๊ฒฐํฉ ๋ฐ ํ์ต" was run at least once; confirm.
if st.button("๋ด์ค ์ฑํฅ ๋ถ์"):
    model, vectorizer = initialize_model()
    news_items = fetch_naver_news(query, display=15)  # fetch 15 articles
    if news_items:
        st.subheader("๋ด์ค ์ฑํฅ ๋ถ์ ๊ฒฐ๊ณผ")
        for item in news_items:
            title = item["title"]
            description = item["description"]
            link = item["link"]
            combined_text = f"{title}. {description}"
            # Vectorize the article text and predict its political leaning
            vectorized_text = vectorizer.transform([combined_text])
            prediction = model.predict(vectorized_text)[0]
            sentiment = ["Democrat", "Republican", "NEUTRAL"][prediction]
            # Pick the opposite stance label, then ask GPT-4 to write from it
            opposite_perspective = "๋ณด์์ " if sentiment == "Democrat" else "์ง๋ณด์ " if sentiment == "Republican" else "์ค๋ฆฝ์ "
            prompt = f"๋ค์ ๊ธฐ์ฌ์ ๋ฐ๋ ๊ด์ ์ผ๋ก ๊ธฐ์ฌ๋ฅผ ์์ฑํ์ธ์:\n\n{combined_text}\n\n๋ฐ๋ ๊ด์ : {opposite_perspective}"
            opposite_article = generate_article_gpt4(prompt)
            st.write(f"**์ ๋ชฉ:** {title}")
            st.write(f"**๊ธฐ์ฌ ๋ด์ฉ:** {description}")
            st.write(f"**์ฑํฅ:** {sentiment}")
            st.write(f"**๋ฐ๋ ๊ด์ ๊ธฐ์ฌ:** {opposite_article}")
            st.write(f"**๋งํฌ:** [๊ธฐ์ฌ ๋งํฌ]({link})")
            st.markdown("---")
|