Update app.py
Browse files
app.py
CHANGED
|
@@ -5,14 +5,14 @@ import os
|
|
| 5 |
from datasets import load_dataset
|
| 6 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 7 |
from sklearn.linear_model import LogisticRegression
|
| 8 |
-
from sklearn.model_selection import train_test_split
|
| 9 |
from sklearn.metrics import classification_report, accuracy_score
|
| 10 |
import joblib
|
| 11 |
import matplotlib.pyplot as plt
|
| 12 |
import seaborn as sns
|
| 13 |
|
| 14 |
# Streamlit 페이지 설정
|
| 15 |
-
st.set_page_config(page_title="์ ์น์ ์ฑํฅ ๋ถ์", page_icon="๐ฐ", layout="wide")
|
| 16 |
|
| 17 |
# OpenAI API 키 설정
|
| 18 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
|
@@ -85,17 +85,51 @@ def generate_article_gpt4(prompt):
|
|
| 85 |
except Exception as e:
|
| 86 |
return f"Error generating text: {e}"
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
# Streamlit 애플리케이션 시작
|
| 89 |
-
st.title("๐ฐ ์ ์น์ ์ฑํฅ ๋ถ์ ๋ฐ
|
| 90 |
-
st.markdown("ํ๊น
ํ์ด์ค
|
| 91 |
|
| 92 |
# 데이터 로드
|
| 93 |
huggingface_data = load_huggingface_data()
|
| 94 |
query = st.text_input("๋ค์ด๋ฒ ๋ด์ค์์ ๊ฒ์ํ ํค์๋๋ฅผ ์
๋ ฅํ์ธ์", value="์ ์น")
|
| 95 |
-
naver_data = fetch_naver_news(query)
|
| 96 |
|
| 97 |
if st.button("๋ฐ์ดํฐ ๊ฒฐํฉ ๋ฐ ํ์ต"):
|
| 98 |
-
texts, labels = combine_datasets(huggingface_data,
|
| 99 |
label_mapping = {"Democrat": 0, "Republican": 1, "NEUTRAL": 2}
|
| 100 |
y = [label_mapping[label] for label in labels]
|
| 101 |
model, vectorizer, X_test, y_test = train_model(texts, y)
|
|
@@ -107,23 +141,18 @@ if st.button("๋ฐ์ดํฐ ๊ฒฐํฉ ๋ฐ ํ์ต"):
|
|
| 107 |
st.text("๋ถ๋ฅ ๋ฆฌํฌํธ:")
|
| 108 |
st.text(classification_report(y_test, y_pred, target_names=list(label_mapping.keys())))
|
| 109 |
|
| 110 |
-
#
|
| 111 |
-
st.
|
| 112 |
-
user_input = st.text_area("๋ถ์ํ ํ
์คํธ๋ฅผ ์
๋ ฅํ์ธ์", placeholder="์: The government should invest more in public health.")
|
| 113 |
-
|
| 114 |
-
if st.button("์ฑํฅ ๋ถ์"):
|
| 115 |
vectorizer = joblib.load("tfidf_vectorizer.pkl")
|
| 116 |
model = joblib.load("political_tweets_model.pkl")
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
st.write(f"[๊ธฐ์ฌ ๋งํฌ]({item['link']})")
|
| 129 |
-
st.markdown("---")
|
|
|
|
| 5 |
from datasets import load_dataset
|
| 6 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 7 |
from sklearn.linear_model import LogisticRegression
|
| 8 |
+
from sklearn.model_selection import train_test_split
|
| 9 |
from sklearn.metrics import classification_report, accuracy_score
|
| 10 |
import joblib
|
| 11 |
import matplotlib.pyplot as plt
|
| 12 |
import seaborn as sns
|
| 13 |
|
| 14 |
# Streamlit 페이지 설정
|
| 15 |
+
st.set_page_config(page_title="์ ์น์ ์ฑํฅ ๋ถ์ ๋ฐ ๋ฐ๋ ๊ด์ ์์ฑ", page_icon="๐ฐ", layout="wide")
|
| 16 |
|
| 17 |
# OpenAI API 키 설정
|
| 18 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
|
|
|
| 85 |
except Exception as e:
|
| 86 |
return f"Error generating text: {e}"
|
| 87 |
|
| 88 |
+
# 정치 성향별 분석 및 반대 관점 기사 생성
|
| 89 |
+
def analyze_and_generate_articles(query, model, vectorizer):
    """Classify fetched news items and generate an opposing-view article for each.

    News items are obtained from ``fetch_naver_news(query)``. Each item's
    title and description are joined into one text, vectorized with
    ``vectorizer.transform`` and classified with ``model.predict``. A
    counter-perspective article is then requested from
    ``generate_article_gpt4`` and the item is filed under its predicted
    leaning.

    Args:
        query: Search keyword forwarded to ``fetch_naver_news``.
        model: Fitted classifier exposing ``predict``.
        vectorizer: Fitted vectorizer exposing ``transform``.

    Returns:
        A dict with three leaning labels as keys, each mapping to a list of
        per-article dicts (title, description, leaning, generated article,
        link). All three lists are empty when no news items were fetched.

    Note:
        The label strings and dict keys appear mis-encoded in this copy of
        the file. They are reproduced verbatim because the rendering code
        elsewhere in the file looks up the very same keys.
    """
    grouped = {"์ง๋ณด": [], "๋ณด์": [], "์ค๋ฆฝ": []}
    fetched = fetch_naver_news(query)

    # Nothing to analyze: report it in the UI and hand back the empty buckets.
    if not fetched:
        st.error("๋ด์ค ๋ฐ์ดํฐ๊ฐ ์์ต๋๋ค.")
        return grouped

    for entry in fetched:
        headline = entry["title"]
        summary = entry["description"]
        url = entry["link"]
        text = f"{headline}. {summary}"

        # Vectorize the single text and take the one predicted class id.
        label = model.predict(vectorizer.transform([text]))[0]

        # Class ids 0 and 1 select the first two buckets; any other value
        # falls into the third (this lines up with the 0/1/2 label_mapping
        # used by the training step elsewhere in this file).
        if label == 0:
            leaning = "์ง๋ณด"
        elif label == 1:
            leaning = "๋ณด์"
        else:
            leaning = "์ค๋ฆฝ"

        # Choose the perspective to ask the generator for.
        # NOTE(review): items in the third (fallback) bucket get the same
        # target perspective as items in the second one — confirm intended.
        if leaning == "์ง๋ณด":
            target_view = "๋ณด์์ "
        else:
            target_view = "์ง๋ณด์ "
        request = f"๋ค์ ๊ธฐ์ฌ์ ๋ฐ๋ ๊ด์ ์ผ๋ก ๊ธฐ์ฌ๋ฅผ ์์ฑํ์ธ์:\n\n{text}\n\n๋ฐ๋ ๊ด์ : {target_view}"
        generated = generate_article_gpt4(request)

        # File the article under its predicted leaning.
        record = {
            "์ ๋ชฉ": headline,
            "๊ธฐ์ฌ": summary,
            "์ฑํฅ": leaning,
            "๋ฐ๋ ๊ด์ ๊ธฐ์ฌ": generated,
            "๋งํฌ": url,
        }
        grouped[leaning].append(record)

    return grouped
|
| 122 |
+
|
| 123 |
# Streamlit 애플리케이션 시작
|
| 124 |
+
st.title("๐ฐ ์ ์น์ ์ฑํฅ ๋ถ์ ๋ฐ ๋ฐ๋ ๊ด์ ๊ธฐ์ฌ ์์ฑ ๋๊ตฌ")
|
| 125 |
+
st.markdown("๋ค์ด๋ฒ ๋ด์ค์ ํ๊น
ํ์ด์ค ๋ฐ์ดํฐ๋ฅผ ํ์ฉํ์ฌ ๋ด์ค ์ฑํฅ์ ๋ถ์ํ๊ณ , ๋ฐ๋ ๊ด์ ์ ์์ฑํฉ๋๋ค.")
|
| 126 |
|
| 127 |
# 데이터 로드
|
| 128 |
huggingface_data = load_huggingface_data()
|
| 129 |
query = st.text_input("๋ค์ด๋ฒ ๋ด์ค์์ ๊ฒ์ํ ํค์๋๋ฅผ ์
๋ ฅํ์ธ์", value="์ ์น")
|
|
|
|
| 130 |
|
| 131 |
if st.button("๋ฐ์ดํฐ ๊ฒฐํฉ ๋ฐ ํ์ต"):
|
| 132 |
+
texts, labels = combine_datasets(huggingface_data, fetch_naver_news(query))
|
| 133 |
label_mapping = {"Democrat": 0, "Republican": 1, "NEUTRAL": 2}
|
| 134 |
y = [label_mapping[label] for label in labels]
|
| 135 |
model, vectorizer, X_test, y_test = train_model(texts, y)
|
|
|
|
| 141 |
st.text("๋ถ๋ฅ ๋ฆฌํฌํธ:")
|
| 142 |
st.text(classification_report(y_test, y_pred, target_names=list(label_mapping.keys())))
|
| 143 |
|
| 144 |
+
# 뉴스 데이터 분석 및 반대 관점 기사 생성
|
| 145 |
+
if st.button("๋ด์ค ์ฑํฅ ๋ถ์"):
|
|
|
|
|
|
|
|
|
|
| 146 |
vectorizer = joblib.load("tfidf_vectorizer.pkl")
|
| 147 |
model = joblib.load("political_tweets_model.pkl")
|
| 148 |
+
results = analyze_and_generate_articles(query, model, vectorizer)
|
| 149 |
+
|
| 150 |
+
st.subheader("๋ถ์ ๊ฒฐ๊ณผ")
|
| 151 |
+
for sentiment, articles in results.items():
|
| 152 |
+
st.write(f"### {sentiment} ๊ธฐ์ฌ ({len(articles)}๊ฐ)")
|
| 153 |
+
for article in articles:
|
| 154 |
+
st.write(f"**์ ๋ชฉ:** {article['์ ๋ชฉ']}")
|
| 155 |
+
st.write(f"**๊ธฐ์ฌ ๋ด์ฉ:** {article['๊ธฐ์ฌ']}")
|
| 156 |
+
st.write(f"**๋ฐ๋ ๊ด์ ๊ธฐ์ฌ:** {article['๋ฐ๋ ๊ด์ ๊ธฐ์ฌ']}")
|
| 157 |
+
st.write(f"**๋งํฌ:** [๊ธฐ์ฌ ๋งํฌ]({article['๋งํฌ']})")
|
| 158 |
+
st.markdown("---")
|
|
|
|
|
|