820nam committed on
Commit
5bcb8da
·
verified ·
1 Parent(s): 2c8cb06

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -24
app.py CHANGED
@@ -5,14 +5,14 @@ import os
5
  from datasets import load_dataset
6
  from sklearn.feature_extraction.text import TfidfVectorizer
7
  from sklearn.linear_model import LogisticRegression
8
- from sklearn.model_selection import train_test_split, cross_val_score
9
  from sklearn.metrics import classification_report, accuracy_score
10
  import joblib
11
  import matplotlib.pyplot as plt
12
  import seaborn as sns
13
 
14
  # Streamlit ํŽ˜์ด์ง€ ์„ค์ •
15
- st.set_page_config(page_title="์ •์น˜์  ์„ฑํ–ฅ ๋ถ„์„", page_icon="๐Ÿ“ฐ", layout="wide")
16
 
17
  # OpenAI API ํ‚ค ์„ค์ •
18
  openai.api_key = os.getenv("OPENAI_API_KEY")
@@ -85,17 +85,51 @@ def generate_article_gpt4(prompt):
85
  except Exception as e:
86
  return f"Error generating text: {e}"
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  # Streamlit ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜ ์‹œ์ž‘
89
- st.title("๐Ÿ“ฐ ์ •์น˜์  ์„ฑํ–ฅ ๋ถ„์„ ๋ฐ ๋‰ด์Šค ๋น„๊ต ๋„๊ตฌ")
90
- st.markdown("ํ—ˆ๊น…ํŽ˜์ด์Šค์˜ `PoliticalTweets` ๋ฐ์ดํ„ฐ์…‹๊ณผ ๋„ค์ด๋ฒ„ ๋‰ด์Šค API๋ฅผ ํ™œ์šฉํ•˜์—ฌ ํ…์ŠคํŠธ ์„ฑํ–ฅ์„ ๋ถ„์„ํ•ฉ๋‹ˆ๋‹ค.")
91
 
92
  # ๋ฐ์ดํ„ฐ ๋กœ๋“œ
93
  huggingface_data = load_huggingface_data()
94
  query = st.text_input("๋„ค์ด๋ฒ„ ๋‰ด์Šค์—์„œ ๊ฒ€์ƒ‰ํ•  ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", value="์ •์น˜")
95
- naver_data = fetch_naver_news(query)
96
 
97
  if st.button("๋ฐ์ดํ„ฐ ๊ฒฐํ•ฉ ๋ฐ ํ•™์Šต"):
98
- texts, labels = combine_datasets(huggingface_data, naver_data)
99
  label_mapping = {"Democrat": 0, "Republican": 1, "NEUTRAL": 2}
100
  y = [label_mapping[label] for label in labels]
101
  model, vectorizer, X_test, y_test = train_model(texts, y)
@@ -107,23 +141,18 @@ if st.button("๋ฐ์ดํ„ฐ ๊ฒฐํ•ฉ ๋ฐ ํ•™์Šต"):
107
  st.text("๋ถ„๋ฅ˜ ๋ฆฌํฌํŠธ:")
108
  st.text(classification_report(y_test, y_pred, target_names=list(label_mapping.keys())))
109
 
110
- # ์‚ฌ์šฉ์ž ์ž…๋ ฅ ๋ฐ ์˜ˆ์ธก
111
- st.subheader("ํŠธ์œ— ๋˜๋Š” ๋‰ด์Šค ์„ฑํ–ฅ ์˜ˆ์ธก")
112
- user_input = st.text_area("๋ถ„์„ํ•  ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", placeholder="์˜ˆ: The government should invest more in public health.")
113
-
114
- if st.button("์„ฑํ–ฅ ๋ถ„์„"):
115
  vectorizer = joblib.load("tfidf_vectorizer.pkl")
116
  model = joblib.load("political_tweets_model.pkl")
117
- user_tfidf = vectorizer.transform([user_input])
118
- prediction = model.predict(user_tfidf)[0]
119
- prediction_label = list(label_mapping.keys())[prediction]
120
- st.write(f"์˜ˆ์ธก๋œ ์„ฑํ–ฅ: {prediction_label}")
121
-
122
- # ๋‰ด์Šค ๋ฐ์ดํ„ฐ ์‹œ๊ฐํ™”
123
- if naver_data:
124
- st.subheader("๋„ค์ด๋ฒ„ ๋‰ด์Šค ๋ฐ์ดํ„ฐ")
125
- for item in naver_data:
126
- st.write(f"์ œ๋ชฉ: {item['title']}")
127
- st.write(f"๋‚ด์šฉ: {item['description']}")
128
- st.write(f"[๊ธฐ์‚ฌ ๋งํฌ]({item['link']})")
129
- st.markdown("---")
 
5
  from datasets import load_dataset
6
  from sklearn.feature_extraction.text import TfidfVectorizer
7
  from sklearn.linear_model import LogisticRegression
8
+ from sklearn.model_selection import train_test_split
9
  from sklearn.metrics import classification_report, accuracy_score
10
  import joblib
11
  import matplotlib.pyplot as plt
12
  import seaborn as sns
13
 
14
  # Streamlit ํŽ˜์ด์ง€ ์„ค์ •
15
+ st.set_page_config(page_title="์ •์น˜์  ์„ฑํ–ฅ ๋ถ„์„ ๋ฐ ๋ฐ˜๋Œ€ ๊ด€์  ์ƒ์„ฑ", page_icon="๐Ÿ“ฐ", layout="wide")
16
 
17
  # OpenAI API ํ‚ค ์„ค์ •
18
  openai.api_key = os.getenv("OPENAI_API_KEY")
 
85
  except Exception as e:
86
  return f"Error generating text: {e}"
87
 
88
+ # ์ •์น˜ ์„ฑํ–ฅ๋ณ„ ๋ถ„์„ ๋ฐ ๋ฐ˜๋Œ€ ๊ด€์  ๊ธฐ์‚ฌ ์ƒ์„ฑ
89
+ def analyze_and_generate_articles(query, model, vectorizer):
90
+ news_items = fetch_naver_news(query)
91
+ results = {"์ง„๋ณด": [], "๋ณด์ˆ˜": [], "์ค‘๋ฆฝ": []}
92
+
93
+ if not news_items:
94
+ st.error("๋‰ด์Šค ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
95
+ return results
96
+
97
+ for item in news_items:
98
+ title = item["title"]
99
+ description = item["description"]
100
+ link = item["link"]
101
+ combined_text = f"{title}. {description}"
102
+
103
+ # ํ…์ŠคํŠธ ๋ฒกํ„ฐํ™” ๋ฐ ์˜ˆ์ธก
104
+ vectorized_text = vectorizer.transform([combined_text])
105
+ prediction = model.predict(vectorized_text)[0]
106
+ sentiment = "์ง„๋ณด" if prediction == 0 else "๋ณด์ˆ˜" if prediction == 1 else "์ค‘๋ฆฝ"
107
+
108
+ # ๋ฐ˜๋Œ€ ๊ด€์  ์ƒ์„ฑ
109
+ opposite_perspective = "๋ณด์ˆ˜์ " if sentiment == "์ง„๋ณด" else "์ง„๋ณด์ "
110
+ prompt = f"๋‹ค์Œ ๊ธฐ์‚ฌ์˜ ๋ฐ˜๋Œ€ ๊ด€์ ์œผ๋กœ ๊ธฐ์‚ฌ๋ฅผ ์ž‘์„ฑํ•˜์„ธ์š”:\n\n{combined_text}\n\n๋ฐ˜๋Œ€ ๊ด€์ : {opposite_perspective}"
111
+ opposite_article = generate_article_gpt4(prompt)
112
+
113
+ # ๊ฒฐ๊ณผ ์ €์žฅ
114
+ results[sentiment].append({
115
+ "์ œ๋ชฉ": title,
116
+ "๊ธฐ์‚ฌ": description,
117
+ "์„ฑํ–ฅ": sentiment,
118
+ "๋ฐ˜๋Œ€ ๊ด€์  ๊ธฐ์‚ฌ": opposite_article,
119
+ "๋งํฌ": link
120
+ })
121
+ return results
122
+
123
  # Streamlit ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜ ์‹œ์ž‘
124
+ st.title("๐Ÿ“ฐ ์ •์น˜์  ์„ฑํ–ฅ ๋ถ„์„ ๋ฐ ๋ฐ˜๋Œ€ ๊ด€์  ๊ธฐ์‚ฌ ์ƒ์„ฑ ๋„๊ตฌ")
125
+ st.markdown("๋„ค์ด๋ฒ„ ๋‰ด์Šค์™€ ํ—ˆ๊น…ํŽ˜์ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ํ™œ์šฉํ•˜์—ฌ ๋‰ด์Šค ์„ฑํ–ฅ์„ ๋ถ„์„ํ•˜๊ณ , ๋ฐ˜๋Œ€ ๊ด€์ ์„ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.")
126
 
127
  # ๋ฐ์ดํ„ฐ ๋กœ๋“œ
128
  huggingface_data = load_huggingface_data()
129
  query = st.text_input("๋„ค์ด๋ฒ„ ๋‰ด์Šค์—์„œ ๊ฒ€์ƒ‰ํ•  ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", value="์ •์น˜")
 
130
 
131
  if st.button("๋ฐ์ดํ„ฐ ๊ฒฐํ•ฉ ๋ฐ ํ•™์Šต"):
132
+ texts, labels = combine_datasets(huggingface_data, fetch_naver_news(query))
133
  label_mapping = {"Democrat": 0, "Republican": 1, "NEUTRAL": 2}
134
  y = [label_mapping[label] for label in labels]
135
  model, vectorizer, X_test, y_test = train_model(texts, y)
 
141
  st.text("๋ถ„๋ฅ˜ ๋ฆฌํฌํŠธ:")
142
  st.text(classification_report(y_test, y_pred, target_names=list(label_mapping.keys())))
143
 
144
+ # ๋‰ด์Šค ๋ฐ์ดํ„ฐ ๋ถ„์„ ๋ฐ ๋ฐ˜๋Œ€ ๊ด€์  ๊ธฐ์‚ฌ ์ƒ์„ฑ
145
+ if st.button("๋‰ด์Šค ์„ฑํ–ฅ ๋ถ„์„"):
 
 
 
146
  vectorizer = joblib.load("tfidf_vectorizer.pkl")
147
  model = joblib.load("political_tweets_model.pkl")
148
+ results = analyze_and_generate_articles(query, model, vectorizer)
149
+
150
+ st.subheader("๋ถ„์„ ๊ฒฐ๊ณผ")
151
+ for sentiment, articles in results.items():
152
+ st.write(f"### {sentiment} ๊ธฐ์‚ฌ ({len(articles)}๊ฐœ)")
153
+ for article in articles:
154
+ st.write(f"**์ œ๋ชฉ:** {article['์ œ๋ชฉ']}")
155
+ st.write(f"**๊ธฐ์‚ฌ ๋‚ด์šฉ:** {article['๊ธฐ์‚ฌ']}")
156
+ st.write(f"**๋ฐ˜๋Œ€ ๊ด€์  ๊ธฐ์‚ฌ:** {article['๋ฐ˜๋Œ€ ๊ด€์  ๊ธฐ์‚ฌ']}")
157
+ st.write(f"**๋งํฌ:** [๊ธฐ์‚ฌ ๋งํฌ]({article['๋งํฌ']})")
158
+ st.markdown("---")