820nam committed on
Commit
04a8794
ยท
verified ยท
1 Parent(s): 1a9adeb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -53
app.py CHANGED
@@ -1,40 +1,18 @@
1
  import streamlit as st
2
  import requests
3
- import matplotlib.pyplot as plt
4
- import seaborn as sns
5
- from transformers import pipeline
6
  import openai
7
  import os
8
  from sklearn.feature_extraction.text import TfidfVectorizer
9
  from sklearn.linear_model import LogisticRegression
10
- from sklearn.model_selection import train_test_split
11
  from sklearn.metrics import accuracy_score
12
  import joblib
 
13
 
14
- # Streamlit ํŽ˜์ด์ง€ ์„ค์ •์„ ๊ฐ€์žฅ ๋จผ์ € ํ˜ธ์ถœ
15
- st.set_page_config(page_title="์ •์น˜์  ๊ด€์  ๋ถ„์„", page_icon="๐Ÿ“ฐ", layout="wide")
16
-
17
- # OpenAI API ํ‚ค ์„ค์ • (ํ™˜๊ฒฝ๋ณ€์ˆ˜์—์„œ ๊ฐ€์ ธ์˜ค๋Š” ๋ฐฉ๋ฒ•)
18
  openai.api_key = os.getenv("OPENAI_API_KEY")
19
 
20
- # ํ•œ๊ธ€ ํฐํŠธ ์„ค์ • (Streamlit์—์„œ ์ ์šฉํ•˜๊ธฐ ์œ„ํ•ด CSS ์ถ”๊ฐ€)
21
- st.markdown(
22
- """
23
- <style>
24
- body {
25
- font-family: 'Nanum Gothic', sans-serif;
26
- }
27
- </style>
28
- """,
29
- unsafe_allow_html=True
30
- )
31
-
32
- # matplotlib ํ•œ๊ธ€ ํฐํŠธ ์„ค์ •
33
- import matplotlib
34
- matplotlib.rcParams['font.family'] = 'NanumGothic' # ํ•œ๊ธ€ ํฐํŠธ๋ฅผ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค.
35
- matplotlib.rcParams['axes.unicode_minus'] = False # ๋งˆ์ด๋„ˆ์Šค ๊ธฐํ˜ธ ๊นจ์ง ๋ฐฉ์ง€
36
-
37
- # ๋„ค์ด๋ฒ„ ๋‰ด์Šค API๋ฅผ ํ†ตํ•ด ์‹ค์ œ ๋‰ด์Šค ๊ธฐ์‚ฌ ๊ฐ€์ ธ์˜ค๊ธฐ
38
  def fetch_naver_news(query, display=5):
39
  client_id = "I_8koTJh3R5l4wLurQbG" # ๋„ค์ด๋ฒ„ ๊ฐœ๋ฐœ์ž ์„ผํ„ฐ์—์„œ ๋ฐœ๊ธ‰๋ฐ›์€ Client ID
40
  client_secret = "W5oWYlAgur" # ๋„ค์ด๋ฒ„ ๊ฐœ๋ฐœ์ž ์„ผํ„ฐ์—์„œ ๋ฐœ๊ธ‰๋ฐ›์€ Client Secret
@@ -54,15 +32,14 @@ def fetch_naver_news(query, display=5):
54
  response = requests.get(url, headers=headers, params=params)
55
  if response.status_code == 200:
56
  news_data = response.json()
57
- return news_data['items'] # ๋‰ด์Šค ๊ธฐ์‚ฌ ๋ฆฌ์ŠคํŠธ ๋ฐ˜ํ™˜
58
  else:
59
  st.error("๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ๋ถˆ๋Ÿฌ์˜ค๋Š” ๋ฐ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค.")
60
  return []
61
 
62
- # ๋จธ์‹ ๋Ÿฌ๋‹ ๋ชจ๋ธ ๋กœ๋“œ ๋ฐ ํ•™์Šต
63
  def train_ml_model():
64
- # ์—ฌ๊ธฐ์„œ๋Š” ์ƒ˜ํ”Œ ๋ฐ์ดํ„ฐ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ํ•™์Šต
65
- # ์‹ค์ œ ๋ฐ์ดํ„ฐ๋ฅผ ์ด์šฉํ•œ ํ•™์Šต ๊ณผ์ •์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค.
66
  data = [
67
  ("์ง„๋ณด์ ์ธ ์ •๋ถ€ ์ •์ฑ…์„ ๊ฐ•ํ™”ํ•ด์•ผ ํ•œ๋‹ค", "LEFT"),
68
  ("๋ณด์ˆ˜์ ์ธ ๊ฒฝ์ œ ์ •์ฑ…์ด ํ•„์š”ํ•˜๋‹ค", "RIGHT"),
@@ -79,26 +56,34 @@ def train_ml_model():
79
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
80
 
81
  # ๋กœ์ง€์Šคํ‹ฑ ํšŒ๊ท€ ๋ชจ๋ธ ํ•™์Šต
82
- model = LogisticRegression()
83
- model.fit(X_train, y_train)
 
 
 
 
 
 
 
 
 
84
 
85
  # ๋ชจ๋ธ ์„ฑ๋Šฅ ํ‰๊ฐ€
86
- y_pred = model.predict(X_test)
87
  accuracy = accuracy_score(y_test, y_pred)
88
  st.write(f"๋ชจ๋ธ ์ •ํ™•๋„: {accuracy:.2f}")
89
 
90
  # ๋ชจ๋ธ ์ €์žฅ
91
- joblib.dump(model, 'political_bias_model.pkl')
92
  joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')
93
 
94
- return model, vectorizer
95
 
96
  # ๋กœ๋“œ๋œ ๋จธ์‹ ๋Ÿฌ๋‹ ๋ชจ๋ธ๋กœ ์„ฑํ–ฅ ๋ถ„์„
97
  def analyze_article_sentiment_ml(text, model, vectorizer):
98
  X = vectorizer.transform([text])
99
  prediction = model.predict(X)[0]
100
 
101
- # ์„ฑํ–ฅ์— ๋”ฐ๋ฅธ ๋ ˆ์ด๋ธ” ๋ฐ˜ํ™˜
102
  if prediction == "LEFT":
103
  return "์ง„๋ณด"
104
  elif prediction == "RIGHT":
@@ -109,41 +94,37 @@ def analyze_article_sentiment_ml(text, model, vectorizer):
109
  # GPT-4๋ฅผ ์ด์šฉํ•ด ๋ฐ˜๋Œ€ ๊ด€์  ๊ธฐ์‚ฌ ์ƒ์„ฑ
110
  def generate_article_gpt4(prompt):
111
  try:
112
- # GPT-4 ๋ชจ๋ธ์„ ์ด์šฉํ•ด ๋ฐ˜๋Œ€ ๊ด€์  ๊ธฐ์‚ฌ๋ฅผ ์ƒ์„ฑ
113
  response = openai.ChatCompletion.create(
114
- model="gpt-4", # GPT-4 ๋ชจ๋ธ์„ ์‚ฌ์šฉ
115
  messages=[
116
  {"role": "system", "content": "You are a helpful assistant that generates articles."},
117
- {"role": "user", "content": prompt} # ์‚ฌ์šฉ์ž๊ฐ€ ์ œ๊ณตํ•œ ํ”„๋กฌํ”„ํŠธ
118
  ],
119
- max_tokens=1024, # ๊ธ€์ž ์ˆ˜ ์ œํ•œ ํ•ด์ œ (์ตœ๋Œ€ 1024 ํ† ํฐ)
120
- temperature=0.7 # ์ฐฝ์˜์„ฑ ์ •๋„
121
  )
122
- return response['choices'][0]['message']['content'] # GPT์˜ ์‘๋‹ต ํ…์ŠคํŠธ ๋ฐ˜ํ™˜
123
  except Exception as e:
124
  return f"Error generating text: {e}"
125
 
126
  # ์ •์น˜์  ๊ด€์  ๋น„๊ต ๋ฐ ๋ฐ˜๋Œ€ ๊ด€์  ์ƒ์„ฑ
127
  def analyze_news_political_viewpoint(query, model, vectorizer):
128
- # ๋‰ด์Šค ๋ฐ์ดํ„ฐ ๊ฐ€์ ธ์˜ค๊ธฐ
129
  news_items = fetch_naver_news(query)
130
  if not news_items:
131
  return [], {}
132
 
133
  results = []
134
- sentiment_counts = {"์ง„๋ณด": 0, "๋ณด์ˆ˜": 0, "์ค‘๋ฆฝ": 0} # ๋งคํ•‘๋œ ๋ผ๋ฒจ์— ๋งž๊ฒŒ ์ดˆ๊ธฐํ™”
135
 
136
  for item in news_items:
137
  title = item["title"]
138
  description = item["description"]
139
- link = item["link"] # ๋‰ด์Šค ๋งํฌ ๊ฐ€์ ธ์˜ค๊ธฐ
140
  combined_text = f"{title}. {description}"
141
 
142
- # ๋จธ์‹ ๋Ÿฌ๋‹ ๋ชจ๋ธ์„ ์ด์šฉํ•œ ์„ฑํ–ฅ ๋ถ„์„
143
  sentiment = analyze_article_sentiment_ml(combined_text, model, vectorizer)
144
- sentiment_counts[sentiment] += 1 # ๋งคํ•‘๋œ ํ‚ค๋กœ ์นด์šดํŠธ ์ฆ๊ฐ€
145
 
146
- # ๋ฐ˜๋Œ€ ๊ด€์  ๊ธฐ์‚ฌ ์ƒ์„ฑ
147
  opposite_perspective = "๋ณด์ˆ˜์ " if sentiment == "์ง„๋ณด" else "์ง„๋ณด์ "
148
  prompt = f"{combined_text}๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ {opposite_perspective} ๊ด€์ ์˜ ๊ธฐ์‚ฌ๋ฅผ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”."
149
  opposite_article = generate_article_gpt4(prompt)
@@ -153,18 +134,20 @@ def analyze_news_political_viewpoint(query, model, vectorizer):
153
  "์›๋ณธ ๊ธฐ์‚ฌ": description,
154
  "์„ฑํ–ฅ": sentiment,
155
  "๋Œ€์กฐ ๊ด€์  ๊ธฐ์‚ฌ": opposite_article,
156
- "๋‰ด์Šค ๋งํฌ": link # ๋งํฌ ์ถ”๊ฐ€
157
  })
158
 
159
  return results, sentiment_counts
160
 
161
- # ์„ฑํ–ฅ ๋ถ„ํฌ ์‹œ๊ฐํ™” (๋ง‰๋Œ€ ๊ทธ๋ž˜ํ”„)
162
  def visualize_sentiment_distribution(sentiment_counts):
 
 
 
163
  fig, ax = plt.subplots(figsize=(8, 5))
164
  labels = list(sentiment_counts.keys())
165
  sizes = list(sentiment_counts.values())
166
 
167
- # ์ƒ‰์ƒ ์„ค์ • (๋ถ€๋“œ๋Ÿฌ์šด ํŒ”๋ ˆํŠธ)
168
  color_palette = sns.color_palette("pastel")[0:len(sizes)]
169
 
170
  ax.bar(labels, sizes, color=color_palette)
@@ -195,7 +178,6 @@ if st.button("๐Ÿ” ๋ถ„์„ ์‹œ์ž‘"):
195
  if analysis_results:
196
  st.success("๋‰ด์Šค ๋ถ„์„์ด ์™„๋ฃŒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
197
 
198
- # ๋‰ด์Šค ๊ธฐ์‚ฌ ๋ชฉ๋ก ํ‘œ์‹œ
199
  for result in analysis_results:
200
  st.subheader(result["์ œ๋ชฉ"])
201
  st.write(f"์„ฑํ–ฅ: {result['์„ฑํ–ฅ']}")
@@ -204,7 +186,6 @@ if st.button("๐Ÿ” ๋ถ„์„ ์‹œ์ž‘"):
204
  st.write(f"๋Œ€์กฐ ๊ด€์  ๊ธฐ์‚ฌ: {result['๋Œ€์กฐ ๊ด€์  ๊ธฐ์‚ฌ']}")
205
  st.markdown("---")
206
 
207
- # ์„ฑํ–ฅ ๋ถ„ํฌ ์‹œ๊ฐํ™”
208
  visualize_sentiment_distribution(sentiment_counts)
209
  else:
210
  st.warning("๊ฒ€์ƒ‰๋œ ๋‰ด์Šค๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
 
1
  import streamlit as st
2
  import requests
 
 
 
3
  import openai
4
  import os
5
  from sklearn.feature_extraction.text import TfidfVectorizer
6
  from sklearn.linear_model import LogisticRegression
7
+ from sklearn.model_selection import train_test_split, cross_val_score
8
  from sklearn.metrics import accuracy_score
9
  import joblib
10
+ from sklearn.model_selection import GridSearchCV
11
 
12
+ # OpenAI API ํ‚ค ์„ค์ •
 
 
 
13
  openai.api_key = os.getenv("OPENAI_API_KEY")
14
 
15
+ # ๋„ค์ด๋ฒ„ ๋‰ด์Šค API๋ฅผ ํ†ตํ•ด ๋‰ด์Šค ๊ธฐ์‚ฌ ๊ฐ€์ ธ์˜ค๊ธฐ
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def fetch_naver_news(query, display=5):
17
  client_id = "I_8koTJh3R5l4wLurQbG" # ๋„ค์ด๋ฒ„ ๊ฐœ๋ฐœ์ž ์„ผํ„ฐ์—์„œ ๋ฐœ๊ธ‰๋ฐ›์€ Client ID
18
  client_secret = "W5oWYlAgur" # ๋„ค์ด๋ฒ„ ๊ฐœ๋ฐœ์ž ์„ผํ„ฐ์—์„œ ๋ฐœ๊ธ‰๋ฐ›์€ Client Secret
 
32
  response = requests.get(url, headers=headers, params=params)
33
  if response.status_code == 200:
34
  news_data = response.json()
35
+ return news_data['items']
36
  else:
37
  st.error("๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฅผ ๋ถˆ๋Ÿฌ์˜ค๋Š” ๋ฐ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค.")
38
  return []
39
 
40
+ # ๋จธ์‹ ๋Ÿฌ๋‹ ๋ชจ๋ธ ํ•™์Šต ๋ฐ ๊ฐœ์„ 
41
  def train_ml_model():
42
+ # ์˜ˆ์‹œ ๋ฐ์ดํ„ฐ
 
43
  data = [
44
  ("์ง„๋ณด์ ์ธ ์ •๋ถ€ ์ •์ฑ…์„ ๊ฐ•ํ™”ํ•ด์•ผ ํ•œ๋‹ค", "LEFT"),
45
  ("๋ณด์ˆ˜์ ์ธ ๊ฒฝ์ œ ์ •์ฑ…์ด ํ•„์š”ํ•˜๋‹ค", "RIGHT"),
 
56
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
57
 
58
  # ๋กœ์ง€์Šคํ‹ฑ ํšŒ๊ท€ ๋ชจ๋ธ ํ•™์Šต
59
+ model = LogisticRegression(max_iter=1000, solver='liblinear') # ๋” ๋งŽ์€ ๋ฐ˜๋ณต ํšŸ์ˆ˜์™€ 'liblinear' solver ์‚ฌ์šฉ
60
+
61
+ # ํ•˜์ดํผํŒŒ๋ผ๋ฏธํ„ฐ ํŠœ๋‹ (์ •๊ทœํ™” ๊ฐ•๋„ C)
62
+ param_grid = {'C': [0.1, 1, 10, 100]}
63
+ grid_search = GridSearchCV(model, param_grid, cv=5)
64
+ grid_search.fit(X_train, y_train)
65
+ best_model = grid_search.best_estimator_
66
+
67
+ # ๊ต์ฐจ ๊ฒ€์ฆ์„ ํ†ตํ•œ ํ‰๊ฐ€
68
+ cv_scores = cross_val_score(best_model, X, y, cv=5)
69
+ st.write(f"๊ต์ฐจ ๊ฒ€์ฆ ํ‰๊ท  ์ •ํ™•๋„: {cv_scores.mean():.2f}")
70
 
71
  # ๋ชจ๋ธ ์„ฑ๋Šฅ ํ‰๊ฐ€
72
+ y_pred = best_model.predict(X_test)
73
  accuracy = accuracy_score(y_test, y_pred)
74
  st.write(f"๋ชจ๋ธ ์ •ํ™•๋„: {accuracy:.2f}")
75
 
76
  # ๋ชจ๋ธ ์ €์žฅ
77
+ joblib.dump(best_model, 'political_bias_model.pkl')
78
  joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')
79
 
80
+ return best_model, vectorizer
81
 
82
  # ๋กœ๋“œ๋œ ๋จธ์‹ ๋Ÿฌ๋‹ ๋ชจ๋ธ๋กœ ์„ฑํ–ฅ ๋ถ„์„
83
  def analyze_article_sentiment_ml(text, model, vectorizer):
84
  X = vectorizer.transform([text])
85
  prediction = model.predict(X)[0]
86
 
 
87
  if prediction == "LEFT":
88
  return "์ง„๋ณด"
89
  elif prediction == "RIGHT":
 
94
  # GPT-4๋ฅผ ์ด์šฉํ•ด ๋ฐ˜๋Œ€ ๊ด€์  ๊ธฐ์‚ฌ ์ƒ์„ฑ
95
  def generate_article_gpt4(prompt):
96
  try:
 
97
  response = openai.ChatCompletion.create(
98
+ model="gpt-4",
99
  messages=[
100
  {"role": "system", "content": "You are a helpful assistant that generates articles."},
101
+ {"role": "user", "content": prompt}
102
  ],
103
+ max_tokens=1024,
104
+ temperature=0.7
105
  )
106
+ return response['choices'][0]['message']['content']
107
  except Exception as e:
108
  return f"Error generating text: {e}"
109
 
110
  # ์ •์น˜์  ๊ด€์  ๋น„๊ต ๋ฐ ๋ฐ˜๋Œ€ ๊ด€์  ์ƒ์„ฑ
111
  def analyze_news_political_viewpoint(query, model, vectorizer):
 
112
  news_items = fetch_naver_news(query)
113
  if not news_items:
114
  return [], {}
115
 
116
  results = []
117
+ sentiment_counts = {"์ง„๋ณด": 0, "๋ณด์ˆ˜": 0, "์ค‘๋ฆฝ": 0}
118
 
119
  for item in news_items:
120
  title = item["title"]
121
  description = item["description"]
122
+ link = item["link"]
123
  combined_text = f"{title}. {description}"
124
 
 
125
  sentiment = analyze_article_sentiment_ml(combined_text, model, vectorizer)
126
+ sentiment_counts[sentiment] += 1
127
 
 
128
  opposite_perspective = "๋ณด์ˆ˜์ " if sentiment == "์ง„๋ณด" else "์ง„๋ณด์ "
129
  prompt = f"{combined_text}๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ {opposite_perspective} ๊ด€์ ์˜ ๊ธฐ์‚ฌ๋ฅผ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”."
130
  opposite_article = generate_article_gpt4(prompt)
 
134
  "์›๋ณธ ๊ธฐ์‚ฌ": description,
135
  "์„ฑํ–ฅ": sentiment,
136
  "๋Œ€์กฐ ๊ด€์  ๊ธฐ์‚ฌ": opposite_article,
137
+ "๋‰ด์Šค ๋งํฌ": link
138
  })
139
 
140
  return results, sentiment_counts
141
 
142
+ # ์„ฑํ–ฅ ๋ถ„ํฌ ์‹œ๊ฐํ™”
143
  def visualize_sentiment_distribution(sentiment_counts):
144
+ import matplotlib.pyplot as plt
145
+ import seaborn as sns
146
+
147
  fig, ax = plt.subplots(figsize=(8, 5))
148
  labels = list(sentiment_counts.keys())
149
  sizes = list(sentiment_counts.values())
150
 
 
151
  color_palette = sns.color_palette("pastel")[0:len(sizes)]
152
 
153
  ax.bar(labels, sizes, color=color_palette)
 
178
  if analysis_results:
179
  st.success("๋‰ด์Šค ๋ถ„์„์ด ์™„๋ฃŒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
180
 
 
181
  for result in analysis_results:
182
  st.subheader(result["์ œ๋ชฉ"])
183
  st.write(f"์„ฑํ–ฅ: {result['์„ฑํ–ฅ']}")
 
186
  st.write(f"๋Œ€์กฐ ๊ด€์  ๊ธฐ์‚ฌ: {result['๋Œ€์กฐ ๊ด€์  ๊ธฐ์‚ฌ']}")
187
  st.markdown("---")
188
 
 
189
  visualize_sentiment_distribution(sentiment_counts)
190
  else:
191
  st.warning("๊ฒ€์ƒ‰๋œ ๋‰ด์Šค๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")