Spaces:

820nam
/

Test

Sleeping

App Files Files Community

820nam committed on Nov 28, 2024

Commit

384f5e4

verified ·

1 Parent(s): bb49491

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -56

app.py CHANGED Viewed

@@ -1,7 +1,9 @@
 import requests
-from transformers import pipeline, MarianMTModel, MarianTokenizer
-# Step 1: 네이버 뉴스 API로 뉴스 데이터 가져오기
 def fetch_naver_news(query, display=10, start=1, sort="date"):
     client_id = "I_8koTJh3R5l4wLurQbG"  # 네이버 개발자 센터에서 발급받은 Client ID
     client_secret = "W5oWYlAgur"  # 네이버 개발자 센터에서 발급받은 Client Secret
@@ -24,27 +26,15 @@ def fetch_naver_news(query, display=10, start=1, sort="date"):
     else:
         raise Exception(f"Error: {response.status_code}, {response.text}")
-# Step 2: 한국어 -> 영어 번역 모델 로드
-def load_translation_model():
-    model_name = "Helsinki-NLP/opus-mt-ko-en"  # 한국어 -> 영어 번역 모델
-    model = MarianMTModel.from_pretrained(model_name)
-    tokenizer = MarianTokenizer.from_pretrained(model_name)
-    return model, tokenizer
-# 한국어 텍스트를 영어로 번역하는 함수
-def translate_to_english(text, model, tokenizer):
-    translated = tokenizer.encode(text, return_tensors="pt", padding=True)
-    translated_text = model.generate(translated, max_length=512)
-    return tokenizer.decode(translated_text[0], skip_special_tokens=True)
-# Step 3: 정치 성향 분석 모델 로드 (PoliticalBiasBERT)
-def load_political_bias_model():
-    classifier = pipeline("text-classification", model="bucketresearch/politicalBiasBERT")
     return classifier
-# Step 4: 정치 성향 분류
 def classify_political_sentiment(text, classifier):
-    result = classifier(text[:512])
     sentiment = result[0]
     label = sentiment["label"]
     score = sentiment["score"]
@@ -63,52 +53,67 @@ def classify_political_sentiment(text, classifier):
     else:
         return "중립", sentiment_score
-# Step 5: 전체 뉴스 분석 및 결과 출력
-def analyze_news_political_orientation(news_items, classifier, translation_model, tokenizer):
     results = {"진보": 0, "보수": 0, "중립": 0}
     detailed_results = []
     for item in news_items:
         title = item["title"]
         description = item["description"]
-        # 한국어 기사 텍스트를 영어로 번역
         combined_text = f"{title}. {description}"
-        translated_text = translate_to_english(combined_text, translation_model, tokenizer)
         # 정치 성향 분류
-        orientation, score = classify_political_sentiment(translated_text, classifier)
         results[orientation] += 1
-        detailed_results.append((title, description, orientation, score))
-        # 출력
-        print(f"Title: {title}")
-        print(f"Description: {description}")
-        print(f"Orientation: {orientation}, Score: {score}")
-        print("-" * 80)
     return results, detailed_results
-# Step 6: 실행 파이프라인
-if __name__ == "__main__":
-    try:
-        # 네이버 뉴스 API 호출 (여기서는 미리 가져온 뉴스 데이터가 필요)
-        query = "정치"  # 검색 키워드
-        news_data = fetch_naver_news(query, display=5)
-        # 번역 모델과 정치 성향 분석 모델 로드
-        translation_model, tokenizer = load_translation_model()
-        classifier = load_political_bias_model()
-        # 뉴스 데이터 감성 분석 및 정치 성향 분류
-        news_items = news_data["items"]
-        results, detailed_results = analyze_news_political_orientation(news_items, classifier, translation_model, tokenizer)
-        # 전체 결과 출력
-        print("\n정치 성향 분석 결과")
-        print(f"진보: {results['진보']}건")
-        print(f"보수: {results['보수']}건")
-        print(f"중립: {results['중립']}건")
-    except Exception as e:
-        print(f"오류 발생: {e}")

+import streamlit as st
 import requests
+from transformers import pipeline
+import pandas as pd
+# Step 1: 네이버 뉴스 API 호출 함수
 def fetch_naver_news(query, display=10, start=1, sort="date"):
     client_id = "I_8koTJh3R5l4wLurQbG"  # 네이버 개발자 센터에서 발급받은 Client ID
     client_secret = "W5oWYlAgur"  # 네이버 개발자 센터에서 발급받은 Client Secret
     else:
         raise Exception(f"Error: {response.status_code}, {response.text}")
+# Step 2: Hugging Face 감성 분석 모델 로드
+def load_huggingface_model():
+    classifier = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
     return classifier
+# Step 3: 정치 성향 분류 함수
 def classify_political_sentiment(text, classifier):
+    # 감성 분석 실행
+    result = classifier(text[:512])  # 입력이 너무 길면 잘라서 분석
     sentiment = result[0]
     label = sentiment["label"]
     score = sentiment["score"]
     else:
         return "중립", sentiment_score
+# Step 4: 뉴스 분석 및 결과 출력
+def analyze_news_political_orientation(news_items, classifier):
     results = {"진보": 0, "보수": 0, "중립": 0}
     detailed_results = []
     for item in news_items:
         title = item["title"]
         description = item["description"]
         combined_text = f"{title}. {description}"
         # 정치 성향 분류
+        orientation, score = classify_political_sentiment(combined_text, classifier)
         results[orientation] += 1
+        detailed_results.append({
+            "제목": title,
+            "요약": description,
+            "성향": orientation,
+            "점수": score,
+        })
     return results, detailed_results
+# Streamlit 앱 시작
+st.title("정치 성향 분석 대시보드")
+st.markdown("### 네이버 뉴스 데이터를 실시간으로 수집하고 정치 성향을 분석합니다.")
+# 검색 키워드 입력
+query = st.text_input("검색 키워드를 입력하세요", value="정치")
+if st.button("분석 시작"):
+    with st.spinner("데이터를 분석 중입니다..."):
+        try:
+            # 네이버 뉴스 데이터 수집
+            news_data = fetch_naver_news(query, display=10)
+            news_items = news_data["items"]
+            # Hugging Face 모델 로드
+            classifier = load_huggingface_model()
+            # 뉴스 데이터 분석
+            results, detailed_results = analyze_news_political_orientation(news_items, classifier)
+            # 분석 결과 시각화
+            st.subheader("분석 결과 요약")
+            st.write(f"진보: {results['진보']}건")
+            st.write(f"보수: {results['보수']}건")
+            st.write(f"중립: {results['중립']}건")
+            # Bar chart of the per-orientation article counts
+            st.subheader("성향 분포 차트")
+            st.bar_chart(pd.DataFrame.from_dict(results, orient='index', columns=["건수"]))
+            # 세부 결과 출력
+            st.subheader("세부 결과")
+            df = pd.DataFrame(detailed_results)
+            st.dataframe(df)
+            # List each headline with its orientation and score (NOTE: no URL is included in the output)
+            st.subheader("뉴스 링크")
+            for index, row in df.iterrows():
+                st.write(f"- [{row['제목']}] (성향: {row['성향']}, 점수: {row['점수']:.2f})")
+        except Exception as e:
+            st.error(f"오류 발생: {e}")