Spaces:

duwing
/

comment_filter

Sleeping

App Files Files Community

duwing committed on Nov 16, 2024

Commit

6c0af57

verified ·

1 Parent(s): 5bb7d28

Update app.py

Browse files

function about comments

Files changed (1) hide show

app.py +56 -22

app.py CHANGED Viewed

@@ -53,33 +53,63 @@ def movie_evaluation_predict(sentence):
     data_x = sentence_convert_data(sentence)
     predict = sentiment_model.predict(data_x)
     predict_value = np.ravel(predict)
     predict_answer = np.round(predict_value,0).item()
-    print(predict_value)
-    if predict_answer == 0:
-      st.write("(부정 확률 : %.2f) 부정적인 영화 평가입니다." % (1.0-predict_value))
-    elif predict_answer == 1:
-      st.write("(긍정 확률 : %.2f) 긍정적인 영화 평가입니다." % predict_value)
-def scrape_content(url):
     headers = {
-    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
     }
-    # 웹 페이지 요청
-    response = requests.get(url, headers=headers)
-    soup = BeautifulSoup(response.content, 'html.parser')
     # 본문 추출
-    article_body = soup.find_all('div', class_='newsct_article')
-    content = ' '.join([p.get_text() for p in article_body])
-    # 댓글 추출 (예시)
-    comments = soup.find_all('span', class_='u_cbox_contents')
-    comment_list = [comment.get_text() for comment in comments]
-    return content, comment_list
 def main():
@@ -88,19 +118,23 @@ def main():
     st.title("웹 컨텐츠 스크래퍼")
     # URL 입력 받기
-    url = st.text_input("URL을 입력하세요")
     if st.button("스크랩 시작"):
         if url:
-            content, comments = scrape_content(url)
             # 결과 표시
             st.subheader("본문 내용")
             st.write(content)
             st.subheader("댓글")
             for comment in comments:
-                st.write(comment)
     '''
     test = st.form('test')

     data_x = sentence_convert_data(sentence)
     predict = sentiment_model.predict(data_x)
     predict_value = np.ravel(predict)
+    # 0:부정, 1:긍정
     predict_answer = np.round(predict_value,0).item()
+    return predict_answer
+def get_comments(news_url):
+    """Fetch the title, body text and comments of a Naver news article.
+
+    Args:
+        news_url: Article URL whose last two path segments are the ``oid``
+            and ``aid`` identifiers used to build the comment API
+            ``objectId``.
+
+    Returns:
+        tuple: ``(title, content, comments)``. ``title`` and ``content`` are
+        the text of the first matching element (an empty string when no
+        selector matches); ``comments`` is the list returned by
+        ``processing_data``.
+    """
+    # Extract oid and aid. Drop any query string or fragment first so the
+    # last path segment is the bare aid.
+    path = news_url.split("?", 1)[0].split("#", 1)[0].rstrip("/")
+    oid, aid = path.split("/")[-2:]
+    # Build the API URL
+    api_url = "https://apis.naver.com/commentBox/cbox/web_naver_list_jsonp.json"
+    params = {
+        "ticket": "news",
+        "templateId": "default_society",
+        "pool": "cbox5",
+        "lang": "ko",
+        "country": "KR",
+        "objectId": f"news{oid},{aid}",
+        "pageSize": 100,
+        "indexSize": 10,
+        "page": 1,
+        "sort": "FAVORITE",  # 'NEW' (newest first), 'FAVORITE' (by net likes)
+    }
     headers = {
+        "User-Agent": "Mozilla/5.0",
+        "Referer": news_url
     }
+    # Call the API and process the data
+    response = requests.get(api_url, params=params, headers=headers, timeout=10)
+    # The response is expected to be JSONP-wrapped ("_callback(...);"). Keep
+    # only what lies between the outermost parentheses so a ");" inside a
+    # comment's text is left untouched.
+    body = response.text
+    start, end = body.find("("), body.rfind(")")
+    payload = body[start + 1:end] if 0 <= start < end else body
+    json_data = json.loads(payload)
+    # An error payload may lack "result"/"commentList"; treat it as no comments.
+    comment_items = (json_data.get("result") or {}).get("commentList") or []
+    article_response = requests.get(news_url, headers=headers, timeout=10)
+    article_soup = BeautifulSoup(article_response.text, "html.parser")
+    # Extract the title (second selector is the fallback page layout)
+    title = article_soup.select_one("#ct > div.media_end_head.go_trans > div.media_end_head_title > h2")
+    if title is None:
+        title = article_soup.select_one("#content > div.end_ct > div > h2")
+    # Extract the article body
+    content = article_soup.select_one("#dic_area")
+    if content is None:
+        content = article_soup.select_one("#articeBody")
+    # Return plain text rather than Tag objects so callers can display it directly.
+    title_text = title.get_text(strip=True) if title is not None else ""
+    content_text = content.get_text(strip=True) if content is not None else ""
+    return title_text, content_text, processing_data(comment_items)
+def processing_data(comments):
+    """Return the non-empty ``contents`` value of every comment.
+
+    Args:
+        comments: Iterable of comment dicts, each with a ``contents`` key.
+
+    Returns:
+        list: The ``contents`` values in input order, with falsy ones
+        (such as empty strings or ``None``) dropped.
+    """
+    return [entry['contents'] for entry in comments if entry['contents']]
 def main():
     st.title("웹 컨텐츠 스크래퍼")
     # URL 입력 받기
+    url = st.text_input("url을 입력하세요")
     if st.button("스크랩 시작"):
         if url:
+            title, content, comments = get_comments(url)
             # 결과 표시
+            st.subheader("기사 제목")
+            st.write(title)
             st.subheader("본문 내용")
             st.write(content)
             st.subheader("댓글")
             for comment in comments:
+                if movie_evaluation_predict(comment) == 1:
+                    st.write(comment)
     '''
     test = st.form('test')