Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import tensorflow as tf | |
| import numpy as np | |
| import pandas as pd | |
| from transformers import * | |
| from tqdm import tqdm | |
| from tensorflow.python.client import device_lib | |
| from selenium import webdriver | |
| from selenium.webdriver.chrome.service import Service | |
| from selenium.webdriver.chrome.options import Options | |
| from selenium.webdriver.common.by import By | |
| from selenium.webdriver.support.ui import WebDriverWait | |
| from selenium.webdriver.support import expected_conditions as EC | |
| from bs4 import BeautifulSoup | |
| import time | |
# Directory of the checkpoint that create_sentiment_bert() loads from local files.
PATH = './checkpoint-7500/'
# Fixed token length of every model input; also the pad/truncate length used
# when encoding a sentence.
SEQ_LEN = 128
# Module-level tokenizer used by sentence_convert_data().
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
def create_sentiment_bert():
    """Build and compile the Keras sentiment classifier around the local checkpoint.

    The checkpoint under ``PATH`` is loaded as the encoder, three int32 inputs
    of length ``SEQ_LEN`` are fed to it, and a single sigmoid unit is stacked
    on element ``[1]`` of the encoder output.

    Returns:
        A compiled ``tf.keras.Model`` taking ``[token ids, mask, segment ids]``
        and producing one sigmoid score per example.
    """
    # Load the encoder strictly from disk; no download is attempted.
    encoder = TFAutoModel.from_pretrained(PATH, local_files_only=True)

    # Token ids, attention mask and segment ids, in the order the encoder is called with.
    word_ids, attention_mask, segment_ids = (
        tf.keras.layers.Input((SEQ_LEN,), dtype=tf.int32, name=layer_name)
        for layer_name in ('input_word_ids', 'input_masks', 'input_segment')
    )

    # Element 1 of the encoder output — presumably the pooled sentence
    # representation for a BERT checkpoint; confirm against the checkpoint type.
    pooled = encoder([word_ids, attention_mask, segment_ids])[1]

    # NOTE(review): this head is freshly initialised on every call and nothing
    # in this function restores trained weights for it — confirm they are
    # loaded elsewhere before trusting predictions.
    head = tf.keras.layers.Dense(
        1,
        activation='sigmoid',
        kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
    )

    classifier = tf.keras.Model(
        [word_ids, attention_mask, segment_ids],
        head(pooled),
    )
    classifier.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
    )
    return classifier
def sentence_convert_data(data):
    """Encode one sentence into the three arrays the sentiment model consumes.

    The attention mask and segment ids are taken straight from the tokenizer
    instead of being rebuilt by counting token id 0, so the result does not
    depend on the pad id being 0 or on zeros only occurring as trailing padding.

    Args:
        data: Text of a single sentence (a ``str``).

    Returns:
        ``[tokens, masks, segments]``: three NumPy integer arrays of shape
        ``(1, SEQ_LEN)`` holding the token ids, the attention mask and the
        segment (token type) ids.
    """
    encoded = tokenizer(
        data,
        max_length=SEQ_LEN,
        truncation=True,
        padding='max_length',
        return_attention_mask=True,
        return_token_type_ids=True,
    )

    # Wrapping each sequence in a list adds the leading batch dimension
    # (a batch of exactly one sentence).
    tokens = np.array([encoded['input_ids']])
    masks = np.array([encoded['attention_mask']])
    segments = np.array([encoded['token_type_ids']])
    return [tokens, masks, segments]
def movie_evaluation_predict(sentence, model=None):
    """Classify one sentence and write the verdict to the Streamlit page.

    Args:
        sentence: Text to classify.
        model: Object with a Keras-style ``predict`` method. When omitted, the
            module-level name ``sentiment_model`` is used; a ``NameError`` is
            raised if no such global exists.

    Returns:
        None. The result is reported through ``st.write``.
    """
    if model is None:
        # Fall back to a module-level model so existing one-argument callers
        # keep working.
        model = sentiment_model

    data_x = sentence_convert_data(sentence)
    predict = model.predict(data_x)
    predict_value = np.ravel(predict)
    print(predict_value)

    # Extract the single score as a Python float. Formatting "%.2f" with a
    # 1-element ndarray relies on implicit array-to-scalar conversion, which
    # NumPy has deprecated. ``.item()`` still raises if the prediction does
    # not contain exactly one value.
    probability = predict_value.item()
    predict_answer = np.round(probability, 0)

    if predict_answer == 0:
        st.write("(๋ถ์ ํ๋ฅ : %.2f) ๋ถ์ ์ ์ธ ์ํ ํ๊ฐ์ ๋๋ค." % (1.0 - probability))
    elif predict_answer == 1:
        st.write("(๊ธ์ ํ๋ฅ : %.2f) ๊ธ์ ์ ์ธ ์ํ ํ๊ฐ์ ๋๋ค." % probability)
def setup_driver():
    """Create a Chrome WebDriver configured with the app's fixed launch flags.

    Returns:
        A new ``webdriver.Chrome`` instance; the caller is responsible for
        calling ``quit()`` on it.
    """
    options = Options()
    for flag in (
        "--headless",  # run without a visible browser window
        "--no-sandbox",
        "--disable-dev-shm-usage",
    ):
        options.add_argument(flag)
    return webdriver.Chrome(options=options)
def scrape_content(url, wait_seconds=3):
    """Load *url* in Chrome and extract the article body and the comments.

    Instead of sleeping for a fixed time, the function waits until a comment
    element is present, giving up after ``wait_seconds``. Pages whose comments
    render quickly return sooner; pages without comments still proceed once
    the bound is reached.

    Args:
        url: Address of the page to scrape.
        wait_seconds: Upper bound, in seconds, on the wait for comment
            elements after navigation. Defaults to 3.

    Returns:
        A dict with two keys: ``'content'`` is the text of the first
        ``<article>`` element (or a fallback message when there is none), and
        ``'comments'`` is a list with the text of every
        ``span.u_cbox_contents`` element found.
    """
    # Local import: the file's import block does not bring this name in.
    from selenium.common.exceptions import TimeoutException

    driver = setup_driver()
    try:
        driver.get(url)

        try:
            WebDriverWait(driver, wait_seconds).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, 'span.u_cbox_contents')
                )
            )
        except TimeoutException:
            # Best effort: the page may simply have no comments, so parse
            # whatever has been rendered so far.
            pass

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        # The tag/class below are site-specific; adjust them to the target page.
        content = soup.find('article')
        comments = soup.find_all('span', class_='u_cbox_contents')

        return {
            'content': content.text if content else "๋ณธ๋ฌธ์ ์ฐพ์ ์ ์์ต๋๋ค.",
            'comments': [comment.text for comment in comments],
        }
    finally:
        # Always release the browser process, even when scraping fails.
        driver.quit()
def main():
    """Render the Streamlit page: URL input, scrape button, and scraped results.

    Returns:
        Always ``0``.
    """
    # NOTE(review): the model is built each time main() runs but is not
    # referenced by the active code below; only the disabled form would use it.
    sentiment_model = create_sentiment_bert()

    # Disabled sentence-prediction form, kept for reference:
    #   test = st.form('test')
    #   sentence = test.text_input("Your sentence")
    #   submit = test.form_submit_button("Submit")
    #   if submit:
    #       movie_evaluation_predict(sentence)

    url = st.text_input("URL์ ์ ๋ ฅํ์ธ์")

    # Nothing to do until the scrape button is pressed.
    if not st.button("ํฌ๋กค๋ง ์์"):
        return 0

    # Button pressed without a URL: report the problem and stop.
    if not url:
        st.error("URL์ ์ ๋ ฅํด์ฃผ์ธ์")
        return 0

    with st.spinner("ํฌ๋กค๋ง ์ค..."):
        scraped = scrape_content(url)

    st.subheader("๋ณธ๋ฌธ")
    st.write(scraped['content'])

    st.subheader("๋๊ธ")
    for position, text in enumerate(scraped['comments'], start=1):
        st.write(f"{position}. {text}")

    return 0
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()