import json
import os
import re
import time
import warnings
from xml.etree import ElementTree as ET

import joblib
import nltk
import openpyxl
import pandas as pd
import requests
import streamlit as st
from bs4 import BeautifulSoup as bs
from googletrans import Translator
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

warnings.filterwarnings('ignore')

# Seconds to wait for any outbound HTTP call before giving up.
HTTP_TIMEOUT = 10


@st.cache_resource
def ensure_nltk_resources():
    """Download the NLTK corpora used by the text-cleaning step.

    Cached so the downloads are attempted once per server process instead
    of on every Streamlit rerun.
    """
    for package in ('punkt', 'averaged_perceptron_tagger', 'wordnet', 'stopwords'):
        nltk.download(package, quiet=True)


ensure_nltk_resources()

# ---------------------------------------------------------------------------
# Landing page: project background
# ---------------------------------------------------------------------------
st.header('Book-OST🎧')
st.markdown(
    '근래에 들어 **한국인의 독서량 감소**와 **젊은 층의 문해력 저하**가 사회적 문제로 떠오르고 있습니다. '
    '책이나 신문과 같은 출판물로 정보를 습득했던 과거와 달리, 오늘날 사람들은 책 이외의 수많은 정보 매체와 '
    '미디어로부터 정보를 습득할 수 있게 되며 자연스럽게 독서량이 감소해오고 있습니다.'
)
st.markdown(
    '미디어를 통한 정보 습득과 달리, 독서는 정제되지 않은 정보를 스스로 이해하고 자신의 것으로 습득하는 '
    '지적 과정을 거치기 때문에 독서가 문해력과 같은 지적 능력 발달에 매우 중요한 것으로 알려져 있습니다. '
    '따라서 젊은 층의 문해력 저하 문제의 원인이 ‘독서량 감소’에 있다는 의견이 제기되고 있습니다.'
)
st.markdown(
    '이러한 **한국인의 독서량 감소**와 **젊은 층의 문해력 저하**에 대하여, 저희 팀은 '
    '**독서에 대한 흥미를 높이고 독서를 장려할 수 있는 방안을 제시하는 것**이 두 문제의 해결 방안이 될 것이라 생각했습니다.'
)
st.markdown('')
st.image('https://velog.velcdn.com/images/jeo0534/post/36d2b899-9a13-410e-950c-ec6d227cdcf2/image.png')
st.markdown('')
st.markdown(
    '영화나 드라마처럼 **책에도 ost가 필요하다는 Jtbc 멜로디책방 프로그램**으로부터 영감을 얻어, '
    '**도서 맞춤 음악 추천 시스템**이라는 주제를 선정했습니다. 자신이 읽고 있는 책을 입력하면 책과 잘 어울리는 '
    '음악을 추천해줌으로써 **책의 감정과 내용을 음악 함께 더욱 깊이 음미하는 독서 경험을 제공**하고자 합니다. '
    '젊은 층에게 친숙한 음악을 독서와 결합함으로써 독서에 대한 흥미와 즐거움을 더하고, 장기적으로 독서를 '
    '장려하는 하나의 문화적 서비스가 될 수 있을 것으로 기대하고 있습니다.'
)

# ---------------------------------------------------------------------------
# Landing page: process overview
# ---------------------------------------------------------------------------
st.header('전체적인 프로세스')
st.markdown('--------------------------------------------------------------------------------------')
st.image('https://velog.velcdn.com/images/jeo0534/post/23b53885-f063-4066-9b6d-b736a6a846c9/image.png')
st.markdown('--------------------------------------------------------------------------------------')
st.markdown('##### 1️⃣ 읽고 있는 도서 입력')
st.markdown('노래를 추천 받고 싶은 도서의 제목 입력')
st.markdown('')
st.markdown('##### 2️⃣ 입력한 도서와 노래 간 유사도 분석')
st.markdown(
    '도서와 노래의 **①감정적 특성** + **②내용 키워드**를 기반으로 유사도를 계산하는 '
    'Content-based Filtering (CBF) 방식을 사용'
)
st.markdown('')
st.markdown('##### 3️⃣ 유사도가 높은 순서대로 추천 노래 플레이리스트 제공')

# ---------------------------------------------------------------------------
# Landing page: worked example
# ---------------------------------------------------------------------------
st.header('**Example**')
with st.expander("날씨가 좋으면 찾아가겠어요"):
    st.markdown(' ')
    st.markdown('감정적 특성 유사도와 내용 유사도에 여러가지 가중치를 부여하여 결과를 개선해 본 결과')
    st.markdown('**- audio feature 감정 : 가사 감정 : 가사 내용 = 0.8 : 0.1 : 0.1 ( audio feature 기반 유사도 중심)**')
    st.markdown('**- audio feature 감정 : 가사 감정 : 가사 내용 = 0 : 0.5 : 0.5 ( 가사 기반 유사도만 사용)**')
    st.markdown('의 비율로 가중치를 부여 했을 때 좋은 추천이 이루어 짐을 확인 할 수 있었습니다.')
    st.image('https://velog.velcdn.com/images/jeo0534/post/68833f04-55ba-4831-8dc3-ade372efd42e/image.png')

# NOTE(review): the original indentation was lost; the divider and header are
# placed at top level because Streamlit does not allow the expanders that
# follow to be nested inside the one above — confirm against the live page.
st.divider()
st.header('📗 추천 도서 리스트')

# Each card is (title, cover image URL, captions). Cards that appear in both
# lists are defined once and reused.
CARD_UNBEARABLE_LIGHTNESS = (
    '참을 수 없는 존재의 가벼움',
    'https://velog.velcdn.com/images/jeo0534/post/71f6da54-1ac8-4581-8f77-f55ed5c56dbc/image.png',
    [
        '사랑은 은유로 시작된다. 달리 말하자면, 한 여자가 언어를 통해 우리의 시적 기억에 아로새겨지는 순간, 사랑은 시작되는 것이다.',
        '그들은 서로 사랑했는데도 상대방에게 하나의 지옥을 선사했다.',
    ],
)
CARD_FESTIVAL_OF_INSIGNIFICANCE = (
    '무의미의 축제',
    'https://velog.velcdn.com/images/jeo0534/post/8e967400-dc99-4d63-8ed1-94ab93396b0e/image.png',
    [
        '보잘것없는 것을 사랑해야 해요,사랑하는 법을 배워야 해요.',
        '농담과 거짓말, 의미와 무의미, 일상과 축제의 경계에서삶과 인간의 본질을 바라보는 시선',
    ],
)
CARD_MONTE_CRISTO = (
    '몬테크리스토 백작',
    'https://velog.velcdn.com/images/jeo0534/post/aef3eb6b-e3d9-4745-8b7b-07e592e4637b/image.png',
    [
        '모든 악에는 두 개의 약이 있다. 시간과 침묵이 그것이다',
        '인간사에서 가장 흥겨운 이야기는 불행을 딛고 행복을 되찾는 이야기가 아닐까?',
        '모략과 함정에 빠지지만, 부와 명예를 회복하여 화려하게 복수한다는 이야기에 사람들은 쉽게 열광한다.',
        '<몬테크리스토 백작>이 대표적인 경우. 배신, 억울한 감금, 복수 이 3요소는 시대를 불문하고 독자들을 매료시켰다.',
    ],
)

AUDIO_WEIGHTED_BOOKS = [
    CARD_UNBEARABLE_LIGHTNESS,
    (
        '이성과 감성',
        'https://velog.velcdn.com/images/jeo0534/post/74f8850c-4d18-42f5-ac29-7e3a1f4f68f4/image.png',
        ['"이성"과 "감성"이라는 두 가지 인간성을 연애와 결혼이라는 보편적 주제를 통한 고찰'],
    ),
    (
        '지금, 만나러 갑니다',
        'https://velog.velcdn.com/images/jeo0534/post/fca0c4eb-51bb-4b3c-93d0-866ed37f60fc/image.png',
        [
            '당신에겐 있나요? 기적같은 단 한사람',
            '그 사람을 다시 한 번 만날 수 있다면.',
            '',
            '더 이상 볼 수 없게 된 그리운 사람과의 기적 같은 재회를 그린다. 1년 전 세상을 떠난 아내 미오를 '
            '그리워하며 하루하루를 보내는 다쿠미는 비 오는 날 아들 유지와 함께 찾은 숲속에서 놀랍게도 죽은 '
            '미오와 재회한다. 이야기는 누구보다 차근차근 마음을 쌓아가며 느리게 사랑해온 두 사람의 과거로 '
            '거슬러 올라간다.',
        ],
    ),
    (
        '모순',
        'https://velog.velcdn.com/images/jeo0534/post/66e26a96-c2d5-4c15-a931-66093fb0798e/image.png',
        [
            '인생은 탐구하면서 살아가는 것이 아니라, 살아가면서 탐구하는 것이다. 실수는 되풀이된다. 그것이 인생이다…….',
            '바로 그 이유 때문에 사랑을 시작했고, 바로 그 이유 때문에 미워하게 된다는, 인간이란 존재의 한없는 모순......',
        ],
    ),
    (
        '사랑의 파괴',
        'https://velog.velcdn.com/images/jeo0534/post/0192b5f2-7218-42b7-a5dc-7fa66f654f1c/image.png',
        [
            '엘레나는 자신을 위해서 내가 나 자신을 파괴하기를 원하고 있었다.',
            '사랑하는 만큼 사랑받고자 하는 욕망, 순진하기에 더욱더 잔혹한 유년의 사랑',
        ],
    ),
    (
        '제인에어',
        'https://velog.velcdn.com/images/jeo0534/post/e2c36e8d-7446-44af-a387-3f43e968d713/image.png',
        [
            '순응하고 인내하며 봉사하는 여성이 이상적으로 여겨지던 빅토리아 시대에, 현실적인 조건이나 개인적 '
            '자질에서 이와 동떨어진 여성인 제인의 성장을 통해 당대 여성의 삶 전반, 즉 여성의 교육, 고용, 사랑, '
            '결혼에 대한 의문',
        ],
    ),
    CARD_FESTIVAL_OF_INSIGNIFICANCE,
    (
        '80일간의 세계일주',
        'https://velog.velcdn.com/images/jeo0534/post/8875c9d4-d568-4242-a478-4358e97411df/image.png',
        [
            '2만 파운드를 걸고 80일 동안의 세계 일주에 나선 영국 신사 필리어스 포그. ',
            '그는 기계처럼 정확하고 냉정한 영국 신사다. 한 치의 오차도 없이 여행을 계획하는 주인공을 통해 '
            '쥘 베른은 치밀하고 과학적이며 이성적인 인간과, 인간에 대한 신뢰와 애정 그리고 세계에 대한 '
            '긍정으로 차 있는 인간상을 그려 낸다.',
        ],
    ),
    CARD_MONTE_CRISTO,
    (
        '페드르와 이폴리트',
        'https://velog.velcdn.com/images/jeo0534/post/fc8c56f5-5661-427b-bb39-70c6e6169fe4/image.png',
        [
            '인간은 진정 자신을 옥죄는 정념으로부터 스스로를 구할 의지도, 능력도 없는 존재인가.',
            '에우리피데스의 「히폴리토스」를 바탕으로 정념이 지닌 파괴적 본성,통제할 수 없는 정념에 빠진 '
            '한 인간이 보여 주는 감정의 격정을 파고든 라신 비극의 정수.',
        ],
    ),
    (
        '결혼ㆍ여름',
        'https://velog.velcdn.com/images/jeo0534/post/81bbcf18-81e5-4952-853b-927bb8c2223d/image.png',
        [
            '깊이 사랑하는 여인의 매력을 항목별로 조목조목 읊을 수 있겠는가?그럴 수 없다, 그냥 전체를 사랑하는 것이다.',
            '카뮈 사상의 핵심인 ‘부조리’와 ‘반항’의 출발 및 완성 과정이 육성으로 들리는 듯한 자전적 기록',
        ],
    ),
]

LYRICS_WEIGHTED_BOOKS = [
    CARD_UNBEARABLE_LIGHTNESS,
    (
        '어린왕자',
        'https://velog.velcdn.com/images/jeo0534/post/93a93b74-d728-4727-81b5-7af32114a2aa/image.png',
        [
            '네가 오후 4시에 온다면 난 3시부터 설렐 거야. 4시가 가까워질수록 점점 더 행복해지겠지. 4시가 되면 '
            '난 가슴이 두근거려서 안절부절못하고 걱정을 할 거야. 행복의 대가를 알게 되겠지! 하지만 네가 아무 '
            '때나 온다면 언제부터 마음의 준비를 해야 할지 도무지 알 수 없잖아.',
            '순수성을 허락하지 않는 세상에서 끊임없이 방황하고 고뇌한 생텍쥐페리. 그는 세상을 바꿀 수는 없지만 '
            '희망을 그리고 싶었고, 자신이 동경하고 희망하는 삶을 ‘어린 왕자’로 형상화했다.',
        ],
    ),
    CARD_MONTE_CRISTO,
    (
        '로미오와 줄리엣',
        'https://velog.velcdn.com/images/jeo0534/post/6edfa385-5695-4ba8-a14e-45ce4871f5ca/image.png',
        [
            '오, 둥근 궤도 안에서 한 달 내내 변하는지조 없는 달에게 맹세하진 마세요',
            '다쳐 본 적 없는 자가 흉터를 비웃는 법…',
            '달빛 아래 주고받은 첫 키스와 사랑의 맹세,살아 있는 죽음을 통해 도달하는 죽음을 넘어서는 사랑!'
            '셰익스피어가 빚어낸 순수한 열정의 비극, 그 사랑의 모순어법',
        ],
    ),
    (
        '아주 편안한 죽음',
        'https://velog.velcdn.com/images/jeo0534/post/86c06fae-d47c-462f-b8bf-2fbfadcd9f33/image.png',
        [
            '엄마는 유년 시절 내내 규범과 금기라는 갑옷을 두른 채 몸과 마음, 정신을 억압당했다. 그리고 스스로를 '
            '끈으로 옭아매도록 교육받았다. 그런 엄마의 내면에는끓어오르는 피와 불같은 정열을 지닌 한 여인이 '
            '살아 숨 쉬고 있었다. 그러나 그 여인은 뒤틀리고 훼손된 끝에 자기 자신에게조차 낯선 존재가 되어 '
            '버린 모습이었다.',
            '주체성을 포기하며 타자로 살도록 강요받아 온 한 인간의 생애, 나아가 당대 여성 전체의 모습. ',
            '냉대하며 외면했던 세계를 새롭게 인식하며 자기 정체성의 일부로 받아들이는 과정이며, 그와 동시에 '
            '남과 여, 육체와 정신, 삶과 죽음 등 구별 짓기로 가득했던 인간 내면의 경계를 허무는 작품.',
        ],
    ),
    CARD_FESTIVAL_OF_INSIGNIFICANCE,
    (
        '잘못 걸려온 전화',
        'https://velog.velcdn.com/images/jeo0534/post/9fa4d386-e2fa-435d-91bc-7841c5ddd96e/image.png',
        [
            '그런 식으로 세월은 흘러갈 것이다. 그리고 악몽 같던 내 인생의 장면들이 눈에 선할 것이다. 그러나 '
            '나는 이제 그것들로 인해 아파하지 않을 것이다.',
            '죽음, 사랑, 그리고 상실"아고타 크리스토프의 작품 중 가장 낯설고 비밀스러운 악몽과 우화" - 르 몽드(Le Monde)',
        ],
    ),
    (
        '파우스트',
        'https://velog.velcdn.com/images/jeo0534/post/9bc7048f-9e21-43cc-8db8-a71bb1bc8dfa/image.png',
        [
            '내가 너의 노예가 되어 이 세상 모든 영화를 체험하게 해주는 대신，네가 어느 한순간 '
            '`멈추어라．너는 너무도 아름답다’라며 휴식을 원하면 그때부터 너의 영혼은 영원히 나의 것이다.',
            '지식과 학문에 절망한 노학자 파우스트 박사의 미망(迷妄)과 구원의 장구한 노정을 그린다. 악마 '
            '메피스토펠레스의 유혹에 빠져 현세의 쾌락을 쫓으며 방황하던 파우스트는 마침내 잘못을 깨닫고 '
            '천상의 구원을 받는다.',
        ],
    ),
    (
        '어떻게든 이별',
        'https://velog.velcdn.com/images/jeo0534/post/10ca695a-924b-44be-a573-1227e1525510/image.png',
        [
            '이 계절은 조금 가벼운 절망을 앓기에 얼마나 찬란한가',
            '사랑, 결국에는 이별, 끝내 불가피한 고독지극한 상처 안에 웃음을 품은 쓸쓸한 통찰',
        ],
    ),
]


def render_book_cards(cards, per_row=3):
    """Lay out numbered book cards in rows of ``per_row`` equal-width columns.

    Args:
        cards: sequence of ``(title, image_url, captions)`` tuples.
        per_row: number of columns created for each row; a short final row
            leaves its trailing columns empty.
    """
    for row_start in range(0, len(cards), per_row):
        columns = st.columns([1] * per_row)
        row_cards = cards[row_start:row_start + per_row]
        for offset, (column, card) in enumerate(zip(columns, row_cards)):
            card_title, image_url, captions = card
            with column:
                st.markdown(f'**{row_start + offset + 1}. {card_title}**')
                st.image(image_url)
                for caption in captions:
                    st.caption(caption)


with st.expander('AF : 가사 : 키워드 = 0.8 : 0.1 : 0.1 인 경우'):
    render_book_cards(AUDIO_WEIGHTED_BOOKS)

with st.expander('가사 중심인 경우'):
    render_book_cards(LYRICS_WEIGHTED_BOOKS)


# ---------------------------------------------------------------------------
# Data and model loading
# ---------------------------------------------------------------------------
@st.cache_data
def load_data():
    """Read the song feature table from ``final_data.xlsx``."""
    return pd.read_excel('final_data.xlsx', index_col=0)


data = load_data()


@st.cache_data
def load_lyrics():
    """Read the song lyrics table from ``lyrics.xlsx``."""
    return pd.read_excel('lyrics.xlsx', index_col=0)


lyrics = load_lyrics()


@st.cache_resource
def load_model():
    """Load the pickled classifier from ``SVM.pkl``.

    Uses ``st.cache_resource`` so the model object is shared between reruns
    instead of being re-serialised on every cache hit.
    """
    return joblib.load('SVM.pkl')


model = load_model()

# ---------------------------------------------------------------------------
# Step 1: book title input and lookup
# ---------------------------------------------------------------------------
st.header('1️⃣ 책 제목을 입력해주세요.')
book_title = st.text_input(label='예시) 날씨가 좋으면 찾아가겠어요', value="", key='text')


def _clear_title():
    """Button callback: empty the title text box."""
    st.session_state.text = ""


reset = st.button('Reset', on_click=_clear_title)

if not book_title:
    con = st.container()
    con.caption('Result')
    con.error('책 제목을 입력해주세요.', icon="⚠️")
    st.stop()

# TODO(review): the fallback literal is a credential committed to the source;
# set KAKAO_REST_API_KEY in the environment, then rotate and remove it.
rest_api_key = os.environ.get('KAKAO_REST_API_KEY', "41d651c93152d5ec054dc828cacfa671")
url = "https://dapi.kakao.com/v3/search/book"
header = {"authorization": "KakaoAK " + rest_api_key}
querynum = {"query": book_title}

try:
    response = requests.get(url, headers=header, params=querynum, timeout=HTTP_TIMEOUT)
    book_info = json.loads(response.text)['documents'][0]
except (requests.RequestException, ValueError, KeyError, IndexError):
    # Network failure, malformed JSON, or an empty result list.
    con = st.container()
    con.caption('Result')
    con.error('존재하지 않는 책입니다. 다시 입력해주세요.', icon="🚨")
    st.stop()

# One row per book: the author list is joined into a single string so a book
# with several (or zero) authors does not produce a multi-row or empty frame.
book = pd.DataFrame([{
    'title': book_info.get('title', ''),
    'isbn': book_info.get('isbn', ''),
    'authors': ', '.join(book_info.get('authors') or []),
    'publisher': book_info.get('publisher', ''),
}])


def _first_text(parsed_page, selector):
    """Return the text of the first node matching *selector*, or '' if none."""
    matches = parsed_page.select(selector)
    return matches[0].text if matches else ''


try:
    detail_html = requests.get(book_info.get('url', ''), timeout=HTTP_TIMEOUT).text
except requests.RequestException:
    detail_html = ''  # fall back to empty descriptions rather than crashing
soup = bs(detail_html, "html.parser")

# Column order matters: these three are appended last, in this order.
book['책소개'] = _first_text(soup, '#tabContent > div:nth-child(1) > div:nth-child(3) > p')
book['책속으로'] = _first_text(soup, '#tabContent > div:nth-child(1) > div:nth-child(6) > p')
book['서평'] = _first_text(soup, '#tabContent > div:nth-child(1) > div:nth-child(7) > p')

cover_nodes = soup.select(
    '#tabContent > div:nth-child(1) > div.info_section.info_intro > div.wrap_thumb > span > img'
)
img_src = cover_nodes[0].get('src') if cover_nodes else None

col1, col2 = st.columns([1, 2])
with col1:
    if img_src:
        st.image(img_src, width=150)
with col2:
    title = book['title'][0]
    author = book['authors'][0]
    publisher = book['publisher'][0]
    st.caption('제목 : ' + title)
    st.caption('저자 : ' + author)
    st.caption('출판사 : ' + publisher)
st.title('')
text = '<' + title + '>에 대한 정보를 모으고 있는 중입니다.'
# ---------------------------------------------------------------------------
# Step 2: turn the book text into features and rank every song against it.
# Relies on names created earlier in the script: st, pd, re, time, nltk,
# requests, ET, book, data, lyrics, model and the progress label `text`.
# ---------------------------------------------------------------------------
import os  # local import: only this section reads an API key from the environment

REQUEST_TIMEOUT = 10  # seconds; keeps a stalled API from hanging the app

my_bar = st.progress(0, text=text)
time.sleep(5)  # cosmetic pause kept from the original flow
my_bar.progress(5, text='〰️5%〰️')
time.sleep(1)
my_bar.progress(30, text='〰️30%〰️')

# English stop-word list consulted by delete_stops().
stops = set(stopwords.words('english'))


def hapus_url(text):
    """Strip @mentions and http(s) links from *text*."""
    without_mentions = re.sub(r'@[\w]+', '', text)
    return re.sub(r'http\S+', '', without_mentions)


def remove_special_characters(text, remove_digits=True):
    """Drop every character that is not an ASCII letter, digit or whitespace.

    ``remove_digits`` is accepted for backward compatibility only. It has
    never been honoured (digits are always kept), and that behaviour is left
    unchanged so the features fed to the model stay the same.
    """
    return re.sub(r'[^a-zA-Z0-9\s]', '', text)


def delete_stops(text):
    """Lower-case *text* and remove English stop words."""
    return ' '.join(word for word in text.lower().split() if word not in stops)


def get_wordnet_pos(treebank_tag):
    """Map a Penn Treebank POS tag to the matching WordNet constant.

    Tags that are not adjective/verb/noun/adverb fall back to noun.
    """
    by_initial = {'J': wordnet.ADJ, 'V': wordnet.VERB, 'N': wordnet.NOUN, 'R': wordnet.ADV}
    return by_initial.get(treebank_tag[:1], wordnet.NOUN)


def tockenize(text):
    """POS-tag *text* and return its lemmas joined by single spaces."""
    lemmatizer = WordNetLemmatizer()
    tagged = nltk.pos_tag(word_tokenize(text))
    return ' '.join(lemmatizer.lemmatize(token, get_wordnet_pos(tag)) for token, tag in tagged)


def clean(text):
    """Full cleaning pipeline: strip symbols, drop stop words, lemmatise."""
    text = remove_special_characters(text, remove_digits=True)
    text = delete_stops(text)
    return tockenize(text)


translator = Translator()
# The lookup step appends the publisher intro, the excerpt and the review as
# the last three columns of `book`. They are selected by position so this
# section does not have to repeat the column labels.
source_columns = list(book.columns[-3:])
translated_parts = []
for source_column in source_columns:
    # Read the scalar at row 0; comparing the whole column to '' raises an
    # ambiguous-truth-value error as soon as the frame has more than one row.
    raw_text = book.loc[0, source_column]
    english = clean(translator.translate(hapus_url(raw_text)).text) if raw_text else ''
    book[source_column + '_trans'] = english
    translated_parts.append(english)

# Join with a space so the last word of one part is not fused with the first
# word of the next.
total_text = ' '.join(part for part in translated_parts if part)
long = ' '.join(book.loc[0, column] for column in source_columns if book.loc[0, column])

# NOTE(review): this label order is assumed to match the column order of
# model.predict_proba (i.e. model.classes_) — confirm against training code.
EMOTIONS = ['empty', 'sadness', 'enthusiasm', 'worry', 'love', 'fun',
            'hate', 'happiness', 'boredom', 'relief', 'anger']


@st.cache_data
def load_tweet():
    """Read the tweet corpus from ``tweet_data_agumentation.csv``."""
    return pd.read_csv('tweet_data_agumentation.csv', index_col=0)


@st.cache_resource
def load_emotion_vectorizer():
    """Fit the TF-IDF vocabulary on the tweet corpus once per server process."""
    vectorizer = TfidfVectorizer()
    vectorizer.fit(load_tweet()["content"])
    return vectorizer


book_tfidf = load_emotion_vectorizer().transform([total_text])
sentiment_prob = pd.Series(model.predict_proba(book_tfidf)[0], index=EMOTIONS, name='prob')
my_bar.progress(60, text='〰️60%〰️')


def _similarity_to_book(book_vector, song_features):
    """Cosine similarity between the book vector and every song row.

    The two inputs are aligned on their labels (book index vs. song columns),
    and only the 1 x n_songs block is computed rather than the full matrix.
    """
    stacked = pd.concat([book_vector, song_features.T], axis=1).T
    return cosine_similarity(stacked.iloc[[0]], stacked.iloc[1:])[0]


# Audio-feature emotions: 11 columns, relabelled with the emotion names.
audio_features = data.iloc[:, -12:-1].set_axis(EMOTIONS, axis=1)
audio_sim = _similarity_to_book(sentiment_prob, audio_features)

# Lyric emotions: columns from position 5 up to the audio block.
lyrics_sim = _similarity_to_book(sentiment_prob, data.iloc[:, 5:-12])
my_bar.progress(80, text='〰️80%〰️')

# Keyword similarity: TF-IDF over the book text plus every song's keywords.
keyword_tfidf = TfidfVectorizer().fit_transform([total_text, *data['key_word']])
keyword_sim = cosine_similarity(keyword_tfidf[0], keyword_tfidf[1:])[0]
my_bar.progress(100, text='100%')

song_catalog = data[['url', 'name', 'Artist']].reset_index(drop=True)


def top_songs(audio_weight, lyrics_weight, keyword_weight, count=5):
    """Return the ``count`` best songs for one weighting of the similarities.

    Scores are attached row by row, so songs that share a title keep their
    own score. The score column is named ``book``.
    """
    scored = song_catalog.assign(
        book=audio_weight * audio_sim + lyrics_weight * lyrics_sim + keyword_weight * keyword_sim
    )
    return scored.sort_values(by='book', ascending=False).head(count).reset_index(drop=True)


top_five_df = top_songs(0.8, 0.1, 0.1)
top_five_df_1 = top_songs(0, 0.5, 0.5)

time.sleep(1)
my_bar.empty()
st.caption('책 소개 중....')
st.markdown(long[:300] + '...')
st.markdown('')

# First lyrics row per URL, for direct lookup while rendering.
lyrics_by_url = lyrics.drop_duplicates(subset='url').set_index('url')


def _lyrics_for(song_url, column):
    """Return one lyrics field for *song_url*, or '' when the song is missing."""
    try:
        return lyrics_by_url.at[song_url, column]
    except KeyError:
        return ''


MEDALS = {1: '🥇 ', 2: '🥈 ', 3: '🥉 '}


def show_recommendations(songs):
    """Render one tab per recommended song with its metadata and lyrics."""
    if songs.empty:
        return
    tabs = st.tabs([f'TOP {rank}' for rank in range(1, len(songs) + 1)])
    for rank, (tab, (_, song)) in enumerate(zip(tabs, songs.iterrows()), start=1):
        with tab:
            st.subheader(f"{MEDALS.get(rank, '')}TOP {rank}")
            st.markdown('**제목** : {0}'.format(song['name']))
            st.markdown('**가수** : {0} '.format(song['Artist']))
            st.markdown('**url** : {0} '.format(song['url']))
            st.markdown('**유사도** : {0:.4f}'.format(song['book']))
            with st.expander('가사'):
                st.caption('원본 ver')
                st.markdown(_lyrics_for(song['url'], 'lyrics'))
                st.caption('영어 ver')
                st.markdown(_lyrics_for(song['url'], 'lyrics_english'))
            st.markdown('')


st.header('2️⃣ 결과')
st.subheader('🙂 도서와 분위기가 유사한 노래')
st.caption('AF : 가사 : 키워드 = 0.8 : 0.1 : 0.1')
show_recommendations(top_five_df)

st.subheader('📖 도서와 내용이 유사한 노래')
st.caption('AF : 가사 : 키워드 = 0 : 0.5 : 0.5')
show_recommendations(top_five_df_1)

# ---------------------------------------------------------------------------
# Step 3: which Seoul libraries hold the book, and can it be borrowed?
# ---------------------------------------------------------------------------
st.header('3️⃣ 도서 대출가능여부')

# TODO(review): the fallback literal is a credential committed to the source;
# set DATA4LIBRARY_API_KEY in the environment, then rotate and remove it.
api_key = os.environ.get(
    'DATA4LIBRARY_API_KEY',
    "6a4e438d1c66bb40ae6eb1fd83b134197ad1a274907b3804d0f2996de7c3e59c",
)

# The original took the second space-separated token unconditionally, which
# fails when only one code is present. Prefer the 13-character token
# (presumably the ISBN-13 — verify against the Kakao response) and otherwise
# fall back to the last token.
isbn_tokens = str(book['isbn'][0]).split()
isbn = next(
    (token for token in isbn_tokens if len(token) == 13),
    isbn_tokens[-1] if isbn_tokens else '',
)
areacode = "11"  # Seoul
LIBRARY_API_ROOT = "http://data4library.kr/api"
LOOKUP_MESSAGE = '도서 대출가능 여부에 대해 정보를 모으는 중입니다.'


def _library_api(endpoint, **query):
    """Call a data4library endpoint and return the parsed XML root.

    Parameters go through ``params=`` so they are URL-encoded correctly, and
    the parser is fed raw bytes so the document's own encoding declaration is
    used instead of a guessed text encoding.
    """
    reply = requests.get(
        f"{LIBRARY_API_ROOT}/{endpoint}",
        params={'authKey': api_key, **query},
        timeout=REQUEST_TIMEOUT,
    )
    reply.raise_for_status()
    return ET.fromstring(reply.content)


def _loan_status(lib_code):
    """Return 'Y'/'N' flags for one library, or None if it lacks the book."""
    loan_root = _library_api('bookExist', libCode=lib_code, isbn13=isbn)
    if loan_root.findtext("result/hasBook") != "Y":
        return None
    flags = loan_root.findall('result/loanAvailable')
    if not flags:
        return "N"
    return ", ".join("Y" if flag.text == "Y" else "N" for flag in flags)


def find_holding_libraries(page_size=100):
    """List ``[name, address, homepage, loan flag]`` for libraries with the book.

    Pages through ``libSrchByBook`` until ``numFound`` entries are covered.
    A library whose loan check fails is skipped (best effort) instead of
    aborting the whole listing.
    """
    rows = []
    page = 1
    while True:
        root = _library_api('libSrchByBook', isbn=isbn, region=areacode,
                            pageNo=page, pageSize=page_size)
        for lib in root.findall("libs/lib"):
            try:
                loan_flag = _loan_status(lib.findtext("libCode"))
            except (requests.RequestException, ET.ParseError):
                continue
            if loan_flag is not None:
                rows.append([lib.findtext("libName"), lib.findtext("address"),
                             lib.findtext("homepage"), loan_flag])
        num_found = int(root.findtext(".//numFound") or 0)
        if page * page_size >= num_found:
            return rows
        page += 1


my_bar = st.progress(0, text=LOOKUP_MESSAGE)
results = []
lookup_failed = not isbn
my_bar.progress(40, text=LOOKUP_MESSAGE)
if isbn:
    try:
        results = find_holding_libraries()
    except (requests.RequestException, ET.ParseError, ValueError):
        lookup_failed = True
my_bar.progress(80, text=LOOKUP_MESSAGE)

df = pd.DataFrame(results, columns=["도서관", "주소", "홈페이지 URL", "대출 가능 여부"])
pd.set_option('display.max_rows', None)  # show every row
my_bar.progress(100, text=LOOKUP_MESSAGE)
time.sleep(2)
my_bar.empty()
st.divider()
if lookup_failed:
    st.warning('도서관 정보를 불러오지 못했습니다. 잠시 후 다시 시도해주세요.')
df = df.set_index('대출 가능 여부')
st.dataframe(df)