Spaces:
Sleeping
Sleeping
Upload 6 files
Browse files- app.py +29 -0
- features.joblib +3 -0
- label_encoders.joblib +3 -0
- predict_module.py +241 -0
- requirements.txt +11 -3
- view_predictor.joblib +3 -0
app.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os

import streamlit as st
import pandas as pd
import joblib
from predict_module import extract_features_from_video_id, predict_view_count, visualize_result, predict_views

# SECURITY: an API key was previously hard-coded (and committed) here.
# Read it from the environment instead; the old literal is kept only as a
# fallback so existing deployments keep working. Rotate the leaked key and
# delete the fallback as soon as possible.
api_key = os.environ.get("YOUTUBE_API_KEY", "AIzaSyAgkZQp9EqA6N49J7TCh6Q40mWyVIGBit8")

# Pre-trained view-count regressor (predicts on a log1p scale, see predict_module).
model = joblib.load("model/view_predictor.joblib")

st.title("๐ฌ YouTube ์กฐํ์ ์์ธก๊ธฐ")

video_id = st.text_input("YouTube ์์ ID๋ฅผ ์๋ ฅํ์ธ์:")

if st.button("์์ธก ์์"):
    try:
        # Full video info (already includes a prediction and all features).
        info = predict_views(video_id, api_key)

        # NOTE(review): extract_features_from_video_id() calls predict_views()
        # again internally, so the YouTube API calls, thumbnail download and
        # sentiment model all run twice per click — consider deriving the
        # feature row from `info` directly to halve quota usage.
        features = extract_features_from_video_id(video_id, api_key)

        # Predict from the one-row feature DataFrame.
        predicted = predict_view_count(model, features)

        # Render the result card; needs both the feature row and raw info.
        html = visualize_result(video_id, features, predicted, info)
        st.components.v1.html(html, height=1000)
    except Exception as e:
        st.error(f"โ ์ค๋ฅ ๋ฐ์: {e}")
|
features.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:499398a910ad766c26ea73fe5efaf6f71da2990d16d4f8e05740ab37b00de7a6
|
| 3 |
+
size 108
|
label_encoders.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56634bf5e292b2396763f85be2cc6b46d28a2f2ceefcdef99dbe00f60e06901e
|
| 3 |
+
size 636
|
predict_module.py
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import requests
|
| 4 |
+
import cv2
|
| 5 |
+
import mediapipe as mp
|
| 6 |
+
import torch
|
| 7 |
+
from PIL import Image
|
| 8 |
+
from io import BytesIO
|
| 9 |
+
from joblib import load
|
| 10 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 11 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
| 12 |
+
|
| 13 |
+
# Load the trained model and its companion artifacts saved at training time.
model = load('model/view_predictor.joblib')
# The encoder bundle holds three LabelEncoders; only the third (category)
# is used in this module — the first two are discarded.
_, _, le_cat = load('model/label_encoders.joblib')
# Ordered list of feature column names the model expects at predict time.
feature_cols = load('model/features.joblib')

# Korean sentiment-analysis model (KcELECTRA fine-tune, fetched from the
# Hugging Face hub on first run). eval() disables dropout for inference.
senti_model_name = "nlp04/korean_sentiment_analysis_kcelectra"
senti_tokenizer = AutoTokenizer.from_pretrained(senti_model_name)
senti_model = AutoModelForSequenceClassification.from_pretrained(senti_model_name)
senti_model.eval()
|
| 23 |
+
|
| 24 |
+
def sentiment_score(text):
    """Score *text* with the KcELECTRA sentiment model, as a 0-100 float.

    Empty / NaN input scores 0.0 without touching the model. Class index 2
    is read as "positive"; checkpoints with fewer classes fall back to
    index 1 (mirrors the original IndexError handling).
    """
    if not text or pd.isna(text):
        return 0.0

    with torch.no_grad():
        encoded = senti_tokenizer(text, return_tensors="pt", truncation=True)
        logits = senti_model(**encoded).logits
        probabilities = torch.softmax(logits, dim=1).squeeze()

    # Prefer the 3-class positive slot; degrade gracefully on 2-class heads.
    try:
        positive = probabilities[2]
    except IndexError:
        positive = probabilities[1]
    return round(float(positive) * 100, 1)
|
| 35 |
+
|
| 36 |
+
# Keyword -> user-facing category lookup consumed by classify_by_keywords().
# First category (in insertion order) with a substring hit in the title wins.
# NOTE(review): the Korean text below is preserved verbatim from the source,
# which appears to be mojibake-encoded; do not "fix" these literals without
# re-encoding the training artifacts to match.
category_dict = {
    '์์': ['์ฏ์', '์ฐจ๋ฐฅ์ด๋ผ', '๋จน๋ฐฉ', '๋จ๊ณจ', '์์นจ', '์ฅ์ฌ', '๋ง๋ค๊ธฐ', '์นผ๋ก๋ฆฌ', '๋ฒ ์ด๊ธ', '๊ณฑ์ฐฝ', '์คํ์ดํฌ', '๊ณ ๊ธฐ',
            '์ผ๊ฒน์ด', '์ฑ์ฌ๋น', 'ํธ์์ ', '์ด์์', '๋ผ๋ฉด', '๊น๋ฐฅ', '์นํจ', '๋ง์ง', '์ง๋ฐฅ', '๋ก๋ณถ์ด', '์์', '๊น์น',
            '๊ด์ด', '๋ง๋', '๋๋ฉด', '์ฒ ํ', '๋ผ์ง', '์๋ฆฌ', '๊ฐ์', 'ํ์', '์ ์๋ฆฌ', '๋ ์ํผ', '๊น์น์ฐ๊ฐ'],
    '์ฐ์/์ ๋ช์ธ': ['์ตํ์ ', '์ดํด๋ฆฌ', '๊ฐ๊ทธ๋งจ', '๊ฐ๋ฏผ๊ฒฝ', '๋ค๋น์น', '์ด์งํ', '์ฌ์', '์์ด๋', '๋ค๋์นด', '์ ๋',
                '์ ์ฌ์', 'ํ๊ณ๊ณ ', '์กฐ์ธํธ', '์ฅ์๋', '๊น๊ตฌ๋ผ', '๊น์์ฒ ', '์ฐ์์ธ', '๋ฐฐ์ฐ', '์คํ', '์ถ์ฐ', '์ญ์ธ',
                '๊ฐ์', '๋ธ๋', '์ฝ์ํธ', '์ด์น์ฒ '],
    '๊ต์ก/๊ณต๋ถ': ['์ผ์ฐจ๋ฐฉ์ ์', '์ด์ฐจ๋ฐฉ์ ์', '๋ฎ์', '์ธ์๋ถํด', '์ง์', '๋ง์ถค๋ฒ', 'ํ๊ตญ์ฌ', '๊ณผํ', '๊ณผ์ธ', '์ํ',
              '์์', '๊ณต๋ถ', '์ญ์ฌ', '๊ณต๋ถ์', '์๋ฅ', 'ํด์ฆ', '์คํฐ๋', '์ ์๋', '์ํ', '์ง์', '๋ฌธ์ ',
              '์ผ์ฐจํจ์', '์ด์ฐจํจ์', '๋ฐฉ์ ์', '๊ฒ์ ๊ณ ์', '์์ด', '๊ตญ์ด', 'ํ๊ตญ์ด', '์์ธ๋'],
    '์ฌํ/์ฅ์': ['๋๋ฐ์ด', 'ํด๊ฐ', '์ ๊ตญ', '์ฌํ', 'ํฌ์ด', '์ธ๊ณ', '์งํ์ฒ ', 'ํ๊ฐ', '์นดํ', '์ฝ์ค', 'ํ์์ด',
              '๋์ฟ', '๋ชฝ๊ณจ', '์ผ๋ณธ', '์ค์ฌ์นด', '์ ์ฃผ', '์ ์ฃผ', '์ ์ฃผ๋', '์์ธ', '๋ฏผ๋ฐ', '๋ฏธ๊ตญ', '๋๋ง',
              'ํ๋ฆฌ', '์คํ์ธ', '์ธ๋ฆ๋', 'ํ์ฝฉ'],
    '์ผ์/๊ฐ์กฑ': ['๊ฐ์กฑ', '์๋ง', '์๋น ', '๋จํธ', '์์', '๋ชจ๋', 'ํผ์', 'ํ๋ฃจ', '์ผ์', '์ฌ๋', '์์ด', '๊ณต์ ',
              'ํ์ฅ', '๋ถ๋ถ', '๊ฐ์ฅ', '์ด๋จธ๋', '์กฐ์นด', '๊ฐ์', '์๋ค', '๊ฒฐํผ์'],
    '์ฝํ์ธ /์ ํ๋ธ': ['์๋ฅ', '์์ฆ', '๋ฆฌ๋ทฐ', '๋ผ์ด๋ธ', '๋ฐฉ์ก', '์์', '์ฑ๋', '๊ฒ์', '์ ํ๋ธ', '์๋ฐฉ์ก',
                 '์ดฌ์', '์ฝํ์ธ ', '๋๊ธ', '์ผํ'],
    '์ ์น': ['๋์ ', '๊ณต์ฝ', '์์ฒ ์', '๊ตญํ', '์ ์น', '๋ํต๋ น', '์ ๊ฑฐ', '์ ๋น', '์์'],
    '๊ฒฝ์ ': ['์ฃผ์', '๋นํธ์ฝ์ธ', '์ฝ์ธ', '์ ๋ฌผ', '๋ถ์', 'ํฌ์', '๊ฒฝ์ ', '๊ธ์ต', '๊ด๊ณ ', '๋์ถ', '์ํ', '์์ฅ'],
    '๊ฑด๊ฐ/์ด๋': ['์ด๋', '๊ฑด๊ฐ', '๋ค์ด์ดํธ', 'ํฌ์ค', '์คํธ๋ ์นญ', '์๊ฐ', '์ฒด๋ ฅ', 'ํผํธ๋์ค', '๋ฌ๋ฆฌ๊ธฐ', '๊ทผ๋ ฅ', '์๋จ'],
    '์ธ๊ฐ๊ด๊ณ/๊ณ ๋ฏผ': ['์ฐ์ ', '๊ณ ๋ฐฑ', '์๊ฐํ', '๋ฐ์ดํธ', '์๋ก', '๊ณ ๋ฏผ']
}
|
| 58 |
+
|
| 59 |
+
# ์ ๋ชฉ ํค์๋ ๊ธฐ๋ฐ ๋ถ๋ฅ ํจ์
|
| 60 |
+
def classify_by_keywords(title, keyword_dict):
    """Return the first category whose keyword list matches *title*.

    A match is a plain substring hit. Categories are tried in dict
    insertion order; None is returned when nothing matches.
    """
    return next(
        (
            name
            for name, words in keyword_dict.items()
            if any(word in title for word in words)
        ),
        None,
    )
|
| 66 |
+
|
| 67 |
+
# ์ ํ๋ธ ์นดํ
๊ณ ๋ฆฌ + ํค์๋ ๊ธฐ๋ฐ์ผ๋ก ์ฌ์ฉ์ ์นดํ
๊ณ ๋ฆฌ ๋ถ๋ฅ
|
| 68 |
+
def map_category(category_id, title, api_key):
    """Resolve a user-facing category for a video.

    Combines the YouTube categoryId (looked up via the Data API) with
    title-keyword classification. Priority: keyword match, then the mapped
    YouTube category, then the fallback label.
    """
    # Fetch the YouTube category name for this id (KR region).
    url = f'https://www.googleapis.com/youtube/v3/videoCategories?part=snippet&id={category_id}&regionCode=KR&key={api_key}'
    try:
        # timeout added so a hung API call cannot block the app forever.
        res = requests.get(url, timeout=10).json()
        yt_category = res['items'][0]['snippet']['title']
    except Exception:
        # Was a bare `except:` (also swallowed KeyboardInterrupt/SystemExit);
        # any lookup failure degrades to the fallback category.
        yt_category = "๊ธฐํ"

    # YouTube category name -> user category mapping.
    category_map = {
        "์ํ/์ ๋๋ฉ์ด์": "์ฝํ์ธ /์ ํ๋ธ",
        "์์": "์ฐ์/์ ๋ช์ธ",
        "์ํฐํ์ธ๋จผํธ": "์ฝํ์ธ /์ ํ๋ธ",
        "์ฝ๋ฏธ๋": "์ฝํ์ธ /์ ํ๋ธ",
        "์ธ๋ฌผ/๋ธ๋ก๊ทธ": "์ฐ์/์ ๋ช์ธ",
        "๊ฒ์": "์ฝํ์ธ /์ ํ๋ธ",
        "๋ธํ์ฐ/์คํ์ผ": "์ผ์/๊ฐ์กฑ",
        "๋ด์ค/์ ์น": "์ ์น",
        "๊ต์ก": "๊ต์ก/๊ณต๋ถ",
        "๊ณผํ/๊ธฐ์ ": "๊ต์ก/๊ณต๋ถ",
        "์คํฌ์ธ ": "๊ฑด๊ฐ/์ด๋",
        "์๋์ฐจ": "๊ธฐํ",
        "๋๋ฌผ": "๊ธฐํ",
        "์ฌํ": "์ฌํ/์ฅ์"
    }
    mapped_category = category_map.get(yt_category, None)

    # Keyword-based correction from the title.
    keyword_category = classify_by_keywords(title, category_dict)

    # Final priority: keyword hit > mapped YouTube category > fallback.
    return keyword_category or mapped_category or "๊ธฐํ"
|
| 101 |
+
|
| 102 |
+
def hue_to_color_group(hue_value):
    """Map a hue angle in degrees (0-360 scale) to a named color family.

    Hues >= 345 wrap around into the red family; values below 0 fall
    through to the catch-all label.
    """
    if hue_value >= 345:
        return "๋นจ๊ฐ ๊ณ์ด"

    # Half-open bands [low, high) in degrees, checked in order.
    bands = (
        (0, 15, "๋นจ๊ฐ ๊ณ์ด"),
        (15, 45, "์ฃผํฉ/๋ธ๋ ๊ณ์ด"),
        (45, 75, "์ฐ๋/์ด๋ก ๊ณ์ด"),
        (75, 165, "์ด๋ก/ํ๋ ๊ณ์ด"),
        (165, 255, "ํ๋/๋จ์ ๊ณ์ด"),
        (255, 285, "๋ณด๋ผ ๊ณ์ด"),
        (285, 345, "๋ถํ ๊ณ์ด"),
    )
    for low, high, label in bands:
        if low <= hue_value < high:
            return label
    return "๊ธฐํ"
|
| 119 |
+
|
| 120 |
+
def analyze_thumbnail(thumbnail_url):
    """Download a thumbnail image and return (color_group, face_count, avg_hue).

    color_group is the label from hue_to_color_group(); avg_hue is the mean
    hue rescaled to a 0-360 degree range.
    """
    response = requests.get(thumbnail_url)
    img = Image.open(BytesIO(response.content)).convert('RGB')
    img_np = np.array(img)
    hsv = cv2.cvtColor(img_np, cv2.COLOR_RGB2HSV)
    # OpenCV stores 8-bit hue as 0-179, so *2 rescales to ~0-360 degrees.
    hue_avg = int(np.mean(hsv[:, :, 0]) * 2)

    # Face detection on the thumbnail.
    # NOTE(review): the image is converted RGB->BGR before process(), but
    # MediaPipe's FaceDetection expects RGB input — confirm whether the
    # conversion is intentional; it may degrade detection quality.
    mp_face = mp.solutions.face_detection
    with mp_face.FaceDetection(model_selection=1, min_detection_confidence=0.3) as fd:
        results = fd.process(cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR))
        face_count = len(results.detections) if results.detections else 0

    return hue_to_color_group(hue_avg), face_count, hue_avg
|
| 134 |
+
|
| 135 |
+
def predict_views(video_id, api_key):
    """Fetch a video's metadata, derive model features, and predict views.

    Returns a dict containing the prediction plus every intermediate
    feature (title, actual views, category, upload hour/weekday, caption
    count, thumbnail hue/face stats, title sentiment, thumbnail URL).

    Raises ValueError when the video id cannot be found.
    """
    url = f'https://www.googleapis.com/youtube/v3/videos?part=snippet,statistics&id={video_id}&key={api_key}'
    # timeout added so a hung API call cannot block the app forever.
    res = requests.get(url, timeout=10).json()
    items = res.get('items', [])
    if not items:
        # Previously this was a bare res['items'][0] -> cryptic IndexError.
        raise ValueError(f"No video found for id '{video_id}' (check the video ID and API key)")
    item = items[0]

    title = item['snippet']['title']
    published_at = item['snippet']['publishedAt']
    category_id = item['snippet'].get('categoryId', '')
    thumbnail_url = item['snippet']['thumbnails']['high']['url']
    views = int(item['statistics'].get('viewCount', 0))

    # Upload-time features.
    dt = pd.to_datetime(published_at)
    hour = dt.hour
    weekday = dt.dayofweek

    # Count manually-created (non auto-generated) subtitle tracks.
    def count_manual_subtitles(vid):
        try:
            transcripts = YouTubeTranscriptApi.list_transcripts(vid)
        except Exception:
            # Transcripts disabled/unavailable must not abort the whole
            # prediction; treat as "no manual captions".
            return 0
        return len([t for t in transcripts if not t.is_generated])

    caption_count = count_manual_subtitles(video_id)

    # Thumbnail color / face analysis.
    hue_group, face_count, hue_value = analyze_thumbnail(thumbnail_url)

    # Title sentiment score (0-100).
    senti = sentiment_score(title)

    # Map YouTube category + title keywords to a user category.
    user_category = map_category(category_id, title, api_key)

    # Label-encode; unseen categories collapse to the fallback class.
    if user_category not in le_cat.classes_:
        user_category = '๊ธฐํ'
    cat_encoded = le_cat.transform([user_category])[0]

    # Single-row model input; column order is fixed by feature_cols below.
    X_input = pd.DataFrame([{
        '์๊ฐ๋': hour,
        '์์ผ': weekday,
        '์๋ง์': caption_count,
        '์นดํ๊ณ ๋ฆฌ': cat_encoded,
        'Hue': hue_value,
        '์ธ๋ค์ผ ์ผ๊ตด ์': face_count,
        '๊ฐ์ฑ์ ์': senti
    }])

    # The model was trained on log1p(views); expm1 inverts the transform.
    pred_log = model.predict(X_input[feature_cols])[0]
    predicted_views = int(np.expm1(pred_log))

    return {
        '์ ๋ชฉ': title,
        '์์ธก ์กฐํ์': predicted_views,
        '์ค์ ์กฐํ์': views,
        '์นดํ๊ณ ๋ฆฌ': user_category,
        '์๊ฐ๋': hour,
        '์์ผ': weekday,
        '์๋ง์': caption_count,
        '์ธ๋ค์ผ ์ผ๊ตด ์': face_count,
        '๊ฐ์ฑ์ ์': senti,
        'Hue ๊ทธ๋ฃน': hue_group,
        'Hue ๊ฐ': hue_value,
        '์ธ๋ค์ผ URL': thumbnail_url
    }
|
| 201 |
+
|
| 202 |
+
# 1. Feature extraction: build the one-row model input for a video.
def extract_features_from_video_id(video_id, api_key):
    """Return a single-row DataFrame of model features for *video_id*.

    NOTE(review): this calls predict_views(), so the entire YouTube-API /
    thumbnail / sentiment pipeline runs again even when the caller already
    holds the same info dict (app.py does exactly that) — consider
    accepting the info dict directly to avoid duplicate API quota usage.
    """
    info = predict_views(video_id, api_key)
    return pd.DataFrame([{
        '์๊ฐ๋': info['์๊ฐ๋'],
        '์์ผ': info['์์ผ'],
        '์๋ง์': info['์๋ง์'],
        # Re-encode the category label back to its integer code.
        '์นดํ๊ณ ๋ฆฌ': le_cat.transform([info['์นดํ๊ณ ๋ฆฌ']])[0],
        'Hue': info['Hue ๊ฐ'],
        '์ธ๋ค์ผ ์ผ๊ตด ์': info['์ธ๋ค์ผ ์ผ๊ตด ์'],
        '๊ฐ์ฑ์ ์': info['๊ฐ์ฑ์ ์']
    }])
|
| 214 |
+
|
| 215 |
+
# 2. ์์ธก ํจ์
|
| 216 |
+
def predict_view_count(model, features):
    """Predict a raw view count from a one-row feature DataFrame.

    The model outputs on a log1p scale; expm1 inverts it before the
    integer cast.
    """
    log_prediction = model.predict(features[feature_cols])
    return int(np.expm1(log_prediction[0]))
|
| 219 |
+
|
| 220 |
+
# 3. ์๊ฐํ ํจ์
|
| 221 |
+
def visualize_result(video_id, features, predicted_view_count, info):
    """Render the prediction summary as a dark-themed HTML card.

    *features* is the one-row DataFrame from extract_features_from_video_id;
    *info* is the dict returned by predict_views. Returns an HTML string.
    """
    # Weekday index (0=Mon) -> single-character label.
    weekday_label = ['์', 'ํ', '์', '๋ชฉ', '๊ธ', 'ํ ', '์ผ'][features['์์ผ'].values[0]]

    # BUG FIX: the actual-view-count lookup key below was corrupted
    # (contained U+FFFD replacement characters) and could never match the
    # '์ค์ ์กฐํ์' key that predict_views() returns -> guaranteed KeyError.
    html = f"""
    <div style="background-color: #111; color: white; padding: 20px; border-radius: 10px; max-width: 600px; font-family: Arial, sans-serif;">
        <h2>๐ฏ ์์ธก ์กฐํ์: {predicted_view_count:,}ํ</h2>
        <h3>๐ ์์ ์ ๋ชฉ: {info['์ ๋ชฉ']}</h3>
        <img src="{info['์ธ๋ค์ผ URL']}" alt="์ธ๋ค์ผ ์ด๋ฏธ์ง" style="width: 100%; border-radius: 10px; margin-bottom: 20px;"/>
        <ul style="list-style-type: none; padding-left: 0;">
            <li>๐ฝ๏ธ <strong>์์ ID:</strong> {video_id}</li>
            <li>๐๏ธ <strong>์ค์ ์กฐํ์:</strong> {info['์ค์ ์กฐํ์']:,}ํ</li>
            <li>โฐ <strong>์๊ฐ๋:</strong> {features['์๊ฐ๋'].values[0]}์</li>
            <li>๐ <strong>์์ผ:</strong> {weekday_label}</li>
            <li>๐ฌ <strong>์๋ง ์:</strong> {features['์๋ง์'].values[0]}</li>
            <li>๐จ <strong>์์ ๊ณ์ด:</strong> {info['Hue ๊ทธ๋ฃน']}</li>
            <li>๐ <strong>์ธ๋ค์ผ ์ผ๊ตด ์:</strong> {features['์ธ๋ค์ผ ์ผ๊ตด ์'].values[0]}</li>
            <li>๐ง <strong>๊ฐ์ฑ ์ ์:</strong> {features['๊ฐ์ฑ์ ์'].values[0]:.2f}</li>
        </ul>
    </div>
    """
    return html
|
requirements.txt
CHANGED
|
@@ -1,3 +1,11 @@
|
|
| 1 |
-
|
| 2 |
-
pandas
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
pandas
|
| 3 |
+
numpy
|
| 4 |
+
requests
|
| 5 |
+
opencv-python-headless
|
| 6 |
+
mediapipe
|
| 7 |
+
torch
|
| 8 |
+
Pillow
|
| 9 |
+
joblib
|
| 10 |
+
transformers
|
| 11 |
+
youtube-transcript-api
|
view_predictor.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e6382db2936fcc04e3594c5731d64eddf3e7b4efdbf78490f877d5641a4b548
|
| 3 |
+
size 17581281
|