Update api/core/nlp_handler.py
Browse files
- api/core/nlp_handler.py (+51 -21)
api/core/nlp_handler.py
CHANGED
|
@@ -117,46 +117,76 @@ class NLPHandler:
|
|
| 117 |
@staticmethod
|
| 118 |
def _fetch_official_api(video_id, api_key):
|
| 119 |
print(f"🔑 Using Official API Key for {video_id}...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
text_parts = []
|
| 121 |
|
| 122 |
try:
|
| 123 |
-
# 1. Ambil Metadata
|
| 124 |
-
url_meta = f"https://www.googleapis.com/youtube/v3/videos?part=snippet&id={video_id}&key={api_key}"
|
| 125 |
res_meta = requests.get(url_meta, timeout=5)
|
| 126 |
|
| 127 |
if res_meta.status_code == 200:
|
| 128 |
data = res_meta.json()
|
| 129 |
if "items" in data and len(data["items"]) > 0:
|
| 130 |
-
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
| 132 |
title = html.unescape(snippet['title'])
|
| 133 |
desc = html.unescape(snippet['description'])
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
-
# 2. Ambil Komentar
|
| 138 |
-
url_comm = f"https://www.googleapis.com/youtube/v3/commentThreads?part=snippet&videoId={video_id}&maxResults=
|
| 139 |
res_comm = requests.get(url_comm, timeout=5)
|
| 140 |
|
| 141 |
if res_comm.status_code == 200:
|
| 142 |
data = res_comm.json()
|
| 143 |
-
comments = []
|
| 144 |
for item in data.get("items", []):
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
comments.append(
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
if not text_parts:
|
| 157 |
return None
|
| 158 |
-
|
| 159 |
-
|
|
|
|
| 160 |
|
| 161 |
except Exception as e:
|
| 162 |
print(f"❌ Official API Error: {e}")
|
|
|
|
| 117 |
@staticmethod
|
| 118 |
def _fetch_official_api(video_id, api_key):
|
| 119 |
print(f"🔑 Using Official API Key for {video_id}...")
|
| 120 |
+
|
| 121 |
+
result = {
|
| 122 |
+
"video": None,
|
| 123 |
+
"comments": [],
|
| 124 |
+
"text_for_analysis": ""
|
| 125 |
+
}
|
| 126 |
text_parts = []
|
| 127 |
|
| 128 |
try:
|
| 129 |
+
# 1. Ambil Metadata Video
|
| 130 |
+
url_meta = f"https://www.googleapis.com/youtube/v3/videos?part=snippet,statistics&id={video_id}&key={api_key}"
|
| 131 |
res_meta = requests.get(url_meta, timeout=5)
|
| 132 |
|
| 133 |
if res_meta.status_code == 200:
|
| 134 |
data = res_meta.json()
|
| 135 |
if "items" in data and len(data["items"]) > 0:
|
| 136 |
+
item = data["items"][0]
|
| 137 |
+
snippet = item["snippet"]
|
| 138 |
+
stats = item.get("statistics", {})
|
| 139 |
+
|
| 140 |
+
# Unescape HTML entities
|
| 141 |
title = html.unescape(snippet['title'])
|
| 142 |
desc = html.unescape(snippet['description'])
|
| 143 |
+
|
| 144 |
+
# Get best thumbnail
|
| 145 |
+
thumbnails = snippet.get('thumbnails', {})
|
| 146 |
+
thumbnail = (thumbnails.get('maxres') or thumbnails.get('high') or thumbnails.get('medium') or thumbnails.get('default', {})).get('url', '')
|
| 147 |
+
|
| 148 |
+
result["video"] = {
|
| 149 |
+
"title": title,
|
| 150 |
+
"description": desc,
|
| 151 |
+
"thumbnail": thumbnail,
|
| 152 |
+
"channel": snippet.get('channelTitle', 'Unknown Channel'),
|
| 153 |
+
"publishedAt": snippet.get('publishedAt', ''),
|
| 154 |
+
"viewCount": stats.get('viewCount', '0'),
|
| 155 |
+
"likeCount": stats.get('likeCount', '0'),
|
| 156 |
+
"commentCount": stats.get('commentCount', '0')
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
text_parts.append(title)
|
| 160 |
+
text_parts.append(desc)
|
| 161 |
|
| 162 |
+
# 2. Ambil Komentar dengan detail
|
| 163 |
+
url_comm = f"https://www.googleapis.com/youtube/v3/commentThreads?part=snippet&videoId={video_id}&maxResults=20&order=relevance&key={api_key}"
|
| 164 |
res_comm = requests.get(url_comm, timeout=5)
|
| 165 |
|
| 166 |
if res_comm.status_code == 200:
|
| 167 |
data = res_comm.json()
|
|
|
|
| 168 |
for item in data.get("items", []):
|
| 169 |
+
comment_snippet = item["snippet"]["topLevelComment"]["snippet"]
|
| 170 |
+
raw_text = comment_snippet.get("textDisplay", "")
|
| 171 |
+
clean_text = re.sub(r'<[^>]+>', '', raw_text)
|
| 172 |
+
clean_text = html.unescape(clean_text)
|
| 173 |
+
|
| 174 |
+
result["comments"].append({
|
| 175 |
+
"text": clean_text,
|
| 176 |
+
"author": comment_snippet.get("authorDisplayName", "Anonymous"),
|
| 177 |
+
"authorImage": comment_snippet.get("authorProfileImageUrl", ""),
|
| 178 |
+
"likeCount": comment_snippet.get("likeCount", 0),
|
| 179 |
+
"publishedAt": comment_snippet.get("publishedAt", ""),
|
| 180 |
+
"replyCount": item["snippet"].get("totalReplyCount", 0)
|
| 181 |
+
})
|
| 182 |
+
|
| 183 |
+
text_parts.append(clean_text)
|
| 184 |
|
| 185 |
if not text_parts:
|
| 186 |
return None
|
| 187 |
+
|
| 188 |
+
result["text_for_analysis"] = " ".join(text_parts)
|
| 189 |
+
return result
|
| 190 |
|
| 191 |
except Exception as e:
|
| 192 |
print(f"❌ Official API Error: {e}")
|