anggars committed on
Commit
f949865
·
verified ·
1 Parent(s): cf2a25d

Update api/core/nlp_handler.py

Browse files
Files changed (1) hide show
  1. api/core/nlp_handler.py +51 -21
api/core/nlp_handler.py CHANGED
@@ -117,46 +117,76 @@ class NLPHandler:
117
  @staticmethod
118
  def _fetch_official_api(video_id, api_key):
119
  print(f"🔑 Using Official API Key for {video_id}...")
 
 
 
 
 
 
120
  text_parts = []
121
 
122
  try:
123
- # 1. Ambil Metadata
124
- url_meta = f"https://www.googleapis.com/youtube/v3/videos?part=snippet&id={video_id}&key={api_key}"
125
  res_meta = requests.get(url_meta, timeout=5)
126
 
127
  if res_meta.status_code == 200:
128
  data = res_meta.json()
129
  if "items" in data and len(data["items"]) > 0:
130
- snippet = data["items"][0]["snippet"]
131
- # Unescape biar " jadi " dan ' jadi '
 
 
 
132
  title = html.unescape(snippet['title'])
133
  desc = html.unescape(snippet['description'])
134
- text_parts.append(f"Title: {title}")
135
- text_parts.append(f"Description: {desc}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
- # 2. Ambil Komentar
138
- url_comm = f"https://www.googleapis.com/youtube/v3/commentThreads?part=snippet&videoId={video_id}&maxResults=30&order=relevance&key={api_key}"
139
  res_comm = requests.get(url_comm, timeout=5)
140
 
141
  if res_comm.status_code == 200:
142
  data = res_comm.json()
143
- comments = []
144
  for item in data.get("items", []):
145
- raw_comm = item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
146
- # Bersihkan tag HTML <b> <br>
147
- clean_comm = re.sub(r'<[^>]+>', '', raw_comm)
148
- # Bersihkan entities &quot; &#39;
149
- clean_comm = html.unescape(clean_comm)
150
- comments.append(clean_comm)
151
-
152
- if comments:
153
- text_parts.append("\n\n--- Top Comments (Community Vibe) ---\n")
154
- text_parts.extend(comments)
 
 
 
 
 
155
 
156
  if not text_parts:
157
  return None
158
-
159
- return "\n\n".join(text_parts)
 
160
 
161
  except Exception as e:
162
  print(f"❌ Official API Error: {e}")
 
117
  @staticmethod
118
  def _fetch_official_api(video_id, api_key):
119
  print(f"🔑 Using Official API Key for {video_id}...")
120
+
121
+ result = {
122
+ "video": None,
123
+ "comments": [],
124
+ "text_for_analysis": ""
125
+ }
126
  text_parts = []
127
 
128
  try:
129
+ # 1. Ambil Metadata Video
130
+ url_meta = f"https://www.googleapis.com/youtube/v3/videos?part=snippet,statistics&id={video_id}&key={api_key}"
131
  res_meta = requests.get(url_meta, timeout=5)
132
 
133
  if res_meta.status_code == 200:
134
  data = res_meta.json()
135
  if "items" in data and len(data["items"]) > 0:
136
+ item = data["items"][0]
137
+ snippet = item["snippet"]
138
+ stats = item.get("statistics", {})
139
+
140
+ # Unescape HTML entities
141
  title = html.unescape(snippet['title'])
142
  desc = html.unescape(snippet['description'])
143
+
144
+ # Get best thumbnail
145
+ thumbnails = snippet.get('thumbnails', {})
146
+ thumbnail = (thumbnails.get('maxres') or thumbnails.get('high') or thumbnails.get('medium') or thumbnails.get('default', {})).get('url', '')
147
+
148
+ result["video"] = {
149
+ "title": title,
150
+ "description": desc,
151
+ "thumbnail": thumbnail,
152
+ "channel": snippet.get('channelTitle', 'Unknown Channel'),
153
+ "publishedAt": snippet.get('publishedAt', ''),
154
+ "viewCount": stats.get('viewCount', '0'),
155
+ "likeCount": stats.get('likeCount', '0'),
156
+ "commentCount": stats.get('commentCount', '0')
157
+ }
158
+
159
+ text_parts.append(title)
160
+ text_parts.append(desc)
161
 
162
+ # 2. Ambil Komentar dengan detail
163
+ url_comm = f"https://www.googleapis.com/youtube/v3/commentThreads?part=snippet&videoId={video_id}&maxResults=20&order=relevance&key={api_key}"
164
  res_comm = requests.get(url_comm, timeout=5)
165
 
166
  if res_comm.status_code == 200:
167
  data = res_comm.json()
 
168
  for item in data.get("items", []):
169
+ comment_snippet = item["snippet"]["topLevelComment"]["snippet"]
170
+ raw_text = comment_snippet.get("textDisplay", "")
171
+ clean_text = re.sub(r'<[^>]+>', '', raw_text)
172
+ clean_text = html.unescape(clean_text)
173
+
174
+ result["comments"].append({
175
+ "text": clean_text,
176
+ "author": comment_snippet.get("authorDisplayName", "Anonymous"),
177
+ "authorImage": comment_snippet.get("authorProfileImageUrl", ""),
178
+ "likeCount": comment_snippet.get("likeCount", 0),
179
+ "publishedAt": comment_snippet.get("publishedAt", ""),
180
+ "replyCount": item["snippet"].get("totalReplyCount", 0)
181
+ })
182
+
183
+ text_parts.append(clean_text)
184
 
185
  if not text_parts:
186
  return None
187
+
188
+ result["text_for_analysis"] = " ".join(text_parts)
189
+ return result
190
 
191
  except Exception as e:
192
  print(f"❌ Official API Error: {e}")