File size: 23,903 Bytes
c7e5db4
 
 
 
 
 
 
 
f3ff343
11962ff
c7e5db4
 
 
 
 
 
 
11962ff
 
 
c7e5db4
 
 
 
 
 
 
 
 
 
 
 
 
 
050ab1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c7e5db4
050ab1a
 
 
 
c7e5db4
 
11962ff
 
dd7b216
c7e5db4
dd7b216
11962ff
dd7b216
11962ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c7e5db4
dd061e3
f3ff343
dd061e3
 
 
f3ff343
 
dd061e3
 
 
f3ff343
 
 
dd061e3
11962ff
f3ff343
dd061e3
 
 
 
 
 
 
 
 
 
96e3e26
dd061e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f3ff343
 
 
 
dd061e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11962ff
dd061e3
 
 
 
 
11962ff
dd061e3
 
c7e5db4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd061e3
c7e5db4
dd061e3
 
 
11962ff
dd061e3
11962ff
 
dd061e3
11962ff
 
 
96e3e26
11962ff
 
 
 
 
 
dd061e3
11962ff
 
 
 
 
 
 
 
 
 
 
 
 
 
dd061e3
11962ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd061e3
11962ff
 
 
 
 
 
 
 
 
 
 
050ab1a
 
11962ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c7e5db4
11962ff
 
050ab1a
 
 
 
 
 
dd061e3
 
 
 
 
11962ff
050ab1a
 
11962ff
 
 
 
 
 
 
050ab1a
11962ff
 
050ab1a
11962ff
050ab1a
 
 
 
 
 
c7e5db4
 
 
050ab1a
 
 
 
 
 
c7e5db4
 
 
 
 
11962ff
f949865
 
 
 
 
 
c7e5db4
 
 
f949865
 
c7e5db4
 
 
 
 
f949865
 
 
 
 
c7e5db4
 
f949865
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c7e5db4
f949865
 
c7e5db4
 
 
 
 
f949865
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c7e5db4
 
 
f949865
 
 
c7e5db4
 
11962ff
c7e5db4
 
 
 
 
 
 
 
 
 
 
 
 
11962ff
c7e5db4
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
import joblib
import os
import re
import requests
import numpy as np
import html
from deep_translator import GoogleTranslator
from youtube_transcript_api import YouTubeTranscriptApi

import time

# --- CONFIG PATH ---
# BASE_DIR is the parent of the directory that contains this file; the model
# artifacts are looked up in <BASE_DIR>/data.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
MBTI_PATH = os.path.join(BASE_DIR, 'data', 'model_mbti.pkl')
# NOTE(review): EMOTION_PATH is not referenced anywhere in the visible code —
# confirm whether it is still needed.
EMOTION_PATH = os.path.join(BASE_DIR, 'data', 'model_emotion.pkl')

# Module-level model caches. They start as None and are filled in by
# NLPHandler.load_models(); a value of None means "not loaded (yet)".
_model_mbti = None                    # object returned by joblib.load(MBTI_PATH)
_classifier_mbti_transformer = None   # transformers pipeline for MBTI labels
_classifier_roberta = None            # transformers pipeline, emotion model 1
_classifier_distilbert = None         # transformers pipeline, emotion model 2

# Display names for emotion labels. Keys are the lowercase labels emitted by
# the emotion classifiers; NLPHandler.predict_all looks a label up here to fill
# the "id" field of its result and falls back to the capitalized label when a
# key is missing. Values appear to be Indonesian translations.
EMOTION_TRANSLATIONS = {
    'admiration': 'Kagum', 'amusement': 'Terhibur', 'anger': 'Marah',
    'annoyance': 'Kesal', 'approval': 'Setuju', 'caring': 'Peduli',
    'confusion': 'Bingung', 'curiosity': 'Penasaran', 'desire': 'Keinginan',
    'disappointment': 'Kecewa', 'disapproval': 'Tidak Setuju', 'disgust': 'Jijik',
    'embarrassment': 'Malu', 'excitement': 'Semangat', 'fear': 'Takut',
    'gratitude': 'Bersyukur', 'grief': 'Berduka', 'joy': 'Gembira',
    'love': 'Cinta', 'nervousness': 'Gugup', 'optimism': 'Optimis',
    'pride': 'Bangga', 'realization': 'Sadar', 'relief': 'Lega',
    'remorse': 'Menyesal', 'sadness': 'Sedih', 'surprise': 'Terkejut',
    'neutral': 'Netral'
}


# Short description for each of the 16 four-letter MBTI codes. Every entry is a
# dict with an 'en' text and an 'id' text (the 'id' texts are written in
# informal Indonesian). NLPHandler.predict_all looks up the predicted type here
# and uses a generic description when the type is not one of these keys.
MBTI_EXPLANATIONS = {
    'ISTJ': {'en': "The Logistician. Practical and fact-minded individuals, whose reliability cannot be doubted.", 
             'id': "Si Organisator. Lo orangnya logis, praktis, dan bisa diandelin banget. Anti ribet-ribet club."},
    'ISFJ': {'en': "The Defender. Very dedicated and warm protectors, always ready to defend their loved ones.", 
             'id': "Si Pelindung. Hati lo lembut, setia, dan care banget sama orang terdekat. Temen curhat terbaik."},
    'INFJ': {'en': "The Advocate. Quiet and mystical, yet very inspiring and tireless idealists.", 
             'id': "Si Visioner Misterius. Lo peka, idealis, dan suka mikirin makna hidup mendalam. Langka nih!"},
    'INTJ': {'en': "The Architect. Imaginative and strategic thinkers, with a plan for everything.", 
             'id': "Si Strategis. Otak lo jalan terus, visioner, dan selalu punya rencana cadangan buat segala hal."},
    'ISTP': {'en': "The Virtuoso. Bold and practical experimenters, masters of all kinds of tools.", 
             'id': "Si Pengrajin. Lo cool, santuy, tapi jago banget mecahin masalah teknis secara praktis."},
    'ISFP': {'en': "The Adventurer. Flexible and charming artists, always ready to explore and experience something new.", 
             'id': "Si Seniman Bebas. Lo estetik, santai, dan suka banget nge-explore hal baru tanpa banyak drama."},
    'INFP': {'en': "The Mediator. Poetic, kind and altruistic people, always eager to help a good cause.", 
             'id': "Si Paling Perasa. Hati lo kayak kapas, puitis, idealis banget, dan selalu mau bikin dunia lebih baik."},
    'INTP': {'en': "The Logician. Innovative inventors with an unquenchable thirst for knowledge.", 
             'id': "Si Pemikir Kritis. Lo kepoan parah, logis abis, dan suka banget debat teori sampe pagi."},
    'ESTP': {'en': "The Entrepreneur. Smart, energetic and very perceptive people, who truly enjoy living on the edge.", 
             'id': "Si Pemberani. Lo enerjik, spontan, dan jago banget ngambil peluang dalam situasi mepet."},
    'ESFP': {'en': "The Entertainer. Spontaneous, energetic and enthusiastic people - life is never boring around them.", 
             'id': "Si Penghibur. Lo asik parah, spontan, dan selalu jadi pusat perhatian di tongkrongan."},
    'ENFP': {'en': "The Campaigner. Enthusiastic, creative and sociable free spirits, who can always find a reason to smile.", 
             'id': "Si Semangat 45. Lo kreatif, ramah, dan punya energi positif yang nular ke semua orang."},
    'ENTP': {'en': "The Debater. Smart and curious thinkers who cannot resist an intellectual challenge.", 
             'id': "Si Pendebat Ulung. Lo pinter, kritis, dan iseng banget suka mancing debat cuma buat seru-seruan."},
    'ESTJ': {'en': "The Executive. Excellent administrators, unsurpassed at managing things - or people.", 
             'id': "Si Bos Tegas. Lo jago ngatur, disiplin, dan gak suka liat ada yang lelet atau berantakan."},
    'ESFJ': {'en': "The Consul. Extraordinarily caring, social and popular people, always eager to help.", 
             'id': "Si Paling Gaul. Lo ramah, suka nolong, dan care banget sama harmoni di sirkel pertemanan."},
    'ENFJ': {'en': "The Protagonist. Charismatic and inspiring leaders, able to mesmerize their listeners.", 
             'id': "Si Pemimpin Karismatik. Lo jago banget ngomong, inspiratif, dan bisa bikin orang lain nurut sama lo."},
    'ENTJ': {'en': "The Commander. Bold, imaginative and strong-willed leaders, always finding a way - or making one.", 
             'id': "Si Jenderal. Lo ambisius, tegas, dan punya bakat alami buat mimpin dan naklukin tantangan."}
}

class NLPHandler:
    # Collection of static helpers: model loading, translation to English,
    # keyword extraction, the combined MBTI/emotion prediction (predict_all),
    # and fetching YouTube text for analysis.

    @staticmethod
    def load_models():
        """Fill the module-level model caches, loading each model at most once.

        Every call prints the base directory. A cache slot that is already set
        is skipped. A model that fails to load is reported with ``print`` and
        its slot stays ``None``, so a later call tries it again. The SVM model
        is only attempted when ``MBTI_PATH`` exists on disk.
        """
        global _model_mbti, _classifier_mbti_transformer, _classifier_roberta, _classifier_distilbert
        print(f"Loading models from: {BASE_DIR}")

        # SVM model stored as a joblib artifact next to the project data.
        svm_pending = _model_mbti is None
        if svm_pending and os.path.exists(MBTI_PATH):
            try:
                print(f"Loading MBTI Model (SVM) from: {MBTI_PATH}")
                _model_mbti = joblib.load(MBTI_PATH)
            except Exception as svm_error:
                print(f"MBTI Load Error: {svm_error}")

        # `transformers` is imported inside each try block so that a failing
        # import is reported exactly like any other load error.
        if _classifier_mbti_transformer is None:
            try:
                print("Loading MBTI Model (Transformer): parka735/mbti-classifier")
                from transformers import pipeline
                _classifier_mbti_transformer = pipeline(
                    "text-classification",
                    model="parka735/mbti-classifier",
                    top_k=1,
                )
            except Exception as mbti_error:
                print(f"MBTI Transformer Load Error: {mbti_error}")

        if _classifier_roberta is None:
            try:
                print("Loading Emotion Model 1: SamLowe/roberta-base-go_emotions")
                from transformers import pipeline
                _classifier_roberta = pipeline(
                    "text-classification",
                    model="SamLowe/roberta-base-go_emotions",
                    top_k=None,
                )
            except Exception as roberta_error:
                print(f"Emotion 1 Load Error: {roberta_error}")

        if _classifier_distilbert is None:
            try:
                print("Loading Emotion Model 2: joeddav/distilbert-base-uncased-go-emotions-student")
                from transformers import pipeline
                _classifier_distilbert = pipeline(
                    "text-classification",
                    model="joeddav/distilbert-base-uncased-go-emotions-student",
                    top_k=None,
                )
            except Exception as distilbert_error:
                print(f"Emotion 2 Load Error: {distilbert_error}")

    # --- GEMINI VALIDATOR SETUP ---
    # Class-level cache for the Gemini client. It stays None until
    # _init_gemini() manages to create a client from GEMINI_API_KEY.
    _gemini_client = None
    
    @staticmethod
    def _init_gemini():
        """Initialize Gemini Client for validation (lazy loading)"""
        if NLPHandler._gemini_client is None:
            api_key = os.getenv("GEMINI_API_KEY")
            if api_key:
                try:
                    from google import genai
                    NLPHandler._gemini_client = genai.Client(api_key=api_key)
                    print("Gemini Validator Ready (google-genai SDK)")
                except Exception as e:
                    print(f"Gemini Init Failed: {e}")
        return NLPHandler._gemini_client is not None
    
    @staticmethod
    def _validate_with_gemini(text, ml_prediction):
        """
        Use Gemini to validate ML prediction.
        Returns: (validated_mbti, confidence, reasoning)
        """
        if not NLPHandler._init_gemini():
            return ml_prediction, 0.6, "ML only (Gemini unavailable)"
        

        
        prompt = f"""You are an MBTI expert. Analyze this text and determine the MOST LIKELY MBTI type based ONLY on the content.

TEXT TO ANALYZE:
"{text}"

ANALYSIS FRAMEWORK:
1. I/E (Introversion/Extraversion):
   - E indicators: Mentions of social events, leading teams, networking, group activities, energized by people
   - I indicators: Preference for solitude, reflection, working alone, drained by social interaction

2. N/S (Intuition/Sensing):
   - N indicators: Abstract thinking, future-focused, big picture, patterns, possibilities, theory
   - S indicators: Concrete details, present-focused, practical, facts, reality, hands-on

3. T/F (Thinking/Feeling):
   - T indicators: Logic, efficiency, objectivity, direct communication, "facts over feelings"
   - F indicators: Empathy, harmony, values, subjective decisions, people-focused

4. J/P (Judging/Perceiving):
   - J indicators: Planning, structure, deadlines, organization, schedules, decisive
   - P indicators: Spontaneous, flexible, adaptable, open-ended, exploratory

CRITICAL INSTRUCTIONS:
- Analyze INDEPENDENTLY - ignore any preconceptions
- Look for EXPLICIT behavioral indicators in the text
- Weight E/I heavily on social energy language (not just content topic)
- If text mentions "leading", "networking", "team meetings" → strong E signal
- If text emphasizes "planning", "deadlines", "structure" → strong J signal

Respond in this EXACT format:
MBTI: [4-letter type]
CONFIDENCE: [0.0-1.0]
REASON: [One sentence citing specific text evidence]

Example:
MBTI: ENTJ
CONFIDENCE: 0.88
REASON: Explicit mentions of networking, leading teams, and structured planning indicate ENTJ.
"""
        
        try:
            response = NLPHandler._gemini_client.models.generate_content(
                model='gemini-2.0-flash', 
                contents=prompt
            )
            result_text = response.text.strip()
            
            # Parse response
            lines = result_text.split('\n')
            validated_mbti = ml_prediction
            confidence = 0.7
            reason = "Gemini validation"
            
            for line in lines:
                if line.startswith('MBTI:'):
                    validated_mbti = line.split(':', 1)[1].strip().upper()
                elif line.startswith('CONFIDENCE:'):
                    try:
                        confidence = float(line.split(':', 1)[1].strip())
                    except:
                        confidence = 0.7
                elif line.startswith('REASON:'):
                    reason = line.split(':', 1)[1].strip()
            
            # Validate MBTI format (must be 4 chars)
            if len(validated_mbti) != 4 or not all(c in 'IENTFSJP' for c in validated_mbti):
                print(f"Invalid Gemini MBTI: {validated_mbti}, using ML: {ml_prediction}")
                return ml_prediction, 0.6, "Invalid Gemini response - using ML"
            
            return validated_mbti, confidence, reason
            
        except Exception as e:
            print(f"Gemini Validation Error: {e}")
            return ml_prediction, 0.6, f"Gemini error - using ML"

    @staticmethod
    def translate_to_english(text):
        try:
            if len(text) > 4500: text = text[:4500]
            return GoogleTranslator(source='auto', target='en').translate(text)
        except: return text

    @staticmethod
    def extract_keywords(text):
        stopwords = ["the", "and", "is", "to", "in", "it", "of", "for", "with", "on", "that", "this", "my", "was", "as", "are", "have", "you", "but", "so", "ini", "itu", "dan", "yang", "di", "ke"]
        words = re.findall(r'\w+', text.lower())
        filtered = [w for w in words if len(w) > 3 and w not in stopwords]
        freq = {}
        for w in filtered: freq[w] = freq.get(w, 0) + 1
        sorted_words = sorted(freq.items(), key=lambda x: x[1], reverse=True)
        
        keywords_en = [w[0] for w in sorted_words[:5]]
        keywords_id = []
        try:
            translator = GoogleTranslator(source='auto', target='id')
            for k in keywords_en: keywords_id.append(translator.translate(k))
        except: keywords_id = keywords_en 
        return {"en": keywords_en, "id": keywords_id}

    @staticmethod
    def predict_all(raw_text):
        """Run the full analysis (MBTI, emotion, keywords) on ``raw_text``.

        The text is first passed through ``translate_to_english``; every model
        and the keyword extraction then work on that processed text.

        Args:
            raw_text: User text in any language.

        Returns:
            dict with the keys
            ``"mbti"`` (type string, or ``"UNKNOWN"`` when the MBTI models are
            not both loaded),
            ``"emotion"`` (dict with ``id``/``en``/``raw`` labels and a ``list``
            of up to three ``{"en", "id", "score"}`` entries),
            ``"keywords"`` (result of ``extract_keywords``) and
            ``"reasoning"`` (dict with ``mbti``, ``emotion`` and ``keywords``
            explanation dicts).
        """
        NLPHandler.load_models()
        processed_text = NLPHandler.translate_to_english(raw_text)

        mbti_result, mbti_confidence, mbti_reasoning = NLPHandler._predict_mbti(processed_text)
        emotion_data = NLPHandler._predict_emotion(processed_text)

        # --- REASONING GENERATION ---
        # Copy the table entry: the per-request keys added below must not be
        # written into the shared MBTI_EXPLANATIONS dict, otherwise they would
        # show up in later, unrelated results.
        mbti_desc = dict(MBTI_EXPLANATIONS.get(mbti_result, {
            'en': "Complex personality type.",
            'id': "Kepribadian yang cukup kompleks."
        }))
        if mbti_reasoning:
            mbti_desc['validation'] = mbti_reasoning
            mbti_desc['confidence'] = mbti_confidence

        # Human-readable "Label (NN%)" summary of the top emotions.
        em_list_str = ", ".join(
            f"{item['en']} ({int(item['score'] * 100)}%)" for item in emotion_data['list']
        )
        emotion_reasoning = {
            'en': f"Dominant emotion is '{emotion_data['en']}'. Mix: {em_list_str}.",
            'id': f"Emosi dominan '{emotion_data['id']}'. Campuran: {em_list_str}."
        }

        keywords_reasoning = {
            'en': "These words appeared most frequently and define the main topic.",
            'id': "Kata-kata ini paling sering muncul dan jadi inti topik lo."
        }

        return {
            "mbti": mbti_result,
            "emotion": emotion_data,
            "keywords": NLPHandler.extract_keywords(processed_text),
            "reasoning": {
                "mbti": mbti_desc,
                "emotion": emotion_reasoning,
                "keywords": keywords_reasoning
            }
        }

    @staticmethod
    def _predict_mbti(processed_text):
        """Return ``(mbti, confidence, reasoning)`` from the SVM/transformer vote.

        When both models agree the shared label is returned with confidence
        0.95. When they disagree Gemini decides, with the SVM label as the
        fallback. Without both models loaded the result is
        ``("UNKNOWN", 0.0, "")``.
        """
        if not (_model_mbti and _classifier_mbti_transformer):
            return "UNKNOWN", 0.0, ""

        try:
            # 1. SVM prediction, normalised like the transformer label so the
            #    two can be compared and looked up in MBTI_EXPLANATIONS.
            svm_pred = str(_model_mbti.predict([processed_text])[0]).strip().upper()

            # 2. Transformer prediction on a truncated input.
            trans_res = _classifier_mbti_transformer(processed_text[:2000])
            # Output can be {'label': ..}, [{'label': ..}] or [[{'label': ..}]].
            while isinstance(trans_res, list):
                trans_res = trans_res[0]
            trans_pred = trans_res['label'].upper()
            trans_conf = trans_res['score']

            print(f"[Voting] SVM='{svm_pred}' vs Transformer='{trans_pred}' ({trans_conf:.2%})")

            # 3. Consensus: agreement is accepted without calling Gemini.
            if svm_pred == trans_pred:
                print("[Check] Models AGREE! Auto-approving.")
                return svm_pred, 0.95, f"Both AI models agreed strictly on {svm_pred}."

            print("[Warning] Models DISAGREE! Summoning Gemini Judge...")
            # The second argument is what comes back when Gemini is unavailable
            # or answers badly, so it must be an actual MBTI label — not a
            # descriptive sentence.
            verdict, confidence, reason = NLPHandler._validate_with_gemini(
                processed_text, svm_pred
            )
            print(f"[Gemini] Verdict: {verdict} (Confidence: {confidence})")
            return verdict, confidence, reason

        except Exception as e:
            print(f"[Error] Hybrid MBTI Error: {e}")
            fallback_reason = "System fallback due to hybrid error."
            # Fall back to the SVM alone; if that fails too, use the fixed
            # default type.
            try:
                svm_only = str(_model_mbti.predict([processed_text])[0]).strip().upper()
                return svm_only, 0.4, fallback_reason
            except Exception:
                return "INTJ", 0.0, fallback_reason

    @staticmethod
    def _predict_emotion(processed_text):
        """Return the emotion dict built from the loaded emotion classifiers.

        Scores of the same label are summed across classifiers and divided by
        the number of classifiers that actually ran, the ``neutral`` label is
        dropped, and the top three labels are reported. Any error, or no
        remaining label, yields the default "Neutral" dict with an empty list.
        """
        emotion_data = {"id": "Netral", "en": "Neutral", "raw": "neutral", "list": []}

        try:
            emo_input = processed_text[:1500]  # truncate for safety

            combined_scores = {}
            models_used = 0
            for classifier in (_classifier_roberta, _classifier_distilbert):
                if not classifier:
                    continue
                results = classifier(emo_input)
                # Unwrap the batched shape [[{...}, ...]] to [{...}, ...].
                if isinstance(results, list) and results and isinstance(results[0], list):
                    results = results[0]
                for item in results:
                    label = item['label']
                    combined_scores[label] = combined_scores.get(label, 0) + item['score']
                models_used += 1

            # Neutral is never reported as the dominant emotion.
            combined_scores.pop('neutral', None)
            ranked = sorted(combined_scores.items(), key=lambda pair: pair[1], reverse=True)

            if not ranked:
                print("Emotion Hybrid: No clear emotion found (Neutral)")
                return emotion_data

            # Average over the classifiers that contributed (a fixed "/ 2"
            # would halve every score when only one model is loaded).
            best_label, best_total = ranked[0]
            emotion_data = {
                "id": EMOTION_TRANSLATIONS.get(best_label, best_label.capitalize()),
                "en": best_label.capitalize(),
                "raw": best_label,
                "list": [
                    {
                        "en": label.capitalize(),
                        "id": EMOTION_TRANSLATIONS.get(label, label.capitalize()),
                        "score": score / models_used
                    }
                    for label, score in ranked[:3]
                ]
            }
            print(f"Emotion Hybrid Top 1: {emotion_data['en']} ({best_total / models_used:.2%})")

        except Exception as e:
            print(f"Emotion Prediction Error: {e}")

        return emotion_data

    # --- JALUR RESMI: YOUTUBE DATA API ---
    @staticmethod
    def _fetch_official_api(video_id, api_key):
        """Fetch video metadata and top comments via the YouTube Data API v3.

        Args:
            video_id: YouTube video id supplied by the caller.
            api_key: YouTube Data API key.

        Returns:
            dict | None: ``{"video": {...} | None, "comments": [...],
            "text_for_analysis": str}`` where the text is the title,
            description and comment texts joined by spaces. ``None`` when no
            text could be collected or when any request/parsing step raises.
        """
        print(f"Using Official API Key for {video_id}...")

        result = {
            "video": None,
            "comments": [],
            "text_for_analysis": ""
        }
        text_parts = []
        api_root = "https://www.googleapis.com/youtube/v3"

        try:
            # 1. Fetch the video metadata. Query values go through `params` so
            #    they are URL-encoded instead of being pasted into the URL.
            res_meta = requests.get(
                f"{api_root}/videos",
                params={"part": "snippet,statistics", "id": video_id, "key": api_key},
                timeout=5,
            )

            if res_meta.status_code == 200:
                items = res_meta.json().get("items") or []
                if items:
                    item = items[0]
                    snippet = item["snippet"]
                    stats = item.get("statistics", {})

                    # Unescape HTML entities
                    title = html.unescape(snippet.get('title', ''))
                    desc = html.unescape(snippet.get('description', ''))

                    # Pick the largest thumbnail that is present.
                    thumbnails = snippet.get('thumbnails', {})
                    thumbnail = (
                        thumbnails.get('maxres')
                        or thumbnails.get('high')
                        or thumbnails.get('medium')
                        or thumbnails.get('default', {})
                    ).get('url', '')

                    result["video"] = {
                        "title": title,
                        "description": desc,
                        "thumbnail": thumbnail,
                        "channel": snippet.get('channelTitle', 'Unknown Channel'),
                        "publishedAt": snippet.get('publishedAt', ''),
                        "viewCount": stats.get('viewCount', '0'),
                        "likeCount": stats.get('likeCount', '0'),
                        "commentCount": stats.get('commentCount', '0')
                    }

                    text_parts.append(title)
                    text_parts.append(desc)
            else:
                print(f"Official API: metadata request returned HTTP {res_meta.status_code}")

            # 2. Fetch the most relevant top-level comments with their details.
            res_comm = requests.get(
                f"{api_root}/commentThreads",
                params={
                    "part": "snippet",
                    "videoId": video_id,
                    "maxResults": 20,
                    "order": "relevance",
                    "key": api_key,
                },
                timeout=5,
            )

            if res_comm.status_code == 200:
                for item in res_comm.json().get("items", []):
                    comment_snippet = item["snippet"]["topLevelComment"]["snippet"]
                    raw_text = comment_snippet.get("textDisplay", "")
                    # textDisplay may carry HTML markup: drop tags, then entities.
                    clean_text = html.unescape(re.sub(r'<[^>]+>', '', raw_text))

                    result["comments"].append({
                        "text": clean_text,
                        "author": comment_snippet.get("authorDisplayName", "Anonymous"),
                        "authorImage": comment_snippet.get("authorProfileImageUrl", ""),
                        "likeCount": comment_snippet.get("likeCount", 0),
                        "publishedAt": comment_snippet.get("publishedAt", ""),
                        "replyCount": item["snippet"].get("totalReplyCount", 0)
                    })

                    text_parts.append(clean_text)
            else:
                print(f"Official API: comments request returned HTTP {res_comm.status_code}")

            if not text_parts:
                return None

            result["text_for_analysis"] = " ".join(text_parts)
            return result

        except Exception as e:
            # Error messages may quote the request URL, which contains the key;
            # mask it before logging.
            message = str(e)
            if api_key:
                message = message.replace(str(api_key), "***")
            print(f"Official API Error: {message}")
            return None

    @staticmethod
    def fetch_youtube_transcript(video_id):
        """Collect analysable text for a YouTube video.

        Order of attempts:
          1. The official Data API, when ``YOUTUBE_API_KEY`` is set.
          2. The video transcript via ``youtube_transcript_api``.

        Args:
            video_id: YouTube video id.

        Returns:
            dict | str | None: NOTE the two paths return different types — the
            official API path returns the dict built by ``_fetch_official_api``,
            the transcript fallback returns the cleaned transcript as a plain
            string. ``None`` when both paths fail.
        """
        # 1. First priority: use the official API when a key is configured.
        api_key = os.getenv("YOUTUBE_API_KEY")

        if api_key:
            official_data = NLPHandler._fetch_official_api(video_id, api_key)
            if official_data:
                return official_data

        # 2. Second priority: fall back to the transcript.
        print(f"Fetching transcript (fallback) for: {video_id}")
        languages = ['id', 'en', 'en-US']
        try:
            if hasattr(YouTubeTranscriptApi, "get_transcript"):
                # Older youtube-transcript-api: static call returning dicts.
                segments = YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
                texts = [segment['text'] for segment in segments]
            else:
                # Newer releases removed get_transcript in favour of an
                # instance-level fetch() that yields snippet objects.
                fetched = YouTubeTranscriptApi().fetch(video_id, languages=languages)
                texts = [snippet.text for snippet in fetched]

            full_text = " ".join(texts)
            # Drop bracketed/parenthesised annotations such as "[Music]".
            clean_text = re.sub(r'\[.*?\]|\(.*?\)', '', full_text).strip()
            # Unescape HTML entities in the transcript text as well.
            return html.unescape(clean_text)
        except Exception as e:
            # Best-effort fallback: report the reason instead of hiding it.
            print(f"Transcript Fallback Error: {e}")

        return None