File size: 9,839 Bytes
4fb045a
 
67e98cd
 
 
 
4fb045a
 
 
 
 
 
 
 
 
67e98cd
4fb045a
 
 
 
67e98cd
 
 
4fb045a
67e98cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4fb045a
67e98cd
 
 
4fb045a
67e98cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4fb045a
67e98cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4fb045a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67e98cd
4fb045a
 
 
 
67e98cd
 
 
 
 
 
 
 
 
 
 
4fb045a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
import streamlit as st
import re
import time
import json
import requests
from urllib.parse import urlparse, parse_qs
from youtube_transcript_api import YouTubeTranscriptApi
import plotly.graph_objects as go

# =============================================================================
# CONFIG
# =============================================================================
# Page-level settings; the title emoji was stored as mis-decoded UTF-8 and is
# restored here so the browser tab shows the intended text.
st.set_page_config(page_title="🔥 ViralScope AI", page_icon="🎬", layout="wide")

# =============================================================================
# ROBUST TRANSCRIPT FETCHER (Multi-Layer Fallback)
# =============================================================================
def _extract_caption_tracks(page_html: str) -> list[dict]:
    """Return the ``captionTracks`` JSON array embedded in a watch page.

    The array is decoded with ``raw_decode`` starting at the opening bracket,
    so nested arrays/objects inside a track entry do not truncate the result
    (a non-greedy ``\\[.*?\\]`` regex would stop at the first ``]``).

    Returns an empty list when the marker is absent or the value is not a list.
    Raises ``json.JSONDecodeError`` when the embedded value is not valid JSON.
    """
    marker = '"captionTracks":'
    marker_pos = page_html.find(marker)
    if marker_pos == -1:
        return []

    # raw_decode does not skip leading whitespace, so do it explicitly.
    value_pos = marker_pos + len(marker)
    while value_pos < len(page_html) and page_html[value_pos].isspace():
        value_pos += 1

    tracks, _ = json.JSONDecoder().raw_decode(page_html, value_pos)
    return tracks if isinstance(tracks, list) else []


def _parse_json3_events(data: dict) -> list[dict]:
    """Convert a ``json3`` caption payload into ``text``/``start``/``duration`` dicts.

    Events lacking ``segs`` or ``tStartMs`` and events whose joined text is
    blank are skipped. Millisecond fields are converted to seconds.
    """
    entries = []
    for event in data.get('events', []):
        if 'segs' not in event or 'tStartMs' not in event:
            continue
        text = ''.join(seg.get('utf8', '') for seg in event['segs']).strip()
        if text:
            entries.append({
                'text': text,
                'start': event['tStartMs'] / 1000,
                'duration': event.get('dDurationMs', 0) / 1000,
            })
    return entries


@st.cache_data(ttl=1800)
def fetch_transcript_robust(video_id: str) -> list[dict] | None:
    """
    Fetch a video's transcript, trying several strategies in order.

    Layer 1: youtube-transcript-api (``get_transcript``), one language at a time
             in priority order.
    Layer 2: direct HTTP request - scrape the watch page for ``captionTracks``
             and download the first track in ``json3`` format.
    Layer 3: error handler that surfaces the exception type as a warning.

    Args:
        video_id: The YouTube video ID.

    Returns:
        A list of ``{'text', 'start', 'duration'}`` dicts, or ``None`` when no
        transcript could be obtained.

    NOTE: the result (including ``None``) is cached by ``st.cache_data`` for
    1800 seconds per ``video_id``.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9,id;q=0.8'
    }

    # LAYER 1: library call, trying preferred languages in order.
    languages = ['id', 'en', 'es', 'de', 'fr', 'pt', 'ru', 'ja', 'ko']
    for lang in languages:
        try:
            transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang])
            # Very short results (3 entries or fewer) are treated as unusable.
            if transcript and len(transcript) > 3:
                return transcript
        except Exception:
            # Deliberate best-effort: any failure just moves on to the next
            # language and, ultimately, to the HTTP fallback below.
            continue

    # LAYER 2: direct HTTP fallback to YouTube's caption endpoint.
    try:
        # Fetch the watch page to extract the caption track URL.
        video_page = requests.get(f"https://www.youtube.com/watch?v={video_id}", headers=headers, timeout=10)
        video_page.raise_for_status()

        caption_tracks = _extract_caption_tracks(video_page.text)
        if not caption_tracks:
            return None

        # Use the first listed track.
        track_url = caption_tracks[0].get('baseUrl')
        if not track_url:
            return None

        # Explicitly request the json3 format; without it the endpoint serves
        # its default (XML) payload and ``.json()`` below would fail.
        transcript_resp = requests.get(track_url, params={'fmt': 'json3'}, headers=headers, timeout=10)
        transcript_resp.raise_for_status()

        # Convert to the same shape the library returns.
        result = _parse_json3_events(transcript_resp.json())
        return result if result else None

    except Exception as e:
        # LAYER 3: report the failure type and give up.
        st.warning(f"⚠️ Semua metode gagal. Detail: {type(e).__name__}")
        return None

def extract_video_id(url: str) -> str | None:
    patterns = [
        r'(?:v=|\/)([a-zA-Z0-9_-]{11})(?:\&|\/|$)',
        r'youtu\.be\/([a-zA-Z0-9_-]{11})',
        r'^([a-zA-Z0-9_-]{11})$'
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None

def format_time(seconds: float) -> str:
    """Render a duration in seconds as zero-padded ``MM:SS``.

    Fractional seconds are truncated. Minutes are not wrapped into hours, so
    one hour is rendered as ``60:00``.
    """
    whole_seconds = int(seconds)
    minutes = whole_seconds // 60
    remainder = whole_seconds % 60
    return "{:02d}:{:02d}".format(minutes, remainder)

# =============================================================================
# ANALYSIS ENGINE (Lightweight, Pure Python)
# =============================================================================
# High-arousal vocabulary (English and Indonesian). Some entries are phrases.
_HIGH_AROUSAL_TERMS = (
    'shocking', 'viral', 'wow', 'amazing', 'gila', 'luar biasa',
    'terkejut', 'marah', 'sedih', 'rahasia', 'ternyata', 'twist',
    'unbelievable', 'mind-blowing', 'breakthrough', 'first time',
)

# One alternation matching any term as a whole word/phrase. The lookarounds
# reject partial hits (e.g. "viral" inside "virality"); longer terms are listed
# first so a phrase is preferred over any shorter alternative.
_HIGH_AROUSAL_RE = re.compile(
    r'(?<!\w)(?:'
    + '|'.join(re.escape(term) for term in sorted(_HIGH_AROUSAL_TERMS, key=len, reverse=True))
    + r')(?!\w)'
)


def calculate_emotional_intensity(text: str) -> float:
    """Score how emotionally charged *text* is on a 20-100 scale.

    Every whole-word (or whole-phrase) occurrence of a high-arousal term adds
    16 points to a base of 20; the result is capped at 100. Matching is
    case-insensitive.

    Multi-word terms such as ``'luar biasa'``, ``'mind-blowing'`` and
    ``'first time'`` are matched as phrases. Splitting the text into single
    words first, as a plain token lookup would, can never match them.

    Args:
        text: Transcript text to score.

    Returns:
        The intensity score (``20`` when no term occurs).
    """
    matches = len(_HIGH_AROUSAL_RE.findall(text.lower()))
    return min(100, 20 + matches * 16)

def detect_hook_strength(transcript: list[dict], first_n_seconds: int = 30) -> float:
    """Rate the opening of a transcript for attention-grabbing phrases.

    Text from every entry whose ``start`` (default 0 when missing) is at most
    ``first_n_seconds`` is lower-cased and joined with spaces. Each hook phrase
    found anywhere in that text (substring match, counted once per phrase)
    adds 15 points to a base of 30; the result is capped at 100.
    """
    hook_phrases = ['rahasia', 'ternyata', 'jangan', 'peringatan', 'viral', 'shocking',
                    'unbelievable', 'you won\'t believe', 'wait until', 'plot twist']

    opening_parts = []
    for entry in transcript:
        if entry.get('start', 0) <= first_n_seconds:
            opening_parts.append(entry['text'].lower())
    opening = " ".join(opening_parts)

    hits = 0
    for phrase in hook_phrases:
        if phrase in opening:
            hits += 1

    return min(100, 30 + 15 * hits)

def identify_viral_segments(transcript: list[dict], window: int = 30) -> list[dict]:
    """Rank windows of the transcript by a viral-potential score.

    A candidate window is opened at every transcript entry. Its text is the
    space-joined ``text`` of that entry and all later list entries whose
    ``start`` lies no more than ``window`` seconds after the window's start.
    Windows with blank text are dropped.

    The score is ``emotion * 0.6 + boost + 25`` clamped to 0-100, where
    ``emotion`` comes from ``calculate_emotional_intensity`` and ``boost`` is
    20 when the text contains any of a small set of trigger words.

    Returns:
        Up to five candidates, highest score first, each with ``start``,
        ``end``, ``preview`` (first 120 characters plus ``...``), ``score``
        and ``emotion``.
    """
    boost_terms = ['shocking', 'rahasia', 'viral', 'twist']
    scored = []

    for idx, entry in enumerate(transcript):
        begin = entry['start']

        pieces = []
        for later in transcript[idx:]:
            if later['start'] - begin <= window:
                pieces.append(later['text'])
        snippet = " ".join(pieces)
        if not snippet.strip():
            continue

        intensity = calculate_emotional_intensity(snippet)
        lowered = snippet.lower()
        bonus = 20 if any(term in lowered for term in boost_terms) else 0
        clamped = min(100, max(0, intensity * 0.6 + bonus + 25))

        scored.append({
            "start": begin,
            "end": begin + window,
            "preview": snippet[:120] + "...",
            "score": round(clamped, 1),
            "emotion": round(intensity, 1)
        })

    # Stable descending sort: equal scores keep transcript order.
    scored.sort(key=lambda candidate: candidate['score'], reverse=True)
    return scored[:5]

def calculate_overall_score(transcript: list[dict]) -> dict:
    """Combine the individual metrics into one overall viral score.

    Components:
      * emotional intensity of the full transcript text,
      * hook strength of the opening (``detect_hook_strength`` defaults),
      * content density: one point per five whitespace-separated words,
        capped at 100.

    The overall score is ``emotion * 0.25 + hook * 0.20 + density * 0.15 + 40``
    rounded to one decimal and capped at 100.

    Returns:
        ``{"score": ..., "breakdown": {...}}`` where ``breakdown`` maps each
        component's display label to its value.
    """
    full_text = " ".join(entry['text'] for entry in transcript)

    emotion = calculate_emotional_intensity(full_text)
    hook = detect_hook_strength(transcript)
    word_count = len(full_text.split())
    density = min(100, word_count // 5)

    weighted = emotion * 0.25 + hook * 0.20 + density * 0.15 + 40
    breakdown = {
        "Emotional Intensity": round(emotion, 1),
        "Hook Strength": round(hook, 1),
        "Content Density": round(density, 1)
    }
    return {"score": min(100, round(weighted, 1)), "breakdown": breakdown}

# =============================================================================
# STREAMLIT UI
# =============================================================================
def main():
    """Render the ViralScope AI page and run the analysis when requested.

    Flow: read a YouTube URL (or load the sample video via the test button),
    resolve the video ID, fetch the transcript, then show the overall score
    gauge, the score breakdown and the top-ranked segments. Every failure path
    shows an error message and returns early.
    """
    st.title("🔥 ViralScope AI")
    st.markdown("Analisis potensi viral & temukan segment terbaik untuk Shorts/Reels secara instan.")

    def _load_test_video() -> None:
        # Runs as a button callback, i.e. before the script reruns, so the
        # keyed text input below picks the value up and KEEPS it on later
        # reruns. Storing the URL in a throwaway session key that is deleted
        # on the next run would leave the field empty by the time the analyse
        # button is clicked.
        st.session_state["url_input"] = "https://www.youtube.com/watch?v=jNQXAC9IVRw"

    col1, col2 = st.columns([3, 1])
    with col1:
        url = st.text_input("🔗 URL YouTube", placeholder="https://www.youtube.com/watch?v=...", key="url_input")
    with col2:
        st.button("🧪 Video Test", use_container_width=True, on_click=_load_test_video)

    if st.button("🚀 Analisis Sekarang", type="primary", use_container_width=True):
        if not url:
            st.error("❌ Masukkan URL YouTube terlebih dahulu.")
            return

        vid_id = extract_video_id(url)
        if not vid_id:
            st.error("❌ Format URL tidak valid.")
            return

        with st.spinner("📡 Menghubungkan ke YouTube (Layer 1 & 2)..."):
            transcript = fetch_transcript_robust(vid_id)

        if not transcript:
            st.error("""
            🚫 **Transcript tidak terambil meskipun CC terlihat di YouTube.**
            
            🔍 **Penyebab Umum:**
            • IP Hugging Face Spaces diblokir/di-throttle YouTube
            • Video hanya punya "Auto-generated" yang dilindungi anti-bot
            • Region Indonesia terkena rate limit YouTube API
            
            💡 **Solusi Cepat:**
            1. Coba video dengan subtitle **Manual** (bukan auto-generated)
            2. Gunakan tombol 🧪 *Video Test* di atas (video lama YouTube, pasti lolos)
            3. Deploy alternatif di [Streamlit Community Cloud](https://streamlit.io/cloud) (IP lebih stabil)
            """)
            return

        with st.spinner("🧠 Menganalisis potensi viral..."):
            # NOTE(review): presumably a short pause so the spinner is visible;
            # the analysis itself does not need it.
            time.sleep(0.3)
            overall = calculate_overall_score(transcript)
            segments = identify_viral_segments(transcript)

        st.divider()
        col1, col2 = st.columns([1, 2])
        with col1:
            # Gauge of the overall score with three coloured bands.
            fig = go.Figure(go.Indicator(
                mode="gauge+number", value=overall['score'],
                gauge={'axis': {'range': [0, 100]}, 'bar': {'color': 'crimson'},
                       'steps': [{'range': [0, 50], 'color': '#ffebee'},
                                 {'range': [50, 75], 'color': '#fff3e0'},
                                 {'range': [75, 100], 'color': '#e8f5e9'}]}
            ))
            fig.update_layout(height=220, margin=dict(l=10, r=10, t=30, b=10))
            st.plotly_chart(fig, use_container_width=True)

        with col2:
            st.subheader("📊 Detail Skor")
            for k, v in overall['breakdown'].items():
                st.metric(k, f"{v}/100")

        st.subheader("🎯 Top 5 Segment Viral")
        for i, seg in enumerate(segments, 1):
            # Only the best-ranked segment starts expanded.
            with st.expander(f"#{i} | Score: {seg['score']} | ⏱️ {format_time(seg['start'])}-{format_time(seg['end'])}", expanded=(i==1)):
                st.markdown(f"*\"{seg['preview']}\"*")
                st.caption(f"Emosi: {seg['emotion']}/100 | 💡 Cocok untuk Reels/TikTok/Shorts")

        st.divider()
        st.success("✅ Analisis selesai! Gunakan segment di atas sebagai hook konten pendek Anda.")

# Entry point: build the page only when this file is executed as the main
# module, not when it is imported.
if __name__ == "__main__":
    main()