Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import re | |
| import time | |
| import json | |
| import requests | |
| from urllib.parse import urlparse, parse_qs | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| import plotly.graph_objects as go | |
| # ============================================================================= | |
| # CONFIG | |
| # ============================================================================= | |
# Page-level settings: browser-tab title, tab icon, and the wide layout.
st.set_page_config(
    page_title="π₯ ViralScope AI",
    page_icon="π¬",
    layout="wide",
)
| # ============================================================================= | |
| # ROBUST TRANSCRIPT FETCHER (Multi-Layer Fallback) | |
| # ============================================================================= | |
def _extract_caption_tracks(page_html: str) -> list[dict]:
    """Return the ``captionTracks`` array embedded in a watch page, or ``[]``.

    The array is decoded with a real JSON parser starting at its opening
    bracket, so nested arrays inside a track entry do not truncate it (a
    non-greedy ``\\[.*?\\]`` regex stops at the first ``]`` it meets).
    """
    marker = re.search(r'"captionTracks":\s*(?=\[)', page_html)
    if not marker:
        return []
    try:
        tracks, _ = json.JSONDecoder().raw_decode(page_html, marker.end())
    except json.JSONDecodeError:
        return []
    return tracks if isinstance(tracks, list) else []


def _parse_json3_events(payload: dict) -> list[dict]:
    """Convert a ``json3`` caption payload into ``text``/``start``/``duration`` dicts."""
    segments = []
    for event in payload.get('events', []):
        if 'segs' not in event or 'tStartMs' not in event:
            continue
        text = ''.join(seg.get('utf8', '') for seg in event.get('segs', [])).strip()
        if text:
            segments.append({
                'text': text,
                # Timings arrive in milliseconds; the rest of the app uses seconds.
                'start': event['tStartMs'] / 1000,
                'duration': event.get('dDurationMs', 0) / 1000,
            })
    return segments


def fetch_transcript_robust(video_id: str) -> list[dict] | None:
    """Fetch a transcript for *video_id*, trying two strategies in order.

    Layer 1: ``YouTubeTranscriptApi.get_transcript`` for each preferred
        language in turn; the first result with more than 3 entries wins.
    Layer 2: download the watch page, read the first caption track's
        ``baseUrl`` and request it in ``json3`` format.

    Returns:
        A list of ``{'text', 'start', 'duration'}`` dicts (times in seconds),
        or ``None`` when no transcript could be obtained. If Layer 2 raises,
        a Streamlit warning naming the exception type is shown.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9,id;q=0.8'
    }

    # LAYER 1: the library, one preferred language at a time.
    languages = ['id', 'en', 'es', 'de', 'fr', 'pt', 'ru', 'ja', 'ko']
    for lang in languages:
        try:
            transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang])
            if transcript and len(transcript) > 3:
                return transcript
        except Exception:
            # Best effort: any failure for this language just means "try the next one".
            continue

    # LAYER 2: direct HTTP fallback via the caption track listed on the watch page.
    try:
        video_page = requests.get(f"https://www.youtube.com/watch?v={video_id}", headers=headers, timeout=10)
        video_page.raise_for_status()

        caption_tracks = _extract_caption_tracks(video_page.text)
        if not caption_tracks:
            return None

        # Use the first listed track.
        track_url = caption_tracks[0].get('baseUrl')
        if not track_url:
            return None

        # Ask explicitly for JSON3; without ``fmt`` the response body is not
        # JSON and ``.json()`` below would raise.
        transcript_resp = requests.get(track_url, params={'fmt': 'json3'}, headers=headers, timeout=10)
        transcript_resp.raise_for_status()

        segments = _parse_json3_events(transcript_resp.json())
        return segments if segments else None
    except Exception as e:
        st.warning(f"β οΈ Semua metode gagal. Detail: {type(e).__name__}")
        return None
| def extract_video_id(url: str) -> str | None: | |
| patterns = [ | |
| r'(?:v=|\/)([a-zA-Z0-9_-]{11})(?:\&|\/|$)', | |
| r'youtu\.be\/([a-zA-Z0-9_-]{11})', | |
| r'^([a-zA-Z0-9_-]{11})$' | |
| ] | |
| for pattern in patterns: | |
| match = re.search(pattern, url) | |
| if match: | |
| return match.group(1) | |
| return None | |
def format_time(seconds: float) -> str:
    """Render a second count as zero-padded ``MM:SS`` (fractions are truncated)."""
    whole_seconds = int(seconds)
    minutes = whole_seconds // 60
    remainder = whole_seconds % 60
    return "{:02d}:{:02d}".format(minutes, remainder)
| # ============================================================================= | |
| # ANALYSIS ENGINE (Lightweight, Pure Python) | |
| # ============================================================================= | |
def calculate_emotional_intensity(text: str) -> float:
    """Score *text* from 20 to 100 by counting high-arousal keywords.

    Every whole-word occurrence of a keyword adds 16 points to a base of 20,
    capped at 100. Matching is case-insensitive. Keywords are matched as
    phrases, so multi-word and hyphenated entries ('luar biasa',
    'first time', 'mind-blowing') are counted too; comparing them against
    single ``\\w+`` tokens could never match.
    """
    high_arousal = ('shocking', 'viral', 'wow', 'amazing', 'gila', 'luar biasa',
                    'terkejut', 'marah', 'sedih', 'rahasia', 'ternyata', 'twist',
                    'unbelievable', 'mind-blowing', 'breakthrough', 'first time')
    lowered = text.lower()
    matches = 0
    for phrase in high_arousal:
        # Allow any run of whitespace between the words of a phrase.
        body = r'\s+'.join(re.escape(part) for part in phrase.split())
        matches += len(re.findall(r'\b' + body + r'\b', lowered))
    return min(100, 20 + matches * 16)
def detect_hook_strength(transcript: list[dict], first_n_seconds: int = 30) -> float:
    """Rate the opening of a transcript by the hook phrases it contains.

    Text from entries whose ``start`` (default 0) is at most
    *first_n_seconds* is lower-cased and joined. Each distinct hook phrase
    found as a substring adds 15 points to a base of 30, capped at 100.
    """
    hook_phrases = ('rahasia', 'ternyata', 'jangan', 'peringatan', 'viral', 'shocking',
                    'unbelievable', 'you won\'t believe', 'wait until', 'plot twist')

    opening_parts = []
    for entry in transcript:
        if entry.get('start', 0) <= first_n_seconds:
            opening_parts.append(entry['text'].lower())
    opening = " ".join(opening_parts)

    hits = 0
    for phrase in hook_phrases:
        if phrase in opening:
            hits += 1

    return min(100, hits * 15 + 30)
def identify_viral_segments(transcript: list[dict], window: int = 30) -> list[dict]:
    """Return up to five of the highest-scoring, non-overlapping time windows.

    A candidate window of *window* seconds is opened at every transcript
    entry and filled with the text of that entry and of every later entry
    starting within the window. Each candidate is scored from its emotional
    intensity plus a bonus when a hook keyword appears, clamped to 0-100.

    Candidates are then taken in descending score order, skipping any that
    overlaps an already selected window. Without that step the result is
    several near-identical windows that start on neighbouring caption lines.

    Returns:
        A list of at most 5 dicts with keys ``start``, ``end``, ``preview``,
        ``score`` and ``emotion``, best first.
    """
    hook_words = ('shocking', 'rahasia', 'viral', 'twist')
    preview_len = 120

    candidates = []
    for i, seg in enumerate(transcript):
        start = seg['start']
        window_text = " ".join(t['text'] for t in transcript[i:] if t['start'] - start <= window)
        if not window_text.strip():
            continue

        emotion = calculate_emotional_intensity(window_text)
        lowered = window_text.lower()
        hook_boost = 20 if any(kw in lowered for kw in hook_words) else 0
        score = min(100, max(0, emotion * 0.6 + hook_boost + 25))

        # Only mark the preview as truncated when text was actually cut off.
        preview = window_text[:preview_len]
        if len(window_text) > preview_len:
            preview += "..."

        candidates.append({
            "start": start,
            "end": start + window,
            "preview": preview,
            "score": round(score, 1),
            "emotion": round(emotion, 1)
        })

    # The sort is stable, so equally scored windows keep transcript order.
    ranked = sorted(candidates, key=lambda c: c['score'], reverse=True)

    selected = []
    for cand in ranked:
        disjoint = all(
            cand['end'] <= kept['start'] or cand['start'] >= kept['end']
            for kept in selected
        )
        if disjoint:
            selected.append(cand)
            if len(selected) == 5:
                break
    return selected
def calculate_overall_score(transcript: list[dict]) -> dict:
    """Combine emotion, hook and density into one overall score.

    Content density is one point per five words of transcript text, capped
    at 100. The overall score is a weighted sum of the three components plus
    a constant 40, rounded to one decimal and capped at 100.

    Returns:
        ``{"score": ..., "breakdown": {...}}`` where ``breakdown`` holds the
        three component values rounded to one decimal.
    """
    full_text = " ".join(entry['text'] for entry in transcript)

    emotion = calculate_emotional_intensity(full_text)
    hook = detect_hook_strength(transcript)
    word_count = len(full_text.split())
    density = min(100, word_count // 5)

    weighted = emotion * 0.25 + hook * 0.20 + density * 0.15 + 40

    breakdown = {}
    breakdown["Emotional Intensity"] = round(emotion, 1)
    breakdown["Hook Strength"] = round(hook, 1)
    breakdown["Content Density"] = round(density, 1)

    return {
        "score": min(100, round(weighted, 1)),
        "breakdown": breakdown,
    }
| # ============================================================================= | |
| # STREAMLIT UI | |
| # ============================================================================= | |
def main():
    """Render the page: URL input, transcript fetch, scoring and results.

    The URL text input is bound to a session-state key. The "Video Test"
    button fills that key from an ``on_click`` callback, so the test URL
    stays in the input across reruns and is still there when the analyse
    button is pressed. (Storing it in a one-shot session entry that is
    deleted on the next run loses it before the analysis can start.)
    """
    url_key = "video_url"

    def _use_test_video():
        # Executed as a widget callback, before the text input is rebuilt on
        # the rerun that follows the click.
        st.session_state[url_key] = "https://www.youtube.com/watch?v=jNQXAC9IVRw"

    st.title("π₯ ViralScope AI")
    st.markdown("Analisis potensi viral & temukan segment terbaik untuk Shorts/Reels secara instan.")

    col1, col2 = st.columns([3, 1])
    with col1:
        url = st.text_input("π URL YouTube", placeholder="https://www.youtube.com/watch?v=...", key=url_key)
    with col2:
        st.button("π§ͺ Video Test", use_container_width=True, on_click=_use_test_video)

    if st.button("π Analisis Sekarang", type="primary", use_container_width=True):
        if not url:
            st.error("β Masukkan URL YouTube terlebih dahulu.")
            return

        vid_id = extract_video_id(url)
        if not vid_id:
            st.error("β Format URL tidak valid.")
            return

        with st.spinner("π‘ Menghubungkan ke YouTube (Layer 1 & 2)..."):
            transcript = fetch_transcript_robust(vid_id)

        if not transcript:
            st.error("""
π« **Transcript tidak terambil meskipun CC terlihat di YouTube.**
π **Penyebab Umum:**
β’ IP Hugging Face Spaces diblokir/di-throttle YouTube
β’ Video hanya punya "Auto-generated" yang dilindungi anti-bot
β’ Region Indonesia terkena rate limit YouTube API
π‘ **Solusi Cepat:**
1. Coba video dengan subtitle **Manual** (bukan auto-generated)
2. Gunakan tombol π§ͺ *Video Test* di atas (video lama YouTube, pasti lolos)
3. Deploy alternatif di [Streamlit Community Cloud](https://streamlit.io/cloud) (IP lebih stabil)
""")
            return

        with st.spinner("π§ Menganalisis potensi viral..."):
            time.sleep(0.3)
            overall = calculate_overall_score(transcript)
            segments = identify_viral_segments(transcript)

        st.divider()

        # Left: overall score gauge. Right: per-component breakdown.
        col1, col2 = st.columns([1, 2])
        with col1:
            fig = go.Figure(go.Indicator(
                mode="gauge+number", value=overall['score'],
                gauge={'axis': {'range': [0, 100]}, 'bar': {'color': 'crimson'},
                       'steps': [{'range': [0, 50], 'color': '#ffebee'},
                                 {'range': [50, 75], 'color': '#fff3e0'},
                                 {'range': [75, 100], 'color': '#e8f5e9'}]}
            ))
            fig.update_layout(height=220, margin=dict(l=10, r=10, t=30, b=10))
            st.plotly_chart(fig, use_container_width=True)
        with col2:
            st.subheader("π Detail Skor")
            for k, v in overall['breakdown'].items():
                st.metric(k, f"{v}/100")

        st.subheader("π― Top 5 Segment Viral")
        for i, seg in enumerate(segments, 1):
            # Only the best segment is expanded by default.
            with st.expander(f"#{i} | Score: {seg['score']} | β±οΈ {format_time(seg['start'])}-{format_time(seg['end'])}", expanded=(i==1)):
                st.markdown(f"*\"{seg['preview']}\"*")
                st.caption(f"Emosi: {seg['emotion']}/100 | π‘ Cocok untuk Reels/TikTok/Shorts")

        st.divider()
        st.success("β Analisis selesai! Gunakan segment di atas sebagai hook konten pendek Anda.")


if __name__ == "__main__":
    main()