import streamlit as st
import re
import time
import json
import requests
from urllib.parse import urlparse, parse_qs
from youtube_transcript_api import YouTubeTranscriptApi
import plotly.graph_objects as go
# =============================================================================
# CONFIG
# =============================================================================
# Page-level settings: browser tab title, tab icon, and wide layout.
st.set_page_config(
    page_title="π₯ ViralScope AI",
    page_icon="π¬",
    layout="wide",
)
# =============================================================================
# ROBUST TRANSCRIPT FETCHER (Multi-Layer Fallback)
# =============================================================================
@st.cache_data(ttl=1800)
def fetch_transcript_robust(video_id: str) -> list[dict] | None:
    """Fetch a video's transcript, falling back to more direct methods.

    Layer 1: youtube-transcript-api ``get_transcript``, one preferred
             language at a time.
    Layer 2: read the watch page, locate its ``captionTracks`` JSON and
             download a caption track in JSON3 format.
    Layer 3: if Layer 2 raises, show a Streamlit warning with the
             exception type and give up.

    Args:
        video_id: The YouTube video identifier.

    Returns:
        A list of ``{'text', 'start', 'duration'}`` dicts (times in
        seconds), or ``None`` when no transcript could be obtained.

    NOTE(review): the result is cached for 30 minutes, including a ``None``
    result, so a transient failure is remembered for that long.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9,id;q=0.8'
    }

    # LAYER 1: use the library directly, trying languages in priority order.
    # NOTE(review): newer youtube-transcript-api releases appear to have
    # replaced the static get_transcript() with an instance API; because every
    # exception is swallowed here, a missing method would silently push all
    # requests to Layer 2 - confirm against the pinned library version.
    languages = ['id', 'en', 'es', 'de', 'fr', 'pt', 'ru', 'ja', 'ko']
    for lang in languages:
        try:
            transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang])
            # Very short results (3 entries or fewer) are treated as unusable.
            if transcript and len(transcript) > 3:
                return transcript
        except Exception:
            continue

    # LAYER 2: direct HTTP fallback against YouTube's caption endpoint.
    try:
        # Fetch the watch page so the caption track URLs can be extracted.
        video_page = requests.get(f"https://www.youtube.com/watch?v={video_id}", headers=headers, timeout=10)
        video_page.raise_for_status()

        # Decode the JSON array that follows "captionTracks": with a real JSON
        # parser. A non-greedy regex up to the first "]" cuts the array short
        # whenever a track entry itself contains a nested array.
        marker = '"captionTracks":'
        marker_pos = video_page.text.find(marker)
        if marker_pos == -1:
            return None
        caption_tracks, _ = json.JSONDecoder().raw_decode(video_page.text, marker_pos + len(marker))
        if not isinstance(caption_tracks, list) or not caption_tracks:
            return None

        # Prefer a track in the same language priority as Layer 1; otherwise
        # fall back to the first listed track.
        track = next(
            (
                candidate
                for lang in languages
                for candidate in caption_tracks
                if isinstance(candidate, dict) and candidate.get('languageCode') == lang
            ),
            caption_tracks[0],
        )
        track_url = track.get('baseUrl')
        if not track_url:
            return None

        # Explicitly request JSON3; without it the endpoint's default payload
        # is not JSON and .json() below fails.
        track_url = re.sub(r'&fmt=[^&]*', '', track_url)
        separator = '&' if '?' in track_url else '?'
        transcript_resp = requests.get(f"{track_url}{separator}fmt=json3", headers=headers, timeout=10)
        transcript_resp.raise_for_status()
        data = transcript_resp.json()

        # Convert JSON3 events into the same shape the library returns.
        result = []
        for event in data.get('events', []):
            if 'segs' in event and 'tStartMs' in event:
                text = ''.join(seg.get('utf8', '') for seg in event.get('segs', []))
                if text.strip():
                    result.append({
                        'text': text.strip(),
                        'start': event['tStartMs'] / 1000,
                        'duration': event.get('dDurationMs', 0) / 1000
                    })
        return result if result else None
    except Exception as e:
        # LAYER 3: report the failure type to the user and return nothing.
        st.warning(f"β οΈ Semua metode gagal. Detail: {type(e).__name__}")
        return None
def extract_video_id(url: str) -> str | None:
patterns = [
r'(?:v=|\/)([a-zA-Z0-9_-]{11})(?:\&|\/|$)',
r'youtu\.be\/([a-zA-Z0-9_-]{11})',
r'^([a-zA-Z0-9_-]{11})$'
]
for pattern in patterns:
match = re.search(pattern, url)
if match:
return match.group(1)
return None
def format_time(seconds: float) -> str:
    """Render a duration in seconds as a zero-padded ``MM:SS`` string.

    Fractional seconds are truncated; minutes are not wrapped at 60, so
    an hour is rendered as ``60:00``.
    """
    whole_seconds = int(seconds)
    minutes = whole_seconds // 60
    remainder = whole_seconds % 60
    return "{:02d}:{:02d}".format(minutes, remainder)
# =============================================================================
# ANALYSIS ENGINE (Lightweight, Pure Python)
# =============================================================================
def calculate_emotional_intensity(text: str) -> float:
    """Score how emotionally charged *text* is, on a 0-100 scale.

    The score starts at 20 and gains 16 points for every occurrence of a
    high-arousal keyword (English or Indonesian), capped at 100.
    Matching is case-insensitive and on whole words/phrases.

    Args:
        text: Transcript text to score.

    Returns:
        The capped score.
    """
    high_arousal = ['shocking', 'viral', 'wow', 'amazing', 'gila', 'luar biasa',
                    'terkejut', 'marah', 'sedih', 'rahasia', 'ternyata', 'twist',
                    'unbelievable', 'mind-blowing', 'breakthrough', 'first time']
    # Keywords made of a single \w+ token can be matched against the token
    # list; anything containing a space or hyphen can never equal one token
    # and has to be searched for as a phrase instead.
    single_words = {kw for kw in high_arousal if re.fullmatch(r'\w+', kw)}
    phrases = [kw for kw in high_arousal if kw not in single_words]

    lowered = text.lower()
    tokens = re.findall(r'\b\w+\b', lowered)
    matches = sum(1 for token in tokens if token in single_words)
    for phrase in phrases:
        # Look-arounds keep the match on word boundaries at both ends.
        matches += len(re.findall(r'(?<!\w)' + re.escape(phrase) + r'(?!\w)', lowered))
    return min(100, 20 + matches * 16)
def detect_hook_strength(transcript: list[dict], first_n_seconds: int = 30) -> float:
    """Rate the opening of a transcript by the hook phrases it contains.

    Entries whose ``start`` (default 0 when absent) is at most
    ``first_n_seconds`` are lower-cased and joined with spaces. Each
    distinct hook phrase found as a substring adds 15 points to a base of
    30, and the total is capped at 100.
    """
    hook_phrases = (
        'rahasia', 'ternyata', 'jangan', 'peringatan', 'viral', 'shocking',
        'unbelievable', "you won't believe", 'wait until', 'plot twist',
    )

    opening_lines = []
    for entry in transcript:
        if entry.get('start', 0) > first_n_seconds:
            continue
        opening_lines.append(entry['text'].lower())
    opening = " ".join(opening_lines)

    hits = 0
    for phrase in hook_phrases:
        if phrase in opening:
            hits += 1
    return min(100, 30 + 15 * hits)
def identify_viral_segments(transcript: list[dict], window: int = 30) -> list[dict]:
    """Pick up to five high-scoring, non-overlapping windows of a transcript.

    A candidate window is opened at every transcript entry and contains the
    text of that entry plus every later entry whose ``start`` is no more
    than ``window`` seconds after it. Each window's score is
    ``emotion * 0.6 + 25`` plus a 20-point boost when a hook keyword is
    present, clamped to 0-100.

    Because one candidate is produced per transcript entry, neighbouring
    candidates cover almost the same text. Candidates are therefore taken
    in descending score order and a candidate is skipped when its start
    lies within ``window`` seconds of one already selected, so the result
    is not five copies of the same moment.

    Args:
        transcript: Entries with at least ``'text'`` and ``'start'`` keys.
        window: Window length in seconds.

    Returns:
        At most five dicts with keys ``start``, ``end``, ``preview``,
        ``score`` and ``emotion``, best score first.
    """
    boost_keywords = ('shocking', 'rahasia', 'viral', 'twist')
    max_results = 5

    candidates = []
    for i, seg in enumerate(transcript):
        start = seg['start']
        window_text = " ".join(t['text'] for t in transcript[i:] if t['start'] - start <= window)
        if not window_text.strip():
            continue
        emotion = calculate_emotional_intensity(window_text)
        lowered = window_text.lower()
        hook_boost = 20 if any(kw in lowered for kw in boost_keywords) else 0
        score = min(100, max(0, emotion * 0.6 + hook_boost + 25))
        candidates.append({
            "start": start,
            "end": start + window,
            "preview": window_text[:120] + "...",
            "score": round(score, 1),
            "emotion": round(emotion, 1)
        })

    # Stable sort: equal scores keep their chronological order.
    candidates.sort(key=lambda c: c['score'], reverse=True)

    selected = []
    for candidate in candidates:
        overlaps = any(abs(candidate['start'] - kept['start']) < window for kept in selected)
        if overlaps:
            continue
        selected.append(candidate)
        if len(selected) == max_results:
            break
    return selected
def calculate_overall_score(transcript: list[dict]) -> dict:
    """Combine emotion, hook and density metrics into one overall score.

    The overall score is a base of 40 plus 25% of the emotional intensity
    of the full text, 20% of the hook strength and 15% of the content
    density (one point per five words, capped at 100). The result is
    rounded to one decimal and capped at 100.

    Returns:
        ``{"score": ..., "breakdown": {...}}`` where the breakdown holds
        the three component metrics.
    """
    combined_text = " ".join(entry['text'] for entry in transcript)

    emotion = calculate_emotional_intensity(combined_text)
    hook = detect_hook_strength(transcript)
    word_count = len(combined_text.split())
    density = min(100, word_count // 5)

    weighted_total = emotion * 0.25 + hook * 0.20 + density * 0.15 + 40
    breakdown = {
        "Emotional Intensity": round(emotion, 1),
        "Hook Strength": round(hook, 1),
        "Content Density": round(density, 1),
    }
    return {
        "score": min(100, round(weighted_total, 1)),
        "breakdown": breakdown,
    }
# =============================================================================
# STREAMLIT UI
# =============================================================================
# Sample video loaded by the "Video Test" button.
_TEST_VIDEO_URL = "https://www.youtube.com/watch?v=jNQXAC9IVRw"
# Session-state key bound to the URL text input.
_URL_INPUT_KEY = "url_input"


def _load_test_video() -> None:
    """Button callback: put the sample video URL into the URL text input."""
    st.session_state[_URL_INPUT_KEY] = _TEST_VIDEO_URL


def main():
    """Render the app: URL input, transcript fetch, scoring and results.

    Flow: read a YouTube URL, extract the video ID, fetch the transcript,
    compute the overall score and the top segments, then show a gauge, the
    score breakdown and one expander per segment. Each failure path shows
    an error message and stops rendering for this run.
    """
    st.title("π₯ ViralScope AI")
    st.markdown("Analisis potensi viral & temukan segment terbaik untuk Shorts/Reels secara instan.")

    col1, col2 = st.columns([3, 1])
    with col1:
        url = st.text_input(
            "π URL YouTube",
            placeholder="https://www.youtube.com/watch?v=...",
            key=_URL_INPUT_KEY,
        )
    with col2:
        # The callback runs before the script reruns, so the text input above
        # is rendered with the sample URL and keeps it until the user edits
        # it. Storing the URL in a temporary session key and deleting it on
        # the next run lost the value before the analyse button was clicked.
        st.button("π§ͺ Video Test", use_container_width=True, on_click=_load_test_video)

    if st.button("π Analisis Sekarang", type="primary", use_container_width=True):
        if not url:
            st.error("β Masukkan URL YouTube terlebih dahulu.")
            return
        vid_id = extract_video_id(url)
        if not vid_id:
            st.error("β Format URL tidak valid.")
            return

        with st.spinner("π‘ Menghubungkan ke YouTube (Layer 1 & 2)..."):
            transcript = fetch_transcript_robust(vid_id)
        if not transcript:
            st.error("""
π« **Transcript tidak terambil meskipun CC terlihat di YouTube.**
π **Penyebab Umum:**
β’ IP Hugging Face Spaces diblokir/di-throttle YouTube
β’ Video hanya punya "Auto-generated" yang dilindungi anti-bot
β’ Region Indonesia terkena rate limit YouTube API
π‘ **Solusi Cepat:**
1. Coba video dengan subtitle **Manual** (bukan auto-generated)
2. Gunakan tombol π§ͺ *Video Test* di atas (video lama YouTube, pasti lolos)
3. Deploy alternatif di [Streamlit Community Cloud](https://streamlit.io/cloud) (IP lebih stabil)
""")
            return

        with st.spinner("π§ Menganalisis potensi viral..."):
            # Short pause so the spinner is visible even for fast analyses.
            time.sleep(0.3)
            overall = calculate_overall_score(transcript)
            segments = identify_viral_segments(transcript)

        st.divider()
        col1, col2 = st.columns([1, 2])
        with col1:
            # Gauge of the overall score with three coloured bands.
            fig = go.Figure(go.Indicator(
                mode="gauge+number", value=overall['score'],
                gauge={'axis': {'range': [0, 100]}, 'bar': {'color': 'crimson'},
                       'steps': [{'range': [0, 50], 'color': '#ffebee'},
                                 {'range': [50, 75], 'color': '#fff3e0'},
                                 {'range': [75, 100], 'color': '#e8f5e9'}]}
            ))
            fig.update_layout(height=220, margin=dict(l=10, r=10, t=30, b=10))
            st.plotly_chart(fig, use_container_width=True)
        with col2:
            st.subheader("π Detail Skor")
            for k, v in overall['breakdown'].items():
                st.metric(k, f"{v}/100")

        st.subheader("π― Top 5 Segment Viral")
        for i, seg in enumerate(segments, 1):
            # Only the best segment starts expanded.
            with st.expander(f"#{i} | Score: {seg['score']} | β±οΈ {format_time(seg['start'])}-{format_time(seg['end'])}", expanded=(i==1)):
                st.markdown(f"*\"{seg['preview']}\"*")
                st.caption(f"Emosi: {seg['emotion']}/100 | π‘ Cocok untuk Reels/TikTok/Shorts")

        st.divider()
        st.success("β Analisis selesai! Gunakan segment di atas sebagai hook konten pendek Anda.")
# Run the app when this file is executed as a script.
if __name__ == "__main__":
    main()