# ViralScopeAI / app.py
# daafa999's picture
# Update app.py
# 67e98cd verified
# Raw
# History Blame Contribute Delete
# 9.84 kB
import streamlit as st
import re
import time
import json
import requests
from urllib.parse import urlparse, parse_qs
from youtube_transcript_api import YouTubeTranscriptApi
import plotly.graph_objects as go
# =============================================================================
# CONFIG
# =============================================================================
# Set the browser tab title, the page icon, and the wide layout for the app.
# This runs at module level, before any other Streamlit call in this file.
st.set_page_config(page_title="πŸ”₯ ViralScope AI", page_icon="🎬", layout="wide")
# =============================================================================
# ROBUST TRANSCRIPT FETCHER (Multi-Layer Fallback)
# =============================================================================
def _force_json3_format(track_url: str) -> str:
    """Return *track_url* with its ``fmt`` query parameter set to ``json3``.

    The rest of the URL is left untouched so any signed parameters it carries
    are not re-encoded.
    """
    if re.search(r'[?&]fmt=', track_url):
        return re.sub(r'([?&]fmt=)[^&#]*', r'\g<1>json3', track_url)
    separator = '&' if '?' in track_url else '?'
    return f"{track_url}{separator}fmt=json3"


def _pick_caption_track(tracks: list[dict], languages: list[str]) -> dict:
    """Pick the caption track whose language ranks highest in *languages*.

    Falls back to the first track when none of the preferred languages is
    present.  NOTE(review): relies on each track exposing a ``languageCode``
    key in YouTube's page JSON -- confirm against a live response.
    """
    by_language: dict = {}
    for track in tracks:
        by_language.setdefault(track.get('languageCode'), track)
    for lang in languages:
        if lang in by_language:
            return by_language[lang]
    return tracks[0]


@st.cache_data(ttl=1800)
def fetch_transcript_robust(video_id: str) -> list[dict] | None:
    """Fetch a video's transcript using several fallbacks.

    Layer 1: ``youtube-transcript-api`` (``get_transcript``), one preferred
             language at a time.
    Layer 2: direct HTTP request for the caption track advertised on the
             watch page, fetched in JSON3 format.
    Layer 3: error handler that surfaces the exception type as a warning.

    Args:
        video_id: The YouTube video ID.

    Returns:
        A list of ``{'text', 'start', 'duration'}`` dicts (times in seconds),
        or ``None`` when no transcript could be retrieved.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9,id;q=0.8'
    }

    # LAYER 1: library call, trying languages in priority order.
    languages = ['id', 'en', 'es', 'de', 'fr', 'pt', 'ru', 'ja', 'ko']
    for lang in languages:
        try:
            transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang])
        except Exception:
            # Deliberate best-effort: any failure for this language simply
            # moves on to the next language, and finally to Layer 2.
            continue
        if transcript and len(transcript) > 3:
            return transcript

    # LAYER 2: direct HTTP fallback to the caption endpoint.
    try:
        # Load the watch page to extract the caption track list.
        video_page = requests.get(f"https://www.youtube.com/watch?v={video_id}", headers=headers, timeout=10)
        video_page.raise_for_status()
        page_text = video_page.text

        # Locate the captionTracks array and let the JSON decoder find its
        # end; a non-greedy regex would cut the array at the first nested "]".
        marker = re.search(r'"captionTracks":\s*(?=\[)', page_text)
        if not marker:
            return None
        caption_tracks, _ = json.JSONDecoder().raw_decode(page_text, marker.end())
        if not caption_tracks:
            return None

        # Prefer the same language order as Layer 1, else the first track.
        track_url = _pick_caption_track(caption_tracks, languages).get('baseUrl')
        if not track_url:
            return None

        # Explicitly request JSON3 so the response can be parsed with .json().
        transcript_resp = requests.get(_force_json3_format(track_url), headers=headers, timeout=10)
        transcript_resp.raise_for_status()
        data = transcript_resp.json()

        # Convert the JSON3 events to the library's transcript format.
        result = []
        for event in data.get('events', []):
            if 'segs' in event and 'tStartMs' in event:
                text = ''.join(seg.get('utf8', '') for seg in event.get('segs', []))
                if text.strip():
                    result.append({
                        'text': text.strip(),
                        'start': event['tStartMs'] / 1000,
                        'duration': event.get('dDurationMs', 0) / 1000
                    })
        return result if result else None
    except Exception as e:
        # LAYER 3: top-level boundary -- report the failure type, don't crash.
        st.warning(f"⚠️ Semua metode gagal. Detail: {type(e).__name__}")
        return None
def extract_video_id(url: str) -> str | None:
patterns = [
r'(?:v=|\/)([a-zA-Z0-9_-]{11})(?:\&|\/|$)',
r'youtu\.be\/([a-zA-Z0-9_-]{11})',
r'^([a-zA-Z0-9_-]{11})$'
]
for pattern in patterns:
match = re.search(pattern, url)
if match:
return match.group(1)
return None
def format_time(seconds: float) -> str:
    """Render a duration in seconds as a zero-padded ``MM:SS`` string.

    Fractional seconds are truncated; minutes are not wrapped at 60.
    """
    whole = int(seconds)
    minutes = whole // 60
    remainder = whole % 60
    return "{:02d}:{:02d}".format(minutes, remainder)
# =============================================================================
# ANALYSIS ENGINE (Lightweight, Pure Python)
# =============================================================================
# High-arousal keywords; some are multi-word or hyphenated phrases.
_HIGH_AROUSAL_TERMS = (
    'shocking', 'viral', 'wow', 'amazing', 'gila', 'luar biasa',
    'terkejut', 'marah', 'sedih', 'rahasia', 'ternyata', 'twist',
    'unbelievable', 'mind-blowing', 'breakthrough', 'first time',
)
# One word-bounded alternation covering every term.  Longer terms come first
# and inner spaces match any whitespace run, so phrases such as "luar biasa"
# are counted -- per-word tokenisation could never match them.
_HIGH_AROUSAL_RE = re.compile(
    r'\b(?:'
    + '|'.join(
        r'\s+'.join(re.escape(word) for word in term.split())
        for term in sorted(_HIGH_AROUSAL_TERMS, key=len, reverse=True)
    )
    + r')\b'
)


def calculate_emotional_intensity(text: str) -> float:
    """Score how emotionally charged *text* is, on a 20-100 scale.

    Every occurrence of a high-arousal keyword or phrase (case-insensitive,
    whole words only) adds 16 points to a base of 20; the result is capped
    at 100.

    Args:
        text: The transcript text to score.

    Returns:
        The intensity score, between 20 and 100.
    """
    matches = len(_HIGH_AROUSAL_RE.findall(text.lower()))
    return min(100, 20 + matches * 16)
def detect_hook_strength(transcript: list[dict], first_n_seconds: int = 30) -> float:
    """Rate the opening of a transcript for attention-grabbing phrases.

    Text from segments starting at or before *first_n_seconds* is lowercased
    and joined; each distinct hook phrase found in it adds 15 points to a
    base of 30, capped at 100.
    """
    triggers = (
        'rahasia', 'ternyata', 'jangan', 'peringatan', 'viral', 'shocking',
        'unbelievable', "you won't believe", 'wait until', 'plot twist',
    )

    # Gather the lowercased text of the opening segments.
    opening_parts = []
    for entry in transcript:
        if entry.get('start', 0) <= first_n_seconds:
            opening_parts.append(entry['text'].lower())
    opening = " ".join(opening_parts)

    # Each phrase counts at most once, however often it appears.
    hits = 0
    for phrase in triggers:
        if phrase in opening:
            hits += 1

    raw_score = hits * 15 + 30
    return raw_score if raw_score < 100 else 100
def identify_viral_segments(transcript: list[dict], window: int = 30, top_n: int = 5) -> list[dict]:
    """Find the highest-scoring, non-overlapping clips in a transcript.

    A candidate window of *window* seconds is opened at every segment.  Each
    candidate is scored from its emotional intensity plus a bonus when it
    contains a hook keyword.  Candidates are ranked by score (ties keep
    chronological order) and picked greedily, skipping any window that
    overlaps one already picked, so the result is distinct moments rather
    than near-duplicate windows.

    Args:
        transcript: Segments with ``'text'`` and ``'start'`` (seconds) keys.
        window: Length of each candidate clip in seconds.
        top_n: Maximum number of clips to return.

    Returns:
        Up to *top_n* dicts with ``start``, ``end``, ``preview``, ``score``
        and ``emotion`` keys, best first.
    """
    if top_n <= 0:
        return []

    hook_terms = ('shocking', 'rahasia', 'viral', 'twist')
    preview_limit = 120

    candidates = []
    for i, seg in enumerate(transcript):
        start = seg['start']
        window_text = " ".join(
            t['text'] for t in transcript[i:] if t['start'] - start <= window
        )
        if not window_text.strip():
            continue
        emotion = calculate_emotional_intensity(window_text)
        lowered = window_text.lower()
        hook_boost = 20 if any(term in lowered for term in hook_terms) else 0
        score = min(100, max(0, emotion * 0.6 + hook_boost + 25))
        # Add an ellipsis only when the preview really was cut short.
        preview = window_text[:preview_limit]
        if len(window_text) > preview_limit:
            preview += "..."
        candidates.append({
            "start": start,
            "end": start + window,
            "preview": preview,
            "score": round(score, 1),
            "emotion": round(emotion, 1)
        })

    # list.sort is stable, so equal scores stay in chronological order.
    candidates.sort(key=lambda c: c['score'], reverse=True)

    selected: list[dict] = []
    for cand in candidates:
        overlaps = any(
            cand['start'] < kept['end'] and kept['start'] < cand['end']
            for kept in selected
        )
        if overlaps:
            continue
        selected.append(cand)
        if len(selected) == top_n:
            break
    return selected
def calculate_overall_score(transcript: list[dict]) -> dict:
    """Combine the per-aspect scores of a transcript into one overall score.

    Returns a dict with the capped overall ``"score"`` and a ``"breakdown"``
    of the emotional intensity, hook strength and content density (one point
    per five words, capped at 100).
    """
    texts = []
    for entry in transcript:
        texts.append(entry['text'])
    full_text = " ".join(texts)

    emotion = calculate_emotional_intensity(full_text)
    hook = detect_hook_strength(transcript)

    word_count = len(full_text.split())
    density = min(100, word_count // 5)

    # Weighted blend on top of a fixed base of 40.
    weighted = emotion * 0.25 + hook * 0.20 + density * 0.15 + 40
    breakdown = {
        "Emotional Intensity": round(emotion, 1),
        "Hook Strength": round(hook, 1),
        "Content Density": round(density, 1),
    }
    return {
        "score": min(100, round(weighted, 1)),
        "breakdown": breakdown,
    }
# =============================================================================
# STREAMLIT UI
# =============================================================================
# Sample video loaded by the "Video Test" button.
_TEST_VIDEO_URL = "https://www.youtube.com/watch?v=jNQXAC9IVRw"
# Session-state key bound to the URL text input.
_URL_INPUT_KEY = "viralscope_url_input"


def _load_test_url() -> None:
    """Button callback: pre-fill the URL input with the sample video."""
    st.session_state[_URL_INPUT_KEY] = _TEST_VIDEO_URL


def main():
    """Render the ViralScope AI page and run the analysis on demand.

    The page shows a URL input, a button that pre-fills a sample video, and
    an analyse button.  On analysis it extracts the video ID, fetches the
    transcript, computes the overall score and the top segments, and renders
    a gauge, a score breakdown and one expander per segment.
    """
    st.title("πŸ”₯ ViralScope AI")
    st.markdown("Analisis potensi viral & temukan segment terbaik untuk Shorts/Reels secara instan.")

    col1, col2 = st.columns([3, 1])
    with col1:
        url = st.text_input(
            "πŸ”— URL YouTube",
            placeholder="https://www.youtube.com/watch?v=...",
            key=_URL_INPUT_KEY,
        )
    with col2:
        # The callback writes the sample URL into the input's own state
        # before the rerun, so it is visible in the field and still there
        # when the analyse button is clicked afterwards.  Previously the URL
        # lived in a one-shot variable that was gone by that click.
        st.button("πŸ§ͺ Video Test", use_container_width=True, on_click=_load_test_url)

    url = (url or "").strip()

    if st.button("πŸš€ Analisis Sekarang", type="primary", use_container_width=True):
        if not url:
            st.error("❌ Masukkan URL YouTube terlebih dahulu.")
            return
        vid_id = extract_video_id(url)
        if not vid_id:
            st.error("❌ Format URL tidak valid.")
            return

        with st.spinner("πŸ“‘ Menghubungkan ke YouTube (Layer 1 & 2)..."):
            transcript = fetch_transcript_robust(vid_id)

        if not transcript:
            st.error("""
🚫 **Transcript tidak terambil meskipun CC terlihat di YouTube.**
πŸ” **Penyebab Umum:**
β€’ IP Hugging Face Spaces diblokir/di-throttle YouTube
β€’ Video hanya punya "Auto-generated" yang dilindungi anti-bot
β€’ Region Indonesia terkena rate limit YouTube API
πŸ’‘ **Solusi Cepat:**
1. Coba video dengan subtitle **Manual** (bukan auto-generated)
2. Gunakan tombol πŸ§ͺ *Video Test* di atas (video lama YouTube, pasti lolos)
3. Deploy alternatif di [Streamlit Community Cloud](https://streamlit.io/cloud) (IP lebih stabil)
""")
            return

        with st.spinner("🧠 Menganalisis potensi viral..."):
            # Short pause, presumably so the spinner is visible to the user.
            time.sleep(0.3)
            overall = calculate_overall_score(transcript)
            segments = identify_viral_segments(transcript)

        st.divider()
        col1, col2 = st.columns([1, 2])
        with col1:
            # Gauge of the overall score with three coloured bands.
            fig = go.Figure(go.Indicator(
                mode="gauge+number", value=overall['score'],
                gauge={'axis': {'range': [0, 100]}, 'bar': {'color': 'crimson'},
                       'steps': [{'range': [0, 50], 'color': '#ffebee'},
                                 {'range': [50, 75], 'color': '#fff3e0'},
                                 {'range': [75, 100], 'color': '#e8f5e9'}]}
            ))
            fig.update_layout(height=220, margin=dict(l=10, r=10, t=30, b=10))
            st.plotly_chart(fig, use_container_width=True)
        with col2:
            st.subheader("πŸ“Š Detail Skor")
            for k, v in overall['breakdown'].items():
                st.metric(k, f"{v}/100")

        st.subheader("🎯 Top 5 Segment Viral")
        for i, seg in enumerate(segments, 1):
            # Only the best segment starts expanded.
            with st.expander(f"#{i} | Score: {seg['score']} | ⏱️ {format_time(seg['start'])}-{format_time(seg['end'])}", expanded=(i==1)):
                st.markdown(f"*\"{seg['preview']}\"*")
                st.caption(f"Emosi: {seg['emotion']}/100 | πŸ’‘ Cocok untuk Reels/TikTok/Shorts")

        st.divider()
        st.success("βœ… Analisis selesai! Gunakan segment di atas sebagai hook konten pendek Anda.")
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()