Spaces:

daafa999
/

ViralScopeAI

Sleeping

App Files Files Community

ViralScopeAI / app.py

daafa999

Update app.py

67e98cd verified about 1 month ago

Raw

History Blame Contribute Delete

9.84 kB

	import streamlit as st
	import re
	import time
	import json
	import requests
	from urllib.parse import urlparse, parse_qs
	from youtube_transcript_api import YouTubeTranscriptApi
	import plotly.graph_objects as go

	# =============================================================================
	# CONFIG
	# =============================================================================
	# Page-level Streamlit settings: browser-tab title and icon, plus the "wide"
	# layout so the gauge and the score breakdown can sit side by side.
	st.set_page_config(page_title="🔥 ViralScope AI", page_icon="🎬", layout="wide")

	# =============================================================================
	# ROBUST TRANSCRIPT FETCHER (Multi-Layer Fallback)
	# =============================================================================
	def _fetch_with_library(video_id: str, languages: list[str]) -> list[dict]:
	    """Fetch a transcript through youtube-transcript-api, old or new API."""
	    if hasattr(YouTubeTranscriptApi, "get_transcript"):
	        # Releases that still ship the static helper.
	        return YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
	    # Releases where only the instance API (`fetch`) remains.
	    return YouTubeTranscriptApi().fetch(video_id, languages=languages).to_raw_data()


	def _extract_caption_tracks(page_html: str) -> list:
	    """Return the ``captionTracks`` JSON array embedded in a watch page, or []."""
	    marker = re.search(r'"captionTracks":\s*\[', page_html)
	    if not marker:
	        return []
	    # raw_decode parses exactly one JSON value starting at the "[" and copes
	    # with nested arrays, which a non-greedy ``\[.*?\]`` regex would truncate.
	    tracks, _ = json.JSONDecoder().raw_decode(page_html, marker.end() - 1)
	    return tracks if isinstance(tracks, list) else []


	def _parse_json3_events(data: dict) -> list[dict]:
	    """Convert a json3 caption payload into the library's list-of-dicts shape."""
	    segments = []
	    for event in data.get('events', []):
	        if 'segs' not in event or 'tStartMs' not in event:
	            continue
	        text = ''.join(seg.get('utf8', '') for seg in event.get('segs', [])).strip()
	        if text:
	            segments.append({
	                'text': text,
	                'start': event['tStartMs'] / 1000,
	                'duration': event.get('dDurationMs', 0) / 1000,
	            })
	    return segments


	@st.cache_data(ttl=1800)
	def fetch_transcript_robust(video_id: str) -> list[dict] | None:
	    """Fetch a video's transcript, trying progressively cruder methods.

	    Layer 1: youtube-transcript-api, with a language priority list.
	    Layer 2: scrape the watch page for ``captionTracks`` and download the
	             first track from its ``baseUrl`` in json3 format.
	    Layer 3: on any Layer 2 error, show a warning naming the exception type.

	    Args:
	        video_id: The 11-character YouTube video id.

	    Returns:
	        A list of ``{'text', 'start', 'duration'}`` dicts (times in seconds),
	        or ``None`` when no transcript could be obtained.

	    NOTE(review): the result is cached for 30 minutes by ``st.cache_data``;
	    presumably a ``None`` result is cached as well, so a failed lookup may not
	    be retried until the TTL expires -- confirm whether that is intended.
	    """
	    headers = {
	        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
	        'Accept-Language': 'en-US,en;q=0.9,id;q=0.8'
	    }

	    # LAYER 1: one library call; `languages` is a priority list, so a single
	    # request replaces the previous per-language retry loop.
	    languages = ['id', 'en', 'es', 'de', 'fr', 'pt', 'ru', 'ja', 'ko']
	    try:
	        transcript = _fetch_with_library(video_id, languages)
	        # Very short transcripts (3 segments or fewer) are treated as unusable
	        # and fall through to Layer 2.
	        if transcript and len(transcript) > 3:
	            return transcript
	    except Exception:
	        # Deliberate best effort: whatever the library raises (blocked IP,
	        # no captions, API change), Layer 2 gets its chance.
	        pass

	    # LAYER 2: direct HTTP fallback against the caption endpoint.
	    try:
	        # Load the watch page to find the caption track URLs.
	        video_page = requests.get(f"https://www.youtube.com/watch?v={video_id}", headers=headers, timeout=10)
	        video_page.raise_for_status()

	        caption_tracks = _extract_caption_tracks(video_page.text)
	        if not caption_tracks:
	            return None

	        # Use the first listed track (language is not inspected here).
	        track_url = caption_tracks[0].get('baseUrl')
	        if not track_url:
	            return None

	        # Ask explicitly for json3; the response is parsed with .json() below,
	        # so the format must be requested rather than left to the default.
	        transcript_resp = requests.get(track_url, params={'fmt': 'json3'}, headers=headers, timeout=10)
	        transcript_resp.raise_for_status()

	        result = _parse_json3_events(transcript_resp.json())
	        return result if result else None

	    except Exception as e:
	        # LAYER 3: surface the failure type to the user instead of crashing.
	        st.warning(f"⚠️ Semua metode gagal. Detail: {type(e).__name__}")
	        return None

	def extract_video_id(url: str) -> str \| None:
	patterns = [
	r'(?:v=\|\/)([a-zA-Z0-9_-]{11})(?:\&\|\/\|$)',
	r'youtu\.be\/([a-zA-Z0-9_-]{11})',
	r'^([a-zA-Z0-9_-]{11})$'
	]
	for pattern in patterns:
	match = re.search(pattern, url)
	if match:
	return match.group(1)
	return None

	def format_time(seconds: float) -> str:
	    """Render a duration in seconds as zero-padded ``MM:SS``.

	    Fractional seconds are truncated; minutes are not wrapped into hours.
	    """
	    whole_seconds = int(seconds)
	    minutes = whole_seconds // 60
	    remainder = whole_seconds % 60
	    return "{:02d}:{:02d}".format(minutes, remainder)

	# =============================================================================
	# ANALYSIS ENGINE (Lightweight, Pure Python)
	# =============================================================================
	# High-arousal vocabulary (Indonesian and English). Some entries are phrases
	# or hyphenated, so matching is done on the text rather than on single tokens.
	_HIGH_AROUSAL_TERMS = (
	    'shocking', 'viral', 'wow', 'amazing', 'gila', 'luar biasa',
	    'terkejut', 'marah', 'sedih', 'rahasia', 'ternyata', 'twist',
	    'unbelievable', 'mind-blowing', 'breakthrough', 'first time',
	)

	# One compiled alternation, built once at import. Word boundaries keep the
	# whole-word semantics; spaces inside a phrase match any run of whitespace.
	_HIGH_AROUSAL_RE = re.compile(
	    r'\b(?:'
	    + '|'.join(
	        r'\s+'.join(re.escape(part) for part in term.split())
	        for term in _HIGH_AROUSAL_TERMS
	    )
	    + r')\b'
	)


	def calculate_emotional_intensity(text: str) -> float:
	    """Score how emotionally charged ``text`` is, on a 20-100 scale.

	    Every whole-word occurrence of a high-arousal term adds 16 points to a
	    base of 20; the result is capped at 100. Matching is case-insensitive.
	    Multi-word and hyphenated terms ('luar biasa', 'first time',
	    'mind-blowing') are counted too; comparing against single word tokens
	    could never match them.

	    Args:
	        text: Transcript text to score.

	    Returns:
	        The intensity score, between 20 and 100.
	    """
	    matches = len(_HIGH_AROUSAL_RE.findall(text.lower()))
	    return min(100, 20 + matches * 16)

	def detect_hook_strength(transcript: list[dict], first_n_seconds: int = 30) -> float:
	    """Rate the opening of a video by the hook phrases it contains.

	    Text from every segment whose ``start`` (default 0) is at or before
	    ``first_n_seconds`` is lower-cased and joined; each distinct hook phrase
	    found in it as a substring adds 15 points to a base of 30, capped at 100.
	    """
	    hook_phrases = (
	        'rahasia', 'ternyata', 'jangan', 'peringatan', 'viral', 'shocking',
	        'unbelievable', "you won't believe", 'wait until', 'plot twist',
	    )

	    opening_lines = []
	    for entry in transcript:
	        if entry.get('start', 0) <= first_n_seconds:
	            opening_lines.append(entry['text'].lower())
	    opening = " ".join(opening_lines)

	    hits = len([phrase for phrase in hook_phrases if phrase in opening])
	    return min(100, 30 + 15 * hits)

	def identify_viral_segments(transcript: list[dict], window: int = 30) -> list[dict]:
	    """Find up to five non-overlapping high-scoring windows in a transcript.

	    A candidate window opens at every segment and gathers the text of all
	    segments starting within ``window`` seconds of it. Each candidate is
	    scored from its emotional intensity, plus a 20-point boost when it
	    contains one of a few hook words. The best candidates are then picked
	    greedily, skipping any window that overlaps one already chosen; without
	    that step the top results are usually near-identical windows a couple of
	    seconds apart.

	    Args:
	        transcript: Segments with at least ``'text'`` and ``'start'`` keys.
	        window: Window length in seconds.

	    Returns:
	        At most five dicts with ``start``, ``end``, ``preview``, ``score`` and
	        ``emotion``, ordered by descending score (ties keep time order).
	    """
	    # Sorting makes the sliding window below valid for any input order; it is
	    # a no-op for transcripts that are already chronological.
	    ordered = sorted(transcript, key=lambda seg: seg['start'])
	    boost_terms = ('shocking', 'rahasia', 'viral', 'twist')

	    candidates = []
	    right = 0  # exclusive end of the current window; only ever moves forward
	    for left, seg in enumerate(ordered):
	        start = seg['start']
	        right = max(right, left)
	        while right < len(ordered) and ordered[right]['start'] - start <= window:
	            right += 1

	        window_text = " ".join(item['text'] for item in ordered[left:right])
	        if not window_text.strip():
	            continue

	        emotion = calculate_emotional_intensity(window_text)
	        lowered = window_text.lower()
	        hook_boost = 20 if any(term in lowered for term in boost_terms) else 0
	        score = min(100, max(0, emotion * 0.6 + hook_boost + 25))

	        # Only mark the preview as truncated when text was actually cut.
	        preview = window_text[:120] + ("..." if len(window_text) > 120 else "")

	        candidates.append({
	            "start": start,
	            "end": start + window,
	            "preview": preview,
	            "score": round(score, 1),
	            "emotion": round(emotion, 1)
	        })

	    # Stable sort: equal scores stay in chronological order.
	    candidates.sort(key=lambda cand: cand['score'], reverse=True)

	    selected = []
	    for cand in candidates:
	        overlaps = any(
	            cand['start'] < kept['end'] and kept['start'] < cand['end']
	            for kept in selected
	        )
	        if overlaps:
	            continue
	        selected.append(cand)
	        if len(selected) == 5:
	            break
	    return selected

	def calculate_overall_score(transcript: list[dict]) -> dict:
	    """Combine the individual metrics into one overall score.

	    The overall score is a fixed base of 40 plus weighted emotional intensity
	    (0.25), hook strength (0.20) and content density (0.15), capped at 100.
	    Content density is one point per five words of transcript, capped at 100.

	    Returns:
	        ``{"score": ..., "breakdown": {...}}`` where ``breakdown`` holds the
	        three component metrics under their display names.
	    """
	    combined_text = " ".join(entry['text'] for entry in transcript)

	    emotion_score = calculate_emotional_intensity(combined_text)
	    hook_score = detect_hook_strength(transcript)
	    word_count = len(combined_text.split())
	    density_score = min(100, word_count // 5)

	    weighted_total = emotion_score * 0.25 + hook_score * 0.20 + density_score * 0.15 + 40

	    breakdown = {
	        "Emotional Intensity": round(emotion_score, 1),
	        "Hook Strength": round(hook_score, 1),
	        "Content Density": round(density_score, 1),
	    }
	    return {
	        "score": min(100, round(weighted_total, 1)),
	        "breakdown": breakdown,
	    }

	# =============================================================================
	# STREAMLIT UI
	# =============================================================================
	# Sample video offered by the "Video Test" button.
	_TEST_VIDEO_URL = "https://www.youtube.com/watch?v=jNQXAC9IVRw"
	# Session-state key that backs the URL text input.
	_URL_INPUT_KEY = "url_input"

	# Shown when no transcript could be fetched. Built from explicit lines so the
	# rendered text never depends on how this source file is indented.
	_TRANSCRIPT_ERROR_HELP = "\n".join([
	    "🚫 Transcript tidak terambil meskipun CC terlihat di YouTube.",
	    "",
	    "🔍 Penyebab Umum:",
	    "• IP Hugging Face Spaces diblokir/di-throttle YouTube",
	    "• Video hanya punya \"Auto-generated\" yang dilindungi anti-bot",
	    "• Region Indonesia terkena rate limit YouTube API",
	    "",
	    "💡 Solusi Cepat:",
	    "1. Coba video dengan subtitle Manual (bukan auto-generated)",
	    "2. Gunakan tombol 🧪 Video Test di atas (video lama YouTube, pasti lolos)",
	    "3. Deploy alternatif di [Streamlit Community Cloud](https://streamlit.io/cloud) (IP lebih stabil)",
	])


	def _load_test_video() -> None:
	    """Button callback: put the sample video URL into the URL text input."""
	    st.session_state[_URL_INPUT_KEY] = _TEST_VIDEO_URL


	def main():
	    """Render the ViralScope UI and run the analysis on demand.

	    Flow: read a YouTube URL, fetch its transcript, compute the overall score
	    and the best segments, then show a gauge, the score breakdown and the top
	    segments. Errors (missing URL, invalid URL, no transcript) are reported
	    in place and end the run early.
	    """
	    st.title("🔥 ViralScope AI")
	    st.markdown("Analisis potensi viral & temukan segment terbaik untuk Shorts/Reels secara instan.")

	    input_col, test_col = st.columns([3, 1])
	    with input_col:
	        # Keyed so the test button can fill it through session state.
	        url = st.text_input(
	            "🔗 URL YouTube",
	            placeholder="https://www.youtube.com/watch?v=...",
	            key=_URL_INPUT_KEY,
	        )
	    with test_col:
	        # The callback runs before the rerun that the click triggers, so the
	        # URL is already in the input on that rerun and stays there for the
	        # later "Analisis" click. The previous store-rerun-delete approach
	        # lost the URL before the analysis button could use it.
	        st.button("🧪 Video Test", use_container_width=True, on_click=_load_test_video)

	    if st.button("🚀 Analisis Sekarang", type="primary", use_container_width=True):
	        if not url:
	            st.error("❌ Masukkan URL YouTube terlebih dahulu.")
	            return

	        vid_id = extract_video_id(url)
	        if not vid_id:
	            st.error("❌ Format URL tidak valid.")
	            return

	        with st.spinner("📡 Menghubungkan ke YouTube (Layer 1 & 2)..."):
	            transcript = fetch_transcript_robust(vid_id)

	        if not transcript:
	            st.error(_TRANSCRIPT_ERROR_HELP)
	            return

	        with st.spinner("🧠 Menganalisis potensi viral..."):
	            # Brief pause so the spinner is visible; the analysis is instant.
	            time.sleep(0.3)
	            overall = calculate_overall_score(transcript)
	            segments = identify_viral_segments(transcript)

	        st.divider()
	        gauge_col, detail_col = st.columns([1, 2])
	        with gauge_col:
	            fig = go.Figure(go.Indicator(
	                mode="gauge+number", value=overall['score'],
	                gauge={'axis': {'range': [0, 100]}, 'bar': {'color': 'crimson'},
	                       'steps': [{'range': [0, 50], 'color': '#ffebee'},
	                                 {'range': [50, 75], 'color': '#fff3e0'},
	                                 {'range': [75, 100], 'color': '#e8f5e9'}]}
	            ))
	            fig.update_layout(height=220, margin=dict(l=10, r=10, t=30, b=10))
	            st.plotly_chart(fig, use_container_width=True)

	        with detail_col:
	            st.subheader("📊 Detail Skor")
	            for k, v in overall['breakdown'].items():
	                st.metric(k, f"{v}/100")

	        st.subheader("🎯 Top 5 Segment Viral")
	        for i, seg in enumerate(segments, 1):
	            # Only the best segment starts expanded.
	            with st.expander(f"#{i} | Score: {seg['score']} | ⏱️ {format_time(seg['start'])}-{format_time(seg['end'])}", expanded=(i == 1)):
	                st.markdown(f"\"{seg['preview']}\"")
	                st.caption(f"Emosi: {seg['emotion']}/100 | 💡 Cocok untuk Reels/TikTok/Shorts")

	        st.divider()
	        st.success("✅ Analisis selesai! Gunakan segment di atas sebagai hook konten pendek Anda.")


	if __name__ == "__main__":
	    main()