# ==============================================================================
# SOCIAL PERCEPTION ANALYZER - FINAL COMPLETE APPLICATION
# Version: 4.1 (Fully Refactored, Production-Ready)
# ==============================================================================
# --- IMPORTS ---
import re
from GoogleNews import GoogleNews
from requests.exceptions import HTTPError
import pandas as pd
import logging
import time
from datetime import datetime, timezone
from logging.handlers import RotatingFileHandler
import gradio as gr
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties, fontManager
import seaborn as sns
from wordcloud import WordCloud
import dateparser
import numpy as np
import os
# ==============================================================================
# SETUP PRODUCTION-GRADE LOGGING & CONFIGURATION
# ==============================================================================
log_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
log_handler = RotatingFileHandler('app.log', maxBytes=5*1024*1024, backupCount=2)
log_handler.setFormatter(log_formatter)
logger = logging.getLogger()
logger.setLevel(logging.INFO)
if not logger.handlers:
    logger.addHandler(log_handler)
logger.info("Application starting up.")
# --- APPLICATION CONFIGURATION ---
APP_TITLE = "Prohori (প্রহরী)"
APP_TAGLINE = "Analyze Google News & YouTube video trends, engagement, and comment activity for your search topics."
APP_FOOTER = "Developed by Arjon for CDSR"
# --- FONT CONFIGURATION ---
FONT_PATH = 'NotoSansBengali-Regular.ttf'
BANGLA_FONT = None  # populated by setup_bangla_font()
def setup_bangla_font():
    """Properly set up the Bengali font for all visualizations."""
    global BANGLA_FONT
    # Strictly enforce NotoSansBengali-Regular.ttf for all Bengali text
    if os.path.exists(FONT_PATH):
        try:
            fontManager.addfont(FONT_PATH)
            BANGLA_FONT = FontProperties(fname=FONT_PATH)
            plt.rcParams['font.family'] = BANGLA_FONT.get_name()
            plt.rcParams['axes.unicode_minus'] = False
            logger.info(f"Successfully loaded '{FONT_PATH}' for Bengali text.")
            return True
        except Exception as e:
            logger.error(f"Error loading Bengali font: {e}")
            BANGLA_FONT = None
            return False
    else:
        logger.error(f"Font file {FONT_PATH} not found. Bengali text will not render correctly.")
        BANGLA_FONT = None
        plt.rcParams['font.family'] = 'sans-serif'
        return False
# Initialize font system
font_loaded = setup_bangla_font()
# ==============================================================================
# CORE HELPER FUNCTIONS
# ==============================================================================
def clean_bengali_text(text):
    """Remove non-Bengali characters except spaces and underscores (used for joined phrases)."""
    cleaned = re.sub(r'[^\u0980-\u09FF_\s]', '', str(text))
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()
    return cleaned
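# Example (quick sanity check): clean_bengali_text("ঢাকা, Dhaka 2024!") -> "ঢাকা"
# Latin letters, digits, and punctuation are stripped; Bengali characters,
# underscores, and single spaces survive.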
# Comprehensive stopword list for Bengali text analysis
BANGLA_STOP_WORDS = [
    'অতএব', 'অথচ', 'অথবা', 'অনুযায়ী', 'অনেক', 'অনেকে', 'অনেকেই', 'অন্তত', 'অন্য', 'অবধি', 'অবশ্য',
    'অভিপ্রায়', 'একে', 'একই', 'একেবারে', 'একটি', 'একবার', 'এখন', 'এখনও', 'এখানে', 'এখানেই', 'এটি',
    'এতটাই', 'এতদূর', 'এতটুকু', 'এক', 'এবং', 'এবার', 'এমন', 'এমনভাবে', 'এর', 'এরা', 'এঁরা', 'এঁদের',
    'এই', 'এইভাবে', 'ও', 'ওঁরা', 'ওঁর', 'ওঁদের', 'ওকে', 'ওখানে', 'ওদের', 'ওর', 'কাছ', 'কাছে', 'কাজ',
    'কারণ', 'কিছু', 'কিছুই', 'কিন্তু', 'কিভাবে', 'কেন', 'কোন', 'কোনও', 'কোনো', 'ক্ষেত্রে', 'খুব',
    'গুলি', 'গিয়ে', 'চায়', 'ছাড়া', 'জন্য', 'জানা', 'ঠিক', 'তিনি', 'তিন', 'তিনিও', 'তাকে', 'তাঁকে',
    'তার', 'তাঁর', 'তারা', 'তাঁরা', 'তাদের', 'তাঁদের', 'তাহলে', 'থাকলেও', 'থেকে', 'মধ্যেই', 'মধ্যে',
    'দ্বারা', 'নয়', 'না', 'নিজের', 'নিজে', 'নিয়ে', 'পারেন', 'পারা', 'পারে', 'পরে', 'পর্যন্ত', 'পুনরায়',
    'ফলে', 'বজায়', 'বা', 'বাদে', 'বার', 'বিশেষ', 'বিভিন্ন', 'ব্যবহার', 'ব্যাপারে', 'ভাবে', 'ভাবেই', 'মাধ্যমে',
    'মতো', 'মতোই', 'যখন', 'যদি', 'যদিও', 'যা', 'যাকে', 'যাওয়া', 'যায়', 'যে', 'যেখানে', 'যেতে', 'যেমন',
    'যেহেতু', 'রহিছে', 'শিক্ষা', 'শুধু', 'সঙ্গে', 'সব', 'সমস্ত', 'সম্প্রতি', 'সহ', 'সাধারণ', 'সামনে', 'হতে',
    'হতেই', 'হবে', 'হয়', 'হয়তো', 'হয়', 'হচ্ছে', 'হত', 'হলে', 'হলেও', 'হয়নি', 'হাজার', 'হোওয়া', 'আরও', 'আমরা',
    'আমার', 'আমি', 'আর', 'আগে', 'আগেই', 'আছে', 'আজ', 'তাকে', 'তাতে', 'তাদের', 'তাহার', 'তাহাতে', 'তাহারই',
    'তথা', 'তথাপি', 'সে', 'সেই', 'সেখান', 'সেখানে', 'থেকে', 'নাকি', 'নাগাদ', 'দু', 'দুটি', 'সুতরাং',
    'সম্পর্কে', 'সঙ্গেও', 'সর্বাধিক', 'সর্বদা', 'সহ', 'হৈতে', 'হইবে', 'হইয়া', 'হৈল', 'জানিয়েছেন', 'প্রতিবেদক'
]
COMBINED_STOPWORDS = set(BANGLA_STOP_WORDS)
PHRASES_TO_JOIN = {
    "তারেক রহমান": "তারেক_রহমান",
    "খালেদা জিয়া": "খালেদা_জিয়া",
    "বিএনপি জিন্দাবাদ": "বিএনপি_জিন্দাবাদ"
}
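# Joining multiword names with underscores keeps them as single tokens downstream:
# clean_bengali_text() preserves "_", and the word-cloud token regex
# ([\u0980-\u09FF_]+) treats it as a word character, so "তারেক রহমান"
# survives as one unit ("তারেক_রহমান") instead of two separate words.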
def get_dynamic_time_agg(start_date, end_date):
    """Determine an appropriate time-aggregation level for the given date range."""
    if not isinstance(start_date, pd.Timestamp) or not isinstance(end_date, pd.Timestamp):
        return 'D', 'Daily'  # Graceful fallback
    delta = end_date - start_date
    if delta.days <= 2:
        return 'H', 'Hourly'
    if delta.days <= 90:
        return 'D', 'Daily'
    if delta.days <= 730:
        return 'W', 'Weekly'
    return 'M', 'Monthly'
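# Examples: a 60-day window aggregates daily, a ~700-day one weekly:
#   get_dynamic_time_agg(pd.Timestamp("2024-01-01"), pd.Timestamp("2024-03-01"))  -> ('D', 'Daily')
#   get_dynamic_time_agg(pd.Timestamp("2023-01-01"), pd.Timestamp("2024-12-01"))  -> ('W', 'Weekly')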
def kpi_badge_html(value, label, threshold_high=None, threshold_low=None):
    """
    Return HTML for a color-coded KPI badge:
    green for high values, red for low, yellow for in-between.
    """
    try:
        # Handle comma-separated numbers
        if isinstance(value, str) and ',' in value:
            val = float(value.replace(',', ''))
        else:
            val = float(value)
    except (TypeError, ValueError, AttributeError):
        val = value
    color = '#e0e0e0'  # default
    if threshold_high is not None and isinstance(val, (int, float)) and val >= threshold_high:
        color = '#4caf50'  # green
    elif threshold_low is not None and isinstance(val, (int, float)) and val <= threshold_low:
        color = '#f44336'  # red
    elif threshold_high is not None and threshold_low is not None and isinstance(val, (int, float)):
        color = '#ffeb3b'  # yellow
    # Format numeric values with thousands separators
    if isinstance(value, (int, float)):
        formatted_value = f"{value:,.0f}"
    else:
        formatted_value = str(value)
    return f"<div style='display:inline-block;padding:8px 16px;border-radius:8px;background:{color};color:#222;font-weight:bold;margin:2px;'>{label}: {formatted_value}</div>"
def set_plot_style():
    """Configure a consistent matplotlib style for all visualizations."""
    plt.style.use('seaborn-v0_8-whitegrid')
    plt.rcParams['figure.dpi'] = 100
    plt.rcParams['savefig.dpi'] = 300
    plt.rcParams['figure.figsize'] = (10, 6)
    # Always use NotoSansBengali-Regular.ttf for Bengali text when available
    if isinstance(BANGLA_FONT, FontProperties) and BANGLA_FONT.get_name():
        plt.rcParams['font.family'] = BANGLA_FONT.get_name()
    else:
        plt.rcParams['font.family'] = 'sans-serif'
    plt.rcParams['axes.unicode_minus'] = False  # Fix minus-sign rendering
def cleanup_figures(*figures):
    """Properly close matplotlib figures to prevent memory leaks."""
    for fig in figures:
        if fig is not None:
            try:
                plt.close(fig)
            except Exception:
                pass
# ==============================================================================
# NEWS SCRAPER BACKEND
# ==============================================================================
def run_news_scraper_pipeline(search_keywords, sites, start_date_str, end_date_str, interval, max_pages, filter_keys, progress=gr.Progress()):
    """Full implementation of the news scraper with robust error handling."""
    # Input validation and sanitization
    search_keywords = str(search_keywords).strip() if search_keywords else ""
    sites = str(sites).strip() if sites else ""
    start_date_str = str(start_date_str).strip() if start_date_str else ""
    end_date_str = str(end_date_str).strip() if end_date_str else ""
    filter_keys = str(filter_keys).strip() if filter_keys else ""
    if not all([search_keywords, start_date_str, end_date_str]):
        raise gr.Error("Search Keywords, Start Date, and End Date are required.")
    start_dt = dateparser.parse(start_date_str)
    end_dt = dateparser.parse(end_date_str)
    if not all([start_dt, end_dt]):
        raise gr.Error("Invalid date format. Please use a recognizable format like YYYY-MM-DD or '2 weeks ago'.")
    # Ensure the start date is before the end date
    if start_dt > end_dt:
        start_dt, end_dt = end_dt, start_dt
        gr.Warning("Start date was after end date. Dates have been swapped.")
    all_articles, current_dt = [], start_dt
    total_days = max((end_dt - start_dt).days, 1)  # guard against same-day ranges
    while current_dt <= end_dt:
        try:
            interval_end_dt = min(current_dt + pd.Timedelta(days=interval - 1), end_dt)
            start_str, end_str = current_dt.strftime('%Y-%m-%d'), interval_end_dt.strftime('%Y-%m-%d')
            progress((current_dt - start_dt).days / total_days,
                     desc=f"Fetching news from {start_str} to {end_str}")
            site_query = f"({' OR '.join(['site:' + s.strip() for s in sites.split(',') if s.strip()])})" if sites else ""
            final_query = f'"{search_keywords}" {site_query} after:{start_str} before:{end_str}'
            googlenews = GoogleNews(lang='bn', region='BD', period='1d')
            googlenews.search(final_query)
            for page in range(1, max_pages + 1):
                try:
                    results = googlenews.results()
                    if not results:
                        break
                    all_articles.extend(results)
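                    # Note: the GoogleNews library appears to accumulate results
                    # across getpage() calls, so this extend() can re-add earlier
                    # pages; drop_duplicates(subset=['link']) below removes repeats.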
                    if page < max_pages:
                        googlenews.getpage(page + 1)
                        time.sleep(0.3)  # Reduced sleep for performance
                except HTTPError as e:
                    if e.response is not None and e.response.status_code == 429:
                        wait_time = 3  # Reduced wait for optimization
                        gr.Warning(f"Rate limited by Google News. Pausing for {wait_time} seconds.")
                        time.sleep(wait_time)
                    else:
                        logger.error(f"HTTP Error fetching news: {e}")
                        break
                except Exception as e:
                    logger.error(f"An error occurred fetching news: {e}")
                    break
            current_dt += pd.Timedelta(days=interval)
        except Exception as e:
            logger.error(f"Error in news scraping loop: {e}")
            break
    if not all_articles:
        return pd.DataFrame(), pd.DataFrame()
    # Create DataFrame and clean data
    df = pd.DataFrame(all_articles).drop_duplicates(subset=['link'])
    # Parse dates safely
    df['published_date'] = df['date'].apply(lambda x: dateparser.parse(x, languages=['bn']) if pd.notna(x) else None)
    # Drop rows with missing critical data
    df = df.dropna(subset=['published_date', 'title'])
    # Apply advanced filtering if filter keywords are provided
    if filter_keys and filter_keys.strip():
        def match_complex_query(text, query):
            """Simple query matcher supporting AND (implicit), OR, and NOT plus quoted phrases."""
            if not text or not query:
                return False
            text = str(text).lower()
            # Split the query into OR-groups; terms within a group are ANDed.
            for group in re.split(r'\bor\b', query.lower()):
                tokens = re.findall(r'"[^"]+"|\S+', group)
                group_ok, negate_next = bool(tokens), False
                for token in tokens:
                    if token == 'and':
                        continue
                    if token == 'not':
                        negate_next = True
                        continue
                    term = token.strip('"')
                    present = term in text
                    if present == negate_next:
                        # Either a required term is missing or a negated term is present.
                        group_ok = False
                        break
                    negate_next = False
                if group_ok:
                    return True
            return False
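        # Example: 'নির্বাচন not ক্রিকেট' matches rows mentioning নির্বাচন but not
        # ক্রিকেট; '"খালেদা জিয়া" or তারেক' matches either the exact phrase or the
        # single word.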
        # Apply filtering to title and description
        mask = df.apply(lambda row: match_complex_query(
            str(row['title']) + ' ' + str(row.get('desc', '')),
            filter_keys
        ), axis=1)
        df = df[mask]
    # Return both the full dataset and the display dataset.
    # Always return all Google News fields (published_date, title, media, description, link).
    # Some sources use 'desc', some use 'description'. Unify to 'description'.
    if 'desc' in df.columns and 'description' not in df.columns:
        df['description'] = df['desc']
    # Guard against result sets that lack optional columns entirely
    for col in ['media', 'description']:
        if col not in df.columns:
            df[col] = ''
    return df, df[['published_date', 'title', 'media', 'description', 'link']].sort_values(by='published_date', ascending=False)
# ==============================================================================
# YOUTUBE ANALYZER BACKEND
# ==============================================================================
def run_youtube_analysis_pipeline(api_key, query, max_videos_for_stats, num_videos_for_comments, max_comments_per_video, published_after, progress=gr.Progress()):
    """Complete YouTube analysis pipeline with robust error handling."""
    # Prefer an explicitly supplied key, falling back to the environment;
    # never hard-code API credentials in source.
    api_key = api_key or os.getenv("YOUTUBE_API_KEY")
    if not api_key:
        raise gr.Error("No YouTube API key found. Set the YOUTUBE_API_KEY environment variable.")
    if not query:
        raise gr.Error("Search Keywords are required.")
    try:
        from googleapiclient.discovery import build
        from googleapiclient.errors import HttpError
        youtube = build('youtube', 'v3', developerKey=api_key)
    except ImportError:
        logger.error("Required YouTube API libraries not installed")
        raise gr.Error("YouTube analysis requires additional libraries. Please install google-api-python-client.")
    except HttpError as e:
        raise gr.Error(f"Failed to initialize YouTube service. Check API Key. Error: {e}")
    except Exception as e:
        raise gr.Error(f"An unexpected error occurred during API initialization: {e}")
    progress(0.1, desc="Performing broad scan for videos...")
    all_video_ids, next_page_token, total_results_estimate = [], None, 0
    PAGES_TO_FETCH = min(15, (int(max_videos_for_stats) // 50) + 1)
    search_params = {
        'q': query,
        'part': 'id',
        'maxResults': 50,
        'type': 'video',
        'order': 'relevance'
    }
    if published_after:
        parsed_date = dateparser.parse(published_after)
        if parsed_date:
            search_params['publishedAfter'] = parsed_date.replace(tzinfo=timezone.utc).isoformat()
        else:
            gr.Warning(f"Could not parse date: '{published_after}'. Ignoring filter.")
    for page in range(PAGES_TO_FETCH):
        try:
            if next_page_token:
                search_params['pageToken'] = next_page_token
            response = youtube.search().list(**search_params).execute()
            if page == 0:
                total_results_estimate = response.get('pageInfo', {}).get('totalResults', 0)
            # Extract valid video IDs
            valid_ids = []
            for item in response.get('items', []):
                if 'id' in item and 'videoId' in item['id']:
                    valid_ids.append(item['id']['videoId'])
            all_video_ids.extend(valid_ids)
            next_page_token = response.get('nextPageToken')
            progress(0.1 + (0.3 * (page / PAGES_TO_FETCH)),
                     desc=f"Broad scan: Found {len(all_video_ids)} videos...")
            if not next_page_token:
                break
        except HttpError as e:
            if "quotaExceeded" in str(e):
                raise gr.Error("CRITICAL: YouTube API daily quota exceeded. Try again tomorrow.")
            logger.error(f"HTTP error during video search: {e}")
            break
        except Exception as e:
            logger.error(f"Unexpected error during YouTube search: {e}")
            break
    if not all_video_ids:
        return pd.DataFrame(), pd.DataFrame(), ""
    # Fetch video details in batches
    progress(0.4, desc=f"Fetching details for {len(all_video_ids)} videos...")
    def _fetch_video_details(youtube_service, video_ids: list):
        """Fetch detailed information for a batch of video IDs (50 per API call)."""
        all_videos_data = []
        try:
            for i in range(0, len(video_ids), 50):
                id_batch = video_ids[i:i+50]
                video_request = youtube_service.videos().list(
                    part="snippet,statistics",
                    id=",".join(id_batch)
                )
                video_response = video_request.execute()
                for item in video_response.get('items', []):
                    stats = item.get('statistics', {})
                    all_videos_data.append({
                        'video_id': item['id'],
                        'video_title': item['snippet']['title'],
                        'channel': item['snippet']['channelTitle'],
                        'published_date': item['snippet']['publishedAt'],
                        'view_count': int(stats.get('viewCount', 0)),
                        'like_count': int(stats.get('likeCount', 0)),
                        'comment_count': int(stats.get('commentCount', 0))
                    })
        except Exception as e:
            logger.error(f"Could not fetch video details: {e}")
        return all_videos_data
    videos_df_full_scan = pd.DataFrame(_fetch_video_details(youtube, all_video_ids))
    if videos_df_full_scan.empty:
        return pd.DataFrame(), pd.DataFrame(), ""
    # Process and clean video data
    videos_df_full_scan['published_date'] = pd.to_datetime(videos_df_full_scan['published_date'])
    # Calculate the engagement rate safely
    videos_df_full_scan['engagement_rate'] = (
        (videos_df_full_scan['like_count'] + videos_df_full_scan['comment_count']) /
        videos_df_full_scan['view_count'].replace(0, 1)
    ).fillna(0)
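    # Engagement rate here is (likes + comments) / views, with zero-view videos
    # given a denominator of 1 to avoid division by zero.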
    videos_df_full_scan = videos_df_full_scan.sort_values(
        by='view_count',
        ascending=False
    ).reset_index(drop=True)
    # Fetch comments for the top videos
    videos_to_scrape_df = videos_df_full_scan.head(int(num_videos_for_comments))
    all_comments = []
    def _scrape_single_video_comments(youtube_service, video_id, max_comments):
        """Scrape comments for a single video with error handling."""
        comments_list = []
        try:
            request = youtube_service.commentThreads().list(
                part="snippet",
                videoId=video_id,
                maxResults=min(max_comments, 100),
                order='relevance',
                textFormat="plainText"
            )
            response = request.execute()
            for item in response.get('items', []):
                snippet = item['snippet']['topLevelComment']['snippet']
                comments_list.append({
                    'author': snippet['authorDisplayName'],
                    'published_date_comment': snippet['publishedAt'],
                    'comment_text': snippet['textDisplay'],
                    'likes': snippet['likeCount'],
                    'replies': item['snippet']['totalReplyCount']
                })
        except Exception as e:
            logger.warning(f"Could not retrieve comments for video {video_id}: {e}")
        return comments_list
    for index, row in videos_to_scrape_df.iterrows():
        progress(0.7 + (0.3 * (index / len(videos_to_scrape_df))),
                 desc=f"Deep dive: Scraping comments from video {index+1}/{len(videos_to_scrape_df)}...")
        comments_for_video = _scrape_single_video_comments(
            youtube,
            row['video_id'],
            max_comments_per_video
        )
        if comments_for_video:
            for comment in comments_for_video:
                comment.update({
                    'video_id': row['video_id'],
                    'video_title': row['video_title']
                })
            all_comments.extend(comments_for_video)
    comments_df = pd.DataFrame(all_comments)
    if not comments_df.empty:
        comments_df['published_date_comment'] = pd.to_datetime(comments_df['published_date_comment'])
    logger.info(f"YouTube analysis complete. Est. total videos: {total_results_estimate}. "
                f"Scanned: {len(videos_df_full_scan)}. Comments: {len(comments_df)}.")
    # Create the summary HTML
    summary_html = f"""
    <div style='background:#f5f5f5;padding:16px;border-radius:12px;margin-bottom:12px;box-shadow:0 2px 8px #eee;'>
        <h3 style='margin:0 0 8px 0;'>YouTube Analytics Summary</h3>
        <ul style='margin:0;padding-left:18px;'>
            <li><b>Total Videos:</b> {len(videos_df_full_scan):,}</li>
            <li><b>Total Comments:</b> {len(comments_df):,}</li>
            <li><b>Total Views:</b> {videos_df_full_scan['view_count'].sum():,}</li>
        </ul>
    </div>
    """
    return videos_df_full_scan, comments_df, summary_html
# ==============================================================================
# ADVANCED ANALYTICS MODULE
# ==============================================================================
def generate_scraper_dashboard(df: pd.DataFrame):
    """Generate a comprehensive dashboard from the news scraper results."""
    if df.empty:
        # Return empty dashboard components
        return {
            "kpi_total_articles": gr.HTML(""),
            "kpi_unique_media": gr.HTML(""),
            "kpi_date_range": gr.HTML(""),
            "dashboard_timeline_plot": None,
            "dashboard_media_plot": None,
            "dashboard_wordcloud_plot": None
        }
    set_plot_style()
    # Calculate KPIs
    total_articles, unique_media = len(df), df['media'].nunique()
    start_date, end_date = pd.to_datetime(df['published_date']).min(), pd.to_datetime(df['published_date']).max()
    date_range_str = f"{start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}"
    # Color-coded KPI badges
    kpi_total_articles_html = kpi_badge_html(
        total_articles, 'Total Articles', threshold_high=100, threshold_low=10
    )
    kpi_unique_media_html = kpi_badge_html(
        unique_media, 'Unique Media', threshold_high=10, threshold_low=2
    )
    kpi_date_range_html = kpi_badge_html(
        date_range_str, 'Date Range', threshold_high=None, threshold_low=None
    )
    # Time-series visualization
    agg_code, agg_name = get_dynamic_time_agg(start_date, end_date)
    timeline_df = df.set_index(pd.to_datetime(df['published_date'])).resample(agg_code).size().reset_index(name='count')
    timeline_df.rename(columns={'published_date': 'date'}, inplace=True)
    timeline_plot = gr.LinePlot(
        value=timeline_df,
        x='date',
        y='count',
        title=f'{agg_name} News Volume',
        tooltip=['date', 'count'],
        x_title="Date",
        y_title="Number of Articles"
    )
    # Media source analysis
    media_counts = df['media'].dropna().value_counts().nlargest(15).sort_values()
    fig_media = None
    if not media_counts.empty:
        fig_media, ax = plt.subplots(figsize=(8, 6))
        media_counts.plot(kind='barh', ax=ax, color='skyblue')
        ax.set_title("Top 15 Media Sources", fontproperties=BANGLA_FONT, fontsize=18)
        ax.set_xlabel("Article Count", fontproperties=BANGLA_FONT, fontsize=14)
        ax.set_ylabel("মিডিয়া", fontproperties=BANGLA_FONT, fontsize=14)
        yticks = np.arange(len(media_counts.index))
        ax.set_yticks(yticks)
        ax.set_yticklabels(media_counts.index, fontproperties=BANGLA_FONT, fontsize=14)
        for label in ax.get_xticklabels():
            label.set_fontproperties(BANGLA_FONT)
            label.set_fontsize(12)
        for label in ax.get_yticklabels():
            label.set_fontproperties(BANGLA_FONT)
            label.set_fontsize(14)
        legend = ax.get_legend()
        if legend:
            for text in legend.get_texts():
                text.set_fontproperties(BANGLA_FONT)
        plt.tight_layout()
    # Word-cloud generation
    fig_wc = None
    try:
        # Combine all titles and clean the text
        text = " ".join(title for title in df['title'].astype(str))
        text = clean_bengali_text(text)
        # Join special phrases
        for phrase, joined in PHRASES_TO_JOIN.items():
            text = text.replace(phrase, joined)
        # Extract and filter words
        words = re.findall(r'[\u0980-\u09FF_]{2,}', text)
        words = [w for w in words if w not in COMBINED_STOPWORDS]
        words = [w for w in words if len(w) > 1]
        words = [w for w in words if not re.search(r'[a-zA-Z]', w)]
        # Filter by frequency
        from collections import Counter
        word_freq = Counter(words)
        min_freq = 2
        most_common = set([w for w, _ in word_freq.most_common(3)])
        filtered_words = [w for w in words if word_freq[w] >= min_freq and w not in most_common]
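        # The three most frequent tokens are dropped on the assumption that they
        # are usually the search keywords themselves, which would otherwise
        # dominate the cloud.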
        wc_text = " ".join(filtered_words)
        # Generate the word cloud
        if wc_text.strip():
            wc = WordCloud(
                font_path=FONT_PATH,
                width=1600,
                height=900,
                background_color='white',
                stopwords=COMBINED_STOPWORDS,
                collocations=False,
                colormap='plasma',
                max_words=200,
                contour_width=2,
                contour_color='steelblue',
                regexp=r"[\u0980-\u09FF_]+"
            ).generate(wc_text)
            fig_wc, ax = plt.subplots(figsize=(15, 8))
            ax.imshow(wc, interpolation='bilinear')
            ax.axis("off")
            ax.set_title("Bengali Headline Word Cloud", fontproperties=BANGLA_FONT, fontsize=22)
            plt.tight_layout()
    except Exception as e:
        logger.error(f"WordCloud failed: {e}")
        gr.Warning(f"WordCloud generation failed: {str(e)}")
    return {
        "kpi_total_articles": gr.HTML(kpi_total_articles_html),
        "kpi_unique_media": gr.HTML(kpi_unique_media_html),
        "kpi_date_range": gr.HTML(kpi_date_range_html),
        "dashboard_timeline_plot": timeline_plot,
        "dashboard_media_plot": fig_media,
        "dashboard_wordcloud_plot": fig_wc
    }
def generate_youtube_dashboard(videos_df, comments_df):
    """Generate a comprehensive dashboard from the YouTube analysis results."""
    # Initialize all dashboard components first
    dashboard_components = {
        "kpi_yt_videos_found": gr.HTML(""),
        "kpi_yt_views_scanned": gr.HTML(""),
        "kpi_yt_comments_scraped": gr.HTML(""),
        "yt_channel_plot": None,
        "yt_channel_dominance_plot": None,
        "yt_time_series_plot": None,
        "yt_top_videos_plot": None,
        "yt_content_quadrant_plot": None,
        "yt_engagement_plot": None,
        "yt_wordcloud_plot": None,
        "yt_detailed_summary": gr.HTML("")
    }
    # Channel dominance by views
    fig_channel_dominance = None
    if videos_df is not None and not videos_df.empty and 'channel' in videos_df.columns:
        channel_views = videos_df.groupby('channel')['view_count'].sum().sort_values(ascending=False).head(10)
        if not channel_views.empty:
            fig_channel_dominance, ax = plt.subplots(figsize=(10, 6))
            channel_views.plot(kind='barh', ax=ax, color='slateblue')
            ax.set_title("Top 10 Dominant Channels by View Count", fontproperties=BANGLA_FONT, fontsize=18)
            ax.set_xlabel("মোট ভিউ", fontproperties=BANGLA_FONT, fontsize=14)
            ax.set_ylabel("চ্যানেল", fontproperties=BANGLA_FONT, fontsize=14)
            yticks = np.arange(len(channel_views.index))
            ax.set_yticks(yticks)
            ax.set_yticklabels(channel_views.index, fontproperties=BANGLA_FONT, fontsize=14)
            for label in ax.get_xticklabels():
                label.set_fontproperties(BANGLA_FONT)
                label.set_fontsize(12)
            for label in ax.get_yticklabels():
                label.set_fontproperties(BANGLA_FONT)
                label.set_fontsize(14)
            legend = ax.get_legend()
            if legend:
                for text in legend.get_texts():
                    text.set_fontproperties(BANGLA_FONT)
            plt.tight_layout()
    dashboard_components["yt_channel_dominance_plot"] = fig_channel_dominance
    # Content performance quadrant
    fig_quadrant = None
    if videos_df is not None and not videos_df.empty:
        try:
            # Define the quadrant boundaries
            median_views = videos_df['view_count'].median()
            median_engagement = videos_df['engagement_rate'].median()
            fig_quadrant, ax = plt.subplots(figsize=(10, 8))
            scatter = ax.scatter(
                videos_df['view_count'],
                videos_df['engagement_rate'],
                c='darkorange', alpha=0.7
            )
            ax.axvline(median_views, color='blue', linestyle='--', label='Median Views')
            ax.axhline(median_engagement, color='green', linestyle='--', label='Median Engagement')
            ax.legend()  # show the median-line labels defined above
            ax.set_xlabel("মোট ভিউ", fontproperties=BANGLA_FONT, fontsize=14)
            ax.set_ylabel("এনগেজমেন্ট রেট", fontproperties=BANGLA_FONT, fontsize=14)
            ax.set_title("Content Performance Quadrant", fontproperties=BANGLA_FONT, fontsize=18)
            for label in ax.get_xticklabels():
                label.set_fontproperties(BANGLA_FONT)
                label.set_fontsize(12)
            for label in ax.get_yticklabels():
                label.set_fontproperties(BANGLA_FONT)
                label.set_fontsize(14)
            legend = ax.get_legend()
            if legend:
                for text in legend.get_texts():
                    text.set_fontproperties(BANGLA_FONT)
            plt.tight_layout()
        except Exception as e:
            logger.error(f"Quadrant plot failed: {e}")
    dashboard_components["yt_content_quadrant_plot"] = fig_quadrant
    # Detailed analysis summary from the YouTube API
    detailed_summary = ""
    if videos_df is not None and not videos_df.empty:
        top_video = videos_df.iloc[0]
        detailed_summary = "<div style='background:#e3f2fd;padding:12px;border-radius:8px;margin-bottom:8px;'>"
        detailed_summary += f"<b>Top Video:</b> {top_video['video_title']}<br>"
        detailed_summary += f"<b>Channel:</b> {top_video['channel']}<br>"
        detailed_summary += f"<b>Views:</b> {top_video['view_count']:,}<br>"
        detailed_summary += f"<b>Likes:</b> {top_video['like_count']:,}<br>"
        detailed_summary += f"<b>Comments:</b> {top_video['comment_count']:,}<br>"
        detailed_summary += f"<b>Published:</b> {top_video['published_date'].strftime('%Y-%m-%d')}<br>"
        detailed_summary += f"<b>Engagement Rate:</b> {top_video['engagement_rate']:.2f}"
        detailed_summary += "</div>"
        dashboard_components["yt_detailed_summary"] = gr.HTML(detailed_summary)
    # Generate KPIs if data exists
    if videos_df is not None and not videos_df.empty:
        dashboard_components["kpi_yt_videos_found"] = gr.HTML(
            kpi_badge_html(len(videos_df), 'Videos Found', threshold_high=50, threshold_low=5)
        )
        dashboard_components["kpi_yt_views_scanned"] = gr.HTML(
            kpi_badge_html(videos_df['view_count'].sum(), 'Views Scanned', threshold_high=100000, threshold_low=1000)
        )
    if comments_df is not None and not comments_df.empty:
        dashboard_components["kpi_yt_comments_scraped"] = gr.HTML(
            kpi_badge_html(len(comments_df), 'Comments Scraped', threshold_high=100, threshold_low=10)
        )
    # Channel analysis
    fig_channels = None
    if videos_df is not None and not videos_df.empty and 'channel' in videos_df.columns:
        channel_counts = videos_df['channel'].value_counts().nlargest(15).sort_values()
        if not channel_counts.empty:
            fig_channels, ax = plt.subplots(figsize=(8, 6))
            channel_counts.plot(kind='barh', ax=ax, color='coral')
            ax.set_title("Top 15 Channels by Video Volume", fontproperties=BANGLA_FONT, fontsize=18)
            ax.set_yticks(np.arange(len(channel_counts.index)))  # fix ticks before labeling
            ax.set_yticklabels(channel_counts.index, fontproperties=BANGLA_FONT, fontsize=14)
            ax.set_xlabel("Video Count", fontproperties=BANGLA_FONT, fontsize=14)
            for label in ax.get_xticklabels():
                label.set_fontproperties(BANGLA_FONT)
                label.set_fontsize(12)
            for label in ax.get_yticklabels():
                label.set_fontproperties(BANGLA_FONT)
                label.set_fontsize(14)
            legend = ax.get_legend()
            if legend:
                for text in legend.get_texts():
                    text.set_fontproperties(BANGLA_FONT)
            plt.tight_layout()
    dashboard_components["yt_channel_plot"] = fig_channels
    # Word cloud from comments
    fig_wc = None
    if comments_df is not None and not comments_df.empty and 'comment_text' in comments_df.columns:
        try:
            text = " ".join(comment for comment in comments_df['comment_text'].astype(str))
            text = clean_bengali_text(text)
            # Join special phrases
            for phrase, joined in PHRASES_TO_JOIN.items():
                text = text.replace(phrase, joined)
            # Extract and filter words
            words = re.findall(r'[\u0980-\u09FF_]{2,}', text)
            words = [w for w in words if w not in COMBINED_STOPWORDS]
            words = [w for w in words if len(w) > 1]
            words = [w for w in words if not re.search(r'[a-zA-Z]', w)]
            # Filter by frequency
            from collections import Counter
            word_freq = Counter(words)
            min_freq = 2
            most_common = set([w for w, _ in word_freq.most_common(3)])
            filtered_words = [w for w in words if word_freq[w] >= min_freq and w not in most_common]
            wc_text = " ".join(filtered_words)
            # Generate the word cloud
            if wc_text.strip():
                wc = WordCloud(
                    font_path=FONT_PATH,
                    width=1600,
                    height=900,
                    background_color='white',
                    stopwords=COMBINED_STOPWORDS,
                    collocations=False,
                    colormap='plasma',
                    max_words=250,
                    contour_width=2,
                    contour_color='darkorange',
                    regexp=r"[\u0980-\u09FF_]+"
                ).generate(wc_text)
                fig_wc, ax = plt.subplots(figsize=(15, 8))
                ax.imshow(wc, interpolation='bilinear')
                ax.axis("off")
                ax.set_title("Bengali Word Cloud from YouTube Comments", fontproperties=BANGLA_FONT, fontsize=22)
                plt.tight_layout()
        except Exception as e:
            logger.error(f"YouTube WordCloud failed: {e}")
    dashboard_components["yt_wordcloud_plot"] = fig_wc
    # Top commented videos
    fig_top_videos = None
    if comments_df is not None and not comments_df.empty and 'video_title' in comments_df.columns:
        top_videos = comments_df['video_title'].value_counts().nlargest(10)
        if not top_videos.empty:
            fig_top_videos, ax = plt.subplots(figsize=(10, 6))
            top_videos.plot(kind='barh', ax=ax, color='dodgerblue')
            ax.set_title("Top 10 Videos by Comment Count", fontproperties=BANGLA_FONT, fontsize=18)
            ax.set_xlabel("মন্তব্য সংখ্যা", fontproperties=BANGLA_FONT, fontsize=14)
            ax.set_ylabel("ভিডিও শিরোনাম", fontproperties=BANGLA_FONT, fontsize=14)
            yticks = np.arange(len(top_videos.index))
            ax.set_yticks(yticks)
            ax.set_yticklabels(top_videos.index, fontproperties=BANGLA_FONT, fontsize=14)
            for label in ax.get_xticklabels():
                label.set_fontproperties(BANGLA_FONT)
                label.set_fontsize(12)
            for label in ax.get_yticklabels():
                label.set_fontproperties(BANGLA_FONT)
                label.set_fontsize(14)
            legend = ax.get_legend()
            if legend:
                for text in legend.get_texts():
                    text.set_fontproperties(BANGLA_FONT)
            plt.tight_layout()
    dashboard_components["yt_top_videos_plot"] = fig_top_videos
    # Engagement rate per video
    fig_engagement = None
    if videos_df is not None and not videos_df.empty and comments_df is not None and not comments_df.empty:
        if 'video_id' in videos_df.columns and 'video_id' in comments_df.columns:
            try:
                # Count scraped comments per video
                comment_counts = comments_df['video_id'].value_counts().reset_index()
                comment_counts.columns = ['video_id', 'scraped_comment_count']
                # videos_df already has an API-sourced 'comment_count' column,
                # so the scraped tally gets its own name to avoid a merge collision
                merged = videos_df.merge(comment_counts, on='video_id', how='left')
                merged['scraped_comment_count'] = merged['scraped_comment_count'].fillna(0)
                # Calculate the engagement rate (scraped comments per view)
                merged['engagement_rate'] = merged['scraped_comment_count'] / merged['view_count'].replace(0, 1)
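                # Note: this rate counts only the comments actually scraped
                # (capped by max_comments_per_video), not the API's full
                # comment_count, so it is not directly comparable to the
                # engagement_rate computed in the analysis pipeline.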
                # Get the top 10 videos by engagement
                top_engagement = merged.nlargest(10, 'engagement_rate')
                if not top_engagement.empty:
                    fig_engagement, ax = plt.subplots(figsize=(10, 6))
                    ax.barh(top_engagement['video_title'], top_engagement['engagement_rate'], color='mediumseagreen')
                    ax.set_title("Top 10 Videos by Engagement Rate", fontproperties=BANGLA_FONT, fontsize=18)
                    ax.set_xlabel("এনগেজমেন্ট রেট (মন্তব্য/ভিউ)", fontproperties=BANGLA_FONT, fontsize=14)
                    ax.set_ylabel("ভিডিও শিরোনাম", fontproperties=BANGLA_FONT, fontsize=14)
                    yticks = np.arange(len(top_engagement['video_title']))
                    ax.set_yticks(yticks)
                    ax.set_yticklabels(top_engagement['video_title'], fontproperties=BANGLA_FONT, fontsize=14)
                    for label in ax.get_xticklabels():
                        label.set_fontproperties(BANGLA_FONT)
                        label.set_fontsize(12)
                    for label in ax.get_yticklabels():
                        label.set_fontproperties(BANGLA_FONT)
                        label.set_fontsize(14)
                    legend = ax.get_legend()
                    if legend:
                        for text in legend.get_texts():
                            text.set_fontproperties(BANGLA_FONT)
                    plt.tight_layout()
            except Exception as e:
                logger.error(f"Engagement rate calculation failed: {e}")
    dashboard_components["yt_engagement_plot"] = fig_engagement
    # Comment activity over time
    fig_time_series = None
    if comments_df is not None and not comments_df.empty and 'published_date_comment' in comments_df.columns:
        try:
            comments_df['published_date_comment'] = pd.to_datetime(comments_df['published_date_comment'])
            time_series = comments_df.set_index('published_date_comment').resample('D').size().reset_index()
            time_series.columns = ['date', 'count']
            if not time_series.empty:
                fig_time_series = gr.LinePlot(
                    value=time_series,
                    x='date',
                    y='count',
                    title="Comment Activity Over Time",
                    tooltip=['date', 'count'],
                    x_title="Date",
                    y_title="Number of Comments"
                )
        except Exception as e:
            logger.error(f"Error in comment activity plot: {e}")
    dashboard_components["yt_time_series_plot"] = fig_time_series
    return dashboard_components
# ==============================================================================
# GRADIO UI DEFINITION
# ==============================================================================
custom_css = """
body, .gradio-container {
    background: #181a20 !important;
    font-family: 'Inter', 'Noto Sans', sans-serif;
}
.gr-card {
    background: #23263a;
    border-radius: 18px;
    box-shadow: 0 4px 24px rgba(0,0,0,0.12);
    padding: 24px;
    margin-bottom: 24px;
}
.gr-title {
    color: #fff;
    font-size: 2.2rem;
    font-weight: 700;
    margin-bottom: 12px;
}
.gr-metric {
    color: #22d3ee;
    font-size: 2.5rem;
    font-weight: 800;
}
.gr-label {
    color: #94a3b8;
    font-size: 1.1rem;
    margin-bottom: 6px;
}
.gradio-row, .gradio-column {
    background: transparent !important;
}
.gradio-button {
    border-radius: 8px !important;
    background: linear-gradient(90deg,#3b82f6,#22d3ee) !important;
    color: #fff !important;
    font-weight: 600 !important;
    box-shadow: 0 2px 8px rgba(34,211,238,0.08);
    transition: background 0.2s;
}
.gradio-button:hover {
    background: linear-gradient(90deg,#22d3ee,#3b82f6) !important;
}
.gradio-markdown h1, .gradio-markdown h2, .gradio-markdown h3 {
    color: #fff !important;
}
.gradio-markdown {
    color: #cbd5e1 !important;
}
"""
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange"), title=APP_TITLE, css=custom_css) as app:
    gr.HTML("""
    <div class='gr-card' style='margin-bottom:32px;'>
        <div class='gr-title'>Prohori (প্রহরী)</div>
        <div style='color:#94a3b8;font-size:1.2rem;margin-bottom:8px;'>Analyze Google News & YouTube video trends, engagement, and comment activity for your search topics.</div>
        <div style='color:#22d3ee;font-size:1rem;'>Developed for CDSR by Arjon</div>
    </div>
    """)
    # --- STATE MANAGEMENT ---
    scraper_results_state = gr.State()
    youtube_results_state = gr.State()
    with gr.Tabs():
        with gr.TabItem("1. News Scraper", id=0):
            gr.HTML("<div class='gr-card' style='margin-bottom:24px;'><h2>News Scraper</h2><p>Search and filter news articles from top Bangladeshi sources. Use advanced filters and download results.</p></div>")
            with gr.Row():
                with gr.Column(scale=1):
                    gr.HTML("<div class='gr-card'><h3>Search Criteria</h3></div>")
                    search_keywords_textbox = gr.Textbox(
                        label="Search Keywords",
                        placeholder="e.g., বাংলাদেশ, নির্বাচন",
                        info="Keywords to search for in news articles."
                    )
                    sites_to_search_textbox = gr.Textbox(
                        label="Target Sites (Optional, comma-separated)",
                        placeholder="e.g., prothomalo.com",
                        info="Limit search to specific news sites."
                    )
                    start_date_textbox = gr.Textbox(
                        label="Start Date",
                        placeholder="YYYY-MM-DD or 'last week'",
                        info="Start date for news scraping."
                    )
                    end_date_textbox = gr.Textbox(
                        label="End Date",
                        placeholder="YYYY-MM-DD or 'today'",
                        info="End date for news scraping."
                    )
                    gr.HTML("<div class='gr-card'><h3>Scraping Parameters</h3></div>")
                    interval_days_slider = gr.Slider(
                        1, 7, 3, step=1,
                        label="Days per Interval",
                        info="How many days to group each scraping interval."
                    )
                    max_pages_slider = gr.Slider(
                        1, 25, 5, step=1,
                        label="Max Pages per Interval",
                        info="Maximum number of pages to fetch per interval."
                    )
                    filter_keywords_textbox = gr.Textbox(
                        label="Filter Keywords (comma-separated, optional)",
                        placeholder="e.g., ডাকসু, নোবেল",
                        info="Filter results by these keywords."
                    )
                    start_scraper_button = gr.Button("Start Scraping & Analysis", variant="primary")
                    scraper_progress = gr.Progress()
                with gr.Column(scale=2):
                    gr.HTML("<div class='gr-card'><h3>Filtered Results</h3></div>")
                    scraper_results_df = gr.DataFrame(
                        label="Filtered Results",
                        interactive=True
                    )
                    scraper_download_file = gr.File(
                        label="Download Filtered Results CSV"
                    )
        with gr.TabItem("2. News Analytics", id=1):
            gr.HTML("<div class='gr-card' style='margin-bottom:24px;'><h2>News Analytics Dashboard</h2><p>Visualize key metrics, trends, and top sources from scraped news data. All plots and metrics update dynamically.</p></div>")
            with gr.Row():
                with gr.Column(scale=1):
                    gr.HTML("<div class='gr-card'><h3>Key Metrics</h3></div>")
                    kpi_total_articles = gr.HTML()
                    kpi_unique_media = gr.HTML()
                    kpi_date_range = gr.HTML()
                with gr.Column(scale=2):
                    gr.HTML("<div class='gr-card'><h3>Trends</h3></div>")
                    dashboard_timeline_plot = gr.LinePlot(
                        label="News Volume Timeline"
                    )
            with gr.Row():
                with gr.Column(scale=1):
                    gr.HTML("<div class='gr-card'><h3>Top Sources</h3></div>")
                    dashboard_media_plot = gr.Plot(
                        label="Top Media Sources by Article Count"
                    )
                with gr.Column(scale=1):
                    gr.HTML("<div class='gr-card'><h3>Headline Word Cloud</h3></div>")
                    dashboard_wordcloud_plot = gr.Plot(
                        label="Headline Word Cloud"
                    )
        with gr.TabItem("3. YouTube Topic Analysis", id=2):
            gr.HTML("<div class='gr-card' style='margin-bottom:24px;'><h2>YouTube Topic Analysis</h2><p>Analyze YouTube video trends, engagement, and comment activity for your search topics.</p></div>")
            with gr.Row():
                with gr.Column(scale=1):
                    gr.HTML("<div class='gr-card'><h3>Search Criteria</h3></div>")
                    yt_search_keywords = gr.Textbox(
                        label="YouTube Search Keywords",
                        placeholder="e.g., প্রধান উপদেষ্টা",
                        info="Keywords to search for in YouTube videos."
                    )
                    yt_max_videos_slider = gr.Slider(
                        10, 100, 30, step=5,
                        label="Max Videos for Stats",
                        info="Maximum number of videos to scan for statistics."
                    )
                    yt_num_videos_comments_slider = gr.Slider(
                        1, 20, 5, step=1,
                        label="Videos for Comments",
                        info="Number of top videos to scrape comments from."
                    )
                    yt_max_comments_slider = gr.Slider(
                        10, 200, 50, step=10,
                        label="Max Comments per Video",
                        info="Maximum number of comments to fetch per video."
                    )
                    yt_published_after = gr.Textbox(
                        label="Published After (Optional)",
                        placeholder="YYYY-MM-DD",
                        info="Only include videos published after this date."
                    )
                    start_youtube_analysis_button = gr.Button(
                        "Start YouTube Analysis",
                        variant="primary"
                    )
                    yt_progress = gr.Progress()
                with gr.Column(scale=2):
                    gr.HTML("<div class='gr-card'><h3>Video Results</h3></div>")
                    yt_results_df = gr.DataFrame(
                        label="YouTube Video Results",
                        interactive=True
                    )
                    yt_videos_download_file = gr.File(
                        label="Download YouTube Video Results CSV"
                    )
                    yt_comments_df = gr.DataFrame(
                        label="YouTube Comments Results",
                        interactive=True
                    )
                    yt_comments_download_file = gr.File(
                        label="Download YouTube Comments CSV"
                    )
                    yt_dashboard_html = gr.HTML()
            with gr.Row():
                with gr.Column(scale=1):
                    gr.HTML("<div class='gr-card'><h3>Top Channels & Engagement</h3></div>")
                    kpi_yt_videos_found = gr.HTML()
                    kpi_yt_views_scanned = gr.HTML()
                    kpi_yt_comments_scraped = gr.HTML()
                    yt_channel_plot = gr.Plot(
                        label="Top Channels by Video Volume"
                    )
                    yt_channel_dominance_plot = gr.Plot(
                        label="Channel Dominance by View Count"
                    )
                    yt_top_videos_plot = gr.Plot(
                        label="Top Videos by Comment Count"
                    )
                    yt_content_quadrant_plot = gr.Plot(
                        label="Content Performance Quadrant"
                    )
                    yt_engagement_plot = gr.Plot(
                        label="Top Videos by Engagement Rate"
                    )
                with gr.Column(scale=1):
                    gr.HTML("<div class='gr-card'><h3>Comment Activity & Word Cloud</h3></div>")
                    yt_time_series_plot = gr.LinePlot(
                        label="Comment Activity Over Time"
                    )
                    yt_wordcloud_plot = gr.Plot(
                        label="Bengali Word Cloud from Comments"
                    )
                    yt_detailed_summary = gr.HTML()
    # --- EVENT HANDLERS ---
    def scraper_button_handler(search_keywords, sites, start_date, end_date, interval, max_pages, filter_keys):
        """Handle the news scraper button click event."""
        try:
            df, filtered_df = run_news_scraper_pipeline(
                search_keywords, sites, start_date, end_date,
                interval, max_pages, filter_keys
            )
            # gr.State components update only when returned as outputs,
            # so the full DataFrame lives in the returned components instead.
            dashboard = generate_scraper_dashboard(df)
            if not df.empty:
                csv_path = "news_results.csv"
                df.to_csv(csv_path, index=False)
                download_file = gr.File(value=csv_path, visible=True)
            else:
                download_file = gr.File(visible=False)
            return (
                filtered_df,
                download_file,
                dashboard["kpi_total_articles"],
                dashboard["kpi_unique_media"],
                dashboard["kpi_date_range"],
                dashboard["dashboard_timeline_plot"],
                dashboard["dashboard_media_plot"],
                dashboard["dashboard_wordcloud_plot"]
            )
        except Exception as e:
            logger.error(f"Error in scraper button handler: {str(e)}")
            # gr.Error only surfaces when raised; use a warning so the UI still resets
            gr.Warning(f"An error occurred during scraping: {str(e)}")
            return (
                pd.DataFrame(),
                gr.File(visible=False),
                gr.HTML(""), gr.HTML(""), gr.HTML(""),
                None, None, None
            )
    start_scraper_button.click(
        fn=scraper_button_handler,
        inputs=[
            search_keywords_textbox,
            sites_to_search_textbox,
            start_date_textbox,
            end_date_textbox,
            interval_days_slider,
            max_pages_slider,
            filter_keywords_textbox
        ],
        outputs=[
            scraper_results_df,
            scraper_download_file,
            kpi_total_articles,
            kpi_unique_media,
            kpi_date_range,
            dashboard_timeline_plot,
            dashboard_media_plot,
            dashboard_wordcloud_plot
        ]
    )
    def youtube_button_handler(keywords, max_videos, num_comments_videos, max_comments, published_after):
        """Handle the YouTube analysis button click event."""
        try:
            videos_df, comments_df, summary_html = run_youtube_analysis_pipeline(
                api_key=None,  # falls back to the YOUTUBE_API_KEY environment variable
                query=keywords,
                max_videos_for_stats=max_videos,
                num_videos_for_comments=num_comments_videos,
                max_comments_per_video=max_comments,
                published_after=published_after
            )
            yt_videos_csv = "youtube_videos.csv"
            yt_comments_csv = "youtube_comments.csv"
            if not videos_df.empty:
                videos_df.to_csv(yt_videos_csv, index=False)
                videos_download = gr.File(value=yt_videos_csv, visible=True)
            else:
                videos_download = gr.File(visible=False)
            if not comments_df.empty:
                # Add the video title and channel to comments if missing
                if "video_title" not in comments_df.columns and "video_id" in comments_df.columns:
                    title_map = videos_df.set_index("video_id")["video_title"].to_dict()
                    comments_df["video_title"] = comments_df["video_id"].map(title_map)
                if "channel" not in comments_df.columns and "channel_title" in comments_df.columns:
                    comments_df["channel"] = comments_df["channel_title"]
                comments_df.to_csv(yt_comments_csv, index=False)
                comments_download = gr.File(value=yt_comments_csv, visible=True)
            else:
                comments_download = gr.File(visible=False)
            dashboard = generate_youtube_dashboard(videos_df, comments_df)
            return (
                videos_df,
                videos_download,
                comments_df,
                comments_download,
                summary_html,
                dashboard["kpi_yt_videos_found"],
                dashboard["kpi_yt_views_scanned"],
                dashboard["kpi_yt_comments_scraped"],
                dashboard["yt_channel_plot"],
                dashboard["yt_channel_dominance_plot"],
                dashboard["yt_time_series_plot"],
                dashboard["yt_top_videos_plot"],
                dashboard["yt_content_quadrant_plot"],
                dashboard["yt_engagement_plot"],
                dashboard["yt_wordcloud_plot"],
                dashboard["yt_detailed_summary"]
            )
        except Exception as e:
            logger.error(f"Error in YouTube button handler: {str(e)}")
            # gr.Error only surfaces when raised; use a warning so the UI still resets
            gr.Warning(f"An error occurred during YouTube analysis: {str(e)}")
            # Return empty values to reset the UI (16 outputs)
            return (
                pd.DataFrame(),          # yt_results_df
                gr.File(visible=False),  # yt_videos_download_file
                pd.DataFrame(),          # yt_comments_df
                gr.File(visible=False),  # yt_comments_download_file
                gr.HTML(""),             # yt_dashboard_html
                gr.HTML(""),             # kpi_yt_videos_found
                gr.HTML(""),             # kpi_yt_views_scanned
                gr.HTML(""),             # kpi_yt_comments_scraped
                None,                    # yt_channel_plot
                None,                    # yt_channel_dominance_plot
                None,                    # yt_time_series_plot
                None,                    # yt_top_videos_plot
                None,                    # yt_content_quadrant_plot
                None,                    # yt_engagement_plot
                None,                    # yt_wordcloud_plot
                gr.HTML("")              # yt_detailed_summary
            )
    start_youtube_analysis_button.click(
        fn=youtube_button_handler,
        inputs=[
            yt_search_keywords,
            yt_max_videos_slider,
            yt_num_videos_comments_slider,
            yt_max_comments_slider,
            yt_published_after
        ],
        outputs=[
            yt_results_df,
            yt_videos_download_file,
            yt_comments_df,
            yt_comments_download_file,
            yt_dashboard_html,
            kpi_yt_videos_found,
            kpi_yt_views_scanned,
            kpi_yt_comments_scraped,
            yt_channel_plot,
            yt_channel_dominance_plot,
            yt_time_series_plot,
            yt_top_videos_plot,
            yt_content_quadrant_plot,
            yt_engagement_plot,
            yt_wordcloud_plot,
            yt_detailed_summary
        ]
    )
# ==============================================================================
# LAUNCH THE APP
# ==============================================================================
AUTH_USERS = [
    ("admin", "admin123"),
    ("user", "user123")
]
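# To require a login, pass auth=AUTH_USERS to app.launch() (see the commented-out
# arguments below); the credentials above are placeholders and should be changed
# before any real deployment.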
if __name__ == "__main__":
    app.launch(debug=True, share=True)  # auth=AUTH_USERS, ssr_mode=False