# Rahul-Sainy's picture
# Rename Home.py to app.py
# f7e3885 verified
import datetime
import streamlit as st
import io
import plotly.express as px
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from streamlit_extras.metric_cards import style_metric_cards
from streamlit_extras.chart_container import chart_container
from streamlit_extras.switch_page_button import switch_page
from streamlit_extras.app_logo import add_logo
from prophet import Prophet
from channelDataExtraction import getChannelData
from channelVideoDataExtraction import *
########################################################################################################################
# FUNCTIONS
########################################################################################################################
@st.cache_data
def _fetch_channel_data(api_key, channel_id):
    """Download channel metadata and per-video statistics (cached by Streamlit).

    This helper deliberately performs no ``st.session_state`` writes: Streamlit
    skips the function body on a cache hit, so side effects placed here would
    silently not happen for sessions that are served from the cache.

    Args:
        api_key: YouTube API key forwarded to the extraction helpers.
        channel_id: Channel id forwarded to ``getChannelData``.

    Returns:
        ``(channel_details, videos, all_video_data, videos_df)``, or four
        ``None`` values when ``getChannelData`` returns ``None``.
    """
    channel_details = getChannelData(api_key, channel_id)
    # A None result is treated as a bad channel id
    if channel_details is None:
        return None, None, None, None

    videos = getVideoList(api_key, channel_details["uploads"])
    videos_df = pd.DataFrame(videos)
    # Entries without an id cannot be looked up, so they are left out of the stats request
    video_ids = [video['id'] for video in videos if video['id'] is not None]
    all_video_data = buildVideoListDataframe(api_key, video_ids)
    return channel_details, videos, all_video_data, videos_df


def download_data(api_key, channel_id):
    """Load the channel data and publish it to ``st.session_state``.

    The expensive API calls are delegated to the cached ``_fetch_channel_data``;
    the session-state bookkeeping runs on every call so that it also happens when
    the data comes from the cache (e.g. in a brand-new browser session).

    Session state written on success:
        * ``all_video_df`` and ``api_key`` -- refreshed on every call.
        * ``start_index`` / ``end_index`` / ``video_id`` -- reset to ``0`` / ``10`` /
          ``None`` only when the (api_key, channel_id) pair differs from the one
          loaded previously, so pagination survives ordinary reruns.

    Args:
        api_key: YouTube API key.
        channel_id: Id of the channel to load.

    Returns:
        ``(channel_details, videos, all_video_data, videos_df)``, or four
        ``None`` values when the channel could not be loaded.
    """
    channel_details, videos, all_video_data, videos_df = _fetch_channel_data(api_key, channel_id)
    if channel_details is None:
        return None, None, None, None

    loaded_for = (api_key, channel_id)
    if st.session_state.get('loaded_channel_key') != loaded_for:
        # New channel (or first load in this session): start the video list from the top
        st.session_state.start_index = 0
        st.session_state.end_index = 10
        st.session_state['video_id'] = None
        st.session_state['loaded_channel_key'] = loaded_for

    st.session_state.all_video_df = all_video_data
    # Use the argument rather than re-reading st.session_state.API_KEY
    st.session_state.api_key = api_key
    return channel_details, videos, all_video_data, videos_df
def _show_next_video_page():
    """Button callback: move the pagination window in session state forward by 10."""
    st.session_state.start_index = st.session_state.end_index
    st.session_state.end_index += 10


def display_video_list(video_data, start_index, end_index, search_query=None):
    """Render a searchable, paginated list of videos with a per-row statistics button.

    Each row shows the thumbnail, id and title of a video plus a button that
    stores the id in ``st.session_state['video_id']`` and switches to the
    ``video_data`` page.

    Args:
        video_data: Iterable of dicts read via the keys ``'thumbnail'``, ``'id'``
            and ``'title'``.
        start_index: Initial slice start, also used when the search text changes.
        end_index: Initial slice end, also used when the search text changes.
        search_query: Initial content of the search box; ``None`` means empty.
    """
    if search_query is None:
        search_query = ""
    new_search_query = st.text_input("Search Videos by Title", search_query)

    # Initialize start_index and end_index in session_state
    if 'start_index' not in st.session_state:
        st.session_state.start_index = start_index
    if 'end_index' not in st.session_state:
        st.session_state.end_index = end_index

    # Reset pagination only when the search text actually changed since the last
    # run. Comparing against the default argument would reset on every rerun
    # while a search is active, making "Load next" a no-op for searches.
    previous_query = st.session_state.get('video_search_query', search_query)
    if new_search_query != previous_query:
        st.session_state.start_index = start_index
        st.session_state.end_index = end_index
    st.session_state['video_search_query'] = new_search_query

    # Case-insensitive substring match over the whole list; a missing title is
    # treated as an empty string instead of crashing on None.lower()
    needle = new_search_query.lower()
    filtered_videos = [video for video in video_data if needle in (video['title'] or "").lower()]

    # Paginate the filtered results
    paginated_videos = filtered_videos[st.session_state.start_index:st.session_state.end_index]

    for video in paginated_videos:
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.image(video['thumbnail'])
        with col2:
            st.write(video['id'])
        with col3:
            st.write(video['title'])
        with col4:
            # The id doubles as the widget key; rows without one get no button,
            # since several key-less buttons with the same label would collide.
            if video['id'] is not None and st.button("Check Video Statistics", key=video['id']):
                st.session_state['video_id'] = video['id']
                switch_page("video_data")

    # Offer the next page while results remain. The callback runs before the
    # rerun, so the new page is rendered immediately instead of one click late.
    if st.session_state.end_index < len(filtered_videos):
        st.button('Load next 10 videos', key='load_next', on_click=_show_next_video_page)
########################################################################################################################
# MAIN PAGE CONFIGURATION
########################################################################################################################
# Browser-tab title, icon and wide layout for the dashboard page.
page_settings = {
    "page_title": "Youtube Channel Analytics Dashboard",
    "page_icon": "📊",
    "layout": "wide",
}
st.set_page_config(**page_settings)
########################################################################################################################
# SIDE BAR CONFIGURATION
########################################################################################################################
st.title("YouTube Analytics Dashboard")

# Sidebar
st.sidebar.title("Settings")

# Sidebar: Enter Channel ID and YouTube API Key.
# The session-state slots are seeded first so the inputs can be pre-filled from them.
if 'API_KEY' not in st.session_state:
    st.session_state.API_KEY = ""
if 'CHANNEL_ID' not in st.session_state:
    st.session_state.CHANNEL_ID = ""

st.session_state.API_KEY = st.sidebar.text_input("Enter your YouTube API Key", st.session_state.API_KEY,
                                                 type="password")
st.session_state.CHANNEL_ID = st.sidebar.text_input("Enter the YouTube Channel ID", st.session_state.CHANNEL_ID)

# Nothing below can run without both credentials: show help and halt this run
if not st.session_state.API_KEY or not st.session_state.CHANNEL_ID:
    st.warning("Please enter your API Key and Channel ID.")
    # Display the GitHub link for the user manual
    user_manual_link = "https://github.com/zainmz/Youtube-Channel-Analytics-Dashboard"
    st.markdown(f"If you need help, please refer to the the GitHub Repository for the [User Manual]({user_manual_link}).")
    st.stop()

# Data Refresh Button
refresh_button = st.sidebar.button("Refresh Data")

if refresh_button:
    # The download is cached by its arguments, so calling it again with the same
    # key and channel would just return the cached result. Clear the cache first
    # so that "Refresh" really re-queries the API.
    st.cache_data.clear()
    with st.spinner("Refreshing data..."):
        channel_details, videos, all_video_data, videos_df = download_data(st.session_state.API_KEY,
                                                                           st.session_state.CHANNEL_ID)
else:
    channel_details, videos, all_video_data, videos_df = download_data(st.session_state.API_KEY,
                                                                       st.session_state.CHANNEL_ID)

if channel_details is None:
    st.warning("Invalid YouTube Channel ID. Please check and enter a valid Channel ID.")
    st.stop()

# Data Filters for fine-tuned data selection
st.sidebar.title("Data Filters")
num_videos = st.sidebar.slider("Select Number of Top Videos to Display:", 1, 50, 10)

# Convert the 'published_date' column to datetime format
all_video_data['published_date'] = pd.to_datetime(all_video_data['published_date'])

# Extract min and max publish dates as date objects for the date pickers
min_date = all_video_data['published_date'].min().date()
max_date = all_video_data['published_date'].max().date()

# Sidebar date input
start_date = st.sidebar.date_input("Select Start Date", min_date)
end_date = st.sidebar.date_input("Select End Date", max_date)

if start_date > end_date:
    st.sidebar.warning("Start date should be earlier than end date.")
    st.stop()

tag_search = st.sidebar.text_input("Search Videos by Tag")

date_range_start = pd.Timestamp(start_date)
date_range_end = pd.Timestamp(end_date)
# pd.Timestamp(end_date) is midnight at the START of the end day. Filtering with
# "<=" against it would drop every video published later that day (including the
# newest video under the default selection), so compare against the next midnight.
date_range_end_exclusive = date_range_end + pd.Timedelta(days=1)

filtered_data = all_video_data[(all_video_data['published_date'] >= date_range_start) &
                               (all_video_data['published_date'] < date_range_end_exclusive)]

if tag_search:
    # NOTE(review): assumes every 'tags' cell is a container or string -- confirm against the extractor
    filtered_data = filtered_data[filtered_data['tags'].apply(lambda tags: tag_search in tags)]
########################################################################################################################
# CHANNEL DETAILS AREA CONFIGURATION
########################################################################################################################
# Channel overview: logo, title/description, link to the channel and headline metrics
st.header("Channel Details", divider="green")

col1, col2, col3 = st.columns(3)

with col1:
    channel_thumbnail = channel_details['thumbnail']
    add_logo(channel_thumbnail, height=300)

    # Counts are converted to int so they can be shown with thousands separators
    view_count = int(channel_details['viewCount'])
    subscriber_count = int(channel_details['subscriberCount'])
    view_count_formatted = f"{view_count:,}"
    subscriber_count_formatted = f"{subscriber_count:,}"

    st.markdown(f"**Channel Title:** {channel_details['title']}")
    st.markdown(f"**Channel Description:** {channel_details['description']}")

with col3:
    # Go to Channel Button
    st.link_button("Go to Channel", f"https://www.youtube.com/channel/{st.session_state.CHANNEL_ID}")

# Second row: one metric card per column
col1, col2, col3 = st.columns(3)
metric_cards = (
    (col1, "Total Views", view_count_formatted),
    (col2, "Subscribers", subscriber_count_formatted),
    (col3, "Total Videos", len(videos)),
)
for card_column, card_label, card_value in metric_cards:
    card_column.metric(card_label, card_value, "")

style_metric_cards(
    background_color="#000000",
    border_left_color="#049204",
    border_color="#0E0E0E",
)
########################################################################################################################
# TOP VIDEO GRAPHS AREA
########################################################################################################################
def _render_top_videos_chart(column, data, count, heading, metric, axis_label, bar_color):
    """Draw one "top videos" bar chart inside ``column`` and return the plotted rows.

    Args:
        column: Streamlit column used as the drawing context.
        data: DataFrame holding at least the ``'title'`` and ``metric`` columns.
        count: Number of top rows to keep.
        heading: Subheader text shown above the chart.
        metric: Column to rank by and to plot on the y axis.
        axis_label: Y-axis title.
        bar_color: Marker color of the bars.

    Returns:
        The ``count`` rows of ``data`` with the largest ``metric`` values.
    """
    with column:
        st.subheader(heading)
        top_df = data.sort_values(by=metric, ascending=False).head(count)
        # chart_container is given the same rows that are plotted inside it
        with chart_container(top_df):
            fig = px.bar(top_df, x='title', y=metric)
            fig.update_layout(xaxis_title="Video Title",
                              yaxis_title=axis_label)
            fig.update_traces(marker_color=bar_color)
            st.plotly_chart(fig, use_container_width=True)
    return top_df


# Three side-by-side rankings of the filtered videos: by views, likes and comments
col1, col2, col3 = st.columns(3)

top_views_df = _render_top_videos_chart(col1, filtered_data, num_videos,
                                        f"Top {num_videos} Videos Based on Views",
                                        'view_count', "View Count", 'green')
top_likes_df = _render_top_videos_chart(col2, filtered_data, num_videos,
                                        f"Top {num_videos} Videos Based on Likes",
                                        'like_count', "Like Count", 'orange')
top_comments_df = _render_top_videos_chart(col3, filtered_data, num_videos,
                                           f"Top {num_videos} Based on Comments",
                                           'comment_count', "Comment Count", 'green')
########################################################################################################################
# CHANNEL GROWTH STATS
########################################################################################################################
st.subheader("Viewership Growth Over Time", divider="green")

# Series taken from the filtered rows; `dates` is reused further down the script
views = filtered_data['view_count']
dates = filtered_data['published_date']

# Single line+marker trace: view count of each video against its publish date
views_trace = go.Scatter(
    x=dates,
    y=views,
    mode='lines+markers',
    name='Views Over Time',
    line={'color': 'orange'},
)
fig = go.Figure(data=[views_trace])
fig.update_layout(
    title='Views Over Time',
    xaxis_title='Published Date',
    yaxis_title='Number of Views',
    template="plotly_dark",
)

st.plotly_chart(fig, use_container_width=True)
st.subheader("Predicted Viewership Growth Over Time", divider="green")

# Horizon shared by the forecast, the look-back window and the chart labels
forecast_days = 30

# Prophet expects the columns 'ds' (timestamp) and 'y' (value). rename() builds a
# new frame, so the column labels of all_video_data are left untouched.
forecast_df = all_video_data[['published_date', 'view_count']].rename(
    columns={'published_date': 'ds', 'view_count': 'y'})

if forecast_df['y'].notna().sum() < 2:
    # Prophet refuses to fit on fewer than two non-NaN rows; explain instead of crashing
    st.info("Not enough video data to generate a forecast.")
else:
    with st.spinner(f"Predicting Views for the next {forecast_days} Days"):
        # Initialize the Prophet model
        model = Prophet(
            yearly_seasonality=False,
            weekly_seasonality=True,
            daily_seasonality=True,
            seasonality_mode='additive')

        # Fit the model with the data
        model.fit(forecast_df)

        # History plus `forecast_days` future rows
        future_dates = model.make_future_dataframe(periods=forecast_days)

        # Predict views for the future dates
        forecast = model.predict(future_dates)

        # Keep only the rows that lie beyond the last observed date
        last_date = forecast_df['ds'].max()
        forecasted_period = forecast[forecast['ds'] > last_date]

        # Observed rows from the same-length window before the last date. A
        # dedicated name is used so the sidebar's start_date is not overwritten.
        history_window_start = last_date - datetime.timedelta(days=forecast_days)
        last_30_days = forecast_df[(forecast_df['ds'] > history_window_start) & (forecast_df['ds'] <= last_date)]

        # Plotting using Plotly
        trace1 = go.Scatter(x=last_30_days['ds'], y=last_30_days['y'], mode='lines',
                            name=f'Actual Views (Last {forecast_days} Days)')
        trace2 = go.Scatter(x=forecasted_period['ds'], y=forecasted_period['yhat'], mode='lines',
                            name=f'Predicted Views (Next {forecast_days} Days)')

        layout = go.Layout(
            title=f"YouTube Views: Last {forecast_days} Days and Forecast for Next {forecast_days} Days",
            xaxis_title="Date",
            yaxis_title="Views")

        fig = go.Figure(data=[trace1, trace2], layout=layout)

        # Display the combined historical and forecast data in Streamlit using Plotly
        st.plotly_chart(fig, use_container_width=True)
########################################################################################################################
# WORD CLOUD & LIKE TO VIEW RATIO
########################################################################################################################
col1, col2 = st.columns(2)

with col1:
    st.divider()
    with st.spinner("Generating Word Cloud..."):
        st.subheader("Most Common Tags")

        # Flatten the tags of all filtered videos into one space-separated string
        all_tags = " ".join(" ".join(tags) for tags in filtered_data['tags'])

        # WordCloud.generate raises ValueError when the text contains no usable
        # words (e.g. no videos or no tags in the selected range)
        try:
            wordcloud = WordCloud(width=800, height=400, background_color='black').generate(all_tags)
        except ValueError:
            wordcloud = None

        if wordcloud is None:
            st.info("No tags available for the selected videos.")
        else:
            # Plotting the word cloud using matplotlib
            cloud_figure = plt.figure(figsize=(10, 5))
            try:
                plt.imshow(wordcloud, interpolation='bilinear')
                plt.axis('off')
                plt.tight_layout(pad=0)

                # Saving the figure to a bytes buffer
                buf = io.BytesIO()
                plt.savefig(buf, format="png", bbox_inches='tight', pad_inches=0)
            finally:
                # pyplot keeps figures alive until closed; without this, every
                # script rerun would leave another figure in memory
                plt.close(cloud_figure)
            buf.seek(0)

            st.image(buf, use_column_width=True)

with col2:
    # Like-to-view ratio per video. assign() returns a new frame, which avoids
    # writing a column into a filtered slice of all_video_data.
    filtered_data = filtered_data.assign(
        like_to_view_ratio=filtered_data['like_count'] / filtered_data['view_count'])
    like_to_view_ratio = filtered_data['like_to_view_ratio']

    st.divider()
    st.subheader("Like-to-View Ratio Over Time")

    # Time series of the ratio; `dates` is the published_date series taken from
    # filtered_data earlier in the script, so it lines up row by row
    fig_ratio = go.Figure()
    fig_ratio.add_trace(go.Scatter(x=dates, y=like_to_view_ratio, mode='lines+markers', name='Like-to-View Ratio',
                                   line=dict(color='green')))

    fig_ratio.update_layout(xaxis_title='Published Date',
                            yaxis_title='Like-to-View Ratio',
                            template="plotly_dark")

    # Display the plot in Streamlit
    st.plotly_chart(fig_ratio, use_container_width=True)
########################################################################################################################
# DETAILED VIDEO STATS SELECTION SECTION
########################################################################################################################
st.divider()
st.subheader("Detailed Video Statistics Video Selection")
st.write("Click on view statistics to get detailed information related to the selected video")

# Video picker; the initial page window covers list positions 0 to 10
display_video_list(videos, start_index=0, end_index=10)