Spaces:

AmritSbisht
/

QuickVid_AI_-_Video_Analysis_and_Summary

Sleeping

App Files Files Community

QuickVid_AI_-_Video_Analysis_and_Summary / app.py

AmritSbisht

Update app.py

d6c6303 verified about 1 year ago

raw

history blame contribute delete

22.5 kB

	import plotly.graph_objects as go
	import streamlit as st
	import requests
	from youtube_transcript_api import YouTubeTranscriptApi
	import re
	import json
	import pandas as pd
	import plotly.express as px
	import matplotlib.pyplot as plt
	import io
	import base64
	from datetime import datetime
	import subprocess
	import time
	import os

	def start_backend():
	if not os.path.exists("backend.log"):
	with open("backend.log", "w") as f:
	pass # Create an empty log file if it doesn't exist

	# Start the backend server in the background
	backend_process = subprocess.Popen(
	["uvicorn", "backend:app", "--host", "0.0.0.0", "--port", "8000"],
	stdout=open("backend.log", "a"),
	stderr=subprocess.STDOUT
	)
	print("Backend server started on port 8000")

	# Wait a few seconds to ensure the backend is up and running
	time.sleep(5)

	# Call the function to start the backend
	start_backend()

	# Set page configuration
	st.set_page_config(page_title="AI Video Digest", page_icon="🎥", layout="wide")

	# Backend API URL
	API_URL = "http://localhost:8000"

	def extract_video_id(url):
	patterns = [
	r'(?:v=\|\/)([0-9A-Za-z_-]{11}).*',
	r'(?:embed\/)([0-9A-Za-z_-]{11})',
	r'(?:watch\?v=)([0-9A-Za-z_-]{11})',
	r'youtu\.be\/([0-9A-Za-z_-]{11})'
	]
	for pattern in patterns:
	match = re.search(pattern, url)
	if match: return match.group(1)
	return None

	def get_thumbnail(video_id):
	return f"https://img.youtube.com/vi/{video_id}/mqdefault.jpg"

	# Initialize session state variables
	if "history" not in st.session_state:
	st.session_state.history = []
	if "active_tab" not in st.session_state:
	st.session_state.active_tab = "summarize"

	# Sidebar navigation
	st.sidebar.title("Navigation")
	page = st.sidebar.radio("Go to", ["Summarize", "History", "Settings", "About"])
	st.session_state.active_tab = page.lower()

	if st.session_state.active_tab == "summarize":
	st.title("🎥 AI Video Digest")
	st.write("Upload a video or provide a YouTube link to get a concise summary.")

	# Input method selection
	input_method = st.radio("Choose Input Method:", ["YouTube URL", "Upload Video"])

	# Advanced options
	with st.expander("Advanced Options"):
	col1, col2 = st.columns(2)
	with col1:
	summary_format = st.selectbox(
	"Summary Format",
	["markdown", "bullet", "narrative"],
	help="Choose the format of your summary"
	)
	summary_length = st.selectbox(
	"Summary Length",
	["short", "medium", "long"],
	index=1,
	help="Short (100-150 words), Medium (250-300 words), Long (400-500 words)"
	)
	with col2:
	target_language = st.selectbox(
	"Output Language",
	["english", "spanish", "french", "german", "chinese", "japanese", "arabic"],
	help="Translate the summary to this language"
	)
	include_sentiment = st.checkbox("Include Sentiment Analysis", value=False)
	include_keywords = st.checkbox("Extract Keywords", value=True)

	if input_method == "YouTube URL":
	url = st.text_input("Enter YouTube URL:", placeholder="https://youtube.com/watch?v=...")
	# Preview the video if URL is valid
	if url:
	video_id = extract_video_id(url)
	if video_id:
	col1, col2 = st.columns([1, 2])
	with col1:
	st.image(get_thumbnail(video_id), use_container_width=True)
	with col2:
	st.write("Ready to summarize this video. Click the button below to process.")

	if st.button("Generate Summary", key="youtube_summary_btn"):
	if url:
	with st.spinner("Processing video..."):
	try:
	# Prepare the request with options
	request_data = {
	"url": url,
	"options": {
	"format": summary_format,
	"length": summary_length,
	"language": target_language,
	"include_sentiment": include_sentiment,
	"include_keywords": include_keywords
	}
	}
	response = requests.post(
	f"{API_URL}/summarize/youtube",
	json=request_data
	)
	if response.status_code == 200:
	data = response.json()
	st.success("Summary generated successfully!")
	# Create tabs for different sections
	tab1, tab2, tab3 = st.tabs(["Summary", "Details", "Full Transcript"])
	with tab1:
	st.markdown(data["summary"])
	# Download options
	st.download_button(
	"Download Summary (TXT)",
	data["summary"],
	file_name="summary.txt",
	mime="text/plain"
	)
	with tab2:
	# Display metadata
	if "metadata" in data:
	metadata = data["metadata"]
	# Video info
	if "title" in metadata:
	st.subheader("Video Information")
	st.write(f"Title: {metadata.get('title', 'N/A')}")
	st.write(f"Author: {metadata.get('author', 'N/A')}")
	st.write(f"Length: {metadata.get('length_seconds', 0)} seconds")
	st.write(f"Views: {metadata.get('views', 'N/A')}")
	# Sentiment analysis
	if "sentiment" in metadata:
	st.subheader("Sentiment Analysis")
	sentiment = metadata["sentiment"]
	compound_score = sentiment["scores"]["compound"]
	fig = go.Figure(go.Indicator(
	mode="gauge+number",
	value=compound_score,
	domain={'x': [0, 1], 'y': [0, 1]},
	title={'text': "Sentiment Score"},
	gauge={
	'axis': {'range': [-1, 1]},
	'bar': {'color': "darkblue"},
	'steps': [
	{'range': [-1, -0.25], 'color': "red"},
	{'range': [-0.25, 0.25], 'color': "gray"},
	{'range': [0.25, 1], 'color': "green"}
	]
	}
	))
	st.plotly_chart(fig, key="youtube_sentiment_gauge")
	st.write(f"Overall Sentiment: {sentiment['overall'].capitalize()}")
	# Detailed sentiment scores
	scores = sentiment["scores"]
	st.write("Detailed Scores:")
	score_df = pd.DataFrame({
	"Metric": ["Positive", "Neutral", "Negative", "Compound"],
	"Score": [scores["pos"], scores["neu"], scores["neg"], scores["compound"]]
	})
	st.dataframe(score_df)
	# Keywords
	if "keywords" in metadata:
	st.subheader("Top Keywords")
	keywords = metadata["keywords"]
	st.write(", ".join(keywords))
	with tab3:
	st.text_area("Full Transcript", data["transcript"], height=300)
	# Add to history
	st.session_state.history.append(data)
	else:
	st.error(f"Error: {response.json().get('detail', 'Unknown error')}")
	except Exception as e:
	st.error(f"Error processing request: {str(e)}")
	else:
	st.warning("Please enter a YouTube URL!")

	else: # Upload Video option
	uploaded_file = st.file_uploader("Upload Video File", type=["mp4", "avi", "mov", "mkv"])
	if uploaded_file:
	st.video(uploaded_file)
	if uploaded_file and st.button("Generate Summary", key="upload_summary_btn"):
	with st.spinner("Processing video..."):
	try:
	# Prepare the form data
	files = {"file": uploaded_file.getvalue()}
	# Add options as form data
	params = {
	"format": summary_format,
	"length": summary_length,
	"language": target_language,
	"include_sentiment": str(include_sentiment).lower(),
	"include_keywords": str(include_keywords).lower()
	}
	response = requests.post(
	f"{API_URL}/summarize/upload",
	files=files,
	params=params
	)
	if response.status_code == 200:
	data = response.json()
	st.success("Summary generated successfully!")
	# Create tabs for different sections
	tab1, tab2, tab3 = st.tabs(["Summary", "Details", "Full Transcript"])
	with tab1:
	st.markdown(data["summary"])
	# Download options
	st.download_button(
	"Download Summary (TXT)",
	data["summary"],
	file_name="summary.txt",
	mime="text/plain"
	)
	with tab2:
	# Display metadata
	if "metadata" in data:
	metadata = data["metadata"]
	# Video info
	st.subheader("Video Information")
	st.write(f"Filename: {metadata.get('filename', 'N/A')}")
	st.write(f"Duration: {metadata.get('duration_seconds', 0)} seconds")
	# Sentiment analysis
	if "sentiment" in metadata:
	st.subheader("Sentiment Analysis")
	sentiment = metadata["sentiment"]
	# Display sentiment score with a gauge chart
	compound_score = sentiment["scores"]["compound"]
	fig = go.Figure(go.Indicator(
	mode="gauge+number",
	value=compound_score,
	domain={'x': [0, 1], 'y': [0, 1]},
	title={'text': "Sentiment Score"},
	gauge={
	'axis': {'range': [-1, 1]},
	'bar': {'color': "darkblue"},
	'steps': [
	{'range': [-1, -0.25], 'color': "red"},
	{'range': [-0.25, 0.25], 'color': "gray"},
	{'range': [0.25, 1], 'color': "green"}
	]
	}
	))
	st.plotly_chart(fig, key="upload_sentiment_gauge")
	st.write(f"Overall Sentiment: {sentiment['overall'].capitalize()}")
	# Keywords
	if "keywords" in metadata:
	st.subheader("Top Keywords")
	keywords = metadata["keywords"]
	st.write(", ".join(keywords))
	with tab3:
	st.text_area("Full Transcript", data["transcript"], height=300)
	# Add to history
	st.session_state.history.append(data)
	else:
	st.error(f"Error: {response.json().get('detail', 'Unknown error')}")
	except Exception as e:
	st.error(f"Error processing request: {str(e)}")

	elif st.session_state.active_tab == "history":
	st.title("📚 Summary History")
	if st.button("Refresh History"):
	try:
	response = requests.get(f"{API_URL}/summaries")
	if response.status_code == 200:
	st.session_state.history = response.json()
	st.success("History refreshed successfully!")
	else:
	st.error("Failed to refresh history")
	except Exception as e:
	st.error(f"Error: {str(e)}")

	# Display history
	if st.session_state.history:
	search_term = st.text_input("Search in summaries:", "")
	for i, item in enumerate(reversed(st.session_state.history)):
	if search_term and search_term.lower() not in item["summary"].lower() and search_term.lower() not in item["transcript"].lower():
	continue
	title = item.get("metadata", {}).get("title", f"Summary {i+1}")
	timestamp = item.get("metadata", {}).get("timestamp", "")
	if timestamp:
	try:
	timestamp = datetime.fromisoformat(timestamp).strftime("%Y-%m-%d %H:%M")
	except:
	pass
	with st.expander(f"{title} ({timestamp})"):
	tab1, tab2 = st.tabs(["Summary", "Details"])
	with tab1:
	st.markdown(item["summary"])
	col1, col2, col3 = st.columns(3)
	with col1:
	st.download_button(
	"Download as TXT",
	item["summary"],
	file_name=f"summary_{i}.txt",
	mime="text/plain"
	)
	with col2:
	md_content = f"# {title}\n## Summary\n{item['summary']}\n## Metadata\n"
	for k, v in item.get("metadata", {}).items():
	if k not in ["options", "timestamp"]:
	md_content += f"- {k}: {v}\n"
	st.download_button(
	"Download as MD",
	md_content,
	file_name=f"summary_{i}.md",
	mime="text/markdown"
	)
	with col3:
	st.download_button(
	"Download as JSON",
	json.dumps(item, indent=2),
	file_name=f"summary_{i}.json",
	mime="application/json"
	)
	with tab2:
	metadata = item.get("metadata", {})
	if "video_id" in metadata:
	video_id = metadata["video_id"]
	st.image(get_thumbnail(video_id), width=320)
	meta_df = pd.DataFrame(
	[(k, str(v)) for k, v in metadata.items() if k not in ["sentiment", "keywords", "options"]],
	columns=["Property", "Value"]
	)
	st.dataframe(meta_df, use_container_width=True)
	show_transcript = st.checkbox(f"Show Full Transcript", key=f"transcript_{i}_{item.get('id', '')}")
	if show_transcript:
	st.text_area("Full Transcript", item["transcript"], height=300, key=f"transcript_text_{i}_{item.get('id', '')}")
	if "id" in item:
	if st.button(f"Delete this summary", key=f"delete_{item['id']}"):
	try:
	response = requests.delete(f"{API_URL}/summaries/{item['id']}")
	if response.status_code == 200:
	st.success("Summary deleted successfully!")
	st.session_state.history = [h for h in st.session_state.history if h.get("id") != item["id"]]
	st.rerun()
	else:
	st.error("Failed to delete summary")
	except Exception as e:
	st.error(f"Error: {str(e)}")
	else:
	st.info("No summaries in history yet. Try summarizing a video first!")

	elif st.session_state.active_tab == "settings":
	st.title("⚙️ Settings")
	st.subheader("API Configuration")
	api_url = st.text_input("API URL", value="http://localhost:8000")
	if st.button("Save Settings"):
	st.session_state.api_url = api_url
	st.success("Settings saved successfully!")

	st.subheader("Model Settings")
	llm_model = st.selectbox(
	"LLM Model",
	["gemini-pro", "gemini-pro-vision"], # Updated to Gemini models
	index=0,
	help="The Gemini model to use for summarization"
	)
	temperature = st.slider(
	"Temperature",
	min_value=0.0,
	max_value=1.0,
	value=0.7,
	step=0.1,
	help="Controls randomness in generation (0 = deterministic, 1 = creative)"
	)

	st.subheader("Default Summary Settings")
	default_format = st.selectbox(
	"Default Format",
	["markdown", "bullet", "narrative"],
	index=0
	)
	default_length = st.selectbox(
	"Default Length",
	["short", "medium", "long"],
	index=1
	)
	default_language = st.selectbox(
	"Default Output Language",
	["english", "spanish", "french", "german", "chinese", "japanese", "arabic"],
	index=0
	)

	if st.button("Save Default Settings"):
	st.session_state.default_settings = {
	"format": default_format,
	"length": default_length,
	"language": default_language,
	"model": llm_model,
	"temperature": temperature
	}
	st.success("Default settings saved!")

	st.subheader("Data Management")
	if st.button("Clear History"):
	if st.session_state.history:
	st.session_state.history = []
	st.success("History cleared successfully!")
	else:
	st.info("No history to clear.")

	st.subheader("API Status")
	try:
	response = requests.get(f"{API_URL}/health")
	if response.status_code == 200:
	st.success(f"API is online. Version: {response.json().get('api_version', 'Unknown')}")
	else:
	st.error("API is not responding correctly")
	except Exception as e:
	st.error(f"Could not connect to API: {str(e)}")

	elif st.session_state.active_tab == "about":
	st.title("ℹ️ About AI Video Digest")
	st.markdown("""
	## Overview
	This application helps you summarize video content quickly and effectively. Whether you have a YouTube URL
	or a video file, our tool can generate concise summaries, analyze sentiment, and extract key information.
	## Features
	- YouTube Video Summarization: Summarize any YouTube video by providing its URL
	- Video File Processing: Upload and summarize your own video files
	- Customizable Summaries: Adjust the format, length, and language of your summaries
	- Sentiment Analysis: Understand the emotional tone of the video content
	- Keyword Extraction: Identify the most important topics covered in the video
	- Export Options: Download summaries in various formats (TXT, MD, JSON)
	- History Management: Access your past summaries anytime
	## How It Works
	1. For YouTube videos, we extract the transcript using YouTube's API
	2. For uploaded videos, we convert speech to text using speech recognition
	3. The transcript is processed by an AI language model to generate a concise summary
	4. Additional analysis is performed to extract sentiment and keywords
	5. Results are presented in an easy-to-navigate interface
	## Technologies Used
	- Frontend: Streamlit
	- Backend: FastAPI
	- AI Model: Google's Gemini API
	- Speech Recognition: Google Speech Recognition API
	- Sentiment Analysis: NLTK's VADER
	## Contact
	If you have any questions, suggestions, or need support, please contact us at support@videosummarizer.com
	""")
	st.sidebar.info("Version 1.1.0")

	# Footer
	st.sidebar.markdown("---")
	st.sidebar.markdown("Feedback : Amritsinghbist@gmail.com")