# Youtube-sum / app.py
# Uploaded by simplytaps via huggingface_hub (commit 6958a2d, verified)
"""
Gradio Frontend for YouTube to Book Summary Converter
Deploy on Hugging Face Spaces
"""
import os
import gradio as gr
from datetime import datetime
from typing import Optional, Tuple
import tempfile
import textwrap
# PDF and Word generation
from reportlab.lib.pagesizes import letter, A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
from docx import Document
from docx.shared import Inches, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
# Import our modules
from config import config
from summarizer import YouTubeBookSummarizer, create_summarizer
from youtube_transcript import YouTubeExtractor, extract_transcript, extract_transcript_via_api
from utils.statistics import calculate_transcript_stats
# Configuration for transcript extraction
# HF Spaces blocks YouTube, so we need to use external APIs
TRANSCRIPT_API_URL = os.getenv("TRANSCRIPT_API_URL", "")
# Default YouTube URL (can be set as HF Space secret)
DEFAULT_YOUTUBE_URL = os.getenv("DEFAULT_YOUTUBE_URL", "")
def create_pdf(summary: str, video_title: str, video_id: str, url: str, output_path: str) -> str:
    """Render the book summary as a PDF file.

    Args:
        summary: Markdown-flavored summary text ('#'/'##' headings, '**bold**').
        video_title: Title printed at the top of the document.
        video_id: YouTube video id shown in the metadata section.
        url: Source URL shown in the metadata section.
        output_path: Destination path for the generated PDF.

    Returns:
        The output path, for caller convenience.
    """
    def escape_markup(text: str) -> str:
        """Escape characters ReportLab's mini-HTML Paragraph parser treats specially."""
        return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

    def bold_markers_to_tags(text: str) -> str:
        """Convert paired '**' markers to <b>...</b> tags.

        An unpaired trailing '**' is kept literal instead of producing an
        unclosed <b> tag, which would make Paragraph raise a parse error
        (the original sequential replace could emit '<b>' with no '</b>').
        """
        parts = text.split('**')
        if len(parts) % 2 == 0:
            # Odd number of markers: fold the dangling fragment back in.
            parts[-2] = parts[-2] + '**' + parts[-1]
            del parts[-1]
        return ''.join(
            f'<b>{part}</b>' if i % 2 == 1 else part
            for i, part in enumerate(parts)
        )

    doc = SimpleDocTemplate(
        output_path,
        pagesize=letter,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72
    )
    styles = getSampleStyleSheet()
    # Custom styles
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=24,
        spaceAfter=30,
        alignment=TA_CENTER,
        textColor='#333333'
    )
    heading_style = ParagraphStyle(
        'CustomHeading',
        parent=styles['Heading2'],
        fontSize=14,
        spaceBefore=20,
        spaceAfter=10,
        textColor='#555555'
    )
    body_style = ParagraphStyle(
        'CustomBody',
        parent=styles['Normal'],
        fontSize=11,
        leading=16,
        alignment=TA_JUSTIFY,
        spaceAfter=12
    )
    meta_style = ParagraphStyle(
        'MetaStyle',
        parent=styles['Normal'],
        fontSize=10,
        textColor='#666666',
        spaceAfter=6
    )

    story = []
    # Title (escaped: a '&' or '<' in the video title would crash Paragraph)
    story.append(Paragraph(f"πŸ“š {escape_markup(video_title)}", title_style))
    story.append(Spacer(1, 20))
    # Metadata
    story.append(Paragraph(f"<b>Generated:</b> {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", meta_style))
    story.append(Paragraph(f"<b>Video ID:</b> {video_id}", meta_style))
    story.append(Paragraph(f"<b>Source:</b> {escape_markup(url)}", meta_style))
    story.append(Spacer(1, 30))
    # Horizontal line simulation
    story.append(Paragraph("_" * 80, meta_style))
    story.append(Spacer(1, 20))

    # Translate the markdown-ish summary line by line into flowables.
    lines = summary.split('\n')
    for line in lines:
        line = line.strip()
        if not line:
            story.append(Spacer(1, 10))
            continue
        # Check if it's a heading (starts with # or ##)
        if line.startswith('## '):
            story.append(Paragraph(escape_markup(line[3:]), heading_style))
        elif line.startswith('# '):
            story.append(Paragraph(escape_markup(line[2:]), heading_style))
        elif line.startswith('**') and line.endswith('**'):
            # A whole-line bold span acts as a subheading; render the bold
            # instead of showing literal '**' markers.
            story.append(Paragraph(bold_markers_to_tags(escape_markup(line)), heading_style))
        else:
            # Escape first, then convert markdown bold to tags.
            story.append(Paragraph(bold_markers_to_tags(escape_markup(line)), body_style))

    story.append(Spacer(1, 30))
    story.append(Paragraph("_" * 80, meta_style))
    story.append(Spacer(1, 10))
    story.append(Paragraph("<i>Generated by YouTube to Book Summary Converter</i>", meta_style))
    doc.build(story)
    return output_path
def create_word_doc(summary: str, video_title: str, video_id: str, url: str, output_path: str) -> str:
    """Write the book summary to a Word (.docx) document.

    Mirrors the markdown-like structure of ``summary``: '#'/'##' lines become
    headings, whole-line '**...**' spans become bold subheadings, and inline
    '**bold**' runs are rendered bold inside regular paragraphs.

    Returns the output path for caller convenience.
    """
    document = Document()

    # Centered title line.
    title_heading = document.add_heading(f"πŸ“š {video_title}", 0)
    title_heading.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Metadata block: one "Label: value" paragraph per field, bold label.
    document.add_paragraph()
    for label, value in (
        ("Generated: ", datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
        ("Video ID: ", video_id),
        ("Source: ", url),
    ):
        meta = document.add_paragraph()
        meta.add_run(label).bold = True
        meta.add_run(value)

    # Visual separator.
    document.add_paragraph("─" * 50)

    # Body: translate the markdown-ish summary line by line.
    for raw_line in summary.split('\n'):
        text = raw_line.strip()
        if not text:
            continue
        if text.startswith('## '):
            document.add_heading(text[3:], level=2)
        elif text.startswith('# '):
            document.add_heading(text[2:], level=1)
        elif text.startswith('**') and text.endswith('**'):
            # A whole-line bold span acts as a subheading.
            subheading = document.add_paragraph()
            subheading.add_run(text.strip('*')).bold = True
        else:
            # Alternate plain/bold runs around '**' markers: segments at odd
            # indices of the split fall inside a '**' pair.
            paragraph = document.add_paragraph()
            for index, segment in enumerate(text.split('**')):
                run = paragraph.add_run(segment)
                if index % 2 == 1:
                    run.bold = True

    # Footer.
    document.add_paragraph()
    document.add_paragraph("─" * 50)
    closing = document.add_paragraph()
    closing.add_run("Generated by YouTube to Book Summary Converter").italic = True

    document.save(output_path)
    return output_path
def create_transcript_file(transcript_text: str, video_title: str, video_id: str, url: str, output_path: str) -> str:
    """Write the raw transcript to a plain-text file.

    The file carries a small markdown-style header (title, video id, source,
    extraction timestamp), the transcript body between separator rules, and a
    one-line attribution footer.

    Returns the output path for caller convenience.
    """
    separator = '─' * 50
    extracted_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Assemble the exact same layout as a single newline-joined document.
    document_lines = [
        f"# πŸ“ Transcript: {video_title}",
        f"**Video ID:** {video_id}",
        f"**Source:** {url}",
        f"**Extracted:** {extracted_at}",
        separator,
        transcript_text,
        separator,
        "*Extracted by YouTube to Book Summary Converter*",
        "",  # trailing newline
    ]
    with open(output_path, 'w', encoding='utf-8') as handle:
        handle.write('\n'.join(document_lines))
    return output_path
def process_video(
    url: str,
    api_key: Optional[str] = None,
    language: str = "en",
    progress: gr.Progress = None
) -> Tuple[str, str, str, str, str, str, str]:
    """
    Process a YouTube video and generate summary.

    Args:
        url: YouTube video URL
        api_key: Optional Groq API key (falls back to env variable)
        language: Transcript language
        progress: Gradio progress tracker

    Returns:
        Tuple of (summary, video_info, status_message, pdf_file, word_file, md_file, transcript_file)
    """
    if not url or not url.strip():
        return "", "", "⚠️ Please enter a YouTube URL", None, None, None, None
    # Validate URL before doing any network work.
    extractor = YouTubeExtractor()
    video_id = extractor.extract_video_id(url)
    if not video_id:
        return "", "", "❌ Invalid YouTube URL. Please check and try again.", None, None, None, None

    # Helper for progress (progress is None when called outside a Gradio event).
    def update_progress(value, desc=""):
        if progress is not None:
            progress(value, desc=desc)

    def format_count(value) -> str:
        # Only ints take the thousands-separator format; applying ':,' to the
        # previous 'N/A' string fallback raised ValueError at format time.
        return f"{value:,}" if isinstance(value, int) else "N/A"

    try:
        # Step 1: Extract transcript (20%)
        update_progress(0.1, "πŸ“₯ Extracting transcript...")
        # Extract transcript - automatically uses best available method
        # On HF Spaces: Uses RapidAPI or custom API (direct YouTube access is blocked)
        # Locally: Uses youtube_transcript_api directly
        transcript_result = extract_transcript(url, language)
        if not transcript_result['success']:
            error_msg = transcript_result['error']
            help_msg = """
πŸ’‘ **How to use on Hugging Face Spaces:**
1. **Option A**: Add `RAPIDAPI_KEY` secret (get free key at rapidapi.com)
2. **Option B**: Use the "Manual Transcript" tab below to paste transcript text directly"""
            return "", "", f"❌ Failed to extract transcript: {error_msg}{help_msg}", None, None, None, None
        metadata = transcript_result['metadata']
        video_title = metadata.get('title', f'Video {video_id}')
        transcript_text = transcript_result.get('transcript', '')

        # Step 2: Get video info (30%)
        update_progress(0.3, "πŸ“‹ Processing video information...")
        video_info = f"""### πŸ“Ή Video Information
**Title:** {video_title}
**Video ID:** {video_id}
**Language:** {metadata.get('language', 'en')}
**Transcript Length:** {format_count(metadata.get('transcript_length'))} characters
"""
        # Step 3: Generate summary (50-90%)
        update_progress(0.5, "πŸ€– Generating AI summary...")
        # Use provided API key or fall back to environment
        effective_api_key = api_key.strip() if api_key and api_key.strip() else None
        summarizer = create_summarizer(
            api_key=effective_api_key,
            model=None
        )
        # NOTE(review): summarize() is given the URL, so it appears to fetch
        # the transcript again even though it was already extracted above —
        # confirm whether transcript_result can be passed through to avoid a
        # second extraction / API call.
        result = summarizer.summarize(
            url=url,
            language=language,
            save_output=False,  # Don't save files in Gradio
            verbose=False
        )
        if not result['success']:
            error_msg = result.get('error', 'Unknown error')
            if 'rate limit' in error_msg.lower() or 'quota' in error_msg.lower():
                return "", video_info, f"⏳ API rate limit reached. Please try with your own API key or wait a moment.\n\nError: {error_msg}", None, None, None, None
            return "", video_info, f"❌ Failed to generate summary: {error_msg}", None, None, None, None

        # Step 4: Complete (100%)
        update_progress(1.0, "βœ… Complete!")
        summary = result['summary']
        usage = result.get('metadata', {}).get('usage', {})
        status = f"""βœ… **Summary Generated Successfully!**
πŸ“Š **Usage Statistics:**
- Tokens Used: {format_count(usage.get('total_tokens'))}
- Estimated Cost: ${usage.get('estimated_cost_usd', 0):.4f}
- Model: {result.get('metadata', {}).get('model', 'N/A')}
"""
        # Create downloadable files (best effort: a failed export is logged
        # but must not discard the generated summary).
        pdf_file, word_file, md_file, transcript_file = None, None, None, None
        temp_dir = tempfile.gettempdir()
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        safe_title = "".join(c for c in video_title if c.isalnum() or c in (' ', '-', '_')).rstrip()
        try:
            # Create PDF
            pdf_path = os.path.join(temp_dir, f"book_summary_{safe_title}_{timestamp}.pdf")
            pdf_file = create_pdf(summary, video_title, video_id, url, pdf_path)
        except Exception as e:
            print(f"Warning: Could not create PDF file: {e}")
        try:
            # Create Word document
            word_path = os.path.join(temp_dir, f"book_summary_{safe_title}_{timestamp}.docx")
            word_file = create_word_doc(summary, video_title, video_id, url, word_path)
        except Exception as e:
            print(f"Warning: Could not create Word file: {e}")
        try:
            # Create Markdown file
            md_path = os.path.join(temp_dir, f"book_summary_{safe_title}_{timestamp}.md")
            md_content = f"""# πŸ“š {video_title}
**Generated:** {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
**Video ID:** {video_id}
**Source:** {url}
---
{summary}
---
*Generated by YouTube to Book Summary Converter*
"""
            with open(md_path, 'w', encoding='utf-8') as f:
                f.write(md_content)
            md_file = md_path
        except Exception as e:
            print(f"Warning: Could not create Markdown file: {e}")
        try:
            # Create Transcript file
            transcript_path = os.path.join(temp_dir, f"transcript_{safe_title}_{timestamp}.txt")
            transcript_file = create_transcript_file(transcript_text, video_title, video_id, url, transcript_path)
        except Exception as e:
            print(f"Warning: Could not create transcript file: {e}")

        return summary, video_info, status, pdf_file, word_file, md_file, transcript_file
    except Exception as e:
        error_str = str(e)
        # Give authentication failures a targeted hint; everything else generic.
        if 'api_key' in error_str.lower() or 'unauthorized' in error_str.lower():
            return "", "", f"πŸ”‘ **API Key Error:** Please provide a valid Groq API key.\n\nDetails: {error_str}", None, None, None, None
        return "", "", f"❌ **Error:** {error_str}", None, None, None, None
def process_manual_transcript(
    transcript_text: str,
    video_title: str,
    video_duration: str,
    api_key: Optional[str] = None,
    progress: gr.Progress = None
) -> Tuple[str, str, str, str, str, str, str]:
    """
    Process a manually pasted transcript and generate summary.
    This is useful when HF Spaces blocks YouTube access.

    Args:
        transcript_text: Manually pasted transcript
        video_title: Title for the video (optional)
        video_duration: Duration of the video (optional, e.g., "10:30")
        api_key: Optional Groq API key
        progress: Gradio progress tracker

    Returns:
        Tuple of (summary, video_info, status_message, pdf_file, word_file, md_file, transcript_file)
    """
    if not transcript_text or not transcript_text.strip():
        return "", "", "⚠️ Please paste a transcript", None, None, None, None

    # Helper for progress (progress is None when called outside a Gradio event).
    def update_progress(value, desc=""):
        if progress is not None:
            progress(value, desc=desc)

    def format_count(value) -> str:
        # Only ints take the thousands-separator format; applying ':,' to the
        # previous 'N/A' string fallback raised ValueError at format time.
        return f"{value:,}" if isinstance(value, int) else "N/A"

    try:
        video_title = video_title.strip() if video_title else "Manual Transcript"
        video_id = "manual"

        def parse_duration(duration_str: str) -> Optional[int]:
            """Parse duration string like '10:30' or '1:25:00' to seconds.

            Returns None for empty, malformed, or non-numeric input.
            """
            if not duration_str or not duration_str.strip():
                return None
            try:
                parts = duration_str.strip().split(':')
                if len(parts) == 2:
                    minutes, seconds = int(parts[0]), int(parts[1])
                    return minutes * 60 + seconds
                elif len(parts) == 3:
                    hours, minutes, seconds = int(parts[0]), int(parts[1]), int(parts[2])
                    return hours * 3600 + minutes * 60 + seconds
            except ValueError:
                # Non-numeric component (e.g. "ten:30") — treat as unknown.
                # (Previously a bare `except:`, which also swallowed
                # KeyboardInterrupt and friends.)
                pass
            return None

        duration_seconds = parse_duration(video_duration)
        # Calculate statistics
        stats = calculate_transcript_stats(transcript_text, duration_seconds)

        # Format duration back to H:MM:SS / M:SS for display.
        if duration_seconds:
            hours = duration_seconds // 3600
            minutes = (duration_seconds % 3600) // 60
            seconds = duration_seconds % 60
            if hours > 0:
                duration_formatted = f"{hours}:{minutes:02d}:{seconds:02d}"
            else:
                duration_formatted = f"{minutes}:{seconds:02d}"
        else:
            duration_formatted = "N/A"

        # Step 1: Process transcript (30%)
        update_progress(0.3, "πŸ“‹ Processing transcript...")
        video_info = f"""### πŸ“Ή Video Information
**Title:** {video_title}
**Video Duration:** {duration_formatted}
**Transcript Source:** Manual Input
**Transcript Length:** {len(transcript_text):,} characters
**Word Count:** {stats['word_count']:,} words
**Reading Time:** {stats['reading_time_formatted']}
"""
        if stats.get('speaking_rate_wpm'):
            video_info += f"\n**Speaking Rate:** {stats['speaking_rate_wpm']} words/min\n"

        # Step 2: Generate summary (50-90%)
        update_progress(0.5, "πŸ€– Generating AI summary...")
        # Use provided API key or fall back to environment
        effective_api_key = api_key.strip() if api_key and api_key.strip() else None
        summarizer = create_summarizer(
            api_key=effective_api_key,
            model=None
        )
        # Call the summarizer with the transcript directly (no URL involved).
        result = summarizer.groq_client.summarize_transcript(
            transcript=transcript_text,
            prompt_template=summarizer.template,
        )
        if not result['success']:
            error_msg = result.get('error', 'Unknown error')
            return "", video_info, f"❌ Failed to generate summary: {error_msg}", None, None, None, None

        # Step 3: Complete (100%)
        update_progress(1.0, "βœ… Complete!")
        summary = result['content']
        usage = result.get('usage', {})
        status = f"""βœ… **Summary Generated Successfully!**
πŸ“Š **Usage Statistics:**
- Tokens Used: {format_count(usage.get('total_tokens'))}
- Estimated Cost: ${usage.get('estimated_cost_usd', 0):.4f}
- Model: {result.get('model', 'N/A')}
"""
        # Create downloadable files (best effort: a failed export is logged
        # but must not discard the generated summary).
        pdf_file, word_file, md_file, transcript_file = None, None, None, None
        temp_dir = tempfile.gettempdir()
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        safe_title = "".join(c for c in video_title if c.isalnum() or c in (' ', '-', '_')).rstrip()
        try:
            # Create PDF
            pdf_path = os.path.join(temp_dir, f"book_summary_{safe_title}_{timestamp}.pdf")
            pdf_file = create_pdf(summary, video_title, video_id, "Manual Input", pdf_path)
        except Exception as e:
            print(f"Warning: Could not create PDF file: {e}")
        try:
            # Create Word document
            word_path = os.path.join(temp_dir, f"book_summary_{safe_title}_{timestamp}.docx")
            word_file = create_word_doc(summary, video_title, video_id, "Manual Input", word_path)
        except Exception as e:
            print(f"Warning: Could not create Word file: {e}")
        try:
            # Create Markdown file
            md_path = os.path.join(temp_dir, f"book_summary_{safe_title}_{timestamp}.md")
            md_content = f"""# πŸ“š {video_title}
**Generated:** {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
**Source:** Manual Transcript Input
---
{summary}
---
*Generated by YouTube to Book Summary Converter*
"""
            with open(md_path, 'w', encoding='utf-8') as f:
                f.write(md_content)
            md_file = md_path
        except Exception as e:
            print(f"Warning: Could not create Markdown file: {e}")
        try:
            # Create Transcript file
            transcript_path = os.path.join(temp_dir, f"transcript_{safe_title}_{timestamp}.txt")
            transcript_file = create_transcript_file(transcript_text, video_title, video_id, "Manual Input", transcript_path)
        except Exception as e:
            print(f"Warning: Could not create transcript file: {e}")

        return summary, video_info, status, pdf_file, word_file, md_file, transcript_file
    except Exception as e:
        error_str = str(e)
        # Give authentication failures a targeted hint; everything else generic.
        if 'api_key' in error_str.lower() or 'unauthorized' in error_str.lower():
            return "", "", f"πŸ”‘ **API Key Error:** Please provide a valid Groq API key.\n\nDetails: {error_str}", None, None, None, None
        return "", "", f"❌ **Error:** {error_str}", None, None, None, None
# Create the Gradio interface
with gr.Blocks(
title="YouTube to Book Summary",
theme=gr.themes.Soft(
primary_hue="purple",
secondary_hue="blue",
neutral_hue="slate",
)
) as app:
# Header
gr.Markdown("""
# πŸ“š YouTube to Book Summary Converter
Transform any YouTube video into a comprehensive, book-style summary using AI (Groq LLM).
""")
# Main content
with gr.Row():
with gr.Column(scale=2):
# Input section with tabs for URL vs Manual transcript
gr.Markdown("### 🎬 Video Input")
with gr.Tabs():
with gr.TabItem("πŸ”— YouTube URL"):
url_input = gr.Textbox(
label="YouTube URL",
placeholder="https://www.youtube.com/watch?v=...",
value=DEFAULT_YOUTUBE_URL, # Pre-fill with default URL from env
lines=1,
max_lines=1
)
language_input = gr.Dropdown(
label="Transcript Language",
choices=["en", "es", "fr", "de", "it", "pt", "ja", "ko", "zh"],
value="en",
interactive=True
)
generate_btn = gr.Button(
"πŸš€ Generate Summary",
variant="primary"
)
with gr.TabItem("πŸ“ Manual Transcript"):
gr.Markdown("""
**For Hugging Face Spaces:** If YouTube access is blocked, paste the transcript manually.
πŸ’‘ Get transcripts from:
- YouTube's "Show transcript" button
- Any transcript website
""")
manual_transcript_input = gr.Textbox(
label="Paste Transcript Here",
placeholder="Paste the video transcript text here...",
lines=10,
max_lines=20
)
manual_title_input = gr.Textbox(
label="Video Title (Optional)",
placeholder="Enter a title for the video",
lines=1
)
manual_duration_input = gr.Textbox(
label="Video Duration (Optional)",
placeholder="e.g., 10:30 or 1:25:00",
lines=1
)
manual_generate_btn = gr.Button(
"πŸš€ Generate Summary from Transcript",
variant="primary"
)
with gr.Accordion("βš™οΈ Advanced Settings", open=False):
api_key_input = gr.Textbox(
label="Groq API Key (Optional)",
placeholder="Leave empty to use default key",
type="password",
lines=1
)
gr.Markdown("""
πŸ’‘ **Tip:** If the default API key is rate-limited, you can:
1. Get a free API key from [console.groq.com](https://console.groq.com/)
2. Paste it above to continue using the app
""")
# Status output
status_output = gr.Markdown(
label="Status"
)
# Video info
video_info_output = gr.Markdown(
label="Video Information"
)
with gr.Column(scale=3):
# Summary output
summary_output = gr.Markdown(
label="Generated Summary"
)
# Download buttons section
gr.Markdown("### πŸ“₯ Download Options")
with gr.Row():
pdf_download = gr.DownloadButton(
label="πŸ“„ PDF",
variant="secondary",
visible=True
)
word_download = gr.DownloadButton(
label="πŸ“ Word",
variant="secondary",
visible=True
)
md_download = gr.DownloadButton(
label="πŸ“‹ Markdown",
variant="secondary",
visible=True
)
transcript_download = gr.DownloadButton(
label="πŸ“œ Transcript",
variant="secondary",
visible=True
)
# Examples
gr.Examples(
examples=[
["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
["https://www.youtube.com/watch?v=jNQXAC9IVRw"],
["https://youtu.be/aircAruvnKk"],
],
inputs=[url_input],
label="πŸ“ Example Videos (Try these!)"
)
# Footer
gr.Markdown("""
---
**Powered by Groq LLM** | Built with ❀️ using Gradio
Transform YouTube videos into comprehensive book-style summaries
""")
# Event handlers
generate_btn.click(
fn=process_video,
inputs=[url_input, api_key_input, language_input],
outputs=[summary_output, video_info_output, status_output, pdf_download, word_download, md_download, transcript_download],
api_name="generate"
)
# Manual transcript handler
manual_generate_btn.click(
fn=process_manual_transcript,
inputs=[manual_transcript_input, manual_title_input, manual_duration_input, api_key_input],
outputs=[summary_output, video_info_output, status_output, pdf_download, word_download, md_download, transcript_download],
api_name="generate_manual"
)
# Also allow Enter key to submit
url_input.submit(
fn=process_video,
inputs=[url_input, api_key_input, language_input],
outputs=[summary_output, video_info_output, status_output, pdf_download, word_download, md_download, transcript_download]
)
if __name__ == "__main__":
    # For local development. (On Hugging Face Spaces the module-level `app`
    # is what matters; share links are neither needed nor supported there.)
    import argparse

    parser = argparse.ArgumentParser(description="YouTube to Book Summary Converter")
    parser.add_argument("--port", type=int, default=7860, help="Port to run the server on")
    parser.add_argument("--host", type=str, default="127.0.0.1", help="Host to bind to")
    parser.add_argument("--share", action="store_true", help="Create a public share link")
    parser.add_argument("--debug", action="store_true", help="Enable debug mode")
    args = parser.parse_args()

    print("\nStarting YouTube to Book Summary Converter...")
    print(f" Host: {args.host}")
    print(f" Port: {args.port}")
    print(f" Share: {'Yes' if args.share else 'No'}")
    print(f" Debug: {'Yes' if args.debug else 'No'}")
    print(f"\n Open in browser: http://{args.host}:{args.port}")
    if args.share:
        print(" A public share link will be generated...\n")

    app.launch(
        server_name=args.host,
        server_port=args.port,
        # Honor the --share flag. Previously this was hard-coded to True,
        # silently ignoring the flag and contradicting the status printed above.
        share=args.share,
        debug=args.debug
    )