Spaces:
Sleeping
Sleeping
Sharan Thakur commited on
Commit ·
f8c4214
1
Parent(s): 6e522f0
Add initial implementation of YouTube audio summarizer with Gemini API integration
Browse files- .gitignore +3 -0
- README.md +1 -3
- ai_client.py +80 -0
- app.py +51 -0
- extract_audio.py +101 -0
- models.py +31 -0
- requirements.txt +86 -0
.gitignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
venv/
|
| 2 |
+
output/
|
| 3 |
+
*.m4a
|
README.md
CHANGED
|
@@ -7,7 +7,5 @@ sdk: gradio
|
|
| 7 |
sdk_version: 5.12.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
-
short_description:
|
| 11 |
---
|
| 12 |
-
|
| 13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 7 |
sdk_version: 5.12.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
short_description: YouTube Summarizer is a tool that helps you quickly get the gist of YouTube videos by providing concise summaries.
|
| 11 |
---
|
|
|
|
|
|
ai_client.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dotenv import load_dotenv
|
| 2 |
+
import os
|
| 3 |
+
import google.generativeai as genai
|
| 4 |
+
from typing import Generator
|
| 5 |
+
from logging import getLogger
|
| 6 |
+
|
| 7 |
+
logger = getLogger(__name__)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class Gemini:
    """Wrapper around a Gemini generative model that summarizes audio files.

    The constructor configures the SDK from the ``GEMINI_API_KEY`` environment
    variable (a ``.env`` file is loaded first) and builds the model used by
    :meth:`generate_text`.
    """

    def __init__(self):
        """Configure the Gemini SDK and create the model.

        Raises:
            ValueError: If ``GEMINI_API_KEY`` is missing or empty.
        """
        load_dotenv()
        api_key = os.getenv("GEMINI_API_KEY")
        # Treat an empty value like a missing one so the failure happens here,
        # with a clear message, instead of on the first API request.
        if not api_key:
            raise ValueError("GEMINI_API_KEY is not set in the environment variables")
        genai.configure(api_key=api_key)

        # Create the model
        self.generation_config = {
            "temperature": 1,
            "top_p": 0.95,
            "top_k": 64,
            "max_output_tokens": 8192,
            "response_mime_type": "text/plain",
        }

        self.model = genai.GenerativeModel(
            model_name="gemini-1.5-pro",
            generation_config=self.generation_config,
        )

    def generate_text(
        self, local_file: str, id: str, uploader: str
    ) -> Generator[str, None, None]:
        """Upload an audio file and stream a podcast-style summary of it.

        Args:
            local_file: Path of the audio file on disk.
            id: Identifier of the source video; forwarded to the upload helper.
            uploader: Name of the uploader, interpolated into the prompt.

        Yields:
            Text of each streamed response chunk, in arrival order.
        """
        # NOTE(review): "audio/m4a" may not be a MIME type the Gemini API
        # lists as supported -- confirm against the audio documentation.
        uploaded_file = self.__upload_to_gemini(
            id=id, path=local_file, mime_type="audio/m4a"
        )
        responses = self.model.generate_content(
            [
                {
                    "role": "user",
                    "parts": [
                        uploaded_file,
                        f"""
                        Summarize the audio's content to sound like a podcast.\n
                        Add fun facts to the summary too.\n
                        The uploader of the audio is the following: {uploader}\n
                        Add a nice title to the summary too.\n
                        """,
                    ],
                },
            ],
            stream=True,
        )
        for response in responses:
            try:
                text = response.text
            except ValueError:
                # The ``text`` accessor raises when a chunk carries no text
                # part; skip that chunk rather than abort the whole stream.
                logger.warning("Skipping a streamed chunk that contains no text")
                continue
            yield text

    def __upload_to_gemini(
        self, id: str, path: str, mime_type=None
    ) -> "genai.types.File":
        """Uploads the given file to Gemini.

        See https://ai.google.dev/gemini-api/docs/prompting_with_media

        Args:
            id: Identifier of the source video (currently unused; kept so
                existing keyword callers keep working).
            path: Path of the file to upload.
            mime_type: MIME type passed through to ``genai.upload_file``.

        Returns:
            The file object returned by ``genai.upload_file``.
        """
        file = genai.upload_file(
            path,
            mime_type=mime_type,
        )
        logger.info("Uploaded file '%s' as: %s", file.display_name, file.uri)
        return file
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
if __name__ == "__main__":
    # Manual smoke test: ask for a link, download its audio, print the summary.
    from extract_audio import simple_download_audio_from_youtube

    client = Gemini()
    video = simple_download_audio_from_youtube(input("Enter YouTube link: "))

    summary_chunks = client.generate_text(
        video.get_local_file_path(), video.id, video.uploader
    )
    for piece in summary_chunks:
        print(piece)
|
app.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ai_client import Gemini
|
| 2 |
+
from extract_audio import simple_download_audio_from_youtube
|
| 3 |
+
from models import YTResultWithTranscript
|
| 4 |
+
import gradio as gr
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
# Single shared client, built at import time.
gemini = Gemini()


def summarize_audio(youtube_link: str):
    """Download a video's audio and stream a Gemini-generated summary of it.

    Args:
        youtube_link: URL of the YouTube video to summarize.

    Yields:
        The list returned by ``YTResultWithTranscript.model_outputs()`` after
        each streamed chunk, with the summary text accumulated so far.

    Raises:
        gr.Error: If the downloader reported a non-zero error code.
    """
    yt_res = simple_download_audio_from_youtube(youtube_link)
    if yt_res.error_code:
        # Surface the failure instead of sending a missing or partial file on.
        raise gr.Error(f"Audio download failed with error code {yt_res.error_code}")

    # Start empty: chunks are appended below, so any placeholder text here
    # would be displayed in front of the real summary.
    yt_transcript = YTResultWithTranscript(**yt_res.model_dump(), transcript="")
    for chunk in gemini.generate_text(
        yt_res.get_local_file_path(),
        yt_res.id,
        yt_res.uploader,
    ):
        yt_transcript.transcript += chunk
        yield yt_transcript.model_outputs()
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# UI definition. The order of ``outputs`` must match the list returned by
# YTResultWithTranscript.model_outputs(): id, title, thumbnail link, uploader,
# transcript.
demo = gr.Interface(
    fn=summarize_audio,
    inputs=gr.Textbox(label="YouTube Link"),
    outputs=[
        gr.Textbox(lines=1, label="ID"),
        # title
        gr.Textbox(lines=1, label="Title"),
        # thumbnail_link
        gr.Image(label="Thumbnail Link", type='filepath', show_download_button=True),
        # uploader
        gr.Textbox(lines=1, label="Uploader"),
        # transcript
        # ``lines`` is a Textbox option; Markdown does not accept it, so it is
        # not passed here.
        gr.Markdown(label="Transcript", show_copy_button=True),
    ],
    title="Summarize Audio",
    description="Summarize the content of an audio from a YouTube link.",
    flagging_mode="never",
    api_name="summarize",
)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def auth_handler(usr, pwd) -> bool:
    """Check login credentials against the USERNAME and PASSWORD env vars.

    Args:
        usr: Username submitted by the client.
        pwd: Password submitted by the client.

    Returns:
        True only when both values match the configured credentials. Returns
        False when either environment variable is unset or empty, or when a
        submitted value is not a string, so a misconfigured deployment never
        authenticates anyone.
    """
    import hmac

    expected_user = os.environ.get("USERNAME")
    expected_password = os.environ.get("PASSWORD")

    # Fail closed: without this, unset variables compare equal to None inputs.
    if not expected_user or not expected_password:
        return False
    if not isinstance(usr, str) or not isinstance(pwd, str):
        return False

    # Compare as bytes (compare_digest rejects non-ASCII str) and evaluate
    # both checks before combining so timing does not reveal which one failed.
    user_ok = hmac.compare_digest(usr.encode("utf-8"), expected_user.encode("utf-8"))
    password_ok = hmac.compare_digest(
        pwd.encode("utf-8"), expected_password.encode("utf-8")
    )
    return user_ok and password_ok
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
# Start the Gradio app; every login is validated by auth_handler.
# NOTE(review): pwa=True presumably enables Progressive Web App support -- confirm.
demo.launch(auth=auth_handler, pwa=True)
|
extract_audio.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from logging import getLogger
|
| 2 |
+
from typing import Generator, Optional
|
| 3 |
+
from models import YTResult
|
| 4 |
+
import yt_dlp
|
| 5 |
+
|
| 6 |
+
logger = getLogger(__name__)
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def __get_audio(result: YTResult) -> Optional[YTResult]:
    """Return ``result`` if its audio file already exists on disk.

    Args:
        result: Video metadata whose local audio path should be checked.

    Returns:
        ``result`` when the file at ``result.get_local_file_path()`` exists,
        otherwise ``None``.
    """
    import os.path

    # Ask the model for the path so this check cannot drift from the location
    # the rest of the application reads; no file handle is opened just to
    # test for existence.
    if os.path.isfile(result.get_local_file_path()):
        return result
    return None
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def __my_hook(d):
    """Progress hook for yt-dlp that logs the state of a download.

    Args:
        d: Status dictionary passed by yt-dlp. ``status`` is read always;
            ``downloaded_bytes``, ``total_bytes`` and ``total_bytes_estimate``
            are read while downloading and may be missing or ``None``.
    """
    status = d["status"]
    if status == "error":
        logger.error("Error downloading video")
    elif status == "downloading":
        downloaded_bytes = d.get("downloaded_bytes") or 0
        # Prefer the exact size and fall back to the estimate; either one can
        # be absent or None, in which case no percentage can be computed.
        total_bytes = d.get("total_bytes") or d.get("total_bytes_estimate")
        if total_bytes:
            logger.info("Downloaded %.2f%%", downloaded_bytes / total_bytes * 100)
        else:
            logger.info("Downloaded %d bytes (total size unknown)", downloaded_bytes)
    elif status == "finished":
        logger.info("Download finished")
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def __get_options():
    """Build the yt-dlp options dictionary shared by this module's helpers."""
    # NOTE(review): the format selector can fall back to a non-m4a stream,
    # while other code in this module looks for "output/<id>.m4a" -- confirm
    # that a fallback download is still found afterwards.
    options = dict(
        format="m4a/bestaudio/best",
        outtmpl="output/%(id)s.%(ext)s",
    )
    options["progress_hooks"] = [__my_hook]
    return options
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _result_from_info(info_dict: dict, error_code: Optional[int] = None) -> YTResult:
    """Map a sanitized yt-dlp info dictionary onto a ``YTResult``."""
    return YTResult(
        id=info_dict["id"],
        title=info_dict["title"],
        thumbnail_link=info_dict["thumbnail"],
        uploader=info_dict["uploader"],
        error_code=error_code,
    )


def extract_info(link: str) -> YTResult:
    """Fetch a video's metadata without downloading it.

    Args:
        link: URL of the video.

    Returns:
        The video's id, title, thumbnail link and uploader.
    """
    with yt_dlp.YoutubeDL(__get_options()) as ydl:
        info = ydl.extract_info(link, download=False)
        return _result_from_info(ydl.sanitize_info(info))


def simple_download_audio_from_youtube(link: str) -> YTResult:
    """Download a video's audio unless it is already on disk.

    Args:
        link: URL of the video.

    Returns:
        The video's metadata. ``error_code`` holds the value returned by
        ``ydl.download`` when a download was attempted, and stays ``None``
        when the existing local file was reused.
    """
    with yt_dlp.YoutubeDL(__get_options()) as ydl:
        info = ydl.extract_info(link, download=False)
        res = _result_from_info(ydl.sanitize_info(info))

        # Reuse a previous download of the same video when one exists.
        if __get_audio(res) is not None:
            return res

        res.error_code = ydl.download([link])
        return res


def download_audio_from_youtube(link: str) -> Generator[YTResult, None, None]:
    """Download a video's audio, yielding its metadata before and after.

    Args:
        link: URL of the video.

    Yields:
        First the metadata alone, before the download starts; then the same
        metadata with ``error_code`` set to the value returned by
        ``ydl.download``. No intermediate progress is yielded.
    """
    with yt_dlp.YoutubeDL(__get_options()) as ydl:
        info = ydl.extract_info(link, download=False)
        info_dict = ydl.sanitize_info(info)

        # Yield video metadata
        yield _result_from_info(info_dict)

        # Download, then report the outcome
        error_code = ydl.download([link])

        yield _result_from_info(info_dict, error_code=error_code)
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
# Example Usage
if __name__ == "__main__":
    import logging

    # Without a configured handler, INFO records are dropped and this example
    # would print nothing.
    logging.basicConfig(level=logging.INFO)

    yt_link = "https://www.youtube.com/watch?v=vf7bI5nZyi8"
    for update in download_audio_from_youtube(yt_link):
        logger.info("Video Info: %s", update)
|
models.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Optional
|
| 2 |
+
|
| 3 |
+
from pydantic import BaseModel, Field
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class YTRequest(BaseModel):
    """Request model carrying the YouTube link to process."""

    yt_link: str = Field(description="The YouTube video link to be processed")
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class YTResult(BaseModel):
    """Metadata of a YouTube video plus the outcome of its download."""

    id: str = Field(description="The YouTube video ID")
    title: str = Field(description="The YouTube video title")
    thumbnail_link: str = Field(description="The YouTube video thumbnail link")
    uploader: str = Field(description="The YouTube video uploader")
    error_code: Optional[int] = Field(
        default=None,
        description="The error code if any",
    )

    def get_local_file_path(self) -> str:
        """Return the relative path of this video's audio file."""
        return "output/{}.m4a".format(self.id)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class YTResultWithTranscript(YTResult):
    """A ``YTResult`` extended with the text generated for the video."""

    transcript: str = Field(description="The YouTube video transcript")

    def model_outputs(self) -> list:
        """Return the displayable fields as a list, in a fixed order.

        The order is id, title, thumbnail link, uploader, transcript.
        """
        ordered_fields = ("id", "title", "thumbnail_link", "uploader", "transcript")
        return [getattr(self, field_name) for field_name in ordered_fields]
|
requirements.txt
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
aiofiles==23.2.1
|
| 2 |
+
annotated-types==0.7.0
|
| 3 |
+
anthropic==0.43.1
|
| 4 |
+
anyio==4.8.0
|
| 5 |
+
cachetools==5.5.0
|
| 6 |
+
certifi==2024.12.14
|
| 7 |
+
charset-normalizer==3.4.1
|
| 8 |
+
click==8.1.8
|
| 9 |
+
colorama==0.4.6
|
| 10 |
+
distro==1.9.0
|
| 11 |
+
eval_type_backport==0.2.2
|
| 12 |
+
fastapi==0.115.6
|
| 13 |
+
ffmpy==0.5.0
|
| 14 |
+
filelock==3.16.1
|
| 15 |
+
fsspec==2024.12.0
|
| 16 |
+
google-ai-generativelanguage==0.6.10
|
| 17 |
+
google-api-core==2.24.0
|
| 18 |
+
google-api-python-client==2.157.0
|
| 19 |
+
google-auth==2.37.0
|
| 20 |
+
google-auth-httplib2==0.2.0
|
| 21 |
+
google-generativeai==0.8.3
|
| 22 |
+
googleapis-common-protos==1.66.0
|
| 23 |
+
gradio==5.12.0
|
| 24 |
+
gradio_client==1.5.4
|
| 25 |
+
griffe==1.5.5
|
| 26 |
+
groq==0.15.0
|
| 27 |
+
grpcio==1.69.0
|
| 28 |
+
grpcio-status==1.69.0
|
| 29 |
+
h11==0.14.0
|
| 30 |
+
httpcore==1.0.7
|
| 31 |
+
httplib2==0.22.0
|
| 32 |
+
httpx==0.28.1
|
| 33 |
+
huggingface-hub==0.27.1
|
| 34 |
+
idna==3.10
|
| 35 |
+
Jinja2==3.1.5
|
| 36 |
+
jiter==0.8.2
|
| 37 |
+
jsonpath-python==1.0.6
|
| 38 |
+
logfire-api==3.2.0
|
| 39 |
+
markdown-it-py==3.0.0
|
| 40 |
+
MarkupSafe==2.1.5
|
| 41 |
+
mdurl==0.1.2
|
| 42 |
+
mistralai==1.4.0
|
| 43 |
+
mypy-extensions==1.0.0
|
| 44 |
+
numpy==2.2.2
|
| 45 |
+
openai==1.59.9
|
| 46 |
+
orjson==3.10.15
|
| 47 |
+
packaging==24.2
|
| 48 |
+
pandas==2.2.3
|
| 49 |
+
pillow==11.1.0
|
| 50 |
+
proto-plus==1.25.0
|
| 51 |
+
protobuf==5.29.3
|
| 52 |
+
pyasn1==0.6.1
|
| 53 |
+
pyasn1_modules==0.4.1
|
| 54 |
+
pydantic==2.10.5
|
| 55 |
+
pydantic-ai-slim==0.0.19
|
| 56 |
+
pydantic-graph==0.0.19
|
| 57 |
+
pydantic_core==2.27.2
|
| 58 |
+
pydub==0.25.1
|
| 59 |
+
Pygments==2.19.1
|
| 60 |
+
pyparsing==3.2.1
|
| 61 |
+
python-dateutil==2.9.0.post0
|
| 62 |
+
python-dotenv==1.0.1
|
| 63 |
+
python-multipart==0.0.20
|
| 64 |
+
pytz==2024.2
|
| 65 |
+
PyYAML==6.0.2
|
| 66 |
+
requests==2.32.3
|
| 67 |
+
rich==13.9.4
|
| 68 |
+
rsa==4.9
|
| 69 |
+
ruff==0.9.2
|
| 70 |
+
safehttpx==0.1.6
|
| 71 |
+
semantic-version==2.10.0
|
| 72 |
+
shellingham==1.5.4
|
| 73 |
+
six==1.17.0
|
| 74 |
+
sniffio==1.3.1
|
| 75 |
+
starlette==0.41.3
|
| 76 |
+
tomlkit==0.13.2
|
| 77 |
+
tqdm==4.67.1
|
| 78 |
+
typer==0.15.1
|
| 79 |
+
typing-inspect==0.9.0
|
| 80 |
+
typing_extensions==4.12.2
|
| 81 |
+
tzdata==2024.2
|
| 82 |
+
uritemplate==4.1.1
|
| 83 |
+
urllib3==2.3.0
|
| 84 |
+
uvicorn==0.34.0
|
| 85 |
+
websockets==14.2
|
| 86 |
+
yt-dlp==2025.1.15
|