Spaces:

c2p-cmd
/

YouTubeSummarizer

Sleeping

App Files Files Community

Sharan Thakur committed on Jan 21, 2025

Commit

f8c4214

1 Parent(s): 6e522f0

Add initial implementation of YouTube audio summarizer with Gemini API integration

Browse files

Files changed (7) hide show

.gitignore +3 -0
README.md +1 -3
ai_client.py +80 -0
app.py +51 -0
extract_audio.py +101 -0
models.py +31 -0
requirements.txt +86 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+venv/
+output/
+*.m4a

README.md CHANGED Viewed

@@ -7,7 +7,5 @@ sdk: gradio
 sdk_version: 5.12.0
 app_file: app.py
 pinned: false
-short_description: A summarizer for youtube videos using GeminiAPI
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 sdk_version: 5.12.0
 app_file: app.py
 pinned: false
+short_description: YouTube Summarizer is a tool that helps you quickly get the gist of YouTube videos by providing concise summaries.
 ---

ai_client.py ADDED Viewed

	@@ -0,0 +1,80 @@

+from dotenv import load_dotenv
+import os
+import google.generativeai as genai
+from typing import Generator
+from logging import getLogger
+logger = getLogger(__name__)
+class Gemini:
+    """Small client that uploads an audio file to Gemini and streams a summary.
+
+    The API key is read from the ``GEMINI_API_KEY`` environment variable;
+    ``load_dotenv()`` is called first so a local ``.env`` file can supply it.
+    """
+    def __init__(self):
+        """Configure the SDK and build the generative model.
+
+        Raises:
+            ValueError: If ``GEMINI_API_KEY`` is not set.
+        """
+        load_dotenv()
+        api_key = os.getenv("GEMINI_API_KEY")
+        if api_key is None:
+            raise ValueError("GEMINI_API_KEY is not set in the environment variables")
+        genai.configure(api_key=api_key)
+        # Generation settings applied to every request made through this model.
+        self.generation_config = {
+            "temperature": 1,
+            "top_p": 0.95,
+            "top_k": 64,
+            "max_output_tokens": 8192,
+            "response_mime_type": "text/plain",
+        }
+        self.model = genai.GenerativeModel(
+            model_name="gemini-1.5-pro",
+            generation_config=self.generation_config,
+        )
+    def generate_text(
+        self, local_file: str, id: str, uploader: str
+    ) -> Generator[str, None, None]:
+        """Upload ``local_file`` and stream a podcast-style summary of it.
+
+        Args:
+            local_file: Path of the audio file to upload.
+            id: Video identifier, forwarded to the upload helper.
+            uploader: Uploader name interpolated into the prompt.
+
+        Yields:
+            The text of each streamed response chunk, in arrival order.
+        """
+        audio_file = self.__upload_to_gemini(
+            id=id, path=local_file, mime_type="audio/m4a"
+        )
+        prompt = f"""
Summarize the audio's content to sound like a podcast.\n
Add fun facts to the summary too.\n
The uploader of the audio is the following: {uploader}\n
Add a nice title to the summary too.\n
"""
+        responses = self.model.generate_content(
+            [{"role": "user", "parts": [audio_file, prompt]}],
+            stream=True,
+        )
+        for response in responses:
+            try:
+                text = response.text
+            except ValueError:
+                # The ``.text`` accessor raises ValueError when a chunk has no
+                # text part (e.g. an empty or blocked chunk); skip that chunk
+                # instead of aborting the whole stream.
+                logger.warning("Skipping streamed chunk without text content")
+                continue
+            yield text
+    def __upload_to_gemini(
+        self, id: str, path: str, mime_type=None
+    ) -> "genai.types.File":
+        """Uploads the given file to Gemini and returns the uploaded file handle.
+
+        ``id`` is accepted for call-site compatibility but is not used here.
+        See https://ai.google.dev/gemini-api/docs/prompting_with_media
+        """
+        file = genai.upload_file(
+            path,
+            mime_type=mime_type,
+        )
+        logger.info("Uploaded file '%s' as: %s", file.display_name, file.uri)
+        return file
+if __name__ == "__main__":
+    # Manual smoke test: download one video's audio and print the streamed summary.
+    from extract_audio import simple_download_audio_from_youtube
+    client = Gemini()
+    link = input("Enter YouTube link: ")
+    video = simple_download_audio_from_youtube(link)
+    summary_chunks = client.generate_text(
+        video.get_local_file_path(), video.id, video.uploader
+    )
+    for piece in summary_chunks:
+        print(piece)

app.py ADDED Viewed

	@@ -0,0 +1,51 @@

+from ai_client import Gemini
+from extract_audio import simple_download_audio_from_youtube
+from models import YTResultWithTranscript
+import gradio as gr
+import os
+# Shared Gemini client, built once at import time (raises ValueError if GEMINI_API_KEY is unset).
+gemini = Gemini()
+def summarize_audio(youtube_link: str):
+    """Download a video's audio and stream its Gemini summary to the UI.
+
+    Args:
+        youtube_link: URL of the YouTube video to summarize.
+
+    Yields:
+        ``[id, title, thumbnail_link, uploader, transcript]`` after every
+        streamed chunk, with ``transcript`` holding the summary so far.
+
+    Raises:
+        gr.Error: If the downloader reported a non-zero error code.
+    """
+    yt_res = simple_download_audio_from_youtube(youtube_link)
+    if yt_res.error_code:
+        # Fail early with a readable message rather than on the missing file.
+        raise gr.Error(
+            f"Could not download audio (error code {yt_res.error_code})."
+        )
+    # Start from an empty string so no placeholder text is shown to the user.
+    yt_transcript = YTResultWithTranscript(**yt_res.model_dump(), transcript="")
+    for chunk in gemini.generate_text(
+        yt_res.get_local_file_path(),
+        yt_res.id,
+        yt_res.uploader,
+    ):
+        yt_transcript.transcript += chunk
+        yield yt_transcript.model_outputs()
+# Gradio UI. The output components are listed in the same order as the list
+# returned by YTResultWithTranscript.model_outputs():
+# id, title, thumbnail_link, uploader, transcript.
+demo = gr.Interface(
+    fn=summarize_audio,
+    inputs=gr.Textbox(label="YouTube Link"),
+    outputs=[
+        gr.Textbox(lines=1, label="ID"),
+        # title
+        gr.Textbox(lines=1, label="Title"),
+        # thumbnail_link
+        gr.Image(label="Thumbnail Link", type='filepath', show_download_button=True),
+        # uploader
+        gr.Textbox(lines=1, label="Uploader"),
+        # transcript (gr.Markdown has no `lines` parameter, so none is passed)
+        gr.Markdown(label="Transcript", show_copy_button=True),
+    ],
+    title="Summarize Audio",
+    description="Summarize the content of an audio from a YouTube link.",
+    flagging_mode="never",
+    api_name="summarize",
+)
+def auth_handler(usr, pwd) -> bool:
+    """Check login credentials against the USERNAME / PASSWORD env variables.
+
+    Args:
+        usr: Username submitted at login.
+        pwd: Password submitted at login.
+
+    Returns:
+        True only when both values match the configured credentials. Always
+        False when either environment variable is unset or empty, or when a
+        submitted value is None.
+    """
+    import hmac
+    username = os.environ.get("USERNAME")
+    password = os.environ.get("PASSWORD")
+    # Fail closed: never authenticate against missing or empty credentials.
+    if not username or not password or usr is None or pwd is None:
+        return False
+    # Compare as bytes (compare_digest rejects non-ASCII str) and evaluate both
+    # checks before combining so timing does not reveal which field was wrong.
+    user_ok = hmac.compare_digest(str(usr).encode("utf-8"), username.encode("utf-8"))
+    pwd_ok = hmac.compare_digest(str(pwd).encode("utf-8"), password.encode("utf-8"))
+    return user_ok and pwd_ok
+# Start the app at import time; every login attempt is checked by auth_handler.
+# NOTE(review): pwa=True presumably enables Gradio's progressive-web-app mode - confirm.
+demo.launch(auth=auth_handler, pwa=True)

extract_audio.py ADDED Viewed

	@@ -0,0 +1,101 @@

+from logging import getLogger
+from typing import Generator, Optional
+from models import YTResult
+import yt_dlp
+logger = getLogger(__name__)
+def __get_audio(result: YTResult) -> Optional[YTResult]:
+    """Return ``result`` if its audio file already exists on disk, else ``None``.
+
+    The path comes from ``result.get_local_file_path()`` so the cache check
+    looks at the same location that consumers of the result read from.
+    """
+    from pathlib import Path
+    # Check for existence only; no need to open (and hold) the file.
+    if Path(result.get_local_file_path()).is_file():
+        return result
+    return None
+def __my_hook(d):
+    """Progress hook for yt-dlp: log the current download status.
+
+    Args:
+        d: Status dictionary passed by the downloader. Reads ``status`` and,
+            while downloading, ``downloaded_bytes``, ``total_bytes`` and
+            ``total_bytes_estimate``.
+    """
+    status = d["status"]
+    if status == "error":
+        logger.error("Error downloading video")
+    elif status == "downloading":
+        # Size keys may be absent or present with a None value, so fall back
+        # with `or` instead of relying on dict.get defaults.
+        downloaded_bytes = d.get("downloaded_bytes") or 0
+        total_bytes = d.get("total_bytes") or d.get("total_bytes_estimate")
+        if total_bytes:
+            percent = downloaded_bytes / total_bytes * 100
+            logger.info(f"Downloaded {percent:.2f}%")
+        else:
+            # Total size unknown: report raw progress and avoid dividing by zero/None.
+            logger.info("Downloaded %d bytes", downloaded_bytes)
+    elif status == "finished":
+        logger.info("Download finished")
+def __get_options():
+    """Build the option dictionary handed to ``yt_dlp.YoutubeDL``."""
+    options = {
+        # Format preference string: m4a first, then other audio, then anything.
+        "format": "m4a/bestaudio/best",
+        # Files are written to output/<video id>.<extension>.
+        "outtmpl": "output/%(id)s.%(ext)s",
+    }
+    options["progress_hooks"] = [__my_hook]
+    return options
+def extract_info(link: str) -> YTResult:
+    """Fetch metadata for ``link`` without downloading any media.
+
+    Returns:
+        A ``YTResult`` with the video's id, title, thumbnail and uploader.
+    """
+    with yt_dlp.YoutubeDL(__get_options()) as downloader:
+        raw_info = downloader.extract_info(link, download=False)
+        meta = downloader.sanitize_info(raw_info)
+        video_id, title = meta["id"], meta["title"]
+        thumbnail, uploader = meta["thumbnail"], meta["uploader"]
+        return YTResult(
+            id=video_id,
+            title=title,
+            thumbnail_link=thumbnail,
+            uploader=uploader,
+        )
+def simple_download_audio_from_youtube(link: str) -> YTResult:
+    """Fetch metadata for ``link`` and download its audio unless already on disk.
+
+    Returns:
+        A ``YTResult`` for the video. ``error_code`` is set to the downloader's
+        return value only when a download was actually attempted.
+    """
+    with yt_dlp.YoutubeDL(__get_options()) as downloader:
+        raw_info = downloader.extract_info(link, download=False)
+        meta = downloader.sanitize_info(raw_info)
+        video = YTResult(
+            id=meta["id"],
+            title=meta["title"],
+            thumbnail_link=meta["thumbnail"],
+            uploader=meta["uploader"],
+        )
+        # Download only when no previously saved audio file was found.
+        if not __get_audio(video):
+            video.error_code = downloader.download([link])
+        return video
+def download_audio_from_youtube(link: str) -> Generator[YTResult, None, None]:
+    """Yield the video's metadata, then the same metadata with the download result.
+
+    Yields:
+        First a ``YTResult`` built from the extracted metadata, then, once the
+        download call has returned, a second ``YTResult`` whose ``error_code``
+        is the downloader's return value.
+    """
+    with yt_dlp.YoutubeDL(__get_options()) as downloader:
+        raw_info = downloader.extract_info(link, download=False)
+        meta = downloader.sanitize_info(raw_info)
+        # Fields shared by both yielded results.
+        shared = {
+            "id": meta["id"],
+            "title": meta["title"],
+            "thumbnail_link": meta["thumbnail"],
+            "uploader": meta["uploader"],
+        }
+        yield YTResult(**shared)
+        # The download runs only after the consumer asks for the next item.
+        status = downloader.download([link])
+        yield YTResult(**shared, error_code=status)
+# Example usage
+if __name__ == "__main__":
+    import logging
+    # No handler is configured by default, so INFO records would be dropped;
+    # configure logging here to make the example actually print its output.
+    logging.basicConfig(level=logging.INFO)
+    yt_link = "https://www.youtube.com/watch?v=vf7bI5nZyi8"
+    for update in download_audio_from_youtube(yt_link):
+        logger.info(f"Video Info: {update}")

models.py ADDED Viewed

	@@ -0,0 +1,31 @@

+from typing import Optional
+from pydantic import BaseModel, Field
+class YTRequest(BaseModel):
+    """Request model holding the YouTube link to process."""
+    # Required field (``...`` marks it as having no default).
+    yt_link: str = Field(..., description="The YouTube video link to be processed")
+class YTResult(BaseModel):
+    """Metadata for one YouTube video plus an optional download error code."""
+    id: str = Field(..., description="The YouTube video ID")
+    title: str = Field(..., description="The YouTube video title")
+    thumbnail_link: str = Field(..., description="The YouTube video thumbnail link")
+    uploader: str = Field(..., description="The YouTube video uploader")
+    # Stays None until a caller assigns a value.
+    error_code: Optional[int] = Field(default=None, description="The error code if any")
+    def get_local_file_path(self) -> str:
+        """Return the relative path of this video's ``.m4a`` file under ``output/``."""
+        local_path = f"output/{self.id}.m4a"
+        return local_path
+class YTResultWithTranscript(YTResult):
+    """A ``YTResult`` extended with the text generated for the video."""
+    transcript: str = Field(..., description="The YouTube video transcript")
+    def model_outputs(self) -> list:
+        """Return the field values as a list: id, title, thumbnail, uploader, transcript."""
+        metadata = [self.id, self.title, self.thumbnail_link, self.uploader]
+        return [*metadata, self.transcript]

requirements.txt ADDED Viewed

	@@ -0,0 +1,86 @@

+aiofiles==23.2.1
+annotated-types==0.7.0
+anthropic==0.43.1
+anyio==4.8.0
+cachetools==5.5.0
+certifi==2024.12.14
+charset-normalizer==3.4.1
+click==8.1.8
+colorama==0.4.6
+distro==1.9.0
+eval_type_backport==0.2.2
+fastapi==0.115.6
+ffmpy==0.5.0
+filelock==3.16.1
+fsspec==2024.12.0
+google-ai-generativelanguage==0.6.10
+google-api-core==2.24.0
+google-api-python-client==2.157.0
+google-auth==2.37.0
+google-auth-httplib2==0.2.0
+google-generativeai==0.8.3
+googleapis-common-protos==1.66.0
+gradio==5.12.0
+gradio_client==1.5.4
+griffe==1.5.5
+groq==0.15.0
+grpcio==1.69.0
+grpcio-status==1.69.0
+h11==0.14.0
+httpcore==1.0.7
+httplib2==0.22.0
+httpx==0.28.1
+huggingface-hub==0.27.1
+idna==3.10
+Jinja2==3.1.5
+jiter==0.8.2
+jsonpath-python==1.0.6
+logfire-api==3.2.0
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+mdurl==0.1.2
+mistralai==1.4.0
+mypy-extensions==1.0.0
+numpy==2.2.2
+openai==1.59.9
+orjson==3.10.15
+packaging==24.2
+pandas==2.2.3
+pillow==11.1.0
+proto-plus==1.25.0
+protobuf==5.29.3
+pyasn1==0.6.1
+pyasn1_modules==0.4.1
+pydantic==2.10.5
+pydantic-ai-slim==0.0.19
+pydantic-graph==0.0.19
+pydantic_core==2.27.2
+pydub==0.25.1
+Pygments==2.19.1
+pyparsing==3.2.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-multipart==0.0.20
+pytz==2024.2
+PyYAML==6.0.2
+requests==2.32.3
+rich==13.9.4
+rsa==4.9
+ruff==0.9.2
+safehttpx==0.1.6
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+starlette==0.41.3
+tomlkit==0.13.2
+tqdm==4.67.1
+typer==0.15.1
+typing-inspect==0.9.0
+typing_extensions==4.12.2
+tzdata==2024.2
+uritemplate==4.1.1
+urllib3==2.3.0
+uvicorn==0.34.0
+websockets==14.2
+yt-dlp==2025.1.15