| | import os |
| | import traceback |
| |
|
| | import requests |
| | import yt_dlp |
| | from bs4 import BeautifulSoup |
| | from download_video import downlaod_video_from_url |
| | from pytube import YouTube |
| |
|
| |
|
def download_youtube_video(url, download_path="../data/"):
    """
    Download a progressive MP4 stream of a YouTube video via pytube.

    Progressive MP4 streams are ordered by resolution, descending, and the
    first one is downloaded.

    Args:
        url: Video URL handed to ``YouTube``
        download_path: Directory passed as ``output_path`` to the download

    Any exception is caught and reported on stdout; nothing is returned.
    """
    try:
        progressive_mp4 = YouTube(url).streams.filter(
            progressive=True, file_extension="mp4"
        )
        best = progressive_mp4.order_by("resolution").desc().first()

        # Guard clause: nothing matched the filter, so there is nothing to fetch.
        if not best:
            print("No suitable video stream found")
            return

        best.download(output_path=download_path)
        print(f"Video downloaded successfully to {download_path}")
    except Exception as e:
        print(f"Error in downloading YouTube video: {e}")
| |
|
| |
|
def download_audio(url, download_path="../data/"):
    """
    Download audio from YouTube and convert to MP3 format.

    Args:
        url: YouTube video URL
        download_path: Directory where the MP3 file will be saved

    Errors are caught and reported on stdout rather than raised.
    """
    ydl_opts = {
        # os.path.join keeps the template valid whether or not download_path
        # ends with a path separator.
        "outtmpl": os.path.join(download_path, "%(title)s.%(ext)s"),
        "format": "bestaudio/best",
        # YoutubeDL expects its Python option names, not the CLI flag
        # spellings; unknown keys such as "geo-bypass" are silently ignored.
        "geo_bypass": True,
        "noplaylist": True,
        # API equivalent of the --force-ipv4 CLI flag: bind to an IPv4 address.
        "source_address": "0.0.0.0",
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
                "preferredquality": "192",
            }
        ],
        # Custom request headers are read from "http_headers".
        "http_headers": {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        },
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        print(f"Audio downloaded and converted to MP3 successfully at {download_path}")
    except Exception as e:
        print(f"An error occurred: {e}")
| |
|
| |
|
| | |
def download_file(url, download_path="../data/", timeout=30):
    """
    Stream the resource at ``url`` into a file under ``download_path``.

    The file name is the last segment of the URL path, so query strings and
    fragments are not part of it. ``"downloaded_file"`` is used when the path
    has no final segment (for example a URL ending in ``/``).

    Args:
        url: URL of the file to fetch
        download_path: Destination directory; created if it does not exist
        timeout: Seconds passed to ``requests.get`` as its ``timeout``

    Errors are caught and reported on stdout rather than raised.
    """
    from urllib.parse import urlsplit

    try:
        # Use only the path component so "?token=..." never leaks into the name.
        name = os.path.basename(urlsplit(url).path) or "downloaded_file"
        filename = os.path.join(download_path, name)

        # The context manager releases the streamed connection even when
        # writing fails part-way through.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()

            # os.makedirs("") raises, so only create a non-empty directory path.
            if download_path:
                os.makedirs(download_path, exist_ok=True)

            with open(filename, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        file.write(chunk)
        print(f"File downloaded successfully to {filename}")
    except Exception as e:
        print(f"An error occurred: {e}")
| |
|
| |
|
| | |
def download_text_or_webpage(url, download_path="../data/", is_text=False, timeout=30):
    """
    Fetch ``url`` and save it as a ``.txt`` file or a prettified ``.html`` file.

    The file name is the last segment of the URL path plus the extension;
    ``"index"`` is used when the path has no final segment (for example a URL
    ending in ``/``). Both output kinds are written as UTF-8.

    Args:
        url: URL of the text resource or web page
        download_path: Destination directory; created if it does not exist
        is_text: If true, save the response text as-is with a ``.txt``
            extension; otherwise parse it with BeautifulSoup and save the
            prettified markup with a ``.html`` extension
        timeout: Seconds passed to ``requests.get`` as its ``timeout``

    Errors are caught and reported on stdout rather than raised.
    """
    from urllib.parse import urlsplit

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Use only the path component so query strings stay out of the name,
        # and fall back to a fixed stem instead of producing a bare ".txt".
        stem = os.path.basename(urlsplit(url).path) or "index"

        # os.makedirs("") raises, so only create a non-empty directory path.
        if download_path:
            os.makedirs(download_path, exist_ok=True)

        if is_text:
            filename = os.path.join(download_path, stem + ".txt")
            # Explicit UTF-8 avoids platform-default encoding errors.
            with open(filename, "w", encoding="utf-8") as file:
                file.write(response.text)
            print(f"Text file downloaded successfully to {filename}")
        else:
            soup = BeautifulSoup(response.text, "html.parser")
            filename = os.path.join(download_path, stem + ".html")
            with open(filename, "w", encoding="utf-8") as file:
                file.write(soup.prettify())
            print(f"Webpage downloaded successfully to {filename}")

    except Exception as e:
        print(f"An error occurred: {e}")
| |
|
| |
|
def main():
    """Run the audio download for a fixed sample YouTube URL."""
    download_audio("https://www.youtube.com/watch?v=8OHYynw7Yh4")
| |
|
| | |
| | |
| |
|
| | |
| | |
| |
|
| | |
| | |
| |
|
| |
|
# Run the sample download only when executed as a script, not on import.
if __name__ == "__main__":
    main()
| |
|