Spaces:
Paused
Paused
| from bs4 import BeautifulSoup | |
| import json, os | |
| from others import * | |
| import cloudscraper | |
# Module-level scraper instance, created once at import time; get_info_rule34 uses it for its HTTP GET.
| scraper = cloudscraper.create_scraper() | |
def get_info_rule34(link):
    """Scrape a video page and collect its title, artists, video URL and thumbnail.

    The page at ``link`` is fetched with the module-level ``scraper`` and
    parsed with BeautifulSoup. Progress and "not found" messages are printed
    to stdout as each piece of information is looked up.

    Args:
        link: URL of the video page to scrape.

    Returns:
        A 4-tuple ``(video_title, artist_string, video_url, thumbnail_url)``:

        * ``video_title`` -- cleaned, title-cased page title, or
          ``"Unknown Title"`` when the page has no ``title_video`` element.
        * ``artist_string`` -- sorted artist names joined with ``", "``, or
          ``"Unknown Artist"`` when none are found.
        * ``video_url`` -- href of the 720p link if present, else the 480p
          link, else ``None``.
        * ``thumbnail_url`` -- ``thumbnailUrl`` from the page's JSON-LD
          script, or ``None`` when it is unavailable.

    Every element of the tuple is always bound; a missing page element yields
    the fallback value instead of an ``UnboundLocalError`` at the return.
    """
    response = scraper.get(link)
    soup = BeautifulSoup(response.text, 'html.parser')

    # The lookups run in this order so the printed report keeps its layout.
    video_title = _rule34_title(soup)
    artist_string = _rule34_artists(soup)
    thumbnail_url = _rule34_thumbnail_url(soup)
    _rule34_print_resolutions(soup)
    video_url = _rule34_video_url(soup)
    return video_title, artist_string, video_url, thumbnail_url


def _rule34_title(soup):
    """Return the cleaned video title from the ``title_video`` element."""
    title = soup.find(class_="title_video")
    if title is None:
        print("Judul Video tidak ditemukan")
        return "Unknown Title"

    # '/' is replaced because the title is later used to build file names.
    video_title = title.text.strip().replace('/', ' -')
    # Insert a space after the first ']' when a letter follows it directly,
    # e.g. "[Tag]name" -> "[Tag] name".
    idx = video_title.find(']')
    if idx != -1 and idx + 1 < len(video_title) and video_title[idx + 1].isalpha():
        video_title = video_title[:idx + 1] + ' ' + video_title[idx + 1:]
    video_title = video_title.title()
    print(f"Judul Video: {video_title}")
    return video_title


def _rule34_artists(soup):
    """Return the sorted, comma-joined artist names, or ``"Unknown Artist"``."""
    cols = soup.find_all(class_="col")
    if not cols:
        print("Elemen col tidak ditemukan")
        return "Unknown Artist"

    artists = []
    for col in cols:
        # Only the first column carrying an "Artist" label is considered.
        label = col.find(class_="label", string="Artist")
        if label is None:
            continue
        for item in label.find_next_siblings(class_="item btn_link"):
            name = item.find(class_="name")
            if name is None:
                continue
            artist_name = name.text.strip()
            # NOTE(review): this skips every name containing the substring
            # "VA" (so e.g. "NOVA" is dropped too) -- confirm that is intended.
            if "VA" not in artist_name:
                artists.append(artist_name)
        break

    if not artists:
        print("Nama Artist tidak ditemukan")
        return "Unknown Artist"

    artist_string = ", ".join(sorted(artists))
    print(f"Nama Artist: {artist_string}")
    return artist_string


def _rule34_thumbnail_url(soup):
    """Return ``thumbnailUrl`` from the JSON-LD script tag, or ``None``."""
    script = soup.find("script", type="application/ld+json")
    if script is None:
        print("Tidak ditemukan elemen script dengan type application/ld+json")
        return None

    try:
        # ``script.string`` can be None; fall back to "" so that case is
        # reported as "not found" rather than raising TypeError.
        data = json.loads(script.string or "")
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass: empty/malformed JSON.
        data = None

    # JSON-LD payloads may also be lists or scalars; only a dict can carry
    # the key being looked up.
    if isinstance(data, dict) and "thumbnailUrl" in data:
        thumbnail_url = data['thumbnailUrl']
        print(f"Thumbnail URL: {thumbnail_url}")
        return thumbnail_url

    print("Tidak ditemukan thumbnail URL")
    return None


def _rule34_print_resolutions(soup):
    """Print the resolutions advertised by the page's MP4 links."""
    resolutions = []
    for a in soup.find_all('a'):
        text = a.text
        if 'MP4' in text and 'p' in text:
            parts = text.split()
            # The resolution is the second whitespace-separated token; skip
            # link text that has only one token instead of raising IndexError.
            if len(parts) > 1:
                resolutions.append(parts[1])
    if resolutions:
        print("Resolusi yang tersedia: " + ", ".join(resolutions))
    else:
        print("Tidak ditemukan resolusi yang tersedia")


def _rule34_video_url(soup):
    """Return the 720p link if available, else the 480p link, else ``None``."""
    video_quality_720p = None
    video_quality_480p = None
    for element in soup.find_all("a", class_="tag_item"):
        # Anchors without a usable href are ignored instead of raising KeyError.
        href = element.get('href')
        if not href:
            continue
        # When several links match a quality, the last one on the page wins.
        if "720p" in element.text:
            video_quality_720p = href
        elif "480p" in element.text:
            video_quality_480p = href

    if video_quality_720p:
        print(f"Video kualitas 720p: {video_quality_720p}")
        return video_quality_720p
    if video_quality_480p:
        print(f"Video kualitas 480p: {video_quality_480p}")
        return video_quality_480p
    print("Tidak ditemukan video kualitas 720p atau 480p")
    return None
def rule34(link):
    """Scrape a video page, then download its thumbnail and video.

    Page details come from ``get_info_rule34``. Both files are passed to
    ``download_file`` with the same base name and a per-artist directory
    under ``/home/user/app/Hasil Download/Rule34``.

    Args:
        link: URL of the video page.

    Returns:
        A 4-tuple ``(video_file, judul, video_info, thumbnail_file)``:
        the two values returned by ``download_file``, the
        ``"<artist> - <title>"`` name, and a two-line text summary.
    """
    video_title, artist, video_url, thumbnail_url = get_info_rule34(link)
    judul = f"{artist} - {video_title}"
    directory = f"/home/user/app/Hasil Download/Rule34/{artist}"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(directory, exist_ok=True)

    # NOTE(review): download_file is not defined in this view (presumably it
    # comes from ``others``); video_url can be None when no 720p/480p link was
    # found -- confirm how download_file handles that.
    thumbnail_file = download_file(thumbnail_url, judul, directory)
    video_file = download_file(video_url, judul, directory)

    video_info = f"Nama Channel: {artist}\n"
    video_info += f"Judul Video: {video_title}\n"
    return video_file, judul, video_info, thumbnail_file