Spaces:
Paused
Paused
import json
import os
import queue
import random
import shutil
import threading
import time
import tkinter as tk
import urllib.request
from tkinter import ttk, messagebox, filedialog

import requests
from bs4 import BeautifulSoup
# Fetch search suggestions from Google.
def get_google_suggestions(query):
    """Return Google autocomplete suggestions for ``query``.

    The Google suggest endpoint is queried with ``client=firefox`` and the
    second element of the decoded JSON payload is returned (presumably the
    payload has the shape ``[query, [suggestion, ...]]`` -- confirm against
    the live service).

    Args:
        query: Search text to get suggestions for. It is passed through
            ``params=`` so spaces, ``&``, ``#`` and non-ASCII characters are
            URL-encoded instead of corrupting the request.

    Returns:
        A list of suggestions, or an empty list if the request fails, the
        response is not valid JSON, or the payload is not in the expected
        shape.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    try:
        response = requests.get(
            "https://suggestqueries.google.com/complete/search",
            params={"client": "firefox", "q": query},
            headers=headers,
            timeout=5,
        )
        response.raise_for_status()
        suggestions = json.loads(response.text)[1]
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # Network failure, HTTP error status, invalid JSON, or a payload
        # that cannot be indexed: callers treat all of these as "no suggestions".
        return []
    return suggestions if isinstance(suggestions, list) else []
# Download images for a single search term.
def download_images(search_term, num_images_per_term, save_folder, status_callback):
    """Download up to ``num_images_per_term`` images for ``search_term``.

    The Google Images results page for ``"<search_term> free"`` is fetched
    once. Every ``<img>`` tag whose ``src`` starts with ``http`` is then
    downloaded into ``save_folder``. Files are always given a ``.jpg``
    extension, whatever the server actually returns.

    Args:
        search_term: Text to search for; also used (sanitized) as the file
            name prefix.
        num_images_per_term: Maximum number of images to save.
        save_folder: Destination directory; created if it does not exist.
        status_callback: Callable taking one string, used to report progress
            and per-image errors.

    Returns:
        The number of images saved. ``0`` if the results page could not be
        fetched.
    """
    os.makedirs(save_folder, exist_ok=True)

    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    try:
        # ``params=`` URL-encodes the term; a space is sent as ``+`` exactly
        # like the hand-built ``q=<term>+free`` query it replaces.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": f"{search_term} free", "tbm": "isch"},
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException:
        status_callback(f"Lỗi truy cập {search_term}")
        return 0

    soup = BeautifulSoup(response.text, 'html.parser')

    # Suggestions may contain characters that are invalid or dangerous in a
    # file name ("/", ":", "?", ...); keep letters, digits, "-" and "_" only.
    safe_term = "".join(
        ch if ch.isalnum() or ch in "-_" else "_" for ch in search_term
    )

    count = 0
    for i, img in enumerate(soup.find_all('img')):
        if count >= num_images_per_term:
            break

        img_url = img.get('src')
        if not img_url or not img_url.startswith('http'):
            continue

        file_name = os.path.join(save_folder, f"{safe_term}_{count}.jpg")
        try:
            # A timeout keeps one stalled server from hanging the whole run.
            image_response = requests.get(img_url, headers=headers, timeout=10)
            image_response.raise_for_status()
            with open(file_name, "wb") as image_file:
                image_file.write(image_response.content)
        except (requests.RequestException, OSError) as e:
            # Best effort: report the failure and move on to the next image.
            status_callback(f"Lỗi ảnh {i} ({search_term}): {str(e)}")
            continue

        status_callback(f"Đã tải: {file_name}")
        count += 1
        # Random pause between downloads to avoid hammering the server.
        time.sleep(random.uniform(1, 3))

    return count
# Compress a folder into a zip archive.
def zip_folder(folder_path, output_zip):
    """Create ``<output_zip>.zip`` from the contents of ``folder_path``.

    Args:
        folder_path: Directory whose contents are archived.
        output_zip: Archive path *without* the ``.zip`` extension
            (``shutil.make_archive`` appends it).

    Returns:
        ``True`` on success, otherwise a non-empty string describing the
        failure. This function reports errors through its return value and
        does not raise.
    """
    # Check up front: depending on the Python version, make_archive may
    # produce an empty archive for a missing directory instead of failing.
    if not os.path.isdir(folder_path):
        return f"Thư mục không tồn tại: {folder_path}"

    try:
        shutil.make_archive(output_zip, 'zip', folder_path)
    except Exception as e:
        # Some exceptions stringify to "", so fall back to the type name to
        # keep the error message non-empty.
        return str(e) or type(e).__name__
    return True
# Main download routine.
def start_download(initial_query, target_images, save_folder, max_per_term, status_text):
    """Start a background thread that downloads images, then zips the folder.

    Starting from ``initial_query``, the worker repeatedly asks for search
    suggestions, picks a random one that has not been used yet, and downloads
    up to ``max_per_term`` images for it. It stops when ``target_images``
    images have been saved or no new suggestions are left. Note that the
    initial query itself is only used to seed suggestions. Afterwards
    ``save_folder`` is archived as ``downloaded_images.zip`` next to it.

    Args:
        initial_query: Seed text for the first suggestion lookup.
        target_images: Total number of images to aim for.
        save_folder: Directory images are written to.
        max_per_term: Maximum images to download per search term.
        status_text: Callable taking one string; receives progress messages.
            It is invoked from the worker thread.

    Returns:
        The started ``threading.Thread``, so callers can ``join()`` it or
        poll ``is_alive()``.
    """

    def download_thread():
        total_downloaded = 0
        current_query = initial_query
        used_queries = set()
        try:
            while total_downloaded < target_images:
                suggestions = get_google_suggestions(current_query)
                if not suggestions:
                    status_text("Hết gợi ý, dừng lại.")
                    break

                available_suggestions = [s for s in suggestions if s not in used_queries]
                if not available_suggestions:
                    status_text("Hết gợi ý mới, dừng lại.")
                    break

                # The chosen suggestion also seeds the next suggestion lookup.
                current_query = random.choice(available_suggestions)
                used_queries.add(current_query)

                remaining = target_images - total_downloaded
                images_to_download = min(max_per_term, remaining)
                status_text(f"Tìm kiếm: {current_query}")
                downloaded = download_images(current_query, images_to_download, save_folder, status_text)
                total_downloaded += downloaded
                status_text(f"Tổng: {total_downloaded}/{target_images}")
                # Random pause between search terms.
                time.sleep(random.uniform(2, 5))

            status_text(f"Hoàn tất! Đã tải {total_downloaded} ảnh.")

            # Zip the folder.
            status_text("Đang nén thư mục...")
            # abspath() normalizes a trailing separator; without it
            # dirname("images/") is "images" and the archive would be
            # written inside the folder that is being zipped.
            parent_dir = os.path.dirname(os.path.abspath(save_folder))
            zip_result = zip_folder(save_folder, os.path.join(parent_dir, "downloaded_images"))
            if zip_result is True:
                status_text("Đã nén thành công: downloaded_images.zip")
            else:
                status_text(f"Lỗi khi nén: {zip_result}")
        except Exception as e:
            # Top-level boundary of the worker: report the failure instead of
            # letting the thread die without any final status.
            status_text(f"Lỗi không mong muốn: {e}")

    thread = threading.Thread(target=download_thread)
    thread.start()
    return thread
# GUI.
class ImageDownloaderApp:
    """Tkinter window that collects download settings and shows progress.

    Status messages coming from a background thread are not written to the
    widgets directly, because Tkinter widgets must only be touched from the
    thread running the main loop. They are placed on a queue that the main
    thread drains periodically via ``root.after``.
    """

    # How often (ms) the main thread drains queued messages and checks
    # whether the background worker has finished.
    _POLL_INTERVAL_MS = 100

    def __init__(self, root):
        """Build the widgets inside ``root`` and start the polling loop."""
        self.root = root
        self.root.title("Image Downloader")
        self.root.geometry("600x500")

        self._status_queue = queue.SimpleQueue()
        self._worker = None

        # Label and entry for the initial keyword.
        tk.Label(root, text="Từ khóa ban đầu:").pack(pady=5)
        self.query_entry = tk.Entry(root, width=40)
        self.query_entry.insert(0, "free images")
        self.query_entry.pack()

        # Target number of images.
        tk.Label(root, text="Số lượng ảnh cần tải:").pack(pady=5)
        self.target_entry = tk.Entry(root, width=10)
        self.target_entry.insert(0, "10000")
        self.target_entry.pack()

        # Maximum number of images per keyword.
        tk.Label(root, text="Số ảnh tối đa mỗi từ khóa:").pack(pady=5)
        self.max_per_term_entry = tk.Entry(root, width=10)
        self.max_per_term_entry.insert(0, "20")
        self.max_per_term_entry.pack()

        # Destination folder.
        tk.Label(root, text="Thư mục lưu:").pack(pady=5)
        self.folder_var = tk.StringVar(value="free_images")
        folder_frame = tk.Frame(root)
        folder_frame.pack()
        tk.Entry(folder_frame, textvariable=self.folder_var, width=30).pack(side=tk.LEFT)
        tk.Button(folder_frame, text="Chọn", command=self.choose_folder).pack(side=tk.LEFT, padx=5)

        # Start button.
        self.start_button = tk.Button(root, text="Bắt đầu tải", command=self.start_download)
        self.start_button.pack(pady=10)

        # Status display area.
        tk.Label(root, text="Trạng thái:").pack(pady=5)
        self.status_text = tk.Text(root, height=15, width=70)
        self.status_text.pack()

        self.root.after(self._POLL_INTERVAL_MS, self._poll_worker)

    def choose_folder(self):
        """Let the user pick the destination folder with a directory dialog."""
        folder = filedialog.askdirectory()
        if folder:
            self.folder_var.set(folder)

    def update_status(self, message):
        """Append ``message`` to the status box; safe to call from any thread.

        Calls from the main thread update the widget immediately. Calls from
        other threads only enqueue the message; the main thread displays it
        on its next poll.
        """
        if threading.current_thread() is threading.main_thread():
            self._append_status(message)
        else:
            self._status_queue.put(message)

    def _append_status(self, message):
        """Write one line to the status box (main thread only)."""
        self.status_text.insert(tk.END, f"{message}\n")
        self.status_text.see(tk.END)
        self.root.update_idletasks()

    def _drain_status_queue(self):
        """Display every message queued by background threads."""
        while True:
            try:
                message = self._status_queue.get_nowait()
            except queue.Empty:
                return
            self._append_status(message)

    def _poll_worker(self):
        """Flush queued messages and re-enable the button when the worker ends."""
        self._drain_status_queue()
        worker = self._worker
        if worker is not None and not worker.is_alive():
            # Pick up anything queued between the drain above and thread exit.
            self._drain_status_queue()
            self._worker = None
            self.start_button.config(state="normal")
        self.root.after(self._POLL_INTERVAL_MS, self._poll_worker)

    def start_download(self):
        """Validate the form and launch the background download."""
        initial_query = self.query_entry.get().strip()
        save_folder = self.folder_var.get().strip()
        try:
            target_images = int(self.target_entry.get())
            max_per_term = int(self.max_per_term_entry.get())
        except ValueError:
            messagebox.showerror("Lỗi", "Số lượng phải là số nguyên!")
            return

        if not initial_query or target_images <= 0 or max_per_term <= 0 or not save_folder:
            messagebox.showerror("Lỗi", "Vui lòng nhập đầy đủ và hợp lệ các thông tin!")
            return

        self.start_button.config(state="disabled")
        self.status_text.delete("1.0", tk.END)
        try:
            # Inside a method this name resolves to the module-level function.
            worker = start_download(initial_query, target_images, save_folder, max_per_term, self.update_status)
        except Exception:
            self.start_button.config(state="normal")
            raise

        if isinstance(worker, threading.Thread):
            # Keep the button disabled until the poll loop sees the thread
            # finish, so a second run cannot be started concurrently.
            self._worker = worker
        else:
            # No thread handle was returned, so completion cannot be
            # observed; fall back to re-enabling the button right away.
            self.start_button.config(state="normal")
# Run the application when executed as a script.
if __name__ == "__main__":
    main_window = tk.Tk()
    # Keep a reference to the app object for the lifetime of the main loop.
    downloader_app = ImageDownloaderApp(main_window)
    main_window.mainloop()