Spaces:
Paused
Paused
| import requests | |
| from bs4 import BeautifulSoup | |
| import os | |
| import urllib.request | |
| import json | |
| import time | |
| import random | |
| import gradio as gr | |
| import shutil | |
| # Hàm lấy gợi ý tìm kiếm từ Google | |
def get_google_suggestions(query):
    """Return Google autocomplete suggestions for *query*.

    Sends *query* to the ``suggestqueries.google.com`` endpoint using the
    ``firefox`` client format and returns the second element of the decoded
    JSON reply.

    This is best-effort: a network failure or a reply that is not the
    expected JSON shape yields an empty list instead of raising.
    """
    url = "http://suggestqueries.google.com/complete/search"
    # Let requests build the query string so characters such as '&', '#'
    # or '+' in the user's text are percent-encoded instead of silently
    # corrupting the URL.
    params = {"client": "firefox", "q": query}
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    try:
        response = requests.get(url, params=params, headers=headers, timeout=5)
        return json.loads(response.text)[1]
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # RequestException: network/timeout problems.
        # ValueError: body is not valid JSON.
        # LookupError/TypeError: JSON decoded but has an unexpected shape.
        return []
| # Hàm tải ảnh | |
def download_images(search_term, num_images_per_term, save_folder, status_callback):
    """Download up to *num_images_per_term* images for *search_term*.

    Fetches the Google Images result page for ``"<search_term> free"`` and
    saves each ``<img>`` whose ``src`` starts with ``http`` into
    *save_folder* as ``<term>_<n>.jpg``. Progress and per-image errors are
    reported by calling *status_callback* with a single message string.

    Returns the number of images saved; 0 if the result page could not be
    fetched.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_folder, exist_ok=True)

    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    try:
        # params= percent-encodes the term; a space is still sent as '+',
        # so ordinary queries produce the same URL as before.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": f"{search_term} free", "tbm": "isch"},
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException:
        status_callback(f"Lỗi truy cập {search_term}")
        return 0

    # Suggestions are arbitrary text: replace spaces and characters that are
    # path separators or invalid in file names so the save cannot fail (or
    # escape save_folder) because of the term itself.
    unsafe_chars = set(' /\\:*?"<>|')
    file_stem = "".join("_" if ch in unsafe_chars else ch for ch in search_term)

    soup = BeautifulSoup(response.text, 'html.parser')
    count = 0
    for i, img in enumerate(soup.find_all('img')):
        if count >= num_images_per_term:
            break
        img_url = img.get('src')
        # Skip inline data: URIs, relative paths and tags without a src.
        if not img_url or not img_url.startswith('http'):
            continue
        file_name = f"{save_folder}/{file_stem}_{count}.jpg"
        try:
            # A bounded timeout keeps one stalled image host from hanging
            # the whole run.
            image = requests.get(img_url, headers=headers, timeout=10)
            image.raise_for_status()
            with open(file_name, "wb") as out_file:
                out_file.write(image.content)
        except (requests.RequestException, OSError) as e:
            status_callback(f"Lỗi ảnh {i} ({search_term}): {str(e)}")
            continue
        status_callback(f"Đã tải: {file_name}")
        count += 1
        # Random pause between downloads to avoid hammering the server.
        time.sleep(random.uniform(1, 3))
    return count
| # Hàm nén thư mục | |
def zip_folder(folder_path):
    """Zip the contents of *folder_path* into ``downloaded_images.zip``.

    The archive is written next to the folder (in its parent directory).

    Returns ``(zip_path, message)`` on success and ``(None, message)`` when
    archiving fails; the message is a user-facing status string.
    """
    # normpath drops a trailing separator. Without it, dirname("imgs/")
    # returns "imgs" itself and the archive would be created inside the
    # directory that is being archived.
    parent_dir = os.path.dirname(os.path.normpath(folder_path))
    output_zip = os.path.join(parent_dir, "downloaded_images")
    try:
        shutil.make_archive(output_zip, 'zip', folder_path)
        return output_zip + ".zip", "Đã nén thành công"
    except Exception as e:
        # Reporting boundary: any archiving failure becomes a status message
        # for the UI rather than an exception.
        return None, f"Lỗi khi nén: {str(e)}"
| # Hàm chính tải ảnh | |
def start_download(initial_query, target_images, max_per_term, save_folder, zip_files):
    """Download images by following Google suggestions from *initial_query*.

    Repeatedly asks for suggestions for the current query, picks a random
    suggestion that has not been used yet, downloads up to *max_per_term*
    images for it and continues from that suggestion. The loop ends when
    *target_images* images have been downloaded or no new suggestion is
    available. When *zip_files* is true, *save_folder* is zipped afterwards.

    Returns ``(status_text, zip_path)``: the newline-joined status log and
    the path of the zip file, or ``None`` when no archive was produced.
    """
    status_log = []
    status_callback = status_log.append

    # With a non-positive per-term limit every search would download
    # nothing, yet the loop would still walk through suggestions (issuing
    # requests and sleeping) until they run out. Refuse up front instead.
    if target_images > 0 and max_per_term <= 0:
        status_callback("Ảnh tối đa mỗi từ khóa phải lớn hơn 0.")
        return "\n".join(status_log), None

    total_downloaded = 0
    current_query = initial_query
    used_queries = set()
    while total_downloaded < target_images:
        suggestions = get_google_suggestions(current_query)
        if not suggestions:
            status_callback("Hết gợi ý, dừng lại.")
            break
        available_suggestions = [s for s in suggestions if s not in used_queries]
        if not available_suggestions:
            status_callback("Hết gợi ý mới, dừng lại.")
            break
        current_query = random.choice(available_suggestions)
        used_queries.add(current_query)
        remaining = target_images - total_downloaded
        images_to_download = min(max_per_term, remaining)
        status_callback(f"Tìm kiếm: {current_query}")
        downloaded = download_images(current_query, images_to_download, save_folder, status_callback)
        total_downloaded += downloaded
        status_callback(f"Tổng: {total_downloaded}/{target_images}")
        # Pause only when another search follows; sleeping after the target
        # is reached just delays the result.
        if total_downloaded < target_images:
            time.sleep(random.uniform(2, 5))
    status_callback(f"Hoàn tất! Đã tải {total_downloaded} ảnh.")

    zip_file_path = None
    if zip_files:
        status_callback("Đang nén thư mục...")
        zip_file_path, zip_message = zip_folder(save_folder)
        status_callback(zip_message)
    return "\n".join(status_log), zip_file_path
| # Giao diện Gradio | |
def create_interface():
    """Build and return the Gradio Blocks UI for the image downloader.

    The left column holds the settings (query, image counts, target folder,
    zip option) and the start button; the right column shows the status log
    and, once an archive exists, a file component for downloading it.
    """
    css = """
    .container {
        background: linear-gradient(135deg, #e0eafc, #cfdef3);
        padding: 20px;
        border-radius: 10px;
        box-shadow: 0 4px 8px rgba(0,0,0,0.1);
    }
    h1 {
        color: #2c3e50;
        text-align: center;
        font-family: Arial, sans-serif;
    }
    .input-box {
        background: white;
        border-radius: 5px;
        padding: 10px;
        margin: 5px 0;
    }
    .status-box {
        background: #f9f9f9;
        border: 1px solid #ddd;
        border-radius: 5px;
        padding: 10px;
        height: 300px;
        overflow-y: auto;
    }
    .button {
        background: #3498db !important;
        color: white !important;
        border-radius: 5px !important;
        padding: 10px 20px !important;
    }
    .button:hover {
        background: #2980b9 !important;
    }
    """
    with gr.Blocks(css=css, title="Image Downloader") as demo:
        gr.Markdown("<h1>📸 Image Downloader</h1>")
        gr.Markdown("Tải ảnh từ Google Images và tùy chọn nén thành file zip.")
        with gr.Row(elem_classes="container"):
            with gr.Column(scale=1):
                gr.Markdown("### Cài đặt", elem_classes="input-box")
                initial_query = gr.Textbox(label="Từ khóa ban đầu", value="free images", elem_classes="input-box")
                target_images = gr.Number(label="Số lượng ảnh", value=10000, precision=0, elem_classes="input-box")
                max_per_term = gr.Number(label="Ảnh tối đa mỗi từ khóa", value=20, precision=0, elem_classes="input-box")
                save_folder = gr.Textbox(label="Thư mục lưu", value="free_images", elem_classes="input-box")
                zip_files = gr.Checkbox(label="Nén thành file zip", value=True, elem_classes="input-box")
                submit_btn = gr.Button("Bắt đầu tải", elem_classes="button")
            with gr.Column(scale=2):
                gr.Markdown("### Trạng thái", elem_classes="input-box")
                output_status = gr.Textbox(label="", lines=15, interactive=False, elem_classes="status-box")
                output_file = gr.File(label="Tải file zip (nếu có)", visible=False)

        def run_download(query, target, max_term, folder, zip_opt):
            """Click handler: run the download and update both outputs."""
            # A cleared Number field arrives as None; int(None) would crash
            # the handler, so report the problem in the status box instead.
            if target is None or max_term is None:
                return "Vui lòng nhập số lượng ảnh hợp lệ.", gr.update(visible=False)
            status, zip_path = start_download(query, int(target), int(max_term), folder, zip_opt)
            # gr.update() is the generic property update; the per-component
            # gr.File.update() classmethod no longer exists in Gradio 4+.
            if zip_path:
                return status, gr.update(value=zip_path, visible=True)
            return status, gr.update(visible=False)

        submit_btn.click(
            fn=run_download,
            inputs=[initial_query, target_images, max_per_term, save_folder, zip_files],
            outputs=[output_status, output_file]
        )
    return demo
| # Chạy ứng dụng | |
if __name__ == "__main__":
    # Build the UI and start the Gradio server when run as a script.
    create_interface().launch()