{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Imgflip meme template scraper\n",
    "\n",
    "Downloads meme images from the imgflip.com template listing pages (`https://imgflip.com/meme/{template}?page={n}`).\n",
    "\n",
    "- Every `img` tag on a listing page is collected with BeautifulSoup.\n",
    "- Tags whose `src` starts with `//i.imgflip.com/` are saved as `.jpg` files, one folder per template under `OUTPUT_DIR`.\n",
    "\n",
    "Requires the `requests`, `bs4` and `lxml` packages."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from urllib.request import urlopen\n",
    "\n",
    "import requests\n",
    "from bs4 import BeautifulSoup"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Template names exactly as they are placed in the listing URL.\n",
    "meme_templetes = [\n",
    "    'Monkey-Puppet',\n",
    "    'Surprised-Pikachu',\n",
    "    'Well-Yes-But-Actually-No',\n",
    "    '10-Guy',\n",
    "    'Spiderman-Computer-Desk',\n",
    "    'Kevin_Hart',\n",
    "    'laughing-leo',\n",
    "    'Lisa',\n",
    "    'Roll-Safe-Think-About-It',\n",
    "    'Batman-Slapping-Robin',\n",
    "    'Change-My-Mind',\n",
    "    'Futurama-Fry',\n",
    "    'First-World-Problems',\n",
    "]\n",
    "\n",
    "# Root folder; every template gets its own sub-folder below it.\n",
    "OUTPUT_DIR = 'D:/'\n",
    "\n",
    "# Listing pages requested per template (pages 1 .. PAGES_PER_TEMPLATE).\n",
    "PAGES_PER_TEMPLATE = 100\n",
    "\n",
    "# Only img tags whose src starts with this prefix are saved.\n",
    "MEME_SRC_PREFIX = '//i.imgflip.com/'\n",
    "\n",
    "# Seconds to wait on a request before giving up, so a stalled\n",
    "# connection cannot hang the notebook.\n",
    "REQUEST_TIMEOUT = 30\n",
    "\n",
    "# Proxy mapping passed along with listing-page requests.\n",
    "# NOTE(review): requests selects a proxy by URL scheme, so this 'http'\n",
    "# entry is not consulted for the https:// listing URLs -- confirm whether\n",
    "# an 'https' entry was intended.\n",
    "LISTING_PROXIES = {'http': '10.10.1.10:3128'}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fetch_image_tags(template, page):\n",
    "    \"\"\"Return all img tags found on one listing page of a meme template.\n",
    "\n",
    "    Args:\n",
    "        template: Template name placed in the listing URL, e.g. 'Monkey-Puppet'.\n",
    "        page: Page number placed in the ``page`` query parameter.\n",
    "\n",
    "    Returns:\n",
    "        The result of ``find_all('img')`` on the parsed page.\n",
    "\n",
    "    Raises:\n",
    "        requests.RequestException: If the request fails, times out or\n",
    "            comes back with an HTTP error status.\n",
    "    \"\"\"\n",
    "    response = requests.get(\n",
    "        'https://imgflip.com/meme/{}?page={}'.format(template, page),\n",
    "        proxies=LISTING_PROXIES,\n",
    "        timeout=REQUEST_TIMEOUT,\n",
    "    )\n",
    "    response.raise_for_status()\n",
    "    return BeautifulSoup(response.text, 'lxml').find_all('img')\n",
    "\n",
    "\n",
    "def save_image(url, path):\n",
    "    \"\"\"Download ``url`` and write the response body to ``path`` in binary mode.\n",
    "\n",
    "    Raises:\n",
    "        requests.RequestException: If the request fails, times out or\n",
    "            comes back with an HTTP error status.\n",
    "    \"\"\"\n",
    "    response = requests.get(url, timeout=REQUEST_TIMEOUT)\n",
    "    # Check the status before opening the file so an error page is never\n",
    "    # stored under an image file name.\n",
    "    response.raise_for_status()\n",
    "    with open(path, 'wb') as handler:\n",
    "        handler.write(response.content)\n",
    "\n",
    "\n",
    "def download_template(template, pages=PAGES_PER_TEMPLATE, output_dir=OUTPUT_DIR):\n",
    "    \"\"\"Download the meme images of one template into its own folder.\n",
    "\n",
    "    The folder is ``output_dir`` joined with the template name, dashes\n",
    "    replaced by underscores. Files are named\n",
    "    ``{folder}{position}page{page_index}.jpg`` where ``position`` is the\n",
    "    index of the img tag on the page and ``page_index`` counts from 0.\n",
    "\n",
    "    Args:\n",
    "        template: Template name placed in the listing URL.\n",
    "        pages: Number of listing pages to request (pages 1 .. ``pages``).\n",
    "        output_dir: Root folder that receives the template folder.\n",
    "\n",
    "    Pages or images whose request fails are reported and skipped.\n",
    "    \"\"\"\n",
    "    folder_name = template.replace('-', '_')\n",
    "    folder = os.path.join(output_dir, folder_name)\n",
    "    # exist_ok lets the notebook be re-run without failing on folders\n",
    "    # created by an earlier run.\n",
    "    os.makedirs(folder, exist_ok=True)\n",
    "\n",
    "    for page_index in range(pages):\n",
    "        try:\n",
    "            # URLs use page numbers 1 .. pages; file names keep the 0-based index.\n",
    "            tags = fetch_image_tags(template, page_index + 1)\n",
    "        except requests.RequestException as error:\n",
    "            print('skipping page {} of {}: {}'.format(page_index, template, error))\n",
    "            continue\n",
    "\n",
    "        # enumerate gives every tag its own position, even when two tags\n",
    "        # compare equal, so one download cannot overwrite another.\n",
    "        for position, tag in enumerate(tags):\n",
    "            # .get avoids a KeyError for img tags without a src attribute.\n",
    "            src = tag.get('src') or ''\n",
    "            if not src.startswith(MEME_SRC_PREFIX):\n",
    "                continue\n",
    "            file_name = '{}{}page{}.jpg'.format(folder_name, position, page_index)\n",
    "            try:\n",
    "                save_image('http:{}'.format(src), os.path.join(folder, file_name))\n",
    "            except requests.RequestException as error:\n",
    "                print('skipping {}: {}'.format(src, error))\n",
    "                continue\n",
    "            if position % 10 == 0:\n",
    "                print('done printing image:{}'.format(position), 'in page {}'.format(page_index))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inspect one listing page\n",
    "\n",
    "Prints the `src` of every `img` tag on page 2 of the `Monkey-Puppet` listing."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/imgflip_white_96.png\n",
      "/imgflip-icon-transparent-192.svg\n",
      "/s/meme/Monkey-Puppet.jpg\n",
      "//i.imgflip.com/4zv2v9.jpg\n",
      "//i.imgflip.com/67jy1i.jpg\n",
      "//i.imgflip.com/67gu7u.jpg\n",
      "//i.imgflip.com/67ezqb.jpg\n",
      "//i.imgflip.com/63add8.jpg\n",
      "//i.imgflip.com/67t59e.jpg\n",
      "//i.imgflip.com/67n7t7.jpg\n",
      "//i.imgflip.com/67t4xy.jpg\n",
      "//i.imgflip.com/675rc7.jpg\n",
      "//i.imgflip.com/674mrj.jpg\n",
      "//i.imgflip.com/67rmxb.jpg\n",
      "//i.imgflip.com/61m49m.jpg\n",
      "//i.imgflip.com/66p3ul.jpg\n",
      "//i.imgflip.com/64kb0c.jpg\n"
     ]
    }
   ],
   "source": [
    "images = fetch_image_tags('Monkey-Puppet', 2)\n",
    "\n",
    "for item in images:\n",
    "    print(item.get('src'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download a single image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "save_image('http://i.imgflip.com/4zv2v9.jpg', 'image_name.jpg')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Request a very large page number\n",
    "\n",
    "Fetches page 104156000 of the `Monkey-Puppet` listing and shows the HTTP status code of the response."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "htmldata = requests.get(\n",
    "    'https://imgflip.com/meme/Monkey-Puppet?page=104156000',\n",
    "    proxies=LISTING_PROXIES,\n",
    "    timeout=REQUEST_TIMEOUT,\n",
    ")\n",
    "htmldata.status_code"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download every template\n",
    "\n",
    "Runs `download_template` once per entry of `meme_templetes`. This issues up to `PAGES_PER_TEMPLATE` listing requests per template plus one request per image."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for temp in meme_templetes:\n",
    "    download_template(temp)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}