Spaces:
Sleeping
Sleeping
# Scrapy settings for the "webscraper" project.
#
# Only module-level UPPER_CASE names are read by Scrapy; everything here is a
# plain constant assignment.

import os

# Feed export: write scraped items to "scraped.json" in JSON format.
FEEDS = {"scraped.json": {"format": "json"}}

# --- ScrapeOps -------------------------------------------------------------
# The API key is a credential and must not live in source control, so it is
# read from the environment. Export SCRAPEOPS_API_KEY before running the
# crawler; it falls back to an empty string when unset.
# NOTE(review): a real-looking key was previously committed in this file --
# treat it as leaked and rotate it.
SCRAPEOPS_API_KEY = os.environ.get("SCRAPEOPS_API_KEY", "")
# SCRAPEOPS_FAKE_USER_AGENT_ENDPOINT = "https://headers.scrapeops.io/v1/user-agents"
SCRAPEOPS_FAKE_BROWSER_HEADER_ENDPOINT = "https://headers.scrapeops.io/v1/browser-headers"
# SCRAPEOPS_FAKE_USER_AGENT_ENABLED = True
SCRAPEOPS_FAKE_BROWSER_HEADER_ENABLED = True
SCRAPEOPS_NUM_RESULTS = 50

# --- Proxy -----------------------------------------------------------------
# Placeholder values; replace them with real proxy details before enabling
# the proxy middleware below.
PROXY_USER = "your_proxy_user_here"
PROXY_PASSWORD = "your_proxy_password_here"
PROXY_ENDPOINT = "your_proxy_endpoint_here"
PROXY_PORT = "your_proxy_port_here"

# --- Downloader middlewares (currently disabled) ---------------------------
# The User-Agent is only one of the request headers.
#   * To rotate only the User-Agent, enable ScrapeOpsFakeUserAgentMiddleware.
#   * To rotate the complete set of browser headers, enable
#     ScrapeOpsFakeBrowserHeaderAgentMiddleware instead.
# DOWNLOADER_MIDDLEWARES = {
#     "webscraper.middlewares.MyProxyMiddleware": 300,
#     "webscraper.middlewares.HttpProxyMiddleware": 350,
#     "webscraper.middlewares.ScrapeOpsFakeUserAgentMiddleware": 400,
#     "webscraper.middlewares.ScrapeOpsFakeBrowserHeaderAgentMiddleware": 400,
# }

# --- robots.txt ------------------------------------------------------------
# True  - respect each site's robots.txt rules about what may be scraped.
# False - ignore robots.txt and scrape regardless of those rules.
ROBOTSTXT_OBEY = True

# --- Item pipelines --------------------------------------------------------
ITEM_PIPELINES = {
    "webscraper.pipelines.HtmlToMarkdownPipeline": 300,
}

# --- Project identity ------------------------------------------------------
BOT_NAME = "webscraper"
SPIDER_MODULES = ["webscraper.spiders"]
NEWSPIDER_MODULE = "webscraper.spiders"

# Set settings whose default value is deprecated to a future-proof value.
REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
FEED_EXPORT_ENCODING = "utf-8"