# introlix_api/main.py
# Initial commit (79d285f) by satyam998
import subprocess
def run_app(command=None, working_directory=None):
    """Run a Scrapy spider as a subprocess and report its output.

    Args:
        command (list[str] | None): argv list to execute. Defaults to the
            generic spider crawl (``["scrapy", "crawl", "generic"]``).
        working_directory (str | None): directory to run the command in.
            Defaults to the spider project directory.

    Returns:
        subprocess.CompletedProcess: the finished process, so callers can
        inspect ``returncode``, ``stdout`` and ``stderr``.
    """
    if command is None:
        command = ["scrapy", "crawl", "generic"]
    if working_directory is None:
        working_directory = "src/introlix_api/app/introlix_spider"

    # List argv + shell=False (the default) avoids shell-injection issues.
    result = subprocess.run(
        command, cwd=working_directory, capture_output=True, text=True
    )
    print("Output:", result.stdout)
    print("Error:", result.stderr)
    # Surface a non-zero exit status explicitly instead of failing silently.
    if result.returncode != 0:
        print(f"Command exited with non-zero status {result.returncode}")
    return result
if __name__ == "__main__":
    # Entry point: launch the Scrapy spider once and print its output.
    run_app()
# def run_get_urls_from_page_parallel(self, urls: list, max_workers: int=10) -> list:
# """
# Running get_urls_from_page function in parallel for many runs.
# Args:
# urls (list): list of urls
# max_workers (int, optional): number of workers. Defaults to 10.
# Returns:
# list: list of fetched urls
# """
# fetched_urls = []
# with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
# futures = {executor.submit(self.get_urls_from_page, url): url for url in urls}
# for future in concurrent.futures.as_completed(futures):
# url = futures[future]
# try:
# result = future.result()
# fetched_urls.append(result)
# except Exception as e:
# raise CustomException(e, sys) from e
# return list(set(list(url for sublist in fetched_urls if sublist is not None for url in sublist)))