import subprocess
from typing import Optional, Sequence


def run_app(
    command: Optional[Sequence[str]] = None,
    working_directory: Optional[str] = None,
) -> subprocess.CompletedProcess:
    """Run the spider as a child process and print its captured output.

    With no arguments this runs ``scrapy crawl generic`` inside
    ``src/introlix_api/app/introlix_spider``. That default directory is a
    relative path, so it is resolved against the current working directory
    of the calling process.

    Args:
        command: Argument list to execute. Defaults to
            ``["scrapy", "crawl", "generic"]``.
        working_directory: Directory the command is run in. Defaults to
            ``"src/introlix_api/app/introlix_spider"``.

    Returns:
        The ``subprocess.CompletedProcess`` of the finished command, so the
        caller can inspect ``returncode``, ``stdout`` and ``stderr``. A
        non-zero exit status is NOT raised as an exception.

    Raises:
        OSError: Propagated from ``subprocess.run`` when the process cannot
            be started (e.g. ``FileNotFoundError`` for a missing executable
            or working directory).
    """
    # Resolve defaults inside the function to avoid a shared mutable default.
    if command is None:
        command = ["scrapy", "crawl", "generic"]
    if working_directory is None:
        working_directory = "src/introlix_api/app/introlix_spider"

    # Output is captured (not streamed), so nothing is printed until the
    # child process has exited.
    result = subprocess.run(
        list(command),
        cwd=working_directory,
        capture_output=True,
        text=True,
    )

    print("Output:", result.stdout)
    print("Error:", result.stderr)

    return result


if __name__ == "__main__":
    # running the spider
    run_app()

# NOTE: the method below is disabled (commented out) and is never executed.
#
# def run_get_urls_from_page_parallel(self, urls: list, max_workers: int=10) -> list:
#     """
#     Running get_urls_from_page function in parallel for many runs.
#
#     Args:
#         urls (list): list of urls
#         max_workers (int, optional): number of workers. Defaults to 10.
#     Returns:
#         list: list of fetched urls
#     """
#     fetched_urls = []
#
#     with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
#         futures = {executor.submit(self.get_urls_from_page, url): url for url in urls}
#
#         for future in concurrent.futures.as_completed(futures):
#             url = futures[future]
#             try:
#                 result = future.result()
#                 fetched_urls.append(result)
#             except Exception as e:
#                 raise CustomException(e, sys) from e
#
#     return list(set(list(url for sublist in fetched_urls if sublist is not None for url in sublist)))