| | import os
|
| | import re
|
| | import requests
|
| | from bs4 import BeautifulSoup
|
| |
|
def sanitize_filename(filename):
    """Remove or replace characters that are invalid in file names.

    Every less-than, greater-than, colon, double-quote, slash, backslash,
    pipe, question-mark or asterisk character in *filename* is replaced by
    an underscore; all other characters are returned unchanged.
    """
    invalid_chars = re.compile(r'[<>:"/\\|?*]')
    return invalid_chars.sub('_', filename)
|
| |
|
def fetch_task_links(category_url, timeout=30):
    """Fetch all task links from Rosetta Code's category page.

    Args:
        category_url: URL of the category page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        A list of ``(task_url, task_name)`` tuples, one per anchor matched by
        the ``.mw-category-group ul li a`` selector. An empty list is returned
        when the request fails or the response status is not 200.

    NOTE(review): only the page at *category_url* is parsed; if the category
    is split across several pages, the later pages are not followed.
    """
    try:
        response = requests.get(category_url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors, timeouts, etc. are reported the same way as a
        # bad status code instead of crashing the caller.
        print(f"[ERROR] Failed to fetch {category_url}: {exc}")
        return []

    if response.status_code != 200:
        print(f"[ERROR] Failed to fetch {category_url}. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    links = soup.select('.mw-category-group ul li a')
    # The hrefs are prefixed with the site origin, so they are presumably
    # site-relative paths. Anchors without an href are skipped rather than
    # raising KeyError.
    return [
        ("https://rosettacode.org" + link['href'], link.text)
        for link in links
        if link.has_attr('href')
    ]
|
| |
|
def fetch_code_from_task(task_url, timeout=30):
    """Fetch code snippets from a specific task on Rosetta Code.

    Args:
        task_url: URL of the task page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        A list with the text content of every ``<pre>`` element found on the
        page, in document order. An empty list is returned when the request
        fails or the response status is not 200.
    """
    try:
        response = requests.get(task_url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors, timeouts, etc. are reported the same way as a
        # bad status code instead of crashing the caller.
        print(f"[ERROR] Failed to fetch {task_url}: {exc}")
        return []

    if response.status_code != 200:
        print(f"[ERROR] Failed to fetch {task_url}. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    code_blocks = soup.find_all('pre')
    return [code.text for code in code_blocks]
|
| |
|
def save_safe_codes(task_name, codes, save_dir):
    """Write each snippet in *codes* to its own UTF-8 text file in *save_dir*.

    The directory is created if it does not exist. Files are named
    ``<sanitized task name>_<n>.txt`` with ``n`` counting from 1. A failure
    while writing one file is reported and does not stop the remaining
    snippets from being saved.
    """
    os.makedirs(save_dir, exist_ok=True)
    safe_name = sanitize_filename(task_name)
    for index, snippet in enumerate(codes, start=1):
        filepath = os.path.join(save_dir, f"{safe_name}_{index}.txt")
        try:
            with open(filepath, 'w', encoding='utf-8') as out_file:
                out_file.write(snippet)
            print(f"[SUCCESS] Saved: {filepath}")
        except Exception as err:
            print(f"[ERROR] Could not save file {filepath}: {err}")
|
| |
|
def main(
    category_url="https://rosettacode.org/wiki/Category:Programming_Tasks",
    save_directory="safe-code-analyzer/safe_codes",
    max_tasks=10,
):
    """Download code snippets for tasks listed on a category page.

    Args:
        category_url: Category page whose task links are collected.
        save_directory: Directory the snippet files are written to.
        max_tasks: Number of tasks (taken from the start of the list) to
            process; ``None`` processes every task found.
    """
    tasks = fetch_task_links(category_url)
    print(f"[INFO] Found {len(tasks)} tasks on Rosetta Code.")

    # Slicing with None as the upper bound keeps the whole list.
    for task_url, task_name in tasks[:max_tasks]:
        print(f"[INFO] Fetching codes for task: {task_name}")
        codes = fetch_code_from_task(task_url)
        save_safe_codes(task_name, codes, save_directory)


if __name__ == "__main__":
    main()
|
| |
|