File size: 2,366 Bytes
497f2f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

def sanitize_filename(filename):
    """Return *filename* with invalid file-name characters replaced.

    Each occurrence of ``<``, ``>``, ``:``, ``"``, ``/``, a backslash, ``|``,
    ``?`` or ``*`` is replaced by a single underscore; every other character
    is kept as is.
    """
    invalid_chars = re.compile(r'[<>:"/\\|?*]')
    return invalid_chars.sub('_', filename)

def fetch_task_links(category_url, timeout=10):
    """Fetch the task links listed on a Rosetta Code category page.

    Only the single page at *category_url* is requested; no further pages
    are followed.

    Args:
        category_url: URL of the category page to scrape.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``(absolute_url, link_text)`` tuples, one per anchor found
        under ``.mw-category-group ul li`` that carries an ``href``. An empty
        list is returned when the request fails or the server answers with a
        status other than 200.
    """
    try:
        # Without a timeout, requests may block forever on a stalled server.
        response = requests.get(category_url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"[ERROR] Failed to fetch {category_url}: {exc}")
        return []

    if response.status_code != 200:
        print(f"[ERROR] Failed to fetch {category_url}. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    links = soup.select('.mw-category-group ul li a')
    # Resolve each href against the URL that was actually served so that both
    # site-relative ("/wiki/...") and already-absolute hrefs come out right;
    # anchors without an href are skipped instead of raising KeyError.
    return [
        (urljoin(response.url, link['href']), link.text)
        for link in links
        if link.get('href')
    ]

def fetch_code_from_task(task_url, timeout=10):
    """Fetch the code snippets shown on a Rosetta Code task page.

    Args:
        task_url: URL of the task page to scrape.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with the text content of every ``<pre>`` element on the page,
        in document order. An empty list is returned when the request fails
        or the server answers with a status other than 200.
    """
    try:
        # Without a timeout, requests may block forever on a stalled server.
        response = requests.get(task_url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"[ERROR] Failed to fetch {task_url}: {exc}")
        return []

    if response.status_code != 200:
        print(f"[ERROR] Failed to fetch {task_url}. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    return [block.text for block in soup.find_all('pre')]

def save_safe_codes(task_name, codes, save_dir):
    """Write each snippet in *codes* to its own UTF-8 text file in *save_dir*.

    The directory is created when it does not exist. Files are named
    ``<sanitized task name>_<n>.txt`` with ``n`` counting from 1. A failure
    on one file is reported on stdout and does not stop the remaining ones.
    """
    os.makedirs(save_dir, exist_ok=True)
    # Sanitize the task name before using it as part of a file name.
    safe_name = sanitize_filename(task_name)
    for number, snippet in enumerate(codes, start=1):
        filepath = os.path.join(save_dir, f"{safe_name}_{number}.txt")
        try:
            with open(filepath, 'w', encoding='utf-8') as out_file:
                out_file.write(snippet)
            print(f"[SUCCESS] Saved: {filepath}")
        except Exception as e:
            print(f"[ERROR] Could not save file {filepath}: {e}")

if __name__ == "__main__":
    # Script entry point: list the tasks of the category, then download and
    # store the code snippets of the first few of them.
    save_directory = "safe-code-analyzer/safe_codes"
    category_url = "https://rosettacode.org/wiki/Category:Programming_Tasks"

    tasks = fetch_task_links(category_url)
    print(f"[INFO] Found {len(tasks)} tasks on Rosetta Code.")

    # Adjust this bound to change how many tasks are processed.
    task_limit = 10
    for task_url, task_name in tasks[:task_limit]:
        print(f"[INFO] Fetching codes for task: {task_name}")
        save_safe_codes(task_name, fetch_code_from_task(task_url), save_directory)