File size: 3,674 Bytes
6f54a86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import os
import glob

DOCS_DIR = "Docs"  # Directory whose top-level *.md files are processed by main().
MAX_SCAN_LINES = 30  # Only this many leading lines of a file are scanned for a title.

def guess_title_from_filename(filename: str) -> str:
    """Build a human-readable title from a file name.

    The directory part and the extension are dropped, underscores and
    hyphens become spaces, and an all-uppercase result is converted to
    Title Case. Surrounding whitespace is stripped from the result.
    """
    stem, _extension = os.path.splitext(os.path.basename(filename))
    # One pass that maps both separator characters to a space.
    spaced = stem.translate(str.maketrans("_-", "  "))
    readable = spaced.title() if spaced.isupper() else spaced
    return readable.strip()


def normalize_text_for_match(text: str) -> str:
    """Normalize text for approximate comparison.

    Underscores, hyphens, parentheses, commas and periods are replaced by
    spaces, runs of whitespace are collapsed to a single space, and the
    result is lowercased.
    """
    # Translation table mapping each ignored symbol to a single space.
    symbol_to_space = str.maketrans(dict.fromkeys("_-(),.", " "))
    tokens = text.translate(symbol_to_space).split()
    return " ".join(tokens).lower()


def looks_like_title(line: str) -> bool:
    """Return True when a line plausibly is a title.

    A simple heuristic: the stripped line must be non-empty, at most 200
    characters long, contain no ``http://`` / ``https://`` URL, and have
    no more than 40% of its characters be digits.
    """
    text = line.strip()

    # Blank or overly long lines are never titles.
    if not text or len(text) > 200:
        return False

    # Skip lines that clearly carry a URL.
    if any(scheme in text for scheme in ("http://", "https://")):
        return False

    # Too much numeric noise disqualifies the line.
    digit_count = len([ch for ch in text if ch.isdigit()])
    return digit_count <= len(text) * 0.4

def normalize_md_file(path: str) -> None:
    """Ensure the Markdown file at ``path`` has a normalized ``# Title`` line.

    The first ``MAX_SCAN_LINES`` lines are scanned for the line that best
    matches the title guessed from the file name. Each candidate is scored
    by the fraction of file-name words it contains, plus a 0.1 bonus when
    the line starts with ``#``.

    * If the best score is at least 0.3, that line is replaced in place by
      a level-1 heading built from its text (underscores/hyphens turned
      into spaces, all-uppercase text converted to Title Case).
    * Otherwise a heading derived from the file name, followed by a blank
      line, is inserted before the first non-blank line of the file.

    Empty files are left untouched, and the file is only rewritten when
    its content actually changed.

    Args:
        path: Path of the Markdown file to normalize (read and written as
            UTF-8).
    """
    with open(path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    if not lines:
        return

    # Snapshot used to skip the write when nothing changes.
    original_lines = list(lines)

    filename_title = guess_title_from_filename(path)
    # The file-name word set does not depend on the scanned line, so it is
    # computed once instead of on every loop iteration.
    fname_words = set(normalize_text_for_match(filename_title).split())

    best_idx = None
    best_title = None
    best_score = 0.0

    for i, line in enumerate(lines[:MAX_SCAN_LINES]):
        stripped = line.strip()
        if not stripped:
            continue

        is_heading = stripped.startswith("#")
        candidate = stripped.lstrip("#").strip() if is_heading else stripped

        if not looks_like_title(candidate):
            continue

        cand_words = set(normalize_text_for_match(candidate).split())
        if not cand_words:
            # Nothing left after normalization (e.g. a line of only symbols).
            continue

        if fname_words:
            score = len(fname_words & cand_words) / len(fname_words)
        else:
            score = 0.0

        # Existing Markdown headings get a small preference.
        if is_heading:
            score += 0.1

        # Strict comparison: on ties the earliest candidate wins.
        if score > best_score:
            best_score = score
            best_idx = i
            best_title = candidate

    # Threshold: if nothing reasonable was found, fall back to the file name.
    if best_title is None or best_score < 0.3:
        insert_idx = next(
            (i for i, line in enumerate(lines) if line.strip()),
            0,
        )
        new_title = f"# {filename_title}\n"
        # Single element holding the heading plus a separating blank line.
        lines.insert(insert_idx, new_title + "\n")
    else:
        raw_norm = best_title.replace("_", " ").replace("-", " ")
        if raw_norm.isupper():
            raw_norm = raw_norm.title()
        lines[best_idx] = f"# {raw_norm}\n"

    # Avoid touching files that are already normalized.
    if lines == original_lines:
        return

    with open(path, "w", encoding="utf-8") as f:
        f.writelines(lines)

def main():
    """Normalize the title of every ``*.md`` file directly inside ``DOCS_DIR``.

    Progress is printed per file; an error while processing one file is
    reported and does not stop the remaining files from being processed.
    """
    md_paths = glob.glob(os.path.join(DOCS_DIR, "*.md"))
    print(f"Encontrados {len(md_paths)} arquivos .md em {DOCS_DIR}")

    position = 0
    for md_path in md_paths:
        position += 1
        print(f"[{position}/{len(md_paths)}] Normalizando título de: {os.path.basename(md_path)}")
        try:
            normalize_md_file(md_path)
        except Exception as err:
            # Best-effort batch run: report the failure and keep going.
            print(f"  -> Erro ao processar {md_path}: {err}")

    print("Normalização de títulos concluída.")

# Run the batch normalization only when executed as a script, not on import.
if __name__ == "__main__":
    main()