File size: 3,379 Bytes
53fc829
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
"""
代码高亮功能模块 - 处理 Python 代码的语法高亮
"""

import html
import re
from constants import KEYWORD_RE, BUILTIN_RE, NUMBER_RE


def split_comment(line: str) -> tuple[str, str]:
    """Split a source line into (code, comment).

    Scans left to right while tracking single/double-quote state so that a
    ``#`` inside a string literal is not treated as a comment start. A
    backslash makes the following character inert (escape handling).
    Returns the pair ``(code_part, comment_part)``; the comment part is the
    empty string when no comment is found.
    """
    single_open = False
    double_open = False
    skip_next = False
    for pos, char in enumerate(line):
        if skip_next:
            # Previous character was a backslash: this one is escaped.
            skip_next = False
        elif char == "\\":
            skip_next = True
        elif char == "'" and not double_open:
            single_open = not single_open
        elif char == '"' and not single_open:
            double_open = not double_open
        elif char == "#" and not (single_open or double_open):
            return line[:pos], line[pos:]
    return line, ""


def tokenize_strings(code: str) -> list[tuple[str, str]]:
    """Break *code* into a list of ``(kind, text)`` segments.

    ``kind`` is ``"string"`` for quoted literals (quotes included, escapes
    honoured) and ``"text"`` for everything between them. An unterminated
    string literal is emitted as a string segment running to the end of the
    input. Concatenating all segment texts reproduces *code* exactly.
    """
    result: list[tuple[str, str]] = []
    pos = 0
    length = len(code)
    while pos < length:
        if code[pos] in "'\"":
            quote = code[pos]
            begin = pos
            pos += 1
            # Consume until the matching, unescaped closing quote (or EOF).
            while pos < length:
                current = code[pos]
                pos += 1
                if current == "\\":
                    pos += 1  # jump over the escaped character
                elif current == quote:
                    break
            result.append(("string", code[begin:pos]))
        else:
            begin = pos
            while pos < length and code[pos] not in "'\"":
                pos += 1
            result.append(("text", code[begin:pos]))
    return result


def highlight_text_segment(text: str) -> str:
    """HTML-escape *text* and wrap keywords, builtins and numbers in spans.

    Substitution order (keyword, builtin, number) matters: each pass runs on
    the output of the previous one, matching the original behaviour.
    """
    marked = html.escape(text)
    for pattern, css_class in (
        (KEYWORD_RE, "tok-keyword"),
        (BUILTIN_RE, "tok-builtin"),
        (NUMBER_RE, "tok-number"),
    ):
        marked = pattern.sub(rf'<span class="{css_class}">\1</span>', marked)
    return marked


def highlight_python_line(line: str) -> str:
    """Return one line of Python source rendered as highlighted HTML.

    The line is split into a code part and a trailing comment; string
    literals become ``tok-string`` spans, the remaining code text is run
    through keyword/builtin/number highlighting, and the comment (if any)
    becomes a ``tok-comment`` span.
    """
    code, comment = split_comment(line)
    parts = [
        f'<span class="tok-string">{html.escape(chunk)}</span>'
        if kind == "string"
        else highlight_text_segment(chunk)
        for kind, chunk in tokenize_strings(code)
    ]
    if comment:
        parts.append(f'<span class="tok-comment">{html.escape(comment)}</span>')
    return "".join(parts)


def build_plain_code_html(code: str, block_id: str) -> str:
    """Render *code* as a simple HTML block with per-line numbers.

    Produces a ``<div id=block_id class="rosa-code">`` container holding one
    ``code-line`` div per source line, each with a 1-based ``line-no`` span
    and the highlighted ``line-text`` span. Lines are joined with newlines.
    """
    pieces = [f'<div id="{block_id}" class="rosa-code">']
    for number, raw_line in enumerate(code.splitlines(), start=1):
        pieces.append(
            f'<div class="code-line">'
            f'<span class="line-no">{number}</span>'
            f'<span class="line-text">{highlight_python_line(raw_line)}</span>'
            f"</div>"
        )
    pieces.append("</div>")
    return "\n".join(pieces)