File size: 2,256 Bytes
2c3a136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from collections import defaultdict


def get_sections_and_text(chunks):
    """Build per-chunk section spans and the concatenated chunk text.

    Reads the sequence stored under ``chunks["chunks"]``. Each entry must
    provide ``"page_content"`` and ``["metadata"]["start_index"]``.

    Returns:
        A ``(sections, text)`` tuple. ``text`` is every entry's
        ``page_content`` joined in order with no separator. ``sections``
        holds one dict per entry with ``"start"`` (the entry's
        ``start_index``), ``"end"`` (``start_index`` plus the length of its
        ``page_content``) and ``"text"`` (the ``page_content`` itself).
    """
    entries = chunks["chunks"]
    text = "".join(entry["page_content"] for entry in entries)

    sections = []
    for entry in entries:
        begin = entry["metadata"]["start_index"]
        body = entry["page_content"]
        sections.append({"start": begin, "end": begin + len(body), "text": body})

    return sections, text


def calculate_coverage(selection):
    """Count, for every index, how many spans in ``selection`` contain it.

    Each item of ``selection`` is read through its ``"start"`` and ``"end"``
    keys and treated as the half-open range ``[start, end)``.

    Returns:
        A ``defaultdict(int)`` mapping index -> number of spans covering it.
        Indices covered by no span are absent.
    """
    tally = defaultdict(int)
    covered_positions = (
        position
        for span in selection
        for position in range(span["start"], span["end"])
    )
    for position in covered_positions:
        tally[position] += 1
    return tally


def create_end_markers(selection):
    """Group 1-based section numbers by the offset at which each section ends.

    Returns:
        A ``defaultdict(list)`` mapping each item's ``"end"`` value to the
        list of its 1-based positions in ``selection``, in iteration order.
    """
    markers_by_end = defaultdict(list)
    number = 0
    for section in selection:
        number += 1
        markers_by_end[section["end"]].append(number)
    return markers_by_end


def highlight_text(text, coverage, end_markers):
    """Return ``text`` with covered characters highlighted and footnote refs added.

    Every character whose index has a positive count in ``coverage`` is
    wrapped in its own ``<span>`` with a translucent background; the alpha is
    ``0.3 + 0.2 * count`` capped at ``0.8``. All other characters are copied
    through unchanged.

    After the character at index ``i``, if ``i + 1`` is a key of
    ``end_markers``, one ``[^m]`` reference is emitted for each entry stored
    under that key, preceded by a single space when the character is
    alphanumeric. Keys that are not ``1..len(text)`` are never emitted.
    """
    pieces = []

    for index, char in enumerate(text):
        hits = coverage.get(index, 0)
        if hits > 0:
            alpha = min(0.3 + 0.2 * hits, 0.8)
            pieces.append(
                "<span style='background: rgba(255, 230, 150, "
                f"{alpha});'>{char}</span>"
            )
        else:
            pieces.append(char)

        boundary = index + 1
        if boundary not in end_markers:
            continue

        # Keep the footnote reference from gluing onto a word or number.
        if char.isalnum():
            pieces.append(" ")
        pieces.extend(f"[^{number}]" for number in end_markers[boundary])

    return "".join(pieces)


def create_layout(annotated_text, sections):
    """Assemble the final markdown: annotated text plus a footnote list.

    Args:
        annotated_text: Text already decorated with ``[^n]`` references.
        sections: Sequence of dicts, one per footnote, numbered from 1 in
            iteration order. A ``"rebuttal"`` entry, when present, is
            appended to that section's footnote line.

    Returns:
        ``annotated_text``, a horizontal rule, an ``### Annotations`` heading
        and one ``[^n]: Section n ...`` line per section.
    """
    annotations = []
    for number, section in enumerate(sections, 1):
        line = f"[^{number}]: Section {number}"
        # Sections may lack a "rebuttal" key; emit the bare label instead of
        # raising KeyError.
        if "rebuttal" in section:
            line = f"{line} {section['rebuttal']}"
        annotations.append(line)

    return (
        f"{annotated_text}\n\n"
        "---\n\n"
        "### Annotations\n" + "\n".join(annotations)
    )


def render_annotated_text(chunks):
    """Run the full pipeline from raw ``chunks`` to the final markdown string.

    Steps: derive sections and text with ``get_sections_and_text``, compute
    per-index coverage and end-of-section markers from the sections,
    highlight the text, then pass the result and the sections to
    ``create_layout``.
    """
    section_list, full_text = get_sections_and_text(chunks)
    highlighted = highlight_text(
        full_text,
        calculate_coverage(section_list),
        create_end_markers(section_list),
    )
    return create_layout(highlighted, section_list)