Francisco Zanartu committed on
Commit
2c3a136
·
1 Parent(s): 5393dd8

feat: implement annotation rendering functions for text highlighting and layout creation

Browse files
Files changed (1) hide show
  1. src/utils/annotation_rendering.py +83 -0
src/utils/annotation_rendering.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import defaultdict
2
+
3
+
4
def get_sections_and_text(chunks):
    """Split a chunk payload into per-chunk sections and the joined text.

    Args:
        chunks: Mapping whose ``"chunks"`` entry holds the chunk mappings.
            Each chunk provides ``"page_content"`` (a string) and
            ``"metadata"["start_index"]`` (an integer offset).

    Returns:
        A ``(sections, text)`` tuple. ``sections`` contains one dict per
        chunk with ``"start"`` (the chunk's ``start_index``), ``"end"``
        (``start`` plus the length of the content) and ``"text"`` (the
        content itself). ``text`` is the content of every chunk
        concatenated in input order.
    """
    # NOTE(review): the offsets come from metadata while ``text`` is a plain
    # concatenation; they only line up when chunks are contiguous and start
    # at offset 0 -- confirm against whatever produces ``chunks``.
    items = chunks["chunks"]
    text = "".join([item["page_content"] for item in items])

    sections = []
    for item in items:
        begin = item["metadata"]["start_index"]
        content = item["page_content"]
        sections.append(
            {
                "start": begin,
                "end": begin + len(content),
                "text": content,
            }
        )
    return sections, text
15
+
16
+
17
def calculate_coverage(selection):
    """Count how many sections cover each character offset.

    Args:
        selection: Iterable of mappings with integer ``"start"`` and
            ``"end"`` entries; ``"end"`` is exclusive.

    Returns:
        A ``defaultdict(int)`` mapping each offset in ``[start, end)`` of
        any section to the number of sections that include it. Offsets no
        section touches are absent from the mapping.
    """
    counts = defaultdict(int)
    for section in selection:
        position = section["start"]
        stop = section["end"]
        while position < stop:
            counts[position] += 1
            position += 1
    return counts
23
+
24
+
25
def create_end_markers(selection):
    """Group 1-based section numbers by the offset at which they end.

    Args:
        selection: Iterable of mappings that each have an ``"end"`` entry.

    Returns:
        A ``defaultdict(list)`` mapping every ``"end"`` value to the list of
        section numbers (position in ``selection``, counted from 1) that end
        there, in input order.
    """
    markers_by_end = defaultdict(list)
    number = 0
    for section in selection:
        number += 1
        markers_by_end[section["end"]].append(number)
    return markers_by_end
30
+
31
+
32
def highlight_text(text, coverage, end_markers):
    """Render ``text`` with highlighted characters and footnote references.

    Every character whose index has a positive count in ``coverage`` is
    wrapped in its own ``<span>`` with a yellow background; the opacity is
    ``0.3 + 0.2 * count`` capped at ``0.8``. Characters without coverage are
    copied through unchanged. After the character at index ``p - 1`` each
    number in ``end_markers[p]`` is written as a ``[^n]`` footnote
    reference, preceded by a space when that character is alphanumeric.

    Markers whose position falls outside ``1..len(text)`` are not dropped:
    positions at or below 0 are written before the text and positions past
    the end are written after it, so every footnote keeps a reference.

    Args:
        text: The string to annotate.
        coverage: Mapping from character index to the number of sections
            covering it (missing indices count as 0).
        end_markers: Mapping from end offset to a list of footnote numbers.

    Returns:
        The annotated string.
    """
    output = []
    buffer = []

    def flush_buffer():
        # Move pending un-highlighted characters to the output in one piece.
        if buffer:
            output.append("".join(buffer))
            buffer.clear()

    def emit_markers(position):
        # Only called for positions known to be keys of ``end_markers``.
        output.extend(f"[^{m}]" for m in end_markers[position])

    text_length = len(text)
    # Positions the per-character loop below can never reach; sorted so the
    # result does not depend on mapping iteration order.
    leading = sorted(
        pos for pos in end_markers if pos <= 0 and end_markers[pos]
    )
    trailing = sorted(
        pos for pos in end_markers if pos > text_length and end_markers[pos]
    )

    for position in leading:
        emit_markers(position)

    for i, ch in enumerate(text):
        count = coverage.get(i, 0)
        if count > 0:
            # Plain text must be written out before the highlighted span.
            flush_buffer()

            opacity = min(0.3 + 0.2 * count, 0.8)
            output.append(
                f"<span style='background: rgba(255, 230, 150, {opacity});'>"
                f"{ch}</span>"
            )
        else:
            buffer.append(ch)

        if i + 1 in end_markers:
            flush_buffer()
            # Keep the reference from gluing onto the preceding word.
            if ch.isalnum():
                output.append(" ")
            emit_markers(i + 1)

    flush_buffer()

    if trailing:
        # Mirror the in-loop spacing rule, unless markers at the very end of
        # the text already supplied the separator.
        if text and text[-1].isalnum() and text_length not in end_markers:
            output.append(" ")
        for position in trailing:
            emit_markers(position)

    return "".join(output)
63
+
64
+
65
def create_layout(annotated_text, sections):
    """Assemble the final markdown: annotated text plus a footnote list.

    The result is ``annotated_text``, a horizontal rule, an
    ``### Annotations`` heading and one ``[^n]: Section n ...`` footnote
    definition per section, numbered from 1 in input order.

    Args:
        annotated_text: Already-annotated body text.
        sections: Iterable of mappings. A section's ``"rebuttal"`` entry,
            when present, is appended to its footnote line. Sections
            without that key get a bare ``Section n`` label rather than
            raising ``KeyError``.

    Returns:
        The complete markdown string.
    """
    annotations = []
    for number, section in enumerate(sections, 1):
        label = f"[^{number}]: Section {number}"
        if "rebuttal" in section:
            label = f"{label} {section['rebuttal']}"
        annotations.append(label)

    header = f"{annotated_text}\n\n---\n\n### Annotations\n"
    return header + "\n".join(annotations)
75
+
76
+
77
def render_annotated_text(chunks):
    """Produce the annotated markdown document for a chunk payload.

    Runs the module's pipeline end to end: sections and text are extracted
    with ``get_sections_and_text``, per-character coverage and end markers
    are derived from the sections, the text is annotated with
    ``highlight_text`` and the result is wrapped by ``create_layout``.

    Args:
        chunks: Payload passed unchanged to ``get_sections_and_text``.

    Returns:
        Whatever ``create_layout`` returns for the annotated text and the
        extracted sections.
    """
    sections, text = get_sections_and_text(chunks)
    highlighted = highlight_text(
        text,
        calculate_coverage(sections),
        create_end_markers(sections),
    )
    return create_layout(highlighted, sections)