// Source viewer residue: Spaces: Paused; file size: 2,857 bytes; revision 9f069df.

package main
import (
"sort"
"strings"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/text"
)
// extractSegments parses body as Markdown (goldmark with the GFM extension)
// and returns the byte ranges of its translatable text.
//
// Only text nodes that have a Paragraph, Heading or ListItem ancestor are
// collected. Anything nested inside a code block, fenced code block, code
// span, HTML block or raw HTML node is ignored, as is whitespace-only text.
// Consecutive text nodes that belong to the same block and are separated by
// nothing but whitespace are fused into a single range.
//
// Every returned Segment carries its Start/Stop byte offsets into body, the
// covered Text, the value of hashText for that text, and the identifier
// produced by segmentID from relPath and that hash. The result is ordered by
// Start. The only error returned is one reported by ast.Walk.
func extractSegments(body, relPath string) ([]Segment, error) {
	source := []byte(body)
	parser := goldmark.New(goldmark.WithExtensions(extension.GFM)).Parser()
	doc := parser.Parse(text.NewReader(source))

	var (
		spans     = make([]Segment, 0, 128)
		skipDepth int      // > 0 while inside a code or HTML node
		prevBlock ast.Node // block that owns the most recently appended span
	)

	visit := func(n ast.Node, entering bool) (ast.WalkStatus, error) {
		switch n.(type) {
		case *ast.CodeBlock, *ast.FencedCodeBlock, *ast.CodeSpan, *ast.HTMLBlock, *ast.RawHTML:
			// Count nesting on the way in and out so that every descendant
			// of a code or HTML node is ignored.
			if entering {
				skipDepth++
			} else {
				skipDepth--
			}
			return ast.WalkContinue, nil
		}

		leaf, isText := n.(*ast.Text)
		if !isText || !entering || skipDepth > 0 {
			return ast.WalkContinue, nil
		}
		owner := blockParent(leaf)
		if owner == nil {
			return ast.WalkContinue, nil
		}
		piece := leaf.Segment
		if strings.TrimSpace(string(piece.Value(source))) == "" {
			return ast.WalkContinue, nil
		}

		// Extend the previous span instead of starting a new one when this
		// text continues the same block across a whitespace-only gap.
		if count := len(spans); count > 0 && prevBlock == owner {
			tail := &spans[count-1]
			if strings.TrimSpace(string(source[tail.Stop:piece.Start])) == "" {
				tail.Stop = piece.Stop
				return ast.WalkContinue, nil
			}
		}

		spans = append(spans, Segment{Start: piece.Start, Stop: piece.Stop})
		prevBlock = owner
		return ast.WalkContinue, nil
	}
	if err := ast.Walk(doc, visit); err != nil {
		return nil, err
	}

	result := make([]Segment, 0, len(spans))
	for i := range spans {
		lo, hi := spans[i].Start, spans[i].Stop
		content := string(source[lo:hi])
		if strings.TrimSpace(content) == "" {
			continue
		}
		digest := hashText(content)
		result = append(result, Segment{
			Start:     lo,
			Stop:      hi,
			Text:      content,
			TextHash:  digest,
			SegmentID: segmentID(relPath, digest),
		})
	}

	sort.Slice(result, func(a, b int) bool {
		return result[a].Start < result[b].Start
	})
	return result, nil
}
// blockParent climbs from n's parent towards the root and returns the first
// ancestor accepted by isTranslatableBlock. It returns nil when no ancestor
// qualifies. The node n itself is never considered.
func blockParent(n ast.Node) ast.Node {
	ancestor := n.Parent()
	for {
		if ancestor == nil {
			return nil
		}
		if isTranslatableBlock(ancestor) {
			return ancestor
		}
		ancestor = ancestor.Parent()
	}
}
// isTranslatableBlock reports whether n is a Paragraph, Heading or ListItem
// node; every other node type yields false.
func isTranslatableBlock(n ast.Node) bool {
	_, isParagraph := n.(*ast.Paragraph)
	_, isHeading := n.(*ast.Heading)
	_, isListItem := n.(*ast.ListItem)
	return isParagraph || isHeading || isListItem
}
// applyTranslations returns body with the byte range [Start, Stop) of each
// segment replaced by that segment's Translated text. Bytes not covered by
// an applied segment are copied through unchanged.
//
// Segments are consumed in slice order and are expected to be sorted by Start
// and non-overlapping. A segment is skipped, leaving the original bytes in
// place, when any of the following holds:
//   - it starts before the end of the previously applied segment (overlap or
//     out-of-order input, including a negative Start);
//   - its Stop precedes its Start;
//   - it extends past the end of body.
//
// Skipping malformed ranges keeps stale or corrupted offsets from panicking
// on a slice bounds error or from emitting parts of body twice.
//
// NOTE(review): a segment whose Translated is empty replaces its range with
// nothing; confirm callers always fill Translated before calling.
func applyTranslations(body string, segments []Segment) string {
	if len(segments) == 0 {
		return body
	}
	var out strings.Builder
	out.Grow(len(body)) // the result is usually about as long as the input
	last := 0           // offset in body just past the last applied segment
	for _, seg := range segments {
		if seg.Start < last || seg.Stop < seg.Start || seg.Stop > len(body) {
			continue
		}
		out.WriteString(body[last:seg.Start])
		out.WriteString(seg.Translated)
		last = seg.Stop
	}
	out.WriteString(body[last:])
	return out.String()
}
|