File size: 2,857 Bytes
9f069df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
package main

import (
	"sort"
	"strings"

	"github.com/yuin/goldmark"
	"github.com/yuin/goldmark/ast"
	"github.com/yuin/goldmark/extension"
	"github.com/yuin/goldmark/text"
)

// extractSegments parses body as Markdown (goldmark with the GFM extension)
// and returns the text runs found inside translatable blocks, ordered by
// their starting byte offset in body.
//
// Text nested under code blocks, fenced code blocks, code spans, HTML blocks
// or raw HTML is ignored, as is text that has no translatable block ancestor
// (see blockParent) and text that is only whitespace. Consecutive text nodes
// owned by the same block are merged into one segment when nothing but
// whitespace separates them in the source.
//
// Each returned Segment carries its byte range, the covered source text, the
// hash of that text (hashText) and an identifier built by segmentID from
// relPath and the hash. The error is whatever ast.Walk reports.
func extractSegments(body, relPath string) ([]Segment, error) {
	source := []byte(body)
	parser := goldmark.New(goldmark.WithExtensions(extension.GFM)).Parser()
	doc := parser.Parse(text.NewReader(source))

	var (
		runs      = make([]Segment, 0, 128)
		opaque    int      // > 0 while the walk is inside a code/HTML node
		prevBlock ast.Node // block owning the most recently appended run
	)

	// collect records one text node, either by extending the previous run or
	// by starting a new one.
	collect := func(tn *ast.Text) {
		owner := blockParent(tn)
		if owner == nil {
			return
		}
		span := tn.Segment
		if strings.TrimSpace(string(span.Value(source))) == "" {
			return
		}

		if count := len(runs); count > 0 && prevBlock == owner {
			tail := &runs[count-1]
			between := string(source[tail.Stop:span.Start])
			if strings.TrimSpace(between) == "" {
				// Only whitespace lies between the two nodes: grow the
				// previous run instead of emitting a separate one.
				tail.Stop = span.Stop
				return
			}
		}

		runs = append(runs, Segment{Start: span.Start, Stop: span.Stop})
		prevBlock = owner
	}

	visit := func(n ast.Node, entering bool) (ast.WalkStatus, error) {
		switch node := n.(type) {
		case *ast.CodeBlock, *ast.FencedCodeBlock, *ast.CodeSpan, *ast.HTMLBlock, *ast.RawHTML:
			// Track nesting so that text below these nodes is not collected.
			if entering {
				opaque++
			} else {
				opaque--
			}
		case *ast.Text:
			if entering && opaque == 0 {
				collect(node)
			}
		}
		return ast.WalkContinue, nil
	}

	if err := ast.Walk(doc, visit); err != nil {
		return nil, err
	}

	result := make([]Segment, 0, len(runs))
	for i := range runs {
		start, stop := runs[i].Start, runs[i].Stop
		content := string(source[start:stop])
		if strings.TrimSpace(content) == "" {
			continue
		}
		hash := hashText(content)
		result = append(result, Segment{
			Start:     start,
			Stop:      stop,
			Text:      content,
			TextHash:  hash,
			SegmentID: segmentID(relPath, hash),
		})
	}

	sort.Slice(result, func(a, b int) bool {
		return result[a].Start < result[b].Start
	})

	return result, nil
}

// blockParent climbs the ancestor chain starting at n's parent and returns
// the first node accepted by isTranslatableBlock. It returns nil when the
// chain ends without finding one.
func blockParent(n ast.Node) ast.Node {
	ancestor := n.Parent()
	for ancestor != nil && !isTranslatableBlock(ancestor) {
		ancestor = ancestor.Parent()
	}
	return ancestor
}

// isTranslatableBlock reports whether n is a paragraph, a heading or a list
// item, the node types that blockParent accepts as the owner of a text run.
func isTranslatableBlock(n ast.Node) bool {
	switch n.(type) {
	case *ast.Paragraph, *ast.Heading, *ast.ListItem:
		return true
	}
	return false
}

// applyTranslations returns body with the byte range [Start, Stop) of each
// segment replaced by that segment's Translated text. Text between segments
// is copied through unchanged. With no segments, body is returned as is.
//
// Segments are applied in the order given. A segment is skipped, leaving the
// corresponding source text in place, when it
//   - starts before the end of the previously applied segment (overlapping or
//     out of order), or has a negative Start;
//   - ends before it starts; or
//   - extends past the end of body.
//
// The last two checks keep a stale or malformed segment from causing a
// slice-bounds panic or from rewinding the cursor and duplicating text.
func applyTranslations(body string, segments []Segment) string {
	if len(segments) == 0 {
		return body
	}
	var out strings.Builder
	out.Grow(len(body))
	last := 0 // offset in body up to which output has been produced
	for i := range segments {
		seg := &segments[i]
		if seg.Start < last || seg.Stop < seg.Start || seg.Stop > len(body) {
			continue
		}
		out.WriteString(body[last:seg.Start])
		out.WriteString(seg.Translated)
		last = seg.Stop
	}
	out.WriteString(body[last:])
	return out.String()
}