-
Notifications
You must be signed in to change notification settings - Fork 2
/
html.go
82 lines (67 loc) · 1.6 KB
/
html.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
package autocorrect
import (
"bytes"
"io"
"regexp"
"strings"
"github.com/pkg/errors"
"github.com/tdewolff/parse/v2"
"github.com/tdewolff/parse/v2/html"
// "golang.org/x/net/html"
)
// ignoreTagsRe matches the opening of a pre, script, style or textarea tag
// ("<" immediately followed by the tag name), case-insensitively. The pattern
// has no trailing boundary, so it also matches longer names that merely start
// with one of those words (for example "<preview").
var ignoreTagsRe = regexp.MustCompile("(?mi)<(pre|script|style|textarea)")
// FormatHTML formats the text content of an HTML document.
//
// body is handed to processHTML, which calls Format (with the given options)
// on the plain-text pieces it selects and copies the remaining markup through.
// It returns whatever processHTML returns: the rebuilt document on success, or
// the original body together with an error if the HTML could not be tokenized.
func FormatHTML(body string, options ...Option) (out string, err error) {
	formatText := func(text string) string {
		return Format(text, options...)
	}

	out, err = processHTML(body, formatText)
	return out, err
}
// UnformatHTML cleans up spaces in the text content of an HTML document.
//
// body is handed to processHTML, which calls Unformat (with the given options)
// on the plain-text pieces it selects and copies the remaining markup through.
// It returns whatever processHTML returns: the rebuilt document on success, or
// the original body together with an error if the HTML could not be tokenized.
func UnformatHTML(body string, options ...UnformatOption) (out string, err error) {
	unformatText := func(text string) string {
		return Unformat(text, options...)
	}

	out, err = processHTML(body, unformatText)
	return out, err
}
// processHTML tokenizes body as HTML and rebuilds it token by token.
//
// Every text token is passed through fn and the result is written to the
// output, except for text located inside a pre, script, style or textarea
// element, which is copied verbatim. All other tokens (tags, attributes,
// comments, ...) are written back unchanged.
//
// An element counts as "ignored" from its start tag until its matching end
// tag. This means an empty ignored element such as <script src="x"></script>
// does not suppress formatting of the text that follows it, and text that
// comes after a nested child inside <pre> is still left untouched.
//
// On success the rebuilt document is returned. If the lexer stops with any
// error other than io.EOF, the original body is returned along with the error.
func processHTML(body string, fn func(plainText string) string) (out string, err error) {
	// Writes to a bytes.Buffer never return an error, so the results of
	// Write/WriteString below are intentionally not checked.
	w := &bytes.Buffer{}
	w.Grow(len(body))

	i := parse.NewInput(strings.NewReader(body))
	lex := html.NewLexer(i)

	// ignoreDepth is the number of ignored elements that are currently open.
	ignoreDepth := 0

	for {
		t, data := lex.Next()
		switch t {
		case html.ErrorToken:
			if lex.Err() == io.EOF {
				return w.String(), nil
			}
			// The message text is kept as-is for callers that may inspect it;
			// the number reported is the input offset.
			return body, errors.Errorf("Error on line %d, %v", i.Offset(), lex.Err())
		case html.TextToken:
			if ignoreDepth > 0 {
				w.Write(data)
			} else {
				w.WriteString(fn(string(data)))
			}
		case html.StartTagToken:
			if isIgnoredHTMLTag(data) {
				ignoreDepth++
			}
			w.Write(data)
		case html.EndTagToken:
			// Only close what was opened, so a stray end tag cannot drive the
			// counter negative.
			if ignoreDepth > 0 && isIgnoredHTMLTag(data) {
				ignoreDepth--
			}
			w.Write(data)
		default:
			w.Write(data)
		}
	}
}

// isIgnoredHTMLTag reports whether data, the raw bytes of a start-tag
// ("<name") or end-tag ("</name>") token, names exactly one of the elements
// whose text content must not be formatted: pre, script, style or textarea.
// The comparison is case-insensitive and requires the whole tag name to match,
// so names such as "preview" are not treated as ignored.
func isIgnoredHTMLTag(data []byte) bool {
	name := bytes.TrimLeft(data, "</")
	if end := bytes.IndexAny(name, " \t\r\n\f/>"); end >= 0 {
		name = name[:end]
	}
	switch strings.ToLower(string(name)) {
	case "pre", "script", "style", "textarea":
		return true
	}
	return false
}