-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreceived.go
230 lines (183 loc) · 5.25 KB
/
received.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
package mail
import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"net/mail"
"regexp"
"strings"
"time"
)
// Received is a list of *Transport values. Decode builds it with one entry
// per successfully decoded header string, in input order.
//
// References consulted for the Received header format:
// https://www.pobox.help/hc/en-us/articles/1500000193602-The-elements-of-a-Received-header
// https://www.rfc-editor.org/rfc/rfc1123
// https://www.rfc-editor.org/rfc/rfc822
// https://www.rfc-editor.org/rfc/rfc2076
// https://stackoverflow.com/questions/504136/parsing-email-received-headers
// https://datatracker.ietf.org/doc/html/rfc2821#section-4.4
// https://datatracker.ietf.org/doc/html/rfc821
// https://metacpan.org/dist/Mail-SpamAssassin/source/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
type Received []*Transport
// Decode parses every raw Received header value in v, in order, and returns
// the resulting transports.
//
// The first value rejected by Transport.Decode aborts the whole call: its
// error is returned unchanged (this includes ErrIgnoreTransport) together
// with a nil Received. An empty v yields a nil Received and a nil error.
func Decode(ctx context.Context, v []string) (Received, error) {
	var out Received
	for i := range v {
		tr := new(Transport)
		if err := tr.Decode(ctx, v[i]); err != nil {
			return nil, err
		}
		out = append(out, tr)
	}
	return out, nil
}
// Transport holds the fields extracted from a single Received header.
// Transport.Decode assigns only ID, Helo, By and Date; the visible code leaves
// Via, For, From and With at their zero values.
type Transport struct {
// ID is the token following "id " in the header, as captured by idReg.
ID string
// https://datatracker.ietf.org/doc/html/rfc2821#section-3.8.2
// The gateway SHOULD indicate the environment and protocol in the "via"
// clauses of Received field(s) that it supplies.
Via string
// For is not populated by the visible decoder.
// NOTE(review): presumably the recipient of the "for" clause — confirm.
For Entity
// https://www.ibm.com/docs/en/zos/2.2.0?topic=sc-helo-command-identify-domain-name-sending-host-smtp
// https://www.ietf.org/rfc/rfc5321.txt
// Helo is the name returned by extractHELO, or "" when none is found.
Helo string
// From is not populated by the visible decoder.
// NOTE(review): presumably the sending host of the "from" clause — confirm.
From Entity
// By is the result of parseEntity applied to the "by" clause when
// extractBy finds one.
By Entity
// With is not populated by the visible decoder.
With With
// Date is parsed with mail.ParseDate from the text after the last ';'.
// It stays the zero time when there is no such text or parsing fails.
Date time.Time
}
// Sentinel errors for Received header decoding.
var (
	// ErrInvalidTransport is declared for malformed headers; nothing in the
	// visible code returns it yet.
	ErrInvalidTransport = errors.New("invalid transport")
	// ErrIgnoreTransport is returned by normalizeReceived for headers that
	// the decoder deliberately does not parse.
	ErrIgnoreTransport = errors.New("ignore transport")
)

// idReg captures the token that follows "id " (optionally wrapped in '<'):
// at least three characters, none of them whitespace, '<', '>' or ';'.
var idReg = regexp.MustCompile(`id <?([^\s<>;]{3,})`)
// Decode parses one raw Received header value s into t.
//
// The header is first passed through normalizeReceived; if that rejects it
// (ErrIgnoreTransport), the error is returned and t is left untouched.
// Otherwise Decode fills in:
//   - Date: parsed from the text after the last ';'. A parse failure is only
//     logged as a warning and leaves Date at the zero time.
//   - ID: the first capture of idReg, if any.
//   - Helo: the result of extractHELO.
//   - By: parseEntity applied to the "by" clause, when extractBy finds one.
//     A missing "by" clause is not an error; a parseEntity failure is returned.
func (t *Transport) Decode(ctx context.Context, s string) (err error) {
	// Keep the untouched header around: normalizeReceived lowercases its
	// input, and mail.ParseDate only recognises upper-case obsolete zone
	// names such as "GMT" or "EST". Parsing the lowercased date would make
	// every such timestamp fail.
	raw := s

	s, err = normalizeReceived(s)
	if err != nil {
		return err
	}

	if lastSemi := strings.LastIndex(s, ";"); lastSemi != -1 {
		// Normalization never adds or removes ';', so the last ';' of the
		// raw header delimits the same date clause.
		if dt := rawDateClause(raw); dt != "" {
			t.Date, err = mail.ParseDate(dt)
			if err != nil {
				slog.WarnContext(
					ctx,
					"failed to parse date",
					slog.String("received", s),
					slog.String("error", err.Error()),
				)
			}
		}
		// Everything before the date clause is what the remaining
		// extractors operate on.
		s = s[:lastSemi]
	}

	// Extract the ID.
	if matches := idReg.FindStringSubmatch(s); len(matches) > 1 {
		t.ID = matches[1]
	}

	t.Helo = extractHELO(s)

	// The "by" clause is optional: a failed extraction is deliberately ignored.
	by, err := extractBy(s)
	if err == nil {
		t.By, err = parseEntity(by)
		if err != nil {
			return err
		}
	}
	return nil
}

// rawDateClause returns the text after the last ';' of raw with runs of
// whitespace (including header folding) collapsed to single spaces, or "" when
// there is no ';' or nothing follows it.
func rawDateClause(raw string) string {
	i := strings.LastIndex(raw, ";")
	if i == -1 {
		return ""
	}
	return strings.Join(strings.Fields(raw[i+1:]), " ")
}
// Patterns used by normalizeReceived.
var (
	// fromR matches headers that begin with "from " (any case), optionally
	// preceded by an opening parenthesis.
	fromR = regexp.MustCompile(`(?i)^\(?from `)
	// withLocalFor matches the clause "with local for" as whole words,
	// allowing any amount of whitespace between them.
	withLocalFor = regexp.MustCompile(`\bwith\s+local\s+for\b`)
	// whiteSpaceR matches a run of one or more whitespace characters.
	whiteSpaceR = regexp.MustCompile(`\s+`)
)
// normalizeReceived canonicalises a raw Received header value before parsing.
//
// It lowercases s, trims it and collapses every whitespace run to one space,
// then returns ErrIgnoreTransport (with an empty string) when the header
//   - does not start with "from " / "(from ",
//   - is rejected by frombyReg, or
//   - contains a "with local for" clause after keyword normalization.
//
// Otherwise the normalized header is returned with the keywords by, with,
// for, id and via passed through normalizeKeywords.
func normalizeReceived(s string) (string, error) {
	lowered := strings.ToLower(s)
	s = whiteSpaceR.ReplaceAllString(strings.TrimSpace(lowered), " ")

	// Exclude lines that don't start with "from".
	if !fromR.MatchString(s) {
		return "", ErrIgnoreTransport
	}
	// Exclude the terse "from X by Y;" form that frombyReg rejects.
	if !frombyReg(s) {
		return "", ErrIgnoreTransport
	}

	s = normalizeKeywords(s, "by", "with", "for", "id", "via")

	// Exclude local deliveries ("with local for").
	if withLocalFor.MatchString(s) {
		return "", ErrIgnoreTransport
	}
	return s, nil
}
// normalizeKeywords makes sure every stand-alone occurrence of each keyword in
// keys is surrounded by spaces, inserting a single space where the keyword is
// glued to neighbouring punctuation: ")by mx" becomes ") by mx" and
// "for<a@b>" becomes "for <a@b>".
//
// An occurrence counts as stand-alone only when it is preceded by the start of
// the string, a space, ')', ']' or '>' and followed by the end of the string,
// a space, '(', '[' or '<'. Keywords that merely appear inside another token
// (the "by" in "derby.example.com", the "for" in "fortune", the "id" in
// "<id@example.com>") are left alone, so host names and addresses are never
// split. Empty keys are ignored.
func normalizeKeywords(s string, keys ...string) string {
	for _, key := range keys {
		if key == "" {
			continue
		}
		for from := 0; from < len(s); {
			rel := strings.Index(s[from:], key)
			if rel == -1 {
				break
			}
			start := from + rel
			end := start + len(key)
			from = end
			if !keywordOpensAt(s, start) || !keywordClosesAt(s, end) {
				continue
			}
			if end < len(s) && s[end] != ' ' {
				// Splice a space after the keyword.
				s = s[:end] + " " + s[end:]
			}
			if start > 0 && s[start-1] != ' ' {
				// Splice a space in front of the keyword; everything after
				// start shifts by one byte, so keep the scan position in step.
				s = s[:start] + " " + s[start:]
				from++
			}
		}
	}
	return s
}

// keywordOpensAt reports whether a keyword starting at byte offset start of s
// is preceded by the start of the string or by a separator.
func keywordOpensAt(s string, start int) bool {
	return start == 0 || strings.IndexByte(" )]>", s[start-1]) != -1
}

// keywordClosesAt reports whether a keyword ending just before byte offset end
// of s is followed by the end of the string or by a separator.
func keywordClosesAt(s string, end int) bool {
	return end == len(s) || strings.IndexByte(" ([<", s[end]) != -1
}
// Patterns used by frombyReg, compiled once at package initialisation instead
// of on every call.
var (
	// fromByOnlyR matches a header consisting of just "from <token> by
	// <token>" directly followed by the ';' that introduces the date. The
	// first token is captured.
	fromByOnlyR = regexp.MustCompile(`(?i)^from (\S+) by [^\s;]+ ?;`)
	// bracketedDottedR matches a bracketed run of digits and dots, such as
	// "[192.0.2.1]".
	bracketedDottedR = regexp.MustCompile(`^\[[\d.]+\]$`)
)

// frombyReg reports whether the normalized header s should be kept.
//
// It returns false only for the terse "from X by Y;" form in which X is not a
// bracketed run of digits and dots; every other header yields true.
func frombyReg(s string) bool {
	m := fromByOnlyR.FindStringSubmatch(s)
	if m == nil {
		// Not the terse form: nothing to reject here.
		return true
	}
	return bracketedDottedR.MatchString(m[1])
}
// Entity is the type of Transport's For, From and By fields.
// NOTE(review): nothing in the visible code fills these fields yet
// (parseEntity returns the zero value), so the meanings below are assumptions.
type Entity struct {
// Name: presumably the host name as written in the header — TODO confirm.
Name string
// FQDN: presumably the fully qualified domain name — TODO confirm.
FQDN string
// IP: presumably the host's IP address — TODO confirm.
IP net.IP
}
// With is the type of Transport's With field.
// NOTE(review): the visible code never populates it; presumably it describes
// the "with" clause (protocol) of a Received header — confirm.
type With struct {
// Name: presumably the protocol name — TODO confirm.
Name string
// Metadata: presumably extra key/value attributes — TODO confirm.
Metadata map[string]string
}
// https://metacpan.org/dist/Mail-SpamAssassin/source/lib/Mail/SpamAssassin/Message/Metadata/Received.pm#L389

// heloR captures the name given in attribute form, "helo=<name>".
var heloR = regexp.MustCompile(`(?i)\bhelo=([-A-Za-z0-9.^+_&:=?!@%*$\\\/]+)(?:[^-A-Za-z0-9.^+_&:=?!@%*$\\\/]|$)`)

// ehloR captures the name given in command form, "HELO <name>" or
// "EHLO <name>".
//
//nolint:lll // This regex is purposely long
var ehloR = regexp.MustCompile(`(?i)\b(?:HELO|EHLO) ([-A-Za-z0-9.^+_&:=?!@%*$\\\/]+)(?:[^-A-Za-z0-9.^+_&:=?!@%*$\\\/]|$)`)

// extractHELO returns the HELO/EHLO name announced in s, or "" when s carries
// none. The attribute form ("helo=name") takes precedence over the command
// form ("helo name" / "ehlo name").
func extractHELO(s string) string {
	for _, re := range []*regexp.Regexp{heloR, ehloR} {
		if m := re.FindStringSubmatch(s); len(m) > 1 {
			return m[1]
		}
	}
	return ""
}
// https://metacpan.org/dist/Mail-SpamAssassin/source/lib/Mail/SpamAssassin/Message/Metadata/Received.pm#L395

// byR locates the "by" clause: the literal " by ", then a captured run of
// non-space characters (\S+), which must be followed either by a character
// outside the set [-A-Za-z0-9;.] or by the end of the input.
var byR = regexp.MustCompile(` by (\S+)(?:[^-A-Za-z0-9;.]|$)`)

// extractBy returns the token that follows the first " by " matched by byR in
// input. When input has no such clause it returns "" and a non-nil error.
func extractBy(input string) (string, error) {
	m := byR.FindStringSubmatch(input)
	if len(m) <= 1 {
		return "", fmt.Errorf("no match found")
	}
	// m[0] is the whole match; m[1] is the captured token.
	return m[1], nil
}
// parseEntity is a placeholder: it ignores its argument and always returns
// the zero Entity together with a nil error.
func parseEntity(_ string) (Entity, error) {
	var zero Entity
	return zero, nil
}