diff --git a/reader/processor/processor.go b/reader/processor/processor.go
index 67f50b2879b..a5a260863f0 100644
--- a/reader/processor/processor.go
+++ b/reader/processor/processor.go
@@ -85,7 +85,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
}
}
- entry.Content = rewrite.Rewriter(url, entry.Content, feed.RewriteRules)
+ rewrite.Rewriter(url, entry, feed.RewriteRules)
// The sanitizer should always run at the end of the process to make sure unsafe HTML is filtered.
entry.Content = sanitizer.Sanitize(url, entry.Content)
@@ -168,14 +168,14 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User)
return scraperErr
}
- content = rewrite.Rewriter(url, content, entry.Feed.RewriteRules)
- content = sanitizer.Sanitize(url, content)
-
if content != "" {
entry.Content = content
entry.ReadingTime = calculateReadingTime(content, user)
}
+ rewrite.Rewriter(url, entry, entry.Feed.RewriteRules)
+ entry.Content = sanitizer.Sanitize(url, entry.Content)
+
return nil
}
diff --git a/reader/rewrite/rewrite_functions.go b/reader/rewrite/rewrite_functions.go
index 1d34a6ea1bb..26a08b6e7d9 100644
--- a/reader/rewrite/rewrite_functions.go
+++ b/reader/rewrite/rewrite_functions.go
@@ -367,3 +367,17 @@ func removeTables(entryContent string) string {
output, _ := doc.Find("body").First().Html()
return output
}
+
+// removeClickbait lowercases all but the first rune of each word in entryTitle and joins the words with single spaces.
+func removeClickbait(entryTitle string) string {
+	words := strings.Fields(entryTitle)
+	for i, word := range words {
+		runes := []rune(word)
+		if len(runes) <= 1 {
+			continue
+		}
+		// The leading rune is left untouched so an initial capital survives.
+		words[i] = string(runes[:1]) + strings.ToLower(string(runes[1:]))
+	}
+	return strings.Join(words, " ")
+}
diff --git a/reader/rewrite/rewriter.go b/reader/rewrite/rewriter.go
index 961a47eca7c..9824fc3a4d1 100644
--- a/reader/rewrite/rewriter.go
+++ b/reader/rewrite/rewriter.go
@@ -10,6 +10,7 @@ import (
"text/scanner"
"miniflux.app/logger"
+ "miniflux.app/model"
"miniflux.app/url"
)
@@ -19,7 +20,7 @@ type rule struct {
}
// Rewriter modify item contents with a set of rewriting rules.
-func Rewriter(entryURL, entryContent, customRewriteRules string) string {
+func Rewriter(entryURL string, entry *model.Entry, customRewriteRules string) {
rulesList := getPredefinedRewriteRules(entryURL)
if customRewriteRules != "" {
rulesList = customRewriteRules
@@ -31,10 +32,8 @@ func Rewriter(entryURL, entryContent, customRewriteRules string) string {
logger.Debug(`[Rewrite] Applying rules %v for %q`, rules, entryURL)
for _, rule := range rules {
- entryContent = applyRule(entryURL, entryContent, rule)
+ applyRule(entryURL, entry, rule)
}
-
- return entryContent
}
func parseRules(rulesText string) (rules []rule) {
@@ -60,61 +59,61 @@ func parseRules(rulesText string) (rules []rule) {
}
}
-func applyRule(entryURL, entryContent string, rule rule) string {
+// applyRule applies a single rewrite rule to entry in place, dispatching on rule.name.
+// A rule name with no matching case leaves entry unchanged. The "replace" and "remove"
+// rules only log a debug message when their required arguments are missing.
+func applyRule(entryURL string, entry *model.Entry, rule rule) {
switch rule.name {
case "add_image_title":
- entryContent = addImageTitle(entryURL, entryContent)
+ entry.Content = addImageTitle(entryURL, entry.Content)
case "add_mailto_subject":
- entryContent = addMailtoSubject(entryURL, entryContent)
+ entry.Content = addMailtoSubject(entryURL, entry.Content)
case "add_dynamic_image":
- entryContent = addDynamicImage(entryURL, entryContent)
+ entry.Content = addDynamicImage(entryURL, entry.Content)
case "add_youtube_video":
- entryContent = addYoutubeVideo(entryURL, entryContent)
+ entry.Content = addYoutubeVideo(entryURL, entry.Content)
case "add_invidious_video":
- entryContent = addInvidiousVideo(entryURL, entryContent)
+ entry.Content = addInvidiousVideo(entryURL, entry.Content)
case "add_youtube_video_using_invidious_player":
- entryContent = addYoutubeVideoUsingInvidiousPlayer(entryURL, entryContent)
+ entry.Content = addYoutubeVideoUsingInvidiousPlayer(entryURL, entry.Content)
case "add_youtube_video_from_id":
- entryContent = addYoutubeVideoFromId(entryContent)
+ entry.Content = addYoutubeVideoFromId(entry.Content)
case "add_pdf_download_link":
- entryContent = addPDFLink(entryURL, entryContent)
+ entry.Content = addPDFLink(entryURL, entry.Content)
case "nl2br":
- entryContent = replaceLineFeeds(entryContent)
+ entry.Content = replaceLineFeeds(entry.Content)
case "convert_text_link", "convert_text_links":
- entryContent = replaceTextLinks(entryContent)
+ entry.Content = replaceTextLinks(entry.Content)
case "fix_medium_images":
- entryContent = fixMediumImages(entryURL, entryContent)
+ entry.Content = fixMediumImages(entryURL, entry.Content)
case "use_noscript_figure_images":
- entryContent = useNoScriptImages(entryURL, entryContent)
+ entry.Content = useNoScriptImages(entryURL, entry.Content)
case "replace":
// Format: replace("search-term"|"replace-term")
if len(rule.args) >= 2 {
- entryContent = replaceCustom(entryContent, rule.args[0], rule.args[1])
+ entry.Content = replaceCustom(entry.Content, rule.args[0], rule.args[1])
} else {
logger.Debug("[Rewrite] Cannot find search and replace terms for replace rule %s", rule)
}
case "remove":
// Format: remove("#selector > .element, .another")
if len(rule.args) >= 1 {
- entryContent = removeCustom(entryContent, rule.args[0])
+ entry.Content = removeCustom(entry.Content, rule.args[0])
} else {
logger.Debug("[Rewrite] Cannot find selector for remove rule %s", rule)
}
case "add_castopod_episode":
- entryContent = addCastopodEpisode(entryURL, entryContent)
+ entry.Content = addCastopodEpisode(entryURL, entry.Content)
case "base64_decode":
if len(rule.args) >= 1 {
- entryContent = applyFuncOnTextContent(entryContent, rule.args[0], decodeBase64Content)
+ entry.Content = applyFuncOnTextContent(entry.Content, rule.args[0], decodeBase64Content)
} else {
- entryContent = applyFuncOnTextContent(entryContent, "body", decodeBase64Content)
+ entry.Content = applyFuncOnTextContent(entry.Content, "body", decodeBase64Content)
}
case "parse_markdown":
- entryContent = parseMarkdown(entryContent)
+ entry.Content = parseMarkdown(entry.Content)
case "remove_tables":
- entryContent = removeTables(entryContent)
+ entry.Content = removeTables(entry.Content)
+ case "remove_clickbait":
+ // The only rule here that edits entry.Title; every other case rewrites entry.Content.
+ entry.Title = removeClickbait(entry.Title)
}
-
- return entryContent
}
func getPredefinedRewriteRules(entryURL string) string {
diff --git a/reader/rewrite/rewriter_test.go b/reader/rewrite/rewriter_test.go
index 7d3306b1594..073809df347 100644
--- a/reader/rewrite/rewriter_test.go
+++ b/reader/rewrite/rewriter_test.go
@@ -8,6 +8,8 @@ import (
"reflect"
"strings"
"testing"
+
+ "miniflux.app/model"
)
func TestParseRules(t *testing.T) {
@@ -46,178 +48,301 @@ func TestReplaceTextLinks(t *testing.T) {
}
func TestRewriteWithNoMatchingRule(t *testing.T) {
- output := Rewriter("https://example.org/article", `Some text.`, ``)
- expected := `Some text.`
+ controlEntry := &model.Entry{
+ Title: `A title`,
+ Content: `Some text.`,
+ }
+ testEntry := &model.Entry{
+ Title: `A title`,
+ Content: `Some text.`,
+ }
+ Rewriter("https://example.org/article", testEntry, ``)
- if expected != output {
- t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+ if !reflect.DeepEqual(testEntry, controlEntry) {
+ t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithYoutubeLink(t *testing.T) {
- output := Rewriter("https://www.youtube.com/watch?v=1234", "Video Description", ``)
- expected := `
Video Description`
+ controlEntry := &model.Entry{
+ Title: `A title`,
+ Content: `
Video Description`,
+ }
+ testEntry := &model.Entry{
+ Title: `A title`,
+ Content: `Video Description`,
+ }
+ Rewriter("https://www.youtube.com/watch?v=1234", testEntry, ``)
- if expected != output {
- t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+ if !reflect.DeepEqual(testEntry, controlEntry) {
+ t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithInexistingCustomRule(t *testing.T) {
- output := Rewriter("https://www.youtube.com/watch?v=1234", `Video Description`, `some rule`)
- expected := `Video Description`
- if expected != output {
- t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+ controlEntry := &model.Entry{
+ Title: `A title`,
+ Content: `Video Description`,
+ }
+ testEntry := &model.Entry{
+ Title: `A title`,
+ Content: `Video Description`,
+ }
+ Rewriter("https://www.youtube.com/watch?v=1234", testEntry, `some rule`)
+
+ if !reflect.DeepEqual(testEntry, controlEntry) {
+ t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithXkcdLink(t *testing.T) {
- description := ``
- output := Rewriter("https://xkcd.com/1912/", description, ``)
- expected := ``
- if expected != output {
- t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+ controlEntry := &model.Entry{
+ Title: `A title`,
+ Content: ``,
+ }
+ testEntry := &model.Entry{
+ Title: `A title`,
+ Content: ``,
+ }
+ Rewriter("https://xkcd.com/1912/", testEntry, ``)
+
+ if !reflect.DeepEqual(testEntry, controlEntry) {
+ t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithXkcdLinkHtmlInjection(t *testing.T) {
- description := ``
- output := Rewriter("https://xkcd.com/1912/", description, ``)
- expected := ``
- if expected != output {
- t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+ controlEntry := &model.Entry{
+ Title: `A title`,
+ Content: ``,
+ }
+ testEntry := &model.Entry{
+ Title: `A title`,
+ Content: ``,
+ }
+ Rewriter("https://xkcd.com/1912/", testEntry, ``)
+
+ if !reflect.DeepEqual(testEntry, controlEntry) {
+ t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithXkcdLinkAndImageNoTitle(t *testing.T) {
- description := ``
- output := Rewriter("https://xkcd.com/1912/", description, ``)
- expected := description
- if expected != output {
- t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+ controlEntry := &model.Entry{
+ Title: `A title`,
+ Content: ``,
+ }
+ testEntry := &model.Entry{
+ Title: `A title`,
+ Content: ``,
+ }
+ Rewriter("https://xkcd.com/1912/", testEntry, ``)
+
+ if !reflect.DeepEqual(testEntry, controlEntry) {
+ t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithXkcdLinkAndNoImage(t *testing.T) {
- description := "test"
- output := Rewriter("https://xkcd.com/1912/", description, ``)
- expected := description
- if expected != output {
- t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+ controlEntry := &model.Entry{
+ Title: `A title`,
+ Content: `test`,
+ }
+ testEntry := &model.Entry{
+ Title: `A title`,
+ Content: `test`,
+ }
+ Rewriter("https://xkcd.com/1912/", testEntry, ``)
+
+ if !reflect.DeepEqual(testEntry, controlEntry) {
+ t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithXkcdAndNoImage(t *testing.T) {
- description := "test"
- output := Rewriter("https://xkcd.com/1912/", description, ``)
- expected := description
+ controlEntry := &model.Entry{
+ Title: `A title`,
+ Content: `test`,
+ }
+ testEntry := &model.Entry{
+ Title: `A title`,
+ Content: `test`,
+ }
+ Rewriter("https://xkcd.com/1912/", testEntry, ``)
- if expected != output {
- t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+ if !reflect.DeepEqual(testEntry, controlEntry) {
+ t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteMailtoLink(t *testing.T) {
- description := `contact`
- output := Rewriter("https://www.qwantz.com/", description, ``)
- expected := `contact [blah blah]`
- if expected != output {
- t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+ controlEntry := &model.Entry{
+ Title: `A title`,
+ Content: `contact [blah blah]`,
+ }
+ testEntry := &model.Entry{
+ Title: `A title`,
+ Content: `contact`,
+ }
+ Rewriter("https://www.qwantz.com/", testEntry, ``)
+
+ if !reflect.DeepEqual(testEntry, controlEntry) {
+ t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithPDFLink(t *testing.T) {
- description := "test"
- output := Rewriter("https://example.org/document.pdf", description, ``)
- expected := `PDF
test`
+ controlEntry := &model.Entry{
+ Title: `A title`,
+ Content: `PDF
test`,
+ }
+ testEntry := &model.Entry{
+ Title: `A title`,
+ Content: `test`,
+ }
+ Rewriter("https://example.org/document.pdf", testEntry, ``)
- if expected != output {
- t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+ if !reflect.DeepEqual(testEntry, controlEntry) {
+ t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithNoLazyImage(t *testing.T) {
- description := ``
- output := Rewriter("https://example.org/article", description, "add_dynamic_image")
- expected := description
+ controlEntry := &model.Entry{
+ Title: `A title`,
+ Content: ``,
+ }
+ testEntry := &model.Entry{
+ Title: `A title`,
+ Content: ``,
+ }
+ Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
- if expected != output {
- t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+ if !reflect.DeepEqual(testEntry, controlEntry) {
+ t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithLazyImage(t *testing.T) {
- description := ``
- output := Rewriter("https://example.org/article", description, "add_dynamic_image")
- expected := ``
+ controlEntry := &model.Entry{
+ Title: `A title`,
+ Content: ``,
+ }
+ testEntry := &model.Entry{
+ Title: `A title`,
+ Content: ``,
+ }
+ Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
- if expected != output {
- t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+ if !reflect.DeepEqual(testEntry, controlEntry) {
+ t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithLazyDivImage(t *testing.T) {
- description := `
Test
|
Test
Hello World!
Test
` - output := Rewriter("https://example.org/article", content, `remove_tables`) + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Test
Hello World!
Test
`, + } + testEntry := &model.Entry{ + Title: `A title`, + Content: `Test
|