Skip to content

Commit

Permalink
Coradoc::Input::HTML: Fix complex list items
Browse files Browse the repository at this point in the history
This commit fixes handling of complex list items.

Before the Coradoc refactor, reverse_adoc used to treat all
children of list elements as inline elements (if I recall correctly).

After the Coradoc refactor, we treat all children of list
elements as non-inline elements (necessitating a line break).

This broke certain assumptions we have while dealing with
BIPM incoming documents. In particular, whenever there was
a subscript involved, we get unexpected line breaks.

This PR aims to correctly handle all inline and non-inline
list children, according to AsciiDoc specification[1].

Since this is quite a large refactor of a crucial part of
documents, this may break a couple of assumptions, so
I would really appreciate some testing before merging that.

[1] https://docs.asciidoctor.org/asciidoc/latest/lists/continuation/
  • Loading branch information
hmdne committed Dec 10, 2024
1 parent 840d3e4 commit 1fddfa1
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 8 deletions.
63 changes: 57 additions & 6 deletions lib/coradoc/element/list_item.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,77 @@ def initialize(content, options = {})
@line_break = options.fetch(:line_break, "\n")
end

# Classifies a single child of a list item so the renderer can decide
# whether a list continuation is needed around it.
#
# @param elem [Object] one child element of the list item
# @return [Symbol, Boolean] +:hardbreak+ for a hard line break; +true+ for
#   inline content (an instance of any class whose name contains
#   "::Inline::", a String, a TextElement or an inline image); +false+
#   for anything else
def inline?(elem)
  # Hard breaks are checked first: they also live in the Inline namespace
  # and would otherwise be reported as plain inline content.
  return :hardbreak if Inline::HardLineBreak === elem
  return true if elem.class.name.to_s.include?("::Inline::")

  String === elem || TextElement === elem || Image::InlineImage === elem
end

# Renders this list item as AsciiDoc: the optional anchor, the item's
# content, the line break, then any attached blocks and nested content.
#
# NOTE(review): this text comes from a rendered diff ("57 additions & 6
# deletions") without +/- markers, so a few lines below appear to be the
# removed pre-change versions sitting next to their replacements. They are
# flagged individually; confirm against the real file before relying on them.
def to_adoc
# Anchor text is padded with a space on both sides; empty when there is no anchor.
anchor = @anchor.nil? ? "" : " #{@anchor.to_adoc.to_s} "
# text = Coradoc::Generator.gen_adoc(@content)
# NOTE(review): the next two lines look like the removed implementation
# (map over children, skipping hard line breaks).
content = Array(@content).map do |subitem|
next if subitem.is_a? Inline::HardLineBreak
# Normalise the content into a flat array without nils.
content = Array(@content).flatten.compact
# `out` accumulates the rendered content; `prev_inline` remembers how the
# previous child was classified (:init before the first child).
out = ""
prev_inline = :init

# Collapse meaningless <DIV>s
# While the content is exactly one Section that reports it is safe to
# collapse, replace the content with that Section's own contents.
while content.map(&:class) == [Section] && content.first.safe_to_collapse?
content = Array(content.first.contents)
end

content.each do |subitem|
subcontent = Coradoc::Generator.gen_adoc(subitem)

# Only try to postprocess elements that are text,
# otherwise we could strip markup.
# NOTE(review): the `a_single?` condition below looks like the removed
# version of the `is_a?` check that follows it.
if Coradoc.a_single?(subitem, Coradoc::Element::TextElement)
if subitem.is_a? Coradoc::Element::TextElement
subcontent = Coradoc.strip_unicode(subcontent)
end
# NOTE(review): the next two lines look like the tail of the removed
# map/join implementation.
subcontent
end.compact.join("\n+\n")

# Classification of the current child: true, false or :hardbreak.
inline = inline?(subitem)

case inline
when true
# Inline content following a non-inline child is joined with a
# "\n+\n" separator; otherwise it is appended directly.
if prev_inline == false
out += "\n+\n" + subcontent
else
out += subcontent
end
when false
case prev_inline
when :hardbreak
# The preceding hard break already emitted the "\n+\n" separator.
out += subcontent.strip
when :init
# A non-inline first child is prefixed with "{empty}" and a separator.
out += "{empty}\n+\n" + subcontent.strip
else
out += "\n+\n" + subcontent.strip
end
when :hardbreak
# A hard break is emitted as a "\n+\n" separator, except as noted below.
if %i[hardbreak init].include? prev_inline
# can't have two hard breaks in a row; can't start with a hard break
else
out += "\n+\n"
end
end

prev_inline = inline
end
# A trailing hard break is followed by "{empty}", and an item with no
# rendered content becomes "{empty}".
out += "{empty}" if prev_inline == :hardbreak
out = "{empty}" if out.empty?

# attach = Coradoc::Generator.gen_adoc(@attached)
# Each attached element is rendered with a leading "+\n" line.
attach = @attached.map do |elem|
"+\n" + Coradoc::Generator.gen_adoc(elem)
end.join
nest = Coradoc::Generator.gen_adoc(@nested)
# NOTE(review): the first assignment below (interpolating `content`) looks
# like the removed line; the second (interpolating `out`) its replacement.
out = " #{anchor}#{content}#{@line_break}"
out = " #{anchor}#{out}#{@line_break}"
out + attach + nest
end
end
Expand Down
10 changes: 8 additions & 2 deletions spec/coradoc/element/list_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,19 @@
expect(list.items).to eq(items)
end
it "handles list continuations" do
items2 = Coradoc::Element::ListItem.new(["Item 2a", "Item 2b", "Item 2c"])
items2 = Coradoc::Element::ListItem.new(
[
Coradoc::Element::Paragraph.new("Item 2a"),
Coradoc::Element::Paragraph.new("Item 2b"),
Coradoc::Element::Paragraph.new("Item 2c")
]
)
item1 = Coradoc::Element::ListItem.new("Item 1")
items = [item1, items2]

list = Coradoc::Element::List::Unordered.new(items)

expect(list.to_adoc).to eq("\n\n* Item 1\n* Item 2a\n+\nItem 2b\n+\nItem 2c\n")
expect(list.to_adoc).to eq("\n\n* Item 1\n* {empty}\n+\nItem 2a\n+\nItem 2b\n+\nItem 2c\n")
end
it "handles complex list items" do
items2 = Coradoc::Element::ListItem.new("Item 2\nsecond line\nthird line")
Expand Down
80 changes: 80 additions & 0 deletions spec/coradoc/input/html/components/lists/complex_children_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
require "spec_helper"

# Checks how HTML list items with inline and non-inline children are
# converted to AsciiDoc by Coradoc::Input::HTML.convert.
describe Coradoc::Element::ListItem do
  # Converts the given HTML and expects the exact AsciiDoc output.
  #
  # @param input [String] HTML source to convert
  # @param should_convert_to [String] expected AsciiDoc text
  def input(input, should_convert_to:)
    # Uses expect/eq, the same matcher style as the other specs in this
    # commit, instead of the legacy `should` syntax.
    expect(Coradoc::Input::HTML.convert(input)).to eq(should_convert_to)
  end

  it "should work with simple blocks" do
    input "<ul><li>abc</li></ul>", should_convert_to: "* abc\n"
  end

  it "should not expand inline elements" do
    input "<ul><li>abc<b>def</b>ghi</li></ul>",
          should_convert_to: "* abc**def**ghi\n"
    input "<ul><li>abc<a href='c'>ddd</a>ghi</li></ul>",
          should_convert_to: "* abc link:c[ddd]ghi\n"
  end

  it "should expand non-inline elements like tables" do
    input "<ul><li>xx<table><tr><td>test</td></tr></table></li></ul>",
          should_convert_to: <<~ADOC
            * xx
            +
            [cols=1]
            |===
            | test
            |===
          ADOC
  end

  it "should prefix non-inline elements with {empty}" do
    input "<ul><li><pre>abc</pre></li></ul>",
          should_convert_to: <<~ADOC
            * {empty}
            +
            ....
            abc
            ....
          ADOC
  end

  it "should not prefix inline elements with {empty}" do
    input "<ul><li><b>abc</b></li></ul>",
          should_convert_to: <<~ADOC
            * *abc*
          ADOC
  end

  it "should replace empty elements with {empty}" do
    input "<ul><li></li><li></li><li></li></ul>",
          should_convert_to: <<~ADOC
            * {empty}
            * {empty}
            * {empty}
          ADOC
  end

  it "should handle linebreaks like paragraphs" do
    # <br> separators become list continuations after the first line.
    input "<ul><li>test<br>test<br>test</li></ul>",
          should_convert_to: <<~ADOC
            * test
            +
            test
            +
            test
          ADOC

    # Paragraph children are all non-inline, so the item starts with {empty}.
    input "<ul><li><p>test<p>test<p>test</li></ul>",
          should_convert_to: <<~ADOC
            * {empty}
            +
            test
            +
            test
            +
            test
          ADOC
  end
end

0 comments on commit 1fddfa1

Please sign in to comment.