# Patch summary (reviewer commentary; everything from the first "diff --git" header onward is the unmodified patch text).
#
# What the visible hunks do:
#   * Gemfile.devel (new): declares the html2doc gem from its git repository, branch "main".
#   * lib/isodoc/function/inline.rb: mathml_parse no longer returns the serialised first m:math child;
#     it removes the asciimath/latexmath children, clears the namespace on direct br children, and
#     returns node.elements. A blank line is dropped from asciimath_parse.
#   * lib/isodoc/html_function/html.rb: adds a preprocess_xslt(docxml) override whose visible body only calls super.
#   * lib/isodoc/presentation_function/bibdata.rb: bibdata no longer calls toc_metadata, fonts_metadata or
#     preprocess_xslt_insert and no longer appends the localised-strings block; the helper methods
#     (extension_insert .. trim_hash1) are removed from this file.
#   * lib/isodoc/presentation_function/math.rb: mathml first runs mathml_linebreak over every m:math, then
#     the existing per-node pass; mathml1 is split into mathml_style_inherit + mathml_number; asciimath_dup
#     now passes the AsciiMath text through @c.encode(ret, :basic) and emits a single warn on failure;
#     guard clauses are rewritten in "cond or return" form.
#   * lib/isodoc/presentation_function/metadata.rb (new): metadata(docxml) plus the helpers removed from
#     bibdata.rb; passthrough_xslt is rebuilt on xslt_template and the COPY_XSLT / COPY_CHILDREN_XSLT constants.
#   * lib/isodoc/presentation_xml_convert.rb: requires the new metadata file and calls "metadata docxml"
#     immediately before "bibdata docxml" in conversions.
#   * spec/isodoc/blocks_notes_spec.rb (new): begins here and continues beyond this excerpt.
#
# NOTE(review): in this copy the patch's line breaks are collapsed into spaces and the XML markup inside
#   string literals / heredocs appears to have been stripped (e.g. the empty "" continuations) -- restore
#   from the original commit before attempting to apply it.
# NOTE(review): toc_metadata guards the toc_recommendations entry with @tocfigures; @tocrecommendations
#   looks intended (the method's first line tests all three flags) -- confirm.
# NOTE(review): the Array branch of i8n_name interpolates i18n_safe(k), but k is not a parameter or local
#   of that method (only i8n_name1 binds k) -- presumably pref was meant; verify against callers.
diff --git a/Gemfile.devel b/Gemfile.devel new file mode 100644 index 00000000..e15cb95d --- /dev/null +++ b/Gemfile.devel @@ -0,0 +1 @@ +gem "html2doc", git: "https://github.com/metanorma/html2doc", branch: "main" diff --git a/lib/isodoc/function/inline.rb b/lib/isodoc/function/inline.rb index 1202a2fc..94bfb4ec 100644 --- a/lib/isodoc/function/inline.rb +++ b/lib/isodoc/function/inline.rb @@ -87,12 +87,14 @@ def stem_parse(node, out) MATHML = { "m" => "http://www.w3.org/1998/Math/MathML" }.freeze def mathml_parse(node) - node.at("./m:math", MATHML)&.to_xml + # node.xpath("./m:math", MATHML).map(&:to_xml).join + node.xpath(ns("./asciimath | ./latexmath")).each(&:remove) + node.xpath(ns("./br")).each { |e| e.namespace = nil } + node.elements end def asciimath_parse(node) a = node.at(ns("./asciimath"))&.text || node.text - "#{@openmathdelim}#{HTMLEntities.new.encode(a)}" \ "#{@closemathdelim}" end diff --git a/lib/isodoc/html_function/html.rb b/lib/isodoc/html_function/html.rb index 114a3f01..22e3fa85 100644 --- a/lib/isodoc/html_function/html.rb +++ b/lib/isodoc/html_function/html.rb @@ -15,6 +15,11 @@ def convert1(docxml, filename, dir) end.join("\n") end + def preprocess_xslt(docxml) + + super + end + def make_body1(body, _docxml) return if @bare diff --git a/lib/isodoc/presentation_function/bibdata.rb b/lib/isodoc/presentation_function/bibdata.rb index 98b91568..2087d128 100644 --- a/lib/isodoc/presentation_function/bibdata.rb +++ b/lib/isodoc/presentation_function/bibdata.rb @@ -3,84 +3,10 @@ module IsoDoc class PresentationXMLConvert < ::IsoDoc::Convert def bibdata(docxml) - toc_metadata(docxml) - fonts_metadata(docxml) - preprocess_xslt_insert(docxml) docid_prefixes(docxml) a = bibdata_current(docxml) or return address_precompose(a) bibdata_i18n(a) - a.next = - "#{i8n_name(trim_hash(@i18n.get), '').join}" \ - "" - end - - def extension_insert(xml, path = []) - ins = extension_insert_pt(xml) - path.each do |n| - ins = ins.at(ns("./#{n}")) || 
ins.add_child("<#{n}/>").first - end - ins - end - - def extension_insert_pt(xml) - xml.at(ns("//metanorma-extension")) || - xml.at(ns("//bibdata"))&.after("") - &.next_element || - xml.root.elements.first.before("") - .previous_element - end - - def preprocess_xslt_insert(docxml) - content = "" - p = passthrough_xslt and content += p - p = preprocess_xslt_read and content += File.read(p) - content.empty? and return - ins = extension_insert(docxml, %w(render)) - ins << content - end - - def passthrough_xslt - @output_formats.nil? and return nil - @output_formats.empty? and return nil - @output_formats.each_key.with_object([]) do |k, m| - m << <<~XSLT - - - - - - - - - - - - - - - - - - XSLT - end.join("\n") - end - - # read in from file, but with `` wrapper - def preprocess_xslt_read - html_doc_path("preprocess.xslt") - end - - def toc_metadata(docxml) - @tocfigures || @toctables || @tocrecommendations or return - ins = extension_insert(docxml) - @tocfigures and - ins << "#{@i18n.toc_figures}" - @toctables and - ins << "#{@i18n.toc_tables}" - @tocfigures and - ins << "#{@i18n.toc_recommendations}" \ - "" end def address_precompose(bib) @@ -92,26 +18,6 @@ def address_precompose(bib) end end - def fonts_metadata(xmldoc) - ins = presmeta_insert_pt(xmldoc) - @fontist_fonts and CSV.parse_line(@fontist_fonts, col_sep: ";") - .map(&:strip).reverse.each do |f| - ins.next = presmeta("fonts", f) - end - @fontlicenseagreement and - ins.next = presmeta("font-license-agreement", @fontlicenseagreement) - end - - def presmeta_insert_pt(xmldoc) - xmldoc.at(ns("//presentation-metadata")) || - xmldoc.at(ns("//metanorma-extension")) || xmldoc.at(ns("//bibdata")) - end - - def presmeta(name, value) - "#{name}#{value}" \ - "" - end - def address_precompose1(addr) ret = [] addr.xpath(ns("./street")).each { |s| ret << to_xml(s.children) } @@ -176,67 +82,5 @@ def tag_translate(tag, lang, value) tag.next["language"] = lang tag.next.children = value end - - def i18n_tag(key, value) - 
"#{value}" \ - "" - end - - def i18n_safe(key) - key.to_s.gsub(/\s|\./, "_") - end - - def i8n_name(hash, pref) - case hash - when Hash then i8n_name1(hash, pref) - when Array - hash.reject { |a| blank?(a) }.each_with_object([]) - .with_index do |(v1, g), i| - i8n_name(v1, "#{i18n_safe(k)}.#{i}").each { |x| g << x } - end - else [i18n_tag(pref, hash)] - end - end - - def i8n_name1(hash, pref) - hash.reject { |_k, v| blank?(v) }.each_with_object([]) do |(k, v), g| - case v - when Hash then i8n_name(v, i18n_safe(k)).each { |x| g << x } - when Array - v.reject { |a| blank?(a) }.each_with_index do |v1, i| - i8n_name(v1, "#{i18n_safe(k)}.#{i}").each { |x| g << x } - end - else - g << i18n_tag("#{pref}#{pref.empty? ? '' : '.'}#{i18n_safe(k)}", v) - end - end - end - - # https://stackoverflow.com/a/31822406 - def blank?(elem) - elem.nil? || (elem.respond_to?(:empty?) && elem.empty?) - end - - def trim_hash(hash) - loop do - h_new = trim_hash1(hash) - break hash if hash == h_new - - hash = h_new - end - end - - def trim_hash1(hash) - hash.is_a?(Hash) or return hash - hash.each_with_object({}) do |(k, v), g| - blank?(v) and next - g[k] = case v - when Hash then trim_hash1(hash[k]) - when Array - hash[k].map { |a| trim_hash1(a) }.reject { |a| blank?(a) } - else v - end - end - end end end diff --git a/lib/isodoc/presentation_function/math.rb b/lib/isodoc/presentation_function/math.rb index fd468b60..34c9c84f 100644 --- a/lib/isodoc/presentation_function/math.rb +++ b/lib/isodoc/presentation_function/math.rb @@ -7,8 +7,9 @@ class PresentationXMLConvert < ::IsoDoc::Convert MATHML = { "m" => "http://www.w3.org/1998/Math/MathML" }.freeze def mathml(docxml) + docxml.xpath("//m:math", MATHML).each { |f| mathml_linebreak(f) } locale = twitter_cldr_localiser - docxml.xpath("//m:math", MATHML).each do |f| + docxml.xpath("//m:math", MATHML).each do |f| # rubocop:disable Style/CombinableLoops mathml1(f, locale) end end @@ -77,8 +78,7 @@ def twitter_cldr_localiser end def 
parse_localize_number - return {} unless @localizenumber - + @localizenumber or return {} m = %r{(?[^#])?(?#+0)(?.)(?#+)(?[^#])?} .match(@localizenumber) or return {} ret = { decimal: m[:decimal], group_digits: m[:groupdigits].size, @@ -90,17 +90,13 @@ def parse_localize_number end def asciimath_dup(node) - return if @suppressasciimathdup || node.parent.at(ns("./asciimath")) - + @suppressasciimathdup || node.parent.at(ns("./asciimath")) and return math = node.to_xml.gsub(/ xmlns=["'][^"']+["']/, "") .gsub(%r{<[^:/>]+:}, "<").gsub(%r{]+:}, "#{ret}" + node.next = "#{@c.encode(ret, :basic)}" rescue StandardError => e - warn "Failure to convert MathML to AsciiMath" - warn node.parent.to_xml - warn e + warn "Failure to convert MathML to AsciiMath\n#{node.parent.to_xml}\n#{e}" end def maths_just_numeral(node) @@ -114,6 +110,23 @@ def maths_just_numeral(node) def mathml1(node, locale) mathml_style_inherit(node) + mathml_number(node, locale) + end + + def mathml_linebreak(node) + node.at(".//*/@linebreak") or return + m = Plurimath::Math.parse(node.to_xml, :mathml) + .to_mathml(split_on_linebreak: true) + ret = Nokogiri::XML("#{m}").root + ret.elements.each_with_index do |e, i| + i.zero? or e.previous = "
" + end + node.replace(<<~OUTPUT) + #{ret.children}#{node.to_xml} + OUTPUT + end + + def mathml_number(node, locale) justnumeral = node.elements.size == 1 && node.elements.first.name == "mn" justnumeral or asciimath_dup(node) localize_maths(node, locale) diff --git a/lib/isodoc/presentation_function/metadata.rb b/lib/isodoc/presentation_function/metadata.rb new file mode 100644 index 00000000..37162347 --- /dev/null +++ b/lib/isodoc/presentation_function/metadata.rb @@ -0,0 +1,184 @@ +module IsoDoc + class PresentationXMLConvert < ::IsoDoc::Convert + def metadata(docxml) + toc_metadata(docxml) + fonts_metadata(docxml) + preprocess_xslt_insert(docxml) + a = docxml.at(ns("//bibdata")) or return + a.next = + "#{i8n_name(trim_hash(@i18n.get), '').join}" \ + "" + end + + def extension_insert(xml, path = []) + ins = extension_insert_pt(xml) + path.each do |n| + ins = ins.at(ns("./#{n}")) || ins.add_child("<#{n}/>").first + end + ins + end + + def extension_insert_pt(xml) + xml.at(ns("//metanorma-extension")) || + xml.at(ns("//bibdata"))&.after("") + &.next_element || + xml.root.elements.first.before("") + .previous_element + end + + def toc_metadata(docxml) + @tocfigures || @toctables || @tocrecommendations or return + ins = extension_insert(docxml) + @tocfigures and + ins << "#{@i18n.toc_figures}" + @toctables and + ins << "#{@i18n.toc_tables}" + @tocfigures and + ins << "#{@i18n.toc_recommendations}" \ + "" + end + + def fonts_metadata(xmldoc) + ins = presmeta_insert_pt(xmldoc) + @fontist_fonts and CSV.parse_line(@fontist_fonts, col_sep: ";") + .map(&:strip).reverse.each do |f| + ins.next = presmeta("fonts", f) + end + @fontlicenseagreement and + ins.next = presmeta("font-license-agreement", @fontlicenseagreement) + end + + def presmeta_insert_pt(xmldoc) + xmldoc.at(ns("//presentation-metadata")) || + xmldoc.at(ns("//metanorma-extension")) || xmldoc.at(ns("//bibdata")) + end + + def presmeta(name, value) + "#{name}#{value}" \ + "" + end + + def 
preprocess_xslt_insert(docxml) + content = "" + p = passthrough_xslt and content += p + p = preprocess_xslt_read and content += File.read(p) + content.empty? and return + ins = extension_insert(docxml, %w(render)) + ins << content + end + + COPY_XSLT = + ''.freeze + COPY_CHILDREN_XSLT = + ''.freeze + + def xslt_template(content) + <<~XSLT + + + #{COPY_XSLT} + #{content} + + XSLT + end + + def passthrough_xslt + @output_formats.nil? and return nil + @output_formats.empty? and return nil + @output_formats.each_key.with_object([]) do |k, m| + m << <<~XSLT + + #{xslt_template(<<~XSLT1) + + + #{COPY_XSLT} + + + XSLT1 + } + + XSLT + m << <<~XSLT + + #{xslt_template(<<~XSLT1) + + #{k == 'pdf' ? COPY_CHILDREN_XSLT : ''} + + + #{k == 'pdf' ? '' : COPY_CHILDREN_XSLT} + + XSLT1 + } + + XSLT + end.join("\n") + end + + # read in from file, but with `` wrapper + def preprocess_xslt_read + html_doc_path("preprocess.xslt") + end + + def i18n_tag(key, value) + "#{value}" \ + "" + end + + def i18n_safe(key) + key.to_s.gsub(/\s|\./, "_") + end + + def i8n_name(hash, pref) + case hash + when Hash then i8n_name1(hash, pref) + when Array + hash.reject { |a| blank?(a) }.each_with_object([]) + .with_index do |(v1, g), i| + i8n_name(v1, "#{i18n_safe(k)}.#{i}").each { |x| g << x } + end + else [i18n_tag(pref, hash)] + end + end + + def i8n_name1(hash, pref) + hash.reject { |_k, v| blank?(v) }.each_with_object([]) do |(k, v), g| + case v + when Hash then i8n_name(v, i18n_safe(k)).each { |x| g << x } + when Array + v.reject { |a| blank?(a) }.each_with_index do |v1, i| + i8n_name(v1, "#{i18n_safe(k)}.#{i}").each { |x| g << x } + end + else + g << i18n_tag("#{pref}#{pref.empty? ? '' : '.'}#{i18n_safe(k)}", v) + end + end + end + + # https://stackoverflow.com/a/31822406 + def blank?(elem) + elem.nil? || (elem.respond_to?(:empty?) && elem.empty?) 
+ end + + def trim_hash(hash) + loop do + h_new = trim_hash1(hash) + break hash if hash == h_new + + hash = h_new + end + end + + def trim_hash1(hash) + hash.is_a?(Hash) or return hash + hash.each_with_object({}) do |(k, v), g| + blank?(v) and next + g[k] = case v + when Hash then trim_hash1(hash[k]) + when Array + hash[k].map { |a| trim_hash1(a) }.reject { |a| blank?(a) } + else v + end + end + end + end +end diff --git a/lib/isodoc/presentation_xml_convert.rb b/lib/isodoc/presentation_xml_convert.rb index 289fd083..866fffa7 100644 --- a/lib/isodoc/presentation_xml_convert.rb +++ b/lib/isodoc/presentation_xml_convert.rb @@ -8,6 +8,7 @@ require_relative "presentation_function/math" require_relative "presentation_function/section" require_relative "presentation_function/bibdata" +require_relative "presentation_function/metadata" module IsoDoc class PresentationXMLConvert < ::IsoDoc::Convert @@ -35,6 +36,7 @@ def bibitem_lookup(docxml) def conversions(docxml) semantic_xml_insert(docxml) + metadata docxml bibdata docxml @xrefs.parse docxml section docxml diff --git a/spec/isodoc/blocks_notes_spec.rb b/spec/isodoc/blocks_notes_spec.rb new file mode 100644 index 00000000..45a08ce5 --- /dev/null +++ b/spec/isodoc/blocks_notes_spec.rb @@ -0,0 +1,710 @@ +require "spec_helper" + +RSpec.describe IsoDoc do + it "processes unlabelled notes" do + input = <<~INPUT + + + +

These results are based on a study carried out on three different types of kernel.

+
+ +

These results are based on a study carried out on three different types of kernel.

+
+
+
+ INPUT + presxml = <<~OUTPUT + + + + + Table of contents + + + + NOTE 1 +

+ These results are based on a study carried out on three different + types of kernel. +

+
+ +

+ These results are based on a study carried out on three different + types of kernel. +

+
+
+
+
+ OUTPUT + html = <<~OUTPUT + #{HTML_HDR} +
+
+

Foreword

+
+

+ NOTE 1 +   These results are based on a study carried out on three + different types of kernel. +

+
+
+

+   These results are based on a study carried out on three + different types of kernel. +

+
+
+ + + + OUTPUT + doc = <<~OUTPUT + +