diff --git a/src/xml_tokenizer.ml b/src/xml_tokenizer.ml
index fd3a64f..298e0da 100644
--- a/src/xml_tokenizer.ml
+++ b/src/xml_tokenizer.ml
@@ -97,7 +97,10 @@ let tokenize report resolve_reference (input, get_location) =
            (Printf.sprintf "&#%s%s;" reference_prefix s,
             "reference", "number out of range"))
            !throw unresolved
-        | Some n -> k (char n)
+        | Some n ->
+          let utf_8_encoded = Buffer.create 8 in
+          add_utf_8 utf_8_encoded n;
+          k (Buffer.contents utf_8_encoded)
         end
 
       | _, c when filter c ->
diff --git a/test/test_xml_tokenizer.ml b/test/test_xml_tokenizer.ml
index 9d56713..bdca7d2 100644
--- a/test/test_xml_tokenizer.ml
+++ b/test/test_xml_tokenizer.ml
@@ -311,7 +311,11 @@ let tests = [
   ("xml.tokenizer.reference" >:: fun _ ->
     expect "foo&lt;bar&gt;&amp;&quot;&apos;baz&#48;&#x31;quux"
       [ 1,  1, S (`Chars ["foo<bar>&\"'baz01quux"]);
-        1, 50, S `EOF]);
+        1, 50, S `EOF];
+
+    expect "&#955;"
+      [ 1,  1, S (`Chars ["λ"]);
+        1,  7, S `EOF]);
 
   ("xml.tokenizer.bad-reference" >:: fun _ ->
     expect "&"
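
For context on the change: the removed expression k (char n) presumably built a one-byte string from the resolved code point n, which is only meaningful for code points below 256 (Char.chr raises Invalid_argument above 255), so a numeric reference such as &#955; (U+03BB, λ) could not be tokenized correctly. The replacement serializes the scalar value as UTF-8 into a temporary buffer (8 bytes is ample, since UTF-8 needs at most 4 bytes per scalar value) and passes the resulting bytes to the continuation k. The standalone sketch below shows the kind of encoder add_utf_8 is assumed to be; add_utf_8_sketch is a hypothetical name used here for illustration, not the library's actual helper.

(* Minimal sketch, assuming add_utf_8 appends the UTF-8 encoding of a
   Unicode scalar value to a buffer. Not the library's implementation. *)
let add_utf_8_sketch buffer n =
  let add = Buffer.add_char buffer in
  if n < 0x80 then
    (* 1 byte: 0xxxxxxx *)
    add (Char.chr n)
  else if n < 0x800 then begin
    (* 2 bytes: 110xxxxx 10xxxxxx *)
    add (Char.chr (0xC0 lor (n lsr 6)));
    add (Char.chr (0x80 lor (n land 0x3F)))
  end
  else if n < 0x10000 then begin
    (* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx *)
    add (Char.chr (0xE0 lor (n lsr 12)));
    add (Char.chr (0x80 lor ((n lsr 6) land 0x3F)));
    add (Char.chr (0x80 lor (n land 0x3F)))
  end
  else begin
    (* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx *)
    add (Char.chr (0xF0 lor (n lsr 18)));
    add (Char.chr (0x80 lor ((n lsr 12) land 0x3F)));
    add (Char.chr (0x80 lor ((n lsr 6) land 0x3F)));
    add (Char.chr (0x80 lor (n land 0x3F)))
  end

let () =
  (* &#955; resolves to code point 955 (U+03BB); its UTF-8 encoding is
     the two bytes CE BB, i.e. the λ expected by the new test case. *)
  let b = Buffer.create 8 in
  add_utf_8_sketch b 955;
  assert (Buffer.contents b = "\xce\xbb")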