-
Notifications
You must be signed in to change notification settings - Fork 1
/
xml2struct.m
192 lines (166 loc) · 6.17 KB
/
xml2struct.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
function outStruct = xml2struct(input)
%XML2STRUCT converts xml file into a MATLAB structure
%
% outStruct = xml2struct(input)
%
% xml2struct takes either a java xml object, an xml file, or a string in
% xml format as input and returns a parsed xml tree in structure.
%
% Input:
%   input     - a Xerces deferred document/element object, the name of an
%               existing xml file, or a character string holding xml.
% Output:
%   outStruct - nested structure built by parseChildNodes from the
%               document tree.
%
% Please note that the characters '-', ':' and '.' in element and
% attribute names are each substituted by '_', since they cannot be part
% of a MATLAB field name.
%
% An error is raised when the input cannot be read as xml. Errors thrown
% while reading are passed through unchanged, except that failures to
% parse a string and 'MATLAB:UndefinedFunction' errors are reported with
% the "not in a supported format" message.
%
% Originally written by W. Falkena, ASTI, TUDelft, 21-08-2010
% Attribute parsing speed increase by 40% by A. Wanner, 14-6-2011
% Added CDATA support by I. Smirnov, 20-3-2012
% Modified by X. Mo, University of Wisconsin, 12-5-2012
% Modified by Chao-Yuan Yeh, August 2016
errorMsg = ['%s is not in a supported format.\n\nInput has to be',...
    ' a java xml object, an xml file, or a string in xml format.'];
% check if input is a java xml object
if isa(input, 'org.apache.xerces.dom.DeferredDocumentImpl') ||...
        isa(input, 'org.apache.xerces.dom.DeferredElementImpl')
    xDoc = input;
else
    % Remember whether the input turned out to be unusable so that the
    % user-facing error is raised from a single place below.
    unsupported = false;
    try
        if exist(input, 'file') == 2
            xDoc = xmlread(input);
        else
            try
                xDoc = xmlFromString(input);
            catch
                % not a file and not parseable as an xml string
                unsupported = true;
            end
        end
    catch ME
        if strcmp(ME.identifier, 'MATLAB:UndefinedFunction')
            unsupported = true;
        else
            rethrow(ME)
        end
    end
    if unsupported
        % inputname returns '' when the caller passed a literal or an
        % expression instead of a named variable; use a generic label so
        % the message does not start with a blank.
        argName = inputname(1);
        if isempty(argName)
            argName = 'Input';
        end
        error(errorMsg, argName);
    end
end
% parse xDoc into a MATLAB structure
outStruct = parseChildNodes(xDoc);
end
% ----- Local function parseChildNodes -----
function [children, ptext, textflag] = parseChildNodes(theNode)
%PARSECHILDNODES Recurse over node children.
%
% Input:
%   theNode  - DOM node whose direct children are visited.
% Output:
%   children - struct with one field per child element name. When the
%              same name occurs more than once the field holds a cell
%              array with one entry per occurrence.
%   ptext    - struct with the non-whitespace character data found
%              directly under theNode, stored in the fields 'Text',
%              'CDATA' and/or 'Comment'.
%   textflag - field name under which getNodeData stores raw node text;
%              starts as 'Text' and is overwritten by the value returned
%              from the last getNodeData call.
children = struct;
ptext = struct;
textflag = 'Text';
% getNodeData replaces '-' in node names by '_', so the DOM node name
% '#cdata-section' arrives here as '#cdata_section'. The older spelling
% '#cdata_dash_section' is still accepted. Without this match a CDATA node
% would be treated as an element and fail on an invalid field name.
cdataNames = {'#cdata_section', '#cdata_dash_section'};
if theNode.hasChildNodes
    childNodes = theNode.getChildNodes;
    numChildNodes = childNodes.getLength;
    for count = 1:numChildNodes
        theChild = childNodes.item(count-1);
        [text, name, attr, childs, textflag] = getNodeData(theChild);
        isCdata = any(strcmp(name, cdataNames));
        isComment = strcmp(name, '#comment');
        if ~(strcmp(name, '#text') || isComment || isCdata)
            % Element node: start from its own children, then add the
            % text data (ptext returned by the child) and its attributes.
            entry = childs;
            textfields = fieldnames(text);
            for ii = 1:length(textfields)
                entry.(textfields{ii}) = text.(textfields{ii});
            end
            if ~isempty(attr)
                entry.('Attributes') = attr;
            end
            % XML allows the same elements to be defined multiple times,
            % put each in a different cell
            if isfield(children, name)
                if ~iscell(children.(name))
                    % put existing element into cell format
                    children.(name) = {children.(name)};
                end
                % add new element
                children.(name){length(children.(name))+1} = entry;
            else
                % add previously unknown (new) element to the structure
                children.(name) = entry;
            end
        else
            % Character data belonging to theNode itself (the parent of
            % this text, CDATA or comment node).
            ptextflag = 'Text';
            if isCdata
                ptextflag = 'CDATA';
            elseif isComment
                ptextflag = 'Comment';
            end
            % ignore chunks that consist of whitespace only
            if ~isempty(regexprep(text.(textflag), '[\s]*', ''))
                if ~isfield(ptext, ptextflag) || isempty(ptext.(ptextflag))
                    ptext.(ptextflag) = text.(textflag);
                else
                    % This is what happens when document is like this:
                    % <element>Text <!--Comment--> More text</element>
                    %
                    % text will be appended to existing ptext
                    ptext.(ptextflag) = [ptext.(ptextflag) text.(textflag)];
                end
            end
        end
    end
end
end
% ----- Local function getNodeData -----
function [text,name,attr,childs,textflag] = getNodeData(theNode)
%GETNODEDATA Gather name, attributes, children and text of one DOM node.
%
% Output:
%   text     - struct of character data as returned by parseChildNodes,
%              or the node's own text content when it has neither child
%              elements nor text fields.
%   name     - node name with '-', ':' and '.' replaced by '_'.
%   attr     - struct of attributes, or [] when the node has none.
%   childs   - struct of child elements returned by parseChildNodes.
%   textflag - text field name returned by parseChildNodes.

% '-', ':' and '.' are not allowed in a structure field name
name = regexprep(char(theNode.getNodeName), '[-:.]', '_');

% an attribute struct without fields is reported as []
attr = parseAttributes(theNode);
if isempty(fieldnames(attr))
    attr = [];
end

% descend into the node
[childs, text, textflag] = parseChildNodes(theNode);

% Nothing was found below this node: take its text content directly.
% (Probing for a getData method via methods(theNode) would also work,
% but is very slow.)
hasNoChildren = isempty(fieldnames(childs));
hasNoText = isempty(fieldnames(text));
if hasNoChildren && hasNoText
    text.(textflag) = char(theNode.getTextContent);
end
end
% ----- Local function parseAttributes -----
function attributes = parseAttributes(theNode)
%PARSEATTRIBUTES Build a struct from the attributes of a DOM node.
%
% Each attribute becomes one field. '-', ':' and '.' in the attribute
% name are replaced by '_'; the value is stored as a character array.
% A node without attributes yields a struct without fields.
attributes = struct;
if ~theNode.hasAttributes()
    return
end
attrMap = theNode.getAttributes();
lastIdx = attrMap.getLength() - 1;
for idx = 0:lastIdx
    % Suggestion of Adrian Wanner: slice the string form of the
    % attribute. The slicing relies on that string looking like
    % name="value".
    pair = char(attrMap.item(idx).toString());
    eqPos = strfind(pair, '=');
    firstEq = eqPos(1);
    key = regexprep(pair(1:(firstEq-1)), '[-:.]', '_');
    % skip the '=' and the opening quote, drop the closing quote
    attributes.(key) = pair((firstEq+2):(end-1));
end
end
% ----- Local function xmlFromString -----
function xmlroot = xmlFromString(iString)
%XMLFROMSTRING Parse xml held in a string instead of a file.
%
% The string is wrapped in a java.io.StringReader, which is handed to
% xmlread through an org.xml.sax.InputSource.
charStream = java.io.StringReader(iString);
source = org.xml.sax.InputSource();
source.setCharacterStream(charStream);
xmlroot = xmlread(source);
end