Branch data Line data Source code
1 : : /*
2 : : * UPnP XML helper routines
3 : : * Copyright (c) 2000-2003 Intel Corporation
4 : : * Copyright (c) 2006-2007 Sony Corporation
5 : : * Copyright (c) 2008-2009 Atheros Communications
6 : : * Copyright (c) 2009, Jouni Malinen <j@w1.fi>
7 : : *
8 : : * See wps_upnp.c for more details on licensing and code history.
9 : : */
10 : :
11 : : #include "includes.h"
12 : :
13 : : #include "common.h"
14 : : #include "base64.h"
15 : : #include "http.h"
16 : : #include "upnp_xml.h"
17 : :
18 : :
19 : : /*
20 : : * XML parsing and formatting
21 : : *
22 : : * XML is a markup language based on unicode; usually (and in our case,
23 : : * always!) based on utf-8. utf-8 uses a variable number of bytes per
24 : : * character. utf-8 has the advantage that all non-ASCII unicode characters are
25 : : * represented by sequences of non-ascii (high bit set) bytes, whereas ASCII
26 : : * characters are single ascii bytes, thus we can use typical text processing.
27 : : *
28 : : * (One other interesting thing about utf-8 is that it is possible to look at
29 : : * any random byte and determine whether it is the first byte of a character
30 : : * or a continuation byte).
31 : : *
32 : : * The base syntax of XML uses a few ASCII punctuation characters; any
33 : : * characters that would appear in the payload data are rewritten using
34 : : * sequences, e.g., &amp; for ampersand (&) and &lt; for left angle bracket (<).
35 : : * Five such escapes total (more can be defined but that does not apply to our
36 : : * case). Thus we can safely parse for angle brackets etc.
37 : : *
38 : : * XML describes tree structures of tagged data, with each element beginning
39 : : * with an opening tag <label> and ending with a closing tag </label> with
40 : : * matching label. (There is also a self-closing tag <label/> which is supposed
41 : : * to be equivalent to <label></label>, i.e., no payload, but we are unlikely
42 : : * to see it for our purpose).
43 : : *
44 : : * Actually the opening tags are a little more complicated because they can
45 : : * contain "attributes" after the label (delimited by ascii space or tab chars)
46 : : * of the form attribute_label="value" or attribute_label='value'; as it turns
47 : : * out we do not have to read any of these attributes, just ignore them.
48 : : *
49 : : * Labels are any sequence of chars other than space, tab, right angle bracket
50 : : * (and ?), but may have an inner structure of <namespace><colon><plain_label>.
51 : : * As it turns out, we can ignore the namespaces, in fact we can ignore the
52 : : * entire tree hierarchy, because the plain labels we are looking for will be
53 : : * unique (not in general, but for this application). We do however have to be
54 : : * careful to skip over the namespaces.
55 : : *
56 : : * In generating XML we have to be more careful, but that is easy because
57 : : * everything we do is pretty canned. The only real care to take is to escape
58 : : * any special chars in our payload.
59 : : */
60 : :
/**
 * xml_next_tag - Advance to next tag
 * @in: Input (NUL-terminated text to scan)
 * @out: OUT: start of tag just after '<'
 * @out_tagname: OUT: start of name of tag, skipping namespace
 * @end: OUT: one after tag (just past the closing '>')
 * Returns: 0 on success, 1 on failure
 *
 * A tag has form:
 *     <left angle bracket><...><right angle bracket>
 * Within the angle brackets, there is an optional leading forward slash (which
 * makes the tag an ending tag), then an optional leading label (followed by
 * colon) and then the tag name itself.
 *
 * Note that angle brackets present in the original data must have been encoded
 * as &lt; and &gt; so they will not trouble us.
 *
 * Failure means either that no '<' was found before the end of the input (no
 * output parameter is written in that case) or that the tag was not closed by
 * '>' before the end of the input (@out and @out_tagname have been written,
 * @end has not).
 */
int xml_next_tag(const char *in, const char **out,
		 const char **out_tagname, const char **end)
{
	/* Skip payload text up to the next tag opener. */
	while (*in && *in != '<')
		in++;
	if (*in != '<')
		return 1;
	*out = ++in;
	if (*in == '/')
		in++;
	*out_tagname = in; /* maybe */
	/*
	 * Scan what may be a namespace prefix. The <ctype.h> classifiers are
	 * only defined for values representable as unsigned char (or EOF),
	 * and a plain char holding a UTF-8 byte may be negative, so convert
	 * before calling isalnum().
	 */
	while (isalnum((unsigned char) *in) || *in == '-')
		in++;
	/* A colon here means the scanned label was a namespace; skip it. */
	if (*in == ':')
		*out_tagname = ++in;
	/* Ignore the rest of the tag (name remainder and attributes). */
	while (*in && *in != '>')
		in++;
	if (*in != '>')
		return 1;
	*end = ++in;
	return 0;
}
100 : :
101 : :
102 : : /* xml_data_encode -- format data for xml file, escaping special characters.
103 : : *
104 : : * Note that we assume we are using utf8 both as input and as output!
105 : : * In utf8, characters may be classed as follows:
106 : : * 0xxxxxxx(2) -- 1 byte ascii char
107 : : * 11xxxxxx(2) -- 1st byte of multi-byte char w/ unicode value >= 0x80
108 : : * 110xxxxx(2) -- 1st byte of 2 byte sequence (5 payload bits here)
109 : : * 1110xxxx(2) -- 1st byte of 3 byte sequence (4 payload bits here)
110 : : * 11110xxx(2) -- 1st byte of 4 byte sequence (3 payload bits here)
111 : : * 10xxxxxx(2) -- extension byte (6 payload bits per byte)
112 : : * Some values implied by the above are however illegal because they
113 : : * do not represent unicode chars or are not the shortest encoding.
114 : : * Actually, we can almost entirely ignore the above and just do
115 : : * text processing same as for ascii text.
116 : : *
117 : : * XML is written with arbitrary unicode characters, except that five
118 : : * characters have special meaning and so must be escaped where they
119 : : * appear in payload data... which we do here.
120 : : */
121 : 24 : void xml_data_encode(struct wpabuf *buf, const char *data, int len)
122 : : {
123 : : int i;
124 [ + + ]: 304 : for (i = 0; i < len; i++) {
125 : 280 : u8 c = ((u8 *) data)[i];
126 [ - + ]: 280 : if (c == '<') {
127 : 0 : wpabuf_put_str(buf, "<");
128 : 0 : continue;
129 : : }
130 [ - + ]: 280 : if (c == '>') {
131 : 0 : wpabuf_put_str(buf, ">");
132 : 0 : continue;
133 : : }
134 [ - + ]: 280 : if (c == '&') {
135 : 0 : wpabuf_put_str(buf, "&");
136 : 0 : continue;
137 : : }
138 [ - + ]: 280 : if (c == '\'') {
139 : 0 : wpabuf_put_str(buf, "'");
140 : 0 : continue;
141 : : }
142 [ - + ]: 280 : if (c == '"') {
143 : 0 : wpabuf_put_str(buf, """);
144 : 0 : continue;
145 : : }
146 : : /*
147 : : * We could try to represent control characters using the
148 : : * sequence: &#x; where x is replaced by a hex numeral, but not
149 : : * clear why we would do this.
150 : : */
151 : 280 : wpabuf_put_u8(buf, c);
152 : : }
153 : 24 : }
154 : :
155 : :
/* xml_add_tagged_data -- append one "<tag>data</tag>" line to an xml buffer.
 *
 * The tag is written verbatim, so it must not contain any special chars.
 * The data is passed through xml_data_encode(), so any special chars in it
 * are escaped. A newline is written after the closing tag.
 */
void xml_add_tagged_data(struct wpabuf *buf, const char *tag, const char *data)
{
	int data_len = os_strlen(data);

	/* Opening tag */
	wpabuf_printf(buf, "<%s>", tag);

	/* Escaped payload */
	xml_data_encode(buf, data, data_len);

	/* Closing tag and end of line */
	wpabuf_printf(buf, "</%s>\n", tag);
}
167 : :
168 : :
/* xml_get_first_item -- fetch the payload of the first element named item.
 *
 * doc:  NUL-terminated XML text to search
 * item: plain tag name to look for (without namespace prefix)
 * Returns: newly allocated copy of the text that follows the matching opening
 * tag, up to (not including) the next '<' or the end of doc; NULL if no
 * matching opening tag is found or the allocation fails. The buffer is
 * obtained from os_zalloc() with one byte more than the copied text; the
 * caller is responsible for releasing it (xml_get_base64_item() in this file
 * uses os_free()).
 *
 * The tag name comparison is done with os_strncasecmp() and any namespace
 * prefix on the tag is skipped by xml_next_tag(). Closing tags never match.
 *
 * A POST body looks something like (per upnp spec):
 * <?xml version="1.0"?>
 * <s:Envelope
 *     xmlns:s="http://schemas.xmlsoap.org/soap/envelope/"
 *     s:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">
 *   <s:Body>
 *     <u:actionName xmlns:u="urn:schemas-upnp-org:service:serviceType:v">
 *       <argumentName>in arg value</argumentName>
 *       other in args and their values go here, if any
 *     </u:actionName>
 *   </s:Body>
 * </s:Envelope>
 *
 * where :
 *      s: might be some other namespace name followed by colon
 *      u: might be some other namespace name followed by colon
 *      actionName will be replaced according to action requested
 *      schema following actionName will be WFA scheme instead
 *      argumentName will be actual argument name
 *      (in arg value) will be actual argument value
 */
char * xml_get_first_item(const char *doc, const char *item)
{
	const char *match = item;
	size_t match_len = os_strlen(item);
	const char *tag, *tagname, *end;
	char *value;

	/*
	 * This is crude: ignore any possible tag name conflicts and go right
	 * to the first tag of this name. This should be ok for the limited
	 * domain of UPnP messages.
	 */
	for (;;) {
		if (xml_next_tag(doc, &tag, &tagname, &end))
			return NULL;
		doc = end;
		/*
		 * Accept only an opening tag whose name is exactly item: the
		 * character after the matched prefix must end the name ('>'
		 * or a non-graphic character such as the whitespace before an
		 * attribute). isgraph() is only defined for unsigned char
		 * values (or EOF), hence the cast.
		 */
		if (!os_strncasecmp(tagname, match, match_len) &&
		    *tag != '/' &&
		    (tagname[match_len] == '>' ||
		     !isgraph((unsigned char) tagname[match_len]))) {
			break;
		}
	}

	/* doc now points just past the opening tag; find the payload end. */
	end = doc;
	while (*end && *end != '<')
		end++;
	value = os_zalloc(1 + (end - doc));
	if (value == NULL)
		return NULL;
	os_memcpy(value, doc, end - doc);
	return value;
}
222 : :
223 : :
224 : 62 : struct wpabuf * xml_get_base64_item(const char *data, const char *name,
225 : : enum http_reply_code *ret)
226 : : {
227 : : char *msg;
228 : : struct wpabuf *buf;
229 : : unsigned char *decoded;
230 : : size_t len;
231 : :
232 : 62 : msg = xml_get_first_item(data, name);
233 [ - + ]: 62 : if (msg == NULL) {
234 : 0 : *ret = UPNP_ARG_VALUE_INVALID;
235 : 0 : return NULL;
236 : : }
237 : :
238 : 62 : decoded = base64_decode((unsigned char *) msg, os_strlen(msg), &len);
239 : 62 : os_free(msg);
240 [ - + ]: 62 : if (decoded == NULL) {
241 : 0 : *ret = UPNP_OUT_OF_MEMORY;
242 : 0 : return NULL;
243 : : }
244 : :
245 : 62 : buf = wpabuf_alloc_ext_data(decoded, len);
246 [ - + ]: 62 : if (buf == NULL) {
247 : 0 : os_free(decoded);
248 : 0 : *ret = UPNP_OUT_OF_MEMORY;
249 : 0 : return NULL;
250 : : }
251 : 62 : return buf;
252 : : }
|