/*
 * Copyright (c) 2002, Adam Dunkels.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials provided
 *    with the distribution.
 * 3. The name of the author may not be used to endorse or promote
 *    products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * This file is part of the Contiki desktop environment
 *
 * $Id: htmlparser.c,v 1.11 2005/06/12 23:44:29 oliverschmidt Exp $
 *
 */

/* htmlparser.c:
 *
 * Implements a very simplistic HTML parser. It recognizes HTML links
 * (<a href>-tags), HTML img alt tags, a few text flow break tags
 * (<br>, <p>, <h>), the <li> tag (but does not even try to
 * distinguish between <ol> or <ul>) as well as HTML comment tags
 * (<!-- -->).
 *
 * To save memory, the HTML parser is state machine driven, which
 * means that it will shave off one character from the HTML page,
 * process that character, and return to the next. Another way of
 * doing it would be to buffer a number of characters and process them
 * together.
 *
 * The main function in this file is the htmlparser_parse() function
 * which takes a part of an HTML file as an argument; the parser state
 * is kept in a file-local htmlparser_state structure. The
 * htmlparser_parse() function will call the helper functions
 * parse_word() and parse_tag(). Those functions will in turn call
 * the callback functions htmlparser_word(), htmlparser_newline() and
 * htmlparser_link() (plus the form and render state callbacks when
 * those features are configured). Those functions must be
 * implemented by the using module (e.g., a web browser program).
 *
 * htmlparser_word() will be called for every word of non-tag text.
 *
 * htmlparser_link() will be called whenever a complete link has been
 * found.
 *
 */


#include "htmlparser.h"
#include "html-strings.h"
#include "www-conf.h"
#include "cc.h"

#include <string.h>

/* Debug tracing.  With the condition below set to 1 every
   PRINTF((...)) call expands to nothing, so its arguments are never
   evaluated.  Change it to 0 to forward the calls to printf(); this
   is why callers wrap the argument list in a second pair of
   parentheses. */
#if 1
#define PRINTF(x)
#else
#include <stdio.h>
#define PRINTF(x) printf x
#endif


/*-----------------------------------------------------------------------------------*/
/* Character codes used by the parser, spelled out numerically so the
   parser does not depend on the compiler's execution character set. */
#define ISO_A     0x41
#define ISO_B     0x42
#define ISO_E     0x45
#define ISO_F     0x46
#define ISO_G     0x47
#define ISO_H     0x48
#define ISO_I     0x49
#define ISO_L     0x4c
#define ISO_M     0x4d
#define ISO_P     0x50
#define ISO_R     0x52
#define ISO_T     0x54

/* Lower-case letters: the upper-case code with bit 0x20 set. */
#define ISO_a     (ISO_A | 0x20)
#define ISO_b     (ISO_B | 0x20)
#define ISO_e     (ISO_E | 0x20)
#define ISO_f     (ISO_F | 0x20)
#define ISO_g     (ISO_G | 0x20)
#define ISO_h     (ISO_H | 0x20)
#define ISO_i     (ISO_I | 0x20)
#define ISO_l     (ISO_L | 0x20)
#define ISO_m     (ISO_M | 0x20)
#define ISO_p     (ISO_P | 0x20)
#define ISO_r     (ISO_R | 0x20)
#define ISO_t     (ISO_T | 0x20)

#define ISO_ht    0x09
#define ISO_nl    0x0a
#define ISO_cr    0x0d
#define ISO_space 0x20
#define ISO_bang  0x21
#define ISO_citation 0x22      /* Double quote. */
#define ISO_ampersand 0x26
#define ISO_citation2 0x27     /* Single quote. */
#define ISO_asterisk 0x2a
#define ISO_dash  0x2d
#define ISO_slash 0x2f
#define ISO_semicolon 0x3b
#define ISO_lt    0x3c
#define ISO_eq    0x3d
#define ISO_gt    0x3e

/* NOTE: these two names are swapped relative to the glyphs they
   stand for: ISO_rbrack (0x5b) is '[' and ISO_lbrack (0x5d) is ']'.
   parse_tag() emits ISO_rbrack before and ISO_lbrack after a frame
   link, so the rendered output is correct; the names are kept as they
   are because other code refers to them. */
#define ISO_rbrack 0x5b
#define ISO_lbrack 0x5d

/* Minor states: where in the character stream the parser currently
   is (see parse_word()). */
#define MINORSTATE_NONE    0
#define MINORSTATE_TEXT    1 /* Parse normal text */
#define MINORSTATE_EXTCHAR 2 /* Check for semi-colon */
#define MINORSTATE_TAG     3 /* Check for name of tag. */
#define MINORSTATE_TAGEND  4 /* Scan for end of tag. */
#define MINORSTATE_TAGATTR 5 /* Parse tag attr. */
#define MINORSTATE_TAGATTRSPACE 6 /* Parse optional space after tag
                                     attr. */
#define MINORSTATE_TAGATTRPARAM 7 /* Parse tag attr parameter. */
#define MINORSTATE_TAGATTRPARAMNQ 8 /* Parse tag attr parameter without
                                       quotation marks. */
#define MINORSTATE_HTMLCOMMENT 9 /* Scan for HTML comment end */

/* Major states: what kind of content the parser is inside of. */
#define MAJORSTATE_NONE    0
#define MAJORSTATE_BODY    1
#define MAJORSTATE_LINK    2
#define MAJORSTATE_FORM    3
#define MAJORSTATE_DISCARD 4


146struct htmlparser_state {
adamdunkels269d7be2004-09-03 09:55:22 +0000147
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000148 unsigned char minorstate;
149 char tag[20];
150 unsigned char tagptr;
151 char tagattr[20];
152 unsigned char tagattrptr;
153 char tagattrparam[WWW_CONF_MAX_URLLEN];
154 unsigned char tagattrparamptr;
155 unsigned char lastchar, quotechar;
156 unsigned char majorstate, lastmajorstate;
157 char linkurl[WWW_CONF_MAX_URLLEN];
adamdunkels269d7be2004-09-03 09:55:22 +0000158
oliverschmidt7eb29812005-05-20 20:49:30 +0000159 char word[WWW_CONF_WEBPAGE_WIDTH];
160 unsigned char wordlen;
adamdunkels269d7be2004-09-03 09:55:22 +0000161
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000162#if WWW_CONF_FORMS
163 char formaction[WWW_CONF_MAX_FORMACTIONLEN];
164 char formname[WWW_CONF_MAX_FORMNAMELEN];
165 unsigned char inputtype;
166 char inputname[WWW_CONF_MAX_INPUTNAMELEN];
167 char inputvalue[WWW_CONF_MAX_INPUTVALUELEN];
168 unsigned char inputvaluesize;
169#endif /* WWW_CONF_FORMS */
170};
171
172static struct htmlparser_state s;
173
174/*-----------------------------------------------------------------------------------*/
175static char last[1] = {0xff};
176
adamdunkels6f6d88c2003-09-04 19:33:05 +0000177static const char *tags[] = {
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000178#define TAG_FIRST 0
179#define TAG_SLASHA 0
180 html_slasha,
181#define TAG_SLASHCENTER 1
182 html_slashcenter,
183#define TAG_SLASHFORM 2
184 html_slashform,
185#define TAG_SLASHH 3
186 html_slashh,
187#define TAG_SLASHSCRIPT 4
188 html_slashscript,
189#define TAG_SLASHSELECT 5
190 html_slashselect,
191#define TAG_SLASHSTYLE 6
192 html_slashstyle,
193#define TAG_A 7
194 html_a,
195#define TAG_BODY 8
196 html_body,
197#define TAG_BR 9
198 html_br,
199#define TAG_CENTER 10
200 html_center,
201#define TAG_FORM 11
202 html_form,
203#define TAG_FRAME 12
204 html_frame,
205#define TAG_H1 13
206 html_h1,
207#define TAG_H2 14
208 html_h2,
209#define TAG_H3 15
210 html_h3,
211#define TAG_H4 16
212 html_h4,
213#define TAG_IMG 17
214 html_img,
215#define TAG_INPUT 18
216 html_input,
217#define TAG_LI 19
218 html_li,
219#define TAG_P 20
220 html_p,
221#define TAG_SCRIPT 21
222 html_script,
223#define TAG_SELECT 22
224 html_select,
225#define TAG_STYLE 23
226 html_style,
227#define TAG_TR 24
228 html_tr,
229#define TAG_LAST 25
230 last,
231};
232
233/*-----------------------------------------------------------------------------------*/
adamdunkels9d1aaef2003-04-05 12:21:37 +0000234static unsigned char CC_FASTCALL
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000235iswhitespace(char c)
236{
237 return (c == ISO_space ||
238 c == ISO_nl ||
239 c == ISO_cr ||
240 c == ISO_ht);
241}
242/*-----------------------------------------------------------------------------------*/
adamdunkels269d7be2004-09-03 09:55:22 +0000243void
244htmlparser_init(void)
245{
246 s.majorstate = s.lastmajorstate = MAJORSTATE_DISCARD;
247 s.minorstate = MINORSTATE_TEXT;
248 s.lastchar = 0;
249}
250/*-----------------------------------------------------------------------------------*/
251static char CC_FASTCALL
252lowercase(char c)
253{
254 /* XXX: This is a *brute force* approach to lower-case
255 converting and should *not* be used anywhere else! It
256 works for our purposes, however (i.e., HTML tags). */
257 if(c > 0x40) {
258 return (c & 0x1f) | 0x60;
259 } else {
260 return c;
261 }
262}
263/*-----------------------------------------------------------------------------------*/
264static void
265endtagfound(void)
266{
267 s.tag[s.tagptr] = 0;
268 s.tagattr[s.tagattrptr] = 0;
269 s.tagattrparam[s.tagattrparamptr] = 0;
270}
271/*-----------------------------------------------------------------------------------*/
272static void CC_FASTCALL
273switch_majorstate(unsigned char newstate)
274{
275 if(s.majorstate != newstate) {
276 PRINTF(("Switching state from %d to %d (%d)\n", s.majorstate, newstate, s.lastmajorstate));
277 s.lastmajorstate = s.majorstate;
278 s.majorstate = newstate;
279 }
280}
281/*-----------------------------------------------------------------------------------*/
282static void CC_FASTCALL
283add_char(unsigned char c)
284{
oliverschmidt7eb29812005-05-20 20:49:30 +0000285 if(s.wordlen < WWW_CONF_WEBPAGE_WIDTH - 1 && c < 0x80) {
adamdunkels269d7be2004-09-03 09:55:22 +0000286 s.word[s.wordlen] = c;
287 ++s.wordlen;
adamdunkels269d7be2004-09-03 09:55:22 +0000288 }
289}
290/*-----------------------------------------------------------------------------------*/
291static void
292do_word(void)
293{
294 if(s.wordlen > 0) {
295 if(s.majorstate == MAJORSTATE_LINK) {
296 if(s.word[s.wordlen] != ISO_space) {
297 add_char(ISO_space);
298 }
299 } else if(s.majorstate == MAJORSTATE_DISCARD) {
300 s.wordlen = 0;
301 } else {
302 s.word[s.wordlen] = '\0';
303 htmlparser_word(s.word, s.wordlen);
304 s.wordlen = 0;
305 }
306 }
307}
/*-----------------------------------------------------------------------------------*/
/* Finishes the word in progress via do_word() and then reports a
   line break through the htmlparser_newline() callback. */
static void
newline(void)
{
  do_word();
  htmlparser_newline();
}
315/*-----------------------------------------------------------------------------------*/
adamdunkels9d1aaef2003-04-05 12:21:37 +0000316static unsigned char CC_FASTCALL
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000317find_tag(char *tag)
318{
319 static unsigned char first, last, i, tabi;
320 static char tagc;
321
adamdunkels269d7be2004-09-03 09:55:22 +0000322 first = TAG_FIRST;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000323 last = TAG_LAST;
324 i = 0;
325
326 do {
327 tagc = tag[i];
328
329 if(tagc == 0 &&
330 tags[first][i] == 0) {
331 return first;
332 }
adamdunkels269d7be2004-09-03 09:55:22 +0000333
334 tabi = first;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000335
336 /* First, find first matching tag from table. */
337 while(tagc > (tags[tabi])[i] &&
338 tabi < last) {
339 ++tabi;
340 }
341 first = tabi;
342
343 /* Second, find last matching tag from table. */
344 while(tagc == (tags[tabi])[i] &&
345 tabi < last) {
346 ++tabi;
347 }
348 last = tabi;
349
adamdunkels269d7be2004-09-03 09:55:22 +0000350 /* If first and last matching tags are equal, we have a non-match
351 and return. Else we continue with the next character. */
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000352 ++i;
adamdunkels269d7be2004-09-03 09:55:22 +0000353
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000354 } while(last != first);
355 return TAG_LAST;
356}
357/*-----------------------------------------------------------------------------------*/
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000358static void
359parse_tag(void)
360{
361 static char *tagattrparam;
oliverschmidt35514d52005-06-12 23:44:29 +0000362 static unsigned char size;
adamdunkels269d7be2004-09-03 09:55:22 +0000363
364 static char dummy;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000365
366 PRINTF(("Parsing tag '%s' '%s' '%s'\n",
367 s.tag, s.tagattr, s.tagattrparam));
368
369 switch(find_tag(s.tag)) {
370 case TAG_P:
371 case TAG_H1:
372 case TAG_H2:
373 case TAG_H3:
374 case TAG_H4:
adamdunkels269d7be2004-09-03 09:55:22 +0000375 /* parse_char(ISO_nl);*/
376 newline();
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000377 /* FALLTHROUGH */
378 case TAG_BR:
379 case TAG_TR:
380 case TAG_SLASHH:
adamdunkels269d7be2004-09-03 09:55:22 +0000381 /* parse_char(ISO_nl);*/
382 dummy = 0;
383 newline();
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000384 break;
385 case TAG_LI:
adamdunkels269d7be2004-09-03 09:55:22 +0000386 newline();
387 add_char(ISO_asterisk);
388 add_char(ISO_space);
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000389 break;
390 case TAG_SCRIPT:
391 case TAG_STYLE:
392 case TAG_SELECT:
393 switch_majorstate(MAJORSTATE_DISCARD);
394 break;
395 case TAG_SLASHSCRIPT:
396 case TAG_SLASHSTYLE:
397 case TAG_SLASHSELECT:
oliverschmidt3c50b3f2005-05-20 21:49:54 +0000398 do_word();
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000399 switch_majorstate(s.lastmajorstate);
400 break;
401 case TAG_BODY:
402 s.majorstate = s.lastmajorstate = MAJORSTATE_BODY;
403 break;
404 case TAG_FRAME:
405 if(strncmp(s.tagattr, html_src, sizeof(html_src)) == 0 &&
406 s.tagattrparam[0] != 0) {
407 switch_majorstate(MAJORSTATE_BODY);
adamdunkels269d7be2004-09-03 09:55:22 +0000408 newline();
409 add_char(ISO_rbrack);
410 do_word();
411 htmlparser_link((char *)html_frame, strlen(html_frame), s.tagattrparam);
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000412 PRINTF(("Frame [%s]\n", s.tagattrparam));
adamdunkels269d7be2004-09-03 09:55:22 +0000413 add_char(ISO_lbrack);
414 newline();
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000415 }
416 break;
417 case TAG_IMG:
418 if(strncmp(s.tagattr, html_alt, sizeof(html_alt)) == 0 &&
419 s.tagattrparam[0] != 0) {
adamdunkels269d7be2004-09-03 09:55:22 +0000420 /* parse_char(ISO_lt);*/
421 add_char(ISO_lt);
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000422 tagattrparam = &s.tagattrparam[0];
423 while(*tagattrparam) {
adamdunkels269d7be2004-09-03 09:55:22 +0000424 /* parse_char(*tagattrparam);*/
425 add_char(*tagattrparam);
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000426 ++tagattrparam;
427 }
adamdunkels269d7be2004-09-03 09:55:22 +0000428 /* parse_char(ISO_gt);*/
429 add_char(ISO_gt);
430 do_word();
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000431 }
432 break;
433 case TAG_A:
434 PRINTF(("A %s %s\n", s.tagattr, s.tagattrparam));
435 if(strncmp(s.tagattr, html_href, sizeof(html_href)) == 0 &&
436 s.tagattrparam[0] != 0) {
437 strcpy(s.linkurl, s.tagattrparam);
adamdunkels269d7be2004-09-03 09:55:22 +0000438 do_word();
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000439 switch_majorstate(MAJORSTATE_LINK);
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000440 }
441 break;
442 case TAG_SLASHA:
443 if(s.majorstate == MAJORSTATE_LINK) {
444 switch_majorstate(s.lastmajorstate);
adamdunkels269d7be2004-09-03 09:55:22 +0000445 s.word[s.wordlen] = 0;
446 htmlparser_link(s.word, s.wordlen, s.linkurl);
447 s.wordlen = 0;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000448 }
449 break;
450#if WWW_CONF_FORMS
451 case TAG_FORM:
452 PRINTF(("Form tag\n"));
453 switch_majorstate(MAJORSTATE_FORM);
454 if(strncmp(s.tagattr, html_action, sizeof(html_action)) == 0) {
455 PRINTF(("Form action '%s'\n", s.tagattrparam));
456 strncpy(s.formaction, s.tagattrparam, WWW_CONF_MAX_FORMACTIONLEN - 1);
457 } else if(strncmp(s.tagattr, html_name, sizeof(html_name)) == 0) {
458 PRINTF(("Form name '%s'\n", s.tagattrparam));
459 strncpy(s.formname, s.tagattrparam, WWW_CONF_MAX_FORMNAMELEN - 1);
460 }
461 s.inputname[0] = s.inputvalue[0] = 0;
462 break;
463 case TAG_SLASHFORM:
464 switch_majorstate(MAJORSTATE_BODY);
465 s.formaction[0] = s.formname[0] = 0;
466 break;
467 case TAG_INPUT:
468 if(s.majorstate == MAJORSTATE_FORM) {
469 /* First check if we are called at the end of an input tag. If
470 so, we should render the input widget. */
471 if(s.tagattr[0] == 0 &&
472 s.inputname[0] != 0) {
473 PRINTF(("Render input type %d\n", s.inputtype));
474 switch(s.inputtype) {
475 case HTMLPARSER_INPUTTYPE_NONE:
476 case HTMLPARSER_INPUTTYPE_TEXT:
oliverschmidt35514d52005-06-12 23:44:29 +0000477 s.inputvalue[s.inputvaluesize] = 0;
478 htmlparser_inputfield(s.inputvaluesize, s.inputvalue, s.inputname,
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000479 s.formname, s.formaction);
480 break;
481 case HTMLPARSER_INPUTTYPE_SUBMIT:
adamdunkelsfe6cd592003-08-09 13:29:53 +0000482 case HTMLPARSER_INPUTTYPE_IMAGE:
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000483 htmlparser_submitbutton(s.inputvalue, s.inputname,
484 s.formname, s.formaction);
485 break;
486 }
487 s.inputtype = HTMLPARSER_INPUTTYPE_NONE;
488 } else {
489 PRINTF(("Input '%s' '%s'\n", s.tagattr, s.tagattrparam));
490 if(strncmp(s.tagattr, html_type, sizeof(html_type)) == 0) {
491 if(strncmp(s.tagattrparam, html_submit,
492 sizeof(html_submit)) == 0) {
493 s.inputtype = HTMLPARSER_INPUTTYPE_SUBMIT;
494 } else if(strncmp(s.tagattrparam, html_image,
495 sizeof(html_image)) == 0) {
496 s.inputtype = HTMLPARSER_INPUTTYPE_IMAGE;
497 } else if(strncmp(s.tagattrparam, html_text,
498 sizeof(html_text)) == 0) {
499 s.inputtype = HTMLPARSER_INPUTTYPE_TEXT;
500 } else {
501 s.inputtype = HTMLPARSER_INPUTTYPE_OTHER;
502 }
503 } else if(strncmp(s.tagattr, html_name,
504 sizeof(html_name)) == 0) {
505 strncpy(s.inputname, s.tagattrparam,
506 WWW_CONF_MAX_INPUTNAMELEN);
507 } else if(strncmp(s.tagattr, html_alt,
508 sizeof(html_alt)) == 0 &&
509 s.inputtype == HTMLPARSER_INPUTTYPE_IMAGE) {
510 strncpy(s.inputvalue, s.tagattrparam,
511 WWW_CONF_MAX_INPUTVALUELEN);
512 } else if(strncmp(s.tagattr, html_value,
513 sizeof(html_value)) == 0) {
514 strncpy(s.inputvalue, s.tagattrparam,
515 WWW_CONF_MAX_INPUTVALUELEN);
516 } else if(strncmp(s.tagattr, html_size,
517 sizeof(html_size)) == 0) {
518 size = 0;
519 if(s.tagattrparam[0] >= '0' &&
520 s.tagattrparam[0] <= '9') {
521 size = s.tagattrparam[0] - '0';
522 if(s.tagattrparam[1] >= '0' &&
523 s.tagattrparam[1] <= '9') {
524 size = size * 10 + (s.tagattrparam[1] - '0');
525 }
526 }
527 if(size >= WWW_CONF_MAX_INPUTVALUELEN) {
528 size = WWW_CONF_MAX_INPUTVALUELEN - 1;
529 }
530 s.inputvaluesize = size;
531 /* strncpy(s.inputvalue, s.tagattrparam,
532 WWW_CONF_MAX_INPUTVALUELEN);*/
533 }
534 }
535
536 }
537 break;
538#endif /* WWW_CONF_FORMS */
539#if WWW_CONF_RENDERSTATE
540 case TAG_CENTER:
adamdunkels269d7be2004-09-03 09:55:22 +0000541 /* parse_char(ISO_nl); */
542 newline();
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000543 htmlparser_renderstate(HTMLPARSER_RENDERSTATE_BEGIN |
544 HTMLPARSER_RENDERSTATE_CENTER);
545 break;
546 case TAG_SLASHCENTER:
adamdunkels269d7be2004-09-03 09:55:22 +0000547 /* parse_char(ISO_nl);*/
548 newline();
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000549 htmlparser_renderstate(HTMLPARSER_RENDERSTATE_END |
550 HTMLPARSER_RENDERSTATE_CENTER);
551 break;
552#endif /* WWW_CONF_RENDERSTATE */
553 }
554}
555/*-----------------------------------------------------------------------------------*/
adamdunkels269d7be2004-09-03 09:55:22 +0000556static u16_t
557parse_word(char *data, u8_t dlen)
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000558{
adamdunkels269d7be2004-09-03 09:55:22 +0000559 static u8_t i;
560 static u8_t len;
561 unsigned char c;
562
563 len = dlen;
564
565 switch(s.minorstate) {
566 case MINORSTATE_TEXT:
567 for(i = 0; i < len; ++i) {
568 c = data[i];
569 if(iswhitespace(c)) {
570 do_word();
571 } else if(c == ISO_lt) {
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000572 s.minorstate = MINORSTATE_TAG;
573 s.tagptr = 0;
adamdunkels269d7be2004-09-03 09:55:22 +0000574 /* do_word();*/
575 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000576 } else if(c == ISO_ampersand) {
577 s.minorstate = MINORSTATE_EXTCHAR;
adamdunkels269d7be2004-09-03 09:55:22 +0000578 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000579 } else {
adamdunkels269d7be2004-09-03 09:55:22 +0000580 add_char(c);
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000581 }
adamdunkels269d7be2004-09-03 09:55:22 +0000582 }
583 break;
584 case MINORSTATE_EXTCHAR:
585 for(i = 0; i < len; ++i) {
586 c = data[i];
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000587 if(c == ISO_semicolon) {
588 s.minorstate = MINORSTATE_TEXT;
adamdunkels269d7be2004-09-03 09:55:22 +0000589 add_char(' ');
590 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000591 } else if(iswhitespace(c)) {
592 s.minorstate = MINORSTATE_TEXT;
adamdunkels269d7be2004-09-03 09:55:22 +0000593 add_char('&');
594 add_char(' ');
595 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000596 }
adamdunkels269d7be2004-09-03 09:55:22 +0000597 }
598 break;
599 case MINORSTATE_TAG:
600 /* We are currently parsing within the name of a tag. We check
601 for the end of a tag (the '>' character) or whitespace (which
602 indicates that we should parse a tag attr argument
603 instead). */
604 for(i = 0; i < len; ++i) {
605 c = data[i];
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000606 if(c == ISO_gt) {
607 /* Full tag found. We continue parsing regular text. */
608 s.minorstate = MINORSTATE_TEXT;
609 s.tagattrptr = s.tagattrparamptr = 0;
610 endtagfound();
611 parse_tag();
adamdunkels269d7be2004-09-03 09:55:22 +0000612 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000613 } else if(iswhitespace(c)) {
614 /* The name of the tag found. We continue parsing the tag
615 attr.*/
616 s.minorstate = MINORSTATE_TAGATTR;
617 s.tagattrptr = 0;
618 endtagfound();
adamdunkels269d7be2004-09-03 09:55:22 +0000619 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000620 } else {
621 /* Keep track of the name of the tag, but convert it to
622 lower case. */
adamdunkels269d7be2004-09-03 09:55:22 +0000623
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000624 s.tag[s.tagptr] = lowercase(c);
625 ++s.tagptr;
626 /* Check if the ->tag field is full. If so, we just eat up
627 any data left in the tag. */
628 if(s.tagptr == sizeof(s.tag)) {
629 s.minorstate = MINORSTATE_TAGEND;
adamdunkels269d7be2004-09-03 09:55:22 +0000630 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000631 }
632 }
adamdunkels269d7be2004-09-03 09:55:22 +0000633
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000634 /* Check for HTML comment, indicated by <!-- */
635 if(s.tagptr == 3 &&
636 s.tag[0] == ISO_bang &&
637 s.tag[1] == ISO_dash &&
638 s.tag[2] == ISO_dash) {
639 PRINTF(("Starting comment...\n"));
640 s.minorstate = MINORSTATE_HTMLCOMMENT;
641 s.tagptr = 0;
642 endtagfound();
adamdunkels269d7be2004-09-03 09:55:22 +0000643 break;
644 }
645 }
646 break;
647 case MINORSTATE_TAGATTR:
648 /* We parse the "tag attr", i.e., the "href" in <a
649 href="...">. */
650 for(i = 0; i < len; ++i) {
651 c = data[i];
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000652 if(c == ISO_gt) {
653 /* Full tag found. */
654 s.minorstate = MINORSTATE_TEXT;
655 s.tagattrparamptr = 0;
656 s.tagattrptr = 0;
657 endtagfound();
658 parse_tag();
659 s.tagptr = 0;
660 endtagfound();
adamdunkels269d7be2004-09-03 09:55:22 +0000661 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000662 } else if(iswhitespace(c)) {
663 if(s.tagattrptr == 0) {
664 /* Discard leading spaces. */
665 } else {
666 /* A non-leading space is the end of the attribute. */
667 s.tagattrparamptr = 0;
668 endtagfound();
669 parse_tag();
670 s.minorstate = MINORSTATE_TAGATTRSPACE;
adamdunkels269d7be2004-09-03 09:55:22 +0000671 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000672 /* s.tagattrptr = 0;
673 endtagfound();*/
674 }
675 } else if(c == ISO_eq) {
676 s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
677 s.tagattrparamptr = 0;
678 endtagfound();
adamdunkels269d7be2004-09-03 09:55:22 +0000679 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000680 } else {
681 s.tagattr[s.tagattrptr] = lowercase(c);
682 ++s.tagattrptr;
683 /* Check if the "tagattr" field is full. If so, we just eat
684 up any data left in the tag. */
685 if(s.tagattrptr == sizeof(s.tagattr)) {
686 s.minorstate = MINORSTATE_TAGEND;
adamdunkels269d7be2004-09-03 09:55:22 +0000687 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000688 }
689 }
adamdunkels269d7be2004-09-03 09:55:22 +0000690 }
691 break;
692 case MINORSTATE_TAGATTRSPACE:
693 for(i = 0; i < len; ++i) {
694 c = data[i];
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000695 if(iswhitespace(c)) {
696 /* Discard spaces. */
697 } else if(c == ISO_eq) {
698 s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
699 s.tagattrparamptr = 0;
700 endtagfound();
701 parse_tag();
adamdunkels269d7be2004-09-03 09:55:22 +0000702 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000703 } else {
704 s.tagattr[0] = lowercase(c);
705 s.tagattrptr = 1;
706 s.minorstate = MINORSTATE_TAGATTR;
adamdunkels269d7be2004-09-03 09:55:22 +0000707 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000708 }
adamdunkels269d7be2004-09-03 09:55:22 +0000709 }
710 break;
711 case MINORSTATE_TAGATTRPARAMNQ:
712 /* We are parsing the "tag attr parameter", i.e., the link part
713 in <a href="link">. */
714 for(i = 0; i < len; ++i) {
715 c = data[i];
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000716 if(c == ISO_gt) {
717 /* Full tag found. */
718 endtagfound();
719 parse_tag();
720 s.minorstate = MINORSTATE_TEXT;
721 s.tagattrptr = 0;
722 endtagfound();
adamdunkels269d7be2004-09-03 09:55:22 +0000723 parse_tag();
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000724 s.tagptr = 0;
725 endtagfound();
adamdunkels269d7be2004-09-03 09:55:22 +0000726 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000727 } else if(iswhitespace(c) &&
728 s.tagattrparamptr == 0) {
729 /* Discard leading spaces. */
730 } else if((c == ISO_citation ||
731 c == ISO_citation2) &&
732 s.tagattrparamptr == 0) {
733 s.minorstate = MINORSTATE_TAGATTRPARAM;
734 s.quotechar = c;
735 PRINTF(("tag attr param q found\n"));
adamdunkels269d7be2004-09-03 09:55:22 +0000736 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000737 } else if(iswhitespace(c)) {
738 PRINTF(("Non-leading space found at %d\n",
739 s.tagattrparamptr));
740 /* Stop parsing if a non-leading space was found */
741 endtagfound();
742 parse_tag();
743
744 s.minorstate = MINORSTATE_TAGATTR;
745 s.tagattrptr = 0;
746 endtagfound();
adamdunkels269d7be2004-09-03 09:55:22 +0000747 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000748 } else {
749 s.tagattrparam[s.tagattrparamptr] = c;
750 ++s.tagattrparamptr;
751 /* Check if the "tagattr" field is full. If so, we just eat
752 up any data left in the tag. */
753 if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
754 s.minorstate = MINORSTATE_TAGEND;
adamdunkels269d7be2004-09-03 09:55:22 +0000755 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000756 }
757 }
adamdunkels269d7be2004-09-03 09:55:22 +0000758 }
759 break;
760 case MINORSTATE_TAGATTRPARAM:
761 /* We are parsing the "tag attr parameter", i.e., the link
762 part in <a href="link">. */
763 for(i = 0; i < len; ++i) {
764 c = data[i];
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000765 if(c == s.quotechar) {
766 /* Found end of tag attr parameter. */
767 endtagfound();
768 parse_tag();
adamdunkels269d7be2004-09-03 09:55:22 +0000769
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000770 s.minorstate = MINORSTATE_TAGATTR;
771 s.tagattrptr = 0;
772 endtagfound();
adamdunkels269d7be2004-09-03 09:55:22 +0000773 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000774 } else {
775 if(iswhitespace(c)) {
adamdunkels269d7be2004-09-03 09:55:22 +0000776 s.tagattrparam[s.tagattrparamptr] = ISO_space;
777 } else {
778 s.tagattrparam[s.tagattrparamptr] = c;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000779 }
adamdunkels269d7be2004-09-03 09:55:22 +0000780
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000781 ++s.tagattrparamptr;
782 /* Check if the "tagattr" field is full. If so, we just eat
783 up any data left in the tag. */
784 if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
785 s.minorstate = MINORSTATE_TAGEND;
adamdunkels269d7be2004-09-03 09:55:22 +0000786 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000787 }
788 }
adamdunkels269d7be2004-09-03 09:55:22 +0000789 }
790 break;
791 case MINORSTATE_HTMLCOMMENT:
792 for(i = 0; i < len; ++i) {
793 c = data[i];
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000794 if(c == ISO_dash) {
795 ++s.tagptr;
796 } else if(c == ISO_gt && s.tagptr > 0) {
797 PRINTF(("Comment done.\n"));
798 s.minorstate = MINORSTATE_TEXT;
adamdunkels269d7be2004-09-03 09:55:22 +0000799 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000800 } else {
801 s.tagptr = 0;
802 }
adamdunkels269d7be2004-09-03 09:55:22 +0000803 }
804 break;
805 case MINORSTATE_TAGEND:
806 /* Discard characters until a '>' is seen. */
807 for(i = 0; i < len; ++i) {
808 if(data[i] == ISO_gt) {
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000809 s.minorstate = MINORSTATE_TEXT;
810 s.tagattrptr = 0;
811 endtagfound();
812 parse_tag();
adamdunkels269d7be2004-09-03 09:55:22 +0000813 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000814 }
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000815 }
adamdunkels269d7be2004-09-03 09:55:22 +0000816 break;
817 default:
818 i = 0;
819 break;
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000820 }
adamdunkels269d7be2004-09-03 09:55:22 +0000821 if(i >= len) {
822 return len;
823 }
824 return i + 1;
825}
826/*-----------------------------------------------------------------------------------*/
827void
828htmlparser_parse(char *data, u16_t datalen)
829{
830 u16_t plen;
831
832 while(datalen > 0) {
833 if(datalen > 255) {
834 plen = parse_word(data, 255);
835 } else {
836 plen = parse_word(data, datalen);
837 }
838 datalen -= plen;
839 data += plen;
840 }
adamdunkelsca9ddcb2003-03-19 14:13:31 +0000841}
842/*-----------------------------------------------------------------------------------*/