blob: 1ee41d1cfbd84c96476d633edf4014825899ca1b [file] [log] [blame]
adamdunkelsca9ddcb2003-03-19 14:13:31 +00001/*
2 * Copyright (c) 2002, Adam Dunkels.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following
12 * disclaimer in the documentation and/or other materials provided
13 * with the distribution.
14 * 3. All advertising materials mentioning features or use of this
15 * software must display the following acknowledgement:
16 * This product includes software developed by Adam Dunkels.
17 * 4. The name of the author may not be used to endorse or promote
18 * products derived from this software without specific prior
19 * written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
22 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
25 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
27 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
29 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * This file is part of the Contiki desktop environment
34 *
35 * $Id: htmlparser.c,v 1.1 2003/03/19 14:13:33 adamdunkels Exp $
36 *
37 */
38
/* htmlparser.c:
 *
 * Implements a very simplistic HTML parser. It recognizes HTML links
 * (<a href>-tags), HTML img alt tags, a few text flow break tags
 * (<br>, <p>, <h>), the <li> tag (but does not even try to
 * distinguish between <ol> or <ul>) as well as HTML comment tags
 * (<!-- -->).
 *
 * To save memory, the HTML parser is state machine driven, which
 * means that it will shave off one character from the HTML page,
 * process that character, and return to the next. Another way of
 * doing it would be to buffer a number of characters and process them
 * together.
 *
 * The main function in this file is the htmlparser_parse() function
 * which takes a htmlparser_state structure and a part of an HTML file
 * as an argument. The htmlparser_parse() function will call the
 * helper functions parse_char() and parse_tag(). Those functions will
 * in turn call the two callback functions htmlparser_char() and
 * htmlparser_tag(). Those functions must be implemented by the using
 * module (e.g., a web browser program).
 *
 * htmlparser_char() will be called for every non-tag character.
 *
 * htmlparser_tag() will be called whenever a full tag has been found.
 *
 */
66
67
68#include "htmlparser.h"
69#include "html-strings.h"
70#include "www-conf.h"
71
/* Debug tracing. PRINTF() takes a complete, parenthesized printf()
   argument list, e.g. PRINTF(("value %d\n", v)). It expands to nothing
   unless the condition below is changed to 1. */
#if 0
#include <stdio.h>
#define PRINTF(x) printf x
#else
#define PRINTF(x)
#endif

/* Qualifier placed on small helper functions: expands to __fastcall__
   when WITH_CC65 is defined and to nothing otherwise. */
#if defined(WITH_CC65)
#define FASTCALL __fastcall__
#else
#define FASTCALL
#endif
84
/*-----------------------------------------------------------------------------------*/
/* Character codes used by the parser, spelled out as numeric values. */

/* Upper-case letters. */
#define ISO_A 0x41
#define ISO_B 0x42
#define ISO_E 0x45
#define ISO_F 0x46
#define ISO_G 0x47
#define ISO_H 0x48
#define ISO_I 0x49
#define ISO_L 0x4c
#define ISO_M 0x4d
#define ISO_P 0x50
#define ISO_R 0x52
#define ISO_T 0x54

/* Lower-case letters: the matching upper-case code with bit 0x20 set. */
#define ISO_a 0x61
#define ISO_b 0x62
#define ISO_e 0x65
#define ISO_f 0x66
#define ISO_g 0x67
#define ISO_h 0x68
#define ISO_i 0x69
#define ISO_l 0x6c
#define ISO_m 0x6d
#define ISO_p 0x70
#define ISO_r 0x72
#define ISO_t 0x74

/* Whitespace. */
#define ISO_ht 0x09
#define ISO_nl 0x0a
#define ISO_cr 0x0d
#define ISO_space 0x20

/* Punctuation. ISO_citation is the double quote and ISO_citation2 the
   single quote. */
#define ISO_bang 0x21
#define ISO_citation 0x22
#define ISO_ampersand 0x26
#define ISO_citation2 0x27
#define ISO_asterisk 0x2a
#define ISO_dash 0x2d
#define ISO_slash 0x2f
#define ISO_semicolon 0x3b
#define ISO_lt 0x3c
#define ISO_eq 0x3d
#define ISO_gt 0x3e

/* NOTE: these two names are swapped relative to the glyphs (0x5b is
   '[' and 0x5d is ']'). The values are kept as they are because the
   rest of the file uses ISO_rbrack to open and ISO_lbrack to close. */
#define ISO_rbrack 0x5b
#define ISO_lbrack 0x5d
130
/* Minor states: where the character-level scanner currently is. */
#define MINORSTATE_NONE            0
#define MINORSTATE_TEXT            1 /* Parse normal text. */
#define MINORSTATE_EXTCHAR         2 /* Check for semi-colon. */
#define MINORSTATE_TAG             3 /* Check for name of tag. */
#define MINORSTATE_TAGEND          4 /* Scan for end of tag. */
#define MINORSTATE_TAGATTR         5 /* Parse tag attr. */
#define MINORSTATE_TAGATTRSPACE    6 /* Parse optional space after
                                        tag attr. */
#define MINORSTATE_TAGATTRPARAM    7 /* Parse tag attr parameter. */
#define MINORSTATE_TAGATTRPARAMNQ  8 /* Parse tag attr parameter
                                        without quotation marks. */
#define MINORSTATE_HTMLCOMMENT     9 /* Scan for HTML comment end. */

/* Major states: what kind of content the scanner is inside of. */
#define MAJORSTATE_NONE            0
#define MAJORSTATE_BODY            1
#define MAJORSTATE_LINK            2
#define MAJORSTATE_FORM            3
#define MAJORSTATE_DISCARD         4
149
150
151struct htmlparser_state {
152 unsigned char minorstate;
153 char tag[20];
154 unsigned char tagptr;
155 char tagattr[20];
156 unsigned char tagattrptr;
157 char tagattrparam[WWW_CONF_MAX_URLLEN];
158 unsigned char tagattrparamptr;
159 unsigned char lastchar, quotechar;
160 unsigned char majorstate, lastmajorstate;
161 char linkurl[WWW_CONF_MAX_URLLEN];
162 char linktext[40];
163 unsigned char linktextptr;
164#if WWW_CONF_FORMS
165 char formaction[WWW_CONF_MAX_FORMACTIONLEN];
166 char formname[WWW_CONF_MAX_FORMNAMELEN];
167 unsigned char inputtype;
168 char inputname[WWW_CONF_MAX_INPUTNAMELEN];
169 char inputvalue[WWW_CONF_MAX_INPUTVALUELEN];
170 unsigned char inputvaluesize;
171#endif /* WWW_CONF_FORMS */
172};
173
174static struct htmlparser_state s;
175
176/*-----------------------------------------------------------------------------------*/
177static char last[1] = {0xff};
178
179static char *tags[] = {
180#define TAG_FIRST 0
181#define TAG_SLASHA 0
182 html_slasha,
183#define TAG_SLASHCENTER 1
184 html_slashcenter,
185#define TAG_SLASHFORM 2
186 html_slashform,
187#define TAG_SLASHH 3
188 html_slashh,
189#define TAG_SLASHSCRIPT 4
190 html_slashscript,
191#define TAG_SLASHSELECT 5
192 html_slashselect,
193#define TAG_SLASHSTYLE 6
194 html_slashstyle,
195#define TAG_A 7
196 html_a,
197#define TAG_BODY 8
198 html_body,
199#define TAG_BR 9
200 html_br,
201#define TAG_CENTER 10
202 html_center,
203#define TAG_FORM 11
204 html_form,
205#define TAG_FRAME 12
206 html_frame,
207#define TAG_H1 13
208 html_h1,
209#define TAG_H2 14
210 html_h2,
211#define TAG_H3 15
212 html_h3,
213#define TAG_H4 16
214 html_h4,
215#define TAG_IMG 17
216 html_img,
217#define TAG_INPUT 18
218 html_input,
219#define TAG_LI 19
220 html_li,
221#define TAG_P 20
222 html_p,
223#define TAG_SCRIPT 21
224 html_script,
225#define TAG_SELECT 22
226 html_select,
227#define TAG_STYLE 23
228 html_style,
229#define TAG_TR 24
230 html_tr,
231#define TAG_LAST 25
232 last,
233};
234
235/*-----------------------------------------------------------------------------------*/
236static unsigned char FASTCALL
237iswhitespace(char c)
238{
239 return (c == ISO_space ||
240 c == ISO_nl ||
241 c == ISO_cr ||
242 c == ISO_ht);
243}
244/*-----------------------------------------------------------------------------------*/
245static unsigned char FASTCALL
246find_tag(char *tag)
247{
248 static unsigned char first, last, i, tabi;
249 static char tagc;
250
251 tabi = first = TAG_FIRST;
252 last = TAG_LAST;
253 i = 0;
254
255 do {
256 tagc = tag[i];
257
258 if(tagc == 0 &&
259 tags[first][i] == 0) {
260 return first;
261 }
262
263 /* First, find first matching tag from table. */
264 while(tagc > (tags[tabi])[i] &&
265 tabi < last) {
266 ++tabi;
267 }
268 first = tabi;
269
270 /* Second, find last matching tag from table. */
271 while(tagc == (tags[tabi])[i] &&
272 tabi < last) {
273 ++tabi;
274 }
275 last = tabi;
276
277 /* If first and last matching tags are equal, we have a match and
278 return. Else we continue with the next character. */
279 ++i;
280 tabi = first;
281 } while(last != first);
282 return TAG_LAST;
283}
284/*-----------------------------------------------------------------------------------*/
285static void FASTCALL
286parse_char(unsigned char c)
287{
288 if(c < 0x80) {
289 if(s.majorstate == MAJORSTATE_LINK) {
290 if(s.linktextptr < sizeof(s.linktext)) {
291 if(iswhitespace(c)) {
292 c = ISO_space;
293 }
294 s.linktext[s.linktextptr] = c;
295 ++s.linktextptr;
296 }
297 } else if(s.majorstate != MAJORSTATE_DISCARD) {
298 htmlparser_char(c);
299 }
300 }
301}
302/*-----------------------------------------------------------------------------------*/
303static void
304switch_majorstate(unsigned char newstate)
305{
306 if(s.majorstate != newstate) {
307 PRINTF(("Switching state from %d to %d (%d)\n", s.majorstate, newstate, s.lastmajorstate));
308 s.lastmajorstate = s.majorstate;
309 s.majorstate = newstate;
310 }
311}
312/*-----------------------------------------------------------------------------------*/
313static void
314parse_tag(void)
315{
316 static char *tagattrparam;
317 static unsigned char size, i;
318
319 PRINTF(("Parsing tag '%s' '%s' '%s'\n",
320 s.tag, s.tagattr, s.tagattrparam));
321
322 switch(find_tag(s.tag)) {
323 case TAG_P:
324 case TAG_H1:
325 case TAG_H2:
326 case TAG_H3:
327 case TAG_H4:
328 parse_char(ISO_nl);
329 /* FALLTHROUGH */
330 case TAG_BR:
331 case TAG_TR:
332 case TAG_SLASHH:
333 parse_char(ISO_nl);
334 break;
335 case TAG_LI:
336 parse_char(ISO_nl);
337 parse_char(ISO_asterisk);
338 parse_char(ISO_space);
339 break;
340 case TAG_SCRIPT:
341 case TAG_STYLE:
342 case TAG_SELECT:
343 switch_majorstate(MAJORSTATE_DISCARD);
344 break;
345 case TAG_SLASHSCRIPT:
346 case TAG_SLASHSTYLE:
347 case TAG_SLASHSELECT:
348 switch_majorstate(s.lastmajorstate);
349 break;
350 case TAG_BODY:
351 s.majorstate = s.lastmajorstate = MAJORSTATE_BODY;
352 break;
353 case TAG_FRAME:
354 if(strncmp(s.tagattr, html_src, sizeof(html_src)) == 0 &&
355 s.tagattrparam[0] != 0) {
356 switch_majorstate(MAJORSTATE_BODY);
357 parse_char(ISO_nl);
358 parse_char(ISO_rbrack);
359 parse_char(ISO_space);
360 htmlparser_link(html_frame, s.tagattrparam);
361 PRINTF(("Frame [%s]\n", s.tagattrparam));
362 parse_char(ISO_space);
363 parse_char(ISO_lbrack);
364 parse_char(ISO_nl);
365 }
366 break;
367 case TAG_IMG:
368 if(strncmp(s.tagattr, html_alt, sizeof(html_alt)) == 0 &&
369 s.tagattrparam[0] != 0) {
370 parse_char(ISO_lt);
371 tagattrparam = &s.tagattrparam[0];
372 while(*tagattrparam) {
373 parse_char(*tagattrparam);
374 ++tagattrparam;
375 }
376 parse_char(ISO_gt);
377 }
378 break;
379 case TAG_A:
380 PRINTF(("A %s %s\n", s.tagattr, s.tagattrparam));
381 if(strncmp(s.tagattr, html_href, sizeof(html_href)) == 0 &&
382 s.tagattrparam[0] != 0) {
383 strcpy(s.linkurl, s.tagattrparam);
384 switch_majorstate(MAJORSTATE_LINK);
385 s.linktextptr = 0;
386 }
387 break;
388 case TAG_SLASHA:
389 if(s.majorstate == MAJORSTATE_LINK) {
390 switch_majorstate(s.lastmajorstate);
391 s.linktext[s.linktextptr] = 0;
392 htmlparser_link(s.linktext, s.linkurl);
393 PRINTF(("Link '%s' [%s]\n", s.linktext, s.linkurl));
394 }
395 break;
396#if WWW_CONF_FORMS
397 case TAG_FORM:
398 PRINTF(("Form tag\n"));
399 switch_majorstate(MAJORSTATE_FORM);
400 if(strncmp(s.tagattr, html_action, sizeof(html_action)) == 0) {
401 PRINTF(("Form action '%s'\n", s.tagattrparam));
402 strncpy(s.formaction, s.tagattrparam, WWW_CONF_MAX_FORMACTIONLEN - 1);
403 } else if(strncmp(s.tagattr, html_name, sizeof(html_name)) == 0) {
404 PRINTF(("Form name '%s'\n", s.tagattrparam));
405 strncpy(s.formname, s.tagattrparam, WWW_CONF_MAX_FORMNAMELEN - 1);
406 }
407 s.inputname[0] = s.inputvalue[0] = 0;
408 break;
409 case TAG_SLASHFORM:
410 switch_majorstate(MAJORSTATE_BODY);
411 s.formaction[0] = s.formname[0] = 0;
412 break;
413 case TAG_INPUT:
414 if(s.majorstate == MAJORSTATE_FORM) {
415 /* First check if we are called at the end of an input tag. If
416 so, we should render the input widget. */
417 if(s.tagattr[0] == 0 &&
418 s.inputname[0] != 0) {
419 PRINTF(("Render input type %d\n", s.inputtype));
420 switch(s.inputtype) {
421 case HTMLPARSER_INPUTTYPE_NONE:
422 case HTMLPARSER_INPUTTYPE_TEXT:
423 for(i = 0; i < s.inputvaluesize; ++i) {
424 if(s.inputvalue[i] == 0) {
425 memset(&s.inputvalue[i], ISO_space, s.inputvaluesize - i);
426 s.inputvalue[s.inputvaluesize] = 0;
427 break;
428 }
429 }
430 htmlparser_inputfield(s.inputvalue, s.inputname,
431 s.formname, s.formaction);
432 break;
433 case HTMLPARSER_INPUTTYPE_SUBMIT:
434 case HTMLPARSER_INPUTTYPE_IMAGE:
435 htmlparser_submitbutton(s.inputvalue, s.inputname,
436 s.formname, s.formaction);
437 break;
438 }
439 s.inputtype = HTMLPARSER_INPUTTYPE_NONE;
440 } else {
441 PRINTF(("Input '%s' '%s'\n", s.tagattr, s.tagattrparam));
442 if(strncmp(s.tagattr, html_type, sizeof(html_type)) == 0) {
443 if(strncmp(s.tagattrparam, html_submit,
444 sizeof(html_submit)) == 0) {
445 s.inputtype = HTMLPARSER_INPUTTYPE_SUBMIT;
446 } else if(strncmp(s.tagattrparam, html_image,
447 sizeof(html_image)) == 0) {
448 s.inputtype = HTMLPARSER_INPUTTYPE_IMAGE;
449 } else if(strncmp(s.tagattrparam, html_text,
450 sizeof(html_text)) == 0) {
451 s.inputtype = HTMLPARSER_INPUTTYPE_TEXT;
452 } else {
453 s.inputtype = HTMLPARSER_INPUTTYPE_OTHER;
454 }
455 } else if(strncmp(s.tagattr, html_name,
456 sizeof(html_name)) == 0) {
457 strncpy(s.inputname, s.tagattrparam,
458 WWW_CONF_MAX_INPUTNAMELEN);
459 } else if(strncmp(s.tagattr, html_alt,
460 sizeof(html_alt)) == 0 &&
461 s.inputtype == HTMLPARSER_INPUTTYPE_IMAGE) {
462 strncpy(s.inputvalue, s.tagattrparam,
463 WWW_CONF_MAX_INPUTVALUELEN);
464 } else if(strncmp(s.tagattr, html_value,
465 sizeof(html_value)) == 0) {
466 strncpy(s.inputvalue, s.tagattrparam,
467 WWW_CONF_MAX_INPUTVALUELEN);
468 } else if(strncmp(s.tagattr, html_size,
469 sizeof(html_size)) == 0) {
470 size = 0;
471 if(s.tagattrparam[0] >= '0' &&
472 s.tagattrparam[0] <= '9') {
473 size = s.tagattrparam[0] - '0';
474 if(s.tagattrparam[1] >= '0' &&
475 s.tagattrparam[1] <= '9') {
476 size = size * 10 + (s.tagattrparam[1] - '0');
477 }
478 }
479 if(size >= WWW_CONF_MAX_INPUTVALUELEN) {
480 size = WWW_CONF_MAX_INPUTVALUELEN - 1;
481 }
482 s.inputvaluesize = size;
483 /* strncpy(s.inputvalue, s.tagattrparam,
484 WWW_CONF_MAX_INPUTVALUELEN);*/
485 }
486 }
487
488 }
489 break;
490#endif /* WWW_CONF_FORMS */
491#if WWW_CONF_RENDERSTATE
492 case TAG_CENTER:
493 parse_char(ISO_nl);
494 htmlparser_renderstate(HTMLPARSER_RENDERSTATE_BEGIN |
495 HTMLPARSER_RENDERSTATE_CENTER);
496 break;
497 case TAG_SLASHCENTER:
498 parse_char(ISO_nl);
499 htmlparser_renderstate(HTMLPARSER_RENDERSTATE_END |
500 HTMLPARSER_RENDERSTATE_CENTER);
501 break;
502#endif /* WWW_CONF_RENDERSTATE */
503 }
504}
505/*-----------------------------------------------------------------------------------*/
506void
507htmlparser_init(void)
508{
509 s.majorstate = s.lastmajorstate = MAJORSTATE_DISCARD;
510 s.minorstate = MINORSTATE_TEXT;
511 s.lastchar = 0;
512}
513/*-----------------------------------------------------------------------------------*/
514static char FASTCALL
515lowercase(char c)
516{
517 /* XXX: This is a *brute force* approach to lower-case
518 converting and should *not* be used anywhere else! It
519 works for our purposes, however (i.e., HTML tags). */
520 if(c > 0x40) {
521 return (c & 0x1f) | 0x60;
522 } else {
523 return c;
524 }
525}
526/*-----------------------------------------------------------------------------------*/
527static void
528endtagfound(void)
529{
530 s.tag[s.tagptr] = 0;
531 s.tagattr[s.tagattrptr] = 0;
532 s.tagattrparam[s.tagattrparamptr] = 0;
533}
534/*-----------------------------------------------------------------------------------*/
535/* htmlparser_parse():
536 *
537 * This is the main function in the HTML parser module and it parses
538 * the HTML data in the input buffer. The htmlparser_state is updated
539 * as the buffer is parsed character by character. The functions
540 * parse_char() and parse_tag() (defined earlier in this file) are
541 * called to process regular characters and HTML tags,
542 * respectively.
543 *
544 * Note that the input buffer does not have to contain full HTML tags;
545 * the parser is state machine driven in order to be able to work with
546 * buffers that have been divided in any way.
547 */
548void
549htmlparser_parse(char *data, u16_t len)
550{
551 static char c;
552
553 while(len > 0) {
554 c = *data;
555 --len;
556 ++data;
557
558 switch(s.minorstate) {
559 case MINORSTATE_NONE:
560 break;
561 case MINORSTATE_TEXT:
562 /* We are currently parsing some text, so we look for signs of
563 an HTML tag starting (i.e., a '<' character). We also
564 compress any whitespace character to one single space
565 character (' '). */
566 if(c == ISO_lt) {
567 s.minorstate = MINORSTATE_TAG;
568 s.tagptr = 0;
569 endtagfound();
570 } else if(c == ISO_ampersand) {
571 s.minorstate = MINORSTATE_EXTCHAR;
572 } else {
573 if(iswhitespace(c)) {
574 if(s.lastchar != ISO_space) {
575 parse_char(' ');
576 s.lastchar = ISO_space;
577 c = ISO_space;
578 }
579 } else {
580 parse_char(c);
581 }
582 }
583 break;
584 case MINORSTATE_EXTCHAR:
585 if(c == ISO_semicolon) {
586 s.minorstate = MINORSTATE_TEXT;
587 parse_char(' ');
588 } else if(iswhitespace(c)) {
589 s.minorstate = MINORSTATE_TEXT;
590 parse_char('&');
591 parse_char(' ');
592 }
593 break;
594 case MINORSTATE_TAG:
595 /* We are currently parsing within the name of a tag. We check
596 for the end of a tag (the '>' character) or whitespace (which
597 indicates that we should parse a tag attr argument
598 instead). */
599 if(c == ISO_gt) {
600 /* Full tag found. We continue parsing regular text. */
601 s.minorstate = MINORSTATE_TEXT;
602 s.tagattrptr = s.tagattrparamptr = 0;
603 endtagfound();
604 parse_tag();
605 } else if(iswhitespace(c)) {
606 /* The name of the tag found. We continue parsing the tag
607 attr.*/
608 s.minorstate = MINORSTATE_TAGATTR;
609 s.tagattrptr = 0;
610 endtagfound();
611 } else {
612 /* Keep track of the name of the tag, but convert it to
613 lower case. */
614
615 s.tag[s.tagptr] = lowercase(c);
616 ++s.tagptr;
617 /* Check if the ->tag field is full. If so, we just eat up
618 any data left in the tag. */
619 if(s.tagptr == sizeof(s.tag)) {
620 s.minorstate = MINORSTATE_TAGEND;
621 }
622 }
623
624 /* Check for HTML comment, indicated by <!-- */
625 if(s.tagptr == 3 &&
626 s.tag[0] == ISO_bang &&
627 s.tag[1] == ISO_dash &&
628 s.tag[2] == ISO_dash) {
629 PRINTF(("Starting comment...\n"));
630 s.minorstate = MINORSTATE_HTMLCOMMENT;
631 s.tagptr = 0;
632 endtagfound();
633 }
634 break;
635 case MINORSTATE_TAGATTR:
636 /* We parse the "tag attr", i.e., the "href" in <a
637 href="...">. */
638 if(c == ISO_gt) {
639 /* Full tag found. */
640 s.minorstate = MINORSTATE_TEXT;
641 s.tagattrparamptr = 0;
642 s.tagattrptr = 0;
643 endtagfound();
644 parse_tag();
645 s.tagptr = 0;
646 endtagfound();
647
648 } else if(iswhitespace(c)) {
649 if(s.tagattrptr == 0) {
650 /* Discard leading spaces. */
651 } else {
652 /* A non-leading space is the end of the attribute. */
653 s.tagattrparamptr = 0;
654 endtagfound();
655 parse_tag();
656 s.minorstate = MINORSTATE_TAGATTRSPACE;
657 /* s.tagattrptr = 0;
658 endtagfound();*/
659 }
660 } else if(c == ISO_eq) {
661 s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
662 s.tagattrparamptr = 0;
663 endtagfound();
664 } else {
665 s.tagattr[s.tagattrptr] = lowercase(c);
666 ++s.tagattrptr;
667 /* Check if the "tagattr" field is full. If so, we just eat
668 up any data left in the tag. */
669 if(s.tagattrptr == sizeof(s.tagattr)) {
670 s.minorstate = MINORSTATE_TAGEND;
671 }
672 }
673 break;
674 case MINORSTATE_TAGATTRSPACE:
675 if(iswhitespace(c)) {
676 /* Discard spaces. */
677 } else if(c == ISO_eq) {
678 s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
679 s.tagattrparamptr = 0;
680 endtagfound();
681 parse_tag();
682 } else {
683 s.tagattr[0] = lowercase(c);
684 s.tagattrptr = 1;
685 s.minorstate = MINORSTATE_TAGATTR;
686 }
687 break;
688 case MINORSTATE_TAGATTRPARAMNQ:
689 /* We are parsing the "tag attr parameter", i.e., the link part
690 in <a href="link">. */
691 if(c == ISO_gt) {
692 /* Full tag found. */
693 endtagfound();
694 parse_tag();
695 s.minorstate = MINORSTATE_TEXT;
696 s.tagattrptr = 0;
697 endtagfound();
698 parse_tag();
699 s.tagptr = 0;
700 endtagfound();
701 } else if(iswhitespace(c) &&
702 s.tagattrparamptr == 0) {
703 /* Discard leading spaces. */
704 } else if((c == ISO_citation ||
705 c == ISO_citation2) &&
706 s.tagattrparamptr == 0) {
707 s.minorstate = MINORSTATE_TAGATTRPARAM;
708 s.quotechar = c;
709 PRINTF(("tag attr param q found\n"));
710 } else if(iswhitespace(c)) {
711 PRINTF(("Non-leading space found at %d\n",
712 s.tagattrparamptr));
713 /* Stop parsing if a non-leading space was found */
714 endtagfound();
715 parse_tag();
716
717 s.minorstate = MINORSTATE_TAGATTR;
718 s.tagattrptr = 0;
719 endtagfound();
720 } else {
721 s.tagattrparam[s.tagattrparamptr] = c;
722 ++s.tagattrparamptr;
723 /* Check if the "tagattr" field is full. If so, we just eat
724 up any data left in the tag. */
725 if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
726 s.minorstate = MINORSTATE_TAGEND;
727 }
728 }
729
730 break;
731 case MINORSTATE_TAGATTRPARAM:
732 /* We are parsing the "tag attr parameter", i.e., the link
733 part in <a href="link">. */
734 if(c == s.quotechar) {
735 /* Found end of tag attr parameter. */
736 endtagfound();
737 parse_tag();
738
739 s.minorstate = MINORSTATE_TAGATTR;
740 s.tagattrptr = 0;
741 endtagfound();
742 } else {
743 if(iswhitespace(c)) {
744 c = ISO_space;
745 }
746 s.tagattrparam[s.tagattrparamptr] = c;
747 ++s.tagattrparamptr;
748 /* Check if the "tagattr" field is full. If so, we just eat
749 up any data left in the tag. */
750 if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
751 s.minorstate = MINORSTATE_TAGEND;
752 }
753 }
754
755 break;
756 case MINORSTATE_HTMLCOMMENT:
757 if(c == ISO_dash) {
758 ++s.tagptr;
759 } else if(c == ISO_gt && s.tagptr > 0) {
760 PRINTF(("Comment done.\n"));
761 s.minorstate = MINORSTATE_TEXT;
762 } else {
763 s.tagptr = 0;
764 }
765 break;
766 case MINORSTATE_TAGEND:
767 /* Discard characters until a '>' is seen. */
768 if(c == ISO_gt) {
769 s.minorstate = MINORSTATE_TEXT;
770 s.tagattrptr = 0;
771 endtagfound();
772 parse_tag();
773 }
774 break;
775 }
776
777 s.lastchar = c;
778 }
779}
780/*-----------------------------------------------------------------------------------*/