contiki/apps/htmlparser.c - contiki-1.x - Gitiles

 /*
  * Copyright (c) 2002, Adam Dunkels.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above
  *    copyright notice, this list of conditions and the following
  *    disclaimer in the documentation and/or other materials provided
  *    with the distribution.
  * 3. The name of the author may not be used to endorse or promote
  *    products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * This file is part of the Contiki desktop environment
  *
  * $Id: htmlparser.c,v 1.11 2005/06/12 23:44:29 oliverschmidt Exp $
  *
  */

 /* htmlparser.c:
  *
  * Implements a very simplistic HTML parser. It recognizes HTML links
  * (<a href>-tags), HTML img alt tags, a few text flow break tags
  * (<br>, <p>, <h>), the <li> tag (but does not even try to
  * distinguish between <ol> or <ul>) as well as HTML comment tags
  * (<!-- -->).
  *
  * To save memory, the HTML parser is state machine driven, which
  * means that it will shave off one character from the HTML page,
  * process that character, and return to the next. Another way of
  * doing it would be to buffer a number of characters and process them
  * together.
  *
  * The main function in this file is the htmlparser_parse() function,
  * which takes a part of an HTML file as its argument; the parser
  * state is kept in a file-local htmlparser_state structure. The
  * htmlparser_parse() function will call the helper function
  * parse_word(), which in turn calls parse_tag(). Those functions will
  * in turn call the callback functions htmlparser_word(),
  * htmlparser_newline() and htmlparser_link() (plus
  * htmlparser_inputfield(), htmlparser_submitbutton() and
  * htmlparser_renderstate() when forms and render states are
  * enabled). Those functions must be implemented by the using
  * module (e.g., a web browser program).
  *
  * htmlparser_word() will be called for every word of non-tag text.
  *
  * htmlparser_link() will be called whenever a link or frame has been
  * found.
  *
  */


 #include "htmlparser.h"
 #include "html-strings.h"
 #include "www-conf.h"
 #include "cc.h"

 #include <string.h>

 #if 1
 #define PRINTF(x)
 #else
 #include <stdio.h>
 #define PRINTF(x) printf x
 #endif


 /*-----------------------------------------------------------------------------------*/
 /* Numeric character codes used by the parser. Each value is the
    ASCII code of the character the name refers to. */

 /* Upper-case letters. */
 #define ISO_A     0x41
 #define ISO_B     0x42
 #define ISO_E     0x45
 #define ISO_F     0x46
 #define ISO_G     0x47
 #define ISO_H     0x48
 #define ISO_I     0x49
 #define ISO_L     0x4c
 #define ISO_M     0x4d
 #define ISO_P     0x50
 #define ISO_R     0x52
 #define ISO_T     0x54

 /* Lower-case letters: the upper-case code with bit 0x20 set. */
 #define ISO_a     (ISO_A | 0x20)
 #define ISO_b     (ISO_B | 0x20)
 #define ISO_e     (ISO_E | 0x20)
 #define ISO_f     (ISO_F | 0x20)
 #define ISO_g     (ISO_G | 0x20)
 #define ISO_h     (ISO_H | 0x20)
 #define ISO_i     (ISO_I | 0x20)
 #define ISO_l     (ISO_L | 0x20)
 #define ISO_m     (ISO_M | 0x20)
 #define ISO_p     (ISO_P | 0x20)
 #define ISO_r     (ISO_R | 0x20)
 #define ISO_t     (ISO_T | 0x20)

 /* Whitespace and punctuation. ISO_citation is the double quote and
    ISO_citation2 the single quote. */
 #define ISO_ht    0x09
 #define ISO_nl    0x0a
 #define ISO_cr    0x0d
 #define ISO_space 0x20
 #define ISO_bang  0x21
 #define ISO_citation 0x22
 #define ISO_ampersand 0x26
 #define ISO_citation2 0x27
 #define ISO_asterisk 0x2a
 #define ISO_dash  0x2d
 #define ISO_slash 0x2f
 #define ISO_semicolon  0x3b
 #define ISO_lt    0x3c
 #define ISO_eq    0x3d
 #define ISO_gt    0x3e

 /* NOTE: 0x5b is '[' and 0x5d is ']' in ASCII, so the "r"/"l" in
    these two names is the reverse of the usual right/left reading.
    parse_tag() emits ISO_rbrack before a frame link and ISO_lbrack
    after it, which yields "[...]". */
 #define ISO_rbrack 0x5b
 #define ISO_lbrack 0x5d

 /* Minor states: where inside the character stream the scanner in
    parse_word() currently is. */
 #define MINORSTATE_NONE           0
 #define MINORSTATE_TEXT           1 /* Parse normal text */
 #define MINORSTATE_EXTCHAR        2 /* Check for semi-colon */
 #define MINORSTATE_TAG            3 /* Check for name of tag. */
 #define MINORSTATE_TAGEND         4 /* Scan for end of tag. */
 #define MINORSTATE_TAGATTR        5 /* Parse tag attr. */
 #define MINORSTATE_TAGATTRSPACE   6 /* Parse optional space after tag
                                        attr. */
 #define MINORSTATE_TAGATTRPARAM   7 /* Parse tag attr parameter. */
 #define MINORSTATE_TAGATTRPARAMNQ 8 /* Parse tag attr parameter without
                                        quotation marks. */
 #define MINORSTATE_HTMLCOMMENT    9 /* Scan for HTML comment end */

 /* Major states: what do_word() does with collected text. In
    MAJORSTATE_LINK the text is accumulated until </a>, in
    MAJORSTATE_DISCARD it is dropped, otherwise it is handed to
    htmlparser_word(). */
 #define MAJORSTATE_NONE       0
 #define MAJORSTATE_BODY       1
 #define MAJORSTATE_LINK       2
 #define MAJORSTATE_FORM       3
 #define MAJORSTATE_DISCARD    4


 /* Complete state of the HTML parser. A single file-local instance,
    s, is shared by every function in this file. */
 struct htmlparser_state {

   /* Scanner position, one of MINORSTATE_*. */
   unsigned char minorstate;

   /* Lower-cased name of the tag being parsed and the index of its
      next free slot. tagptr doubles as the dash counter while an HTML
      comment is being skipped. */
   char tag[20];
   unsigned char tagptr;

   /* Lower-cased name of the attribute being parsed. */
   char tagattr[20];
   unsigned char tagattrptr;

   /* Value of the attribute being parsed. */
   char tagattrparam[WWW_CONF_MAX_URLLEN];
   unsigned char tagattrparamptr;

   /* lastchar is cleared by htmlparser_init(); quotechar is the quote
      character that opened the current attribute value. */
   unsigned char lastchar;
   unsigned char quotechar;

   /* Current and previous MAJORSTATE_* value. */
   unsigned char majorstate;
   unsigned char lastmajorstate;

   /* URL copied from the href attribute of the link being collected. */
   char linkurl[WWW_CONF_MAX_URLLEN];

   /* Text accumulated by add_char() and its length. */
   char word[WWW_CONF_WEBPAGE_WIDTH];
   unsigned char wordlen;

 #if WWW_CONF_FORMS
   /* Attributes of the enclosing <form> and of the <input> tag being
      parsed. inputtype is one of HTMLPARSER_INPUTTYPE_*. */
   char formaction[WWW_CONF_MAX_FORMACTIONLEN];
   char formname[WWW_CONF_MAX_FORMNAMELEN];
   unsigned char inputtype;
   char inputname[WWW_CONF_MAX_INPUTNAMELEN];
   char inputvalue[WWW_CONF_MAX_INPUTVALUELEN];
   unsigned char inputvaluesize;
 #endif /* WWW_CONF_FORMS */
 };

 static struct htmlparser_state s;

 /*-----------------------------------------------------------------------------------*/
 /* Sentinel entry that closes the tag table below. */
 static char last[1] = {0xff};

 /* Table of the tags the parser recognizes. Each TAG_* macro is the
    index of the entry that follows it. find_tag() narrows a
    [first, last) window over this table one character at a time and
    relies on the entries being in ascending character order, so new
    tags must be inserted at the right position and the following
    indexes renumbered. The html_* strings are declared elsewhere
    (presumably in html-strings.h -- confirm). */
 static const char *tags[] = {
 #define TAG_FIRST       0
 #define TAG_SLASHA      0
   html_slasha,
 #define TAG_SLASHCENTER 1
   html_slashcenter,
 #define TAG_SLASHFORM   2
   html_slashform,
 #define TAG_SLASHH      3
   html_slashh,
 #define TAG_SLASHSCRIPT 4
   html_slashscript,
 #define TAG_SLASHSELECT 5
   html_slashselect,
 #define TAG_SLASHSTYLE  6
   html_slashstyle,
 #define TAG_A           7
   html_a,
 #define TAG_BODY        8
   html_body,
 #define TAG_BR          9
   html_br,
 #define TAG_CENTER     10
   html_center,
 #define TAG_FORM       11
   html_form,
 #define TAG_FRAME      12
   html_frame,
 #define TAG_H1         13
   html_h1,
 #define TAG_H2         14
   html_h2,
 #define TAG_H3         15
   html_h3,
 #define TAG_H4         16
   html_h4,
 #define TAG_IMG        17
   html_img,
 #define TAG_INPUT      18
   html_input,
 #define TAG_LI         19
   html_li,
 #define TAG_P          20
   html_p,
 #define TAG_SCRIPT     21
   html_script,
 #define TAG_SELECT     22
   html_select,
 #define TAG_STYLE      23
   html_style,
 #define TAG_TR         24
   html_tr,
   /* TAG_LAST is both the sentinel's index and find_tag()'s "no
      match" return value. */
 #define TAG_LAST       25
   last,
 };

 /*-----------------------------------------------------------------------------------*/
 /* Report whether c is one of the characters the parser treats as
    blank: space, line feed, carriage return or horizontal tab.
    Returns 1 for a blank and 0 for anything else. */
 static unsigned char CC_FASTCALL
 iswhitespace(char c)
 {
   switch(c) {
   case ISO_space:
   case ISO_nl:
   case ISO_cr:
   case ISO_ht:
     return 1;
   default:
     return 0;
   }
 }
 /*-----------------------------------------------------------------------------------*/
 /* Reset the scanner: both major-state slots are set to
    MAJORSTATE_DISCARD, the minor state to plain-text scanning, and
    the remembered last character is cleared. */
 void
 htmlparser_init(void)
 {
   s.lastmajorstate = MAJORSTATE_DISCARD;
   s.majorstate = MAJORSTATE_DISCARD;
   s.minorstate = MINORSTATE_TEXT;
   s.lastchar = 0;
 }
 /*-----------------------------------------------------------------------------------*/
 /* Crude lower-case conversion for tag and attribute names.
  *
  * Characters up to and including 0x40 are returned untouched; every
  * other character keeps only its low five bits and is moved into the
  * 0x60-0x7f range. This also rewrites characters that are not
  * upper-case letters.
  *
  * XXX: This is a *brute force* approach and should *not* be used
  * anywhere else! It works for our purposes, however (i.e., HTML
  * tags).
  */
 static char CC_FASTCALL
 lowercase(char c)
 {
   if(c <= 0x40) {
     return c;
   }
   return (c & 0x1f) | 0x60;
 }
 /*-----------------------------------------------------------------------------------*/
 /* NUL-terminate the tag name, attribute name and attribute value
    buffers at their current write positions, so that they can be
    used as C strings. */
 static void
 endtagfound(void)
 {
   s.tag[s.tagptr] = '\0';
   s.tagattr[s.tagattrptr] = '\0';
   s.tagattrparam[s.tagattrparamptr] = '\0';
 }
 /*-----------------------------------------------------------------------------------*/
 /* Enter major state newstate and remember the state that was left
    in s.lastmajorstate. Nothing changes when the parser is already in
    newstate, so the remembered state is not overwritten by a repeated
    request. */
 static void CC_FASTCALL
 switch_majorstate(unsigned char newstate)
 {
   if(s.majorstate == newstate) {
     return;
   }

   PRINTF(("Switching state from %d to %d (%d)\n", s.majorstate, newstate, s.lastmajorstate));
   s.lastmajorstate = s.majorstate;
   s.majorstate = newstate;
 }
 /*-----------------------------------------------------------------------------------*/
 /* Append c to the word buffer. Characters of 0x80 and above are
    dropped, and so is everything once the buffer holds
    WWW_CONF_WEBPAGE_WIDTH - 1 characters (the last slot is kept free
    for the terminator written elsewhere). */
 static void CC_FASTCALL
 add_char(unsigned char c)
 {
   if(c >= 0x80) {
     return;
   }
   if(s.wordlen >= WWW_CONF_WEBPAGE_WIDTH - 1) {
     return;
   }
   s.word[s.wordlen] = c;
   ++s.wordlen;
 }
 /*-----------------------------------------------------------------------------------*/
 /* Finish the word collected so far, if there is one.
  *
  * - MAJORSTATE_LINK: the link text keeps accumulating; a single
  *   space is appended as word separator unless the text already ends
  *   in one.
  * - MAJORSTATE_DISCARD: the collected text is thrown away.
  * - any other state: the word is NUL-terminated, passed to
  *   htmlparser_word() and the buffer is emptied.
  */
 static void
 do_word(void)
 {
   if(s.wordlen > 0) {
     if(s.majorstate == MAJORSTATE_LINK) {
       /* Look at the last stored character (index wordlen - 1); the
          slot at index wordlen has not been written yet. */
       if(s.word[s.wordlen - 1] != ISO_space) {
         add_char(ISO_space);
       }
     } else if(s.majorstate == MAJORSTATE_DISCARD) {
       s.wordlen = 0;
     } else {
       s.word[s.wordlen] = '\0';
       htmlparser_word(s.word, s.wordlen);
       s.wordlen = 0;
     }
   }
 }
 /*-----------------------------------------------------------------------------------*/
 /* Emit a line break: first flush any pending word through
    do_word(), then notify the using module via htmlparser_newline().
    The order matters so that the word ends up before the break. */
 static void
 newline(void)
 {
   do_word();
   htmlparser_newline();
 }
 /*-----------------------------------------------------------------------------------*/
 /* Look up the NUL-terminated, lower-cased tag name in the tags[]
  * table.
  *
  * The search keeps a window [first, last) of table entries that
  * agree with the name on all characters examined so far and narrows
  * it by one character per iteration; this relies on tags[] being in
  * ascending order.
  *
  * Returns the TAG_* index of the matching entry, or TAG_LAST when
  * the name is not in the table.
  */
 static unsigned char CC_FASTCALL
 find_tag(char *tag)
 {
   static unsigned char first, last, i, tabi;
   static char tagc;

   first = TAG_FIRST;
   last = TAG_LAST;
   i = 0;

   do {
     tagc = tag[i];

     /* Both the name and the first candidate end here: exact match. */
     if(tagc == 0 &&
        tags[first][i] == 0) {
       return first;
     }

     tabi = first;

     /* First, find first matching tag from table. The bound is tested
        before the table is indexed: when tabi reaches TAG_LAST the
        entry is the one-byte sentinel, which must not be read at
        offsets other than 0. */
     while(tabi < last &&
           tagc > (tags[tabi])[i]) {
       ++tabi;
     }
     first = tabi;

     /* Second, find last matching tag from table. */
     while(tabi < last &&
           tagc == (tags[tabi])[i]) {
       ++tabi;
     }
     last = tabi;

     /* If first and last matching tags are equal, we have a non-match
        and return. Else we continue with the next character. */
     ++i;

   } while(last != first);
   return TAG_LAST;
 }
 /*-----------------------------------------------------------------------------------*/
 /* Act on the tag currently described by the parser state.
  *
  * s.tag is looked up with find_tag(); depending on the result the
  * function emits line breaks, switches the major state, reports
  * links, frames and image alt texts, or collects form and input
  * attributes. parse_word() may call this several times for one tag:
  * as attributes are completed (s.tagattr / s.tagattrparam filled in)
  * and again at the closing '>' with an empty s.tagattr. Tags that are
  * not in the table are ignored.
  */
 static void
 parse_tag(void)
 {
   static char *tagattrparam;
   static unsigned char size;

   /* NOTE(review): dummy is assigned below but never read in this
      function; its purpose is not visible here -- confirm before
      removing. */
   static char dummy;

   PRINTF(("Parsing tag '%s' '%s' '%s'\n",
           s.tag, s.tagattr, s.tagattrparam));

   switch(find_tag(s.tag)) {
   case TAG_P:
   case TAG_H1:
   case TAG_H2:
   case TAG_H3:
   case TAG_H4:
     /* Paragraphs and headings produce two line breaks: one here and
        one from the shared code below. */
     newline();
     /* FALLTHROUGH */
   case TAG_BR:
   case TAG_TR:
   case TAG_SLASHH:
     dummy = 0;
     newline();
     break;
   case TAG_LI:
     /* List items start on a new line with "* ". */
     newline();
     add_char(ISO_asterisk);
     add_char(ISO_space);
     break;
   case TAG_SCRIPT:
   case TAG_STYLE:
   case TAG_SELECT:
     /* Everything up to the matching end tag is dropped. */
     switch_majorstate(MAJORSTATE_DISCARD);
     break;
   case TAG_SLASHSCRIPT:
   case TAG_SLASHSTYLE:
   case TAG_SLASHSELECT:
     do_word();
     switch_majorstate(s.lastmajorstate);
     break;
   case TAG_BODY:
     s.majorstate = s.lastmajorstate = MAJORSTATE_BODY;
     break;
   case TAG_FRAME:
     /* A frame with a src attribute is rendered as a link on a line
        of its own, enclosed in brackets. */
     if(strncmp(s.tagattr, html_src, sizeof(html_src)) == 0 &&
        s.tagattrparam[0] != 0) {
       switch_majorstate(MAJORSTATE_BODY);
       newline();
       add_char(ISO_rbrack);
       do_word();
       htmlparser_link((char *)html_frame, strlen(html_frame), s.tagattrparam);
       PRINTF(("Frame [%s]\n", s.tagattrparam));
       add_char(ISO_lbrack);
       newline();
     }
     break;
   case TAG_IMG:
     /* An image is rendered as its alt text enclosed in '<' and '>'. */
     if(strncmp(s.tagattr, html_alt, sizeof(html_alt)) == 0 &&
        s.tagattrparam[0] != 0) {
       add_char(ISO_lt);
       tagattrparam = &s.tagattrparam[0];
       while(*tagattrparam) {
         add_char(*tagattrparam);
         ++tagattrparam;
       }
       add_char(ISO_gt);
       do_word();
     }
     break;
   case TAG_A:
     PRINTF(("A %s %s\n", s.tagattr, s.tagattrparam));
     if(strncmp(s.tagattr, html_href, sizeof(html_href)) == 0 &&
        s.tagattrparam[0] != 0) {
       /* linkurl and tagattrparam have the same size and
          tagattrparam is always terminated inside its buffer. */
       strcpy(s.linkurl, s.tagattrparam);
       do_word();
       switch_majorstate(MAJORSTATE_LINK);
     }
     break;
   case TAG_SLASHA:
     if(s.majorstate == MAJORSTATE_LINK) {
       /* The text accumulated since <a href> becomes the link text. */
       switch_majorstate(s.lastmajorstate);
       s.word[s.wordlen] = 0;
       htmlparser_link(s.word, s.wordlen, s.linkurl);
       s.wordlen = 0;
     }
     break;
 #if WWW_CONF_FORMS
   case TAG_FORM:
     PRINTF(("Form tag\n"));
     switch_majorstate(MAJORSTATE_FORM);
     if(strncmp(s.tagattr, html_action, sizeof(html_action)) == 0) {
       PRINTF(("Form action '%s'\n", s.tagattrparam));
       strncpy(s.formaction, s.tagattrparam, WWW_CONF_MAX_FORMACTIONLEN - 1);
       s.formaction[WWW_CONF_MAX_FORMACTIONLEN - 1] = 0;
     } else if(strncmp(s.tagattr, html_name, sizeof(html_name)) == 0) {
       PRINTF(("Form name '%s'\n", s.tagattrparam));
       strncpy(s.formname, s.tagattrparam, WWW_CONF_MAX_FORMNAMELEN - 1);
       s.formname[WWW_CONF_MAX_FORMNAMELEN - 1] = 0;
     }
     s.inputname[0] = s.inputvalue[0] = 0;
     break;
   case TAG_SLASHFORM:
     switch_majorstate(MAJORSTATE_BODY);
     s.formaction[0] = s.formname[0] = 0;
     break;
   case TAG_INPUT:
     if(s.majorstate == MAJORSTATE_FORM) {
       /* First check if we are called at the end of an input tag. If
          so, we should render the input widget. */
       if(s.tagattr[0] == 0 &&
          s.inputname[0] != 0) {
         PRINTF(("Render input type %d\n", s.inputtype));
         switch(s.inputtype) {
         case HTMLPARSER_INPUTTYPE_NONE:
         case HTMLPARSER_INPUTTYPE_TEXT:
           s.inputvalue[s.inputvaluesize] = 0;
           htmlparser_inputfield(s.inputvaluesize, s.inputvalue, s.inputname,
                                 s.formname, s.formaction);
           break;
         case HTMLPARSER_INPUTTYPE_SUBMIT:
         case HTMLPARSER_INPUTTYPE_IMAGE:
           htmlparser_submitbutton(s.inputvalue, s.inputname,
                                   s.formname, s.formaction);
           break;
         default:
           /* Other input types are not rendered. */
           break;
         }
         s.inputtype = HTMLPARSER_INPUTTYPE_NONE;
       } else {
         PRINTF(("Input '%s' '%s'\n", s.tagattr, s.tagattrparam));
         if(strncmp(s.tagattr, html_type, sizeof(html_type)) == 0) {
           if(strncmp(s.tagattrparam, html_submit,
                      sizeof(html_submit)) == 0) {
             s.inputtype = HTMLPARSER_INPUTTYPE_SUBMIT;
           } else if(strncmp(s.tagattrparam, html_image,
                             sizeof(html_image)) == 0) {
             s.inputtype = HTMLPARSER_INPUTTYPE_IMAGE;
           } else if(strncmp(s.tagattrparam, html_text,
                             sizeof(html_text)) == 0) {
             s.inputtype = HTMLPARSER_INPUTTYPE_TEXT;
           } else {
             s.inputtype = HTMLPARSER_INPUTTYPE_OTHER;
           }
         } else if(strncmp(s.tagattr, html_name,
                           sizeof(html_name)) == 0) {
           /* strncpy() does not terminate the destination when the
              source fills it, so copy one character less than the
              buffer holds and terminate explicitly. */
           strncpy(s.inputname, s.tagattrparam,
                   WWW_CONF_MAX_INPUTNAMELEN - 1);
           s.inputname[WWW_CONF_MAX_INPUTNAMELEN - 1] = 0;
         } else if(strncmp(s.tagattr, html_alt,
                           sizeof(html_alt)) == 0 &&
                   s.inputtype == HTMLPARSER_INPUTTYPE_IMAGE) {
           strncpy(s.inputvalue, s.tagattrparam,
                   WWW_CONF_MAX_INPUTVALUELEN - 1);
           s.inputvalue[WWW_CONF_MAX_INPUTVALUELEN - 1] = 0;
         } else if(strncmp(s.tagattr, html_value,
                           sizeof(html_value)) == 0) {
           strncpy(s.inputvalue, s.tagattrparam,
                   WWW_CONF_MAX_INPUTVALUELEN - 1);
           s.inputvalue[WWW_CONF_MAX_INPUTVALUELEN - 1] = 0;
         } else if(strncmp(s.tagattr, html_size,
                           sizeof(html_size)) == 0) {
           /* Parse at most two decimal digits and clamp the result so
              that it is a valid index into inputvalue. */
           size = 0;
           if(s.tagattrparam[0] >= '0' &&
              s.tagattrparam[0] <= '9') {
             size = s.tagattrparam[0] - '0';
             if(s.tagattrparam[1] >= '0' &&
                s.tagattrparam[1] <= '9') {
               size = size * 10 + (s.tagattrparam[1] - '0');
             }
           }
           if(size >= WWW_CONF_MAX_INPUTVALUELEN) {
             size = WWW_CONF_MAX_INPUTVALUELEN - 1;
           }
           s.inputvaluesize = size;
         }
       }

     }
     break;
 #endif /* WWW_CONF_FORMS */
 #if WWW_CONF_RENDERSTATE
   case TAG_CENTER:
     newline();
     htmlparser_renderstate(HTMLPARSER_RENDERSTATE_BEGIN |
                            HTMLPARSER_RENDERSTATE_CENTER);
     break;
   case TAG_SLASHCENTER:
     newline();
     htmlparser_renderstate(HTMLPARSER_RENDERSTATE_END |
                            HTMLPARSER_RENDERSTATE_CENTER);
     break;
 #endif /* WWW_CONF_RENDERSTATE */
   default:
     /* Unknown or unhandled tag: nothing to do. */
     break;
   }
 }
 /*-----------------------------------------------------------------------------------*/
 /* Feed up to dlen characters from data through the scanner.
  *
  * The characters are handled according to s.minorstate. Scanning
  * stops as soon as a character changes the minor state or completes
  * a tag or attribute, so the caller has to call again with the
  * remaining data.
  *
  * Returns the number of characters consumed: dlen when the whole
  * chunk was scanned, otherwise the index of the character that
  * stopped the scan plus one.
  */
 static u16_t
 parse_word(char *data, u8_t dlen)
 {
   static u8_t i;
   static u8_t len;
   unsigned char c;

   len = dlen;

   switch(s.minorstate) {
   case MINORSTATE_TEXT:
     /* Plain text: collect words until a tag or an entity starts. */
     for(i = 0; i < len; ++i) {
       c = data[i];
       if(iswhitespace(c)) {
         do_word();
       } else if(c == ISO_lt) {
         s.minorstate = MINORSTATE_TAG;
         s.tagptr = 0;
         break;
       } else if(c == ISO_ampersand) {
         s.minorstate = MINORSTATE_EXTCHAR;
         break;
       } else {
         add_char(c);
       }
     }
     break;
   case MINORSTATE_EXTCHAR:
     /* Inside "&...;": the entity is replaced by a space. If
        whitespace shows up before the ';' the '&' was a literal
        ampersand. */
     for(i = 0; i < len; ++i) {
       c = data[i];
       if(c == ISO_semicolon) {
         s.minorstate = MINORSTATE_TEXT;
         add_char(' ');
         break;
       } else if(iswhitespace(c)) {
         s.minorstate = MINORSTATE_TEXT;
         add_char('&');
         add_char(' ');
         break;
       }
     }
     break;
   case MINORSTATE_TAG:
     /* We are currently parsing within the name of a tag. We check
        for the end of a tag (the '>' character) or whitespace (which
        indicates that we should parse a tag attr argument
        instead). */
     for(i = 0; i < len; ++i) {
       c = data[i];
       if(c == ISO_gt) {
         /* Full tag found. We continue parsing regular text. */
         s.minorstate = MINORSTATE_TEXT;
         s.tagattrptr = s.tagattrparamptr = 0;
         endtagfound();
         parse_tag();
         break;
       } else if(iswhitespace(c)) {
         /* The name of the tag found. We continue parsing the tag
            attr.*/
         s.minorstate = MINORSTATE_TAGATTR;
         s.tagattrptr = 0;
         endtagfound();
         break;
       } else {
         /* Keep track of the name of the tag, but convert it to
            lower case. */
         s.tag[s.tagptr] = lowercase(c);
         ++s.tagptr;
         /* Check if the ->tag field is full. If so, we just eat up
            any data left in the tag. The last slot is kept free for
            the terminator that endtagfound() writes at
            s.tag[s.tagptr]. */
         if(s.tagptr >= sizeof(s.tag) - 1) {
           s.minorstate = MINORSTATE_TAGEND;
           break;
         }
       }

       /* Check for HTML comment, indicated by <!-- */
       if(s.tagptr == 3 &&
          s.tag[0] == ISO_bang &&
          s.tag[1] == ISO_dash &&
          s.tag[2] == ISO_dash) {
         PRINTF(("Starting comment...\n"));
         s.minorstate = MINORSTATE_HTMLCOMMENT;
         s.tagptr = 0;
         endtagfound();
         break;
       }
     }
     break;
   case MINORSTATE_TAGATTR:
     /* We parse the "tag attr", i.e., the "href" in <a
        href="...">. */
     for(i = 0; i < len; ++i) {
       c = data[i];
       if(c == ISO_gt) {
         /* Full tag found. */
         s.minorstate = MINORSTATE_TEXT;
         s.tagattrparamptr = 0;
         s.tagattrptr = 0;
         endtagfound();
         parse_tag();
         s.tagptr = 0;
         endtagfound();
         break;
       } else if(iswhitespace(c)) {
         if(s.tagattrptr == 0) {
           /* Discard leading spaces. */
         } else {
           /* A non-leading space is the end of the attribute. */
           s.tagattrparamptr = 0;
           endtagfound();
           parse_tag();
           s.minorstate = MINORSTATE_TAGATTRSPACE;
           break;
         }
       } else if(c == ISO_eq) {
         s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
         s.tagattrparamptr = 0;
         endtagfound();
         break;
       } else {
         s.tagattr[s.tagattrptr] = lowercase(c);
         ++s.tagattrptr;
         /* Check if the "tagattr" field is full. If so, we just eat
            up any data left in the tag. The last slot is kept free
            for the terminator that endtagfound() writes at
            s.tagattr[s.tagattrptr]. */
         if(s.tagattrptr >= sizeof(s.tagattr) - 1) {
           s.minorstate = MINORSTATE_TAGEND;
           break;
         }
       }
     }
     break;
   case MINORSTATE_TAGATTRSPACE:
     /* Whitespace after an attribute name: either an '=' follows or
        the next attribute name starts. */
     for(i = 0; i < len; ++i) {
       c = data[i];
       if(iswhitespace(c)) {
         /* Discard spaces. */
       } else if(c == ISO_eq) {
         s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
         s.tagattrparamptr = 0;
         endtagfound();
         parse_tag();
         break;
       } else {
         s.tagattr[0] = lowercase(c);
         s.tagattrptr = 1;
         s.minorstate = MINORSTATE_TAGATTR;
         break;
       }
     }
     break;
   case MINORSTATE_TAGATTRPARAMNQ:
     /* We are parsing the "tag attr parameter", i.e., the link part
        in <a href="link">. */
     for(i = 0; i < len; ++i) {
       c = data[i];
       if(c == ISO_gt) {
         /* Full tag found. The tag is reported twice: once with the
            attribute that was just completed and once with an empty
            attribute name to mark the end of the tag. */
         endtagfound();
         parse_tag();
         s.minorstate = MINORSTATE_TEXT;
         s.tagattrptr = 0;
         endtagfound();
         parse_tag();
         s.tagptr = 0;
         endtagfound();
         break;
       } else if(iswhitespace(c) &&
                 s.tagattrparamptr == 0) {
         /* Discard leading spaces. */
       } else if((c == ISO_citation ||
                  c == ISO_citation2) &&
                 s.tagattrparamptr == 0) {
         s.minorstate = MINORSTATE_TAGATTRPARAM;
         s.quotechar = c;
         PRINTF(("tag attr param q found\n"));
         break;
       } else if(iswhitespace(c)) {
         PRINTF(("Non-leading space found at %d\n",
                 s.tagattrparamptr));
         /* Stop parsing if a non-leading space was found */
         endtagfound();
         parse_tag();

         s.minorstate = MINORSTATE_TAGATTR;
         s.tagattrptr = 0;
         endtagfound();
         break;
       } else {
         s.tagattrparam[s.tagattrparamptr] = c;
         ++s.tagattrparamptr;
         /* Check if the "tagattrparam" field is full. If so, we just
            eat up any data left in the tag. */
         if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
           s.minorstate = MINORSTATE_TAGEND;
           break;
         }
       }
     }
     break;
   case MINORSTATE_TAGATTRPARAM:
     /* We are parsing the "tag attr parameter", i.e., the link
        part in <a href="link">. */
     for(i = 0; i < len; ++i) {
       c = data[i];
       if(c == s.quotechar) {
         /* Found end of tag attr parameter. */
         endtagfound();
         parse_tag();

         s.minorstate = MINORSTATE_TAGATTR;
         s.tagattrptr = 0;
         endtagfound();
         break;
       } else {
         /* Any whitespace inside a quoted value is stored as a plain
            space. */
         if(iswhitespace(c)) {
           s.tagattrparam[s.tagattrparamptr] = ISO_space;
         } else {
           s.tagattrparam[s.tagattrparamptr] = c;
         }

         ++s.tagattrparamptr;
         /* Check if the "tagattrparam" field is full. If so, we just
            eat up any data left in the tag. */
         if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
           s.minorstate = MINORSTATE_TAGEND;
           break;
         }
       }
     }
     break;
   case MINORSTATE_HTMLCOMMENT:
     /* Skip the comment. s.tagptr counts consecutive dashes; a '>'
        directly after at least one dash ends the comment. */
     for(i = 0; i < len; ++i) {
       c = data[i];
       if(c == ISO_dash) {
         ++s.tagptr;
       } else if(c == ISO_gt && s.tagptr > 0) {
         PRINTF(("Comment done.\n"));
         s.minorstate = MINORSTATE_TEXT;
         break;
       } else {
         s.tagptr = 0;
       }
     }
     break;
   case MINORSTATE_TAGEND:
     /* Discard characters until a '>' is seen. */
     for(i = 0; i < len; ++i) {
       if(data[i] == ISO_gt) {
         s.minorstate = MINORSTATE_TEXT;
         s.tagattrptr = 0;
         endtagfound();
         parse_tag();
         break;
       }
     }
     break;
   default:
     /* Unknown state: consume one character. */
     i = 0;
     break;
   }
   if(i >= len) {
     return len;
   }
   return i + 1;
 }
 /*-----------------------------------------------------------------------------------*/
 /* Parse datalen characters of HTML starting at data.
  *
  * The data is handed to parse_word() in chunks of at most 255
  * characters (its length parameter is a u8_t); after each call the
  * buffer is advanced by the number of characters parse_word()
  * reports as consumed, until nothing is left.
  */
 void
 htmlparser_parse(char *data, u16_t datalen)
 {
   u16_t consumed;

   while(datalen > 0) {
     consumed = (datalen > 255) ? parse_word(data, 255)
                                : parse_word(data, datalen);
     data += consumed;
     datalen -= consumed;
   }
 }
 /*-----------------------------------------------------------------------------------*/
	/*
	* Copyright (c) 2002, Adam Dunkels.
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above
	* copyright notice, this list of conditions and the following
	* disclaimer in the documentation and/or other materials provided
	* with the distribution.
	* 3. The name of the author may not be used to endorse or promote
	* products derived from this software without specific prior
	* written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
	* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
	* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
	* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
	* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
	* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*
	* This file is part of the Contiki desktop environment
	*
	* $Id: htmlparser.c,v 1.11 2005/06/12 23:44:29 oliverschmidt Exp $
	*
	*/

	/* htmlparser.c:
	*
	* Implements a very simplistic HTML parser. It recognizes HTML links
	* (<a href>-tags), HTML img alt tags, a few text flow break tags
	* (<br>, <p>, <h>), the <li> tag (but does not even try to
	* distinguish between <ol> or <ul>) as well as HTML comment tags
	* (<!-- -->).
	*
	* To save memory, the HTML parser is state machine driven, which
	* means that it will shave off one character from the HTML page,
	* process that character, and return to the next. Another way of
	* doing it would be to buffer a number of characters and process them
	* together.
	*
	* The main function in this file is the htmlparser_parse() function
	* which takes a htmlparser_state structur and a part of an HTML file
	* as an argument. The htmlparser_parse() function will call the
	* helper functions parse_char() and parse_tag(). Those functions will
	* in turn call the two callback functions htmlparser_char() and
	* htmlparser_tag(). Those functions must be implemented by the using
	* module (e.g., a web browser program).
	*
	* htmlparser_char() will be called for every non-tag character.
	*
	* htmlparser_tag() will be called whenever a full tag has been found.
	*
	*/


	#include "htmlparser.h"
	#include "html-strings.h"
	#include "www-conf.h"
	#include "cc.h"

	#include <string.h>

	#if 1
	#define PRINTF(x)
	#else
	#include <stdio.h>
	#define PRINTF(x) printf x
	#endif


	/*-----------------------------------------------------------------------------------*/
	/* Numeric character codes used by the parser. Each value is the
	   ASCII code of the character the name refers to. */

	/* Upper-case letters. */
	#define ISO_A 0x41
	#define ISO_B 0x42
	#define ISO_E 0x45
	#define ISO_F 0x46
	#define ISO_G 0x47
	#define ISO_H 0x48
	#define ISO_I 0x49
	#define ISO_L 0x4c
	#define ISO_M 0x4d
	#define ISO_P 0x50
	#define ISO_R 0x52
	#define ISO_T 0x54

	/* Lower-case letters: the upper-case code with bit 0x20 set. */
	#define ISO_a (ISO_A | 0x20)
	#define ISO_b (ISO_B | 0x20)
	#define ISO_e (ISO_E | 0x20)
	#define ISO_f (ISO_F | 0x20)
	#define ISO_g (ISO_G | 0x20)
	#define ISO_h (ISO_H | 0x20)
	#define ISO_i (ISO_I | 0x20)
	#define ISO_l (ISO_L | 0x20)
	#define ISO_m (ISO_M | 0x20)
	#define ISO_p (ISO_P | 0x20)
	#define ISO_r (ISO_R | 0x20)
	#define ISO_t (ISO_T | 0x20)

	/* Whitespace and punctuation. ISO_citation is the double quote and
	   ISO_citation2 the single quote. */
	#define ISO_ht 0x09
	#define ISO_nl 0x0a
	#define ISO_cr 0x0d
	#define ISO_space 0x20
	#define ISO_bang 0x21
	#define ISO_citation 0x22
	#define ISO_ampersand 0x26
	#define ISO_citation2 0x27
	#define ISO_asterisk 0x2a
	#define ISO_dash 0x2d
	#define ISO_slash 0x2f
	#define ISO_semicolon 0x3b
	#define ISO_lt 0x3c
	#define ISO_eq 0x3d
	#define ISO_gt 0x3e

	/* NOTE: 0x5b is '[' and 0x5d is ']' in ASCII, so the "r"/"l" in
	   these two names is the reverse of the usual right/left reading. */
	#define ISO_rbrack 0x5b
	#define ISO_lbrack 0x5d

	/* Minor states: where inside the character stream the scanner
	   currently is. */
	#define MINORSTATE_NONE 0
	#define MINORSTATE_TEXT 1 /* Parse normal text */
	#define MINORSTATE_EXTCHAR 2 /* Check for semi-colon */
	#define MINORSTATE_TAG 3 /* Check for name of tag. */
	#define MINORSTATE_TAGEND 4 /* Scan for end of tag. */
	#define MINORSTATE_TAGATTR 5 /* Parse tag attr. */
	#define MINORSTATE_TAGATTRSPACE 6 /* Parse optional space after tag
	attr. */
	#define MINORSTATE_TAGATTRPARAM 7 /* Parse tag attr parameter. */
	#define MINORSTATE_TAGATTRPARAMNQ 8 /* Parse tag attr parameter without
	quotation marks. */
	#define MINORSTATE_HTMLCOMMENT 9 /* Scan for HTML comment end */

	/* Major states: what do_word() does with collected text. In
	   MAJORSTATE_LINK the text is accumulated, in MAJORSTATE_DISCARD
	   it is dropped, otherwise it is handed to htmlparser_word(). */
	#define MAJORSTATE_NONE 0
	#define MAJORSTATE_BODY 1
	#define MAJORSTATE_LINK 2
	#define MAJORSTATE_FORM 3
	#define MAJORSTATE_DISCARD 4


	/* Complete state of the HTML parser. A single file-local instance,
	   s, is shared by every function in this file. */
	struct htmlparser_state {

	  /* Scanner position, one of MINORSTATE_*. */
	  unsigned char minorstate;

	  /* Name of the tag being parsed and the index of its next free
	     slot. */
	  char tag[20];
	  unsigned char tagptr;

	  /* Name of the attribute being parsed. */
	  char tagattr[20];
	  unsigned char tagattrptr;

	  /* Value of the attribute being parsed. */
	  char tagattrparam[WWW_CONF_MAX_URLLEN];
	  unsigned char tagattrparamptr;

	  /* lastchar is cleared by htmlparser_init(). */
	  unsigned char lastchar;
	  unsigned char quotechar;

	  /* Current and previous MAJORSTATE_* value. */
	  unsigned char majorstate;
	  unsigned char lastmajorstate;

	  char linkurl[WWW_CONF_MAX_URLLEN];

	  /* Text accumulated by add_char() and its length. */
	  char word[WWW_CONF_WEBPAGE_WIDTH];
	  unsigned char wordlen;

	#if WWW_CONF_FORMS
	  char formaction[WWW_CONF_MAX_FORMACTIONLEN];
	  char formname[WWW_CONF_MAX_FORMNAMELEN];
	  unsigned char inputtype;
	  char inputname[WWW_CONF_MAX_INPUTNAMELEN];
	  char inputvalue[WWW_CONF_MAX_INPUTVALUELEN];
	  unsigned char inputvaluesize;
	#endif /* WWW_CONF_FORMS */
	};

	static struct htmlparser_state s;

	/*-----------------------------------------------------------------------------------*/
	/* Sentinel entry that closes the tag table below. */
	static char last[1] = {0xff};

	/* Table of the tags the parser recognizes. Each TAG_* macro is the
	   index of the entry that follows it. find_tag() narrows a
	   [first, last) window over this table and relies on the entries
	   being in ascending character order. */
	static const char *tags[] = {
	#define TAG_FIRST 0
	#define TAG_SLASHA 0
	html_slasha,
	#define TAG_SLASHCENTER 1
	html_slashcenter,
	#define TAG_SLASHFORM 2
	html_slashform,
	#define TAG_SLASHH 3
	html_slashh,
	#define TAG_SLASHSCRIPT 4
	html_slashscript,
	#define TAG_SLASHSELECT 5
	html_slashselect,
	#define TAG_SLASHSTYLE 6
	html_slashstyle,
	#define TAG_A 7
	html_a,
	#define TAG_BODY 8
	html_body,
	#define TAG_BR 9
	html_br,
	#define TAG_CENTER 10
	html_center,
	#define TAG_FORM 11
	html_form,
	#define TAG_FRAME 12
	html_frame,
	#define TAG_H1 13
	html_h1,
	#define TAG_H2 14
	html_h2,
	#define TAG_H3 15
	html_h3,
	#define TAG_H4 16
	html_h4,
	#define TAG_IMG 17
	html_img,
	#define TAG_INPUT 18
	html_input,
	#define TAG_LI 19
	html_li,
	#define TAG_P 20
	html_p,
	#define TAG_SCRIPT 21
	html_script,
	#define TAG_SELECT 22
	html_select,
	#define TAG_STYLE 23
	html_style,
	#define TAG_TR 24
	html_tr,
	/* TAG_LAST is both the sentinel's index and find_tag()'s "no
	   match" return value. */
	#define TAG_LAST 25
	last,
	};

	/*-----------------------------------------------------------------------------------*/
	/* Report whether c is one of the characters the parser treats as
	   blank: space, line feed, carriage return or horizontal tab.
	   Returns 1 for a blank and 0 for anything else. */
	static unsigned char CC_FASTCALL
	iswhitespace(char c)
	{
	  return (c == ISO_space ||
	          c == ISO_nl ||
	          c == ISO_cr ||
	          c == ISO_ht);
	}
	/*-----------------------------------------------------------------------------------*/
	/* Reset the scanner: both major-state slots are set to
	   MAJORSTATE_DISCARD, the minor state to plain-text scanning, and
	   the remembered last character is cleared. */
	void
	htmlparser_init(void)
	{
	  s.lastmajorstate = MAJORSTATE_DISCARD;
	  s.majorstate = MAJORSTATE_DISCARD;
	  s.minorstate = MINORSTATE_TEXT;
	  s.lastchar = 0;
	}
	/*-----------------------------------------------------------------------------------*/
	/* Crude lower-case conversion for tag and attribute names.
	 *
	 * Characters up to and including 0x40 are returned untouched; every
	 * other character keeps only its low five bits and is moved into the
	 * 0x60-0x7f range. This also rewrites characters that are not
	 * upper-case letters.
	 *
	 * XXX: This is a *brute force* approach and should *not* be used
	 * anywhere else! It works for our purposes, however (i.e., HTML
	 * tags).
	 */
	static char CC_FASTCALL
	lowercase(char c)
	{
	  if(c > 0x40) {
	    return (c & 0x1f) | 0x60;
	  } else {
	    return c;
	  }
	}
	/*-----------------------------------------------------------------------------------*/
	/* NUL-terminate the tag name, attribute name and attribute value
	   buffers at their current write positions, so that they can be
	   used as C strings. */
	static void
	endtagfound(void)
	{
	  s.tag[s.tagptr] = '\0';
	  s.tagattr[s.tagattrptr] = '\0';
	  s.tagattrparam[s.tagattrparamptr] = '\0';
	}
	/*-----------------------------------------------------------------------------------*/
	/* Enter major state newstate and remember the state that was left
	   in s.lastmajorstate. Nothing changes when the parser is already in
	   newstate. */
	static void CC_FASTCALL
	switch_majorstate(unsigned char newstate)
	{
	  if(s.majorstate == newstate) {
	    return;
	  }

	  PRINTF(("Switching state from %d to %d (%d)\n", s.majorstate, newstate, s.lastmajorstate));
	  s.lastmajorstate = s.majorstate;
	  s.majorstate = newstate;
	}
	/*-----------------------------------------------------------------------------------*/
	/* Append c to the word buffer. Characters of 0x80 and above are
	   dropped, and so is everything once the buffer holds
	   WWW_CONF_WEBPAGE_WIDTH - 1 characters. */
	static void CC_FASTCALL
	add_char(unsigned char c)
	{
	  if(c >= 0x80) {
	    return;
	  }
	  if(s.wordlen >= WWW_CONF_WEBPAGE_WIDTH - 1) {
	    return;
	  }
	  s.word[s.wordlen] = c;
	  ++s.wordlen;
	}
	/*-----------------------------------------------------------------------------------*/
	/* Finish the word collected so far, if there is one.
	 *
	 * - MAJORSTATE_LINK: the link text keeps accumulating; a single
	 *   space is appended as word separator unless the text already ends
	 *   in one.
	 * - MAJORSTATE_DISCARD: the collected text is thrown away.
	 * - any other state: the word is NUL-terminated, passed to
	 *   htmlparser_word() and the buffer is emptied.
	 */
	static void
	do_word(void)
	{
	  if(s.wordlen > 0) {
	    if(s.majorstate == MAJORSTATE_LINK) {
	      /* Look at the last stored character (index wordlen - 1); the
	         slot at index wordlen has not been written yet. */
	      if(s.word[s.wordlen - 1] != ISO_space) {
	        add_char(ISO_space);
	      }
	    } else if(s.majorstate == MAJORSTATE_DISCARD) {
	      s.wordlen = 0;
	    } else {
	      s.word[s.wordlen] = '\0';
	      htmlparser_word(s.word, s.wordlen);
	      s.wordlen = 0;
	    }
	  }
	}
	/*-----------------------------------------------------------------------------------*/
	/*
	 * Ends the current line: first flushes any pending word through
	 * do_word(), then reports the line break via htmlparser_newline().
	 */
	static void
	newline(void)
	{
	  do_word();               /* pending word belongs to the old line */
	  htmlparser_newline();
	}
	/*-----------------------------------------------------------------------------------*/
	/*
	 * Looks up the NUL-terminated, lower-cased tag name in the tags[]
	 * table and returns its index, or TAG_LAST when there is no match.
	 *
	 * The search keeps a window [lo, hi) of table rows that agree with
	 * `tag` on all characters seen so far and narrows it one character
	 * position at a time.  This relies on tags[] being sorted in
	 * ascending order -- NOTE(review): the table definition is outside
	 * this chunk, confirm the ordering there.
	 *
	 * The locals are deliberately named lo/hi so they do not shadow the
	 * file-level `last` table entry.
	 */
	static unsigned char CC_FASTCALL
	find_tag(char *tag)
	{
	  static unsigned char lo, hi, pos, row;
	  static char tagc;

	  lo = TAG_FIRST;
	  hi = TAG_LAST;
	  pos = 0;

	  do {
	    tagc = tag[pos];

	    /* End of the name and the first candidate ends here too: match. */
	    if(tagc == 0 &&
	       tags[lo][pos] == 0) {
	      return lo;
	    }

	    row = lo;

	    /* First, skip the rows whose character at `pos` is too small.  The
	       index is tested before the table is read so that the row at `hi`
	       (which may be shorter than `pos`) is never dereferenced. */
	    while(row < hi &&
	          tagc > (tags[row])[pos]) {
	      ++row;
	    }
	    lo = row;

	    /* Second, find the end of the run of rows that match at `pos`. */
	    while(row < hi &&
	          tagc == (tags[row])[pos]) {
	      ++row;
	    }
	    hi = row;

	    /* An empty window means no row matches; otherwise continue with
	       the next character. */
	    ++pos;

	  } while(hi != lo);

	  return TAG_LAST;
	}
	/*-----------------------------------------------------------------------------------*/
	/*
	 * Acts on the tag whose name is in s.tag, together with the single
	 * attribute name/value pair currently held in s.tagattr and
	 * s.tagattrparam.  parse_word() calls this once per attribute and
	 * once more (with an empty s.tagattr) when the closing '>' is seen,
	 * so a tag with several attributes is handled incrementally.
	 *
	 * Summary of the handled tags:
	 *  - p, h1-h4:            two line breaks
	 *  - br, tr, TAG_SLASHH:  one line break
	 *  - li:                  line break followed by "* "
	 *  - script/style/select: switch to MAJORSTATE_DISCARD; the matching
	 *                         end tags return to the previous major state
	 *  - body:                force MAJORSTATE_BODY
	 *  - frame src=...:       emitted as a link on a line of its own
	 *  - img alt=...:         alt text emitted as the word "<alt>"
	 *  - a href=... / a end:  collect link text, then htmlparser_link()
	 *  - form / input:        only when WWW_CONF_FORMS is enabled
	 *  - center:              only when WWW_CONF_RENDERSTATE is enabled
	 * Unknown tags are ignored.
	 */
	static void
	parse_tag(void)
	{
	  static char *tagattrparam;
	  static unsigned char size;

	  /* Write-only variable.  NOTE(review): looks like a compiler
	     workaround -- confirm before removing. */
	  static char dummy;

	  PRINTF(("Parsing tag '%s' '%s' '%s'\n",
	          s.tag, s.tagattr, s.tagattrparam));

	  switch(find_tag(s.tag)) {
	  case TAG_P:
	  case TAG_H1:
	  case TAG_H2:
	  case TAG_H3:
	  case TAG_H4:
	    newline();
	    /* FALLTHROUGH */
	  case TAG_BR:
	  case TAG_TR:
	  case TAG_SLASHH:
	    dummy = 0;
	    newline();
	    break;
	  case TAG_LI:
	    newline();
	    add_char(ISO_asterisk);
	    add_char(ISO_space);
	    break;
	  case TAG_SCRIPT:
	  case TAG_STYLE:
	  case TAG_SELECT:
	    switch_majorstate(MAJORSTATE_DISCARD);
	    break;
	  case TAG_SLASHSCRIPT:
	  case TAG_SLASHSTYLE:
	  case TAG_SLASHSELECT:
	    do_word();
	    switch_majorstate(s.lastmajorstate);
	    break;
	  case TAG_BODY:
	    s.majorstate = s.lastmajorstate = MAJORSTATE_BODY;
	    break;
	  case TAG_FRAME:
	    /* sizeof() includes the terminator, so this is an exact match. */
	    if(strncmp(s.tagattr, html_src, sizeof(html_src)) == 0 &&
	       s.tagattrparam[0] != 0) {
	      switch_majorstate(MAJORSTATE_BODY);
	      newline();
	      add_char(ISO_rbrack);
	      do_word();
	      htmlparser_link((char *)html_frame, strlen(html_frame), s.tagattrparam);
	      PRINTF(("Frame [%s]\n", s.tagattrparam));
	      add_char(ISO_lbrack);
	      newline();
	    }
	    break;
	  case TAG_IMG:
	    if(strncmp(s.tagattr, html_alt, sizeof(html_alt)) == 0 &&
	       s.tagattrparam[0] != 0) {
	      /* Emit the alt text as one word wrapped in '<' and '>'. */
	      add_char(ISO_lt);
	      tagattrparam = &s.tagattrparam[0];
	      while(*tagattrparam) {
	        add_char(*tagattrparam);
	        ++tagattrparam;
	      }
	      add_char(ISO_gt);
	      do_word();
	    }
	    break;
	  case TAG_A:
	    PRINTF(("A %s %s\n", s.tagattr, s.tagattrparam));
	    if(strncmp(s.tagattr, html_href, sizeof(html_href)) == 0 &&
	       s.tagattrparam[0] != 0) {
	      /* NOTE(review): unbounded copy; safe only if s.linkurl is at
	         least as large as s.tagattrparam -- confirm in the header. */
	      strcpy(s.linkurl, s.tagattrparam);
	      do_word();
	      switch_majorstate(MAJORSTATE_LINK);
	    }
	    break;
	  case TAG_SLASHA:
	    if(s.majorstate == MAJORSTATE_LINK) {
	      switch_majorstate(s.lastmajorstate);
	      s.word[s.wordlen] = 0;
	      htmlparser_link(s.word, s.wordlen, s.linkurl);
	      s.wordlen = 0;
	    }
	    break;
	#if WWW_CONF_FORMS
	  case TAG_FORM:
	    PRINTF(("Form tag\n"));
	    switch_majorstate(MAJORSTATE_FORM);
	    if(strncmp(s.tagattr, html_action, sizeof(html_action)) == 0) {
	      PRINTF(("Form action '%s'\n", s.tagattrparam));
	      strncpy(s.formaction, s.tagattrparam, WWW_CONF_MAX_FORMACTIONLEN - 1);
	    } else if(strncmp(s.tagattr, html_name, sizeof(html_name)) == 0) {
	      PRINTF(("Form name '%s'\n", s.tagattrparam));
	      strncpy(s.formname, s.tagattrparam, WWW_CONF_MAX_FORMNAMELEN - 1);
	    }
	    s.inputname[0] = s.inputvalue[0] = 0;
	    break;
	  case TAG_SLASHFORM:
	    switch_majorstate(MAJORSTATE_BODY);
	    s.formaction[0] = s.formname[0] = 0;
	    break;
	  case TAG_INPUT:
	    if(s.majorstate == MAJORSTATE_FORM) {
	      /* First check if we are called at the end of an input tag. If
	         so, we should render the input widget. */
	      if(s.tagattr[0] == 0 &&
	         s.inputname[0] != 0) {
	        PRINTF(("Render input type %d\n", s.inputtype));
	        switch(s.inputtype) {
	        case HTMLPARSER_INPUTTYPE_NONE:
	        case HTMLPARSER_INPUTTYPE_TEXT:
	          s.inputvalue[s.inputvaluesize] = 0;
	          htmlparser_inputfield(s.inputvaluesize, s.inputvalue, s.inputname,
	                                s.formname, s.formaction);
	          break;
	        case HTMLPARSER_INPUTTYPE_SUBMIT:
	        case HTMLPARSER_INPUTTYPE_IMAGE:
	          htmlparser_submitbutton(s.inputvalue, s.inputname,
	                                  s.formname, s.formaction);
	          break;
	        default:
	          /* Other input types are not rendered. */
	          break;
	        }
	        s.inputtype = HTMLPARSER_INPUTTYPE_NONE;
	      } else {
	        PRINTF(("Input '%s' '%s'\n", s.tagattr, s.tagattrparam));
	        if(strncmp(s.tagattr, html_type, sizeof(html_type)) == 0) {
	          if(strncmp(s.tagattrparam, html_submit,
	                     sizeof(html_submit)) == 0) {
	            s.inputtype = HTMLPARSER_INPUTTYPE_SUBMIT;
	          } else if(strncmp(s.tagattrparam, html_image,
	                            sizeof(html_image)) == 0) {
	            s.inputtype = HTMLPARSER_INPUTTYPE_IMAGE;
	          } else if(strncmp(s.tagattrparam, html_text,
	                            sizeof(html_text)) == 0) {
	            s.inputtype = HTMLPARSER_INPUTTYPE_TEXT;
	          } else {
	            s.inputtype = HTMLPARSER_INPUTTYPE_OTHER;
	          }
	        } else if(strncmp(s.tagattr, html_name,
	                          sizeof(html_name)) == 0) {
	          /* strncpy() does not terminate the destination when the
	             source fills it, so terminate explicitly.  The index used
	             is one that strncpy() itself has just written. */
	          strncpy(s.inputname, s.tagattrparam,
	                  WWW_CONF_MAX_INPUTNAMELEN);
	          s.inputname[WWW_CONF_MAX_INPUTNAMELEN - 1] = 0;
	        } else if(strncmp(s.tagattr, html_alt,
	                          sizeof(html_alt)) == 0 &&
	                  s.inputtype == HTMLPARSER_INPUTTYPE_IMAGE) {
	          strncpy(s.inputvalue, s.tagattrparam,
	                  WWW_CONF_MAX_INPUTVALUELEN);
	          s.inputvalue[WWW_CONF_MAX_INPUTVALUELEN - 1] = 0;
	        } else if(strncmp(s.tagattr, html_value,
	                          sizeof(html_value)) == 0) {
	          strncpy(s.inputvalue, s.tagattrparam,
	                  WWW_CONF_MAX_INPUTVALUELEN);
	          s.inputvalue[WWW_CONF_MAX_INPUTVALUELEN - 1] = 0;
	        } else if(strncmp(s.tagattr, html_size,
	                          sizeof(html_size)) == 0) {
	          /* Parse at most two decimal digits and clamp the result so
	             that s.inputvalue[size] stays a valid index. */
	          size = 0;
	          if(s.tagattrparam[0] >= '0' &&
	             s.tagattrparam[0] <= '9') {
	            size = s.tagattrparam[0] - '0';
	            if(s.tagattrparam[1] >= '0' &&
	               s.tagattrparam[1] <= '9') {
	              size = size * 10 + (s.tagattrparam[1] - '0');
	            }
	          }
	          if(size >= WWW_CONF_MAX_INPUTVALUELEN) {
	            size = WWW_CONF_MAX_INPUTVALUELEN - 1;
	          }
	          s.inputvaluesize = size;
	        }
	      }

	    }
	    break;
	#endif /* WWW_CONF_FORMS */
	#if WWW_CONF_RENDERSTATE
	  case TAG_CENTER:
	    newline();
	    htmlparser_renderstate(HTMLPARSER_RENDERSTATE_BEGIN |
	                           HTMLPARSER_RENDERSTATE_CENTER);
	    break;
	  case TAG_SLASHCENTER:
	    newline();
	    htmlparser_renderstate(HTMLPARSER_RENDERSTATE_END |
	                           HTMLPARSER_RENDERSTATE_CENTER);
	    break;
	#endif /* WWW_CONF_RENDERSTATE */
	  default:
	    /* Unknown or unhandled tag: ignore. */
	    break;
	  }
	}
	/*-----------------------------------------------------------------------------------*/
	/*
	 * Runs the tokenizer over data[0..dlen-1] for as long as the minor
	 * state stays the same, and returns the number of bytes consumed.
	 *
	 * Each case below scans characters in its own loop.  A `break` out of
	 * that loop means "the character at index i has been consumed and
	 * the minor state changed", so i + 1 bytes are reported.  When a loop
	 * runs to completion the whole buffer (dlen bytes) was consumed.  In
	 * an unknown minor state one byte is consumed per call.
	 *
	 * The caller is expected to call again with the remaining data.
	 */
	static u16_t
	parse_word(char *data, u8_t dlen)
	{
	  static u8_t i;
	  static u8_t len;
	  unsigned char c;

	  len = dlen;

	  switch(s.minorstate) {
	  case MINORSTATE_TEXT:
	    /* Plain text: collect words, watch for '<' and '&'. */
	    for(i = 0; i < len; ++i) {
	      c = data[i];
	      if(iswhitespace(c)) {
	        do_word();
	      } else if(c == ISO_lt) {
	        s.minorstate = MINORSTATE_TAG;
	        s.tagptr = 0;
	        break;
	      } else if(c == ISO_ampersand) {
	        s.minorstate = MINORSTATE_EXTCHAR;
	        break;
	      } else {
	        add_char(c);
	      }
	    }
	    break;
	  case MINORSTATE_EXTCHAR:
	    /* Inside an "&...;" entity.  The entity is not decoded: it is
	       replaced by a space, or by "& " when whitespace shows up before
	       the ';'. */
	    for(i = 0; i < len; ++i) {
	      c = data[i];
	      if(c == ISO_semicolon) {
	        s.minorstate = MINORSTATE_TEXT;
	        add_char(' ');
	        break;
	      } else if(iswhitespace(c)) {
	        s.minorstate = MINORSTATE_TEXT;
	        add_char('&');
	        add_char(' ');
	        break;
	      }
	    }
	    break;
	  case MINORSTATE_TAG:
	    /* We are currently parsing within the name of a tag. We check
	       for the end of a tag (the '>' character) or whitespace (which
	       indicates that we should parse a tag attr argument
	       instead). */
	    for(i = 0; i < len; ++i) {
	      c = data[i];
	      if(c == ISO_gt) {
	        /* Full tag found. We continue parsing regular text. */
	        s.minorstate = MINORSTATE_TEXT;
	        s.tagattrptr = s.tagattrparamptr = 0;
	        endtagfound();
	        parse_tag();
	        break;
	      } else if(iswhitespace(c)) {
	        /* The name of the tag found. We continue parsing the tag
	           attr.*/
	        s.minorstate = MINORSTATE_TAGATTR;
	        s.tagattrptr = 0;
	        endtagfound();
	        break;
	      } else {
	        /* Keep track of the name of the tag, but convert it to
	           lower case. */

	        s.tag[s.tagptr] = lowercase(c);
	        ++s.tagptr;
	        /* Check if the ->tag field is full. If so, we just eat up
	           any data left in the tag.  One byte is kept free for the
	           terminator that endtagfound() writes at s.tag[s.tagptr]. */
	        if(s.tagptr >= sizeof(s.tag) - 1) {
	          s.minorstate = MINORSTATE_TAGEND;
	          break;
	        }
	      }

	      /* Check for HTML comment, indicated by <!-- */
	      if(s.tagptr == 3 &&
	         s.tag[0] == ISO_bang &&
	         s.tag[1] == ISO_dash &&
	         s.tag[2] == ISO_dash) {
	        PRINTF(("Starting comment...\n"));
	        s.minorstate = MINORSTATE_HTMLCOMMENT;
	        s.tagptr = 0;
	        endtagfound();
	        break;
	      }
	    }
	    break;
	  case MINORSTATE_TAGATTR:
	    /* We parse the "tag attr", i.e., the "href" in <a
	       href="...">. */
	    for(i = 0; i < len; ++i) {
	      c = data[i];
	      if(c == ISO_gt) {
	        /* Full tag found. */
	        s.minorstate = MINORSTATE_TEXT;
	        s.tagattrparamptr = 0;
	        s.tagattrptr = 0;
	        endtagfound();
	        parse_tag();
	        s.tagptr = 0;
	        endtagfound();
	        break;
	      } else if(iswhitespace(c)) {
	        if(s.tagattrptr == 0) {
	          /* Discard leading spaces. */
	        } else {
	          /* A non-leading space is the end of the attribute. */
	          s.tagattrparamptr = 0;
	          endtagfound();
	          parse_tag();
	          s.minorstate = MINORSTATE_TAGATTRSPACE;
	          break;
	        }
	      } else if(c == ISO_eq) {
	        s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
	        s.tagattrparamptr = 0;
	        endtagfound();
	        break;
	      } else {
	        s.tagattr[s.tagattrptr] = lowercase(c);
	        ++s.tagattrptr;
	        /* Check if the "tagattr" field is full. If so, we just eat
	           up any data left in the tag.  (MINORSTATE_TAGEND resets
	           s.tagattrptr before terminating the buffer.) */
	        if(s.tagattrptr == sizeof(s.tagattr)) {
	          s.minorstate = MINORSTATE_TAGEND;
	          break;
	        }
	      }
	    }
	    break;
	  case MINORSTATE_TAGATTRSPACE:
	    /* Whitespace after an attribute name: either an '=' follows (the
	       name has a value) or the next attribute name starts. */
	    for(i = 0; i < len; ++i) {
	      c = data[i];
	      if(iswhitespace(c)) {
	        /* Discard spaces. */
	      } else if(c == ISO_eq) {
	        s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
	        s.tagattrparamptr = 0;
	        endtagfound();
	        parse_tag();
	        break;
	      } else {
	        s.tagattr[0] = lowercase(c);
	        s.tagattrptr = 1;
	        s.minorstate = MINORSTATE_TAGATTR;
	        break;
	      }
	    }
	    break;
	  case MINORSTATE_TAGATTRPARAMNQ:
	    /* We are parsing the "tag attr parameter", i.e., the link part
	       in <a href="link">. */
	    for(i = 0; i < len; ++i) {
	      c = data[i];
	      if(c == ISO_gt) {
	        /* Full tag found.  parse_tag() runs once for the attribute
	           just completed and once more with an empty attribute name
	           to signal the end of the tag. */
	        endtagfound();
	        parse_tag();
	        s.minorstate = MINORSTATE_TEXT;
	        s.tagattrptr = 0;
	        endtagfound();
	        parse_tag();
	        s.tagptr = 0;
	        endtagfound();
	        break;
	      } else if(iswhitespace(c) &&
	                s.tagattrparamptr == 0) {
	        /* Discard leading spaces. */
	      } else if((c == ISO_citation ||
	                 c == ISO_citation2) &&
	                s.tagattrparamptr == 0) {
	        /* Opening quote: remember which one so the same character
	           closes the value. */
	        s.minorstate = MINORSTATE_TAGATTRPARAM;
	        s.quotechar = c;
	        PRINTF(("tag attr param q found\n"));
	        break;
	      } else if(iswhitespace(c)) {
	        PRINTF(("Non-leading space found at %d\n",
	                s.tagattrparamptr));
	        /* Stop parsing if a non-leading space was found */
	        endtagfound();
	        parse_tag();

	        s.minorstate = MINORSTATE_TAGATTR;
	        s.tagattrptr = 0;
	        endtagfound();
	        break;
	      } else {
	        s.tagattrparam[s.tagattrparamptr] = c;
	        ++s.tagattrparamptr;
	        /* Check if the "tagattrparam" field is full. If so, we just
	           eat up any data left in the tag. */
	        if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
	          s.minorstate = MINORSTATE_TAGEND;
	          break;
	        }
	      }
	    }
	    break;
	  case MINORSTATE_TAGATTRPARAM:
	    /* We are parsing the quoted "tag attr parameter", i.e., the link
	       part in <a href="link">. */
	    for(i = 0; i < len; ++i) {
	      c = data[i];
	      if(c == s.quotechar) {
	        /* Found end of tag attr parameter. */
	        endtagfound();
	        parse_tag();

	        s.minorstate = MINORSTATE_TAGATTR;
	        s.tagattrptr = 0;
	        endtagfound();
	        break;
	      } else {
	        /* Any whitespace inside the quotes is stored as a space. */
	        if(iswhitespace(c)) {
	          s.tagattrparam[s.tagattrparamptr] = ISO_space;
	        } else {
	          s.tagattrparam[s.tagattrparamptr] = c;
	        }

	        ++s.tagattrparamptr;
	        /* Check if the "tagattrparam" field is full. If so, we just
	           eat up any data left in the tag. */
	        if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
	          s.minorstate = MINORSTATE_TAGEND;
	          break;
	        }
	      }
	    }
	    break;
	  case MINORSTATE_HTMLCOMMENT:
	    /* s.tagptr counts consecutive dashes; a '>' directly after at
	       least one dash ends the comment. */
	    for(i = 0; i < len; ++i) {
	      c = data[i];
	      if(c == ISO_dash) {
	        ++s.tagptr;
	      } else if(c == ISO_gt && s.tagptr > 0) {
	        PRINTF(("Comment done.\n"));
	        s.minorstate = MINORSTATE_TEXT;
	        break;
	      } else {
	        s.tagptr = 0;
	      }
	    }
	    break;
	  case MINORSTATE_TAGEND:
	    /* Discard characters until a '>' is seen. */
	    for(i = 0; i < len; ++i) {
	      if(data[i] == ISO_gt) {
	        s.minorstate = MINORSTATE_TEXT;
	        s.tagattrptr = 0;
	        endtagfound();
	        parse_tag();
	        break;
	      }
	    }
	    break;
	  default:
	    i = 0;
	    break;
	  }

	  /* Loop ran to the end: everything was consumed.  Otherwise the
	     character at index i was the last one consumed. */
	  if(i >= len) {
	    return len;
	  }
	  return i + 1;
	}
	/*-----------------------------------------------------------------------------------*/
	/*
	 * Feeds datalen bytes starting at data through the parser.
	 *
	 * parse_word() takes an 8-bit length, so the input is offered in
	 * pieces of at most 255 bytes.  Each call reports how many bytes it
	 * consumed; the remainder is offered again until nothing is left.
	 */
	void
	htmlparser_parse(char *data, u16_t datalen)
	{
	  u16_t consumed;
	  u8_t chunk;

	  while(datalen > 0) {
	    chunk = (datalen > 255) ? 255 : (u8_t)datalen;
	    consumed = parse_word(data, chunk);
	    data += consumed;
	    datalen -= consumed;
	  }
	}
	/*-----------------------------------------------------------------------------------*/