Initial import

commit: ca9ddcb6823d118622e4532edb1ecafa1a1b0692 [log] [tgz]
author: adamdunkels <adamdunkels> mer. mars 19 14:13:31 2003 +0000
committer: adamdunkels <adamdunkels> mer. mars 19 14:13:31 2003 +0000
tree: c4f9086ffa74cbcae19761140b6da61a0acb9d1b
diff --git a/contiki/apps/htmlparser.c b/contiki/apps/htmlparser.c
new file mode 100644
index 0000000..1ee41d1
--- /dev/null
+++ b/contiki/apps/htmlparser.c

@@ -0,0 +1,780 @@
+/*
+ * Copyright (c) 2002, Adam Dunkels.
+ * All rights reserved. 
+ *
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions 
+ * are met: 
+ * 1. Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials provided
+ *    with the distribution. 
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgement:
+ *        This product includes software developed by Adam Dunkels. 
+ * 4. The name of the author may not be used to endorse or promote
+ *    products derived from this software without specific prior
+ *    written permission.  
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  
+ *
+ * This file is part of the Contiki desktop environment 
+ *
+ * $Id: htmlparser.c,v 1.1 2003/03/19 14:13:33 adamdunkels Exp $
+ *
+ */
+
+/* htmlparser.c:
+ *
+ * Implements a very simplistic HTML parser. It recognizes HTML links
+ * (<a href>-tags), HTML img alt tags, a few text flow break tags
+ * (<br>, <p>, <h>), the <li> tag (but does not even try to
+ * distinguish between <ol> or <ul>) as well as HTML comment tags
+ * (<!-- -->).
+ *
+ * To save memory, the HTML parser is state machine driven, which
+ * means that it will shave off one character from the HTML page,
+ * process that character, and then move on to the next. Another way
+ * of doing it would be to buffer a number of characters and process
+ * them together.
+ *
+ * The main function in this file is the htmlparser_parse() function
+ * which takes a part of an HTML file (a buffer and its length) as
+ * arguments and updates the parser state, which is kept in a static
+ * htmlparser_state structure. The htmlparser_parse() function will
+ * call the helper functions parse_char() and parse_tag(). Those
+ * functions will in turn call the callback functions
+ * htmlparser_char() and htmlparser_link() (plus the form and render
+ * state callbacks when those features are enabled). Those functions
+ * must be implemented by the using module (e.g., a web browser
+ * program).
+ *
+ * htmlparser_char() will be called for every non-tag character.
+ *
+ * htmlparser_link() will be called whenever a full link or frame tag
+ * has been found.
+ *
+ */
+
+
+#include "htmlparser.h"
+#include "html-strings.h"
+#include "www-conf.h"
+
+#if 1 /* Debug output disabled: PRINTF() expands to nothing. Change
+         the condition to 0 to route PRINTF() to printf(). */
+#define PRINTF(x)
+#else
+#include <stdio.h>
+#define PRINTF(x) printf x /* x is a complete parenthesized argument
+                              list, e.g. PRINTF(("n=%d\n", n)). */
+#endif
+
+#ifdef WITH_CC65 /* When WITH_CC65 is defined, small helpers are
+                    declared with the __fastcall__ qualifier. */
+#define FASTCALL __fastcall__
+#else
+#define FASTCALL /* Expands to nothing for all other builds. */
+#endif
+
+/*-----------------------------------------------------------------------------------*/
+#define ISO_A     0x41 /* Character codes of upper-case letters,
+                          written as numbers rather than as character
+                          literals. */
+#define ISO_B     0x42
+#define ISO_E     0x45
+#define ISO_F     0x46
+#define ISO_G     0x47
+#define ISO_H     0x48
+#define ISO_I     0x49
+#define ISO_L     0x4c
+#define ISO_M     0x4d
+#define ISO_P     0x50
+#define ISO_R     0x52
+#define ISO_T     0x54
+
+#define ISO_a     (ISO_A | 0x20) /* Setting bit 0x20 turns an upper-case
+                                    letter code into its lower-case
+                                    counterpart. */
+#define ISO_b     (ISO_B | 0x20)
+#define ISO_e     (ISO_E | 0x20)
+#define ISO_f     (ISO_F | 0x20)
+#define ISO_g     (ISO_G | 0x20)
+#define ISO_h     (ISO_H | 0x20)
+#define ISO_i     (ISO_I | 0x20)
+#define ISO_l     (ISO_L | 0x20)
+#define ISO_m     (ISO_M | 0x20)
+#define ISO_p     (ISO_P | 0x20)
+#define ISO_r     (ISO_R | 0x20)
+#define ISO_t     (ISO_T | 0x20)
+
+#define ISO_ht    0x09 /* Horizontal tab. */
+#define ISO_nl    0x0a /* Line feed. */
+#define ISO_cr    0x0d /* Carriage return. */
+#define ISO_space 0x20 /* Space. */
+#define ISO_bang  0x21 /* Exclamation mark. */
+#define ISO_citation 0x22 /* Double quotation mark. */
+#define ISO_ampersand 0x26 /* Ampersand. */
+#define ISO_citation2 0x27 /* Single quotation mark (apostrophe). */
+#define ISO_asterisk 0x2a /* Asterisk. */
+#define ISO_dash  0x2d /* Hyphen-minus. */
+#define ISO_slash 0x2f /* Forward slash. */
+#define ISO_semicolon  0x3b /* Semicolon. */
+#define ISO_lt    0x3c /* Less-than sign, opens a tag. */
+#define ISO_eq    0x3d /* Equals sign. */
+#define ISO_gt    0x3e /* Greater-than sign, closes a tag. */
+
+#define ISO_rbrack 0x5b /* NOTE: 0x5b is the OPENING square bracket
+                           even though the name says "rbrack". */
+#define ISO_lbrack 0x5d /* NOTE: 0x5d is the CLOSING square bracket
+                           even though the name says "lbrack". */
+
+#define MINORSTATE_NONE           0 /* htmlparser_parse() ignores all
+                                       input in this state. */
+#define MINORSTATE_TEXT           1 /* Parse normal text */
+#define MINORSTATE_EXTCHAR        2 /* After an ampersand: check for
+                                       semi-colon */
+#define MINORSTATE_TAG            3 /* Check for name of tag. */
+#define MINORSTATE_TAGEND         4 /* Scan for end of tag. */
+#define MINORSTATE_TAGATTR        5 /* Parse tag attr. */
+#define MINORSTATE_TAGATTRSPACE   6 /* Parse optional space after tag
+                                       attr. */
+#define MINORSTATE_TAGATTRPARAM   7 /* Parse tag attr parameter that is
+                                       enclosed in quotation marks. */
+#define MINORSTATE_TAGATTRPARAMNQ 8 /* Parse tag attr parameter without
+                                       quotation marks. */
+#define MINORSTATE_HTMLCOMMENT    9 /* Scan for HTML comment end */
+
+#define MAJORSTATE_NONE       0 /* Not referenced elsewhere in this file. */
+#define MAJORSTATE_BODY       1 /* parse_char() forwards text to
+                                   htmlparser_char(). */
+#define MAJORSTATE_LINK       2 /* parse_char() collects text as the
+                                   text of the current link. */
+#define MAJORSTATE_FORM       3 /* Inside a form; parse_char() still
+                                   forwards text to htmlparser_char(). */
+#define MAJORSTATE_DISCARD    4 /* parse_char() drops text; entered for
+                                   script, style and select tags and set
+                                   by htmlparser_init(). */
+
+
+struct htmlparser_state { /* Complete parser state; the single
+                             instance is the static variable s. */
+  unsigned char minorstate; /* Current MINORSTATE_* value. */
+  char tag[20]; /* Lower-cased name of the tag being parsed. */
+  unsigned char tagptr; /* Write index into tag[]; reused as the dash
+                           counter while scanning an HTML comment. */
+  char tagattr[20]; /* Lower-cased name of the current attribute. */
+  unsigned char tagattrptr; /* Write index into tagattr[]. */
+  char tagattrparam[WWW_CONF_MAX_URLLEN]; /* Value of the current
+                                             attribute. */
+  unsigned char tagattrparamptr; /* Write index into tagattrparam[]. */
+  unsigned char lastchar, quotechar; /* Previously processed input
+                                        character; quotation mark that
+                                        opened the attribute value. */
+  unsigned char majorstate, lastmajorstate; /* Current MAJORSTATE_*
+                                               value and the one to
+                                               return to. */
+  char linkurl[WWW_CONF_MAX_URLLEN]; /* href value of the link being
+                                        collected. */
+  char linktext[40]; /* Text seen between the opening and closing
+                        link tags. */
+  unsigned char linktextptr; /* Write index into linktext[]. */
+#if WWW_CONF_FORMS
+  char formaction[WWW_CONF_MAX_FORMACTIONLEN]; /* "action" attribute of
+                                                  the current form. */
+  char formname[WWW_CONF_MAX_FORMNAMELEN]; /* "name" attribute of the
+                                              current form. */
+  unsigned char inputtype; /* HTMLPARSER_INPUTTYPE_* of the current
+                              input tag. */
+  char inputname[WWW_CONF_MAX_INPUTNAMELEN]; /* "name" attribute of the
+                                                current input tag. */
+  char inputvalue[WWW_CONF_MAX_INPUTVALUELEN]; /* "value" (or, for
+                                                  image inputs, "alt")
+                                                  attribute. */
+  unsigned char inputvaluesize; /* Parsed "size" attribute, clamped
+                                   below WWW_CONF_MAX_INPUTVALUELEN. */
+#endif /* WWW_CONF_FORMS */
+};
+
+static struct htmlparser_state s; /* The one and only parser instance. */
+
+/*-----------------------------------------------------------------------------------*/
+static char last[1] = {0xff}; /* One-byte sentinel stored as the final
+                                 entry of tags[]. */
+
+static char *tags[] = { /* Table of recognized tag names. Each TAG_*
+                           constant is the index of the entry that
+                           follows it. NOTE(review): find_tag() walks
+                           the table with ordered comparisons, so the
+                           entries presumably must stay in ascending
+                           byte order - confirm against
+                           html-strings.h before adding entries. */
+#define TAG_FIRST       0
+#define TAG_SLASHA      0
+  html_slasha,
+#define TAG_SLASHCENTER 1
+  html_slashcenter,
+#define TAG_SLASHFORM   2
+  html_slashform,
+#define TAG_SLASHH      3
+  html_slashh,
+#define TAG_SLASHSCRIPT 4
+  html_slashscript,
+#define TAG_SLASHSELECT 5
+  html_slashselect,
+#define TAG_SLASHSTYLE  6
+  html_slashstyle,
+#define TAG_A           7
+  html_a,
+#define TAG_BODY        8
+  html_body,
+#define TAG_BR          9
+  html_br,
+#define TAG_CENTER     10 
+  html_center,
+#define TAG_FORM       11
+  html_form,
+#define TAG_FRAME      12    
+  html_frame,
+#define TAG_H1         13  
+  html_h1,
+#define TAG_H2         14
+  html_h2,
+#define TAG_H3         15  
+  html_h3,
+#define TAG_H4         16  
+  html_h4,
+#define TAG_IMG        17  
+  html_img,
+#define TAG_INPUT      18  
+  html_input,
+#define TAG_LI         19
+  html_li,
+#define TAG_P          20
+  html_p,
+#define TAG_SCRIPT     21
+  html_script, 
+#define TAG_SELECT     22
+  html_select,
+#define TAG_STYLE      23
+  html_style,
+#define TAG_TR         24   
+  html_tr,
+#define TAG_LAST       25 /* Index of the sentinel; find_tag() returns
+                             this value for unknown tags. */
+  last,
+};
+
+/*-----------------------------------------------------------------------------------*/
+/* iswhitespace():
+ *
+ * Returns 1 if c is a space, line feed, carriage return or horizontal
+ * tab, and 0 for every other character.
+ */
+static unsigned char FASTCALL
+iswhitespace(char c)
+{
+  switch(c) {
+  case ISO_space:
+  case ISO_nl:
+  case ISO_cr:
+  case ISO_ht:
+    return 1;
+  default:
+    return 0;
+  }
+}
+/*-----------------------------------------------------------------------------------*/
+/* find_tag():
+ *
+ * Looks up the NUL-terminated tag name in the tags[] table and
+ * returns the index of the matching entry (one of the TAG_*
+ * constants), or TAG_LAST if the name is not in the table.
+ *
+ * The search keeps a window [first, last) of table entries that agree
+ * with the name on all characters examined so far and narrows it one
+ * character position at a time. The comparisons assume that the table
+ * entries are stored in ascending order.
+ *
+ * In both scanning loops the window bound is tested BEFORE the table
+ * entry is dereferenced. An entry at or beyond the end of the window
+ * does not necessarily have i characters (the sentinel has only one
+ * byte), so reading it at position i could run past its end.
+ */
+static unsigned char FASTCALL
+find_tag(char *tag)
+{
+  static unsigned char first, last, i, tabi;
+  static char tagc;
+
+  tabi = first = TAG_FIRST;
+  last = TAG_LAST;
+  i = 0;
+
+  do {
+    tagc = tag[i];
+
+    /* The name ended exactly where the first candidate ends: match. */
+    if(tagc == 0 &&
+       tags[first][i] == 0) {
+      return first;
+    }
+
+    /* First, skip the candidates whose character at position i is
+       smaller than the one in the name. */
+    while(tabi < last &&
+          tagc > (tags[tabi])[i]) {
+      ++tabi;
+    }
+    first = tabi;
+
+    /* Second, find the end of the run of candidates that have the
+       same character at position i. */
+    while(tabi < last &&
+          tagc == (tags[tabi])[i]) {
+      ++tabi;
+    }
+    last = tabi;
+
+    /* An empty window means that no entry matches. Otherwise continue
+       with the next character, starting at the top of the window. */
+    ++i;
+    tabi = first;
+  } while(last != first);
+  return TAG_LAST;
+}
+/*-----------------------------------------------------------------------------------*/
+/* parse_char():
+ *
+ * Handles one character of document text. Characters with the high
+ * bit set (0x80 and above) are ignored. While a link is being
+ * collected (MAJORSTATE_LINK) the character is appended to the link
+ * text, with any whitespace character stored as a plain space; text
+ * that does not fit is silently dropped. In MAJORSTATE_DISCARD the
+ * character is thrown away. In every other major state it is handed
+ * to the htmlparser_char() callback.
+ */
+static void FASTCALL
+parse_char(unsigned char c)
+{
+  if(c >= 0x80) {
+    return;
+  }
+
+  if(s.majorstate == MAJORSTATE_LINK) {
+    /* Always keep the last byte of linktext[] free: parse_tag() stores
+       the terminating NUL at linktext[linktextptr] when the closing
+       link tag is found, so linktextptr must never reach
+       sizeof(s.linktext). */
+    if(s.linktextptr < sizeof(s.linktext) - 1) {
+      if(iswhitespace(c)) {
+        c = ISO_space;
+      }
+      s.linktext[s.linktextptr] = c;
+      ++s.linktextptr;
+    }
+  } else if(s.majorstate != MAJORSTATE_DISCARD) {
+    htmlparser_char(c);
+  }
+}
+/*-----------------------------------------------------------------------------------*/
+/* switch_majorstate():
+ *
+ * Enters the major state newstate and remembers the state that was
+ * active before in s.lastmajorstate. Nothing happens (in particular,
+ * s.lastmajorstate is left alone) if newstate is already the current
+ * state.
+ */
+static void
+switch_majorstate(unsigned char newstate)
+{
+  if(s.majorstate == newstate) {
+    return;
+  }
+
+  PRINTF(("Switching state from %d to %d (%d)\n", s.majorstate, newstate, s.lastmajorstate));
+  s.lastmajorstate = s.majorstate;
+  s.majorstate = newstate;
+}
+/*-----------------------------------------------------------------------------------*/
+/* parse_tag():
+ *
+ * Acts on the tag described by s.tag, s.tagattr and s.tagattrparam.
+ * htmlparser_parse() calls this function once for every attribute of
+ * a tag and once more, with an empty attribute name, when the end of
+ * the tag has been reached.
+ *
+ * Depending on the tag, the function emits line breaks and other
+ * characters through parse_char(), switches the major state, reports
+ * links and frames through htmlparser_link(), and (if enabled)
+ * collects form/input attributes and reports the resulting widgets
+ * and render state changes through the corresponding callbacks.
+ * Unknown tags are ignored.
+ *
+ * All strings copied into the form and input buffers are explicitly
+ * NUL-terminated, since strncpy() does not terminate the destination
+ * when the source is as long as or longer than the given size.
+ */
+static void
+parse_tag(void)
+{
+  static char *tagattrparam;
+  static unsigned char size, i;
+
+  PRINTF(("Parsing tag '%s' '%s' '%s'\n",
+          s.tag, s.tagattr, s.tagattrparam));
+
+  switch(find_tag(s.tag)) {
+  case TAG_P:
+  case TAG_H1:
+  case TAG_H2:
+  case TAG_H3:
+  case TAG_H4:
+    /* Paragraphs and headings get an extra empty line. */
+    parse_char(ISO_nl);
+    /* FALLTHROUGH */
+  case TAG_BR:
+  case TAG_TR:
+  case TAG_SLASHH:
+    parse_char(ISO_nl);
+    break;
+  case TAG_LI:
+    /* List items are rendered as "* " on a new line. */
+    parse_char(ISO_nl);
+    parse_char(ISO_asterisk);
+    parse_char(ISO_space);
+    break;
+  case TAG_SCRIPT:
+  case TAG_STYLE:
+  case TAG_SELECT:
+    switch_majorstate(MAJORSTATE_DISCARD);
+    break;
+  case TAG_SLASHSCRIPT:
+  case TAG_SLASHSTYLE:
+  case TAG_SLASHSELECT:
+    switch_majorstate(s.lastmajorstate);
+    break;
+  case TAG_BODY:
+    s.majorstate = s.lastmajorstate = MAJORSTATE_BODY;
+    break;
+  case TAG_FRAME:
+    if(strncmp(s.tagattr, html_src, sizeof(html_src)) == 0 &&
+       s.tagattrparam[0] != 0) {
+      switch_majorstate(MAJORSTATE_BODY);
+      parse_char(ISO_nl);
+      /* ISO_rbrack is 0x5b, the opening bracket, and ISO_lbrack is
+         0x5d, the closing one, so the link ends up inside "[ ]". */
+      parse_char(ISO_rbrack);
+      parse_char(ISO_space);
+      htmlparser_link(html_frame, s.tagattrparam);
+      PRINTF(("Frame [%s]\n", s.tagattrparam));
+      parse_char(ISO_space);
+      parse_char(ISO_lbrack);
+      parse_char(ISO_nl);
+    }
+    break;
+  case TAG_IMG:
+    /* Images are rendered as their alt text within angle brackets. */
+    if(strncmp(s.tagattr, html_alt, sizeof(html_alt)) == 0 &&
+       s.tagattrparam[0] != 0) {
+      parse_char(ISO_lt);
+      tagattrparam = &s.tagattrparam[0];
+      while(*tagattrparam) {
+        parse_char(*tagattrparam);
+        ++tagattrparam;
+      }
+      parse_char(ISO_gt);
+    }
+    break;
+  case TAG_A:
+    PRINTF(("A %s %s\n", s.tagattr, s.tagattrparam));
+    if(strncmp(s.tagattr, html_href, sizeof(html_href)) == 0 &&
+       s.tagattrparam[0] != 0) {
+      /* linkurl[] and tagattrparam[] have the same size. */
+      strcpy(s.linkurl, s.tagattrparam);
+      switch_majorstate(MAJORSTATE_LINK);
+      s.linktextptr = 0;
+    }
+    break;
+  case TAG_SLASHA:
+    if(s.majorstate == MAJORSTATE_LINK) {
+      switch_majorstate(s.lastmajorstate);
+      s.linktext[s.linktextptr] = 0;
+      htmlparser_link(s.linktext, s.linkurl);
+      PRINTF(("Link '%s' [%s]\n", s.linktext, s.linkurl));
+    }
+    break;
+#if WWW_CONF_FORMS
+  case TAG_FORM:
+    PRINTF(("Form tag\n"));
+    switch_majorstate(MAJORSTATE_FORM);
+    if(strncmp(s.tagattr, html_action, sizeof(html_action)) == 0) {
+      PRINTF(("Form action '%s'\n", s.tagattrparam));
+      strncpy(s.formaction, s.tagattrparam, WWW_CONF_MAX_FORMACTIONLEN - 1);
+      s.formaction[WWW_CONF_MAX_FORMACTIONLEN - 1] = 0;
+    } else if(strncmp(s.tagattr, html_name, sizeof(html_name)) == 0) {
+      PRINTF(("Form name '%s'\n", s.tagattrparam));
+      strncpy(s.formname, s.tagattrparam, WWW_CONF_MAX_FORMNAMELEN - 1);
+      s.formname[WWW_CONF_MAX_FORMNAMELEN - 1] = 0;
+    }
+    s.inputname[0] = s.inputvalue[0] = 0;
+    break;
+  case TAG_SLASHFORM:
+    switch_majorstate(MAJORSTATE_BODY);
+    s.formaction[0] = s.formname[0] = 0;
+    break;
+  case TAG_INPUT:
+    if(s.majorstate == MAJORSTATE_FORM) {
+      /* First check if we are called at the end of an input tag. If
+         so, we should render the input widget. */
+      if(s.tagattr[0] == 0 &&
+         s.inputname[0] != 0) {
+        PRINTF(("Render input type %d\n", s.inputtype));
+        switch(s.inputtype) {
+        case HTMLPARSER_INPUTTYPE_NONE:
+        case HTMLPARSER_INPUTTYPE_TEXT:
+          /* Pad a value that is shorter than the requested size with
+             spaces up to that size. */
+          for(i = 0; i < s.inputvaluesize; ++i) {
+            if(s.inputvalue[i] == 0) {
+              memset(&s.inputvalue[i], ISO_space, s.inputvaluesize - i);
+              s.inputvalue[s.inputvaluesize] = 0;
+              break;
+            }
+          }
+          htmlparser_inputfield(s.inputvalue, s.inputname,
+                                s.formname, s.formaction);
+          break;
+        case HTMLPARSER_INPUTTYPE_SUBMIT:
+        case HTMLPARSER_INPUTTYPE_IMAGE:
+          htmlparser_submitbutton(s.inputvalue, s.inputname,
+                                  s.formname, s.formaction);
+          break;
+        default:
+          /* Other input types are not rendered. */
+          break;
+        }
+        s.inputtype = HTMLPARSER_INPUTTYPE_NONE;
+      } else {
+        PRINTF(("Input '%s' '%s'\n", s.tagattr, s.tagattrparam));
+        if(strncmp(s.tagattr, html_type, sizeof(html_type)) == 0) {
+          if(strncmp(s.tagattrparam, html_submit,
+                     sizeof(html_submit)) == 0) {
+            s.inputtype = HTMLPARSER_INPUTTYPE_SUBMIT;
+          } else if(strncmp(s.tagattrparam, html_image,
+                            sizeof(html_image)) == 0) {
+            s.inputtype = HTMLPARSER_INPUTTYPE_IMAGE;
+          } else if(strncmp(s.tagattrparam, html_text,
+                            sizeof(html_text)) == 0) {
+            s.inputtype = HTMLPARSER_INPUTTYPE_TEXT;
+          } else {
+            s.inputtype = HTMLPARSER_INPUTTYPE_OTHER;
+          }
+        } else if(strncmp(s.tagattr, html_name,
+                          sizeof(html_name)) == 0) {
+          strncpy(s.inputname, s.tagattrparam,
+                  WWW_CONF_MAX_INPUTNAMELEN - 1);
+          s.inputname[WWW_CONF_MAX_INPUTNAMELEN - 1] = 0;
+        } else if(strncmp(s.tagattr, html_alt,
+                          sizeof(html_alt)) == 0 &&
+                  s.inputtype == HTMLPARSER_INPUTTYPE_IMAGE) {
+          strncpy(s.inputvalue, s.tagattrparam,
+                  WWW_CONF_MAX_INPUTVALUELEN - 1);
+          s.inputvalue[WWW_CONF_MAX_INPUTVALUELEN - 1] = 0;
+        } else if(strncmp(s.tagattr, html_value,
+                          sizeof(html_value)) == 0) {
+          strncpy(s.inputvalue, s.tagattrparam,
+                  WWW_CONF_MAX_INPUTVALUELEN - 1);
+          s.inputvalue[WWW_CONF_MAX_INPUTVALUELEN - 1] = 0;
+        } else if(strncmp(s.tagattr, html_size,
+                          sizeof(html_size)) == 0) {
+          /* Parse at most two decimal digits and clamp the result so
+             that the padded value still fits in inputvalue[]. */
+          size = 0;
+          if(s.tagattrparam[0] >= '0' &&
+             s.tagattrparam[0] <= '9') {
+            size = s.tagattrparam[0] - '0';
+            if(s.tagattrparam[1] >= '0' &&
+               s.tagattrparam[1] <= '9') {
+              size = size * 10 + (s.tagattrparam[1] - '0');
+            }
+          }
+          if(size >= WWW_CONF_MAX_INPUTVALUELEN) {
+            size = WWW_CONF_MAX_INPUTVALUELEN - 1;
+          }
+          s.inputvaluesize = size;
+        }
+      }
+
+    }
+    break;
+#endif /* WWW_CONF_FORMS */
+#if WWW_CONF_RENDERSTATE
+  case TAG_CENTER:
+    parse_char(ISO_nl);
+    htmlparser_renderstate(HTMLPARSER_RENDERSTATE_BEGIN |
+                           HTMLPARSER_RENDERSTATE_CENTER);
+    break;
+  case TAG_SLASHCENTER:
+    parse_char(ISO_nl);
+    htmlparser_renderstate(HTMLPARSER_RENDERSTATE_END |
+                           HTMLPARSER_RENDERSTATE_CENTER);
+    break;
+#endif /* WWW_CONF_RENDERSTATE */
+  default:
+    /* Unknown or unhandled tag: nothing to do. */
+    break;
+  }
+}
+/*-----------------------------------------------------------------------------------*/
+/* htmlparser_init():
+ *
+ * Puts the parser in its start state: text scanning is active, but
+ * all text is discarded until a tag changes the major state. The
+ * remembered last input character is cleared.
+ */
+void
+htmlparser_init(void)
+{
+  s.minorstate = MINORSTATE_TEXT;
+  s.lastmajorstate = MAJORSTATE_DISCARD;
+  s.majorstate = MAJORSTATE_DISCARD;
+  s.lastchar = 0;
+}
+/*-----------------------------------------------------------------------------------*/
+/* lowercase():
+ *
+ * Crude lower-case conversion for tag and attribute names. Every
+ * character above 0x40 is forced into the range 0x60-0x7f by keeping
+ * its five low bits; characters up to and including 0x40 are returned
+ * unchanged.
+ *
+ * XXX: This is a *brute force* approach that also alters characters
+ * which are not letters, and it should *not* be used anywhere else!
+ * It works for our purposes, however (i.e., HTML tags).
+ */
+static char FASTCALL
+lowercase(char c)
+{
+  return (c > 0x40) ? ((c & 0x1f) | 0x60) : c;
+}
+/*-----------------------------------------------------------------------------------*/
+/* endtagfound():
+ *
+ * NUL-terminates the tag name, the attribute name and the attribute
+ * value at their current write positions. Callers that want one of
+ * the strings to be empty set the corresponding write index to zero
+ * before calling.
+ */
+static void
+endtagfound(void)
+{
+  s.tag[s.tagptr] = '\0';
+  s.tagattr[s.tagattrptr] = '\0';
+  s.tagattrparam[s.tagattrparamptr] = '\0';
+}
+/*-----------------------------------------------------------------------------------*/
+/* htmlparser_parse():
+ *
+ * This is the main function in the HTML parser module and it parses
+ * the HTML data in the input buffer. The htmlparser_state is updated
+ * as the buffer is parsed character by character. The functions
+ * parse_char() and parse_tag() (defined earlier in this file) are
+ * called to process regular characters and HTML tags,
+ * respectively.
+ *
+ * data points to the first character to parse and len is the number
+ * of characters available at data.
+ *
+ * Note that the input buffer does not have to contain full HTML tags;
+ * the parser is state machine driven in order to be able to work with
+ * buffers that have been divided in any way.
+ *
+ * parse_tag() is called once for every attribute of a tag and once
+ * more, with an empty attribute name, when the closing '>' has been
+ * reached.
+ */
+void
+htmlparser_parse(char *data, u16_t len)
+{
+  static char c;
+
+  while(len > 0) {
+    c = *data;
+    --len;
+    ++data;
+
+    switch(s.minorstate) {
+    case MINORSTATE_NONE:
+      break;
+    case MINORSTATE_TEXT:
+      /* We are currently parsing some text, so we look for signs of
+         an HTML tag starting (i.e., a '<' character). We also
+         compress any run of whitespace characters to one single
+         space character (' '). */
+      if(c == ISO_lt) {
+        s.minorstate = MINORSTATE_TAG;
+        s.tagptr = 0;
+        endtagfound();
+      } else if(c == ISO_ampersand) {
+        s.minorstate = MINORSTATE_EXTCHAR;
+      } else if(iswhitespace(c)) {
+        /* Normalize before the check so that s.lastchar (set at the
+           bottom of the loop) is a space for EVERY whitespace
+           character. Otherwise a suppressed tab or newline would be
+           remembered as-is, and the next whitespace character of the
+           same run would produce a second space. */
+        c = ISO_space;
+        if(s.lastchar != ISO_space) {
+          parse_char(' ');
+        }
+      } else {
+        parse_char(c);
+      }
+      break;
+    case MINORSTATE_EXTCHAR:
+      /* Character entities are not decoded; a terminated entity is
+         rendered as a space, and a lone '&' followed by whitespace
+         is rendered as "& ". */
+      if(c == ISO_semicolon) {
+        s.minorstate = MINORSTATE_TEXT;
+        parse_char(' ');
+      } else if(iswhitespace(c)) {
+        s.minorstate = MINORSTATE_TEXT;
+        parse_char('&');
+        parse_char(' ');
+      }
+      break;
+    case MINORSTATE_TAG:
+      /* We are currently parsing within the name of a tag. We check
+         for the end of a tag (the '>' character) or whitespace (which
+         indicates that we should parse a tag attr argument
+         instead). */
+      if(c == ISO_gt) {
+        /* Full tag found. We continue parsing regular text. */
+        s.minorstate = MINORSTATE_TEXT;
+        s.tagattrptr = s.tagattrparamptr = 0;
+        endtagfound();
+        parse_tag();
+      } else if(iswhitespace(c)) {
+        /* The name of the tag found. We continue parsing the tag
+           attr.*/
+        s.minorstate = MINORSTATE_TAGATTR;
+        s.tagattrptr = 0;
+        endtagfound();
+      } else {
+        /* Keep track of the name of the tag, but convert it to
+           lower case. */
+        s.tag[s.tagptr] = lowercase(c);
+        ++s.tagptr;
+        /* Check if the ->tag field is full. If so, we just eat up
+           any data left in the tag. The last byte of the field is
+           reserved for the NUL that endtagfound() stores at
+           tag[tagptr], so tagptr must stay below sizeof(s.tag). */
+        if(s.tagptr == sizeof(s.tag) - 1) {
+          s.minorstate = MINORSTATE_TAGEND;
+        }
+      }
+
+      /* Check for HTML comment, indicated by <!-- */
+      if(s.tagptr == 3 &&
+         s.tag[0] == ISO_bang &&
+         s.tag[1] == ISO_dash &&
+         s.tag[2] == ISO_dash) {
+        PRINTF(("Starting comment...\n"));
+        s.minorstate = MINORSTATE_HTMLCOMMENT;
+        s.tagptr = 0;
+        endtagfound();
+      }
+      break;
+    case MINORSTATE_TAGATTR:
+      /* We parse the "tag attr", i.e., the "href" in <a
+         href="...">. */
+      if(c == ISO_gt) {
+        /* Full tag found. */
+        s.minorstate = MINORSTATE_TEXT;
+        s.tagattrparamptr = 0;
+        s.tagattrptr = 0;
+        endtagfound();
+        parse_tag();
+        s.tagptr = 0;
+        endtagfound();
+
+      } else if(iswhitespace(c)) {
+        if(s.tagattrptr == 0) {
+          /* Discard leading spaces. */
+        } else {
+          /* A non-leading space is the end of the attribute. */
+          s.tagattrparamptr = 0;
+          endtagfound();
+          parse_tag();
+          s.minorstate = MINORSTATE_TAGATTRSPACE;
+        }
+      } else if(c == ISO_eq) {
+        s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
+        s.tagattrparamptr = 0;
+        endtagfound();
+      } else {
+        s.tagattr[s.tagattrptr] = lowercase(c);
+        ++s.tagattrptr;
+        /* Check if the "tagattr" field is full. If so, we just eat
+           up any data left in the tag. (MINORSTATE_TAGEND resets
+           tagattrptr before terminating the strings.) */
+        if(s.tagattrptr == sizeof(s.tagattr)) {
+          s.minorstate = MINORSTATE_TAGEND;
+        }
+      }
+      break;
+    case MINORSTATE_TAGATTRSPACE:
+      if(iswhitespace(c)) {
+        /* Discard spaces. */
+      } else if(c == ISO_eq) {
+        s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
+        s.tagattrparamptr = 0;
+        endtagfound();
+        parse_tag();
+      } else {
+        /* Start of the next attribute name. */
+        s.tagattr[0] = lowercase(c);
+        s.tagattrptr = 1;
+        s.minorstate = MINORSTATE_TAGATTR;
+      }
+      break;
+    case MINORSTATE_TAGATTRPARAMNQ:
+      /* We are parsing the "tag attr parameter", i.e., the link part
+         in <a href="link">. */
+      if(c == ISO_gt) {
+        /* Full tag found. Report the attribute first, then report
+           the end of the tag with an empty attribute name. */
+        endtagfound();
+        parse_tag();
+        s.minorstate = MINORSTATE_TEXT;
+        s.tagattrptr = 0;
+        endtagfound();
+        parse_tag();
+        s.tagptr = 0;
+        endtagfound();
+      } else if(iswhitespace(c) &&
+                s.tagattrparamptr == 0) {
+        /* Discard leading spaces. */
+      } else if((c == ISO_citation ||
+                 c == ISO_citation2) &&
+                s.tagattrparamptr == 0) {
+        s.minorstate = MINORSTATE_TAGATTRPARAM;
+        s.quotechar = c;
+        PRINTF(("tag attr param q found\n"));
+      } else if(iswhitespace(c)) {
+        PRINTF(("Non-leading space found at %d\n",
+                s.tagattrparamptr));
+        /* Stop parsing if a non-leading space was found */
+        endtagfound();
+        parse_tag();
+
+        s.minorstate = MINORSTATE_TAGATTR;
+        s.tagattrptr = 0;
+        endtagfound();
+      } else {
+        s.tagattrparam[s.tagattrparamptr] = c;
+        ++s.tagattrparamptr;
+        /* Check if the "tagattrparam" field is full. If so, we just
+           eat up any data left in the tag. */
+        if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
+          s.minorstate = MINORSTATE_TAGEND;
+        }
+      }
+
+      break;
+    case MINORSTATE_TAGATTRPARAM:
+      /* We are parsing the "tag attr parameter", i.e., the link
+         part in <a href="link">. */
+      if(c == s.quotechar) {
+        /* Found end of tag attr parameter. */
+        endtagfound();
+        parse_tag();
+
+        s.minorstate = MINORSTATE_TAGATTR;
+        s.tagattrptr = 0;
+        endtagfound();
+      } else {
+        if(iswhitespace(c)) {
+          c = ISO_space;
+        }
+        s.tagattrparam[s.tagattrparamptr] = c;
+        ++s.tagattrparamptr;
+        /* Check if the "tagattrparam" field is full. If so, we just
+           eat up any data left in the tag. */
+        if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
+          s.minorstate = MINORSTATE_TAGEND;
+        }
+      }
+
+      break;
+    case MINORSTATE_HTMLCOMMENT:
+      /* s.tagptr counts the dashes seen immediately before the
+         current character. A comment ends with "-->", so at least
+         two dashes are required in front of the '>'; a single dash,
+         as in "a->b", does not end the comment. The counter
+         saturates at two so that a long run of dashes cannot make
+         it wrap around. */
+      if(c == ISO_dash) {
+        if(s.tagptr < 2) {
+          ++s.tagptr;
+        }
+      } else if(c == ISO_gt && s.tagptr >= 2) {
+        PRINTF(("Comment done.\n"));
+        s.minorstate = MINORSTATE_TEXT;
+      } else {
+        s.tagptr = 0;
+      }
+      break;
+    case MINORSTATE_TAGEND:
+      /* Discard characters until a '>' is seen. */
+      if(c == ISO_gt) {
+        s.minorstate = MINORSTATE_TEXT;
+        s.tagattrptr = 0;
+        endtagfound();
+        parse_tag();
+      }
+      break;
+    default:
+      break;
+    }
+
+    s.lastchar = c;
+  }
+}
+/*-----------------------------------------------------------------------------------*/
commit	ca9ddcb6823d118622e4532edb1ecafa1a1b0692	[log] [tgz]
author	adamdunkels <adamdunkels>	mer. mars 19 14:13:31 2003 +0000
committer	adamdunkels <adamdunkels>	mer. mars 19 14:13:31 2003 +0000
tree	c4f9086ffa74cbcae19761140b6da61a0acb9d1b