Blame - contiki/apps/htmlparser.c - contiki-1.x

blob: 1ee41d1cfbd84c96476d633edf4014825899ca1b [file] [log] [blame]

adamdunkels	ca9ddcb	2003-03-19 14:13:31 +0000	[diff] [blame^]	1	/*
				2	* Copyright (c) 2002, Adam Dunkels.
				3	* All rights reserved.
				4	*
				5	* Redistribution and use in source and binary forms, with or without
				6	* modification, are permitted provided that the following conditions
				7	* are met:
				8	* 1. Redistributions of source code must retain the above copyright
				9	* notice, this list of conditions and the following disclaimer.
				10	* 2. Redistributions in binary form must reproduce the above
				11	* copyright notice, this list of conditions and the following
				12	* disclaimer in the documentation and/or other materials provided
				13	* with the distribution.
				14	* 3. All advertising materials mentioning features or use of this
				15	* software must display the following acknowledgement:
				16	* This product includes software developed by Adam Dunkels.
				17	* 4. The name of the author may not be used to endorse or promote
				18	* products derived from this software without specific prior
				19	* written permission.
				20	*
				21	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
				22	* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
				23	* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				24	* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
				25	* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
				26	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
				27	* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
				28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
				29	* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
				30	* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
				31	* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				32	*
				33	* This file is part of the Contiki desktop environment
				34	*
				35	* $Id: htmlparser.c,v 1.1 2003/03/19 14:13:33 adamdunkels Exp $
				36	*
				37	*/
				38
				39	/* htmlparser.c:
				40	*
				41	* Implements a very simplistic HTML parser. It recognizes HTML links
				42	* (<a href>-tags), HTML img alt tags, a few text flow break tags
				43	* (<br>, <p>, <h>), the <li> tag (but does not even try to
				44	* distinguish between <ol> or <ul>) as well as HTML comment tags
				45	* (<!-- -->).
				46	*
				47	* To save memory, the HTML parser is state machine driven, which
				48	* means that it will shave off one character from the HTML page,
				49	* process that character, and return to the next. Another way of
				50	* doing it would be to buffer a number of characters and process them
				51	* together.
				52	*
				53	* The main function in this file is the htmlparser_parse() function
				54	* which takes a htmlparser_state structure and a part of an HTML file
				55	* as an argument. The htmlparser_parse() function will call the
				56	* helper functions parse_char() and parse_tag(). Those functions will
				57	* in turn call the two callback functions htmlparser_char() and
				58	* htmlparser_tag(). Those functions must be implemented by the using
				59	* module (e.g., a web browser program).
				60	*
				61	* htmlparser_char() will be called for every non-tag character.
				62	*
				63	* htmlparser_tag() will be called whenever a full tag has been found.
				64	*
				65	*/
				66
				67
				68	#include "htmlparser.h"
				69	#include "html-strings.h"
				70	#include "www-conf.h"
				71
				72	#if 1
				73	#define PRINTF(x)
				74	#else
				75	#include <stdio.h>
				76	#define PRINTF(x) printf x
				77	#endif
				78
				79	#ifdef WITH_CC65
				80	#define FASTCALL __fastcall__
				81	#else
				82	#define FASTCALL
				83	#endif
				84
				85	/*-----------------------------------------------------------------------------------*/
				86	#define ISO_A 0x41
				87	#define ISO_B 0x42
				88	#define ISO_E 0x45
				89	#define ISO_F 0x46
				90	#define ISO_G 0x47
				91	#define ISO_H 0x48
				92	#define ISO_I 0x49
				93	#define ISO_L 0x4c
				94	#define ISO_M 0x4d
				95	#define ISO_P 0x50
				96	#define ISO_R 0x52
				97	#define ISO_T 0x54
				98
				99	#define ISO_a (ISO_A \| 0x20)
				100	#define ISO_b (ISO_B \| 0x20)
				101	#define ISO_e (ISO_E \| 0x20)
				102	#define ISO_f (ISO_F \| 0x20)
				103	#define ISO_g (ISO_G \| 0x20)
				104	#define ISO_h (ISO_H \| 0x20)
				105	#define ISO_i (ISO_I \| 0x20)
				106	#define ISO_l (ISO_L \| 0x20)
				107	#define ISO_m (ISO_M \| 0x20)
				108	#define ISO_p (ISO_P \| 0x20)
				109	#define ISO_r (ISO_R \| 0x20)
				110	#define ISO_t (ISO_T \| 0x20)
				111
				112	#define ISO_ht 0x09
				113	#define ISO_nl 0x0a
				114	#define ISO_cr 0x0d
				115	#define ISO_space 0x20
				116	#define ISO_bang 0x21
				117	#define ISO_citation 0x22
				118	#define ISO_ampersand 0x26
				119	#define ISO_citation2 0x27
				120	#define ISO_asterisk 0x2a
				121	#define ISO_dash 0x2d
				122	#define ISO_slash 0x2f
				123	#define ISO_semicolon 0x3b
				124	#define ISO_lt 0x3c
				125	#define ISO_eq 0x3d
				126	#define ISO_gt 0x3e
				127
				128	#define ISO_rbrack 0x5b
				129	#define ISO_lbrack 0x5d
				130
				131	#define MINORSTATE_NONE 0
				132	#define MINORSTATE_TEXT 1 /* Parse normal text */
				133	#define MINORSTATE_EXTCHAR 2 /* Check for semi-colon */
				134	#define MINORSTATE_TAG 3 /* Check for name of tag. */
				135	#define MINORSTATE_TAGEND 4 /* Scan for end of tag. */
				136	#define MINORSTATE_TAGATTR 5 /* Parse tag attr. */
				137	#define MINORSTATE_TAGATTRSPACE 6 /* Parse optional space after tag
				138	attr. */
				139	#define MINORSTATE_TAGATTRPARAM 7 /* Parse tag attr parameter. */
				140	#define MINORSTATE_TAGATTRPARAMNQ 8 /* Parse tag attr parameter without
				141	quotation marks. */
				142	#define MINORSTATE_HTMLCOMMENT 9 /* Scan for HTML comment end */
				143
				144	#define MAJORSTATE_NONE 0
				145	#define MAJORSTATE_BODY 1
				146	#define MAJORSTATE_LINK 2
				147	#define MAJORSTATE_FORM 3
				148	#define MAJORSTATE_DISCARD 4
				149
				150
				151	struct htmlparser_state {
				152	unsigned char minorstate;
				153	char tag[20];
				154	unsigned char tagptr;
				155	char tagattr[20];
				156	unsigned char tagattrptr;
				157	char tagattrparam[WWW_CONF_MAX_URLLEN];
				158	unsigned char tagattrparamptr;
				159	unsigned char lastchar, quotechar;
				160	unsigned char majorstate, lastmajorstate;
				161	char linkurl[WWW_CONF_MAX_URLLEN];
				162	char linktext[40];
				163	unsigned char linktextptr;
				164	#if WWW_CONF_FORMS
				165	char formaction[WWW_CONF_MAX_FORMACTIONLEN];
				166	char formname[WWW_CONF_MAX_FORMNAMELEN];
				167	unsigned char inputtype;
				168	char inputname[WWW_CONF_MAX_INPUTNAMELEN];
				169	char inputvalue[WWW_CONF_MAX_INPUTVALUELEN];
				170	unsigned char inputvaluesize;
				171	#endif /* WWW_CONF_FORMS */
				172	};
				173
				174	static struct htmlparser_state s;
				175
				176	/*-----------------------------------------------------------------------------------*/
				177	static char last[1] = {0xff};
				178
				179	static char *tags[] = {
				180	#define TAG_FIRST 0
				181	#define TAG_SLASHA 0
				182	html_slasha,
				183	#define TAG_SLASHCENTER 1
				184	html_slashcenter,
				185	#define TAG_SLASHFORM 2
				186	html_slashform,
				187	#define TAG_SLASHH 3
				188	html_slashh,
				189	#define TAG_SLASHSCRIPT 4
				190	html_slashscript,
				191	#define TAG_SLASHSELECT 5
				192	html_slashselect,
				193	#define TAG_SLASHSTYLE 6
				194	html_slashstyle,
				195	#define TAG_A 7
				196	html_a,
				197	#define TAG_BODY 8
				198	html_body,
				199	#define TAG_BR 9
				200	html_br,
				201	#define TAG_CENTER 10
				202	html_center,
				203	#define TAG_FORM 11
				204	html_form,
				205	#define TAG_FRAME 12
				206	html_frame,
				207	#define TAG_H1 13
				208	html_h1,
				209	#define TAG_H2 14
				210	html_h2,
				211	#define TAG_H3 15
				212	html_h3,
				213	#define TAG_H4 16
				214	html_h4,
				215	#define TAG_IMG 17
				216	html_img,
				217	#define TAG_INPUT 18
				218	html_input,
				219	#define TAG_LI 19
				220	html_li,
				221	#define TAG_P 20
				222	html_p,
				223	#define TAG_SCRIPT 21
				224	html_script,
				225	#define TAG_SELECT 22
				226	html_select,
				227	#define TAG_STYLE 23
				228	html_style,
				229	#define TAG_TR 24
				230	html_tr,
				231	#define TAG_LAST 25
				232	last,
				233	};
				234
				235	/*-----------------------------------------------------------------------------------*/
				236	static unsigned char FASTCALL
				237	iswhitespace(char c)
				238	{
				239	return (c == ISO_space \|\|
				240	c == ISO_nl \|\|
				241	c == ISO_cr \|\|
				242	c == ISO_ht);
				243	}
				244	/*-----------------------------------------------------------------------------------*/
				245	static unsigned char FASTCALL
				246	find_tag(char *tag)
				247	{
				248	static unsigned char first, last, i, tabi;
				249	static char tagc;
				250
				251	tabi = first = TAG_FIRST;
				252	last = TAG_LAST;
				253	i = 0;
				254
				255	do {
				256	tagc = tag[i];
				257
				258	if(tagc == 0 &&
				259	tags[first][i] == 0) {
				260	return first;
				261	}
				262
				263	/* First, find first matching tag from table. */
				264	while(tagc > (tags[tabi])[i] &&
				265	tabi < last) {
				266	++tabi;
				267	}
				268	first = tabi;
				269
				270	/* Second, find last matching tag from table. */
				271	while(tagc == (tags[tabi])[i] &&
				272	tabi < last) {
				273	++tabi;
				274	}
				275	last = tabi;
				276
				277	/* If first and last matching tags are equal, we have a match and
				278	return. Else we continue with the next character. */
				279	++i;
				280	tabi = first;
				281	} while(last != first);
				282	return TAG_LAST;
				283	}
				284	/*-----------------------------------------------------------------------------------*/
				285	static void FASTCALL
				286	parse_char(unsigned char c)
				287	{
				288	if(c < 0x80) {
				289	if(s.majorstate == MAJORSTATE_LINK) {
				290	if(s.linktextptr < sizeof(s.linktext)) {
				291	if(iswhitespace(c)) {
				292	c = ISO_space;
				293	}
				294	s.linktext[s.linktextptr] = c;
				295	++s.linktextptr;
				296	}
				297	} else if(s.majorstate != MAJORSTATE_DISCARD) {
				298	htmlparser_char(c);
				299	}
				300	}
				301	}
				302	/*-----------------------------------------------------------------------------------*/
				303	static void
				304	switch_majorstate(unsigned char newstate)
				305	{
				306	if(s.majorstate != newstate) {
				307	PRINTF(("Switching state from %d to %d (%d)\n", s.majorstate, newstate, s.lastmajorstate));
				308	s.lastmajorstate = s.majorstate;
				309	s.majorstate = newstate;
				310	}
				311	}
				312	/*-----------------------------------------------------------------------------------*/
				313	static void
				314	parse_tag(void)
				315	{
				316	static char *tagattrparam;
				317	static unsigned char size, i;
				318
				319	PRINTF(("Parsing tag '%s' '%s' '%s'\n",
				320	s.tag, s.tagattr, s.tagattrparam));
				321
				322	switch(find_tag(s.tag)) {
				323	case TAG_P:
				324	case TAG_H1:
				325	case TAG_H2:
				326	case TAG_H3:
				327	case TAG_H4:
				328	parse_char(ISO_nl);
				329	/* FALLTHROUGH */
				330	case TAG_BR:
				331	case TAG_TR:
				332	case TAG_SLASHH:
				333	parse_char(ISO_nl);
				334	break;
				335	case TAG_LI:
				336	parse_char(ISO_nl);
				337	parse_char(ISO_asterisk);
				338	parse_char(ISO_space);
				339	break;
				340	case TAG_SCRIPT:
				341	case TAG_STYLE:
				342	case TAG_SELECT:
				343	switch_majorstate(MAJORSTATE_DISCARD);
				344	break;
				345	case TAG_SLASHSCRIPT:
				346	case TAG_SLASHSTYLE:
				347	case TAG_SLASHSELECT:
				348	switch_majorstate(s.lastmajorstate);
				349	break;
				350	case TAG_BODY:
				351	s.majorstate = s.lastmajorstate = MAJORSTATE_BODY;
				352	break;
				353	case TAG_FRAME:
				354	if(strncmp(s.tagattr, html_src, sizeof(html_src)) == 0 &&
				355	s.tagattrparam[0] != 0) {
				356	switch_majorstate(MAJORSTATE_BODY);
				357	parse_char(ISO_nl);
				358	parse_char(ISO_rbrack);
				359	parse_char(ISO_space);
				360	htmlparser_link(html_frame, s.tagattrparam);
				361	PRINTF(("Frame [%s]\n", s.tagattrparam));
				362	parse_char(ISO_space);
				363	parse_char(ISO_lbrack);
				364	parse_char(ISO_nl);
				365	}
				366	break;
				367	case TAG_IMG:
				368	if(strncmp(s.tagattr, html_alt, sizeof(html_alt)) == 0 &&
				369	s.tagattrparam[0] != 0) {
				370	parse_char(ISO_lt);
				371	tagattrparam = &s.tagattrparam[0];
				372	while(*tagattrparam) {
				373	parse_char(*tagattrparam);
				374	++tagattrparam;
				375	}
				376	parse_char(ISO_gt);
				377	}
				378	break;
				379	case TAG_A:
				380	PRINTF(("A %s %s\n", s.tagattr, s.tagattrparam));
				381	if(strncmp(s.tagattr, html_href, sizeof(html_href)) == 0 &&
				382	s.tagattrparam[0] != 0) {
				383	strcpy(s.linkurl, s.tagattrparam);
				384	switch_majorstate(MAJORSTATE_LINK);
				385	s.linktextptr = 0;
				386	}
				387	break;
				388	case TAG_SLASHA:
				389	if(s.majorstate == MAJORSTATE_LINK) {
				390	switch_majorstate(s.lastmajorstate);
				391	s.linktext[s.linktextptr] = 0;
				392	htmlparser_link(s.linktext, s.linkurl);
				393	PRINTF(("Link '%s' [%s]\n", s.linktext, s.linkurl));
				394	}
				395	break;
				396	#if WWW_CONF_FORMS
				397	case TAG_FORM:
				398	PRINTF(("Form tag\n"));
				399	switch_majorstate(MAJORSTATE_FORM);
				400	if(strncmp(s.tagattr, html_action, sizeof(html_action)) == 0) {
				401	PRINTF(("Form action '%s'\n", s.tagattrparam));
				402	strncpy(s.formaction, s.tagattrparam, WWW_CONF_MAX_FORMACTIONLEN - 1);
				403	} else if(strncmp(s.tagattr, html_name, sizeof(html_name)) == 0) {
				404	PRINTF(("Form name '%s'\n", s.tagattrparam));
				405	strncpy(s.formname, s.tagattrparam, WWW_CONF_MAX_FORMNAMELEN - 1);
				406	}
				407	s.inputname[0] = s.inputvalue[0] = 0;
				408	break;
				409	case TAG_SLASHFORM:
				410	switch_majorstate(MAJORSTATE_BODY);
				411	s.formaction[0] = s.formname[0] = 0;
				412	break;
				413	case TAG_INPUT:
				414	if(s.majorstate == MAJORSTATE_FORM) {
				415	/* First check if we are called at the end of an input tag. If
				416	so, we should render the input widget. */
				417	if(s.tagattr[0] == 0 &&
				418	s.inputname[0] != 0) {
				419	PRINTF(("Render input type %d\n", s.inputtype));
				420	switch(s.inputtype) {
				421	case HTMLPARSER_INPUTTYPE_NONE:
				422	case HTMLPARSER_INPUTTYPE_TEXT:
				423	for(i = 0; i < s.inputvaluesize; ++i) {
				424	if(s.inputvalue[i] == 0) {
				425	memset(&s.inputvalue[i], ISO_space, s.inputvaluesize - i);
				426	s.inputvalue[s.inputvaluesize] = 0;
				427	break;
				428	}
				429	}
				430	htmlparser_inputfield(s.inputvalue, s.inputname,
				431	s.formname, s.formaction);
				432	break;
				433	case HTMLPARSER_INPUTTYPE_SUBMIT:
				434	case HTMLPARSER_INPUTTYPE_IMAGE:
				435	htmlparser_submitbutton(s.inputvalue, s.inputname,
				436	s.formname, s.formaction);
				437	break;
				438	}
				439	s.inputtype = HTMLPARSER_INPUTTYPE_NONE;
				440	} else {
				441	PRINTF(("Input '%s' '%s'\n", s.tagattr, s.tagattrparam));
				442	if(strncmp(s.tagattr, html_type, sizeof(html_type)) == 0) {
				443	if(strncmp(s.tagattrparam, html_submit,
				444	sizeof(html_submit)) == 0) {
				445	s.inputtype = HTMLPARSER_INPUTTYPE_SUBMIT;
				446	} else if(strncmp(s.tagattrparam, html_image,
				447	sizeof(html_image)) == 0) {
				448	s.inputtype = HTMLPARSER_INPUTTYPE_IMAGE;
				449	} else if(strncmp(s.tagattrparam, html_text,
				450	sizeof(html_text)) == 0) {
				451	s.inputtype = HTMLPARSER_INPUTTYPE_TEXT;
				452	} else {
				453	s.inputtype = HTMLPARSER_INPUTTYPE_OTHER;
				454	}
				455	} else if(strncmp(s.tagattr, html_name,
				456	sizeof(html_name)) == 0) {
				457	strncpy(s.inputname, s.tagattrparam,
				458	WWW_CONF_MAX_INPUTNAMELEN);
				459	} else if(strncmp(s.tagattr, html_alt,
				460	sizeof(html_alt)) == 0 &&
				461	s.inputtype == HTMLPARSER_INPUTTYPE_IMAGE) {
				462	strncpy(s.inputvalue, s.tagattrparam,
				463	WWW_CONF_MAX_INPUTVALUELEN);
				464	} else if(strncmp(s.tagattr, html_value,
				465	sizeof(html_value)) == 0) {
				466	strncpy(s.inputvalue, s.tagattrparam,
				467	WWW_CONF_MAX_INPUTVALUELEN);
				468	} else if(strncmp(s.tagattr, html_size,
				469	sizeof(html_size)) == 0) {
				470	size = 0;
				471	if(s.tagattrparam[0] >= '0' &&
				472	s.tagattrparam[0] <= '9') {
				473	size = s.tagattrparam[0] - '0';
				474	if(s.tagattrparam[1] >= '0' &&
				475	s.tagattrparam[1] <= '9') {
				476	size = size * 10 + (s.tagattrparam[1] - '0');
				477	}
				478	}
				479	if(size >= WWW_CONF_MAX_INPUTVALUELEN) {
				480	size = WWW_CONF_MAX_INPUTVALUELEN - 1;
				481	}
				482	s.inputvaluesize = size;
				483	/* strncpy(s.inputvalue, s.tagattrparam,
				484	WWW_CONF_MAX_INPUTVALUELEN);*/
				485	}
				486	}
				487
				488	}
				489	break;
				490	#endif /* WWW_CONF_FORMS */
				491	#if WWW_CONF_RENDERSTATE
				492	case TAG_CENTER:
				493	parse_char(ISO_nl);
				494	htmlparser_renderstate(HTMLPARSER_RENDERSTATE_BEGIN \|
				495	HTMLPARSER_RENDERSTATE_CENTER);
				496	break;
				497	case TAG_SLASHCENTER:
				498	parse_char(ISO_nl);
				499	htmlparser_renderstate(HTMLPARSER_RENDERSTATE_END \|
				500	HTMLPARSER_RENDERSTATE_CENTER);
				501	break;
				502	#endif /* WWW_CONF_RENDERSTATE */
				503	}
				504	}
				505	/*-----------------------------------------------------------------------------------*/
				506	void
				507	htmlparser_init(void)
				508	{
				509	s.majorstate = s.lastmajorstate = MAJORSTATE_DISCARD;
				510	s.minorstate = MINORSTATE_TEXT;
				511	s.lastchar = 0;
				512	}
				513	/*-----------------------------------------------------------------------------------*/
				514	static char FASTCALL
				515	lowercase(char c)
				516	{
				517	/* XXX: This is a brute force approach to lower-case
				518	converting and should not be used anywhere else! It
				519	works for our purposes, however (i.e., HTML tags). */
				520	if(c > 0x40) {
				521	return (c & 0x1f) \| 0x60;
				522	} else {
				523	return c;
				524	}
				525	}
				526	/*-----------------------------------------------------------------------------------*/
				527	static void
				528	endtagfound(void)
				529	{
				530	s.tag[s.tagptr] = 0;
				531	s.tagattr[s.tagattrptr] = 0;
				532	s.tagattrparam[s.tagattrparamptr] = 0;
				533	}
				534	/*-----------------------------------------------------------------------------------*/
				535	/* htmlparser_parse():
				536	*
				537	* This is the main function in the HTML parser module and it parses
				538	* the HTML data in the input buffer. The htmlparser_state is updated
				539	* as the buffer is parsed character by character. The functions
				540	* parse_char() and parse_tag() (defined earlier in this file) are
				541	* called to process regular characters and HTML tags,
				542	* respectively.
				543	*
				544	* Note that the input buffer does not have to contain full HTML tags;
				545	* the parser is state machine driven in order to be able to work with
				546	* buffers that have been divided in any way.
				547	*/
				548	void
				549	htmlparser_parse(char *data, u16_t len)
				550	{
				551	static char c;
				552
				553	while(len > 0) {
				554	c = *data;
				555	--len;
				556	++data;
				557
				558	switch(s.minorstate) {
				559	case MINORSTATE_NONE:
				560	break;
				561	case MINORSTATE_TEXT:
				562	/* We are currently parsing some text, so we look for signs of
				563	an HTML tag starting (i.e., a '<' character). We also
				564	compress any whitespace character to one single space
				565	character (' '). */
				566	if(c == ISO_lt) {
				567	s.minorstate = MINORSTATE_TAG;
				568	s.tagptr = 0;
				569	endtagfound();
				570	} else if(c == ISO_ampersand) {
				571	s.minorstate = MINORSTATE_EXTCHAR;
				572	} else {
				573	if(iswhitespace(c)) {
				574	if(s.lastchar != ISO_space) {
				575	parse_char(' ');
				576	s.lastchar = ISO_space;
				577	c = ISO_space;
				578	}
				579	} else {
				580	parse_char(c);
				581	}
				582	}
				583	break;
				584	case MINORSTATE_EXTCHAR:
				585	if(c == ISO_semicolon) {
				586	s.minorstate = MINORSTATE_TEXT;
				587	parse_char(' ');
				588	} else if(iswhitespace(c)) {
				589	s.minorstate = MINORSTATE_TEXT;
				590	parse_char('&');
				591	parse_char(' ');
				592	}
				593	break;
				594	case MINORSTATE_TAG:
				595	/* We are currently parsing within the name of a tag. We check
				596	for the end of a tag (the '>' character) or whitespace (which
				597	indicates that we should parse a tag attr argument
				598	instead). */
				599	if(c == ISO_gt) {
				600	/* Full tag found. We continue parsing regular text. */
				601	s.minorstate = MINORSTATE_TEXT;
				602	s.tagattrptr = s.tagattrparamptr = 0;
				603	endtagfound();
				604	parse_tag();
				605	} else if(iswhitespace(c)) {
				606	/* The name of the tag found. We continue parsing the tag
				607	attr.*/
				608	s.minorstate = MINORSTATE_TAGATTR;
				609	s.tagattrptr = 0;
				610	endtagfound();
				611	} else {
				612	/* Keep track of the name of the tag, but convert it to
				613	lower case. */
				614
				615	s.tag[s.tagptr] = lowercase(c);
				616	++s.tagptr;
				617	/* Check if the ->tag field is full. If so, we just eat up
				618	any data left in the tag. */
				619	if(s.tagptr == sizeof(s.tag)) {
				620	s.minorstate = MINORSTATE_TAGEND;
				621	}
				622	}
				623
				624	/* Check for HTML comment, indicated by <!-- */
				625	if(s.tagptr == 3 &&
				626	s.tag[0] == ISO_bang &&
				627	s.tag[1] == ISO_dash &&
				628	s.tag[2] == ISO_dash) {
				629	PRINTF(("Starting comment...\n"));
				630	s.minorstate = MINORSTATE_HTMLCOMMENT;
				631	s.tagptr = 0;
				632	endtagfound();
				633	}
				634	break;
				635	case MINORSTATE_TAGATTR:
				636	/* We parse the "tag attr", i.e., the "href" in <a
				637	href="...">. */
				638	if(c == ISO_gt) {
				639	/* Full tag found. */
				640	s.minorstate = MINORSTATE_TEXT;
				641	s.tagattrparamptr = 0;
				642	s.tagattrptr = 0;
				643	endtagfound();
				644	parse_tag();
				645	s.tagptr = 0;
				646	endtagfound();
				647
				648	} else if(iswhitespace(c)) {
				649	if(s.tagattrptr == 0) {
				650	/* Discard leading spaces. */
				651	} else {
				652	/* A non-leading space is the end of the attribute. */
				653	s.tagattrparamptr = 0;
				654	endtagfound();
				655	parse_tag();
				656	s.minorstate = MINORSTATE_TAGATTRSPACE;
				657	/* s.tagattrptr = 0;
				658	endtagfound();*/
				659	}
				660	} else if(c == ISO_eq) {
				661	s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
				662	s.tagattrparamptr = 0;
				663	endtagfound();
				664	} else {
				665	s.tagattr[s.tagattrptr] = lowercase(c);
				666	++s.tagattrptr;
				667	/* Check if the "tagattr" field is full. If so, we just eat
				668	up any data left in the tag. */
				669	if(s.tagattrptr == sizeof(s.tagattr)) {
				670	s.minorstate = MINORSTATE_TAGEND;
				671	}
				672	}
				673	break;
				674	case MINORSTATE_TAGATTRSPACE:
				675	if(iswhitespace(c)) {
				676	/* Discard spaces. */
				677	} else if(c == ISO_eq) {
				678	s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
				679	s.tagattrparamptr = 0;
				680	endtagfound();
				681	parse_tag();
				682	} else {
				683	s.tagattr[0] = lowercase(c);
				684	s.tagattrptr = 1;
				685	s.minorstate = MINORSTATE_TAGATTR;
				686	}
				687	break;
				688	case MINORSTATE_TAGATTRPARAMNQ:
				689	/* We are parsing the "tag attr parameter", i.e., the link part
				690	in <a href="link">. */
				691	if(c == ISO_gt) {
				692	/* Full tag found. */
				693	endtagfound();
				694	parse_tag();
				695	s.minorstate = MINORSTATE_TEXT;
				696	s.tagattrptr = 0;
				697	endtagfound();
				698	parse_tag();
				699	s.tagptr = 0;
				700	endtagfound();
				701	} else if(iswhitespace(c) &&
				702	s.tagattrparamptr == 0) {
				703	/* Discard leading spaces. */
				704	} else if((c == ISO_citation \|\|
				705	c == ISO_citation2) &&
				706	s.tagattrparamptr == 0) {
				707	s.minorstate = MINORSTATE_TAGATTRPARAM;
				708	s.quotechar = c;
				709	PRINTF(("tag attr param q found\n"));
				710	} else if(iswhitespace(c)) {
				711	PRINTF(("Non-leading space found at %d\n",
				712	s.tagattrparamptr));
				713	/* Stop parsing if a non-leading space was found */
				714	endtagfound();
				715	parse_tag();
				716
				717	s.minorstate = MINORSTATE_TAGATTR;
				718	s.tagattrptr = 0;
				719	endtagfound();
				720	} else {
				721	s.tagattrparam[s.tagattrparamptr] = c;
				722	++s.tagattrparamptr;
				723	/* Check if the "tagattr" field is full. If so, we just eat
				724	up any data left in the tag. */
				725	if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
				726	s.minorstate = MINORSTATE_TAGEND;
				727	}
				728	}
				729
				730	break;
				731	case MINORSTATE_TAGATTRPARAM:
				732	/* We are parsing the "tag attr parameter", i.e., the link
				733	part in <a href="link">. */
				734	if(c == s.quotechar) {
				735	/* Found end of tag attr parameter. */
				736	endtagfound();
				737	parse_tag();
				738
				739	s.minorstate = MINORSTATE_TAGATTR;
				740	s.tagattrptr = 0;
				741	endtagfound();
				742	} else {
				743	if(iswhitespace(c)) {
				744	c = ISO_space;
				745	}
				746	s.tagattrparam[s.tagattrparamptr] = c;
				747	++s.tagattrparamptr;
				748	/* Check if the "tagattr" field is full. If so, we just eat
				749	up any data left in the tag. */
				750	if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
				751	s.minorstate = MINORSTATE_TAGEND;
				752	}
				753	}
				754
				755	break;
				756	case MINORSTATE_HTMLCOMMENT:
				757	if(c == ISO_dash) {
				758	++s.tagptr;
				759	} else if(c == ISO_gt && s.tagptr > 0) {
				760	PRINTF(("Comment done.\n"));
				761	s.minorstate = MINORSTATE_TEXT;
				762	} else {
				763	s.tagptr = 0;
				764	}
				765	break;
				766	case MINORSTATE_TAGEND:
				767	/* Discard characters until a '>' is seen. */
				768	if(c == ISO_gt) {
				769	s.minorstate = MINORSTATE_TEXT;
				770	s.tagattrptr = 0;
				771	endtagfound();
				772	parse_tag();
				773	}
				774	break;
				775	}
				776
				777	s.lastchar = c;
				778	}
				779	}
				780	/*-----------------------------------------------------------------------------------*/