Annotation of XML/HTMLtree.c, revision 1.19
1.1 daniel 1: /*
2: * HTMLtree.c : implementation of access functions for an HTML tree.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
1.5 daniel 9:
1.13 daniel 10: #ifdef WIN32
11: #include "win32config.h"
12: #else
1.1 daniel 13: #include "config.h"
1.5 daniel 14: #endif
1.18 daniel 15:
16: #include "xmlversion.h"
17: #ifdef LIBXML_HTML_ENABLED
18:
1.1 daniel 19: #include <stdio.h>
1.5 daniel 20: #include <string.h> /* for memset() only ! */
21:
22: #ifdef HAVE_CTYPE_H
1.1 daniel 23: #include <ctype.h>
1.5 daniel 24: #endif
25: #ifdef HAVE_STDLIB_H
1.1 daniel 26: #include <stdlib.h>
1.5 daniel 27: #endif
1.1 daniel 28:
1.18 daniel 29: #include <libxml/xmlmemory.h>
30: #include <libxml/HTMLparser.h>
31: #include <libxml/HTMLtree.h>
32: #include <libxml/entities.h>
33: #include <libxml/valid.h>
1.1 daniel 34:
/*
 * Forward declaration: htmlNodeDump() hands XML_HTML_DOCUMENT_NODE nodes
 * to htmlDocContentDump(), whose definition only appears further down.
 */
1.14 daniel 35: static void
36: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
37:
1.1 daniel 38: /**
39: * htmlDtdDump:
40: * @buf: the HTML buffer output
41: * @doc: the document
42: *
43: * Dump the HTML document DTD, if any.
44: */
45: static void
46: htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
47: xmlDtdPtr cur = doc->intSubset;
48:
49: if (cur == NULL) {
50: fprintf(stderr, "htmlDtdDump : no internal subset\n");
51: return;
52: }
53: xmlBufferWriteChar(buf, "<!DOCTYPE ");
54: xmlBufferWriteCHAR(buf, cur->name);
55: if (cur->ExternalID != NULL) {
56: xmlBufferWriteChar(buf, " PUBLIC ");
57: xmlBufferWriteQuotedString(buf, cur->ExternalID);
1.2 daniel 58: if (cur->SystemID != NULL) {
59: xmlBufferWriteChar(buf, " ");
60: xmlBufferWriteQuotedString(buf, cur->SystemID);
61: }
1.1 daniel 62: } else if (cur->SystemID != NULL) {
63: xmlBufferWriteChar(buf, " SYSTEM ");
64: xmlBufferWriteQuotedString(buf, cur->SystemID);
65: }
66: xmlBufferWriteChar(buf, ">\n");
67: }
68:
69: /**
70: * htmlAttrDump:
71: * @buf: the HTML buffer output
72: * @doc: the document
73: * @cur: the attribute pointer
74: *
75: * Dump an HTML attribute
76: */
77: static void
78: htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
1.6 daniel 79: xmlChar *value;
1.1 daniel 80:
81: if (cur == NULL) {
82: fprintf(stderr, "htmlAttrDump : property == NULL\n");
83: return;
84: }
85: xmlBufferWriteChar(buf, " ");
86: xmlBufferWriteCHAR(buf, cur->name);
1.19 ! daniel 87: if (cur->children != NULL) {
! 88: value = xmlNodeListGetString(doc, cur->children, 0);
! 89: if (value) {
! 90: xmlBufferWriteChar(buf, "=");
! 91: xmlBufferWriteQuotedString(buf, value);
! 92: xmlFree(value);
! 93: } else {
! 94: xmlBufferWriteChar(buf, "=\"\"");
! 95: }
1.1 daniel 96: }
97: }
98:
99: /**
100: * htmlAttrListDump:
101: * @buf: the HTML buffer output
102: * @doc: the document
103: * @cur: the first attribute pointer
104: *
105: * Dump a list of HTML attributes
106: */
107: static void
108: htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
109: if (cur == NULL) {
110: fprintf(stderr, "htmlAttrListDump : property == NULL\n");
111: return;
112: }
113: while (cur != NULL) {
114: htmlAttrDump(buf, doc, cur);
115: cur = cur->next;
116: }
117: }
118:
119:
/*
 * Forward declaration: htmlNodeListDump() below calls htmlNodeDump()
 * before its definition appears in this file.
 */
1.14 daniel 120: void
1.1 daniel 121: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
122: /**
123: * htmlNodeListDump:
124: * @buf: the HTML buffer output
125: * @doc: the document
126: * @cur: the first node
127: *
128: * Dump an HTML node list, recursive behaviour,children are printed too.
129: */
130: static void
131: htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
132: if (cur == NULL) {
133: fprintf(stderr, "htmlNodeListDump : node == NULL\n");
134: return;
135: }
136: while (cur != NULL) {
137: htmlNodeDump(buf, doc, cur);
138: cur = cur->next;
139: }
140: }
141:
142: /**
143: * htmlNodeDump:
144: * @buf: the HTML buffer output
145: * @doc: the document
146: * @cur: the current node
147: *
148: * Dump an HTML node, recursive behaviour,children are printed too.
149: */
1.14 daniel 150: void
1.1 daniel 151: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
152: htmlElemDescPtr info;
153:
154: if (cur == NULL) {
155: fprintf(stderr, "htmlNodeDump : node == NULL\n");
156: return;
157: }
158: /*
159: * Special cases.
160: */
1.14 daniel 161: if (cur->type == XML_HTML_DOCUMENT_NODE) {
162: htmlDocContentDump(buf, (xmlDocPtr) cur);
163: return;
164: }
1.1 daniel 165: if (cur->type == HTML_TEXT_NODE) {
166: if (cur->content != NULL) {
1.6 daniel 167: xmlChar *buffer;
1.1 daniel 168:
169: /* uses the HTML encoding routine !!!!!!!!!! */
1.9 daniel 170: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 171: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
1.9 daniel 172: #else
173: buffer = xmlEncodeEntitiesReentrant(doc,
174: xmlBufferContent(cur->content));
175: #endif
1.1 daniel 176: if (buffer != NULL) {
177: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 178: xmlFree(buffer);
1.1 daniel 179: }
180: }
181: return;
182: }
183: if (cur->type == HTML_COMMENT_NODE) {
184: if (cur->content != NULL) {
185: xmlBufferWriteChar(buf, "<!--");
1.9 daniel 186: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 187: xmlBufferWriteCHAR(buf, cur->content);
1.9 daniel 188: #else
189: xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
190: #endif
1.1 daniel 191: xmlBufferWriteChar(buf, "-->");
192: }
193: return;
194: }
195: if (cur->type == HTML_ENTITY_REF_NODE) {
196: xmlBufferWriteChar(buf, "&");
197: xmlBufferWriteCHAR(buf, cur->name);
198: xmlBufferWriteChar(buf, ";");
199: return;
200: }
201:
202: /*
203: * Get specific HTmL info for taht node.
204: */
205: info = htmlTagLookup(cur->name);
206:
207: xmlBufferWriteChar(buf, "<");
208: xmlBufferWriteCHAR(buf, cur->name);
209: if (cur->properties != NULL)
210: htmlAttrListDump(buf, doc, cur->properties);
211:
1.7 daniel 212: if ((info != NULL) && (info->empty)) {
1.1 daniel 213: xmlBufferWriteChar(buf, ">");
214: if (cur->next != NULL) {
215: if ((cur->next->type != HTML_TEXT_NODE) &&
216: (cur->next->type != HTML_ENTITY_REF_NODE))
217: xmlBufferWriteChar(buf, "\n");
218: }
219: return;
220: }
1.17 daniel 221: if ((cur->content == NULL) && (cur->children == NULL)) {
1.7 daniel 222: if ((info != NULL) && (info->endTag != 0))
1.1 daniel 223: xmlBufferWriteChar(buf, ">");
224: else {
225: xmlBufferWriteChar(buf, "></");
226: xmlBufferWriteCHAR(buf, cur->name);
227: xmlBufferWriteChar(buf, ">");
228: }
229: if (cur->next != NULL) {
230: if ((cur->next->type != HTML_TEXT_NODE) &&
231: (cur->next->type != HTML_ENTITY_REF_NODE))
232: xmlBufferWriteChar(buf, "\n");
233: }
234: return;
235: }
236: xmlBufferWriteChar(buf, ">");
237: if (cur->content != NULL) {
1.6 daniel 238: xmlChar *buffer;
1.1 daniel 239:
1.9 daniel 240: #ifndef XML_USE_BUFFER_CONTENT
241: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
242: #else
243: buffer = xmlEncodeEntitiesReentrant(doc,
244: xmlBufferContent(cur->content));
245: #endif
1.1 daniel 246: if (buffer != NULL) {
247: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 248: xmlFree(buffer);
1.1 daniel 249: }
250: }
1.17 daniel 251: if (cur->children != NULL) {
252: if ((cur->children->type != HTML_TEXT_NODE) &&
253: (cur->children->type != HTML_ENTITY_REF_NODE) &&
254: (cur->children != cur->last))
1.1 daniel 255: xmlBufferWriteChar(buf, "\n");
1.17 daniel 256: htmlNodeListDump(buf, doc, cur->children);
1.1 daniel 257: if ((cur->last->type != HTML_TEXT_NODE) &&
1.10 daniel 258: (cur->last->type != HTML_ENTITY_REF_NODE) &&
1.17 daniel 259: (cur->children != cur->last))
1.1 daniel 260: xmlBufferWriteChar(buf, "\n");
261: }
1.11 daniel 262: if (!htmlIsAutoClosed(doc, cur)) {
263: xmlBufferWriteChar(buf, "</");
264: xmlBufferWriteCHAR(buf, cur->name);
265: xmlBufferWriteChar(buf, ">");
266: }
1.1 daniel 267: if (cur->next != NULL) {
268: if ((cur->next->type != HTML_TEXT_NODE) &&
269: (cur->next->type != HTML_ENTITY_REF_NODE))
270: xmlBufferWriteChar(buf, "\n");
271: }
272: }
273:
274: /**
1.16 daniel 275: * htmlNodeDumpFile:
276: * @out: the FILE pointer
277: * @doc: the document
278: * @cur: the current node
279: *
280: * Dump an HTML node, recursive behaviour,children are printed too.
281: */
282: void
283: htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
284: xmlBufferPtr buf;
285:
286: buf = xmlBufferCreate();
287: if (buf == NULL) return;
288: htmlNodeDump(buf, doc, cur);
289: xmlBufferDump(out, buf);
290: xmlBufferFree(buf);
291: }
292:
293: /**
1.1 daniel 294: * htmlDocContentDump:
295: * @buf: the HTML buffer output
296: * @cur: the document
297: *
298: * Dump an HTML document.
299: */
300: static void
301: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
1.12 daniel 302: int type;
303:
304: /*
305: * force to output the stuff as HTML, especially for entities
306: */
307: type = cur->type;
308: cur->type = XML_HTML_DOCUMENT_NODE;
1.1 daniel 309: if (cur->intSubset != NULL)
310: htmlDtdDump(buf, cur);
1.11 daniel 311: else {
312: /* Default to HTML-4.0 transitionnal @@@@ */
313: xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
314:
315: }
1.17 daniel 316: if (cur->children != NULL) {
317: htmlNodeListDump(buf, cur, cur->children);
1.1 daniel 318: }
319: xmlBufferWriteChar(buf, "\n");
1.12 daniel 320: cur->type = type;
1.1 daniel 321: }
322:
323: /**
324: * htmlDocDumpMemory:
325: * @cur: the document
326: * @mem: OUT: the memory pointer
327: * @size: OUT: the memory lenght
328: *
1.6 daniel 329: * Dump an HTML document in memory and return the xmlChar * and it's size.
1.1 daniel 330: * It's up to the caller to free the memory.
331: */
332: void
1.6 daniel 333: htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
1.1 daniel 334: xmlBufferPtr buf;
335:
336: if (cur == NULL) {
337: #ifdef DEBUG_TREE
1.15 daniel 338: fprintf(stderr, "htmlxmlDocDumpMemory : document == NULL\n");
1.1 daniel 339: #endif
340: *mem = NULL;
341: *size = 0;
342: return;
343: }
344: buf = xmlBufferCreate();
345: if (buf == NULL) {
346: *mem = NULL;
347: *size = 0;
348: return;
349: }
350: htmlDocContentDump(buf, cur);
351: *mem = buf->content;
352: *size = buf->use;
353: memset(buf, -1, sizeof(xmlBuffer));
1.4 daniel 354: xmlFree(buf);
1.1 daniel 355: }
356:
357:
358: /**
359: * htmlDocDump:
360: * @f: the FILE*
361: * @cur: the document
362: *
363: * Dump an HTML document to an open FILE.
364: */
365: void
366: htmlDocDump(FILE *f, xmlDocPtr cur) {
367: xmlBufferPtr buf;
368:
369: if (cur == NULL) {
370: #ifdef DEBUG_TREE
1.15 daniel 371: fprintf(stderr, "htmlDocDump : document == NULL\n");
1.1 daniel 372: #endif
373: return;
374: }
375: buf = xmlBufferCreate();
376: if (buf == NULL) return;
377: htmlDocContentDump(buf, cur);
378: xmlBufferDump(f, buf);
379: xmlBufferFree(buf);
380: }
381:
382: /**
383: * htmlSaveFile:
384: * @filename: the filename
385: * @cur: the document
386: *
387: * Dump an HTML document to a file.
388: *
389: * returns: the number of byte written or -1 in case of failure.
390: */
391: int
392: htmlSaveFile(const char *filename, xmlDocPtr cur) {
393: xmlBufferPtr buf;
394: FILE *output = NULL;
395: int ret;
396:
397: /*
398: * save the content to a temp buffer.
399: */
400: buf = xmlBufferCreate();
401: if (buf == NULL) return(0);
402: htmlDocContentDump(buf, cur);
403:
404: output = fopen(filename, "w");
405: if (output == NULL) return(-1);
406: ret = xmlBufferDump(output, buf);
407: fclose(output);
408:
409: xmlBufferFree(buf);
1.6 daniel 410: return(ret * sizeof(xmlChar));
1.1 daniel 411: }
412:
1.18 daniel 413: #endif /* LIBXML_HTML_ENABLED */
Webmaster