Annotation of XML/HTMLtree.c, revision 1.18
1.1 daniel 1: /*
2: * HTMLtree.c : implementation of access functions for an HTML tree.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
1.5 daniel 9:
1.13 daniel 10: #ifdef WIN32
11: #include "win32config.h"
12: #else
1.1 daniel 13: #include "config.h"
1.5 daniel 14: #endif
1.18 ! daniel 15:
! 16: #include "xmlversion.h"
! 17: #ifdef LIBXML_HTML_ENABLED
! 18:
1.1 daniel 19: #include <stdio.h>
1.5 daniel 20: #include <string.h> /* for memset() only ! */
21:
22: #ifdef HAVE_CTYPE_H
1.1 daniel 23: #include <ctype.h>
1.5 daniel 24: #endif
25: #ifdef HAVE_STDLIB_H
1.1 daniel 26: #include <stdlib.h>
1.5 daniel 27: #endif
1.1 daniel 28:
1.18 ! daniel 29: #include <libxml/xmlmemory.h>
! 30: #include <libxml/HTMLparser.h>
! 31: #include <libxml/HTMLtree.h>
! 32: #include <libxml/entities.h>
! 33: #include <libxml/valid.h>
1.1 daniel 34:
1.14 daniel 35: static void
36: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
37:
1.1 daniel 38: /**
39: * htmlDtdDump:
40: * @buf: the HTML buffer output
41: * @doc: the document
42: *
43: * Dump the HTML document DTD, if any.
44: */
45: static void
46: htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
47: xmlDtdPtr cur = doc->intSubset;
48:
49: if (cur == NULL) {
50: fprintf(stderr, "htmlDtdDump : no internal subset\n");
51: return;
52: }
53: xmlBufferWriteChar(buf, "<!DOCTYPE ");
54: xmlBufferWriteCHAR(buf, cur->name);
55: if (cur->ExternalID != NULL) {
56: xmlBufferWriteChar(buf, " PUBLIC ");
57: xmlBufferWriteQuotedString(buf, cur->ExternalID);
1.2 daniel 58: if (cur->SystemID != NULL) {
59: xmlBufferWriteChar(buf, " ");
60: xmlBufferWriteQuotedString(buf, cur->SystemID);
61: }
1.1 daniel 62: } else if (cur->SystemID != NULL) {
63: xmlBufferWriteChar(buf, " SYSTEM ");
64: xmlBufferWriteQuotedString(buf, cur->SystemID);
65: }
66: xmlBufferWriteChar(buf, ">\n");
67: }
68:
69: /**
70: * htmlAttrDump:
71: * @buf: the HTML buffer output
72: * @doc: the document
73: * @cur: the attribute pointer
74: *
75: * Dump an HTML attribute
76: */
77: static void
78: htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
1.6 daniel 79: xmlChar *value;
1.1 daniel 80:
81: if (cur == NULL) {
82: fprintf(stderr, "htmlAttrDump : property == NULL\n");
83: return;
84: }
85: xmlBufferWriteChar(buf, " ");
86: xmlBufferWriteCHAR(buf, cur->name);
1.17 daniel 87: value = xmlNodeListGetString(doc, cur->children, 0);
1.1 daniel 88: if (value) {
89: xmlBufferWriteChar(buf, "=");
90: xmlBufferWriteQuotedString(buf, value);
1.4 daniel 91: xmlFree(value);
1.1 daniel 92: } else {
93: xmlBufferWriteChar(buf, "=\"\"");
94: }
95: }
96:
97: /**
98: * htmlAttrListDump:
99: * @buf: the HTML buffer output
100: * @doc: the document
101: * @cur: the first attribute pointer
102: *
103: * Dump a list of HTML attributes
104: */
105: static void
106: htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
107: if (cur == NULL) {
108: fprintf(stderr, "htmlAttrListDump : property == NULL\n");
109: return;
110: }
111: while (cur != NULL) {
112: htmlAttrDump(buf, doc, cur);
113: cur = cur->next;
114: }
115: }
116:
117:
1.14 daniel 118: void
1.1 daniel 119: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
120: /**
121: * htmlNodeListDump:
122: * @buf: the HTML buffer output
123: * @doc: the document
124: * @cur: the first node
125: *
126: * Dump an HTML node list, recursive behaviour,children are printed too.
127: */
128: static void
129: htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
130: if (cur == NULL) {
131: fprintf(stderr, "htmlNodeListDump : node == NULL\n");
132: return;
133: }
134: while (cur != NULL) {
135: htmlNodeDump(buf, doc, cur);
136: cur = cur->next;
137: }
138: }
139:
140: /**
141: * htmlNodeDump:
142: * @buf: the HTML buffer output
143: * @doc: the document
144: * @cur: the current node
145: *
146: * Dump an HTML node, recursive behaviour,children are printed too.
147: */
1.14 daniel 148: void
1.1 daniel 149: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
150: htmlElemDescPtr info;
151:
152: if (cur == NULL) {
153: fprintf(stderr, "htmlNodeDump : node == NULL\n");
154: return;
155: }
156: /*
157: * Special cases.
158: */
1.14 daniel 159: if (cur->type == XML_HTML_DOCUMENT_NODE) {
160: htmlDocContentDump(buf, (xmlDocPtr) cur);
161: return;
162: }
1.1 daniel 163: if (cur->type == HTML_TEXT_NODE) {
164: if (cur->content != NULL) {
1.6 daniel 165: xmlChar *buffer;
1.1 daniel 166:
167: /* uses the HTML encoding routine !!!!!!!!!! */
1.9 daniel 168: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 169: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
1.9 daniel 170: #else
171: buffer = xmlEncodeEntitiesReentrant(doc,
172: xmlBufferContent(cur->content));
173: #endif
1.1 daniel 174: if (buffer != NULL) {
175: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 176: xmlFree(buffer);
1.1 daniel 177: }
178: }
179: return;
180: }
181: if (cur->type == HTML_COMMENT_NODE) {
182: if (cur->content != NULL) {
183: xmlBufferWriteChar(buf, "<!--");
1.9 daniel 184: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 185: xmlBufferWriteCHAR(buf, cur->content);
1.9 daniel 186: #else
187: xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
188: #endif
1.1 daniel 189: xmlBufferWriteChar(buf, "-->");
190: }
191: return;
192: }
193: if (cur->type == HTML_ENTITY_REF_NODE) {
194: xmlBufferWriteChar(buf, "&");
195: xmlBufferWriteCHAR(buf, cur->name);
196: xmlBufferWriteChar(buf, ";");
197: return;
198: }
199:
200: /*
201: * Get specific HTmL info for taht node.
202: */
203: info = htmlTagLookup(cur->name);
204:
205: xmlBufferWriteChar(buf, "<");
206: xmlBufferWriteCHAR(buf, cur->name);
207: if (cur->properties != NULL)
208: htmlAttrListDump(buf, doc, cur->properties);
209:
1.7 daniel 210: if ((info != NULL) && (info->empty)) {
1.1 daniel 211: xmlBufferWriteChar(buf, ">");
212: if (cur->next != NULL) {
213: if ((cur->next->type != HTML_TEXT_NODE) &&
214: (cur->next->type != HTML_ENTITY_REF_NODE))
215: xmlBufferWriteChar(buf, "\n");
216: }
217: return;
218: }
1.17 daniel 219: if ((cur->content == NULL) && (cur->children == NULL)) {
1.7 daniel 220: if ((info != NULL) && (info->endTag != 0))
1.1 daniel 221: xmlBufferWriteChar(buf, ">");
222: else {
223: xmlBufferWriteChar(buf, "></");
224: xmlBufferWriteCHAR(buf, cur->name);
225: xmlBufferWriteChar(buf, ">");
226: }
227: if (cur->next != NULL) {
228: if ((cur->next->type != HTML_TEXT_NODE) &&
229: (cur->next->type != HTML_ENTITY_REF_NODE))
230: xmlBufferWriteChar(buf, "\n");
231: }
232: return;
233: }
234: xmlBufferWriteChar(buf, ">");
235: if (cur->content != NULL) {
1.6 daniel 236: xmlChar *buffer;
1.1 daniel 237:
1.9 daniel 238: #ifndef XML_USE_BUFFER_CONTENT
239: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
240: #else
241: buffer = xmlEncodeEntitiesReentrant(doc,
242: xmlBufferContent(cur->content));
243: #endif
1.1 daniel 244: if (buffer != NULL) {
245: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 246: xmlFree(buffer);
1.1 daniel 247: }
248: }
1.17 daniel 249: if (cur->children != NULL) {
250: if ((cur->children->type != HTML_TEXT_NODE) &&
251: (cur->children->type != HTML_ENTITY_REF_NODE) &&
252: (cur->children != cur->last))
1.1 daniel 253: xmlBufferWriteChar(buf, "\n");
1.17 daniel 254: htmlNodeListDump(buf, doc, cur->children);
1.1 daniel 255: if ((cur->last->type != HTML_TEXT_NODE) &&
1.10 daniel 256: (cur->last->type != HTML_ENTITY_REF_NODE) &&
1.17 daniel 257: (cur->children != cur->last))
1.1 daniel 258: xmlBufferWriteChar(buf, "\n");
259: }
1.11 daniel 260: if (!htmlIsAutoClosed(doc, cur)) {
261: xmlBufferWriteChar(buf, "</");
262: xmlBufferWriteCHAR(buf, cur->name);
263: xmlBufferWriteChar(buf, ">");
264: }
1.1 daniel 265: if (cur->next != NULL) {
266: if ((cur->next->type != HTML_TEXT_NODE) &&
267: (cur->next->type != HTML_ENTITY_REF_NODE))
268: xmlBufferWriteChar(buf, "\n");
269: }
270: }
271:
272: /**
1.16 daniel 273: * htmlNodeDumpFile:
274: * @out: the FILE pointer
275: * @doc: the document
276: * @cur: the current node
277: *
278: * Dump an HTML node, recursive behaviour,children are printed too.
279: */
280: void
281: htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
282: xmlBufferPtr buf;
283:
284: buf = xmlBufferCreate();
285: if (buf == NULL) return;
286: htmlNodeDump(buf, doc, cur);
287: xmlBufferDump(out, buf);
288: xmlBufferFree(buf);
289: }
290:
291: /**
1.1 daniel 292: * htmlDocContentDump:
293: * @buf: the HTML buffer output
294: * @cur: the document
295: *
296: * Dump an HTML document.
297: */
298: static void
299: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
1.12 daniel 300: int type;
301:
302: /*
303: * force to output the stuff as HTML, especially for entities
304: */
305: type = cur->type;
306: cur->type = XML_HTML_DOCUMENT_NODE;
1.1 daniel 307: if (cur->intSubset != NULL)
308: htmlDtdDump(buf, cur);
1.11 daniel 309: else {
310: /* Default to HTML-4.0 transitionnal @@@@ */
311: xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
312:
313: }
1.17 daniel 314: if (cur->children != NULL) {
315: htmlNodeListDump(buf, cur, cur->children);
1.1 daniel 316: }
317: xmlBufferWriteChar(buf, "\n");
1.12 daniel 318: cur->type = type;
1.1 daniel 319: }
320:
321: /**
322: * htmlDocDumpMemory:
323: * @cur: the document
324: * @mem: OUT: the memory pointer
325: * @size: OUT: the memory lenght
326: *
1.6 daniel 327: * Dump an HTML document in memory and return the xmlChar * and it's size.
1.1 daniel 328: * It's up to the caller to free the memory.
329: */
330: void
1.6 daniel 331: htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
1.1 daniel 332: xmlBufferPtr buf;
333:
334: if (cur == NULL) {
335: #ifdef DEBUG_TREE
1.15 daniel 336: fprintf(stderr, "htmlxmlDocDumpMemory : document == NULL\n");
1.1 daniel 337: #endif
338: *mem = NULL;
339: *size = 0;
340: return;
341: }
342: buf = xmlBufferCreate();
343: if (buf == NULL) {
344: *mem = NULL;
345: *size = 0;
346: return;
347: }
348: htmlDocContentDump(buf, cur);
349: *mem = buf->content;
350: *size = buf->use;
351: memset(buf, -1, sizeof(xmlBuffer));
1.4 daniel 352: xmlFree(buf);
1.1 daniel 353: }
354:
355:
356: /**
357: * htmlDocDump:
358: * @f: the FILE*
359: * @cur: the document
360: *
361: * Dump an HTML document to an open FILE.
362: */
363: void
364: htmlDocDump(FILE *f, xmlDocPtr cur) {
365: xmlBufferPtr buf;
366:
367: if (cur == NULL) {
368: #ifdef DEBUG_TREE
1.15 daniel 369: fprintf(stderr, "htmlDocDump : document == NULL\n");
1.1 daniel 370: #endif
371: return;
372: }
373: buf = xmlBufferCreate();
374: if (buf == NULL) return;
375: htmlDocContentDump(buf, cur);
376: xmlBufferDump(f, buf);
377: xmlBufferFree(buf);
378: }
379:
380: /**
381: * htmlSaveFile:
382: * @filename: the filename
383: * @cur: the document
384: *
385: * Dump an HTML document to a file.
386: *
387: * returns: the number of byte written or -1 in case of failure.
388: */
389: int
390: htmlSaveFile(const char *filename, xmlDocPtr cur) {
391: xmlBufferPtr buf;
392: FILE *output = NULL;
393: int ret;
394:
395: /*
396: * save the content to a temp buffer.
397: */
398: buf = xmlBufferCreate();
399: if (buf == NULL) return(0);
400: htmlDocContentDump(buf, cur);
401:
402: output = fopen(filename, "w");
403: if (output == NULL) return(-1);
404: ret = xmlBufferDump(output, buf);
405: fclose(output);
406:
407: xmlBufferFree(buf);
1.6 daniel 408: return(ret * sizeof(xmlChar));
1.1 daniel 409: }
410:
1.18 ! daniel 411: #endif /* LIBXML_HTML_ENABLED */
Webmaster