Annotation of XML/HTMLtree.c, revision 1.13
1.1 daniel 1: /*
2: * HTMLtree.c : implementation of access functions for an HTML tree.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
1.5 daniel 9:
1.13 ! daniel 10: #ifdef WIN32
! 11: #include "win32config.h"
! 12: #else
1.1 daniel 13: #include "config.h"
1.5 daniel 14: #endif
1.1 daniel 15: #include <stdio.h>
1.5 daniel 16: #include <string.h> /* for memset() only ! */
17:
18: #ifdef HAVE_CTYPE_H
1.1 daniel 19: #include <ctype.h>
1.5 daniel 20: #endif
21: #ifdef HAVE_STDLIB_H
1.1 daniel 22: #include <stdlib.h>
1.5 daniel 23: #endif
1.1 daniel 24:
1.4 daniel 25: #include "xmlmemory.h"
1.1 daniel 26: #include "HTMLparser.h"
27: #include "HTMLtree.h"
28: #include "entities.h"
29: #include "valid.h"
30:
31: /**
32: * htmlDtdDump:
33: * @buf: the HTML buffer output
34: * @doc: the document
35: *
36: * Dump the HTML document DTD, if any.
37: */
38: static void
39: htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
40: xmlDtdPtr cur = doc->intSubset;
41:
42: if (cur == NULL) {
43: fprintf(stderr, "htmlDtdDump : no internal subset\n");
44: return;
45: }
46: xmlBufferWriteChar(buf, "<!DOCTYPE ");
47: xmlBufferWriteCHAR(buf, cur->name);
48: if (cur->ExternalID != NULL) {
49: xmlBufferWriteChar(buf, " PUBLIC ");
50: xmlBufferWriteQuotedString(buf, cur->ExternalID);
1.2 daniel 51: if (cur->SystemID != NULL) {
52: xmlBufferWriteChar(buf, " ");
53: xmlBufferWriteQuotedString(buf, cur->SystemID);
54: }
1.1 daniel 55: } else if (cur->SystemID != NULL) {
56: xmlBufferWriteChar(buf, " SYSTEM ");
57: xmlBufferWriteQuotedString(buf, cur->SystemID);
58: }
59: xmlBufferWriteChar(buf, ">\n");
60: }
61:
62: /**
63: * htmlAttrDump:
64: * @buf: the HTML buffer output
65: * @doc: the document
66: * @cur: the attribute pointer
67: *
68: * Dump an HTML attribute
69: */
70: static void
71: htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
1.6 daniel 72: xmlChar *value;
1.1 daniel 73:
74: if (cur == NULL) {
75: fprintf(stderr, "htmlAttrDump : property == NULL\n");
76: return;
77: }
78: xmlBufferWriteChar(buf, " ");
79: xmlBufferWriteCHAR(buf, cur->name);
80: value = xmlNodeListGetString(doc, cur->val, 0);
81: if (value) {
82: xmlBufferWriteChar(buf, "=");
83: xmlBufferWriteQuotedString(buf, value);
1.4 daniel 84: xmlFree(value);
1.1 daniel 85: } else {
86: xmlBufferWriteChar(buf, "=\"\"");
87: }
88: }
89:
90: /**
91: * htmlAttrListDump:
92: * @buf: the HTML buffer output
93: * @doc: the document
94: * @cur: the first attribute pointer
95: *
96: * Dump a list of HTML attributes
97: */
98: static void
99: htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
100: if (cur == NULL) {
101: fprintf(stderr, "htmlAttrListDump : property == NULL\n");
102: return;
103: }
104: while (cur != NULL) {
105: htmlAttrDump(buf, doc, cur);
106: cur = cur->next;
107: }
108: }
109:
110:
111: static void
112: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
113: /**
114: * htmlNodeListDump:
115: * @buf: the HTML buffer output
116: * @doc: the document
117: * @cur: the first node
118: *
119: * Dump an HTML node list, recursive behaviour,children are printed too.
120: */
121: static void
122: htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
123: if (cur == NULL) {
124: fprintf(stderr, "htmlNodeListDump : node == NULL\n");
125: return;
126: }
127: while (cur != NULL) {
128: htmlNodeDump(buf, doc, cur);
129: cur = cur->next;
130: }
131: }
132:
133: /**
134: * htmlNodeDump:
135: * @buf: the HTML buffer output
136: * @doc: the document
137: * @cur: the current node
138: *
139: * Dump an HTML node, recursive behaviour,children are printed too.
140: */
141: static void
142: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
143: htmlElemDescPtr info;
144:
145: if (cur == NULL) {
146: fprintf(stderr, "htmlNodeDump : node == NULL\n");
147: return;
148: }
149: /*
150: * Special cases.
151: */
152: if (cur->type == HTML_TEXT_NODE) {
153: if (cur->content != NULL) {
1.6 daniel 154: xmlChar *buffer;
1.1 daniel 155:
156: /* uses the HTML encoding routine !!!!!!!!!! */
1.9 daniel 157: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 158: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
1.9 daniel 159: #else
160: buffer = xmlEncodeEntitiesReentrant(doc,
161: xmlBufferContent(cur->content));
162: #endif
1.1 daniel 163: if (buffer != NULL) {
164: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 165: xmlFree(buffer);
1.1 daniel 166: }
167: }
168: return;
169: }
170: if (cur->type == HTML_COMMENT_NODE) {
171: if (cur->content != NULL) {
172: xmlBufferWriteChar(buf, "<!--");
1.9 daniel 173: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 174: xmlBufferWriteCHAR(buf, cur->content);
1.9 daniel 175: #else
176: xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
177: #endif
1.1 daniel 178: xmlBufferWriteChar(buf, "-->");
179: }
180: return;
181: }
182: if (cur->type == HTML_ENTITY_REF_NODE) {
183: xmlBufferWriteChar(buf, "&");
184: xmlBufferWriteCHAR(buf, cur->name);
185: xmlBufferWriteChar(buf, ";");
186: return;
187: }
188:
189: /*
190: * Get specific HTmL info for taht node.
191: */
192: info = htmlTagLookup(cur->name);
193:
194: xmlBufferWriteChar(buf, "<");
195: xmlBufferWriteCHAR(buf, cur->name);
196: if (cur->properties != NULL)
197: htmlAttrListDump(buf, doc, cur->properties);
198:
1.7 daniel 199: if ((info != NULL) && (info->empty)) {
1.1 daniel 200: xmlBufferWriteChar(buf, ">");
201: if (cur->next != NULL) {
202: if ((cur->next->type != HTML_TEXT_NODE) &&
203: (cur->next->type != HTML_ENTITY_REF_NODE))
204: xmlBufferWriteChar(buf, "\n");
205: }
206: return;
207: }
208: if ((cur->content == NULL) && (cur->childs == NULL)) {
1.7 daniel 209: if ((info != NULL) && (info->endTag != 0))
1.1 daniel 210: xmlBufferWriteChar(buf, ">");
211: else {
212: xmlBufferWriteChar(buf, "></");
213: xmlBufferWriteCHAR(buf, cur->name);
214: xmlBufferWriteChar(buf, ">");
215: }
216: if (cur->next != NULL) {
217: if ((cur->next->type != HTML_TEXT_NODE) &&
218: (cur->next->type != HTML_ENTITY_REF_NODE))
219: xmlBufferWriteChar(buf, "\n");
220: }
221: return;
222: }
223: xmlBufferWriteChar(buf, ">");
224: if (cur->content != NULL) {
1.6 daniel 225: xmlChar *buffer;
1.1 daniel 226:
1.9 daniel 227: #ifndef XML_USE_BUFFER_CONTENT
228: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
229: #else
230: buffer = xmlEncodeEntitiesReentrant(doc,
231: xmlBufferContent(cur->content));
232: #endif
1.1 daniel 233: if (buffer != NULL) {
234: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 235: xmlFree(buffer);
1.1 daniel 236: }
237: }
238: if (cur->childs != NULL) {
239: if ((cur->childs->type != HTML_TEXT_NODE) &&
1.10 daniel 240: (cur->childs->type != HTML_ENTITY_REF_NODE) &&
241: (cur->childs != cur->last))
1.1 daniel 242: xmlBufferWriteChar(buf, "\n");
243: htmlNodeListDump(buf, doc, cur->childs);
244: if ((cur->last->type != HTML_TEXT_NODE) &&
1.10 daniel 245: (cur->last->type != HTML_ENTITY_REF_NODE) &&
246: (cur->childs != cur->last))
1.1 daniel 247: xmlBufferWriteChar(buf, "\n");
248: }
1.11 daniel 249: if (!htmlIsAutoClosed(doc, cur)) {
250: xmlBufferWriteChar(buf, "</");
251: xmlBufferWriteCHAR(buf, cur->name);
252: xmlBufferWriteChar(buf, ">");
253: }
1.1 daniel 254: if (cur->next != NULL) {
255: if ((cur->next->type != HTML_TEXT_NODE) &&
256: (cur->next->type != HTML_ENTITY_REF_NODE))
257: xmlBufferWriteChar(buf, "\n");
258: }
259: }
260:
261: /**
262: * htmlDocContentDump:
263: * @buf: the HTML buffer output
264: * @cur: the document
265: *
266: * Dump an HTML document.
267: */
268: static void
269: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
1.12 daniel 270: int type;
271:
272: /*
273: * force to output the stuff as HTML, especially for entities
274: */
275: type = cur->type;
276: cur->type = XML_HTML_DOCUMENT_NODE;
1.1 daniel 277: if (cur->intSubset != NULL)
278: htmlDtdDump(buf, cur);
1.11 daniel 279: else {
280: /* Default to HTML-4.0 transitionnal @@@@ */
281: xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
282:
283: }
1.1 daniel 284: if (cur->root != NULL) {
1.8 daniel 285: htmlNodeListDump(buf, cur, cur->root);
1.1 daniel 286: }
287: xmlBufferWriteChar(buf, "\n");
1.12 daniel 288: cur->type = type;
1.1 daniel 289: }
290:
291: /**
292: * htmlDocDumpMemory:
293: * @cur: the document
294: * @mem: OUT: the memory pointer
295: * @size: OUT: the memory lenght
296: *
1.6 daniel 297: * Dump an HTML document in memory and return the xmlChar * and it's size.
1.1 daniel 298: * It's up to the caller to free the memory.
299: */
300: void
1.6 daniel 301: htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
1.1 daniel 302: xmlBufferPtr buf;
303:
304: if (cur == NULL) {
305: #ifdef DEBUG_TREE
306: fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
307: #endif
308: *mem = NULL;
309: *size = 0;
310: return;
311: }
312: buf = xmlBufferCreate();
313: if (buf == NULL) {
314: *mem = NULL;
315: *size = 0;
316: return;
317: }
318: htmlDocContentDump(buf, cur);
319: *mem = buf->content;
320: *size = buf->use;
321: memset(buf, -1, sizeof(xmlBuffer));
1.4 daniel 322: xmlFree(buf);
1.1 daniel 323: }
324:
325:
326: /**
327: * htmlDocDump:
328: * @f: the FILE*
329: * @cur: the document
330: *
331: * Dump an HTML document to an open FILE.
332: */
333: void
334: htmlDocDump(FILE *f, xmlDocPtr cur) {
335: xmlBufferPtr buf;
336:
337: if (cur == NULL) {
338: #ifdef DEBUG_TREE
339: fprintf(stderr, "xmlDocDump : document == NULL\n");
340: #endif
341: return;
342: }
343: buf = xmlBufferCreate();
344: if (buf == NULL) return;
345: htmlDocContentDump(buf, cur);
346: xmlBufferDump(f, buf);
347: xmlBufferFree(buf);
348: }
349:
350: /**
351: * htmlSaveFile:
352: * @filename: the filename
353: * @cur: the document
354: *
355: * Dump an HTML document to a file.
356: *
357: * returns: the number of byte written or -1 in case of failure.
358: */
359: int
360: htmlSaveFile(const char *filename, xmlDocPtr cur) {
361: xmlBufferPtr buf;
362: FILE *output = NULL;
363: int ret;
364:
365: /*
366: * save the content to a temp buffer.
367: */
368: buf = xmlBufferCreate();
369: if (buf == NULL) return(0);
370: htmlDocContentDump(buf, cur);
371:
372: output = fopen(filename, "w");
373: if (output == NULL) return(-1);
374: ret = xmlBufferDump(output, buf);
375: fclose(output);
376:
377: xmlBufferFree(buf);
1.6 daniel 378: return(ret * sizeof(xmlChar));
1.1 daniel 379: }
380:
Webmaster