Annotation of XML/HTMLtree.c, revision 1.11
1.1 daniel 1: /*
2: * HTMLtree.c : implementation of access functions for an HTML tree.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
1.5 daniel 9:
10: #ifndef WIN32
1.1 daniel 11: #include "config.h"
1.5 daniel 12: #endif
1.1 daniel 13: #include <stdio.h>
1.5 daniel 14: #include <string.h> /* for memset() only ! */
15:
16: #ifdef HAVE_CTYPE_H
1.1 daniel 17: #include <ctype.h>
1.5 daniel 18: #endif
19: #ifdef HAVE_STDLIB_H
1.1 daniel 20: #include <stdlib.h>
1.5 daniel 21: #endif
1.1 daniel 22:
1.4 daniel 23: #include "xmlmemory.h"
1.1 daniel 24: #include "HTMLparser.h"
25: #include "HTMLtree.h"
26: #include "entities.h"
27: #include "valid.h"
28:
29: /**
30: * htmlDtdDump:
31: * @buf: the HTML buffer output
32: * @doc: the document
33: *
34: * Dump the HTML document DTD, if any.
35: */
36: static void
37: htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
38: xmlDtdPtr cur = doc->intSubset;
39:
40: if (cur == NULL) {
41: fprintf(stderr, "htmlDtdDump : no internal subset\n");
42: return;
43: }
44: xmlBufferWriteChar(buf, "<!DOCTYPE ");
45: xmlBufferWriteCHAR(buf, cur->name);
46: if (cur->ExternalID != NULL) {
47: xmlBufferWriteChar(buf, " PUBLIC ");
48: xmlBufferWriteQuotedString(buf, cur->ExternalID);
1.2 daniel 49: if (cur->SystemID != NULL) {
50: xmlBufferWriteChar(buf, " ");
51: xmlBufferWriteQuotedString(buf, cur->SystemID);
52: }
1.1 daniel 53: } else if (cur->SystemID != NULL) {
54: xmlBufferWriteChar(buf, " SYSTEM ");
55: xmlBufferWriteQuotedString(buf, cur->SystemID);
56: }
57: xmlBufferWriteChar(buf, ">\n");
58: }
59:
60: /**
61: * htmlAttrDump:
62: * @buf: the HTML buffer output
63: * @doc: the document
64: * @cur: the attribute pointer
65: *
66: * Dump an HTML attribute
67: */
68: static void
69: htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
1.6 daniel 70: xmlChar *value;
1.1 daniel 71:
72: if (cur == NULL) {
73: fprintf(stderr, "htmlAttrDump : property == NULL\n");
74: return;
75: }
76: xmlBufferWriteChar(buf, " ");
77: xmlBufferWriteCHAR(buf, cur->name);
78: value = xmlNodeListGetString(doc, cur->val, 0);
79: if (value) {
80: xmlBufferWriteChar(buf, "=");
81: xmlBufferWriteQuotedString(buf, value);
1.4 daniel 82: xmlFree(value);
1.1 daniel 83: } else {
84: xmlBufferWriteChar(buf, "=\"\"");
85: }
86: }
87:
88: /**
89: * htmlAttrListDump:
90: * @buf: the HTML buffer output
91: * @doc: the document
92: * @cur: the first attribute pointer
93: *
94: * Dump a list of HTML attributes
95: */
96: static void
97: htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
98: if (cur == NULL) {
99: fprintf(stderr, "htmlAttrListDump : property == NULL\n");
100: return;
101: }
102: while (cur != NULL) {
103: htmlAttrDump(buf, doc, cur);
104: cur = cur->next;
105: }
106: }
107:
108:
109: static void
110: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
111: /**
112: * htmlNodeListDump:
113: * @buf: the HTML buffer output
114: * @doc: the document
115: * @cur: the first node
116: *
117: * Dump an HTML node list, recursive behaviour,children are printed too.
118: */
119: static void
120: htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
121: if (cur == NULL) {
122: fprintf(stderr, "htmlNodeListDump : node == NULL\n");
123: return;
124: }
125: while (cur != NULL) {
126: htmlNodeDump(buf, doc, cur);
127: cur = cur->next;
128: }
129: }
130:
131: /**
132: * htmlNodeDump:
133: * @buf: the HTML buffer output
134: * @doc: the document
135: * @cur: the current node
136: *
137: * Dump an HTML node, recursive behaviour,children are printed too.
138: */
139: static void
140: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
141: htmlElemDescPtr info;
142:
143: if (cur == NULL) {
144: fprintf(stderr, "htmlNodeDump : node == NULL\n");
145: return;
146: }
147: /*
148: * Special cases.
149: */
150: if (cur->type == HTML_TEXT_NODE) {
151: if (cur->content != NULL) {
1.6 daniel 152: xmlChar *buffer;
1.1 daniel 153:
154: /* uses the HTML encoding routine !!!!!!!!!! */
1.9 daniel 155: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 156: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
1.9 daniel 157: #else
158: buffer = xmlEncodeEntitiesReentrant(doc,
159: xmlBufferContent(cur->content));
160: #endif
1.1 daniel 161: if (buffer != NULL) {
162: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 163: xmlFree(buffer);
1.1 daniel 164: }
165: }
166: return;
167: }
168: if (cur->type == HTML_COMMENT_NODE) {
169: if (cur->content != NULL) {
170: xmlBufferWriteChar(buf, "<!--");
1.9 daniel 171: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 172: xmlBufferWriteCHAR(buf, cur->content);
1.9 daniel 173: #else
174: xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
175: #endif
1.1 daniel 176: xmlBufferWriteChar(buf, "-->");
177: }
178: return;
179: }
180: if (cur->type == HTML_ENTITY_REF_NODE) {
181: xmlBufferWriteChar(buf, "&");
182: xmlBufferWriteCHAR(buf, cur->name);
183: xmlBufferWriteChar(buf, ";");
184: return;
185: }
186:
187: /*
188: * Get specific HTmL info for taht node.
189: */
190: info = htmlTagLookup(cur->name);
191:
192: xmlBufferWriteChar(buf, "<");
193: xmlBufferWriteCHAR(buf, cur->name);
194: if (cur->properties != NULL)
195: htmlAttrListDump(buf, doc, cur->properties);
196:
1.7 daniel 197: if ((info != NULL) && (info->empty)) {
1.1 daniel 198: xmlBufferWriteChar(buf, ">");
199: if (cur->next != NULL) {
200: if ((cur->next->type != HTML_TEXT_NODE) &&
201: (cur->next->type != HTML_ENTITY_REF_NODE))
202: xmlBufferWriteChar(buf, "\n");
203: }
204: return;
205: }
206: if ((cur->content == NULL) && (cur->childs == NULL)) {
1.7 daniel 207: if ((info != NULL) && (info->endTag != 0))
1.1 daniel 208: xmlBufferWriteChar(buf, ">");
209: else {
210: xmlBufferWriteChar(buf, "></");
211: xmlBufferWriteCHAR(buf, cur->name);
212: xmlBufferWriteChar(buf, ">");
213: }
214: if (cur->next != NULL) {
215: if ((cur->next->type != HTML_TEXT_NODE) &&
216: (cur->next->type != HTML_ENTITY_REF_NODE))
217: xmlBufferWriteChar(buf, "\n");
218: }
219: return;
220: }
221: xmlBufferWriteChar(buf, ">");
222: if (cur->content != NULL) {
1.6 daniel 223: xmlChar *buffer;
1.1 daniel 224:
1.9 daniel 225: #ifndef XML_USE_BUFFER_CONTENT
226: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
227: #else
228: buffer = xmlEncodeEntitiesReentrant(doc,
229: xmlBufferContent(cur->content));
230: #endif
1.1 daniel 231: if (buffer != NULL) {
232: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 233: xmlFree(buffer);
1.1 daniel 234: }
235: }
236: if (cur->childs != NULL) {
237: if ((cur->childs->type != HTML_TEXT_NODE) &&
1.10 daniel 238: (cur->childs->type != HTML_ENTITY_REF_NODE) &&
239: (cur->childs != cur->last))
1.1 daniel 240: xmlBufferWriteChar(buf, "\n");
241: htmlNodeListDump(buf, doc, cur->childs);
242: if ((cur->last->type != HTML_TEXT_NODE) &&
1.10 daniel 243: (cur->last->type != HTML_ENTITY_REF_NODE) &&
244: (cur->childs != cur->last))
1.1 daniel 245: xmlBufferWriteChar(buf, "\n");
246: }
1.11 ! daniel 247: if (!htmlIsAutoClosed(doc, cur)) {
! 248: xmlBufferWriteChar(buf, "</");
! 249: xmlBufferWriteCHAR(buf, cur->name);
! 250: xmlBufferWriteChar(buf, ">");
! 251: }
1.1 daniel 252: if (cur->next != NULL) {
253: if ((cur->next->type != HTML_TEXT_NODE) &&
254: (cur->next->type != HTML_ENTITY_REF_NODE))
255: xmlBufferWriteChar(buf, "\n");
256: }
257: }
258:
259: /**
260: * htmlDocContentDump:
261: * @buf: the HTML buffer output
262: * @cur: the document
263: *
264: * Dump an HTML document.
265: */
266: static void
267: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
268: if (cur->intSubset != NULL)
269: htmlDtdDump(buf, cur);
1.11 ! daniel 270: else {
! 271: /* Default to HTML-4.0 transitionnal @@@@ */
! 272: xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
! 273:
! 274: }
1.1 daniel 275: if (cur->root != NULL) {
1.8 daniel 276: htmlNodeListDump(buf, cur, cur->root);
1.1 daniel 277: }
278: xmlBufferWriteChar(buf, "\n");
279: }
280:
281: /**
282: * htmlDocDumpMemory:
283: * @cur: the document
284: * @mem: OUT: the memory pointer
285: * @size: OUT: the memory lenght
286: *
1.6 daniel 287: * Dump an HTML document in memory and return the xmlChar * and it's size.
1.1 daniel 288: * It's up to the caller to free the memory.
289: */
290: void
1.6 daniel 291: htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
1.1 daniel 292: xmlBufferPtr buf;
293:
294: if (cur == NULL) {
295: #ifdef DEBUG_TREE
296: fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
297: #endif
298: *mem = NULL;
299: *size = 0;
300: return;
301: }
302: buf = xmlBufferCreate();
303: if (buf == NULL) {
304: *mem = NULL;
305: *size = 0;
306: return;
307: }
308: htmlDocContentDump(buf, cur);
309: *mem = buf->content;
310: *size = buf->use;
311: memset(buf, -1, sizeof(xmlBuffer));
1.4 daniel 312: xmlFree(buf);
1.1 daniel 313: }
314:
315:
316: /**
317: * htmlDocDump:
318: * @f: the FILE*
319: * @cur: the document
320: *
321: * Dump an HTML document to an open FILE.
322: */
323: void
324: htmlDocDump(FILE *f, xmlDocPtr cur) {
325: xmlBufferPtr buf;
326:
327: if (cur == NULL) {
328: #ifdef DEBUG_TREE
329: fprintf(stderr, "xmlDocDump : document == NULL\n");
330: #endif
331: return;
332: }
333: buf = xmlBufferCreate();
334: if (buf == NULL) return;
335: htmlDocContentDump(buf, cur);
336: xmlBufferDump(f, buf);
337: xmlBufferFree(buf);
338: }
339:
340: /**
341: * htmlSaveFile:
342: * @filename: the filename
343: * @cur: the document
344: *
345: * Dump an HTML document to a file.
346: *
347: * returns: the number of byte written or -1 in case of failure.
348: */
349: int
350: htmlSaveFile(const char *filename, xmlDocPtr cur) {
351: xmlBufferPtr buf;
352: FILE *output = NULL;
353: int ret;
354:
355: /*
356: * save the content to a temp buffer.
357: */
358: buf = xmlBufferCreate();
359: if (buf == NULL) return(0);
360: htmlDocContentDump(buf, cur);
361:
362: output = fopen(filename, "w");
363: if (output == NULL) return(-1);
364: ret = xmlBufferDump(output, buf);
365: fclose(output);
366:
367: xmlBufferFree(buf);
1.6 daniel 368: return(ret * sizeof(xmlChar));
1.1 daniel 369: }
370:
Webmaster