Annotation of XML/HTMLtree.c, revision 1.9
1.1 daniel 1: /*
2: * HTMLtree.c : implementation of access functions for an HTML tree.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
1.5 daniel 9:
10: #ifndef WIN32
1.1 daniel 11: #include "config.h"
1.5 daniel 12: #endif
1.1 daniel 13: #include <stdio.h>
1.5 daniel 14: #include <string.h> /* for memset() only ! */
15:
16: #ifdef HAVE_CTYPE_H
1.1 daniel 17: #include <ctype.h>
1.5 daniel 18: #endif
19: #ifdef HAVE_STDLIB_H
1.1 daniel 20: #include <stdlib.h>
1.5 daniel 21: #endif
1.1 daniel 22:
1.4 daniel 23: #include "xmlmemory.h"
1.1 daniel 24: #include "HTMLparser.h"
25: #include "HTMLtree.h"
26: #include "entities.h"
27: #include "valid.h"
28:
29: /**
30: * htmlDtdDump:
31: * @buf: the HTML buffer output
32: * @doc: the document
33: *
34: * Dump the HTML document DTD, if any.
35: */
36: static void
37: htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
38: xmlDtdPtr cur = doc->intSubset;
39:
40: if (cur == NULL) {
41: fprintf(stderr, "htmlDtdDump : no internal subset\n");
42: return;
43: }
44: xmlBufferWriteChar(buf, "<!DOCTYPE ");
45: xmlBufferWriteCHAR(buf, cur->name);
46: if (cur->ExternalID != NULL) {
47: xmlBufferWriteChar(buf, " PUBLIC ");
48: xmlBufferWriteQuotedString(buf, cur->ExternalID);
1.2 daniel 49: if (cur->SystemID != NULL) {
50: xmlBufferWriteChar(buf, " ");
51: xmlBufferWriteQuotedString(buf, cur->SystemID);
52: }
1.1 daniel 53: } else if (cur->SystemID != NULL) {
54: xmlBufferWriteChar(buf, " SYSTEM ");
55: xmlBufferWriteQuotedString(buf, cur->SystemID);
56: }
57: xmlBufferWriteChar(buf, ">\n");
58: }
59:
60: /**
61: * htmlAttrDump:
62: * @buf: the HTML buffer output
63: * @doc: the document
64: * @cur: the attribute pointer
65: *
66: * Dump an HTML attribute
67: */
68: static void
69: htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
1.6 daniel 70: xmlChar *value;
1.1 daniel 71:
72: if (cur == NULL) {
73: fprintf(stderr, "htmlAttrDump : property == NULL\n");
74: return;
75: }
76: xmlBufferWriteChar(buf, " ");
77: xmlBufferWriteCHAR(buf, cur->name);
78: value = xmlNodeListGetString(doc, cur->val, 0);
79: if (value) {
80: xmlBufferWriteChar(buf, "=");
81: xmlBufferWriteQuotedString(buf, value);
1.4 daniel 82: xmlFree(value);
1.1 daniel 83: } else {
84: xmlBufferWriteChar(buf, "=\"\"");
85: }
86: }
87:
88: /**
89: * htmlAttrListDump:
90: * @buf: the HTML buffer output
91: * @doc: the document
92: * @cur: the first attribute pointer
93: *
94: * Dump a list of HTML attributes
95: */
96: static void
97: htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
98: if (cur == NULL) {
99: fprintf(stderr, "htmlAttrListDump : property == NULL\n");
100: return;
101: }
102: while (cur != NULL) {
103: htmlAttrDump(buf, doc, cur);
104: cur = cur->next;
105: }
106: }
107:
108:
109: static void
110: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
111: /**
112: * htmlNodeListDump:
113: * @buf: the HTML buffer output
114: * @doc: the document
115: * @cur: the first node
116: *
117: * Dump an HTML node list, recursive behaviour,children are printed too.
118: */
119: static void
120: htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
121: if (cur == NULL) {
122: fprintf(stderr, "htmlNodeListDump : node == NULL\n");
123: return;
124: }
125: while (cur != NULL) {
126: htmlNodeDump(buf, doc, cur);
127: cur = cur->next;
128: }
129: }
130:
131: /**
132: * htmlNodeDump:
133: * @buf: the HTML buffer output
134: * @doc: the document
135: * @cur: the current node
136: *
137: * Dump an HTML node, recursive behaviour,children are printed too.
138: */
139: static void
140: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
141: htmlElemDescPtr info;
142:
143: if (cur == NULL) {
144: fprintf(stderr, "htmlNodeDump : node == NULL\n");
145: return;
146: }
147: /*
148: * Special cases.
149: */
150: if (cur->type == HTML_TEXT_NODE) {
151: if (cur->content != NULL) {
1.6 daniel 152: xmlChar *buffer;
1.1 daniel 153:
154: /* uses the HTML encoding routine !!!!!!!!!! */
1.9 ! daniel 155: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 156: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
1.9 ! daniel 157: #else
! 158: buffer = xmlEncodeEntitiesReentrant(doc,
! 159: xmlBufferContent(cur->content));
! 160: #endif
1.1 daniel 161: if (buffer != NULL) {
162: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 163: xmlFree(buffer);
1.1 daniel 164: }
165: }
166: return;
167: }
168: if (cur->type == HTML_COMMENT_NODE) {
169: if (cur->content != NULL) {
170: xmlBufferWriteChar(buf, "<!--");
1.9 ! daniel 171: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 172: xmlBufferWriteCHAR(buf, cur->content);
1.9 ! daniel 173: #else
! 174: xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
! 175: #endif
1.1 daniel 176: xmlBufferWriteChar(buf, "-->");
177: }
178: return;
179: }
180: if (cur->type == HTML_ENTITY_REF_NODE) {
181: xmlBufferWriteChar(buf, "&");
182: xmlBufferWriteCHAR(buf, cur->name);
183: xmlBufferWriteChar(buf, ";");
184: return;
185: }
186:
187: /*
188: * Get specific HTmL info for taht node.
189: */
190: info = htmlTagLookup(cur->name);
191:
192: xmlBufferWriteChar(buf, "<");
193: xmlBufferWriteCHAR(buf, cur->name);
194: if (cur->properties != NULL)
195: htmlAttrListDump(buf, doc, cur->properties);
196:
1.7 daniel 197: if ((info != NULL) && (info->empty)) {
1.1 daniel 198: xmlBufferWriteChar(buf, ">");
199: if (cur->next != NULL) {
200: if ((cur->next->type != HTML_TEXT_NODE) &&
201: (cur->next->type != HTML_ENTITY_REF_NODE))
202: xmlBufferWriteChar(buf, "\n");
203: }
204: return;
205: }
206: if ((cur->content == NULL) && (cur->childs == NULL)) {
1.7 daniel 207: if ((info != NULL) && (info->endTag != 0))
1.1 daniel 208: xmlBufferWriteChar(buf, ">");
209: else {
210: xmlBufferWriteChar(buf, "></");
211: xmlBufferWriteCHAR(buf, cur->name);
212: xmlBufferWriteChar(buf, ">");
213: }
214: if (cur->next != NULL) {
215: if ((cur->next->type != HTML_TEXT_NODE) &&
216: (cur->next->type != HTML_ENTITY_REF_NODE))
217: xmlBufferWriteChar(buf, "\n");
218: }
219: return;
220: }
221: xmlBufferWriteChar(buf, ">");
222: if (cur->content != NULL) {
1.6 daniel 223: xmlChar *buffer;
1.1 daniel 224:
1.9 ! daniel 225: #ifndef XML_USE_BUFFER_CONTENT
! 226: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
! 227: #else
! 228: buffer = xmlEncodeEntitiesReentrant(doc,
! 229: xmlBufferContent(cur->content));
! 230: #endif
1.1 daniel 231: if (buffer != NULL) {
232: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 233: xmlFree(buffer);
1.1 daniel 234: }
235: }
236: if (cur->childs != NULL) {
237: if ((cur->childs->type != HTML_TEXT_NODE) &&
238: (cur->childs->type != HTML_ENTITY_REF_NODE))
239: xmlBufferWriteChar(buf, "\n");
240: htmlNodeListDump(buf, doc, cur->childs);
241: if ((cur->last->type != HTML_TEXT_NODE) &&
242: (cur->last->type != HTML_ENTITY_REF_NODE))
243: xmlBufferWriteChar(buf, "\n");
244: }
245: xmlBufferWriteChar(buf, "</");
246: xmlBufferWriteCHAR(buf, cur->name);
247: xmlBufferWriteChar(buf, ">");
248: if (cur->next != NULL) {
249: if ((cur->next->type != HTML_TEXT_NODE) &&
250: (cur->next->type != HTML_ENTITY_REF_NODE))
251: xmlBufferWriteChar(buf, "\n");
252: }
253: }
254:
255: /**
256: * htmlDocContentDump:
257: * @buf: the HTML buffer output
258: * @cur: the document
259: *
260: * Dump an HTML document.
261: */
262: static void
263: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
264: if (cur->intSubset != NULL)
265: htmlDtdDump(buf, cur);
266: if (cur->root != NULL) {
1.8 daniel 267: htmlNodeListDump(buf, cur, cur->root);
1.1 daniel 268: }
269: xmlBufferWriteChar(buf, "\n");
270: }
271:
272: /**
273: * htmlDocDumpMemory:
274: * @cur: the document
275: * @mem: OUT: the memory pointer
276: * @size: OUT: the memory lenght
277: *
1.6 daniel 278: * Dump an HTML document in memory and return the xmlChar * and it's size.
1.1 daniel 279: * It's up to the caller to free the memory.
280: */
281: void
1.6 daniel 282: htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
1.1 daniel 283: xmlBufferPtr buf;
284:
285: if (cur == NULL) {
286: #ifdef DEBUG_TREE
287: fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
288: #endif
289: *mem = NULL;
290: *size = 0;
291: return;
292: }
293: buf = xmlBufferCreate();
294: if (buf == NULL) {
295: *mem = NULL;
296: *size = 0;
297: return;
298: }
299: htmlDocContentDump(buf, cur);
300: *mem = buf->content;
301: *size = buf->use;
302: memset(buf, -1, sizeof(xmlBuffer));
1.4 daniel 303: xmlFree(buf);
1.1 daniel 304: }
305:
306:
307: /**
308: * htmlDocDump:
309: * @f: the FILE*
310: * @cur: the document
311: *
312: * Dump an HTML document to an open FILE.
313: */
314: void
315: htmlDocDump(FILE *f, xmlDocPtr cur) {
316: xmlBufferPtr buf;
317:
318: if (cur == NULL) {
319: #ifdef DEBUG_TREE
320: fprintf(stderr, "xmlDocDump : document == NULL\n");
321: #endif
322: return;
323: }
324: buf = xmlBufferCreate();
325: if (buf == NULL) return;
326: htmlDocContentDump(buf, cur);
327: xmlBufferDump(f, buf);
328: xmlBufferFree(buf);
329: }
330:
331: /**
332: * htmlSaveFile:
333: * @filename: the filename
334: * @cur: the document
335: *
336: * Dump an HTML document to a file.
337: *
338: * returns: the number of byte written or -1 in case of failure.
339: */
340: int
341: htmlSaveFile(const char *filename, xmlDocPtr cur) {
342: xmlBufferPtr buf;
343: FILE *output = NULL;
344: int ret;
345:
346: /*
347: * save the content to a temp buffer.
348: */
349: buf = xmlBufferCreate();
350: if (buf == NULL) return(0);
351: htmlDocContentDump(buf, cur);
352:
353: output = fopen(filename, "w");
354: if (output == NULL) return(-1);
355: ret = xmlBufferDump(output, buf);
356: fclose(output);
357:
358: xmlBufferFree(buf);
1.6 daniel 359: return(ret * sizeof(xmlChar));
1.1 daniel 360: }
361:
Webmaster