Annotation of XML/HTMLtree.c, revision 1.3
1.1 daniel 1: /*
2: * HTMLtree.c : implementation of access functions for an HTML tree.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
9: #include "config.h"
10: #include <stdio.h>
11: #include <ctype.h>
12: #include <stdlib.h>
13: #include <string.h> /* for memset() only ! */
14:
15: #include "HTMLparser.h"
16: #include "HTMLtree.h"
17: #include "entities.h"
18: #include "valid.h"
19:
20: /**
21: * htmlDtdDump:
22: * @buf: the HTML buffer output
23: * @doc: the document
24: *
25: * Dump the HTML document DTD, if any.
26: */
27: static void
28: htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
29: xmlDtdPtr cur = doc->intSubset;
30:
31: if (cur == NULL) {
32: fprintf(stderr, "htmlDtdDump : no internal subset\n");
33: return;
34: }
35: xmlBufferWriteChar(buf, "<!DOCTYPE ");
36: xmlBufferWriteCHAR(buf, cur->name);
37: if (cur->ExternalID != NULL) {
38: xmlBufferWriteChar(buf, " PUBLIC ");
39: xmlBufferWriteQuotedString(buf, cur->ExternalID);
1.2 daniel 40: if (cur->SystemID != NULL) {
41: xmlBufferWriteChar(buf, " ");
42: xmlBufferWriteQuotedString(buf, cur->SystemID);
43: }
1.1 daniel 44: } else if (cur->SystemID != NULL) {
45: xmlBufferWriteChar(buf, " SYSTEM ");
46: xmlBufferWriteQuotedString(buf, cur->SystemID);
47: }
48: xmlBufferWriteChar(buf, ">\n");
49: }
50:
51: /**
52: * htmlAttrDump:
53: * @buf: the HTML buffer output
54: * @doc: the document
55: * @cur: the attribute pointer
56: *
57: * Dump an HTML attribute
58: */
59: static void
60: htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
61: CHAR *value;
62:
63: if (cur == NULL) {
64: fprintf(stderr, "htmlAttrDump : property == NULL\n");
65: return;
66: }
67: xmlBufferWriteChar(buf, " ");
68: xmlBufferWriteCHAR(buf, cur->name);
69: value = xmlNodeListGetString(doc, cur->val, 0);
70: if (value) {
71: xmlBufferWriteChar(buf, "=");
72: xmlBufferWriteQuotedString(buf, value);
73: free(value);
74: } else {
75: xmlBufferWriteChar(buf, "=\"\"");
76: }
77: }
78:
79: /**
80: * htmlAttrListDump:
81: * @buf: the HTML buffer output
82: * @doc: the document
83: * @cur: the first attribute pointer
84: *
85: * Dump a list of HTML attributes
86: */
87: static void
88: htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
89: if (cur == NULL) {
90: fprintf(stderr, "htmlAttrListDump : property == NULL\n");
91: return;
92: }
93: while (cur != NULL) {
94: htmlAttrDump(buf, doc, cur);
95: cur = cur->next;
96: }
97: }
98:
99:
100: static void
101: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
102: /**
103: * htmlNodeListDump:
104: * @buf: the HTML buffer output
105: * @doc: the document
106: * @cur: the first node
107: *
108: * Dump an HTML node list, recursive behaviour,children are printed too.
109: */
110: static void
111: htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
112: if (cur == NULL) {
113: fprintf(stderr, "htmlNodeListDump : node == NULL\n");
114: return;
115: }
116: while (cur != NULL) {
117: htmlNodeDump(buf, doc, cur);
118: cur = cur->next;
119: }
120: }
121:
122: /**
123: * htmlNodeDump:
124: * @buf: the HTML buffer output
125: * @doc: the document
126: * @cur: the current node
127: *
128: * Dump an HTML node, recursive behaviour,children are printed too.
129: */
130: static void
131: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
132: htmlElemDescPtr info;
133:
134: if (cur == NULL) {
135: fprintf(stderr, "htmlNodeDump : node == NULL\n");
136: return;
137: }
138: /*
139: * Special cases.
140: */
141: if (cur->type == HTML_TEXT_NODE) {
142: if (cur->content != NULL) {
143: CHAR *buffer;
144:
145: /* uses the HTML encoding routine !!!!!!!!!! */
146: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
147: if (buffer != NULL) {
148: xmlBufferWriteCHAR(buf, buffer);
149: free(buffer);
150: }
151: }
152: return;
153: }
154: if (cur->type == HTML_COMMENT_NODE) {
155: if (cur->content != NULL) {
156: xmlBufferWriteChar(buf, "<!--");
157: xmlBufferWriteCHAR(buf, cur->content);
158: xmlBufferWriteChar(buf, "-->");
159: }
160: return;
161: }
162: if (cur->type == HTML_ENTITY_REF_NODE) {
163: xmlBufferWriteChar(buf, "&");
164: xmlBufferWriteCHAR(buf, cur->name);
165: xmlBufferWriteChar(buf, ";");
166: return;
167: }
168:
169: /*
170: * Get specific HTmL info for taht node.
171: */
172: info = htmlTagLookup(cur->name);
173:
174: xmlBufferWriteChar(buf, "<");
175: xmlBufferWriteCHAR(buf, cur->name);
176: if (cur->properties != NULL)
177: htmlAttrListDump(buf, doc, cur->properties);
178:
179: if (info->empty) {
180: xmlBufferWriteChar(buf, ">");
181: if (cur->next != NULL) {
182: if ((cur->next->type != HTML_TEXT_NODE) &&
183: (cur->next->type != HTML_ENTITY_REF_NODE))
184: xmlBufferWriteChar(buf, "\n");
185: }
186: return;
187: }
188: if ((cur->content == NULL) && (cur->childs == NULL)) {
189: if (info->endTag != 0)
190: xmlBufferWriteChar(buf, ">");
191: else {
192: xmlBufferWriteChar(buf, "></");
193: xmlBufferWriteCHAR(buf, cur->name);
194: xmlBufferWriteChar(buf, ">");
195: }
196: if (cur->next != NULL) {
197: if ((cur->next->type != HTML_TEXT_NODE) &&
198: (cur->next->type != HTML_ENTITY_REF_NODE))
199: xmlBufferWriteChar(buf, "\n");
200: }
201: return;
202: }
203: xmlBufferWriteChar(buf, ">");
204: if (cur->content != NULL) {
205: CHAR *buffer;
206:
207: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
208: if (buffer != NULL) {
209: xmlBufferWriteCHAR(buf, buffer);
210: free(buffer);
211: }
212: }
213: if (cur->childs != NULL) {
214: if ((cur->childs->type != HTML_TEXT_NODE) &&
215: (cur->childs->type != HTML_ENTITY_REF_NODE))
216: xmlBufferWriteChar(buf, "\n");
217: htmlNodeListDump(buf, doc, cur->childs);
218: if ((cur->last->type != HTML_TEXT_NODE) &&
219: (cur->last->type != HTML_ENTITY_REF_NODE))
220: xmlBufferWriteChar(buf, "\n");
221: }
222: xmlBufferWriteChar(buf, "</");
223: xmlBufferWriteCHAR(buf, cur->name);
224: xmlBufferWriteChar(buf, ">");
225: if (cur->next != NULL) {
226: if ((cur->next->type != HTML_TEXT_NODE) &&
227: (cur->next->type != HTML_ENTITY_REF_NODE))
228: xmlBufferWriteChar(buf, "\n");
229: }
230: }
231:
232: /**
233: * htmlDocContentDump:
234: * @buf: the HTML buffer output
235: * @cur: the document
236: *
237: * Dump an HTML document.
238: */
239: static void
240: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
241: if (cur->intSubset != NULL)
242: htmlDtdDump(buf, cur);
243: if (cur->root != NULL) {
244: htmlNodeDump(buf, cur, cur->root);
245: }
246: xmlBufferWriteChar(buf, "\n");
247: }
248:
249: /**
250: * htmlDocDumpMemory:
251: * @cur: the document
252: * @mem: OUT: the memory pointer
253: * @size: OUT: the memory lenght
254: *
255: * Dump an HTML document in memory and return the CHAR * and it's size.
256: * It's up to the caller to free the memory.
257: */
258: void
259: htmlDocDumpMemory(xmlDocPtr cur, CHAR**mem, int *size) {
260: xmlBufferPtr buf;
261:
262: if (cur == NULL) {
263: #ifdef DEBUG_TREE
264: fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
265: #endif
266: *mem = NULL;
267: *size = 0;
268: return;
269: }
270: buf = xmlBufferCreate();
271: if (buf == NULL) {
272: *mem = NULL;
273: *size = 0;
274: return;
275: }
276: htmlDocContentDump(buf, cur);
277: *mem = buf->content;
278: *size = buf->use;
279: memset(buf, -1, sizeof(xmlBuffer));
280: free(buf);
281: }
282:
283:
284: /**
285: * htmlDocDump:
286: * @f: the FILE*
287: * @cur: the document
288: *
289: * Dump an HTML document to an open FILE.
290: */
291: void
292: htmlDocDump(FILE *f, xmlDocPtr cur) {
293: xmlBufferPtr buf;
294:
295: if (cur == NULL) {
296: #ifdef DEBUG_TREE
297: fprintf(stderr, "xmlDocDump : document == NULL\n");
298: #endif
299: return;
300: }
301: buf = xmlBufferCreate();
302: if (buf == NULL) return;
303: htmlDocContentDump(buf, cur);
304: xmlBufferDump(f, buf);
305: xmlBufferFree(buf);
306: }
307:
308: /**
309: * htmlSaveFile:
310: * @filename: the filename
311: * @cur: the document
312: *
313: * Dump an HTML document to a file.
314: *
315: * returns: the number of byte written or -1 in case of failure.
316: */
317: int
318: htmlSaveFile(const char *filename, xmlDocPtr cur) {
319: xmlBufferPtr buf;
320: FILE *output = NULL;
321: int ret;
322:
323: /*
324: * save the content to a temp buffer.
325: */
326: buf = xmlBufferCreate();
327: if (buf == NULL) return(0);
328: htmlDocContentDump(buf, cur);
329:
330: output = fopen(filename, "w");
331: if (output == NULL) return(-1);
332: ret = xmlBufferDump(output, buf);
333: fclose(output);
334:
335: xmlBufferFree(buf);
336: return(ret * sizeof(CHAR));
337: }
338:
Webmaster