Annotation of XML/HTMLtree.c, revision 1.25
1.1 daniel 1: /*
2: * HTMLtree.c : implementation of access functions for an HTML tree.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
1.5 daniel 9:
1.13 daniel 10: #ifdef WIN32
11: #include "win32config.h"
12: #else
1.1 daniel 13: #include "config.h"
1.5 daniel 14: #endif
1.18 daniel 15:
16: #include "xmlversion.h"
17: #ifdef LIBXML_HTML_ENABLED
18:
1.1 daniel 19: #include <stdio.h>
1.5 daniel 20: #include <string.h> /* for memset() only ! */
21:
22: #ifdef HAVE_CTYPE_H
1.1 daniel 23: #include <ctype.h>
1.5 daniel 24: #endif
25: #ifdef HAVE_STDLIB_H
1.1 daniel 26: #include <stdlib.h>
1.5 daniel 27: #endif
1.1 daniel 28:
1.18 daniel 29: #include <libxml/xmlmemory.h>
30: #include <libxml/HTMLparser.h>
31: #include <libxml/HTMLtree.h>
32: #include <libxml/entities.h>
33: #include <libxml/valid.h>
1.1 daniel 34:
1.21 veillard 35: /************************************************************************
36: * *
1.23 veillard 37: * Getting/Setting encoding meta tags *
38: * *
39: ************************************************************************/
40:
41: /**
42: * htmlGetMetaEncoding:
43: * @doc: the document
44: *
45: * Encoding definition lookup in the Meta tags
46: *
47: * Returns the current encoding as flagged in the HTML source
48: */
49: const xmlChar *
50: htmlGetMetaEncoding(htmlDocPtr doc) {
1.24 veillard 51: htmlNodePtr cur;
52: const xmlChar *content;
53: const xmlChar *encoding;
54:
55: if (doc == NULL)
56: return(NULL);
57: cur = doc->children;
58:
59: /*
60: * Search the html
61: */
62: while (cur != NULL) {
63: if (cur->name != NULL) {
64: if (!xmlStrcmp(cur->name, BAD_CAST"html"))
65: break;
66: if (!xmlStrcmp(cur->name, BAD_CAST"head"))
67: goto found_head;
68: if (!xmlStrcmp(cur->name, BAD_CAST"meta"))
69: goto found_meta;
70: }
71: cur = cur->next;
72: }
73: if (cur == NULL)
74: return(NULL);
75: cur = cur->children;
76:
77: /*
78: * Search the head
79: */
80: while (cur != NULL) {
81: if (cur->name != NULL) {
82: if (!xmlStrcmp(cur->name, BAD_CAST"head"))
83: break;
84: if (!xmlStrcmp(cur->name, BAD_CAST"meta"))
85: goto found_meta;
86: }
87: cur = cur->next;
88: }
89: if (cur == NULL)
90: return(NULL);
91: found_head:
92: cur = cur->children;
93:
94: /*
95: * Search the meta elements
96: */
97: found_meta:
98: while (cur != NULL) {
99: if (cur->name != NULL) {
100: if (!xmlStrcmp(cur->name, BAD_CAST"meta")) {
101: xmlAttrPtr attr = cur->properties;
102: int http;
103: const xmlChar *value;
104:
105: content = NULL;
106: http = 0;
107: while (attr != NULL) {
108: if ((attr->children != NULL) &&
109: (attr->children->type == XML_TEXT_NODE) &&
110: (attr->children->next == NULL)) {
111: #ifndef XML_USE_BUFFER_CONTENT
112: value = attr->children->content;
113: #else
114: value = xmlBufferContent(attr->children->content);
115: #endif
116: if (((!xmlStrcmp(attr->name, BAD_CAST"http-equiv")) ||
117: (!xmlStrcmp(attr->name, BAD_CAST"Http-Equiv")) ||
118: (!xmlStrcmp(attr->name, BAD_CAST"HTTP-EQUIV"))) &&
119: ((!xmlStrcmp(value, BAD_CAST"Content-Type")) ||
120: (!xmlStrcmp(value, BAD_CAST"content-type")) ||
121: (!xmlStrcmp(value, BAD_CAST"CONTENT-TYPE"))))
122: http = 1;
123: else if ((value != NULL) &&
124: ((!xmlStrcmp(attr->name, BAD_CAST"content")) ||
125: (!xmlStrcmp(attr->name, BAD_CAST"Content")) ||
126: (!xmlStrcmp(attr->name, BAD_CAST"CONTENT"))))
127: content = value;
128: if ((http != 0) && (content != NULL))
129: goto found_content;
130: }
131: attr = attr->next;
132: }
133: }
134: }
135: cur = cur->next;
136: }
137: return(NULL);
138:
139: found_content:
140: encoding = xmlStrstr(content, BAD_CAST"charset=");
141: if (encoding == NULL)
142: encoding = xmlStrstr(content, BAD_CAST"Charset=");
143: if (encoding == NULL)
144: encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
145: if (encoding != NULL) {
146: encoding += 8;
147: } else {
148: encoding = xmlStrstr(content, BAD_CAST"charset =");
149: if (encoding == NULL)
150: encoding = xmlStrstr(content, BAD_CAST"Charset =");
151: if (encoding == NULL)
152: encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
153: if (encoding != NULL)
154: encoding += 9;
155: }
156: if (encoding != NULL) {
157: while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
158: }
159: return(encoding);
1.23 veillard 160: }
161:
162: /**
163: * htmlSetMetaEncoding:
164: * @doc: the document
165: * @encoding: the encoding string
166: *
167: * Sets the current encoding in the Meta tags
168: * NOTE: this will not change the document content encoding, just
169: * the META flag associated.
170: *
171: * Returns 0 in case of success and -1 in case of error
172: */
173: int
174: htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
175: }
176:
177: /************************************************************************
178: * *
1.21 veillard 179: * Dumping HTML tree content to a simple buffer *
180: * *
181: ************************************************************************/
182:
1.14 daniel 183: static void
184: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
185:
1.1 daniel 186: /**
187: * htmlDtdDump:
188: * @buf: the HTML buffer output
189: * @doc: the document
190: *
191: * Dump the HTML document DTD, if any.
192: */
193: static void
194: htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
195: xmlDtdPtr cur = doc->intSubset;
196:
197: if (cur == NULL) {
198: fprintf(stderr, "htmlDtdDump : no internal subset\n");
199: return;
200: }
201: xmlBufferWriteChar(buf, "<!DOCTYPE ");
202: xmlBufferWriteCHAR(buf, cur->name);
203: if (cur->ExternalID != NULL) {
204: xmlBufferWriteChar(buf, " PUBLIC ");
205: xmlBufferWriteQuotedString(buf, cur->ExternalID);
1.2 daniel 206: if (cur->SystemID != NULL) {
207: xmlBufferWriteChar(buf, " ");
208: xmlBufferWriteQuotedString(buf, cur->SystemID);
209: }
1.1 daniel 210: } else if (cur->SystemID != NULL) {
211: xmlBufferWriteChar(buf, " SYSTEM ");
212: xmlBufferWriteQuotedString(buf, cur->SystemID);
213: }
214: xmlBufferWriteChar(buf, ">\n");
215: }
216:
217: /**
218: * htmlAttrDump:
219: * @buf: the HTML buffer output
220: * @doc: the document
221: * @cur: the attribute pointer
222: *
223: * Dump an HTML attribute
224: */
225: static void
226: htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
1.6 daniel 227: xmlChar *value;
1.1 daniel 228:
229: if (cur == NULL) {
230: fprintf(stderr, "htmlAttrDump : property == NULL\n");
231: return;
232: }
233: xmlBufferWriteChar(buf, " ");
234: xmlBufferWriteCHAR(buf, cur->name);
1.19 daniel 235: if (cur->children != NULL) {
236: value = xmlNodeListGetString(doc, cur->children, 0);
237: if (value) {
238: xmlBufferWriteChar(buf, "=");
239: xmlBufferWriteQuotedString(buf, value);
240: xmlFree(value);
241: } else {
242: xmlBufferWriteChar(buf, "=\"\"");
243: }
1.1 daniel 244: }
245: }
246:
247: /**
248: * htmlAttrListDump:
249: * @buf: the HTML buffer output
250: * @doc: the document
251: * @cur: the first attribute pointer
252: *
253: * Dump a list of HTML attributes
254: */
255: static void
256: htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
257: if (cur == NULL) {
258: fprintf(stderr, "htmlAttrListDump : property == NULL\n");
259: return;
260: }
261: while (cur != NULL) {
262: htmlAttrDump(buf, doc, cur);
263: cur = cur->next;
264: }
265: }
266:
267:
1.14 daniel 268: void
1.1 daniel 269: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
270: /**
271: * htmlNodeListDump:
272: * @buf: the HTML buffer output
273: * @doc: the document
274: * @cur: the first node
275: *
276: * Dump an HTML node list, recursive behaviour,children are printed too.
277: */
278: static void
279: htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
280: if (cur == NULL) {
281: fprintf(stderr, "htmlNodeListDump : node == NULL\n");
282: return;
283: }
284: while (cur != NULL) {
285: htmlNodeDump(buf, doc, cur);
286: cur = cur->next;
287: }
288: }
289:
290: /**
291: * htmlNodeDump:
292: * @buf: the HTML buffer output
293: * @doc: the document
294: * @cur: the current node
295: *
296: * Dump an HTML node, recursive behaviour,children are printed too.
297: */
1.14 daniel 298: void
1.1 daniel 299: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
300: htmlElemDescPtr info;
301:
302: if (cur == NULL) {
303: fprintf(stderr, "htmlNodeDump : node == NULL\n");
304: return;
305: }
306: /*
307: * Special cases.
308: */
1.20 daniel 309: if (cur->type == XML_DTD_NODE)
310: return;
1.14 daniel 311: if (cur->type == XML_HTML_DOCUMENT_NODE) {
312: htmlDocContentDump(buf, (xmlDocPtr) cur);
313: return;
314: }
1.1 daniel 315: if (cur->type == HTML_TEXT_NODE) {
316: if (cur->content != NULL) {
1.6 daniel 317: xmlChar *buffer;
1.1 daniel 318:
1.9 daniel 319: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 320: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
1.9 daniel 321: #else
322: buffer = xmlEncodeEntitiesReentrant(doc,
323: xmlBufferContent(cur->content));
324: #endif
1.1 daniel 325: if (buffer != NULL) {
326: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 327: xmlFree(buffer);
1.1 daniel 328: }
329: }
330: return;
331: }
332: if (cur->type == HTML_COMMENT_NODE) {
333: if (cur->content != NULL) {
334: xmlBufferWriteChar(buf, "<!--");
1.9 daniel 335: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 336: xmlBufferWriteCHAR(buf, cur->content);
1.9 daniel 337: #else
338: xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
339: #endif
1.1 daniel 340: xmlBufferWriteChar(buf, "-->");
341: }
342: return;
343: }
344: if (cur->type == HTML_ENTITY_REF_NODE) {
345: xmlBufferWriteChar(buf, "&");
346: xmlBufferWriteCHAR(buf, cur->name);
347: xmlBufferWriteChar(buf, ";");
348: return;
349: }
350:
351: /*
352: * Get specific HTmL info for taht node.
353: */
354: info = htmlTagLookup(cur->name);
355:
356: xmlBufferWriteChar(buf, "<");
357: xmlBufferWriteCHAR(buf, cur->name);
358: if (cur->properties != NULL)
359: htmlAttrListDump(buf, doc, cur->properties);
360:
1.7 daniel 361: if ((info != NULL) && (info->empty)) {
1.1 daniel 362: xmlBufferWriteChar(buf, ">");
363: if (cur->next != NULL) {
364: if ((cur->next->type != HTML_TEXT_NODE) &&
365: (cur->next->type != HTML_ENTITY_REF_NODE))
366: xmlBufferWriteChar(buf, "\n");
367: }
368: return;
369: }
1.17 daniel 370: if ((cur->content == NULL) && (cur->children == NULL)) {
1.7 daniel 371: if ((info != NULL) && (info->endTag != 0))
1.1 daniel 372: xmlBufferWriteChar(buf, ">");
373: else {
374: xmlBufferWriteChar(buf, "></");
375: xmlBufferWriteCHAR(buf, cur->name);
376: xmlBufferWriteChar(buf, ">");
377: }
378: if (cur->next != NULL) {
379: if ((cur->next->type != HTML_TEXT_NODE) &&
380: (cur->next->type != HTML_ENTITY_REF_NODE))
381: xmlBufferWriteChar(buf, "\n");
382: }
383: return;
384: }
385: xmlBufferWriteChar(buf, ">");
386: if (cur->content != NULL) {
1.6 daniel 387: xmlChar *buffer;
1.1 daniel 388:
1.9 daniel 389: #ifndef XML_USE_BUFFER_CONTENT
390: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
391: #else
392: buffer = xmlEncodeEntitiesReentrant(doc,
393: xmlBufferContent(cur->content));
394: #endif
1.1 daniel 395: if (buffer != NULL) {
396: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 397: xmlFree(buffer);
1.1 daniel 398: }
399: }
1.17 daniel 400: if (cur->children != NULL) {
401: if ((cur->children->type != HTML_TEXT_NODE) &&
402: (cur->children->type != HTML_ENTITY_REF_NODE) &&
403: (cur->children != cur->last))
1.1 daniel 404: xmlBufferWriteChar(buf, "\n");
1.17 daniel 405: htmlNodeListDump(buf, doc, cur->children);
1.1 daniel 406: if ((cur->last->type != HTML_TEXT_NODE) &&
1.10 daniel 407: (cur->last->type != HTML_ENTITY_REF_NODE) &&
1.17 daniel 408: (cur->children != cur->last))
1.1 daniel 409: xmlBufferWriteChar(buf, "\n");
410: }
1.11 daniel 411: if (!htmlIsAutoClosed(doc, cur)) {
412: xmlBufferWriteChar(buf, "</");
413: xmlBufferWriteCHAR(buf, cur->name);
414: xmlBufferWriteChar(buf, ">");
415: }
1.1 daniel 416: if (cur->next != NULL) {
417: if ((cur->next->type != HTML_TEXT_NODE) &&
418: (cur->next->type != HTML_ENTITY_REF_NODE))
419: xmlBufferWriteChar(buf, "\n");
420: }
421: }
422:
423: /**
1.16 daniel 424: * htmlNodeDumpFile:
425: * @out: the FILE pointer
426: * @doc: the document
427: * @cur: the current node
428: *
429: * Dump an HTML node, recursive behaviour,children are printed too.
430: */
431: void
432: htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
433: xmlBufferPtr buf;
434:
435: buf = xmlBufferCreate();
436: if (buf == NULL) return;
437: htmlNodeDump(buf, doc, cur);
438: xmlBufferDump(out, buf);
439: xmlBufferFree(buf);
440: }
441:
442: /**
1.1 daniel 443: * htmlDocContentDump:
444: * @buf: the HTML buffer output
445: * @cur: the document
446: *
447: * Dump an HTML document.
448: */
449: static void
450: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
1.12 daniel 451: int type;
452:
453: /*
454: * force to output the stuff as HTML, especially for entities
455: */
456: type = cur->type;
457: cur->type = XML_HTML_DOCUMENT_NODE;
1.1 daniel 458: if (cur->intSubset != NULL)
459: htmlDtdDump(buf, cur);
1.11 daniel 460: else {
461: /* Default to HTML-4.0 transitionnal @@@@ */
462: xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
463:
464: }
1.17 daniel 465: if (cur->children != NULL) {
466: htmlNodeListDump(buf, cur, cur->children);
1.1 daniel 467: }
468: xmlBufferWriteChar(buf, "\n");
1.22 veillard 469: cur->type = (xmlElementType) type;
1.1 daniel 470: }
471:
472: /**
473: * htmlDocDumpMemory:
474: * @cur: the document
475: * @mem: OUT: the memory pointer
476: * @size: OUT: the memory lenght
477: *
1.6 daniel 478: * Dump an HTML document in memory and return the xmlChar * and it's size.
1.1 daniel 479: * It's up to the caller to free the memory.
480: */
481: void
1.6 daniel 482: htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
1.1 daniel 483: xmlBufferPtr buf;
484:
485: if (cur == NULL) {
486: #ifdef DEBUG_TREE
1.15 daniel 487: fprintf(stderr, "htmlxmlDocDumpMemory : document == NULL\n");
1.1 daniel 488: #endif
489: *mem = NULL;
490: *size = 0;
491: return;
492: }
493: buf = xmlBufferCreate();
494: if (buf == NULL) {
495: *mem = NULL;
496: *size = 0;
497: return;
498: }
499: htmlDocContentDump(buf, cur);
500: *mem = buf->content;
501: *size = buf->use;
502: memset(buf, -1, sizeof(xmlBuffer));
1.4 daniel 503: xmlFree(buf);
1.1 daniel 504: }
505:
506:
1.21 veillard 507: /************************************************************************
508: * *
509: * Dumping HTML tree content to an I/O output buffer *
510: * *
511: ************************************************************************/
512:
513: static void
514: htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding);
515:
516: /**
517: * htmlDtdDump:
518: * @buf: the HTML buffer output
519: * @doc: the document
520: *
521: * Dump the HTML document DTD, if any.
522: */
523: static void
524: htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, const char *encoding) {
525: xmlDtdPtr cur = doc->intSubset;
526:
527: if (cur == NULL) {
528: fprintf(stderr, "htmlDtdDump : no internal subset\n");
529: return;
530: }
531: xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
532: xmlOutputBufferWriteString(buf, (const char *)cur->name);
533: if (cur->ExternalID != NULL) {
534: xmlOutputBufferWriteString(buf, " PUBLIC ");
535: xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
536: if (cur->SystemID != NULL) {
537: xmlOutputBufferWriteString(buf, " ");
538: xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
539: }
540: } else if (cur->SystemID != NULL) {
541: xmlOutputBufferWriteString(buf, " SYSTEM ");
542: xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
543: }
544: xmlOutputBufferWriteString(buf, ">\n");
545: }
546:
547: /**
548: * htmlAttrDump:
549: * @buf: the HTML buffer output
550: * @doc: the document
551: * @cur: the attribute pointer
552: *
553: * Dump an HTML attribute
554: */
555: static void
556: htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
557: xmlChar *value;
558:
559: if (cur == NULL) {
560: fprintf(stderr, "htmlAttrDump : property == NULL\n");
561: return;
562: }
563: xmlOutputBufferWriteString(buf, " ");
564: xmlOutputBufferWriteString(buf, (const char *)cur->name);
565: if (cur->children != NULL) {
566: value = xmlNodeListGetString(doc, cur->children, 0);
567: if (value) {
568: xmlOutputBufferWriteString(buf, "=");
569: xmlBufferWriteQuotedString(buf->buffer, value);
570: xmlFree(value);
571: } else {
572: xmlOutputBufferWriteString(buf, "=\"\"");
573: }
574: }
575: }
576:
577: /**
578: * htmlAttrListDump:
579: * @buf: the HTML buffer output
580: * @doc: the document
581: * @cur: the first attribute pointer
582: *
583: * Dump a list of HTML attributes
584: */
585: static void
586: htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
587: if (cur == NULL) {
588: fprintf(stderr, "htmlAttrListDump : property == NULL\n");
589: return;
590: }
591: while (cur != NULL) {
592: htmlAttrDumpOutput(buf, doc, cur, encoding);
593: cur = cur->next;
594: }
595: }
596:
597:
598: void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
599: xmlNodePtr cur, const char *encoding);
600:
601: /**
602: * htmlNodeListDump:
603: * @buf: the HTML buffer output
604: * @doc: the document
605: * @cur: the first node
606: *
607: * Dump an HTML node list, recursive behaviour,children are printed too.
608: */
609: static void
610: htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
611: if (cur == NULL) {
612: fprintf(stderr, "htmlNodeListDump : node == NULL\n");
613: return;
614: }
615: while (cur != NULL) {
616: htmlNodeDumpOutput(buf, doc, cur, encoding);
617: cur = cur->next;
618: }
619: }
620:
621: /**
622: * htmlNodeDump:
623: * @buf: the HTML buffer output
624: * @doc: the document
625: * @cur: the current node
626: *
627: * Dump an HTML node, recursive behaviour,children are printed too.
628: */
629: void
630: htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
631: htmlElemDescPtr info;
632:
633: if (cur == NULL) {
634: fprintf(stderr, "htmlNodeDump : node == NULL\n");
635: return;
636: }
637: /*
638: * Special cases.
639: */
640: if (cur->type == XML_DTD_NODE)
641: return;
642: if (cur->type == XML_HTML_DOCUMENT_NODE) {
643: htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
644: return;
645: }
646: if (cur->type == HTML_TEXT_NODE) {
647: if (cur->content != NULL) {
648: xmlChar *buffer;
649:
650: #ifndef XML_USE_BUFFER_CONTENT
651: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
652: #else
653: buffer = xmlEncodeEntitiesReentrant(doc,
654: xmlBufferContent(cur->content));
655: #endif
656: if (buffer != NULL) {
1.25 ! veillard 657: xmlOutputBufferWriteString(buf, (const char *)buffer);
1.21 veillard 658: xmlFree(buffer);
659: }
660: }
661: return;
662: }
663: if (cur->type == HTML_COMMENT_NODE) {
664: if (cur->content != NULL) {
665: xmlOutputBufferWriteString(buf, "<!--");
666: #ifndef XML_USE_BUFFER_CONTENT
667: xmlOutputBufferWriteString(buf, (const char *)cur->content);
668: #else
669: xmlOutputBufferWriteString(buf, xmlBufferContent(cur->content));
670: #endif
671: xmlOutputBufferWriteString(buf, "-->");
672: }
673: return;
674: }
675: if (cur->type == HTML_ENTITY_REF_NODE) {
676: xmlOutputBufferWriteString(buf, "&");
677: xmlOutputBufferWriteString(buf, (const char *)cur->name);
678: xmlOutputBufferWriteString(buf, ";");
679: return;
680: }
681:
682: /*
683: * Get specific HTmL info for taht node.
684: */
685: info = htmlTagLookup(cur->name);
686:
687: xmlOutputBufferWriteString(buf, "<");
688: xmlOutputBufferWriteString(buf, (const char *)cur->name);
689: if (cur->properties != NULL)
690: htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
691:
692: if ((info != NULL) && (info->empty)) {
693: xmlOutputBufferWriteString(buf, ">");
694: if (cur->next != NULL) {
695: if ((cur->next->type != HTML_TEXT_NODE) &&
696: (cur->next->type != HTML_ENTITY_REF_NODE))
697: xmlOutputBufferWriteString(buf, "\n");
698: }
699: return;
700: }
701: if ((cur->content == NULL) && (cur->children == NULL)) {
702: if ((info != NULL) && (info->endTag != 0))
703: xmlOutputBufferWriteString(buf, ">");
704: else {
705: xmlOutputBufferWriteString(buf, "></");
706: xmlOutputBufferWriteString(buf, (const char *)cur->name);
707: xmlOutputBufferWriteString(buf, ">");
708: }
709: if (cur->next != NULL) {
710: if ((cur->next->type != HTML_TEXT_NODE) &&
711: (cur->next->type != HTML_ENTITY_REF_NODE))
712: xmlOutputBufferWriteString(buf, "\n");
713: }
714: return;
715: }
716: xmlOutputBufferWriteString(buf, ">");
717: if (cur->content != NULL) {
718: #if 0
719: xmlChar *buffer;
720:
721: #ifndef XML_USE_BUFFER_CONTENT
722: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
723: #else
724: buffer = xmlEncodeEntitiesReentrant(doc,
725: xmlBufferContent(cur->content));
726: #endif
727: if (buffer != NULL) {
728: xmlOutputBufferWriteString(buf, buffer);
729: xmlFree(buffer);
730: }
731: #else
732: /*
733: * Uses the OutputBuffer property to automatically convert
734: * invalids to charrefs
735: */
736:
737: #ifndef XML_USE_BUFFER_CONTENT
738: xmlOutputBufferWriteString(buf, (const char *) cur->content);
739: #else
740: xmlOutputBufferWriteString(buf,
741: (const char *) xmlBufferContent(cur->content));
742: #endif
743: #endif
744: }
745: if (cur->children != NULL) {
746: if ((cur->children->type != HTML_TEXT_NODE) &&
747: (cur->children->type != HTML_ENTITY_REF_NODE) &&
748: (cur->children != cur->last))
749: xmlOutputBufferWriteString(buf, "\n");
750: htmlNodeListDumpOutput(buf, doc, cur->children, encoding);
751: if ((cur->last->type != HTML_TEXT_NODE) &&
752: (cur->last->type != HTML_ENTITY_REF_NODE) &&
753: (cur->children != cur->last))
754: xmlOutputBufferWriteString(buf, "\n");
755: }
756: if (!htmlIsAutoClosed(doc, cur)) {
757: xmlOutputBufferWriteString(buf, "</");
758: xmlOutputBufferWriteString(buf, (const char *)cur->name);
759: xmlOutputBufferWriteString(buf, ">");
760: }
761: if (cur->next != NULL) {
762: if ((cur->next->type != HTML_TEXT_NODE) &&
763: (cur->next->type != HTML_ENTITY_REF_NODE))
764: xmlOutputBufferWriteString(buf, "\n");
765: }
766: }
767:
768: /**
769: * htmlDocContentDump:
770: * @buf: the HTML buffer output
771: * @cur: the document
772: *
773: * Dump an HTML document.
774: */
775: static void
776: htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding) {
777: int type;
778:
779: /*
780: * force to output the stuff as HTML, especially for entities
781: */
782: type = cur->type;
783: cur->type = XML_HTML_DOCUMENT_NODE;
784: if (cur->intSubset != NULL)
785: htmlDtdDumpOutput(buf, cur, NULL);
786: else {
787: /* Default to HTML-4.0 transitionnal @@@@ */
788: xmlOutputBufferWriteString(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
789:
790: }
791: if (cur->children != NULL) {
792: htmlNodeListDumpOutput(buf, cur, cur->children, encoding);
793: }
794: xmlOutputBufferWriteString(buf, "\n");
1.22 veillard 795: cur->type = (xmlElementType) type;
1.21 veillard 796: }
797:
798:
799: /************************************************************************
800: * *
801: * Saving functions front-ends *
802: * *
803: ************************************************************************/
804:
1.1 daniel 805: /**
806: * htmlDocDump:
807: * @f: the FILE*
808: * @cur: the document
809: *
810: * Dump an HTML document to an open FILE.
1.21 veillard 811: *
812: * returns: the number of byte written or -1 in case of failure.
1.1 daniel 813: */
1.21 veillard 814: int
1.1 daniel 815: htmlDocDump(FILE *f, xmlDocPtr cur) {
1.21 veillard 816: xmlOutputBufferPtr buf;
1.24 veillard 817: xmlCharEncodingHandlerPtr handler = NULL;
818: const char *encoding;
1.21 veillard 819: int ret;
1.1 daniel 820:
821: if (cur == NULL) {
822: #ifdef DEBUG_TREE
1.15 daniel 823: fprintf(stderr, "htmlDocDump : document == NULL\n");
1.1 daniel 824: #endif
1.21 veillard 825: return(-1);
1.1 daniel 826: }
1.24 veillard 827:
828: encoding = (const char *) htmlGetMetaEncoding(cur);
829:
830: if (encoding != NULL) {
831: xmlCharEncoding enc;
832:
833: enc = xmlParseCharEncoding(encoding);
834: if (enc != cur->charset) {
835: if (cur->charset != XML_CHAR_ENCODING_UTF8) {
836: /*
837: * Not supported yet
838: */
839: return(-1);
840: }
841:
842: handler = xmlFindCharEncodingHandler(encoding);
843: if (handler == NULL)
844: return(-1);
845: }
846: }
847:
848: /*
1.25 ! veillard 849: * Fallback to HTML or ASCII when the encoding is unspecified
1.24 veillard 850: */
851: if (handler == NULL)
1.25 ! veillard 852: handler = xmlFindCharEncodingHandler("HTML");
! 853: if (handler == NULL)
1.24 veillard 854: handler = xmlFindCharEncodingHandler("ascii");
855:
856: buf = xmlOutputBufferCreateFile(f, handler);
1.21 veillard 857: if (buf == NULL) return(-1);
858: htmlDocContentDumpOutput(buf, cur, NULL);
859:
860: ret = xmlOutputBufferClose(buf);
861: return(ret);
862: }
863:
864: /**
865: * htmlSaveFile:
866: * @filename: the filename (or URL)
867: * @cur: the document
868: *
869: * Dump an HTML document to a file. If @filename is "-" the stdout file is
870: * used.
871: * returns: the number of byte written or -1 in case of failure.
872: */
873: int
874: htmlSaveFile(const char *filename, xmlDocPtr cur) {
875: xmlOutputBufferPtr buf;
1.24 veillard 876: xmlCharEncodingHandlerPtr handler = NULL;
877: const char *encoding;
1.21 veillard 878: int ret;
879:
1.24 veillard 880: encoding = (const char *) htmlGetMetaEncoding(cur);
881:
882: if (encoding != NULL) {
883: xmlCharEncoding enc;
884:
885: enc = xmlParseCharEncoding(encoding);
886: if (enc != cur->charset) {
887: if (cur->charset != XML_CHAR_ENCODING_UTF8) {
888: /*
889: * Not supported yet
890: */
891: return(-1);
892: }
893:
894: handler = xmlFindCharEncodingHandler(encoding);
895: if (handler == NULL)
896: return(-1);
897: }
898: }
899:
900: /*
1.25 ! veillard 901: * Fallback to HTML or ASCII when the encoding is unspecified
1.24 veillard 902: */
903: if (handler == NULL)
1.25 ! veillard 904: handler = xmlFindCharEncodingHandler("HTML");
! 905: if (handler == NULL)
1.24 veillard 906: handler = xmlFindCharEncodingHandler("ascii");
907:
1.21 veillard 908: /*
909: * save the content to a temp buffer.
910: */
1.24 veillard 911: buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1.21 veillard 912: if (buf == NULL) return(0);
913:
914: htmlDocContentDumpOutput(buf, cur, NULL);
915:
916: ret = xmlOutputBufferClose(buf);
917: return(ret);
1.1 daniel 918: }
919:
920: /**
921: * htmlSaveFile:
922: * @filename: the filename
923: * @cur: the document
924: *
925: * Dump an HTML document to a file.
926: *
927: * returns: the number of byte written or -1 in case of failure.
928: */
929: int
1.21 veillard 930: htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
931: xmlOutputBufferPtr buf;
932: xmlCharEncodingHandlerPtr handler = NULL;
1.1 daniel 933: int ret;
934:
1.21 veillard 935: if (encoding != NULL) {
936: xmlCharEncoding enc;
937:
938: enc = xmlParseCharEncoding(encoding);
939: if (enc != cur->charset) {
940: if (cur->charset != XML_CHAR_ENCODING_UTF8) {
941: /*
942: * Not supported yet
943: */
944: return(-1);
945: }
946:
947: handler = xmlFindCharEncodingHandler(encoding);
948: if (handler == NULL)
949: return(-1);
950: }
951: }
1.24 veillard 952:
953: /*
1.25 ! veillard 954: * Fallback to HTML or ASCII when the encoding is unspecified
1.24 veillard 955: */
1.25 ! veillard 956: if (handler == NULL)
! 957: handler = xmlFindCharEncodingHandler("HTML");
1.24 veillard 958: if (handler == NULL)
959: handler = xmlFindCharEncodingHandler("ascii");
1.21 veillard 960:
1.1 daniel 961: /*
962: * save the content to a temp buffer.
963: */
1.21 veillard 964: buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1.1 daniel 965: if (buf == NULL) return(0);
966:
1.21 veillard 967: htmlDocContentDumpOutput(buf, cur, encoding);
1.1 daniel 968:
1.21 veillard 969: ret = xmlOutputBufferClose(buf);
970: return(ret);
1.1 daniel 971: }
1.18 daniel 972: #endif /* LIBXML_HTML_ENABLED */
Webmaster