Annotation of XML/HTMLtree.c, revision 1.13

1.1       daniel      1: /*
                      2:  * HTMLtree.c : implementation of access functions for an HTML tree.
                      3:  *
                      4:  * See Copyright for the status of this software.
                      5:  *
                      6:  * Daniel.Veillard@w3.org
                      7:  */
                      8: 
1.5       daniel      9: 
1.13    ! daniel     10: #ifdef WIN32
        !            11: #include "win32config.h"
        !            12: #else
1.1       daniel     13: #include "config.h"
1.5       daniel     14: #endif
1.1       daniel     15: #include <stdio.h>
1.5       daniel     16: #include <string.h> /* for memset() only ! */
                     17: 
                     18: #ifdef HAVE_CTYPE_H
1.1       daniel     19: #include <ctype.h>
1.5       daniel     20: #endif
                     21: #ifdef HAVE_STDLIB_H
1.1       daniel     22: #include <stdlib.h>
1.5       daniel     23: #endif
1.1       daniel     24: 
1.4       daniel     25: #include "xmlmemory.h"
1.1       daniel     26: #include "HTMLparser.h"
                     27: #include "HTMLtree.h"
                     28: #include "entities.h"
                     29: #include "valid.h"
                     30: 
                     31: /**
                     32:  * htmlDtdDump:
                     33:  * @buf:  the HTML buffer output
                     34:  * @doc:  the document
                     35:  * 
                     36:  * Dump the HTML document DTD, if any.
                     37:  */
                     38: static void
                     39: htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
                     40:     xmlDtdPtr cur = doc->intSubset;
                     41: 
                     42:     if (cur == NULL) {
                     43:         fprintf(stderr, "htmlDtdDump : no internal subset\n");
                     44:        return;
                     45:     }
                     46:     xmlBufferWriteChar(buf, "<!DOCTYPE ");
                     47:     xmlBufferWriteCHAR(buf, cur->name);
                     48:     if (cur->ExternalID != NULL) {
                     49:        xmlBufferWriteChar(buf, " PUBLIC ");
                     50:        xmlBufferWriteQuotedString(buf, cur->ExternalID);
1.2       daniel     51:        if (cur->SystemID != NULL) {
                     52:            xmlBufferWriteChar(buf, " ");
                     53:            xmlBufferWriteQuotedString(buf, cur->SystemID);
                     54:        } 
1.1       daniel     55:     }  else if (cur->SystemID != NULL) {
                     56:        xmlBufferWriteChar(buf, " SYSTEM ");
                     57:        xmlBufferWriteQuotedString(buf, cur->SystemID);
                     58:     }
                     59:     xmlBufferWriteChar(buf, ">\n");
                     60: }
                     61: 
                     62: /**
                     63:  * htmlAttrDump:
                     64:  * @buf:  the HTML buffer output
                     65:  * @doc:  the document
                     66:  * @cur:  the attribute pointer
                     67:  *
                     68:  * Dump an HTML attribute
                     69:  */
                     70: static void
                     71: htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
1.6       daniel     72:     xmlChar *value;
1.1       daniel     73: 
                     74:     if (cur == NULL) {
                     75:         fprintf(stderr, "htmlAttrDump : property == NULL\n");
                     76:        return;
                     77:     }
                     78:     xmlBufferWriteChar(buf, " ");
                     79:     xmlBufferWriteCHAR(buf, cur->name);
                     80:     value = xmlNodeListGetString(doc, cur->val, 0);
                     81:     if (value) {
                     82:        xmlBufferWriteChar(buf, "=");
                     83:        xmlBufferWriteQuotedString(buf, value);
1.4       daniel     84:        xmlFree(value);
1.1       daniel     85:     } else  {
                     86:        xmlBufferWriteChar(buf, "=\"\"");
                     87:     }
                     88: }
                     89: 
                     90: /**
                     91:  * htmlAttrListDump:
                     92:  * @buf:  the HTML buffer output
                     93:  * @doc:  the document
                     94:  * @cur:  the first attribute pointer
                     95:  *
                     96:  * Dump a list of HTML attributes
                     97:  */
                     98: static void
                     99: htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
                    100:     if (cur == NULL) {
                    101:         fprintf(stderr, "htmlAttrListDump : property == NULL\n");
                    102:        return;
                    103:     }
                    104:     while (cur != NULL) {
                    105:         htmlAttrDump(buf, doc, cur);
                    106:        cur = cur->next;
                    107:     }
                    108: }
                    109: 
                    110: 
                    111: static void
                    112: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
                    113: /**
                    114:  * htmlNodeListDump:
                    115:  * @buf:  the HTML buffer output
                    116:  * @doc:  the document
                    117:  * @cur:  the first node
                    118:  *
                    119:  * Dump an HTML node list, recursive behaviour,children are printed too.
                    120:  */
                    121: static void
                    122: htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
                    123:     if (cur == NULL) {
                    124:         fprintf(stderr, "htmlNodeListDump : node == NULL\n");
                    125:        return;
                    126:     }
                    127:     while (cur != NULL) {
                    128:         htmlNodeDump(buf, doc, cur);
                    129:        cur = cur->next;
                    130:     }
                    131: }
                    132: 
                    133: /**
                    134:  * htmlNodeDump:
                    135:  * @buf:  the HTML buffer output
                    136:  * @doc:  the document
                    137:  * @cur:  the current node
                    138:  *
                    139:  * Dump an HTML node, recursive behaviour,children are printed too.
                    140:  */
                    141: static void
                    142: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
                    143:     htmlElemDescPtr info;
                    144: 
                    145:     if (cur == NULL) {
                    146:         fprintf(stderr, "htmlNodeDump : node == NULL\n");
                    147:        return;
                    148:     }
                    149:     /*
                    150:      * Special cases.
                    151:      */
                    152:     if (cur->type == HTML_TEXT_NODE) {
                    153:        if (cur->content != NULL) {
1.6       daniel    154:             xmlChar *buffer;
1.1       daniel    155: 
                    156:            /* uses the HTML encoding routine !!!!!!!!!! */
1.9       daniel    157: #ifndef XML_USE_BUFFER_CONTENT
1.1       daniel    158:             buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
1.9       daniel    159: #else
                    160:             buffer = xmlEncodeEntitiesReentrant(doc, 
                    161:                                                 xmlBufferContent(cur->content));
                    162: #endif 
1.1       daniel    163:            if (buffer != NULL) {
                    164:                xmlBufferWriteCHAR(buf, buffer);
1.4       daniel    165:                xmlFree(buffer);
1.1       daniel    166:            }
                    167:        }
                    168:        return;
                    169:     }
                    170:     if (cur->type == HTML_COMMENT_NODE) {
                    171:        if (cur->content != NULL) {
                    172:            xmlBufferWriteChar(buf, "<!--");
1.9       daniel    173: #ifndef XML_USE_BUFFER_CONTENT
1.1       daniel    174:            xmlBufferWriteCHAR(buf, cur->content);
1.9       daniel    175: #else
                    176:            xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
                    177: #endif
1.1       daniel    178:            xmlBufferWriteChar(buf, "-->");
                    179:        }
                    180:        return;
                    181:     }
                    182:     if (cur->type == HTML_ENTITY_REF_NODE) {
                    183:         xmlBufferWriteChar(buf, "&");
                    184:        xmlBufferWriteCHAR(buf, cur->name);
                    185:         xmlBufferWriteChar(buf, ";");
                    186:        return;
                    187:     }
                    188: 
                    189:     /*
                    190:      * Get specific HTmL info for taht node.
                    191:      */
                    192:     info = htmlTagLookup(cur->name);
                    193: 
                    194:     xmlBufferWriteChar(buf, "<");
                    195:     xmlBufferWriteCHAR(buf, cur->name);
                    196:     if (cur->properties != NULL)
                    197:         htmlAttrListDump(buf, doc, cur->properties);
                    198: 
1.7       daniel    199:     if ((info != NULL) && (info->empty)) {
1.1       daniel    200:         xmlBufferWriteChar(buf, ">");
                    201:        if (cur->next != NULL) {
                    202:            if ((cur->next->type != HTML_TEXT_NODE) &&
                    203:                (cur->next->type != HTML_ENTITY_REF_NODE))
                    204:                xmlBufferWriteChar(buf, "\n");
                    205:        }
                    206:        return;
                    207:     }
                    208:     if ((cur->content == NULL) && (cur->childs == NULL)) {
1.7       daniel    209:         if ((info != NULL) && (info->endTag != 0))
1.1       daniel    210:            xmlBufferWriteChar(buf, ">");
                    211:        else {
                    212:            xmlBufferWriteChar(buf, "></");
                    213:            xmlBufferWriteCHAR(buf, cur->name);
                    214:            xmlBufferWriteChar(buf, ">");
                    215:        }
                    216:        if (cur->next != NULL) {
                    217:            if ((cur->next->type != HTML_TEXT_NODE) &&
                    218:                (cur->next->type != HTML_ENTITY_REF_NODE))
                    219:                xmlBufferWriteChar(buf, "\n");
                    220:        }
                    221:        return;
                    222:     }
                    223:     xmlBufferWriteChar(buf, ">");
                    224:     if (cur->content != NULL) {
1.6       daniel    225:        xmlChar *buffer;
1.1       daniel    226: 
1.9       daniel    227: #ifndef XML_USE_BUFFER_CONTENT
                    228:     buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
                    229: #else
                    230:     buffer = xmlEncodeEntitiesReentrant(doc, 
                    231:                                         xmlBufferContent(cur->content));
                    232: #endif
1.1       daniel    233:        if (buffer != NULL) {
                    234:            xmlBufferWriteCHAR(buf, buffer);
1.4       daniel    235:            xmlFree(buffer);
1.1       daniel    236:        }
                    237:     }
                    238:     if (cur->childs != NULL) {
                    239:         if ((cur->childs->type != HTML_TEXT_NODE) &&
1.10      daniel    240:            (cur->childs->type != HTML_ENTITY_REF_NODE) &&
                    241:            (cur->childs != cur->last))
1.1       daniel    242:            xmlBufferWriteChar(buf, "\n");
                    243:        htmlNodeListDump(buf, doc, cur->childs);
                    244:         if ((cur->last->type != HTML_TEXT_NODE) &&
1.10      daniel    245:            (cur->last->type != HTML_ENTITY_REF_NODE) &&
                    246:            (cur->childs != cur->last))
1.1       daniel    247:            xmlBufferWriteChar(buf, "\n");
                    248:     }
1.11      daniel    249:     if (!htmlIsAutoClosed(doc, cur)) {
                    250:        xmlBufferWriteChar(buf, "</");
                    251:        xmlBufferWriteCHAR(buf, cur->name);
                    252:        xmlBufferWriteChar(buf, ">");
                    253:     }
1.1       daniel    254:     if (cur->next != NULL) {
                    255:         if ((cur->next->type != HTML_TEXT_NODE) &&
                    256:            (cur->next->type != HTML_ENTITY_REF_NODE))
                    257:            xmlBufferWriteChar(buf, "\n");
                    258:     }
                    259: }
                    260: 
                    261: /**
                    262:  * htmlDocContentDump:
                    263:  * @buf:  the HTML buffer output
                    264:  * @cur:  the document
                    265:  *
                    266:  * Dump an HTML document.
                    267:  */
                    268: static void
                    269: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
1.12      daniel    270:     int type;
                    271: 
                    272:     /*
                    273:      * force to output the stuff as HTML, especially for entities
                    274:      */
                    275:     type = cur->type;
                    276:     cur->type = XML_HTML_DOCUMENT_NODE;
1.1       daniel    277:     if (cur->intSubset != NULL)
                    278:         htmlDtdDump(buf, cur);
1.11      daniel    279:     else {
                    280:        /* Default to HTML-4.0 transitionnal @@@@ */
                    281:        xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
                    282: 
                    283:     }
1.1       daniel    284:     if (cur->root != NULL) {
1.8       daniel    285:         htmlNodeListDump(buf, cur, cur->root);
1.1       daniel    286:     }
                    287:     xmlBufferWriteChar(buf, "\n");
1.12      daniel    288:     cur->type = type;
1.1       daniel    289: }
                    290: 
                    291: /**
                    292:  * htmlDocDumpMemory:
                    293:  * @cur:  the document
                    294:  * @mem:  OUT: the memory pointer
                    295:  * @size:  OUT: the memory lenght
                    296:  *
1.6       daniel    297:  * Dump an HTML document in memory and return the xmlChar * and it's size.
1.1       daniel    298:  * It's up to the caller to free the memory.
                    299:  */
                    300: void
1.6       daniel    301: htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
1.1       daniel    302:     xmlBufferPtr buf;
                    303: 
                    304:     if (cur == NULL) {
                    305: #ifdef DEBUG_TREE
                    306:         fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
                    307: #endif
                    308:        *mem = NULL;
                    309:        *size = 0;
                    310:        return;
                    311:     }
                    312:     buf = xmlBufferCreate();
                    313:     if (buf == NULL) {
                    314:        *mem = NULL;
                    315:        *size = 0;
                    316:        return;
                    317:     }
                    318:     htmlDocContentDump(buf, cur);
                    319:     *mem = buf->content;
                    320:     *size = buf->use;
                    321:     memset(buf, -1, sizeof(xmlBuffer));
1.4       daniel    322:     xmlFree(buf);
1.1       daniel    323: }
                    324: 
                    325: 
                    326: /**
                    327:  * htmlDocDump:
                    328:  * @f:  the FILE*
                    329:  * @cur:  the document
                    330:  *
                    331:  * Dump an HTML document to an open FILE.
                    332:  */
                    333: void
                    334: htmlDocDump(FILE *f, xmlDocPtr cur) {
                    335:     xmlBufferPtr buf;
                    336: 
                    337:     if (cur == NULL) {
                    338: #ifdef DEBUG_TREE
                    339:         fprintf(stderr, "xmlDocDump : document == NULL\n");
                    340: #endif
                    341:        return;
                    342:     }
                    343:     buf = xmlBufferCreate();
                    344:     if (buf == NULL) return;
                    345:     htmlDocContentDump(buf, cur);
                    346:     xmlBufferDump(f, buf);
                    347:     xmlBufferFree(buf);
                    348: }
                    349: 
                    350: /**
                    351:  * htmlSaveFile:
                    352:  * @filename:  the filename
                    353:  * @cur:  the document
                    354:  *
                    355:  * Dump an HTML document to a file.
                    356:  * 
                    357:  * returns: the number of byte written or -1 in case of failure.
                    358:  */
                    359: int
                    360: htmlSaveFile(const char *filename, xmlDocPtr cur) {
                    361:     xmlBufferPtr buf;
                    362:     FILE *output = NULL;
                    363:     int ret;
                    364: 
                    365:     /* 
                    366:      * save the content to a temp buffer.
                    367:      */
                    368:     buf = xmlBufferCreate();
                    369:     if (buf == NULL) return(0);
                    370:     htmlDocContentDump(buf, cur);
                    371: 
                    372:     output = fopen(filename, "w");
                    373:     if (output == NULL) return(-1);
                    374:     ret = xmlBufferDump(output, buf);
                    375:     fclose(output);
                    376: 
                    377:     xmlBufferFree(buf);
1.6       daniel    378:     return(ret * sizeof(xmlChar));
1.1       daniel    379: }
                    380: 

Webmaster