|
|
1.1 daniel 1: /*
2: * testHTML.c : a small tester program for HTML input.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
13: #include <config.h>
14: #endif
15: #include <sys/types.h>
16: #ifdef HAVE_SYS_STAT_H
17: #include <sys/stat.h>
18: #endif
19: #ifdef HAVE_FCNTL_H
20: #include <fcntl.h>
21: #endif
22: #ifdef HAVE_UNISTD_H
23: #include <unistd.h>
24: #endif
25: #include <stdio.h>
26: #include <string.h>
27: #include <stdlib.h>
28:
29: #include "HTMLparser.h"
30: #include "HTMLtree.h"
31: #include "debugXML.h"
32:
/*
 * debug: incremented by main() for each -debug/--debug argument; when
 * non-zero the parse-and-print helpers call xmlDebugDumpDocument()
 * instead of htmlDocDump().
 */
33: static int debug = 0;
/*
 * copy: incremented by main() for each -copy/--copy argument; when
 * non-zero the parsed document is replaced by the result of xmlCopyDoc()
 * before being printed.
 */
34: static int copy = 0;
35:
36: /*
37: * Note: this is perfectly clean HTML, i.e. not a useful test.
38: static CHAR buffer[] =
39: "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"\n\
40: \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n\
41: <html>\n\
42: <head>\n\
43: <title>This service is temporary down</title>\n\
44: </head>\n\
45: \n\
46: <body bgcolor=\"#FFFFFF\">\n\
47: <h1 align=\"center\">Sorry, this service is temporary down</h1>\n\
48: We are doing our best to get it back on-line,\n\
49: \n\
50: <p>The W3C system administrators</p>\n\
51: </body>\n\
52: </html>\n\
53: ";
1.2 ! daniel 54: */
1.1 daniel 55:
56: /************************************************************************
57: * *
58: * Debug *
59: * *
60: ************************************************************************/
61:
62: void parseAndPrintFile(char *filename) {
63: htmlDocPtr doc, tmp;
64:
65: /*
66: * build an HTML tree from a string;
67: */
68: doc = htmlParseFile(filename, NULL);
69:
70: /*
71: * test intermediate copy if needed.
72: */
73: if (copy) {
74: tmp = doc;
75: doc = xmlCopyDoc(doc, 1);
76: xmlFreeDoc(tmp);
77: }
78:
79: /*
80: * print it.
81: */
82: if (!debug)
83: htmlDocDump(stdout, doc);
84: else
85: xmlDebugDumpDocument(stdout, doc);
86:
87: /*
88: * free it.
89: */
90: xmlFreeDoc(doc);
91: }
92:
93: void parseAndPrintBuffer(CHAR *buf) {
94: htmlDocPtr doc, tmp;
95:
96: /*
97: * build an HTML tree from a string;
98: */
99: doc = htmlParseDoc(buf, NULL);
100:
101: /*
102: * test intermediate copy if needed.
103: */
104: if (copy) {
105: tmp = doc;
106: doc = xmlCopyDoc(doc, 1);
107: xmlFreeDoc(tmp);
108: }
109:
110: /*
111: * print it.
112: */
113: if (!debug)
114: htmlDocDump(stdout, doc);
115: else
116: xmlDebugDumpDocument(stdout, doc);
117:
118: /*
119: * free it.
120: */
121: xmlFreeDoc(doc);
122: }
123:
124: int main(int argc, char **argv) {
125: int i;
126: int files = 0;
127:
128: for (i = 1; i < argc ; i++) {
129: if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
130: debug++;
131: else if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
132: copy++;
133: }
134: for (i = 1; i < argc ; i++) {
135: if (argv[i][0] != '-') {
136: parseAndPrintFile(argv[i]);
137: files ++;
138: }
139: }
140: if (files == 0) {
141: printf("Usage : %s [--debug] [--copy] HTMLfiles ...\n",
142: argv[0]);
143: printf("\tParse the HTML files and output the result of the parsing\n");
144: printf("\t--debug : dump a debug tree of the in-memory document\n");
145: printf("\t--copy : used to test the internal copy implementation\n");
146: }
147:
148: return(0);
149: }