Annotation of XML/nanohttp.c, revision 1.2
1.1 daniel 1: /*
2: * nanohttp.c: minimalist HTTP implementation to fetch external subsets.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
9: #include <stdio.h>
10: #include <string.h>
11: #include <stdlib.h>
12: #include <unistd.h>
13: #include <sys/socket.h>
14: #include <netinet/in.h>
15: #include <arpa/inet.h>
16: #include <netdb.h>
17: #include <fcntl.h>
18: #include <errno.h>
19: #include <sys/time.h>
20: #include <sys/select.h>
21:
/* Upper bound on the number of redirections followed for one request. */
#define XML_NANO_HTTP_MAX_REDIR		10

/* Number of bytes requested from the socket by each read. */
#define XML_NANO_HTTP_CHUNK		4096

/*
 * Values stored in xmlNanoHTTPCtxt.state.  WRITE and READ are tested
 * with a bitwise AND by the send and receive routines.
 */
#define XML_NANO_HTTP_CLOSED		0
#define XML_NANO_HTTP_WRITE		1
#define XML_NANO_HTTP_READ		2
#define XML_NANO_HTTP_NONE		4
30:
/*
 * Per-request state: the parsed URL, the socket, the request buffer,
 * the growing receive buffer with its cursors, and the values
 * extracted from the response header.
 */
typedef struct xmlNanoHTTPCtxt {
    /* Parsed URL. */
    char *protocol;	/* the protocol name */
    char *hostname;	/* the host name */
    int port;		/* the port */
    char *path;		/* the path within the URL */

    /* Connection. */
    int fd;		/* the file descriptor for the socket */
    int state;		/* WRITE / READ / CLOSED */

    /* Outgoing request. */
    char *out;		/* buffer sent (zero terminated) */
    char *outptr;	/* index within the buffer sent */

    /* Incoming data. */
    char *in;		/* the receiving buffer */
    char *content;	/* the start of the content */
    char *inptr;	/* the next byte to read from network */
    char *inrptr;	/* the next byte to give back to the client */
    int inlen;		/* len of the input buffer */

    /* Results. */
    int last;		/* return code for last operation */
    int returnValue;	/* the protocol return value */
    char *contentType;	/* the MIME type for the input */
    char *location;	/* the new URL in case of redirect */
} xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
50:
51: static void xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
52: const char *cur = URL;
53: char buf[4096];
54: int index = 0;
55: int port = 0;
56:
57: if (ctxt->protocol != NULL) {
58: free(ctxt->protocol);
59: ctxt->protocol = NULL;
60: }
61: if (ctxt->hostname != NULL) {
62: free(ctxt->hostname);
63: ctxt->hostname = NULL;
64: }
65: if (ctxt->path != NULL) {
66: free(ctxt->path);
67: ctxt->path = NULL;
68: }
69: buf[index] = 0;
70: while (*cur != 0) {
71: if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) {
72: buf[index] = 0;
73: ctxt->protocol = strdup(buf);
74: index = 0;
75: cur += 3;
76: break;
77: }
78: buf[index++] = *cur++;
79: }
80: if (*cur == 0) return;
81:
82: buf[index] = 0;
83: while (1) {
84: if (cur[0] == ':') {
85: buf[index] = 0;
86: ctxt->hostname = strdup(buf);
87: index = 0;
88: cur += 1;
89: while ((*cur >= '0') && (*cur <= '9')) {
90: port *= 10;
91: port += *cur - '0';
92: cur++;
93: }
94: if (port != 0) ctxt->port = port;
95: while ((cur[0] != '/') && (*cur != 0))
96: cur++;
97: break;
98: }
99: if ((*cur == '/') || (*cur == 0)) {
100: buf[index] = 0;
101: ctxt->hostname = strdup(buf);
102: index = 0;
103: break;
104: }
105: buf[index++] = *cur++;
106: }
107: if (*cur == 0)
108: ctxt->path = strdup("/");
109: else
110: ctxt->path = strdup(cur);
111: }
112:
113: static xmlNanoHTTPCtxtPtr xmlNanoHTTPNewCtxt(const char *URL) {
114: xmlNanoHTTPCtxtPtr ret;
115:
116: ret = (xmlNanoHTTPCtxtPtr) malloc(sizeof(xmlNanoHTTPCtxt));
117: if (ret == NULL) return(NULL);
118:
119: memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
120: ret->port = 80;
121: ret->returnValue = 0;
122:
123: xmlNanoHTTPScanURL(ret, URL);
124:
125: return(ret);
126: }
127:
128: static void xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
129: if (ctxt->hostname != NULL) free(ctxt->hostname);
130: if (ctxt->protocol != NULL) free(ctxt->protocol);
131: if (ctxt->path != NULL) free(ctxt->path);
132: if (ctxt->out != NULL) free(ctxt->out);
133: if (ctxt->in != NULL) free(ctxt->in);
134: if (ctxt->contentType != NULL) free(ctxt->contentType);
135: if (ctxt->location != NULL) free(ctxt->location);
136: ctxt->state = XML_NANO_HTTP_NONE;
137: if (ctxt->fd >= 0) close(ctxt->fd);
138: ctxt->fd = -1;
139: free(ctxt);
140: }
141:
142: static void xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt) {
143: if (ctxt->state & XML_NANO_HTTP_WRITE)
144: ctxt->last = write(ctxt->fd, ctxt->outptr, strlen(ctxt->outptr));
145: }
146:
147: static int xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt) {
148: fd_set rfd;
149: struct timeval tv;
150:
151:
152: while (ctxt->state & XML_NANO_HTTP_READ) {
153: if (ctxt->in == NULL) {
154: ctxt->in = (char *) malloc(65000 * sizeof(char));
155: if (ctxt->in == NULL) {
156: ctxt->last = -1;
157: return(-1);
158: }
159: ctxt->inlen = 65000;
160: ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
161: }
162: if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
163: int delta = ctxt->inrptr - ctxt->in;
164: int len = ctxt->inptr - ctxt->inrptr;
165:
166: memmove(ctxt->in, ctxt->inrptr, len);
167: ctxt->inrptr -= delta;
168: ctxt->content -= delta;
169: ctxt->inptr -= delta;
170: }
171: if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) {
172: int d_inptr = ctxt->inptr - ctxt->in;
173: int d_content = ctxt->content - ctxt->in;
174: int d_inrptr = ctxt->inrptr - ctxt->in;
175:
176: ctxt->inlen *= 2;
177: ctxt->in = (char *) realloc(ctxt->in, ctxt->inlen);
178: if (ctxt->in == NULL) {
179: ctxt->last = -1;
180: return(-1);
181: }
182: ctxt->inptr = ctxt->in + d_inptr;
183: ctxt->content = ctxt->in + d_content;
184: ctxt->inrptr = ctxt->in + d_inrptr;
185: }
186: ctxt->last = read(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK);
187: if (ctxt->last > 0) {
188: ctxt->inptr += ctxt->last;
189: return(ctxt->last);
190: }
191: if (ctxt->last == 0) {
192: return(0);
193: }
194: #ifdef EWOULDBLOCK
195: if ((ctxt->last == -1) && (errno != EWOULDBLOCK)) {
196: return 0;
197: }
198: #endif
199: tv.tv_sec=10;
200: tv.tv_usec=0;
201: FD_ZERO(&rfd);
202: FD_SET(ctxt->fd, &rfd);
203:
204: if(select(ctxt->fd+1, &rfd, NULL, NULL, &tv)<1)
205: return 0;
206: }
207: return(0);
208: }
209:
210: char *xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
211: static char buf[4096];
212: char *bp=buf;
213:
214: while(bp - buf < 4095) {
215: if(ctxt->inrptr == ctxt->inptr) {
216: if (xmlNanoHTTPRecv(ctxt) == 0) {
217: if (bp == buf)
218: return NULL;
219: else
220: *bp = 0;
221: return buf;
222: }
223: }
224: *bp = *ctxt->inrptr++;
225: if(*bp == '\n') {
226: *bp = 0;
227: return buf;
228: }
229: if(*bp != '\r')
230: bp++;
231: }
232: buf[4095] = 0;
233: return(buf);
234: }
235:
236:
237: static void xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
238: const char *cur = line;
239:
240: if (line == NULL) return;
241:
242: if (!strncmp(line, "HTTP/", 5)) {
243: int version = 0;
244: int ret = 0;
245:
246: cur += 5;
247: while ((*cur >= '0') && (*cur <= '9')) {
248: version *= 10;
249: version += *cur - '0';
250: cur++;
251: }
252: if (*cur == '.') {
253: cur++;
254: if ((*cur >= '0') && (*cur <= '9')) {
255: version *= 10;
256: version += *cur - '0';
257: cur++;
258: }
259: while ((*cur >= '0') && (*cur <= '9'))
260: cur++;
261: } else
262: version *= 10;
263: if ((*cur != ' ') && (*cur != '\t')) return;
264: while ((*cur == ' ') || (*cur == '\t')) cur++;
265: if ((*cur < '0') || (*cur > '9')) return;
266: while ((*cur >= '0') && (*cur <= '9')) {
267: ret *= 10;
268: ret += *cur - '0';
269: cur++;
270: }
271: if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
272: ctxt->returnValue = ret;
273: } else if (!strncmp(line, "Content-Type:", 13)) {
274: cur += 13;
275: while ((*cur == ' ') || (*cur == '\t')) cur++;
276: if (ctxt->contentType != NULL)
277: free(ctxt->contentType);
278: ctxt->contentType = strdup(cur);
279: } else if (!strncmp(line, "ContentType:", 12)) {
280: cur += 12;
281: if (ctxt->contentType != NULL) return;
282: while ((*cur == ' ') || (*cur == '\t')) cur++;
283: ctxt->contentType = strdup(cur);
284: } else if (!strncmp(line, "content-type:", 13)) {
285: cur += 13;
286: if (ctxt->contentType != NULL) return;
287: while ((*cur == ' ') || (*cur == '\t')) cur++;
288: ctxt->contentType = strdup(cur);
289: } else if (!strncmp(line, "contenttype:", 12)) {
290: cur += 12;
291: if (ctxt->contentType != NULL) return;
292: while ((*cur == ' ') || (*cur == '\t')) cur++;
293: ctxt->contentType = strdup(cur);
294: } else if (!strncmp(line, "Location:", 9)) {
295: cur += 9;
296: while ((*cur == ' ') || (*cur == '\t')) cur++;
297: if (ctxt->location != NULL)
298: free(ctxt->location);
299: ctxt->location = strdup(cur);
300: } else if (!strncmp(line, "location:", 9)) {
301: cur += 9;
302: if (ctxt->location != NULL) return;
303: while ((*cur == ' ') || (*cur == '\t')) cur++;
304: ctxt->location = strdup(cur);
305: }
306: }
307:
/*
 * xmlNanoHTTPConnectAttempt:
 * @ia:    the IPv4 address to connect to
 * @port:  the TCP port, in host byte order
 *
 * Creates a TCP socket, switches it to non-blocking mode, starts the
 * connection and waits up to 60 seconds for it to complete.
 *
 * Returns the connected socket descriptor, or -1 on any failure (the
 * socket is closed before returning).
 */
static int xmlNanoHTTPConnectAttempt(struct in_addr ia, int port)
{
    int s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    struct sockaddr_in sin;
    fd_set wfd;
    struct timeval tv;
    int status;

    if (s == -1) {
        perror("socket");
        return(-1);
    }

    /* Switch the socket to non-blocking mode (platform specific). */
#ifdef _WINSOCKAPI_
    {
        u_long one = 1;

        status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? -1 : 0;
    }
#else /* _WINSOCKAPI_ */
#if defined(VMS)
    {
        int enable = 1;
        status = IOCTL(s, FIONBIO, &enable);
    }
#else /* VMS */
    if ((status = fcntl(s, F_GETFL, 0)) != -1) {
#ifdef O_NONBLOCK
        status |= O_NONBLOCK;
#else /* O_NONBLOCK */
#ifdef F_NDELAY
        status |= F_NDELAY;
#endif /* F_NDELAY */
#endif /* !O_NONBLOCK */
        status = fcntl(s, F_SETFL, status);
    }
#endif /* !VMS */
#endif /* !_WINSOCKAPI_ */
    /* Checked for every platform branch, not only the fcntl() one. */
    if (status < 0) {
        perror("nonblocking");
        close(s);
        return(-1);
    }

    memset(&sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    sin.sin_addr = ia;
    sin.sin_port = htons(port);

    /* On a non-blocking socket EINPROGRESS means "still connecting". */
    if ((connect(s, (struct sockaddr *)&sin, sizeof(sin)) == -1) &&
        (errno != EINPROGRESS)) {
        perror("connect");
        close(s);
        return(-1);
    }

    tv.tv_sec = 60;		/* We use 60 second timeouts for now */
    tv.tv_usec = 0;

    FD_ZERO(&wfd);
    FD_SET(s, &wfd);

    switch (select(s + 1, NULL, &wfd, NULL, &tv)) {
        case 0:
            /* Time out */
            close(s);
            return(-1);
        case -1:
            perror("select");
            close(s);
            return(-1);
        default:
            break;
    }

#if !defined(_WINSOCKAPI_) && !defined(VMS)
    /*
     * The socket is also reported writable when the asynchronous
     * connect failed; the pending error tells the two cases apart.
     */
    {
        int err = 0;
        socklen_t errlen = sizeof(err);

        if ((getsockopt(s, SOL_SOCKET, SO_ERROR, &err, &errlen) < 0) ||
            (err != 0)) {
            close(s);
            return(-1);
        }
    }
#endif

    return(s);
}
388:
/*
 * xmlNanoHTTPConnectHost:
 * @host:  the host name to resolve
 * @port:  the TCP port to connect to
 *
 * Resolves @host and tries each returned IPv4 address in turn until a
 * connection succeeds.  A diagnostic is printed on stderr when the
 * name cannot be resolved or no address accepts the connection.
 *
 * Returns the connected socket descriptor, or -1 on failure.
 */
int xmlNanoHTTPConnectHost(const char *host, int port)
{
    struct hostent *h;
    int i;
    int s;

    if (host == NULL)
        return(-1);

    h = gethostbyname(host);
    if (h == NULL) {
        fprintf(stderr, "unable to resolve '%s'.\n", host);
        return(-1);
    }

    /*
     * Only copy addresses that really are IPv4 addresses of the
     * expected size, rather than assuming 4 bytes.
     */
    if ((h->h_addrtype == AF_INET) &&
        (h->h_length == (int) sizeof(struct in_addr))) {
        for (i = 0; h->h_addr_list[i]; i++) {
            struct in_addr ia;

            memcpy(&ia, h->h_addr_list[i], sizeof(ia));
            s = xmlNanoHTTPConnectAttempt(ia, port);
            if (s != -1)
                return(s);
        }
    }

    fprintf(stderr, "unable to connect to '%s'.\n", host);
    return(-1);
}
414:
415: int xmlNanoHTTPOldFetch(const char *URL, const char *filename,
416: char **contentType) {
417: xmlNanoHTTPCtxtPtr ctxt;
418: char buf[4096];
419: int ret;
420: int fd;
421: char *p;
422: int head;
423: int nbRedirects = 0;
424: char *redirURL = NULL;
425:
426: retry:
427: if (redirURL == NULL)
428: ctxt = xmlNanoHTTPNewCtxt(URL);
429: else
430: ctxt = xmlNanoHTTPNewCtxt(redirURL);
431:
432: if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
433: xmlNanoHTTPFreeCtxt(ctxt);
434: if (redirURL != NULL) free(redirURL);
435: return(-1);
436: }
437: if (ctxt->hostname == NULL) {
438: xmlNanoHTTPFreeCtxt(ctxt);
439: if (redirURL != NULL) free(redirURL);
440: return(-1);
441: }
442: ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
443: if (ret < 0) {
444: xmlNanoHTTPFreeCtxt(ctxt);
445: if (redirURL != NULL) free(redirURL);
446: return(-1);
447: }
448: ctxt->fd = ret;
449: snprintf(buf, sizeof(buf),"GET %s HTTP/1.0\r\nhost: %s\r\n\r\n",
450: ctxt->path, ctxt->hostname);
451: ctxt->outptr = ctxt->out = strdup(buf);
452: ctxt->state = XML_NANO_HTTP_WRITE;
453: xmlNanoHTTPSend(ctxt);
454: ctxt->state = XML_NANO_HTTP_READ;
455: head = 1;
456:
457: while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
458: if (head && (*p == 0)) {
459: head = 0;
460: ctxt->content = ctxt->inrptr;
461: break;
462: }
463: xmlNanoHTTPScanAnswer(ctxt, p);
464: if (p != NULL) printf("%s\n", p);
465: }
466: while (xmlNanoHTTPRecv(ctxt)) ;
467:
468: if (!strcmp(filename, "-"))
469: fd = 0;
470: else {
471: fd = open(filename, O_CREAT | O_WRONLY);
472: if (fd < 0) {
473: xmlNanoHTTPFreeCtxt(ctxt);
474: if (redirURL != NULL) free(redirURL);
475: return(-1);
476: }
477: }
478:
479: printf("Code %d, content-type '%s'\n\n",
480: ctxt->returnValue, ctxt->contentType);
481: if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
482: (ctxt->returnValue < 400)) {
483: printf("Redirect to: %s\n", ctxt->location);
484: if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
485: nbRedirects++;
486: if (redirURL != NULL) free(redirURL);
487: redirURL = strdup(ctxt->location);
488: xmlNanoHTTPFreeCtxt(ctxt);
489: goto retry;
490: }
491: }
492:
493: write(fd, ctxt->content, ctxt->inptr - ctxt->content);
494: xmlNanoHTTPFreeCtxt(ctxt);
495: if (redirURL != NULL) free(redirURL);
496: return(0);
497: }
498:
499: void *
500: xmlNanoHTTPOpen(const char *URL, char **contentType) {
501: xmlNanoHTTPCtxtPtr ctxt;
502: char buf[4096];
503: int ret;
504: char *p;
505: int head;
506: int nbRedirects = 0;
507: char *redirURL = NULL;
508:
509: retry:
510: if (redirURL == NULL)
511: ctxt = xmlNanoHTTPNewCtxt(URL);
512: else {
513: ctxt = xmlNanoHTTPNewCtxt(redirURL);
514: free(redirURL);
515: redirURL = NULL;
516: }
517:
518: if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
519: xmlNanoHTTPFreeCtxt(ctxt);
520: if (redirURL != NULL) free(redirURL);
521: return(NULL);
522: }
523: if (ctxt->hostname == NULL) {
524: xmlNanoHTTPFreeCtxt(ctxt);
525: return(NULL);
526: }
527: ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
528: if (ret < 0) {
529: xmlNanoHTTPFreeCtxt(ctxt);
530: return(NULL);
531: }
532: ctxt->fd = ret;
533: snprintf(buf, sizeof(buf),"GET %s HTTP/1.0\r\nhost: %s\r\n\r\n",
534: ctxt->path, ctxt->hostname);
535: ctxt->outptr = ctxt->out = strdup(buf);
536: ctxt->state = XML_NANO_HTTP_WRITE;
537: xmlNanoHTTPSend(ctxt);
538: ctxt->state = XML_NANO_HTTP_READ;
539: head = 1;
540:
541: while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
542: if (head && (*p == 0)) {
543: head = 0;
544: ctxt->content = ctxt->inrptr;
545: break;
546: }
547: xmlNanoHTTPScanAnswer(ctxt, p);
548:
549: if (p != NULL) printf("%s\n", p);
550: }
551:
552: if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
553: (ctxt->returnValue < 400)) {
554: printf("Redirect to: %s\n", ctxt->location);
555: while (xmlNanoHTTPRecv(ctxt)) ;
556: if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
557: nbRedirects++;
558: redirURL = strdup(ctxt->location);
559: xmlNanoHTTPFreeCtxt(ctxt);
560: goto retry;
561: }
562: xmlNanoHTTPFreeCtxt(ctxt);
563: return(NULL);
564:
565: }
566:
567: printf("Code %d, content-type '%s'\n\n",
568: ctxt->returnValue, ctxt->contentType);
569:
570: return((void *) ctxt);
571: }
572:
573: int
574: xmlNanoHTTPRead(void *ctx, void *dest, int len) {
575: xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
576:
577: if (ctx == NULL) return(-1);
578: if (dest == NULL) return(-1);
579: if (len <= 0) return(0);
580:
581: while (ctxt->inptr - ctxt->inrptr < len) {
582: if (xmlNanoHTTPRecv(ctxt) == 0) break;
583: }
584: if (ctxt->inptr - ctxt->inrptr < len)
585: len = ctxt->inptr - ctxt->inrptr;
586: memcpy(dest, ctxt->inrptr, len);
587: ctxt->inrptr += len;
588: return(len);
589: }
590:
591: void
592: xmlNanoHTTPClose(void *ctx) {
593: xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
594:
595: if (ctx == NULL) return;
596:
597: xmlNanoHTTPFreeCtxt(ctxt);
598: }
599:
/*
 * xmlNanoHTTPFetch:
 * @URL:          the http:// URL to fetch
 * @filename:     the file to write the body to, or "-" for standard output
 * @contentType:  passed through unchanged to xmlNanoHTTPOpen()
 *
 * Opens @URL with xmlNanoHTTPOpen() and copies the body to @filename
 * in 4096-byte pieces.  The output file is closed before returning.
 *
 * Returns 0 on success, -1 if the URL could not be opened, the file
 * could not be created, or a write failed.
 */
int xmlNanoHTTPFetch(const char *URL, const char *filename,
                     char **contentType) {
    void *ctxt;
    char buf[4096];
    int fd;
    int ownFd = 0;
    int len;
    int ret = 0;

    if (filename == NULL) return(-1);

    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL) return(-1);

    if (strcmp(filename, "-") == 0) {
        /* "-" means standard output; descriptor 0 is standard input. */
        fd = STDOUT_FILENO;
    } else {
        /* O_CREAT requires an explicit permission argument. */
        fd = open(filename, O_CREAT | O_WRONLY, 0644);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            return(-1);
        }
        ownFd = 1;
    }

    while ((ret == 0) &&
           ((len = xmlNanoHTTPRead(ctxt, buf, sizeof(buf))) > 0)) {
        int done = 0;

        /* write() may accept fewer bytes than requested. */
        while (done < len) {
            ssize_t n = write(fd, buf + done, (size_t) (len - done));

            if ((n < 0) && (errno == EINTR)) continue;
            if (n <= 0) {
                ret = -1;
                break;
            }
            done += (int) n;
        }
    }

    xmlNanoHTTPClose(ctxt);
    if (ownFd) close(fd);
    return(ret);
}
627:
#ifdef STANDALONE
/*
 * Command-line driver: "prog URL [filename]".  Fetches URL into
 * filename, or to "-" when no filename is given; without arguments it
 * prints a usage message.  The exit status is 1 when the fetch fails,
 * 0 otherwise.
 */
int main(int argc, char **argv) {
    char *contentType = NULL;
    int status = 0;

    if ((argc > 1) && (argv[1] != NULL)) {
        const char *target = "-";

        if ((argc > 2) && (argv[2] != NULL))
            target = argv[2];
        if (xmlNanoHTTPFetch(argv[1], target, &contentType) < 0)
            status = 1;
    } else {
        /* argv[0] may legitimately be NULL when argc is 0. */
        const char *prog = ((argc > 0) && (argv[0] != NULL)) ?
                           argv[0] : "nanohttp";

        printf("%s: minimal HTTP GET implementation\n", prog);
        printf("\tusage %s [ URL [ filename ] ]\n", prog);
    }
    return(status);
}
#endif /* STANDALONE */
Webmaster