---
 usr/utils/Kbuild |    4 +-
 usr/utils/wc.c   |  241 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 244 insertions(+), 1 deletions(-)
 create mode 100644 usr/utils/wc.c

diff --git a/usr/utils/Kbuild b/usr/utils/Kbuild
index a52ea61..7c8ccfb 100644
--- a/usr/utils/Kbuild
+++ b/usr/utils/Kbuild
@@ -3,7 +3,7 @@
 #
 
 progs := chroot dd mkdir mkfifo mknod mount pivot_root umount
-progs += true false sleep ln nuke minips cat ls losetup
+progs += true false sleep ln nuke minips cat ls losetup wc
 progs += uname halt kill readlink cpio sync dmesg modprobe
 
 static-y := $(addprefix static/, $(progs))
@@ -60,6 +60,8 @@ static/losetup-y := losetup.o
 shared/losetup-y := losetup.o
 static/modprobe-y := modprobe.o
 shared/modprobe-y := modprobe.o
+static/wc-y := wc.o
+shared/wc-y := wc.o
 
 # Additionally linked targets
 always := static/reboot static/poweroff shared/reboot shared/poweroff
diff --git a/usr/utils/wc.c b/usr/utils/wc.c
new file mode 100644
index 0000000..f5059fc
--- /dev/null
+++ b/usr/utils/wc.c
@@ -0,0 +1,241 @@
+/* vi: set sw=4 ts=4: */
+/*
+ * wc implementation for busybox
+ *
+ * Copyright (C) 2003 Manuel Novoa III
+ *
+ * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
+ */
+
+/* BB_AUDIT SUSv3 _NOT_ compliant -- option -m is not currently supported. */
+/* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */
+
+/* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org)
+ *
+ * Rewritten to fix a number of problems and do some size optimizations.
+ * Problems in the previous busybox implementation (besides bloat) included:
+ * 1) broken 'wc -c' optimization (read note below)
+ * 2) broken handling of '-' args
+ * 3) no checking of ferror on EOF returns
+ * 4) isprint() wasn't considered when word counting.
+ *
+ * TODO:
+ *
+ * When locale support is enabled, count multibyte chars in the '-m' case.
+ *
+ * NOTES:
+ *
+ * The previous busybox wc attempted an optimization using stat for the
+ * case of counting chars only. I omitted that because it was broken.
+ * It didn't take into account the possibility of input coming from a
+ * pipe, or input from a file with file pointer not at the beginning.
+ *
+ * To implement such a speed optimization correctly, not only do you
+ * need the size, but also the file position. Note also that the
+ * file position may be past the end of file. Consider the example
+ * (adapted from example in gnu wc.c)
+ *
+ * echo hello > /tmp/testfile &&
+ * (dd ibs=1k skip=1 count=0 &> /dev/null; wc -c) < /tmp/testfile
+ *
+ * for which 'wc -c' should output '0'.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/* Locale-independent ASCII classification; EOF (-1) matches neither. */
+#undef isspace
+#undef isprint
+#define isspace(c) ((((c) == ' ') || (((unsigned int)((c) - 9)) <= (13 - 9))))
+#define isprint(c) (((unsigned int)((c) - 0x20)) <= (0x7e - 0x20))
+#define isspace_given_isprint(c) ((c) == ' ')
+
+/* COUNT_FMT must be the printf conversion matching COUNT_T. */
+#define COUNT_T unsigned long
+#define COUNT_FMT "lu"
+
+/*
+ * Open FILENAME for reading.  "-" (or an empty name) selects stdin.
+ * On failure a diagnostic is printed to stderr and NULL is returned.
+ */
+static FILE *fopen_or_warn_stdin(const char *filename)
+{
+	FILE *fp;
+
+	if (!filename[0] || !strcmp(filename, "-"))
+		return stdin;
+
+	fp = fopen(filename, "r");
+	if (!fp)
+		perror(filename);
+
+	return fp;
+}
+
+/* Indices into the count arrays; bit (1 << index) selects the column. */
+enum {
+	WC_LINES = 0,
+	WC_WORDS = 1,
+	WC_CHARS = 2,
+	WC_LENGTH = 3
+};
+
+int main(int argc, char **argv)
+{
+	FILE *fp;
+	const char *s, *arg;
+	const char *start_fmt = "%9"COUNT_FMT;
+	const char *fname_fmt = " %s\n";
+	COUNT_T *pcounts;
+	COUNT_T counts[4];
+	COUNT_T totals[4];
+	unsigned linepos;
+	unsigned u;
+	int num_files = 0;
+	int c;
+	int status = EXIT_SUCCESS;
+	signed char in_word;
+	unsigned print_type = 0;
+
+	/* Each option sets the bit of the column it selects. */
+	while ((c = getopt(argc, argv, "lwcL")) != -1) {
+		switch (c) {
+		case 'l':
+			print_type |= 1 << WC_LINES;
+			break;
+		case 'w':
+			print_type |= 1 << WC_WORDS;
+			break;
+		case 'c':
+			print_type |= 1 << WC_CHARS;
+			break;
+		case 'L':
+			print_type |= 1 << WC_LENGTH;
+			break;
+		default:
+			fprintf(stderr, "Usage: wc [-lwcL] [FILE]...\n");
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	if (print_type == 0) {
+		print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_CHARS);
+	}
+
+	argv += optind;
+	if (!argv[0]) {
+		/* No file operands: read stdin and print no name. */
+		*--argv = (char *) "-";
+		fname_fmt = "\n";
+		if (!((print_type-1) & print_type)) /* exactly one option? */
+			start_fmt = "%"COUNT_FMT;
+	}
+
+	memset(totals, 0, sizeof(totals));
+
+	pcounts = counts;
+
+	while ((arg = *argv++) != 0) {
+		++num_files;
+		fp = fopen_or_warn_stdin(arg);
+		if (!fp) {
+			status = EXIT_FAILURE;
+			continue;
+		}
+
+		memset(counts, 0, sizeof(counts));
+		linepos = 0;
+		in_word = 0;
+
+		do {
+			/* Our -w doesn't match GNU wc exactly... oh well */
+
+			++counts[WC_CHARS];
+			c = getc(fp);
+			if (isprint(c)) {
+				++linepos;
+				if (!isspace_given_isprint(c)) {
+					in_word = 1;
+					continue;
+				}
+			} else if (((unsigned int)(c - 9)) <= 4) {
+				/* \t  9
+				 * \n 10
+				 * \v 11
+				 * \f 12
+				 * \r 13
+				 */
+				if (c == '\t') {
+					linepos = (linepos | 7) + 1;
+				} else { /* '\n', '\r', '\f', or '\v' */
+				DO_EOF:
+					if (linepos > counts[WC_LENGTH]) {
+						counts[WC_LENGTH] = linepos;
+					}
+					if (c == '\n') {
+						++counts[WC_LINES];
+					}
+					if (c != '\v') {
+						linepos = 0;
+					}
+				}
+			} else if (c == EOF) {
+				if (ferror(fp)) {
+					perror(arg);
+					status = EXIT_FAILURE;
+				}
+				--counts[WC_CHARS];
+				goto DO_EOF; /* Treat an EOF as '\r'. */
+			} else {
+				continue;
+			}
+
+			counts[WC_WORDS] += in_word;
+			in_word = 0;
+			if (c == EOF) {
+				break;
+			}
+		} while (1);
+
+		if (totals[WC_LENGTH] < counts[WC_LENGTH]) {
+			totals[WC_LENGTH] = counts[WC_LENGTH];
+		}
+		/* The += in the output loop below adds this back. */
+		totals[WC_LENGTH] -= counts[WC_LENGTH];
+
+		if (fp != stdin)
+			fclose(fp);
+
+	OUTPUT:
+		/* coreutils wc tries hard to print pretty columns
+		 * (saves results for all files, find max col len etc...)
+		 * we won't try that hard, it will bloat us too much */
+		s = start_fmt;
+		u = 0;
+		do {
+			if (print_type & (1 << u)) {
+				printf(s, pcounts[u]);
+				s = " %9"COUNT_FMT; /* Ok... restore the leading space. */
+			}
+			totals[u] += pcounts[u];
+		} while (++u < 4);
+		printf(fname_fmt, arg);
+	}
+
+	/* If more than one file was processed, we want the totals.  To save some
+	 * space, we set the pcounts ptr to the totals array.  This has the side
+	 * effect of trashing the totals array after outputting it, but that's
+	 * irrelevant since we no longer need it. */
+	if (num_files > 1) {
+		num_files = 0; /* Make sure we don't get here again. */
+		arg = "total";
+		pcounts = totals;
+		--argv;
+		goto OUTPUT;
+	}
+
+	fflush(stdout);
+	exit(status);
+}
-- 
1.6.3.3