// Microsoft Excel spreadsheets exported as .csv
// have some unique qualities:
// - the separator is , or ; -- this seems to depend on whether you
//   are living in the US or somewhere else
// - text, even with embedded spaces, is not quoted, except when
//   the text contains a separator
//
// This program tries to convert an Excel-generated .csv into an
// orderly .csv:
// - text is quoted when a space, a separator or ' is in the text
// - the separator is , unless another output separator is requested
// - we assume that a quoted field does not need further handling,
//   it is copied as is
//
// This program reads from stdin and writes to stdout.
// One optional parameter: two letters selecting the input and the
// output separator (s means ';', c means ','); the default is "sc".
//
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char sepin, sepout; // input and output field separators
int maxlen;         // allocated size of `field`, in bytes
char *field;        // growable buffer holding the field being read
int lf;             // number of bytes currently stored in `field`
int quoted;         // non-zero when the current field began with '"'

// Report an allocation failure on stderr, release the field buffer and
// terminate the program with a failure status.
static void die_out_of_memory(void)
{
    fprintf(stderr, "mstocsv: out of memory\n");
    free(field);
    field = NULL;
    exit(EXIT_FAILURE);
}

// Write the buffered field (field[0..lf-1]) to stdout. A field that was
// not quoted in the input is wrapped in double quotes when it contains a
// space, a separator or a single quote; a field that was already quoted
// is written unchanged.
//
// NOTE(review): the body of this function was damaged in the source text
// and has been reconstructed from the rules stated in the file header --
// confirm the exact set of characters that should trigger quoting.
void outputfield(void)
{
    int needquote = 0;

    // determine if extra quotation is necessary
    if (!quoted) {
        for (int i = 0; i < lf; i++) {
            char c = field[i];
            if (c == ' ' || c == '\'' || c == sepin || c == sepout) {
                needquote = 1;
                break;
            }
        }
    }

    if (needquote) {
        putchar('"');
    }
    if (lf > 0) {
        fwrite(field, 1, (size_t)lf, stdout);
    }
    if (needquote) {
        putchar('"');
    }
}

// Append one byte to the field buffer, growing the buffer by roughly a
// factor of 1.5 when it is (almost) full. Terminates the program if the
// buffer cannot be grown.
void addtofield(char c)
{
    if (lf >= maxlen - 1) {
        // Guard the size computation against signed overflow.
        if (maxlen > INT_MAX - maxlen / 2 - 1) {
            die_out_of_memory();
        }
        // Same value as the former (int)(1.5 * maxlen + 1), without
        // going through floating point.
        int newlen = maxlen + maxlen / 2 + 1;

        // Keep the old pointer until realloc is known to have succeeded.
        char *grown = realloc(field, (size_t)newlen);
        if (grown == NULL) {
            die_out_of_memory();
        }
        field = grown;
        maxlen = newlen;
    }
    field[lf++] = c;
}

// Print the command line help text to stdout.
void usage()
{
    printf("Converts Excel-cvs to cvs by changig delimiter and adding quotes.\n");
    printf("Usage:\n");
    printf("mstocsv [sc] [cs] [ss] [cc]\n");
    printf("mstocsv reads from STDIN and writes to STDOUT\n");
    printf("  sc: input delimiter ';', output delimiter ',' (default)\n");
    printf("  cs: input delimiter ',', output delimiter ';'\n");
    printf("  ss: input delimiter ';', output delimiter ';'\n");
    printf("  cc: input delimiter ',', output delimiter ','\n");
    printf("\n");
    printf("Examples:\n");
    printf("  mstocvs < in.cvs > out.cvs\n");
    printf("  mstocvs ss < in.cvs > out.cvs   # delimiter remains ';', but quotation is done\n");
    printf("  mstocvs sc < in.cvs | my_cvs_processing_program\n");
}

// Map one option letter to a separator: 'c' is ',', 's' is ';'.
// Returns 1 and stores the separator in *sep, or returns 0 for any
// other letter.
static int separator_from_letter(char letter, char *sep)
{
    switch (letter) {
    case 'c':
        *sep = ',';
        return 1;
    case 's':
        *sep = ';';
        return 1;
    default:
        return 0;
    }
}

// Set sepin/sepout from the command line. With no argument the defaults
// (';' in, ',' out) are used; otherwise exactly one two-letter argument
// is accepted. Returns 1 on success, 0 when the arguments are invalid.
static int parse_args(int argc, char *argv[])
{
    sepin = ';';
    sepout = ',';

    if (argc == 1) {
        return 1;
    }
    if (argc != 2 || strlen(argv[1]) != 2) {
        return 0;
    }
    return separator_from_letter(argv[1][0], &sepin)
        && separator_from_letter(argv[1][1], &sepout);
}

// Entry point: copies CSV data from stdin to stdout, translating the
// separator and adding quotes where needed. Returns 0 on success and 1
// on invalid arguments or when writing the output failed.
int main(int argc, char *argv[])
{
    enum {
        START,    // at the beginning of a field
        INFIELD,  // inside an unquoted field, or after a closing quote
        INQFIELD, // inside a quoted field
        LFFOUND,  // a line feed ended the current field
        CRFOUND,  // a carriage return ended the current field
        EOFFOUND, // end of input reached
    };
    int state = START;
    int c, cc;

    if (!parse_args(argc, argv)) {
        usage();
        return 1;
    }

    maxlen = 20;
    field = malloc((size_t)maxlen);
    if (field == NULL) {
        die_out_of_memory();
    }

    for (;;) {
        switch (state) {
        case START:
            lf = 0;
            quoted = 0;
            c = getchar();
            switch (c) {
            case EOF:
                state = EOFFOUND;
                break;
            case '"':
                state = INQFIELD;
                quoted = 1;
                addtofield((char)c);
                break;
            case '\n':
                state = LFFOUND;
                break;
            case '\r':
                state = CRFOUND;
                break;
            default:
                if (c == sepin) {
                    // empty field: emit the separator, stay in START
                    putchar(sepout);
                    break;
                }
                addtofield((char)c);
                state = INFIELD;
                break;
            }
            break;

        case INFIELD:
            c = getchar();
            switch (c) {
            case EOF:
                state = EOFFOUND;
                break;
            case '\n':
                state = LFFOUND;
                break;
            case '\r':
                state = CRFOUND;
                break;
            default:
                if (c == sepin) {
                    outputfield();
                    putchar(sepout);
                    state = START;
                    break;
                }
                addtofield((char)c);
                break;
            }
            break;

        case INQFIELD:
            // NOTE(review): a line break inside a quoted field ends the
            // record here, exactly as in the original code -- confirm
            // that multi-line quoted cells do not need to be supported.
            c = getchar();
            switch (c) {
            case EOF:
                state = EOFFOUND;
                break;
            case '\n':
                state = LFFOUND;
                break;
            case '\r':
                state = CRFOUND;
                break;
            case '"':
                addtofield('"');
                cc = getchar();
                if (cc == '"') {
                    // doubled quote: keep both, still inside the quotes
                    addtofield('"');
                } else {
                    // closing quote: the rest is handled like plain text
                    if (cc != EOF) {
                        ungetc(cc, stdin);
                    }
                    state = INFIELD;
                }
                break;
            default:
                addtofield((char)c);
                break;
            }
            break;

        case LFFOUND:
            outputfield();
            putchar('\n');
            state = START;
            break;

        case CRFOUND:
            c = getchar();
            outputfield();
            putchar('\r');
            if (c == '\n') {
                putchar('\n');
            } else if (c != EOF) {
                // Lone CR: the byte read belongs to the next record, so
                // hand it back instead of echoing it as a line ending.
                ungetc(c, stdin);
            }
            state = START;
            break;

        case EOFFOUND:
            outputfield();
            free(field);
            field = NULL;
            if (fflush(stdout) == EOF || ferror(stdout)) {
                fprintf(stderr, "mstocsv: error writing output\n");
                return 1;
            }
            return 0;

        default:
            // not reachable: every state is handled above
            free(field);
            field = NULL;
            return 1;
        }
    }
}