/* libutf8/src/tests/decode.c
 *
 *  (c)2006-2009, Laurence Withers, <l@lwithers.me.uk>.
 *  Released under the GNU GPLv3. See file COPYING or
 *  http://www.gnu.org/copyleft/gpl.html for details.
*/

#include "utf8.h"

#include <stdio.h>
#include <string.h>



/* Writes `amt' wide characters starting at `x' to stdout, as the raw bytes of
 * the wchar_t array (sizeof(wchar_t) bytes per character).
 *
 *  x   - first wide character to write.
 *  amt - number of wide characters to write; nothing is written if this is
 *        zero or negative.
 *
 * A short write is reported on stderr. The function still returns normally,
 * so callers that need to know about the failure can test ferror(stdout).
 */
void
writeout(const wchar_t* x, int amt)
{
    size_t count, done;

    /* a negative count would be converted to a huge size_t by fwrite() */
    if(amt <= 0) return;

    count = (size_t)amt;
    done = fwrite(x, sizeof(wchar_t), count, stdout);
    if(done != count) perror("fwrite");
}



/* Decoder error callback, installed into the decode context by main().
 *
 * Prints the position held in `ctx' followed by a description of `error' to
 * stderr, then stores U+FFFD (the Unicode replacement character) in `*newch'
 * and returns utf8_decode_error_action_replace.
 *
 *  ctx   - decoder state; only its line, col, char_offset and byte_offset
 *          fields are read.
 *  error - the error being reported.
 *  newch - receives the replacement character.
 */
enum utf8_decode_error_action
error_callback(const struct utf8_decode_state* ctx,
    enum utf8_decode_error error, wchar_t* newch)
{
    const char* msg;

    switch(error) {
    case utf8_decode_error_lone_cchar:
        msg = "a lone continuation char was encountered.\n";
        break;

    case utf8_decode_error_not_cchar:
        msg = "a continuation char was expected, but not encountered.\n";
        break;

    case utf8_decode_error_not_schar:
        msg = "an invalid character was encountered (not start char).\n";
        break;

    case utf8_decode_error_overlong:
        msg = "an overlong character sequence was encountered.\n";
        break;

    case utf8_decode_error_illegal_cp:
        msg = "an illegal code point was encountered.\n";
        break;

    default:
        /* without this, an error code not listed above would leave the
         * position prefix on stderr with no message and no newline */
        msg = "an unknown decoding error was encountered.\n";
        break;
    }

    /* line and col are printed with 1 added, presumably because the decoder
     * counts them from zero.
     * NOTE(review): the %d conversions assume the four position fields have
     * type int -- confirm against utf8.h. */
    fprintf(stderr, "Line %d, col %d (char %d, byte %d): %s",
            ctx->line + 1, ctx->col + 1, ctx->char_offset, ctx->byte_offset,
            msg);

    *newch = 0xFFFD;
    return utf8_decode_error_action_replace;
}



/* Program entry point.
 *
 * With the single argument "--print-summary", prints a one-line description
 * and exits successfully. With any other arguments, prints a usage message to
 * stderr and returns 1. With no arguments, reads stdin in chunks of up to
 * sizeof(inbuf) bytes, passes each chunk to utf8_decoder() and writes the
 * decoded wide characters to stdout with writeout().
 *
 * Returns 0 on success. Returns 1 on bad usage, if utf8_decoder() reports
 * failure, or if reading stdin or writing stdout fails. Input that ends part
 * way through a character is reported on stderr but does not change the exit
 * status.
 */
int
main(int argc, char* argv[])
{
    char inbuf[1024];
    wchar_t outbuf[1024];
    struct utf8_decode_state ctx;

    if(argc == 2 && !strcmp(argv[1], "--print-summary")) {
        printf("Decodes UTF-8 on stdin to UCS-4 on stdout.\n");
        return 0;
    }

    if(argc != 1) {
        fprintf(stderr, "No parameters expected. This program decodes UTF-8 presented on stdin\n"
                "and transforms it to UCS-4 on stdout.\n");
        return 1;
    }

    /* set up ctx structure */
    memset(&ctx, 0, sizeof(ctx));
    ctx.wr = outbuf;
    ctx.wr_size = sizeof(outbuf) / sizeof(outbuf[0]);
    ctx.error_callback = error_callback;

    /* loop over input; testing feof() alone would spin forever after a read
     * error, since an error never sets the end-of-file indicator */
    while(!feof(stdin) && !ferror(stdin)) {
        /* read input (a short or empty chunk is fine; the loop condition
         * decides whether to try again) */
        ctx.rd_remain = fread(inbuf, 1, sizeof(inbuf), stdin);
        ctx.rd = inbuf;

        /* decode it; one chunk may take several calls, as the output buffer
         * is written out after each call */
        while(ctx.rd_remain) {
            if(!utf8_decoder(&ctx)) {
                /* NOTE(review): assumes utf8_decoder() sets errno on
                 * failure -- confirm against utf8.h */
                perror("utf8_decoder");
                fprintf(stderr, "(at line %d, col %d, char %d, byte %d)\n",
                        ctx.line + 1, ctx.col + 1, ctx.char_offset, ctx.byte_offset);
                return 1;
            }

            /* write output */
            writeout(outbuf, ctx.written);
        }
    }

    if(ferror(stdin)) {
        fprintf(stderr, "Error reading from stdin.\n");
        return 1;
    }

    if(!ctx.complete) {
        fprintf(stderr, "Input did not end on a character boundary.\n");
    }

    /* push out anything still buffered and catch any earlier write failure */
    if(fflush(stdout) == EOF || ferror(stdout)) {
        fprintf(stderr, "Error writing to stdout.\n");
        return 1;
    }

    return 0;
}


/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4:syntax=c.doxygen
*/
