/*
 * Copyright (C) 2019-2024 Alexander Borisov
 *
 * Author: Alexander Borisov <borisov@lexbor.com>
 */

#include "lexbor/encoding/encoding.h"
#include "lexbor/encoding/multi.h"
#include "lexbor/encoding/range.h"


/*
 * Writes one line to the stream `fc`: each of the `len` bytes at `data` as a
 * literal "\xHH" escape, then a space, `codepoint` as "0x" followed by at
 * least four upper-case hex digits, and a newline.
 *
 * Each argument is evaluated exactly once, up front, and is parenthesized.
 * The captured values live in locals with an `atf_` prefix so that a caller
 * expression such as a variable named `j` cannot be captured by the macro's
 * own loop counter.
 */
#define append_to_file(fc, data, len, codepoint)                               \
    do {                                                                       \
        FILE *atf_stream_ = (fc);                                              \
        const lxb_char_t *atf_bytes_ = (data);                                 \
        const size_t atf_count_ = (len);                                       \
        const unsigned atf_cp_ = (unsigned) (codepoint);                       \
                                                                               \
        for (size_t atf_at_ = 0; atf_at_ < atf_count_; atf_at_++) {            \
            fprintf(atf_stream_, "\\x%02X", (unsigned) atf_bytes_[atf_at_]);   \
        }                                                                      \
                                                                               \
        fprintf(atf_stream_, " 0x%04X\n", atf_cp_);                            \
    }                                                                          \
    while (0)


/*
 * Maps a GB18030 range pointer to a code point using the
 * lxb_encoding_range_index_gb18030 table.
 *
 * Returns LXB_STATUS_ERROR (a status value used here as an in-band
 * sentinel) when the pointer is rejected; the caller compares the result
 * against that exact value. Otherwise returns the code point of the last
 * table entry whose index is not greater than `index`, advanced by the
 * distance between `index` and that entry's index.
 *
 * The lookup assumes the table is sorted by ascending index and that its
 * first entry's index is not greater than any accepted pointer
 * (TODO confirm against the generated table).
 */
lxb_inline lxb_codepoint_t
decode_gb18030_range(uint32_t index)
{
    size_t mid, left, right;
    const lxb_encoding_range_index_t *range;

    /*
     * Reject pointers in [39419, 189000) and pointers above 1237575.
     * The first test is a single unsigned comparison: values below 39419
     * wrap around to a large number and therefore pass.
     *
     * NOTE(review): the previous comment here said "greater than 39419",
     * but the comparison as written also rejects 39419 itself. Confirm
     * which one is intended before changing it.
     */
    if ((unsigned) (index - 39419) < (189000 - 39419)
        || index > 1237575)
    {
        return LXB_STATUS_ERROR;
    }

    /* Special case handled outside the table. */
    if (index == 7457) {
        return 0xE7C7;
    }

    range = lxb_encoding_range_index_gb18030;
    left = 0;
    right = LXB_ENCODING_RANGE_INDEX_GB18030_SIZE;

    /*
     * Binary search for the last entry with range[i].index <= index.
     *
     * `left` always points at a candidate answer and `right` is an
     * exclusive upper bound, so the window never needs "mid - 1" and
     * cannot underflow or skip entry 0.
     */
    while (right - left > 1) {
        mid = left + (right - left) / 2;

        if (range[mid].index <= index) {
            left = mid;
        }
        else {
            right = mid;
        }
    }

    return range[left].codepoint + index - range[left].index;
}

/*
 * Generates "./gb18030_map_decode.txt".
 *
 * The file starts with two comment headers, followed by one line per
 * code point in the form produced by append_to_file(): the bytes the
 * GB18030 encoder wrote for that code point, then the code point itself.
 * Code points come from two sources: every non-error entry of
 * lxb_encoding_multi_gb18030_map, and every pointer accepted by
 * decode_gb18030_range() within the byte bounds iterated below.
 *
 * Returns EXIT_SUCCESS when everything was written, EXIT_FAILURE if the
 * file cannot be opened, an encode call fails, or the stream reports a
 * write/close error. The output file is closed on every path after it
 * has been opened.
 */
int main(int argc, const char * argv[])
{
    int exit_code = EXIT_FAILURE;
    size_t size;
    lxb_char_t data[8];
    lxb_status_t status;
    lxb_codepoint_t cp;
    lxb_encoding_encode_t ctx;
    const lxb_codepoint_t *p;
    const lxb_encoding_data_t *enc_data;
    uint32_t first, second, third, last, pointer;

    const char *filepath = "./gb18030_map_decode.txt";

    (void) argc;
    (void) argv;

    enc_data = lxb_encoding_data(LXB_ENCODING_GB18030);

    FILE *fc = fopen(filepath, "w");
    if (fc == NULL) {
        printf("Failed to opening file: %s\n", filepath);
        return EXIT_FAILURE;
    }

    fprintf(fc, "#\n"
            "# Copyright (C) 2019-2024 Alexander Borisov\n"
            "#\n"
            "# Author: Alexander Borisov <borisov@lexbor.com>\n"
            "#\n\n");

    fprintf(fc, "#\n"
            "# This file generated by the program\n"
            "# \"utils/lexbor/encoding/gb18030_map_decode.c\"\n"
            "#\n\n");

    /* Single index: walk the whole map table, skipping error entries. */
    size = sizeof(lxb_encoding_multi_gb18030_map) / sizeof(lxb_codepoint_t);

    for (size_t i = 0; i < size; i++) {
        p = &lxb_encoding_multi_gb18030_map[i];
        cp = *p;

        if (cp == LXB_ENCODING_ERROR_CODEPOINT) {
            continue;
        }

        /* Fresh encoder state per code point; output goes into `data`. */
        lxb_encoding_encode_init(&ctx, enc_data, data, sizeof(data));

        /* Encode exactly one code point: the range [p, p + 1). */
        status = enc_data->encode(&ctx, &p, (p + 1));
        if (status != LXB_STATUS_OK) {
            printf("Failed to encoding: "LEXBOR_FORMAT_Z"\n", i);
            goto done;
        }

        append_to_file(fc, ctx.buffer_out, ctx.buffer_used, cp);
    }

    /*
     * Range index: the loop variables are used only to compute a pointer;
     * the bytes written to the file are whatever the encoder produced for
     * the decoded code point.
     *
     * NOTE(review): these bounds visit only second bytes 0x30..0x31 and
     * third bytes 0x81..0xB0, so only a subset of the pointers accepted by
     * decode_gb18030_range() is generated. Confirm this is intentional.
     */
    for (first = 0x81; first <= 0x84; first++) {
        for (second = 0x30; second <= 0x31; second++) {
            for (third = 0x81; third <= 0xB0; third++) {
                for (last = 0x30; last <= 0x39; last++) {

                    pointer = ((first  - 0x81) * (10 * 126 * 10))
                    + ((second - 0x30) * (10 * 126))
                    + ((third  - 0x81) * 10) + last - 0x30;

                    /* Rejected pointers are skipped, not treated as errors. */
                    cp = decode_gb18030_range(pointer);
                    if (cp == LXB_STATUS_ERROR) {
                        continue;
                    }

                    p = &cp;

                    lxb_encoding_encode_init(&ctx, enc_data,
                                             data, sizeof(data));

                    status = enc_data->encode(&ctx, &p, (p + 1));
                    if (status != LXB_STATUS_OK) {
                        printf("Failed to encoding: %u\n", cp);
                        goto done;
                    }

                    append_to_file(fc, ctx.buffer_out, ctx.buffer_used, cp);
                }
            }
        }
    }

    fprintf(fc, "\n# END\n");

    /* A failed fprintf above leaves the stream's error indicator set. */
    if (ferror(fc)) {
        printf("Failed to write file: %s\n", filepath);
        goto done;
    }

    exit_code = EXIT_SUCCESS;

done:
    /* fclose flushes buffered output, so its failure is a write failure. */
    if (fclose(fc) != 0 && exit_code == EXIT_SUCCESS) {
        printf("Failed to write file: %s\n", filepath);
        exit_code = EXIT_FAILURE;
    }

    return exit_code;
}
