// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2009-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*/

#include <utility>

#include "unicode/bytestream.h"
#include "unicode/utypes.h"
#include "unicode/ures.h"
#include "unicode/localpointer.h"
#include "unicode/putil.h"
#include "unicode/uenum.h"
#include "unicode/uloc.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "putilimp.h"
#include "uinvchar.h"
#include "ulocimp.h"
#include "uassert.h"

namespace {

/* One node of a singly linked list of variant subtags. */
struct VariantListEntry : public icu::UMemory {
    const char*       variant;  /* text of this variant subtag */
    VariantListEntry* next;     /* following node; nullptr on the last node */
};

/* One node of a singly linked list of attribute values. */
struct AttributeListEntry : public icu::UMemory {
    const char*         attribute;  /* text of this attribute */
    AttributeListEntry* next;       /* following node; nullptr on the last node */
};

/* One node of a singly linked list of extensions (key/value pairs). */
struct ExtensionListEntry : public icu::UMemory {
    const char*         key;    /* extension key; lists are ordered by this field */
    const char*         value;  /* value paired with the key */
    ExtensionListEntry* next;   /* following node; nullptr on the last node */
};

/* Number of extlang slots kept in a ULanguageTag. */
#define MAXEXTLANG 3

/*
 * Parsed form of a language tag. _initializeULanguageTag() puts the
 * string fields at EMPTY and the pointer/list fields at nullptr.
 */
struct ULanguageTag {
    char*               buf;                  /* holding parsed subtags */
    const char*         language;             /* EMPTY until set */
    const char*         extlang[MAXEXTLANG];  /* unused slots are nullptr */
    const char*         script;               /* EMPTY until set */
    const char*         region;               /* EMPTY until set */
    VariantListEntry*   variants;             /* head of the variant list, or nullptr */
    ExtensionListEntry* extensions;           /* head of the extension list, or nullptr */
    const char*         privateuse;           /* EMPTY until set */
    const char*         legacy;               /* EMPTY until set */
};

/* NOTE(review): MINLEN is not referenced in the visible part of this file -- confirm its use. */
#define MINLEN 2
/* Separator between subtags of a language tag. */
#define SEP '-'
/* Singleton that introduces the private use section ("x"). */
#define PRIVATEUSE 'x'
/* Singleton of the "u" extension. */
#define LDMLEXT 'u'

/* Separator inside ICU locale IDs; accepted next to SEP when splitting variants. */
#define LOCALE_SEP '_'
/* NOTE(review): the next three look like the ICU keyword-section delimiters
   ("@key=type;key=type") -- they are not used in the visible part; confirm. */
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/* Character class helpers used by the subtag validators below. */
constexpr auto ISALPHA = uprv_isASCIILetter;
/* True for the ASCII digits '0' through '9' only. */
inline bool ISNUMERIC(char c) { return c >= '0' && c <= '9'; }

/* Shared string constants. */
constexpr char EMPTY[] = "";
/* Language subtag emitted when a locale has no usable language code. */
constexpr char LANG_UND[] = "und";
constexpr char PRIVATEUSE_KEY[] = "x";
constexpr char _POSIX[] = "_POSIX";
constexpr char POSIX_KEY[] = "va";
constexpr char POSIX_VALUE[] = "posix";
/* Extension key that gets special ordering in _addExtensionToList(). */
constexpr char LOCALE_ATTRIBUTE_KEY[] = "attribute";
constexpr char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
constexpr char LOCALE_TYPE_YES[] = "yes";

/* Length of LANG_UND without the terminating NUL; keep the two in sync. */
#define LANG_UND_LEN 3

/*
 Updated on 2018-09-12 from
 https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .

 This table has 2 parts. The part for
 legacy language tags (marked as “Type: grandfathered” in BCP 47)
 is generated by the following scripts from the IANA language tag registry.

 curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
 egrep -A 7 'Type: grandfathered' | \
 egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \
 awk -n '/Tag/ {printf("    \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\
 tr 'A-Z' 'a-z'


 The 2nd part is made of four ICU-specific entries. They're kept for
 backward compatibility for now, even though there are no preferred
 values. They may have to be removed for strict BCP 47 compliance.

*/
/*
 * Flat list of string pairs: the element at an even index is a legacy tag
 * and the element right after it is the tag it is mapped to. All entries
 * are lowercase.
 */
constexpr const char* LEGACY[] = {
/*  legacy          preferred */
    "art-lojban",   "jbo",
    "en-gb-oed",    "en-gb-oxendict",
    "i-ami",        "ami",
    "i-bnn",        "bnn",
    "i-hak",        "hak",
    "i-klingon",    "tlh",
    "i-lux",        "lb",
    "i-navajo",     "nv",
    "i-pwn",        "pwn",
    "i-tao",        "tao",
    "i-tay",        "tay",
    "i-tsu",        "tsu",
    "no-bok",       "nb",
    "no-nyn",       "nn",
    "sgn-be-fr",    "sfb",
    "sgn-be-nl",    "vgt",
    "sgn-ch-de",    "sgg",
    "zh-guoyu",     "cmn",
    "zh-hakka",     "hak",
    "zh-min-nan",   "nan",
    "zh-xiang",     "hsn",

    // Legacy tags with no preferred value in the IANA
    // registry. Kept for now for the backward compatibility
    // because ICU has mapped them this way.
    "i-default",    "en-x-i-default",
    "i-enochian",   "und-x-i-enochian",
    "i-mingo",      "see-x-i-mingo",
    "zh-min",       "nan-x-zh-min",
};

/*
 Updated on 2018-09-12 from
 https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .

 The table lists redundant tags with preferred value in the IANA language tag registry.
 It's generated with the following command:

 curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
 grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \
 awk -n '/Tag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \
 tr 'A-Z' 'a-z'

 In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because
 a variant tag 'hepburn-heploc' has the preferred subtag, 'alalc97'.
*/

/*
 * Flat list of string pairs: the element at an even index is a redundant
 * tag and the element right after it is its preferred replacement. All
 * entries are lowercase.
 */
constexpr const char* REDUNDANT[] = {
//  redundant       preferred
    "sgn-br",       "bzs",
    "sgn-co",       "csn",
    "sgn-de",       "gsg",
    "sgn-dk",       "dsl",
    "sgn-es",       "ssp",
    "sgn-fr",       "fsl",
    "sgn-gb",       "bfi",
    "sgn-gr",       "gss",
    "sgn-ie",       "isg",
    "sgn-it",       "ise",
    "sgn-jp",       "jsl",
    "sgn-mx",       "mfs",
    "sgn-ni",       "ncs",
    "sgn-nl",       "dse",
    "sgn-no",       "nsl",
    "sgn-pt",       "psr",
    "sgn-se",       "swl",
    "sgn-us",       "ase",
    "sgn-za",       "sfs",
    "zh-cmn",       "cmn",
    "zh-cmn-hans",  "cmn-hans",
    "zh-cmn-hant",  "cmn-hant",
    "zh-gan",       "gan",
    "zh-wuu",       "wuu",
    "zh-yue",       "yue",

    // variant tag with preferred value
    "ja-latn-hepburn-heploc", "ja-latn-alalc97",
};

/*
  Updated on 2018-09-12 from
  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .

  grep 'Type: language' -A 7 language-subtag-registry  | egrep 'Subtag|Prefe' | \
  grep -B1 'Preferred' | grep -v '^--' | \
  awk -n '/Subtag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'

  Make sure that 2-letter language subtags come before 3-letter subtags.
*/
/*
 * Rows come in pairs: a deprecated language code followed by its
 * replacement. Each row is a char[4], i.e. at most three letters plus the
 * terminating NUL.
 * The lookup in _appendLanguageToLanguageTag() stops at the first
 * deprecated code that is longer than the input, so every 2-letter pair
 * has to stay ahead of the 3-letter pairs.
 */
constexpr char DEPRECATEDLANGS[][4] = {
/*  deprecated  new */
    "in",       "id",
    "iw",       "he",
    "ji",       "yi",
    "jw",       "jv",
    "mo",       "ro",
    "aam",       "aas",
    "adp",       "dz",
    "aue",       "ktz",
    "ayx",       "nun",
    "bgm",       "bcg",
    "bjd",       "drl",
    "ccq",       "rki",
    "cjr",       "mom",
    "cka",       "cmr",
    "cmk",       "xch",
    "coy",       "pij",
    "cqu",       "quh",
    "drh",       "khk",
    "drw",       "prs",
    "gav",       "dev",
    "gfx",       "vaj",
    "ggn",       "gvr",
    "gti",       "nyc",
    "guv",       "duz",
    "hrr",       "jal",
    "ibi",       "opa",
    "ilw",       "gal",
    "jeg",       "oyb",
    "kgc",       "tdf",
    "kgh",       "kml",
    "koj",       "kwv",
    "krm",       "bmf",
    "ktr",       "dtp",
    "kvs",       "gdj",
    "kwq",       "yam",
    "kxe",       "tvd",
    "kzj",       "dtp",
    "kzt",       "dtp",
    "lii",       "raq",
    "lmm",       "rmx",
    "meg",       "cir",
    "mst",       "mry",
    "mwj",       "vaj",
    "myt",       "mry",
    "nad",       "xny",
    "ncp",       "kdz",
    "nnx",       "ngv",
    "nts",       "pij",
    "oun",       "vaj",
    "pcr",       "adx",
    "pmc",       "huw",
    "pmu",       "phr",
    "ppa",       "bfy",
    "ppr",       "lcq",
    "pry",       "prt",
    "puz",       "pub",
    "sca",       "hle",
    "skk",       "oyb",
    "tdu",       "dtp",
    "thc",       "tpo",
    "thx",       "oyb",
    "tie",       "ras",
    "tkk",       "twm",
    "tlw",       "weo",
    "tmp",       "tyj",
    "tne",       "kak",
    "tnf",       "prs",
    "tsf",       "taj",
    "uok",       "ema",
    "xba",       "cax",
    "xia",       "acn",
    "xkh",       "waw",
    "xsj",       "suj",
    "ybd",       "rki",
    "yma",       "lrr",
    "ymt",       "mtm",
    "yos",       "zom",
    "yuu",       "yug",
};

/*
  Updated on 2018-04-24 from

  curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \
  grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \
  grep -B1 'Preferred' | \
  awk -n '/Subtag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
*/
/*
 * Rows come in pairs: a deprecated region code followed by its
 * replacement. Each row is a char[3], i.e. two uppercase letters plus the
 * terminating NUL. _appendRegionToLanguageTag() scans the whole table.
 */
constexpr char DEPRECATEDREGIONS[][3] = {
/*  deprecated  new */
    "BU",       "MM",
    "DD",       "DE",
    "FX",       "FR",
    "TP",       "TL",
    "YD",       "YE",
    "ZR",       "CD",
};

/*
* -------------------------------------------------
*
* These ultag_ functions may be exposed as APIs later
*
* -------------------------------------------------
*/

/*
 * Forward declarations only. They sit in the unnamed namespace, so the
 * matching definitions have to be provided in this translation unit.
 * NOTE(review): the per-function remarks below are inferred from names and
 * signatures, not from the bodies -- confirm against the definitions.
 */

/* Presumably parses `tag` (length `tagLen`) and reports the consumed length
   through `parsedLen` -- TODO confirm. */
ULanguageTag*
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode& status);

/* Used as the closer of LocalULanguageTagPointer. */
void
ultag_close(ULanguageTag* langtag);

const char*
ultag_getLanguage(const ULanguageTag* langtag);

/* Disabled declaration, kept out of the build by #if 0. */
#if 0
const char*
ultag_getJDKLanguage(const ULanguageTag* langtag);
#endif

/* Indexed accessor; the valid range of `idx` is presumably
   [0, ultag_getExtlangSize()) -- TODO confirm. */
const char*
ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);

int32_t
ultag_getExtlangSize(const ULanguageTag* langtag);

const char*
ultag_getScript(const ULanguageTag* langtag);

const char*
ultag_getRegion(const ULanguageTag* langtag);

/* Indexed accessor; the valid range of `idx` is presumably
   [0, ultag_getVariantsSize()) -- TODO confirm. */
const char*
ultag_getVariant(const ULanguageTag* langtag, int32_t idx);

int32_t
ultag_getVariantsSize(const ULanguageTag* langtag);

/* Indexed accessors; the valid range of `idx` is presumably
   [0, ultag_getExtensionsSize()) -- TODO confirm. */
const char*
ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);

const char*
ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);

int32_t
ultag_getExtensionsSize(const ULanguageTag* langtag);

const char*
ultag_getPrivateUse(const ULanguageTag* langtag);

/* Disabled declaration, kept out of the build by #if 0. */
#if 0
const char*
ultag_getLegacy(const ULanguageTag* langtag);
#endif

}  // namespace

U_NAMESPACE_BEGIN

/**
 * \class LocalULanguageTagPointer
 * "Smart pointer" class, closes a ULanguageTag via ultag_close().
 * For most methods see the LocalPointerBase base class.
 * Both ULanguageTag and ultag_close() are declared in this file's
 * unnamed namespace.
 *
 * @see LocalPointerBase
 * @see LocalPointer
 * @internal
 */
U_DEFINE_LOCAL_OPEN_POINTER(LocalULanguageTagPointer, ULanguageTag, ultag_close);

U_NAMESPACE_END

/*
* -------------------------------------------------
*
* Language subtag syntax validation functions
*
* -------------------------------------------------
*/

namespace {

bool
_isAlphaString(const char* s, int32_t len) {
    int32_t i;
    for (i = 0; i < len; i++) {
        if (!ISALPHA(*(s + i))) {
            return false;
        }
    }
    return true;
}

bool
_isNumericString(const char* s, int32_t len) {
    int32_t i;
    for (i = 0; i < len; i++) {
        if (!ISNUMERIC(*(s + i))) {
            return false;
        }
    }
    return true;
}

bool
_isAlphaNumericString(const char* s, int32_t len) {
    int32_t i;
    for (i = 0; i < len; i++) {
        if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
            return false;
        }
    }
    return true;
}

bool
_isAlphaNumericStringLimitedLength(const char* s, int32_t len, int32_t min, int32_t max) {
    if (len < 0) {
        len = (int32_t)uprv_strlen(s);
    }
    if (len >= min && len <= max && _isAlphaNumericString(s, len)) {
        return true;
    }
    return false;
}

}  // namespace

bool
ultag_isLanguageSubtag(const char* s, int32_t len) {
    /*
     * unicode_language_subtag = alpha{2,3} | alpha{5,8};
     * NOTE: Per ICUTC 2019/01/23- accepting alpha 4
     * See ICU-20372
     */
    if (len < 0) {
        len = (int32_t)uprv_strlen(s);
    }
    if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
        return true;
    }
    return false;
}

namespace {

bool
_isExtlangSubtag(const char* s, int32_t len) {
    /*
     * extlang       = 3ALPHA              ; selected ISO 639 codes
     *                 *2("-" 3ALPHA)      ; permanently reserved
     */
    if (len < 0) {
        len = (int32_t)uprv_strlen(s);
    }
    if (len == 3 && _isAlphaString(s, len)) {
        return true;
    }
    return false;
}

}  // namespace

bool
ultag_isScriptSubtag(const char* s, int32_t len) {
    /*
     * script        = 4ALPHA              ; ISO 15924 code
     */
    if (len < 0) {
        len = (int32_t)uprv_strlen(s);
    }
    if (len == 4 && _isAlphaString(s, len)) {
        return true;
    }
    return false;
}

bool
ultag_isRegionSubtag(const char* s, int32_t len) {
    /*
     * region        = 2ALPHA              ; ISO 3166-1 code
     *               / 3DIGIT              ; UN M.49 code
     */
    if (len < 0) {
        len = (int32_t)uprv_strlen(s);
    }
    if (len == 2 && _isAlphaString(s, len)) {
        return true;
    }
    if (len == 3 && _isNumericString(s, len)) {
        return true;
    }
    return false;
}

namespace {

bool
_isVariantSubtag(const char* s, int32_t len) {
    /*
     * variant       = 5*8alphanum         ; registered variants
     *               / (DIGIT 3alphanum)
     */
    if (len < 0) {
        len = (int32_t)uprv_strlen(s);
    }
    if (_isAlphaNumericStringLimitedLength(s, len, 5, 8)) {
        return true;
    }
    if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
        return true;
    }
    return false;
}

bool
_isSepListOf(bool (*test)(const char*, int32_t), const char* s, int32_t len) {
    const char *p = s;
    const char *pSubtag = nullptr;

    if (len < 0) {
        len = (int32_t)uprv_strlen(s);
    }

    while ((p - s) < len) {
        if (*p == SEP) {
            if (pSubtag == nullptr) {
                return false;
            }
            if (!test(pSubtag, (int32_t)(p - pSubtag))) {
                return false;
            }
            pSubtag = nullptr;
        } else if (pSubtag == nullptr) {
            pSubtag = p;
        }
        p++;
    }
    if (pSubtag == nullptr) {
        return false;
    }
    return test(pSubtag, (int32_t)(p - pSubtag));
}

}  // namespace

/*
 * Returns true when `s` is a SEP-separated list made only of valid
 * variant subtags. A negative `len` means `s` is NUL-terminated.
 */
bool
ultag_isVariantSubtags(const char* s, int32_t len) {
    bool (* const subtagTest)(const char*, int32_t) = _isVariantSubtag;
    return _isSepListOf(subtagTest, s, len);
}

namespace {

// This is for the ICU-specific "lvariant" handling.
bool
_isPrivateuseVariantSubtag(const char* s, int32_t len) {
    /*
     * variant       = 1*8alphanum         ; registered variants
     *               / (DIGIT 3alphanum)
     */
    return _isAlphaNumericStringLimitedLength(s, len , 1, 8);
}

bool
_isExtensionSingleton(const char* s, int32_t len) {
    /*
     * extension     = singleton 1*("-" (2*8alphanum))
     *
     * singleton     = DIGIT               ; 0 - 9
     *               / %x41-57             ; A - W
     *               / %x59-5A             ; Y - Z
     *               / %x61-77             ; a - w
     *               / %x79-7A             ; y - z
     */
    if (len < 0) {
        len = (int32_t)uprv_strlen(s);
    }
    if (len == 1 && (ISALPHA(*s) || ISNUMERIC(*s)) && (uprv_tolower(*s) != PRIVATEUSE)) {
        return true;
    }
    return false;
}

bool
_isExtensionSubtag(const char* s, int32_t len) {
    /*
     * extension     = singleton 1*("-" (2*8alphanum))
     */
    return _isAlphaNumericStringLimitedLength(s, len, 2, 8);
}

}  // namespace

/*
 * Returns true when `s` is a SEP-separated list made only of valid
 * extension subtags. A negative `len` means `s` is NUL-terminated.
 */
bool
ultag_isExtensionSubtags(const char* s, int32_t len) {
    bool (* const subtagTest)(const char*, int32_t) = _isExtensionSubtag;
    return _isSepListOf(subtagTest, s, len);
}

namespace {

bool
_isPrivateuseValueSubtag(const char* s, int32_t len) {
    /*
     * privateuse    = "x" 1*("-" (1*8alphanum))
     */
    return _isAlphaNumericStringLimitedLength(s, len, 1, 8);
}

}  // namespace

/*
 * Returns true when `s` is a SEP-separated list made only of valid
 * private use value subtags. A negative `len` means `s` is NUL-terminated.
 */
bool
ultag_isPrivateuseValueSubtags(const char* s, int32_t len) {
    bool (* const subtagTest)(const char*, int32_t) = _isPrivateuseValueSubtag;
    return _isSepListOf(subtagTest, s, len);
}

bool
ultag_isUnicodeLocaleAttribute(const char* s, int32_t len) {
    /*
     * attribute = alphanum{3,8} ;
     */
    return _isAlphaNumericStringLimitedLength(s, len , 3, 8);
}

/*
 * Returns true when `s` is a SEP-separated list made only of valid
 * Unicode locale attributes. A negative `len` means `s` is NUL-terminated.
 */
bool
ultag_isUnicodeLocaleAttributes(const char* s, int32_t len) {
    bool (* const subtagTest)(const char*, int32_t) = ultag_isUnicodeLocaleAttribute;
    return _isSepListOf(subtagTest, s, len);
}

bool
ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
    /*
     * key = alphanum alpha ;
     */
    if (len < 0) {
        len = (int32_t)uprv_strlen(s);
    }
    if (len == 2 && (ISALPHA(*s) || ISNUMERIC(*s)) && ISALPHA(s[1])) {
        return true;
    }
    return false;
}

bool
_isUnicodeLocaleTypeSubtag(const char*s, int32_t len) {
    /*
     * alphanum{3,8}
     */
    return _isAlphaNumericStringLimitedLength(s, len , 3, 8);
}

/*
 * Checks the syntax of a complete Unicode locale type.
 *
 *   type = alphanum{3,8} (sep alphanum{3,8})* ;
 *
 * A negative `len` means `s` is NUL-terminated.
 */
bool
ultag_isUnicodeLocaleType(const char*s, int32_t len) {
    bool (* const subtagTest)(const char*, int32_t) = _isUnicodeLocaleTypeSubtag;
    return _isSepListOf(subtagTest, s, len);
}

namespace {

bool
_isTKey(const char* s, int32_t len)
{
    /*
     * tkey = alpha digit ;
     */
    if (len < 0) {
        len = (int32_t)uprv_strlen(s);
    }
    if (len == 2 && ISALPHA(*s) && ISNUMERIC(*(s + 1))) {
        return true;
    }
    return false;
}

}  // namespace

/*
 * Walks the SEP-separated pieces of the NUL-terminated `localeID` and
 * returns a pointer to the first piece that has tkey syntax, or nullptr
 * when no piece qualifies.
 */
const char*
ultag_getTKeyStart(const char *localeID) {
    const char* subtag = localeID;
    for (;;) {
        const char* const sepPos = uprv_strchr(subtag, SEP);
        if (sepPos == nullptr) {
            /* last piece: it extends to the terminating NUL */
            return _isTKey(subtag, -1) ? subtag : nullptr;
        }
        if (_isTKey(subtag, static_cast<int32_t>(sepPos - subtag))) {
            return subtag;
        }
        subtag = sepPos + 1;
    }
}

namespace {

bool
_isTValue(const char* s, int32_t len)
{
    /*
     * tvalue = (sep alphanum{3,8})+ ;
     */
    return _isAlphaNumericStringLimitedLength(s, len , 3, 8);
}

/*
 * One step of the state machine that validates the subtags of a "t"
 * (transformed content) extension.
 *
 * `state` carries the parser position between calls and starts at 0. The
 * function returns true and updates `state` when the subtag `s` is
 * allowed at the current position; it returns false, leaving `state`
 * untouched, otherwise. A negative `len` means `s` is NUL-terminated.
 *
 * kGotTKey is deliberately negative: a sequence that stops right after a
 * tkey is incomplete, and _isStatefulSepListOf() rejects a negative final
 * state.
 */
bool
_isTransformedExtensionSubtag(int32_t& state, const char* s, int32_t len)
{
    constexpr int32_t kStart = 0;        // nothing seen yet: language subtag, tkey or end may follow
    constexpr int32_t kGotLanguage = 1;  // after language: script, region, variant, tkey or end
    constexpr int32_t kGotScript = 2;    // after script: region, variant, tkey or end
    constexpr int32_t kGotRegion = 3;    // after region: variant, tkey or end
    constexpr int32_t kGotVariant = 4;   // after a variant: another variant, tkey or end
    constexpr int32_t kGotTKey = -1;     // after a tkey: a tvalue is required
    constexpr int32_t kGotTValue = 6;    // after a tvalue: tkey, another tvalue or end

    if (len < 0) {
        len = static_cast<int32_t>(uprv_strlen(s));
    }

    if (state == kStart) {
        /* a four-letter subtag is not taken as a language here */
        if (len != 4 && ultag_isLanguageSubtag(s, len)) {
            state = kGotLanguage;
            return true;
        }
        if (_isTKey(s, len)) {
            state = kGotTKey;
            return true;
        }
        return false;
    }

    if (state == kGotTKey) {
        if (!_isTValue(s, len)) {
            return false;
        }
        state = kGotTValue;
        return true;
    }

    if (state == kGotTValue) {
        if (_isTKey(s, len)) {
            state = kGotTKey;
            return true;
        }
        /* further tvalue subtags keep the state as it is */
        return _isTValue(s, len);
    }

    if (state < kGotLanguage || state > kGotVariant) {
        /* not a state this machine knows about */
        return false;
    }

    /* Inside the language part: each later state allows fewer subtag kinds. */
    if (state == kGotLanguage && ultag_isScriptSubtag(s, len)) {
        state = kGotScript;
        return true;
    }
    if (state <= kGotScript && ultag_isRegionSubtag(s, len)) {
        state = kGotRegion;
        return true;
    }
    if (_isVariantSubtag(s, len)) {
        state = kGotVariant;
        return true;
    }
    if (_isTKey(s, len)) {
        state = kGotTKey;
        return true;
    }
    return false;
}

/*
 * One step of the state machine that validates the subtags of a "u"
 * (Unicode locale) extension.
 *
 * `state` carries the parser position between calls and starts at 0. The
 * function returns true, updating `state` as needed, when the subtag `s`
 * is allowed at the current position, and false otherwise. `len` is
 * passed through to the individual syntax checks.
 */
bool
_isUnicodeExtensionSubtag(int32_t& state, const char* s, int32_t len)
{
    constexpr int32_t kStart = 0;    // nothing but attributes so far: attribute, key or end
    constexpr int32_t kGotKey = 1;   // after a key: type subtag, another key or end
    constexpr int32_t kGotType = 2;  // after a type subtag: another type subtag, key or end

    if (state != kStart && state != kGotKey && state != kGotType) {
        /* not a state this machine knows about */
        return false;
    }

    /* A key is acceptable in every state and is always tried first. */
    if (ultag_isUnicodeLocaleKey(s, len)) {
        state = kGotKey;
        return true;
    }

    if (state == kStart) {
        /* before the first key only attributes may appear; state stays kStart */
        return ultag_isUnicodeLocaleAttribute(s, len);
    }

    if (!_isUnicodeLocaleTypeSubtag(s, len)) {
        return false;
    }
    state = kGotType;
    return true;
}

/*
 * Splits `s` at every SEP and feeds the pieces, in order, to the state
 * machine step `test`, starting from state 0. Returns true only if every
 * piece is accepted and the state left after the last piece is not
 * negative. Empty pieces are passed to `test` with length 0. A negative
 * `len` means `s` is NUL-terminated.
 */
bool
_isStatefulSepListOf(bool (*test)(int32_t&, const char*, int32_t), const char* s, int32_t len)
{
    const int32_t total = (len < 0) ? static_cast<int32_t>(uprv_strlen(s)) : len;

    int32_t state = 0;
    int32_t subtagStart = 0;  /* offset of the piece currently being scanned */

    for (int32_t pos = 0; pos < total; ++pos) {
        if (s[pos] != SEP) {
            continue;
        }
        if (!test(state, s + subtagStart, pos - subtagStart)) {
            return false;
        }
        subtagStart = pos + 1;
    }

    /* the last piece runs to the end of the input; test it before looking at the state */
    return test(state, s + subtagStart, total - subtagStart) && state >= 0;
}

}  // namespace

bool
ultag_isTransformedExtensionSubtags(const char* s, int32_t len)
{
    return _isStatefulSepListOf(&_isTransformedExtensionSubtag, s, len);
}

bool
ultag_isUnicodeExtensionSubtags(const char* s, int32_t len) {
    return _isStatefulSepListOf(&_isUnicodeExtensionSubtag, s, len);
}

namespace {

/*
* -------------------------------------------------
*
* Helper functions
*
* -------------------------------------------------
*/

/*
 * Appends `var` to the end of the list headed by `*first`, so insertion
 * order is kept. If the list already holds an entry with the same variant
 * text, the list is left unchanged and false is returned; `var` still owns
 * the entry in that case. On success ownership of the entry moves to the
 * list and true is returned.
 */
bool
_addVariantToList(VariantListEntry **first, icu::LocalPointer<VariantListEntry> var) {
    /* `link` always points at the pointer that leads to the next node to look at */
    VariantListEntry** link = first;
    while (*link != nullptr) {
        if (uprv_compareInvCharsAsAscii(var->variant, (*link)->variant) == 0) {
            /* duplicated variant */
            return false;
        }
        link = &(*link)->next;
    }

    /* reached the end without finding a duplicate: hook the entry in as the tail */
    var->next = nullptr;
    *link = var.orphan();
    return true;
}

/*
 * Inserts `attr` into the list headed by `*first`, keeping the list in
 * ascending order of attribute text (as compared by
 * uprv_compareInvCharsAsAscii). Returns false, leaving both the list and
 * `attr` untouched, if an entry with the same text is already present;
 * returns true once the entry is linked in.
 */
bool
_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
    /* `link` always points at the pointer that leads to the next node to look at */
    AttributeListEntry** link = first;
    for (; *link != nullptr; link = &(*link)->next) {
        const int32_t order = uprv_compareInvCharsAsAscii(attr->attribute, (*link)->attribute);
        if (order == 0) {
            /* duplicated attribute */
            return false;
        }
        if (order < 0) {
            /* the new entry sorts before this node */
            break;
        }
    }

    attr->next = *link;
    *link = attr;
    return true;
}

/*
 * Inserts `ext` into the list headed by `*first`, keeping the list sorted
 * by key. Returns false, leaving both the list and `ext` untouched, if a
 * key comparing equal is already present; returns true once the entry is
 * linked in.
 *
 * With `localeToBCP` false, keys are ordered by
 * uprv_compareInvCharsAsAscii(). With `localeToBCP` true a special order
 * applies:
 *  - two one-character keys: ordered by character, except that the
 *    private use key (PRIVATEUSE) sorts after any other one-character key;
 *  - a one-character key against a longer key: the longer key is treated
 *    as if it were the one-character key LDMLEXT;
 *  - two longer keys: ordered by uprv_compareInvCharsAsAscii(), except
 *    that LOCALE_ATTRIBUTE_KEY sorts before every other key.
 */
bool
_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, bool localeToBCP) {
    /* ordering used for locale to bcp conversion */
    const auto compareForBCP = [](const char* newKey, const char* oldKey) -> int32_t {
        const int32_t newLen = static_cast<int32_t>(uprv_strlen(newKey));
        const int32_t oldLen = static_cast<int32_t>(uprv_strlen(oldKey));

        if (newLen == 1 && oldLen == 1) {
            if (*newKey == *oldKey) {
                return 0;
            }
            if (*newKey == PRIVATEUSE) {
                return 1;
            }
            if (*oldKey == PRIVATEUSE) {
                return -1;
            }
            return *newKey - *oldKey;
        }
        if (newLen == 1) {
            return *newKey - LDMLEXT;
        }
        if (oldLen == 1) {
            return LDMLEXT - *oldKey;
        }

        /* Both are u extension keys - 'attribute' needs special handling */
        int32_t order = uprv_compareInvCharsAsAscii(newKey, oldKey);
        if (order != 0) {
            if (uprv_strcmp(oldKey, LOCALE_ATTRIBUTE_KEY) == 0) {
                order = 1;
            } else if (uprv_strcmp(newKey, LOCALE_ATTRIBUTE_KEY) == 0) {
                order = -1;
            }
        }
        return order;
    };

    /* `link` always points at the pointer that leads to the next node to look at */
    ExtensionListEntry** link = first;
    for (; *link != nullptr; link = &(*link)->next) {
        const char* const curKey = (*link)->key;
        const int32_t order = localeToBCP
            ? compareForBCP(ext->key, curKey)
            : uprv_compareInvCharsAsAscii(ext->key, curKey);
        if (order == 0) {
            /* duplicated extension key */
            return false;
        }
        if (order < 0) {
            /* the new entry sorts before this node */
            break;
        }
    }

    ext->next = *link;
    *link = ext;
    return true;
}

/*
 * Puts every field of `langtag` into its "nothing parsed" state: string
 * fields point at EMPTY, while the buffer, the extlang slots and the two
 * list heads are nullptr.
 */
void
_initializeULanguageTag(ULanguageTag* langtag) {
    /* pointer fields */
    langtag->buf = nullptr;
    langtag->variants = nullptr;
    langtag->extensions = nullptr;
    for (const char*& slot : langtag->extlang) {
        slot = nullptr;
    }

    /* string fields */
    langtag->language = EMPTY;
    langtag->script = EMPTY;
    langtag->region = EMPTY;
    langtag->privateuse = EMPTY;
    langtag->legacy = EMPTY;
}

void
_appendLanguageToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {
    UErrorCode tmpStatus = U_ZERO_ERROR;

    if (U_FAILURE(status)) {
        return;
    }

    icu::CharString buf = ulocimp_getLanguage(localeID, tmpStatus);
    if (U_FAILURE(tmpStatus)) {
        if (strict) {
            status = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
    }

    /* Note: returned language code is in lower case letters */

    if (buf.isEmpty()) {
        sink.Append(LANG_UND, LANG_UND_LEN);
    } else if (!ultag_isLanguageSubtag(buf.data(), buf.length())) {
            /* invalid language code */
        if (strict) {
            status = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
        sink.Append(LANG_UND, LANG_UND_LEN);
    } else {
        /* resolve deprecated */
        for (int32_t i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
            // 2-letter deprecated subtags are listede before 3-letter
            // ones in DEPRECATEDLANGS[]. Get out of loop on coming
            // across the 1st 3-letter subtag, if the input is a 2-letter code.
            // to avoid continuing to try when there's no match.
            if (buf.length() < (int32_t)uprv_strlen(DEPRECATEDLANGS[i])) break;
            if (uprv_compareInvCharsAsAscii(buf.data(), DEPRECATEDLANGS[i]) == 0) {
                const char* const resolved = DEPRECATEDLANGS[i + 1];
                sink.Append(resolved, (int32_t)uprv_strlen(resolved));
                return;
            }
        }
        sink.Append(buf.data(), buf.length());
    }
}

void
_appendScriptToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {
    UErrorCode tmpStatus = U_ZERO_ERROR;

    if (U_FAILURE(status)) {
        return;
    }

    icu::CharString buf = ulocimp_getScript(localeID, tmpStatus);
    if (U_FAILURE(tmpStatus)) {
        if (strict) {
            status = U_ILLEGAL_ARGUMENT_ERROR;
        }
        return;
    }

    if (!buf.isEmpty()) {
        if (!ultag_isScriptSubtag(buf.data(), buf.length())) {
            /* invalid script code */
            if (strict) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
            }
            return;
        } else {
            sink.Append("-", 1);
            sink.Append(buf.data(), buf.length());
        }
    }
}

void
_appendRegionToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {
    UErrorCode tmpStatus = U_ZERO_ERROR;

    if (U_FAILURE(status)) {
        return;
    }

    icu::CharString buf = ulocimp_getRegion(localeID, tmpStatus);
    if (U_FAILURE(tmpStatus)) {
        if (strict) {
            status = U_ILLEGAL_ARGUMENT_ERROR;
        }
        return;
    }

    if (!buf.isEmpty()) {
        if (!ultag_isRegionSubtag(buf.data(), buf.length())) {
            /* invalid region code */
            if (strict) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
            }
            return;
        } else {
            sink.Append("-", 1);
            /* resolve deprecated */
            for (int32_t i = 0; i < UPRV_LENGTHOF(DEPRECATEDREGIONS); i += 2) {
                if (uprv_compareInvCharsAsAscii(buf.data(), DEPRECATEDREGIONS[i]) == 0) {
                    const char* const resolved = DEPRECATEDREGIONS[i + 1];
                    sink.Append(resolved, (int32_t)uprv_strlen(resolved));
                    return;
                }
            }
            sink.Append(buf.data(), buf.length());
        }
    }
}

/**
 * Orders the variant strings of a linked list ascending, as determined by
 * uprv_compareInvCharsAsAscii().
 *
 * Every pair of entries is compared; only the `variant` pointers are
 * exchanged, the nodes themselves stay linked in their original order.
 *
 * @param first  head of the list; nullptr is accepted and does nothing
 */
void _sortVariants(VariantListEntry* first) {
    for (VariantListEntry* anchor = first; anchor != nullptr; anchor = anchor->next) {
        VariantListEntry* probe = anchor->next;
        while (probe != nullptr) {
            /* keep the smaller string at the anchor position */
            if (uprv_compareInvCharsAsAscii(anchor->variant, probe->variant) > 0) {
                std::swap(anchor->variant, probe->variant);
            }
            probe = probe->next;
        }
    }
}

void
_appendVariantsToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, bool& hadPosix, UErrorCode& status) {
    if (U_FAILURE(status)) { return; }

    UErrorCode tmpStatus = U_ZERO_ERROR;
    icu::CharString buf = ulocimp_getVariant(localeID, tmpStatus);
    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
        if (strict) {
            status = U_ILLEGAL_ARGUMENT_ERROR;
        }
        return;
    }

    if (!buf.isEmpty()) {
        char *p, *pVar;
        bool bNext = true;
        VariantListEntry *varFirst = nullptr;

        pVar = nullptr;
        p = buf.data();
        while (bNext) {
            if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
                if (*p == 0) {
                    bNext = false;
                } else {
                    *p = 0; /* terminate */
                }
                if (pVar == nullptr) {
                    if (strict) {
                        status = U_ILLEGAL_ARGUMENT_ERROR;
                        break;
                    }
                    /* ignore empty variant */
                } else {
                    /* ICU uses upper case letters for variants, but
                       the canonical format is lowercase in BCP47 */
                    for (int32_t i = 0; *(pVar + i) != 0; i++) {
                        *(pVar + i) = uprv_tolower(*(pVar + i));
                    }

                    /* validate */
                    if (_isVariantSubtag(pVar, -1)) {
                        if (uprv_strcmp(pVar,POSIX_VALUE) || buf.length() != (int32_t)uprv_strlen(POSIX_VALUE)) {
                            /* emit the variant to the list */
                            icu::LocalPointer<VariantListEntry> var(new VariantListEntry, status);
                            if (U_FAILURE(status)) {
                                break;
                            }
                            var->variant = pVar;
                            if (!_addVariantToList(&varFirst, std::move(var))) {
                                /* duplicated variant */
                                if (strict) {
                                    status = U_ILLEGAL_ARGUMENT_ERROR;
                                    break;
                                }
                            }
                        } else {
                            /* Special handling for POSIX variant, need to remember that we had it and then */
                            /* treat it like an extension later. */
                            hadPosix = true;
                        }
                    } else if (strict) {
                        status = U_ILLEGAL_ARGUMENT_ERROR;
                        break;
                    } else if (_isPrivateuseValueSubtag(pVar, -1)) {
                        /* Handle private use subtags separately */
                        break;
                    }
                }
                /* reset variant starting position */
                pVar = nullptr;
            } else if (pVar == nullptr) {
                pVar = p;
            }
            p++;
        }

        if (U_SUCCESS(status)) {
            if (varFirst != nullptr) {
                int32_t varLen;

                /* per UTS35, we should sort the variants */
                _sortVariants(varFirst);

                /* write out validated/normalized variants to the target */
                VariantListEntry* var = varFirst;
                while (var != nullptr) {
                    sink.Append("-", 1);
                    varLen = (int32_t)uprv_strlen(var->variant);
                    sink.Append(var->variant, varLen);
                    var = var->next;
                }
            }
        }

        /* clean up */
        for (VariantListEntry* var = varFirst; var != nullptr; ) {
            VariantListEntry *tmpVar = var->next;
            delete var;
            var = tmpVar;
        }

        if (U_FAILURE(status)) {
            return;
        }
    }
}

void
_appendKeywordsToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, bool hadPosix, UErrorCode& status) {
    if (U_FAILURE(status)) { return; }

    icu::MemoryPool<AttributeListEntry> attrPool;
    icu::MemoryPool<ExtensionListEntry> extPool;
    icu::MemoryPool<icu::CharString> strPool;

    icu::LocalUEnumerationPointer keywordEnum(uloc_openKeywords(localeID, &status));
    if (U_FAILURE(status) && !hadPosix) {
        return;
    }
    if (keywordEnum.isValid() || hadPosix) {
        /* reorder extensions */
        int32_t len;
        const char *key;
        ExtensionListEntry *firstExt = nullptr;
        ExtensionListEntry *ext;
        AttributeListEntry *firstAttr = nullptr;
        AttributeListEntry *attr;
        icu::MemoryPool<icu::CharString> extBufPool;
        const char *bcpKey=nullptr, *bcpValue=nullptr;
        UErrorCode tmpStatus = U_ZERO_ERROR;
        int32_t keylen;
        bool isBcpUExt;

        while (true) {
            key = uenum_next(keywordEnum.getAlias(), nullptr, &status);
            if (key == nullptr) {
                break;
            }

            icu::CharString buf = ulocimp_getKeywordValue(localeID, key, tmpStatus);
            len = buf.length();

            if (U_FAILURE(tmpStatus)) {
                if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    break;
                }
                if (strict) {
                    status = U_ILLEGAL_ARGUMENT_ERROR;
                    break;
                }
                /* ignore this keyword */
                tmpStatus = U_ZERO_ERROR;
                continue;
            }

            keylen = (int32_t)uprv_strlen(key);
            isBcpUExt = (keylen > 1);

            /* special keyword used for representing Unicode locale attributes */
            if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
                if (len > 0) {
                    int32_t i = 0;
                    while (true) {
                        icu::CharString attrBuf;
                        for (; i < len; i++) {
                            if (buf[i] != '-') {
                                attrBuf.append(buf[i], status);
                            } else {
                                i++;
                                break;
                            }
                        }
                        if (U_FAILURE(status)) {
                            return;
                        }
                        if (attrBuf.isEmpty() && i >= len) {
                            break;
                        }

                        /* create AttributeListEntry */
                        attr = attrPool.create();
                        if (attr == nullptr) {
                            status = U_MEMORY_ALLOCATION_ERROR;
                            break;
                        }
                        if (icu::CharString* str =
                                strPool.create(std::move(attrBuf), status)) {
                            if (U_FAILURE(status)) { break; }
                            attr->attribute = str->data();
                        } else {
                            status = U_MEMORY_ALLOCATION_ERROR;
                            break;
                        }

                        if (!_addAttributeToList(&firstAttr, attr)) {
                            if (strict) {
                                status = U_ILLEGAL_ARGUMENT_ERROR;
                                break;
                            }
                        }
                    }
                    /* for a place holder ExtensionListEntry */
                    bcpKey = LOCALE_ATTRIBUTE_KEY;
                    bcpValue = nullptr;
                }
            } else if (isBcpUExt) {
                bcpKey = uloc_toUnicodeLocaleKey(key);
                if (bcpKey == nullptr) {
                    if (strict) {
                        status = U_ILLEGAL_ARGUMENT_ERROR;
                        break;
                    }
                    continue;
                }

                /* we've checked buf is null-terminated above */
                bcpValue = uloc_toUnicodeLocaleType(key, buf.data());
                if (bcpValue == nullptr) {
                    if (strict) {
                        status = U_ILLEGAL_ARGUMENT_ERROR;
                        break;
                    }
                    continue;
                }
                if (bcpValue == buf.data()) {
                    /*
                    When uloc_toUnicodeLocaleType(key, buf) returns the
                    input value as is, the value is well-formed, but has
                    no known mapping. This implementation normalizes the
                    value to lower case
                    */
                    icu::CharString* extBuf = extBufPool.create(buf, tmpStatus);

                    if (extBuf == nullptr) {
                        status = U_MEMORY_ALLOCATION_ERROR;
                        break;
                    }
                    if (U_FAILURE(tmpStatus)) {
                        status = tmpStatus;
                        break;
                    }

                    T_CString_toLowerCase(extBuf->data());
                    bcpValue = extBuf->data();
                }
            } else {
                if (*key == PRIVATEUSE) {
                    if (!ultag_isPrivateuseValueSubtags(buf.data(), len)) {
                        if (strict) {
                            status = U_ILLEGAL_ARGUMENT_ERROR;
                            break;
                        }
                        continue;
                    }
                } else {
                    if (!_isExtensionSingleton(key, keylen) || !ultag_isExtensionSubtags(buf.data(), len)) {
                        if (strict) {
                            status = U_ILLEGAL_ARGUMENT_ERROR;
                            break;
                        }
                        continue;
                    }
                }
                bcpKey = key;
                icu::CharString* extBuf =
                    extBufPool.create(buf.data(), len, tmpStatus);
                if (extBuf == nullptr) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    break;
                }
                if (U_FAILURE(tmpStatus)) {
                    status = tmpStatus;
                    break;
                }
                bcpValue = extBuf->data();
            }

            /* create ExtensionListEntry */
            ext = extPool.create();
            if (ext == nullptr) {
                status = U_MEMORY_ALLOCATION_ERROR;
                break;
            }
            ext->key = bcpKey;
            ext->value = bcpValue;

            if (!_addExtensionToList(&firstExt, ext, true)) {
                if (strict) {
                    status = U_ILLEGAL_ARGUMENT_ERROR;
                    break;
                }
            }
        }

        /* Special handling for POSIX variant - add the keywords for POSIX */
        if (hadPosix) {
            /* create ExtensionListEntry for POSIX */
            ext = extPool.create();
            if (ext == nullptr) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            ext->key = POSIX_KEY;
            ext->value = POSIX_VALUE;

            if (!_addExtensionToList(&firstExt, ext, true)) {
                // Silently ignore errors.
            }
        }

        if (U_SUCCESS(status) && (firstExt != nullptr || firstAttr != nullptr)) {
            bool startLDMLExtension = false;
            for (ext = firstExt; ext; ext = ext->next) {
                if (!startLDMLExtension && uprv_strlen(ext->key) > 1) {
                    /* first LDML u singlton extension */
                   sink.Append("-u", 2);
                   startLDMLExtension = true;
                }

                /* write out the sorted BCP47 attributes, extensions and private use */
                if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
                    /* write the value for the attributes */
                    for (attr = firstAttr; attr; attr = attr->next) {
                        sink.Append("-", 1);
                        sink.Append(
                                attr->attribute, static_cast<int32_t>(uprv_strlen(attr->attribute)));
                    }
                } else {
                    sink.Append("-", 1);
                    sink.Append(ext->key, static_cast<int32_t>(uprv_strlen(ext->key)));
                    if (uprv_strcmp(ext->value, "true") != 0 &&
                        uprv_strcmp(ext->value, "yes") != 0) {
                      sink.Append("-", 1);
                      sink.Append(ext->value, static_cast<int32_t>(uprv_strlen(ext->value)));
                    }
                }
            }
        }
    }
}

/**
 * Append keywords parsed from LDML extension value
 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
 *
 * The value is scanned in two phases. Leading subtags, up to the first one
 * accepted by ultag_isUnicodeLocaleKey(), are collected as attributes and
 * emitted as a single keyword with key LOCALE_ATTRIBUTE_KEY whose value is
 * the attributes joined by '-'. The remaining subtags are read as
 * key / type-subtag groups; keys and types are converted with
 * uloc_toLegacyKey() / uloc_toLegacyType(), and a key without type subtags
 * gets the type LOCALE_TYPE_YES.
 *
 * The keywords are first gathered in a local list and merged into *appendTo
 * only at the very end, so nothing is added to *appendTo when the function
 * returns early with an error.
 *
 * @param ldmlext       the extension value to parse (NUL-terminated)
 * @param appendTo      list that receives the resulting keywords
 * @param extPool       pool from which the ExtensionListEntry objects are created
 * @param kwdBuf        pool that owns the strings created here for keys and
 *                      values; entries added to appendTo point into it
 * @param posixVariant  in: true if variants already exist, which suppresses
 *                      the POSIX special case. out: true if the
 *                      POSIX_KEY / POSIX_VALUE pair was found and must be
 *                      treated as a variant by the caller, false otherwise.
 * @param status        in/out error code; the function is a no-op if it
 *                      already indicates a failure. Set to
 *                      U_MEMORY_ALLOCATION_ERROR when a pool allocation
 *                      fails and to U_ILLEGAL_ARGUMENT_ERROR when a key is
 *                      too long, a key or type cannot be converted, or the
 *                      attribute keyword cannot be added.
 */
void
_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, icu::MemoryPool<ExtensionListEntry>& extPool, icu::MemoryPool<icu::CharString>& kwdBuf, bool& posixVariant, UErrorCode& status) {
    if (U_FAILURE(status)) { return; }

    const char *pTag;   /* beginning of current subtag */
    const char *pKwds;  /* beginning of key-type pairs */
    bool variantExists = posixVariant;

    ExtensionListEntry *kwdFirst = nullptr;    /* first LDML keyword */
    ExtensionListEntry *kwd, *nextKwd;

    int32_t len;

    /* Reset the posixVariant value */
    posixVariant = false;

    pTag = ldmlext;
    pKwds = nullptr;

    {
        AttributeListEntry *attrFirst = nullptr;   /* first attribute */
        AttributeListEntry *attr, *nextAttr;

        /* these pools only need to live until the attribute keyword is built */
        icu::MemoryPool<icu::CharString> strPool;
        icu::MemoryPool<AttributeListEntry> attrPool;

        /* Iterate through u extension attributes */
        while (*pTag) {
            /* locate next separator char */
            for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);

            /* the first key ends the attribute section */
            if (ultag_isUnicodeLocaleKey(pTag, len)) {
                pKwds = pTag;
                break;
            }

            /* add this attribute to the list */
            attr = attrPool.create();
            if (attr == nullptr) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }

            if (icu::CharString* str = strPool.create(pTag, len, status)) {
                if (U_FAILURE(status)) { return; }
                attr->attribute = str->data();
            } else {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }

            // duplicate attribute is ignored, causes no error.
            _addAttributeToList(&attrFirst, attr);

            /* next tag */
            pTag += len;
            if (*pTag) {
                /* next to the separator */
                pTag++;
            }
        }

        if (attrFirst) {
            /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */

            kwd = extPool.create();
            if (kwd == nullptr) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }

            /* the joined value must outlive this scope, so it comes from kwdBuf */
            icu::CharString* value = kwdBuf.create();
            if (value == nullptr) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }

            /* attribute subtags sorted in alphabetical order as type */
            attr = attrFirst;
            while (attr != nullptr) {
                nextAttr = attr->next;
                if (attr != attrFirst) {
                    value->append('-', status);
                }
                value->append(attr->attribute, status);
                attr = nextAttr;
            }
            if (U_FAILURE(status)) {
                return;
            }

            kwd->key = LOCALE_ATTRIBUTE_KEY;
            kwd->value = value->data();

            if (!_addExtensionToList(&kwdFirst, kwd, false)) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
                return;
            }
        }
    }

    if (pKwds) {
        const char *pBcpKey = nullptr;     /* u extension key subtag */
        const char *pBcpType = nullptr;    /* beginning of u extension type subtag(s) */
        int32_t bcpKeyLen = 0;
        int32_t bcpTypeLen = 0;
        bool isDone = false;

        pTag = pKwds;
        /* BCP47 representation of LDML key/type pairs */
        while (!isDone) {
            const char *pNextBcpKey = nullptr;
            int32_t nextBcpKeyLen = 0;
            bool emitKeyword = false;

            if (*pTag) {
                /* locate next separator char */
                for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);

                if (ultag_isUnicodeLocaleKey(pTag, len)) {
                    if (pBcpKey) {
                        /* a new key closes the pending key/type group */
                        emitKeyword = true;
                        pNextBcpKey = pTag;
                        nextBcpKeyLen = len;
                    } else {
                        pBcpKey = pTag;
                        bcpKeyLen = len;
                    }
                } else {
                    U_ASSERT(pBcpKey != nullptr);
                    /* within LDML type subtags */
                    if (pBcpType) {
                        /* extend the type over the separator and this subtag */
                        bcpTypeLen += (len + 1);
                    } else {
                        pBcpType = pTag;
                        bcpTypeLen = len;
                    }
                }

                /* next tag */
                pTag += len;
                if (*pTag) {
                    /* next to the separator */
                    pTag++;
                }
            } else {
                /* processing last one */
                emitKeyword = true;
                isDone = true;
            }

            if (emitKeyword) {
                const char *pKey = nullptr;    /* LDML key */
                const char *pType = nullptr;   /* LDML type */

                char bcpKeyBuf[3];          /* BCP key length is always 2 for now */

                U_ASSERT(pBcpKey != nullptr);

                if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) {
                    /* the BCP key is invalid */
                    status = U_ILLEGAL_ARGUMENT_ERROR;
                    return;
                }
                U_ASSERT(bcpKeyLen <= 2);

                /* NUL-terminated copy of the key for the lookup below */
                uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
                bcpKeyBuf[bcpKeyLen] = 0;

                /* u extension key to LDML key */
                pKey = uloc_toLegacyKey(bcpKeyBuf);
                if (pKey == nullptr) {
                    status = U_ILLEGAL_ARGUMENT_ERROR;
                    return;
                }
                if (pKey == bcpKeyBuf) {
                    /*
                    The key returned by toLegacyKey points to the input buffer.
                    We normalize the result key to lower case.
                    */
                    T_CString_toLowerCase(bcpKeyBuf);
                    /* bcpKeyBuf is a local array, so keep a pooled copy */
                    icu::CharString* key = kwdBuf.create(bcpKeyBuf, bcpKeyLen, status);
                    if (key == nullptr) {
                        status = U_MEMORY_ALLOCATION_ERROR;
                        return;
                    }
                    if (U_FAILURE(status)) {
                        return;
                    }
                    pKey = key->data();
                }

                if (pBcpType) {
                    icu::CharString bcpTypeBuf(pBcpType, bcpTypeLen, status);
                    if (U_FAILURE(status)) {
                        return;
                    }

                    /* BCP type to locale type */
                    pType = uloc_toLegacyType(pKey, bcpTypeBuf.data());
                    if (pType == nullptr) {
                        status = U_ILLEGAL_ARGUMENT_ERROR;
                        return;
                    }
                    if (pType == bcpTypeBuf.data()) {
                        /*
                        The type returned by toLegacyType points to the input buffer.
                        We normalize the result type to lower case.
                        */
                        /* normalize to lower case */
                        T_CString_toLowerCase(bcpTypeBuf.data());
                        /* bcpTypeBuf is local; hand its contents over to the pool */
                        if (icu::CharString* type =
                                kwdBuf.create(std::move(bcpTypeBuf), status)) {
                            if (U_FAILURE(status)) { return; }
                            pType = type->data();
                        } else {
                            status = U_MEMORY_ALLOCATION_ERROR;
                            return;
                        }
                    }
                } else {
                    /* typeless - default type value is "yes" */
                    pType = LOCALE_TYPE_YES;
                }

                /* Special handling for u-va-posix, since we want to treat this as a variant,
                   not as a keyword */
                if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
                    posixVariant = true;
                } else {
                    /* create an ExtensionListEntry for this keyword */
                    kwd = extPool.create();
                    if (kwd == nullptr) {
                        status = U_MEMORY_ALLOCATION_ERROR;
                        return;
                    }

                    kwd->key = pKey;
                    kwd->value = pType;

                    if (!_addExtensionToList(&kwdFirst, kwd, false)) {
                        // duplicate keyword is allowed, Only the first
                        // is honored.
                    }
                }

                /* start the next group with the key that triggered this emit, if any */
                pBcpKey = pNextBcpKey;
                bcpKeyLen = pNextBcpKey != nullptr ? nextBcpKeyLen : 0;
                pBcpType = nullptr;
                bcpTypeLen = 0;
            }
        }
    }

    /* merge the locally collected keywords into the caller's list;
       next is saved first because the entry is passed on for insertion */
    kwd = kwdFirst;
    while (kwd != nullptr) {
        nextKwd = kwd->next;
        _addExtensionToList(appendTo, kwd, false);
        kwd = nextKwd;
    }
}

/**
 * Appends the extensions and private use of a parsed language tag to sink
 * in locale-ID keyword form: "@key=value;key=value...".
 *
 * Extensions whose key starts with LDMLEXT are expanded by
 * _appendLDMLExtensionAsKeywords(); every other extension, and a non-empty
 * private use value (under PRIVATEUSE_KEY), becomes one keyword. When the
 * LDML extension reported a POSIX variant, _POSIX is written before the
 * keywords. Nothing is written if an error occurred.
 *
 * @param langtag  the parsed language tag to read from
 * @param sink     receives the output bytes
 * @param status   in/out error code; the function is a no-op if it already
 *                 indicates a failure. Set to U_MEMORY_ALLOCATION_ERROR when
 *                 a list entry cannot be created and to
 *                 U_ILLEGAL_ARGUMENT_ERROR when _addExtensionToList()
 *                 rejects an entry.
 */
void
_appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode& status) {
    if (U_FAILURE(status)) { return; }

    ExtensionListEntry* sortedHead = nullptr;
    icu::MemoryPool<ExtensionListEntry> extPool;
    icu::MemoryPool<icu::CharString> kwdBuf;
    bool posixVariant = false;

    /* resolve locale keywords and reordering keys */
    const int32_t extCount = ultag_getExtensionsSize(langtag);
    for (int32_t idx = 0; idx < extCount; idx++) {
        const char* extKey = ultag_getExtensionKey(langtag, idx);
        const char* extValue = ultag_getExtensionValue(langtag, idx);

        if (*extKey != LDMLEXT) {
            /* any other extension maps one-to-one onto a keyword */
            ExtensionListEntry* entry = extPool.create();
            if (entry == nullptr) {
                status = U_MEMORY_ALLOCATION_ERROR;
                break;
            }
            entry->key = extKey;
            entry->value = extValue;
            if (!_addExtensionToList(&sortedHead, entry, false)) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
                break;
            }
            continue;
        }

        /* Determine if variants already exists */
        if (ultag_getVariantsSize(langtag) != 0) {
            posixVariant = true;
        }

        _appendLDMLExtensionAsKeywords(extValue, &sortedHead, extPool, kwdBuf, posixVariant, status);
        if (U_FAILURE(status)) {
            break;
        }
    }

    if (U_SUCCESS(status)) {
        const char* privuse = ultag_getPrivateUse(langtag);
        if (static_cast<int32_t>(uprv_strlen(privuse)) > 0) {
            /* add private use as a keyword */
            ExtensionListEntry* entry = extPool.create();
            if (entry == nullptr) {
                status = U_MEMORY_ALLOCATION_ERROR;
            } else {
                entry->key = PRIVATEUSE_KEY;
                entry->value = privuse;
                if (!_addExtensionToList(&sortedHead, entry, false)) {
                    status = U_ILLEGAL_ARGUMENT_ERROR;
                }
            }
        }
    }

    /* all output below is produced only for a successful conversion */
    if (U_FAILURE(status)) { return; }

    /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
    if (posixVariant) {
        sink.Append(_POSIX, static_cast<int32_t>(uprv_strlen(_POSIX)));
    }

    /* write out the sorted keywords: '@' introduces the first, ';' the rest */
    const char* separator = "@";
    for (const ExtensionListEntry* entry = sortedHead; entry != nullptr; entry = entry->next) {
        sink.Append(separator, 1);
        separator = ";";

        /* key */
        sink.Append(entry->key, static_cast<int32_t>(uprv_strlen(entry->key)));
        sink.Append("=", 1);

        /* type */
        sink.Append(entry->value, static_cast<int32_t>(uprv_strlen(entry->value)));
    }
}

void
_appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, bool /*hadPosix*/, UErrorCode& status) {
    if (U_FAILURE(status)) { return; }

    UErrorCode tmpStatus = U_ZERO_ERROR;
    icu::CharString buf = ulocimp_getVariant(localeID, tmpStatus);
    if (U_FAILURE(tmpStatus)) {
        if (strict) {
            status = U_ILLEGAL_ARGUMENT_ERROR;
        }
        return;
    }

    if (!buf.isEmpty()) {
        char *p, *pPriv;
        bool bNext = true;
        bool firstValue = true;
        bool writeValue;

        pPriv = nullptr;
        p = buf.data();
        while (bNext) {
            writeValue = false;
            if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
                if (*p == 0) {
                    bNext = false;
                } else {
                    *p = 0; /* terminate */
                }
                if (pPriv != nullptr) {
                    /* Private use in the canonical format is lowercase in BCP47 */
                    for (int32_t i = 0; *(pPriv + i) != 0; i++) {
                        *(pPriv + i) = uprv_tolower(*(pPriv + i));
                    }

                    /* validate */
                    if (_isPrivateuseValueSubtag(pPriv, -1)) {
                        if (firstValue) {
                            if (!_isVariantSubtag(pPriv, -1)) {
                                writeValue = true;
                            }
                        } else {
                            writeValue = true;
                        }
                    } else if (strict) {
                        status = U_ILLEGAL_ARGUMENT_ERROR;
                        break;
                    } else {
                        break;
                    }

                    if (writeValue) {
                        sink.Append("-", 1);

                        if (firstValue) {
                            sink.Append(PRIVATEUSE_KEY, UPRV_LENGTHOF(PRIVATEUSE_KEY) - 1);
                            sink.Append("-", 1);
                            sink.Append(PRIVUSE_VARIANT_PREFIX, UPRV_LENGTHOF(PRIVUSE_VARIANT_PREFIX) - 1);
                            sink.Append("-", 1);
                            firstValue = false;
                        }

                        int32_t len = (int32_t)uprv_strlen(pPriv);
                        sink.Append(pPriv, len);
                    }
                }
                /* reset private use starting position */
                pPriv = nullptr;
            } else if (pPriv == nullptr) {
                pPriv = p;
            }
            p++;
        }
    }
}

/*
* -------------------------------------------------
*
* ultag_ functions
*
* -------------------------------------------------
*/

/*
 * Bit flags used by the parser.
 * Each value is a distinct single bit, so several flags can be combined in
 * one integer mask.
 * NOTE(review): the per-flag meanings below are inferred from the macro
 * names only; confirm against their use in ultag_parse().
 */
#define LANG 0x0001 /* presumably: language subtag */
#define EXTL 0x0002 /* presumably: extlang subtag */
#define SCRT 0x0004 /* presumably: script subtag */
#define REGN 0x0008 /* presumably: region subtag */
#define VART 0x0010 /* presumably: variant subtag */
#define EXTS 0x0020 /* presumably: extension singleton */
#define EXTV 0x0040 /* presumably: extension value subtag */
#define PRIV 0x0080 /* presumably: private use */

/**
 * Ticket #12705 - The optimizer in Visual Studio 2015 Update 3 has problems optimizing this function.
 * As a work-around, optimization is disabled for this function on VS2015 and VS2017.
 * This work-around should be removed once the following versions of Visual Studio are no
 * longer supported: All versions of VS2015/VS2017, and versions of VS2019 below 16.4.
 */
#if defined(_MSC_VER) && (_MSC_VER >= 1900) && (_MSC_VER < 1924)
#pragma optimize( "", off )
#endif

/**
 * Parses a language tag into a newly allocated ULanguageTag.
 *
 * The input is first copied into a private buffer that is stored in the
 * result (t->buf). A tag that starts with an entry of the LEGACY table, or
 * otherwise with an entry of the REDUNDANT table, has that prefix replaced by
 * the table's mapped form before parsing. Subtags are then NUL-terminated and
 * case-normalized in place inside the buffer.
 *
 * Parsing is lenient: it stops at the first subtag that is not acceptable at
 * the current position (or at a duplicated variant/extension) without setting
 * an error; whatever was parsed up to that point is returned.
 *
 * @param tag       The language tag. Only tagLen characters are read when
 *                  tagLen is non-negative.
 * @param tagLen    Length of tag, or a negative value to use uprv_strlen(tag).
 * @param parsedLen If not nullptr, receives the length of the successfully
 *                  parsed part, expressed in terms of the original input (the
 *                  length change caused by a legacy/redundant replacement is
 *                  added back). It is 0 for input shorter than MINLEN, and it
 *                  is left untouched when status is already a failure on entry.
 * @param status    Error code. Nothing is done if it already indicates a
 *                  failure. Set to U_MEMORY_ALLOCATION_ERROR when allocating
 *                  the buffer or the ULanguageTag fails.
 * @return The parsed tag, owned by the caller, or nullptr on failure.
 */
ULanguageTag*
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode& status) {
    if (U_FAILURE(status)) { return nullptr; }

    char *tagBuf;
    /* OR of the parser bit flags (LANG, EXTL, ...) acceptable for the next subtag */
    int16_t next;
    char *pSubtag, *pNext, *pLastGoodPosition;
    int32_t subtagLen;
    int32_t extlangIdx;
    /* extension whose singleton has been seen but which is not yet in t->extensions */
    icu::LocalPointer<ExtensionListEntry> pExtension;
    char *pExtValueSubtag, *pExtValueSubtagEnd;
    int32_t i;
    bool privateuseVar = false;
    int32_t legacyLen = 0;

    if (parsedLen != nullptr) {
        *parsedLen = 0;
    }

    if (tagLen < 0) {
        tagLen = (int32_t)uprv_strlen(tag);
    }

    /* copy the entire string */
    tagBuf = (char*)uprv_malloc(tagLen + 1);
    if (tagBuf == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    
    if (tagLen > 0) {
        uprv_memcpy(tagBuf, tag, tagLen);
    }
    *(tagBuf + tagLen) = 0;

    /* create a ULanguageTag */
    icu::LocalULanguageTagPointer t(
            (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)));
    if (t.isNull()) {
        uprv_free(tagBuf);
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    _initializeULanguageTag(t.getAlias());
    t->buf = tagBuf;

    if (tagLen < MINLEN) {
        /* the input tag is too short - return empty ULanguageTag */
        return t.orphan();
    }

    /* input length minus buffer length after a legacy/redundant replacement */
    size_t parsedLenDelta = 0;
    // A legacy tag is matched as a whole prefix. A legacy tag with an intervening
    // script or region, such as art-DE-lojban or art-Latn-lojban, won't be
    // matched.
    /* check if the tag is legacy */
    /* LEGACY holds pairs: [i] is the legacy tag, [i + 1] is its replacement */
    for (i = 0; i < UPRV_LENGTHOF(LEGACY); i += 2) {
        int32_t checkLegacyLen = static_cast<int32_t>(uprv_strlen(LEGACY[i]));
        if (tagLen < checkLegacyLen) {
            continue;
        }
        if (tagLen > checkLegacyLen && tagBuf[checkLegacyLen] != '-') {
            // make sure next char is '-'.
            continue;
        }
        if (uprv_strnicmp(LEGACY[i], tagBuf, checkLegacyLen) == 0) {
            int32_t newTagLength;

            legacyLen = checkLegacyLen;  /* back up for output parsedLen */
            int32_t replacementLen = static_cast<int32_t>(uprv_strlen(LEGACY[i+1]));
            newTagLength = replacementLen + tagLen - checkLegacyLen;
            int32_t oldTagLength = tagLen;
            if (tagLen < newTagLength) {
                /* the replacement does not fit: allocate a larger buffer */
                uprv_free(tagBuf);
                // Update t->buf right after the free and before any return, to avoid a
                // double free in the destructor of t when t goes out of scope.
                t->buf = tagBuf = (char*)uprv_malloc(newTagLength + 1);
                if (tagBuf == nullptr) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    return nullptr;
                }
                tagLen = newTagLength;
            }
            parsedLenDelta = checkLegacyLen - replacementLen;
            uprv_strcpy(t->buf, LEGACY[i + 1]);
            if (checkLegacyLen != tagLen) {
                /* append whatever followed the legacy prefix in the original input */
                uprv_memcpy(t->buf + replacementLen, tag + checkLegacyLen,
                            oldTagLength - checkLegacyLen);
                // NUL-terminate after memcpy().
                t->buf[replacementLen + oldTagLength - checkLegacyLen] = 0;
            }
            break;
        }
    }

    if (legacyLen == 0) {
        /* REDUNDANT holds pairs: [i] is the redundant tag, [i + 1] is the preferred tag */
        for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
            const char* redundantTag = REDUNDANT[i];
            size_t redundantTagLen = uprv_strlen(redundantTag);
            // The preferred tag for a redundant tag is always shorter than redundant
            // tag. A redundant tag may or may not be followed by other subtags.
            // (i.e. "zh-yue" or "zh-yue-u-co-pinyin").
            if (uprv_strnicmp(redundantTag, tagBuf, static_cast<uint32_t>(redundantTagLen)) == 0) {
                const char* redundantTagEnd = tagBuf + redundantTagLen;
                if (*redundantTagEnd  == '\0' || *redundantTagEnd == SEP) {
                    const char* preferredTag = REDUNDANT[i + 1];
                    size_t preferredTagLen = uprv_strlen(preferredTag);
                    uprv_memcpy(t->buf, preferredTag, preferredTagLen);
                    if (*redundantTagEnd == SEP) {
                        /* shift the remaining subtags (including the NUL) down */
                        uprv_memmove(tagBuf + preferredTagLen,
                                     redundantTagEnd,
                                     tagLen - redundantTagLen + 1);
                    } else {
                        tagBuf[preferredTagLen] = '\0';
                    }
                    // parsedLen should be the length of the input
                    // before redundantTag is replaced by preferredTag.
                    // Save the delta to add it back later.
                    parsedLenDelta = redundantTagLen - preferredTagLen;
                    break;
                }
            }
        }
    }

    /*
     * langtag      =   language
     *                  ["-" script]
     *                  ["-" region]
     *                  *("-" variant)
     *                  *("-" extension)
     *                  ["-" privateuse]
     */

    next = LANG | PRIV;
    pNext = pLastGoodPosition = tagBuf;
    extlangIdx = 0;
    pExtValueSubtag = nullptr;
    pExtValueSubtagEnd = nullptr;

    /* one iteration per subtag; pNext is nullptr once the last subtag was consumed */
    while (pNext) {
        char *pSep;

        pSubtag = pNext;

        /* locate next separator char */
        pSep = pSubtag;
        while (*pSep) {
            if (*pSep == SEP) {
                break;
            }
            pSep++;
        }
        if (*pSep == 0) {
            /* last subtag */
            pNext = nullptr;
        } else {
            pNext = pSep + 1;
        }
        subtagLen = (int32_t)(pSep - pSubtag);

        if (next & LANG) {
            if (ultag_isLanguageSubtag(pSubtag, subtagLen)) {
                *pSep = 0;  /* terminate */
                // TODO: move deprecated language code handling here.
                t->language = T_CString_toLowerCase(pSubtag);

                pLastGoodPosition = pSep;
                next = SCRT | REGN | VART | EXTS | PRIV;
                /* extlang is only accepted after a language subtag of at most 3 characters */
                if (subtagLen <= 3)
                  next |= EXTL;
                continue;
            }
        }
        if (next & EXTL) {
            if (_isExtlangSubtag(pSubtag, subtagLen)) {
                *pSep = 0;
                t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);

                pLastGoodPosition = pSep;
                /* accept further extlang subtags only while fewer than 3 are stored */
                if (extlangIdx < 3) {
                    next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
                } else {
                    next = SCRT | REGN | VART | EXTS | PRIV;
                }
                continue;
            }
        }
        if (next & SCRT) {
            if (ultag_isScriptSubtag(pSubtag, subtagLen)) {
                char *p = pSubtag;

                *pSep = 0;

                /* to title case */
                *p = uprv_toupper(*p);
                p++;
                for (; *p; p++) {
                    *p = uprv_tolower(*p);
                }

                t->script = pSubtag;

                pLastGoodPosition = pSep;
                next = REGN | VART | EXTS | PRIV;
                continue;
            }
        }
        if (next & REGN) {
            if (ultag_isRegionSubtag(pSubtag, subtagLen)) {
                *pSep = 0;
                // TODO: move deprecated region code handling here.
                t->region = T_CString_toUpperCase(pSubtag);

                pLastGoodPosition = pSep;
                next = VART | EXTS | PRIV;
                continue;
            }
        }
        if (next & VART) {
            /* privateuseVar is set once the private use variant prefix has been seen */
            if (_isVariantSubtag(pSubtag, subtagLen) ||
               (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
                icu::LocalPointer<VariantListEntry> var(new VariantListEntry, status);
                if (U_FAILURE(status)) {
                    return nullptr;
                }
                *pSep = 0;
                var->variant = T_CString_toUpperCase(pSubtag);
                if (!_addVariantToList(&(t->variants), std::move(var))) {
                    /* duplicated variant entry */
                    break;
                }
                pLastGoodPosition = pSep;
                next = VART | EXTS | PRIV;
                continue;
            }
        }
        if (next & EXTS) {
            if (_isExtensionSingleton(pSubtag, subtagLen)) {
                /* a new singleton completes the extension that is currently pending */
                if (pExtension.isValid()) {
                    if (pExtValueSubtag == nullptr || pExtValueSubtagEnd == nullptr) {
                        /* the previous extension is incomplete */
                        delete pExtension.orphan();
                        break;
                    }

                    /* terminate the previous extension value */
                    *pExtValueSubtagEnd = 0;
                    pExtension->value = T_CString_toLowerCase(pExtValueSubtag);

                    /* insert the extension to the list */
                    if (_addExtensionToList(&(t->extensions), pExtension.getAlias(), false)) {
                        /* the entry now belongs to the list */
                        pExtension.orphan();
                        pLastGoodPosition = pExtValueSubtagEnd;
                    } else {
                        /* stop parsing here */
                        delete pExtension.orphan();
                        break;
                    }
                }

                /* create a new extension */
                pExtension.adoptInsteadAndCheckErrorCode(new ExtensionListEntry, status);
                if (U_FAILURE(status)) {
                    return nullptr;
                }
                *pSep = 0;
                pExtension->key = T_CString_toLowerCase(pSubtag);
                pExtension->value = nullptr;   /* will be set later */

                /*
                 * reset the start and the end location of extension value
                 * subtags for this extension
                 */
                pExtValueSubtag = nullptr;
                pExtValueSubtagEnd = nullptr;

                next = EXTV;
                continue;
            }
        }
        if (next & EXTV) {
            if (_isExtensionSubtag(pSubtag, subtagLen)) {
                if (pExtValueSubtag == nullptr) {
                    /* if the start position of this extension's value is not set yet,
                        this one is the first value subtag */
                    pExtValueSubtag = pSubtag;
                }

                /* Mark the end of this subtag */
                pExtValueSubtagEnd = pSep;
                next = EXTS | EXTV | PRIV;

                continue;
            }
        }
        if (next & PRIV) {
            if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) {
                char *pPrivuseVal;

                if (pExtension.isValid()) {
                    /* Process the last extension */
                    if (pExtValueSubtag == nullptr || pExtValueSubtagEnd == nullptr) {
                        /* the previous extension is incomplete */
                        delete pExtension.orphan();
                        break;
                    } else {
                        /* terminate the previous extension value */
                        *pExtValueSubtagEnd = 0;
                        pExtension->value = T_CString_toLowerCase(pExtValueSubtag);

                        /* insert the extension to the list */
                        if (_addExtensionToList(&(t->extensions), pExtension.getAlias(), false)) {
                            pExtension.orphan();
                            pLastGoodPosition = pExtValueSubtagEnd;
                        } else {
                        /* stop parsing here */
                            delete pExtension.orphan();
                            break;
                        }
                    }
                }

                /* The rest of part will be private use value subtags */
                if (pNext == nullptr) {
                    /* empty private use subtag */
                    break;
                }
                /* back up the private use value start position */
                pPrivuseVal = pNext;

                /* validate private use value subtags */
                while (pNext) {
                    pSubtag = pNext;
                    pSep = pSubtag;
                    while (*pSep) {
                        if (*pSep == SEP) {
                            break;
                        }
                        pSep++;
                    }
                    if (*pSep == 0) {
                        /* last subtag */
                        pNext = nullptr;
                    } else {
                        pNext = pSep + 1;
                    }
                    subtagLen = (int32_t)(pSep - pSubtag);

                    if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
                        /* the subtags after the prefix are parsed as variants by the outer loop */
                        *pSep = 0;
                        next = VART;
                        privateuseVar = true;
                        break;
                    } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
                        pLastGoodPosition = pSep;
                    } else {
                        break;
                    }
                }

                if (next == VART) {
                    continue;
                }

                /* store the private use value only if at least one value subtag was accepted */
                if (pLastGoodPosition - pPrivuseVal > 0) {
                    *pLastGoodPosition = 0;
                    t->privateuse = T_CString_toLowerCase(pPrivuseVal);
                }
                /* No more subtags, exiting the parse loop */
                break;
            }
            break;
        }

        /* If we fell through here, it means this subtag is illegal - quit parsing */
        break;
    }

    if (pExtension.isValid()) {
        /* Process the last extension */
        if (pExtValueSubtag == nullptr || pExtValueSubtagEnd == nullptr) {
            /* the previous extension is incomplete */
            delete pExtension.orphan();
        } else {
            /* terminate the previous extension value */
            *pExtValueSubtagEnd = 0;
            pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
            /* insert the extension to the list */
            if (_addExtensionToList(&(t->extensions), pExtension.getAlias(), false)) {
                pExtension.orphan();
                pLastGoodPosition = pExtValueSubtagEnd;
            } else {
                delete pExtension.orphan();
            }
        }
    }

    if (parsedLen != nullptr) {
        /* report the length in terms of the original input, not the rewritten buffer */
        *parsedLen = (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
    }

    return t.orphan();
}

// Ticket #12705 - Turn optimization back on.
#if defined(_MSC_VER) && (_MSC_VER >= 1900) && (_MSC_VER < 1924)
#pragma optimize( "", on )
#endif

/**
 * Releases a ULanguageTag together with everything it owns: the subtag
 * buffer, every entry of the variant list and every entry of the extension
 * list. Passing nullptr is allowed and does nothing.
 */
void
ultag_close(ULanguageTag* langtag) {
    if (langtag != nullptr) {
        uprv_free(langtag->buf);

        /* release the variant list, reading each link before its node is deleted */
        for (VariantListEntry *variantNode = langtag->variants; variantNode != nullptr; ) {
            VariantListEntry *following = variantNode->next;
            delete variantNode;
            variantNode = following;
        }

        /* release the extension list the same way */
        for (ExtensionListEntry *extensionNode = langtag->extensions; extensionNode != nullptr; ) {
            ExtensionListEntry *following = extensionNode->next;
            delete extensionNode;
            extensionNode = following;
        }

        uprv_free(langtag);
    }
}

/**
 * Returns the language field of the parsed tag.
 * langtag is dereferenced unconditionally, so it must not be nullptr.
 */
const char*
ultag_getLanguage(const ULanguageTag* langtag) {
    return langtag->language;
}

#if 0
/*
 * Currently compiled out by the surrounding "#if 0".
 *
 * Looks up the tag's language in DEPRECATEDLANGS, which is walked as a
 * nullptr-terminated list of pairs: when the language equals the first
 * element of a pair, the second element is returned. Otherwise the language
 * is returned unchanged.
 */
const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    int32_t i;
    for (i = 0; DEPRECATEDLANGS[i] != nullptr; i += 2) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) {
            return DEPRECATEDLANGS[i + 1];
        }
    }
    return langtag->language;
}
#endif

/**
 * Returns the extlang subtag stored at position idx, or nullptr when idx is
 * outside the range [0, MAXEXTLANG).
 */
const char*
ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
    const bool outOfRange = (idx < 0) || (idx >= MAXEXTLANG);
    return outOfRange ? nullptr : langtag->extlang[idx];
}

/**
 * Returns how many of the MAXEXTLANG extlang slots of the tag are non-null.
 */
int32_t
ultag_getExtlangSize(const ULanguageTag* langtag) {
    int32_t count = 0;
    for (const char* slot : langtag->extlang) {
        if (slot != nullptr) {
            ++count;
        }
    }
    return count;
}

/**
 * Returns the script field of the parsed tag.
 * langtag is dereferenced unconditionally, so it must not be nullptr.
 */
const char*
ultag_getScript(const ULanguageTag* langtag) {
    return langtag->script;
}

/**
 * Returns the region field of the parsed tag.
 * langtag is dereferenced unconditionally, so it must not be nullptr.
 */
const char*
ultag_getRegion(const ULanguageTag* langtag) {
    return langtag->region;
}

const char*
ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
    const char *var = nullptr;
    VariantListEntry *cur = langtag->variants;
    int32_t i = 0;
    while (cur) {
        if (i == idx) {
            var = cur->variant;
            break;
        }
        cur = cur->next;
        i++;
    }
    return var;
}

/**
 * Returns the number of entries in the tag's variant list.
 */
int32_t
ultag_getVariantsSize(const ULanguageTag* langtag) {
    int32_t count = 0;
    for (const VariantListEntry *entry = langtag->variants; entry != nullptr; entry = entry->next) {
        ++count;
    }
    return count;
}

const char*
ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
    const char *key = nullptr;
    ExtensionListEntry *cur = langtag->extensions;
    int32_t i = 0;
    while (cur) {
        if (i == idx) {
            key = cur->key;
            break;
        }
        cur = cur->next;
        i++;
    }
    return key;
}

const char*
ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
    const char *val = nullptr;
    ExtensionListEntry *cur = langtag->extensions;
    int32_t i = 0;
    while (cur) {
        if (i == idx) {
            val = cur->value;
            break;
        }
        cur = cur->next;
        i++;
    }
    return val;
}

/**
 * Returns the number of entries in the tag's extension list.
 */
int32_t
ultag_getExtensionsSize(const ULanguageTag* langtag) {
    int32_t count = 0;
    for (const ExtensionListEntry *entry = langtag->extensions; entry != nullptr; entry = entry->next) {
        ++count;
    }
    return count;
}

/**
 * Returns the private use field of the parsed tag.
 * langtag is dereferenced unconditionally, so it must not be nullptr.
 */
const char*
ultag_getPrivateUse(const ULanguageTag* langtag) {
    return langtag->privateuse;
}

#if 0
/*
 * Currently compiled out by the surrounding "#if 0".
 * Returns the legacy field of the parsed tag.
 */
const char*
ultag_getLegacy(const ULanguageTag* langtag) {
    return langtag->legacy;
}
#endif

}  // namespace

/*
* -------------------------------------------------
*
* Locale/BCP47 conversion APIs, exposed as uloc_*
*
* -------------------------------------------------
*/
/*
 * C API entry point: writes the language tag for localeID into the caller's
 * buffer (langtag / langtagCapacity). The conversion itself is done by the
 * ByteSink overload of ulocimp_toLanguageTag(); ByteSinkUtil routes its
 * output into the buffer and produces the return value.
 */
U_CAPI int32_t U_EXPORT2
uloc_toLanguageTag(const char* localeID,
                   char* langtag,
                   int32_t langtagCapacity,
                   UBool strict,
                   UErrorCode* status) {
    auto writeTag = [&](icu::ByteSink& sink, UErrorCode& errorCode) {
        ulocimp_toLanguageTag(localeID, sink, strict, errorCode);
    };
    return icu::ByteSinkUtil::viaByteSinkToTerminatedChars(
        langtag, langtagCapacity, writeTag, *status);
}

/*
 * Convenience overload: returns the language tag for localeID as a
 * CharString. The conversion itself is done by the ByteSink overload of
 * ulocimp_toLanguageTag().
 */
U_EXPORT icu::CharString
ulocimp_toLanguageTag(const char* localeID,
                      bool strict,
                      UErrorCode& status) {
    auto writeTag = [&](icu::ByteSink& sink, UErrorCode& errorCode) {
        ulocimp_toLanguageTag(localeID, sink, strict, errorCode);
    };
    return icu::ByteSinkUtil::viaByteSinkToCharString(writeTag, status);
}

/*
 * Converts localeID to a language tag and appends the result to sink.
 *
 * The locale ID is canonicalized first; a canonicalization failure is
 * reported through status. A canonical ID that consists of keywords only,
 * with the private use key as its single keyword, is handled as a special
 * case and produces "und-x-<value>". In every other case the tag is
 * assembled piece by piece from language, script, region, variants,
 * keywords and private use.
 *
 * With strict set, an ill-formed private use value in the special case is
 * reported as U_ILLEGAL_ARGUMENT_ERROR; strict is also passed on to the
 * helpers that append the individual pieces.
 */
U_EXPORT void
ulocimp_toLanguageTag(const char* localeID,
                      icu::ByteSink& sink,
                      bool strict,
                      UErrorCode& status) {
    if (U_FAILURE(status)) { return; }

    UErrorCode localStatus = U_ZERO_ERROR;

    /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
    icu::CharString canonical = ulocimp_canonicalize(localeID, localStatus);
    if (U_FAILURE(localStatus)) {
        status = localStatus;
        return;
    }
    const char* const canonicalID = canonical.data();

    /* Special case: the ID starts with its keywords, i.e. it may be a private use only tag */
    if (locale_getKeywordsStart(canonicalID) == canonicalID) {
        icu::LocalUEnumerationPointer keywords(uloc_openKeywords(canonicalID, &localStatus));
        if (U_SUCCESS(localStatus) && uenum_count(keywords.getAlias(), &localStatus) == 1) {
            int32_t keyLength = 0;
            const char *onlyKey = uenum_next(keywords.getAlias(), &keyLength, &localStatus);
            if (keyLength == 1 && *onlyKey == PRIVATEUSE) {
                icu::CharString value = ulocimp_getKeywordValue(localeID, onlyKey, localStatus);
                if (U_FAILURE(localStatus)) {
                    status = U_ILLEGAL_ARGUMENT_ERROR;
                    return;
                }
                if (ultag_isPrivateuseValueSubtags(value.data(), value.length())) {
                    /* the whole result is a private use only tag */
                    sink.Append("und-x-", 6);
                    sink.Append(value.data(), value.length());
                    return;
                }
                if (strict) {
                    status = U_ILLEGAL_ARGUMENT_ERROR;
                    return;
                }
                /* not strict: fall through to the general path below */
            }
        }
    }

    bool hadPosix = false;
    _appendLanguageToLanguageTag(canonicalID, sink, strict, status);
    _appendScriptToLanguageTag(canonicalID, sink, strict, status);
    _appendRegionToLanguageTag(canonicalID, sink, strict, status);
    _appendVariantsToLanguageTag(canonicalID, sink, strict, hadPosix, status);
    _appendKeywordsToLanguageTag(canonicalID, sink, strict, hadPosix, status);
    _appendPrivateuseToLanguageTag(canonicalID, sink, strict, hadPosix, status);
}


/*
 * C API entry point: writes the locale ID for the NUL-terminated language
 * tag langtag into the caller's buffer (localeID / localeIDCapacity). The
 * conversion itself is done by the ByteSink overload of
 * ulocimp_forLanguageTag(), called with a tag length of -1; parsedLength is
 * handed through to it unchanged.
 */
U_CAPI int32_t U_EXPORT2
uloc_forLanguageTag(const char* langtag,
                    char* localeID,
                    int32_t localeIDCapacity,
                    int32_t* parsedLength,
                    UErrorCode* status) {
    auto writeLocaleID = [&](icu::ByteSink& sink, UErrorCode& errorCode) {
        ulocimp_forLanguageTag(langtag, -1, sink, parsedLength, errorCode);
    };
    return icu::ByteSinkUtil::viaByteSinkToTerminatedChars(
        localeID, localeIDCapacity, writeLocaleID, *status);
}

/*
 * Convenience overload: returns the locale ID for the language tag as a
 * CharString. The conversion itself is done by the ByteSink overload of
 * ulocimp_forLanguageTag(); tagLen and parsedLength are handed through to it
 * unchanged.
 */
U_EXPORT icu::CharString
ulocimp_forLanguageTag(const char* langtag,
                       int32_t tagLen,
                       int32_t* parsedLength,
                       UErrorCode& status) {
    auto writeLocaleID = [&](icu::ByteSink& sink, UErrorCode& errorCode) {
        ulocimp_forLanguageTag(langtag, tagLen, sink, parsedLength, errorCode);
    };
    return icu::ByteSinkUtil::viaByteSinkToCharString(writeLocaleID, status);
}

/*
 * Converts a language tag to a locale ID and appends the result to sink.
 *
 * The tag is parsed with ultag_parse(), which also fills in parsedLength.
 * The locale ID is then written as:
 *   - the language (the first extlang when there is one; LANG_UND is omitted),
 *   - "_" + script, with its first character upper-cased,
 *   - "_" + region, upper-cased,
 *   - the sorted variants, upper-cased, each preceded by "_" (plus one extra
 *     "_" in place of the missing region when no region was written),
 *   - the keywords built from the extensions and the private use value. If
 *     nothing was written before them and there is at least one extension,
 *     LANG_UND is written first.
 */
U_EXPORT void
ulocimp_forLanguageTag(const char* langtag,
                       int32_t tagLen,
                       icu::ByteSink& sink,
                       int32_t* parsedLength,
                       UErrorCode& status) {
    if (U_FAILURE(status)) { return; }

    icu::LocalULanguageTagPointer lt(ultag_parse(langtag, tagLen, parsedLength, status));
    if (U_FAILURE(status)) {
        return;
    }
    ULanguageTag* const parsed = lt.getAlias();

    /* appends text to the sink in upper case, one character at a time */
    auto appendUpperCased = [&sink](const char* text) {
        for (; *text != 0; ++text) {
            char upper = uprv_toupper(*text);
            sink.Append(&upper, 1);
        }
    };

    bool wroteAnything = false;
    bool wroteRegion = false;

    /* language: the first extlang, when present, takes the place of the language */
    const char* piece = ultag_getExtlangSize(parsed) > 0
            ? ultag_getExtlang(parsed, 0)
            : ultag_getLanguage(parsed);
    if (uprv_compareInvCharsAsAscii(piece, LANG_UND) != 0) {
        const int32_t languageLen = static_cast<int32_t>(uprv_strlen(piece));
        if (languageLen > 0) {
            sink.Append(piece, languageLen);
            wroteAnything = true;
        }
    }

    /* script, written in title case */
    piece = ultag_getScript(parsed);
    const int32_t scriptLen = static_cast<int32_t>(uprv_strlen(piece));
    if (scriptLen > 0) {
        sink.Append("_", 1);
        wroteAnything = true;

        char initial = uprv_toupper(*piece);
        sink.Append(&initial, 1);
        sink.Append(piece + 1, scriptLen - 1);
    }

    /* region, written in upper case */
    piece = ultag_getRegion(parsed);
    if (static_cast<int32_t>(uprv_strlen(piece)) > 0) {
        sink.Append("_", 1);
        wroteAnything = true;
        appendUpperCased(piece);
        wroteRegion = true;
    }

    /* variants, sorted first and written in upper case */
    _sortVariants(parsed->variants);
    const int32_t variantCount = ultag_getVariantsSize(parsed);
    if (variantCount > 0) {
        if (!wroteRegion) {
            /* stands in for the missing region */
            sink.Append("_", 1);
            wroteAnything = true;
        }
        for (int32_t index = 0; index < variantCount; ++index) {
            sink.Append("_", 1);
            appendUpperCased(ultag_getVariant(parsed, index));
        }
    }

    /* keywords: extensions and private use */
    const int32_t extensionCount = ultag_getExtensionsSize(parsed);
    const char* privateUse = ultag_getPrivateUse(parsed);
    if (extensionCount > 0 || uprv_strlen(privateUse) > 0) {
        if (!wroteAnything && extensionCount > 0) {
            /* need a language */
            sink.Append(LANG_UND, LANG_UND_LEN);
        }
        _appendKeywords(parsed, sink, status);
    }
}
