/*
    Copyright (C) 2018, 2020 Daniel Schultz

    This file is part of FLINT.

    FLINT is free software: you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License (LGPL) as published
    by the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.  See <https://www.gnu.org/licenses/>.
*/

#include "mpn_extras.h"
#include "nmod.h"
#include "nmod_vec.h"
#include "mpoly.h"
#include "nmod_mpoly.h"

/*
    Merge the term lists (Bcoeffs, Bexps) and (Ccoeffs, Cexps) into
    (Acoeffs, Aexps) so that A = B + d*C, for exponents that fit in one word.

    Two exponents are compared after xor-ing each with maskhi; at every step
    the term with the larger masked exponent is emitted first.

      - a term found only in B is copied through unchanged (no zero check);
      - a term found in both gets nmod_addmul(Bcoeff, Ccoeff, d, fctx);
      - a term found only in C gets nmod_mul(Ccoeff, d, fctx).

    A computed coefficient equal to zero is not kept: the output index is
    simply not advanced, so the slot is overwritten by the next term.

    At most Blen + Clen terms are written, so the output arrays must have room
    for that many. d is presumably already reduced modulo fctx.n (the public
    entry point nmod_mpoly_scalar_addmul_ui reduces it before calling down).

    Returns the number of terms stored in A.
*/
slong _nmod_mpoly_scalar_addmul_ui1(
    ulong * Acoeffs, ulong * Aexps,
    const ulong * Bcoeffs, const ulong * Bexps, slong Blen,
    const ulong * Ccoeffs, const ulong * Cexps, slong Clen,
    ulong d,
    ulong maskhi,
    nmod_t fctx)
{
    slong bi = 0, ci = 0, Alen = 0;

    while (bi < Blen && ci < Clen)
    {
        const ulong bkey = Bexps[bi] ^ maskhi;
        const ulong ckey = Cexps[ci] ^ maskhi;

        if (bkey > ckey)
        {
            /* monomial only present in B */
            Aexps[Alen] = Bexps[bi];
            Acoeffs[Alen] = Bcoeffs[bi];
            bi++;
            Alen++;
            continue;
        }

        if (bkey == ckey)
        {
            /* monomial present in both: combine, then consume the B term */
            Aexps[Alen] = Bexps[bi];
            Acoeffs[Alen] = nmod_addmul(Bcoeffs[bi], Ccoeffs[ci], d, fctx);
            bi++;
        }
        else
        {
            /* monomial only present in C */
            Aexps[Alen] = Cexps[ci];
            Acoeffs[Alen] = nmod_mul(Ccoeffs[ci], d, fctx);
        }

        /* both remaining cases consume a C term and may yield zero */
        ci++;
        if (Acoeffs[Alen] != 0)
            Alen++;
    }

    /* leftover terms of B are copied verbatim */
    for ( ; bi < Blen; bi++, Alen++)
    {
        Aexps[Alen] = Bexps[bi];
        Acoeffs[Alen] = Bcoeffs[bi];
    }

    /* leftover terms of C are scaled by d, skipping zero products */
    for ( ; ci < Clen; ci++)
    {
        Aexps[Alen] = Cexps[ci];
        Acoeffs[Alen] = nmod_mul(Ccoeffs[ci], d, fctx);
        if (Acoeffs[Alen] != 0)
            Alen++;
    }

    return Alen;
}

/*
    Merge the term lists of B and C into A so that A = B + d*C, where every
    exponent vector occupies N words.

    When N == 1 the work is handed to _nmod_mpoly_scalar_addmul_ui1 with
    cmpmask[0] as the mask. Otherwise monomials are compared with
    mpoly_monomial_cmp(..., N, cmpmask) and copied with mpoly_monomial_set:

      - comparison > 0: the B term is copied through unchanged;
      - comparison = 0: the coefficient is nmod_addmul(Bcoeff, Ccoeff, d, fctx);
      - comparison < 0: the coefficient is nmod_mul(Ccoeff, d, fctx).

    Computed coefficients that come out as zero are dropped by not advancing
    the output index. At most Blen + Clen terms are written.

    Returns the number of terms stored in A.
*/
static slong _nmod_mpoly_scalar_addmul_ui(
    ulong * Acoeffs, ulong * Aexps,
    const ulong * Bcoeffs, const ulong * Bexps, slong Blen,
    const ulong * Ccoeffs, const ulong * Cexps, slong Clen,
    ulong d,
    slong N,
    const ulong * cmpmask,
    nmod_t fctx)
{
    slong bi = 0, ci = 0, Alen = 0;

    /* single-word exponents have a dedicated routine */
    if (N == 1)
    {
        return _nmod_mpoly_scalar_addmul_ui1(Acoeffs, Aexps,
                                             Bcoeffs, Bexps, Blen,
                                             Ccoeffs, Cexps, Clen,
                                             d, cmpmask[0], fctx);
    }

    while (bi < Blen && ci < Clen)
    {
        const int order = mpoly_monomial_cmp(Bexps + bi*N, Cexps + ci*N,
                                                                  N, cmpmask);

        if (order > 0)
        {
            /* monomial only present in B */
            mpoly_monomial_set(Aexps + Alen*N, Bexps + bi*N, N);
            Acoeffs[Alen] = Bcoeffs[bi];
            bi++;
            Alen++;
            continue;
        }

        if (order < 0)
        {
            /* monomial only present in C */
            mpoly_monomial_set(Aexps + Alen*N, Cexps + ci*N, N);
            Acoeffs[Alen] = nmod_mul(Ccoeffs[ci], d, fctx);
        }
        else
        {
            /* monomial present in both: combine, then consume the B term */
            mpoly_monomial_set(Aexps + Alen*N, Bexps + bi*N, N);
            Acoeffs[Alen] = nmod_addmul(Bcoeffs[bi], Ccoeffs[ci], d, fctx);
            bi++;
        }

        /* both remaining cases consume a C term and may yield zero */
        ci++;
        if (Acoeffs[Alen] != 0)
            Alen++;
    }

    /* leftover terms of B are copied verbatim */
    for ( ; bi < Blen; bi++, Alen++)
    {
        mpoly_monomial_set(Aexps + Alen*N, Bexps + bi*N, N);
        Acoeffs[Alen] = Bcoeffs[bi];
    }

    /* leftover terms of C are scaled by d, skipping zero products */
    for ( ; ci < Clen; ci++)
    {
        mpoly_monomial_set(Aexps + Alen*N, Cexps + ci*N, N);
        Acoeffs[Alen] = nmod_mul(Ccoeffs[ci], d, fctx);
        if (Acoeffs[Alen] != 0)
            Alen++;
    }

    return Alen;
}

/*
    Set A = B + d*C.

    d is first reduced modulo ctx->mod.n if it is not already smaller.
    Shortcuts:
      - B has no terms:            A = d*C via nmod_mpoly_scalar_mul_ui;
      - C has no terms or d == 0:  A = B   via nmod_mpoly_set.

    Otherwise both operands are brought to the common bit count
    max(B->bits, C->bits); an operand whose bit count differs is repacked into
    a buffer obtained from flint_malloc, which is released before returning.
    If A is the same object as B or C, the merge is written into a temporary
    polynomial that is then swapped into A; otherwise it is written straight
    into A after reserving room for B->length + C->length terms.
*/
void nmod_mpoly_scalar_addmul_ui(
    nmod_mpoly_t A,
    const nmod_mpoly_t B,
    const nmod_mpoly_t C,
    ulong d,
    const nmod_mpoly_ctx_t ctx)
{
    ulong bits;
    slong N, maxlen;
    ulong * Bpacked = B->exps;
    ulong * Cpacked = C->exps;
    ulong * cmpmask;
    int Bowned, Cowned;
    TMP_INIT;

    if (d >= ctx->mod.n)
        NMOD_RED(d, d, ctx->mod);

    /* 0 + d*C */
    if (B->length == 0)
    {
        nmod_mpoly_scalar_mul_ui(A, C, d, ctx);
        return;
    }

    /* B + 0 */
    if (C->length == 0 || d == 0)
    {
        nmod_mpoly_set(A, B, ctx);
        return;
    }

    TMP_START;

    bits = FLINT_MAX(B->bits, C->bits);
    N = mpoly_words_per_exp(bits, ctx->minfo);
    cmpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong));
    mpoly_get_cmpmask(cmpmask, N, bits, ctx->minfo);

    /* repack whichever operand is not already at the common bit count */
    Bowned = (bits != B->bits);
    if (Bowned)
    {
        Bpacked = (ulong *) flint_malloc(N*B->length*sizeof(ulong));
        mpoly_repack_monomials(Bpacked, bits, B->exps, B->bits,
                                                    B->length, ctx->minfo);
    }

    Cowned = (bits != C->bits);
    if (Cowned)
    {
        Cpacked = (ulong *) flint_malloc(N*C->length*sizeof(ulong));
        mpoly_repack_monomials(Cpacked, bits, C->exps, C->bits,
                                                    C->length, ctx->minfo);
    }

    /* upper bound on the number of terms the merge can produce */
    maxlen = B->length + C->length;

    if (A != B && A != C)
    {
        /* no aliasing: write the result directly into A */
        nmod_mpoly_fit_length_reset_bits(A, maxlen, bits, ctx);
        A->length = _nmod_mpoly_scalar_addmul_ui(A->coeffs, A->exps,
                                    B->coeffs, Bpacked, B->length,
                                    C->coeffs, Cpacked, C->length, d,
                                                      N, cmpmask, ctx->mod);
    }
    else
    {
        /* A is also an input: build the result elsewhere, then swap it in */
        nmod_mpoly_t T;

        nmod_mpoly_init3(T, maxlen, bits, ctx);
        T->length = _nmod_mpoly_scalar_addmul_ui(T->coeffs, T->exps,
                                    B->coeffs, Bpacked, B->length,
                                    C->coeffs, Cpacked, C->length, d,
                                                      N, cmpmask, ctx->mod);
        nmod_mpoly_swap(A, T, ctx);
        nmod_mpoly_clear(T, ctx);
    }

    if (Bowned)
        flint_free(Bpacked);

    if (Cowned)
        flint_free(Cpacked);

    TMP_END;
}

/*
    Set A = c*B, where c is assumed to be nonzero, reduced mod n and
    invertible mod n (all three are checked by FLINT_ASSERT only).

    The term count and the exponents of B are carried over unchanged and no
    zero test is performed on the products; this relies on c being a unit.

    A may be the same object as B, in which case the coefficients are scaled
    in place. When c == 1 nothing is multiplied: an aliased A is left alone and
    a distinct A receives a plain copy of B's coefficients.
*/
void nmod_mpoly_scalar_mul_nmod_invertible(
    nmod_mpoly_t A,
    const nmod_mpoly_t B,
    ulong c,
    const nmod_mpoly_ctx_t ctx)
{
    FLINT_ASSERT(c != 0);
    FLINT_ASSERT(c < ctx->mod.n);
    FLINT_ASSERT(n_gcd(c, ctx->mod.n) == 1);

    /* a distinct destination first takes on B's length and exponents */
    if (A != B)
    {
        slong N;

        nmod_mpoly_fit_length_reset_bits(A, B->length, B->bits, ctx);
        A->length = B->length;

        N = mpoly_words_per_exp(B->bits, ctx->minfo);
        flint_mpn_copyi(A->exps, B->exps, N*B->length);
    }

    if (c != UWORD(1))
    {
        _nmod_vec_scalar_mul_nmod(A->coeffs, B->coeffs, B->length, c, ctx->mod);
    }
    else if (A != B)
    {
        /* multiplying by one: the coefficients only need to be copied */
        flint_mpn_copyi(A->coeffs, B->coeffs, B->length);
    }
}


/*
    Set A = c*B for an arbitrary c, assumed to be reduced mod n (checked by
    FLINT_ASSERT only).

    Every coefficient of B is multiplied by c with nmod_mul; a term whose
    product is zero is dropped, so A can end up with fewer terms than B.
    The write position never runs ahead of the read position, which is what
    lets the loop compact the terms in place when A and B are the same object.
*/
void nmod_mpoly_scalar_mul_nmod_general(
    nmod_mpoly_t A,
    const nmod_mpoly_t B,
    ulong c,
    const nmod_mpoly_ctx_t ctx)
{
    slong N;
    slong src, dst;

    FLINT_ASSERT(c < ctx->mod.n);

    N = mpoly_words_per_exp(B->bits, ctx->minfo);

    nmod_mpoly_fit_length_reset_bits(A, B->length, B->bits, ctx);

    dst = 0;
    for (src = 0; src < B->length; src++)
    {
        ulong prod;

        /* tentatively place the term; it is kept only if prod is nonzero */
        mpoly_monomial_set(A->exps + N*dst, B->exps + N*src, N);
        prod = nmod_mul(B->coeffs[src], c, ctx->mod);
        A->coeffs[dst] = prod;

        if (prod != UWORD(0))
            dst++;
    }

    A->length = dst;
}


/*
    Set A = c*B for an arbitrary word c.

    c is reduced modulo ctx->mod.n when it is not already smaller. If the
    reduced c is zero or B is zero, A is set to zero. Otherwise the work is
    dispatched on gcd(c, n): nmod_mpoly_scalar_mul_nmod_invertible when the
    gcd is 1, nmod_mpoly_scalar_mul_nmod_general in every other case.
*/
void nmod_mpoly_scalar_mul_ui(nmod_mpoly_t A, const nmod_mpoly_t B,
                                           ulong c, const nmod_mpoly_ctx_t ctx)
{
    if (c >= ctx->mod.n)
    {
        NMOD_RED(c, c, ctx->mod);
    }

    if (c != 0 && !nmod_mpoly_is_zero(B, ctx))
    {
        if (n_gcd(c, ctx->mod.n) != 1)
            nmod_mpoly_scalar_mul_nmod_general(A, B, c, ctx);
        else
            nmod_mpoly_scalar_mul_nmod_invertible(A, B, c, ctx);
    }
    else
    {
        /* the product is identically zero */
        nmod_mpoly_zero(A, ctx);
    }
}
