/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"@(#)mlib_v_ImageZoom_NN_f.c	9.2	07/11/05 SMI"

/*
 * FUNCTION
 *      mlib_ImageZoom - image scaling with edge condition
 *
 * SYNOPSIS
 *      mlib_status mlib_ImageZoom(mlib_image        *dst,
 *                                 const mlib_image  *src,
 *                                 mlib_f32          zoomx,
 *                                 mlib_f32          zoomy,
 *                                 mlib_filter       filter,
 *                                 mlib_edge         edge)
 *
 * ARGUMENTS
 *      dst       Pointer to destination image
 *      src       Pointer to source image
 *      zoomx     X zoom factor.
 *      zoomy     Y zoom factor.
 *      filter    Type of resampling filter.
 *      edge      Type of edge condition.
 *
 * DESCRIPTION
 *  The center of the source image is mapped to the center of the
 *  destination image.
 *
 *  The upper-left corner pixel of an image is located at (0.5, 0.5).
 *
 *  The resampling filter can be one of the following:
 *    MLIB_NEAREST
 *    MLIB_BILINEAR
 *    MLIB_BICUBIC
 *    MLIB_BICUBIC2
 *
 *  The edge condition can be one of the following:
 *    MLIB_EDGE_DST_NO_WRITE  (default)
 *    MLIB_EDGE_DST_FILL_ZERO
 *    MLIB_EDGE_OP_NEAREST
 *    MLIB_EDGE_SRC_EXTEND
 *    MLIB_EDGE_SRC_PADDED
 */

#include <mlib_image.h>
#include <vis_proto.h>

/* *********************************************************** */

#define	_MLIB_VIS_VER_

#include <mlib_ImageZoom.h>

/* *********************************************************** */

/*
 * VARIABLE(FORMAT) declares the working locals used by every zoom
 * routine in this file: the row counter j, the x/y steps (dx, dy), the
 * starting source coordinates (x, y), both strides, the size of the
 * region to produce (width, height) and the source/destination
 * pointers typed as FORMAT *.
 *
 * Values are fetched with GetElemStruct()/GetElemSubStruct(), which are
 * defined in mlib_ImageZoom.h (not visible here; presumably they read
 * from the enclosing function's `param` -- confirm there).
 *
 * The expansion deliberately ends without a semicolon so that the
 * macro is used as a statement: VARIABLE(type);
 */
#define	VARIABLE(FORMAT)                                             \
	mlib_s32 j;                                                  \
	mlib_s32 dx = GetElemStruct(DX);                             \
	mlib_s32 dy = GetElemStruct(DY);                             \
	mlib_s32 x = GetElemSubStruct(current, srcX);                \
	mlib_s32 y = GetElemSubStruct(current, srcY);                \
	mlib_s32 src_stride = GetElemStruct(src_stride);             \
	mlib_s32 dst_stride = GetElemStruct(dst_stride);             \
	mlib_s32 width = GetElemSubStruct(current, width);           \
	mlib_s32 height = GetElemSubStruct(current, height);         \
	FORMAT *sp = GetElemSubStruct(current, sp);                  \
	FORMAT *dp = GetElemSubStruct(current, dp)

/* *********************************************************** */

/*
 * Nearest-neighbor zoom of a 1-channel mlib_u8 region.
 *
 * Everything the routine needs (source/destination pointers, strides,
 * region size, starting source coordinates and per-pixel steps) is
 * taken from param through the VARIABLE() locals.  x and y are used as
 * fixed-point coordinates: ">> MLIB_SHIFT" yields the source column or
 * row index.
 *
 * For each destination row:
 *   - if it maps to the same source row as the previous destination
 *     row, the previous destination row is duplicated with
 *     mlib_ImageCopy_na() instead of being resampled;
 *   - otherwise the row is resampled, two pixels per 16-bit store once
 *     the destination pointer is 2-byte aligned.
 *
 * Always returns MLIB_SUCCESS.
 */
mlib_status
mlib_ImageZoom_U8_1_Nearest(
    mlib_work_image *param)
{
	VARIABLE(mlib_u8);
	mlib_u8 *dl = dp;		/* start of current destination row */
	mlib_s32 y0 = -1;		/* y of previous row; -1 = none yet */
	mlib_u8 *tsp = sp;		/* start of current source row */

	/*
	 * NOTE(review): MLIB_MASK presumably keeps only the fractional
	 * bits, so that (y >> MLIB_SHIFT) starts at 0 and row offsets are
	 * relative to sp -- confirm in mlib_ImageZoom.h.
	 */
	y = GetElemSubStruct(current, srcY) & MLIB_MASK;

	for (j = 0; j < height; j++) {

		if ((y0 >> MLIB_SHIFT) == (y >> MLIB_SHIFT)) {
			/* same source row as before: reuse previous output */
			mlib_ImageCopy_na((mlib_u8 *)dl - dst_stride, dl,
			    width);
		} else {
			/*
			 * Named dcur (not dp) so the row cursor does not
			 * shadow the dp declared by VARIABLE().
			 */
			mlib_u8 *dcur = dl, *dend = dl + width;
			mlib_s32 s0, s1;

			x = GetElemSubStruct(current, srcX) & MLIB_MASK;

			/*
			 * Emit one pixel to reach 2-byte alignment.  The
			 * bounds check keeps an empty row from being written
			 * to, as the alignment loops of the other Nearest
			 * kernels in this file already do.
			 */
			if (((mlib_addr)dcur & 1) && (dcur < dend)) {
				(*dcur++) = tsp[x >> MLIB_SHIFT];
				x += dx;
			}

			/*
			 * Two pixels per aligned 16-bit store.  s0 is placed
			 * in the high byte, so it lands at the lower address
			 * only on a big-endian target (this is VIS code).
			 */
			for (; dcur <= dend - 2; dcur += 2) {
				s0 = tsp[x >> MLIB_SHIFT];
				x += dx;
				s1 = tsp[x >> MLIB_SHIFT];
				x += dx;
				*(mlib_s16 *)dcur = (s0 << 8) | s1;
			}

			/* at most one pixel can remain */
			if (dcur < dend) {
				(*dcur++) = tsp[x >> MLIB_SHIFT];
			}
		}

		y0 = y;
		y += dy;
		dl += dst_stride;
		tsp = sp + (y >> MLIB_SHIFT) * src_stride;
	}

	return (MLIB_SUCCESS);
}

/* *********************************************************** */

/*
 * Nearest-neighbor zoom of a 3-channel mlib_u8 region.
 *
 * Geometry and pointers come from param via the VARIABLE() locals.
 * x and y are used as fixed-point coordinates; ">> MLIB_SHIFT" gives
 * the source column/row index.  Strides are applied as byte offsets.
 *
 * Per destination row:
 *   - if it maps to the same source row as the previous destination
 *     row, the previous output row (3 * width bytes) is duplicated with
 *     mlib_ImageCopy_na();
 *   - otherwise pixels are copied one at a time until the destination
 *     pointer is 8-byte aligned, then 8 pixels (24 bytes) at a time via
 *     three 8-byte stores, then one at a time for the remainder.
 *
 * The 8-pixel loop builds its three output words with vis_faligndata().
 * The description of that intrinsic below follows the VIS instruction
 * set definition (its implementation is not visible here): with the
 * GSR alignment set to 7, faligndata(a, b) yields the last byte of a
 * followed by the first seven bytes of b, i.e. it pushes one byte in at
 * the front of the accumulator.  Bytes are therefore fed in reverse
 * order, last channel of the last pixel first.
 *
 * Always returns MLIB_SUCCESS.
 */
mlib_status
mlib_ImageZoom_U8_3_Nearest(
    mlib_work_image *param)
{
	VARIABLE(mlib_u8);
	mlib_u8 *dl = dp, *tsp, *tt;
	/* y0: y of the previous row (-1 = none yet); dx7 = 7*dx; dx15 = 15*dx */
	mlib_s32 cx, y0 = -1, dx7 = 7 * dx, dx15 = 8 * dx + dx7;

	tsp = sp;
	/*
	 * NOTE(review): MLIB_MASK presumably keeps only the fractional bits
	 * so row offsets are relative to sp -- confirm in mlib_ImageZoom.h.
	 */
	y = GetElemSubStruct(current, srcY) & MLIB_MASK;

	for (j = 0; j < height; j++) {

		if ((y0 >> MLIB_SHIFT) == (y >> MLIB_SHIFT)) {
			/* same source row as before: reuse previous output */
			mlib_ImageCopy_na(dl - dst_stride, dl, 3 * width);
		} else {
			/* this dp is a row-local cursor shadowing VARIABLE's dp */
			mlib_u8 *dp = dl, *dend = dl + 3 * width;

			/* alignment offset 7: faligndata inserts one byte */
			vis_write_gsr(7);
			x = GetElemSubStruct(current, srcX) & MLIB_MASK;

			/* scalar copy until dp is 8-byte aligned (or row ends) */
			while (((mlib_addr)dp & 7) && (dp < dend)) {
				cx = x >> MLIB_SHIFT;
				/* tt = tsp + 3 * cx: first channel of pixel cx */
				tt = tsp + 2 * cx + cx;
				dp[0] = tt[0];
				dp[1] = tt[1];
				dp[2] = tt[2];
				x += dx;
				dp += 3;
			}

			/* point x at the 8th pixel of the group (walked backwards) */
			x += dx7;
#pragma pipeloop(0)
			for (; dp <= dend - 24; dp += 24) {
				/*
				 * s0/s1/s2 are intentionally not initialised:
				 * each receives exactly 8 byte insertions, which
				 * replaces its whole content.
				 */
				mlib_d64 s0, s1, s2;

				/* pixel 7 -> s2 */
				cx = x >> MLIB_SHIFT;
				tt = tsp + 2 * cx + cx;
				x -= dx;
				s2 = vis_faligndata(vis_ld_u8_i(tt, 2), s2);
				s2 = vis_faligndata(vis_ld_u8_i(tt, 1), s2);
				s2 = vis_faligndata(vis_ld_u8_i(tt, 0), s2);

				/* pixel 6 -> s2 */
				cx = x >> MLIB_SHIFT;
				tt = tsp + 2 * cx + cx;
				x -= dx;
				s2 = vis_faligndata(vis_ld_u8_i(tt, 2), s2);
				s2 = vis_faligndata(vis_ld_u8_i(tt, 1), s2);
				s2 = vis_faligndata(vis_ld_u8_i(tt, 0), s2);

				/* pixel 5: channels 2,1 complete s2; channel 0 -> s1 */
				cx = x >> MLIB_SHIFT;
				tt = tsp + 2 * cx + cx;
				x -= dx;
				s2 = vis_faligndata(vis_ld_u8_i(tt, 2), s2);
				s2 = vis_faligndata(vis_ld_u8_i(tt, 1), s2);
				s1 = vis_faligndata(vis_ld_u8_i(tt, 0), s1);

				/* pixel 4 -> s1 */
				cx = x >> MLIB_SHIFT;
				tt = tsp + 2 * cx + cx;
				x -= dx;
				s1 = vis_faligndata(vis_ld_u8_i(tt, 2), s1);
				s1 = vis_faligndata(vis_ld_u8_i(tt, 1), s1);
				s1 = vis_faligndata(vis_ld_u8_i(tt, 0), s1);

				/* pixel 3 -> s1 */
				cx = x >> MLIB_SHIFT;
				tt = tsp + 2 * cx + cx;
				x -= dx;
				s1 = vis_faligndata(vis_ld_u8_i(tt, 2), s1);
				s1 = vis_faligndata(vis_ld_u8_i(tt, 1), s1);
				s1 = vis_faligndata(vis_ld_u8_i(tt, 0), s1);

				/* pixel 2: channel 2 completes s1; channels 1,0 -> s0 */
				cx = x >> MLIB_SHIFT;
				tt = tsp + 2 * cx + cx;
				x -= dx;
				s1 = vis_faligndata(vis_ld_u8_i(tt, 2), s1);
				s0 = vis_faligndata(vis_ld_u8_i(tt, 1), s0);
				s0 = vis_faligndata(vis_ld_u8_i(tt, 0), s0);

				/* pixel 1 -> s0 */
				cx = x >> MLIB_SHIFT;
				tt = tsp + 2 * cx + cx;
				x -= dx;
				s0 = vis_faligndata(vis_ld_u8_i(tt, 2), s0);
				s0 = vis_faligndata(vis_ld_u8_i(tt, 1), s0);
				s0 = vis_faligndata(vis_ld_u8_i(tt, 0), s0);

				/*
				 * pixel 0 -> s0; then jump x forward by 15*dx,
				 * to the 8th pixel of the next group.
				 */
				cx = x >> MLIB_SHIFT;
				tt = tsp + 2 * cx + cx;
				x += dx15;
				s0 = vis_faligndata(vis_ld_u8_i(tt, 2), s0);
				s0 = vis_faligndata(vis_ld_u8_i(tt, 1), s0);
				s0 = vis_faligndata(vis_ld_u8_i(tt, 0), s0);

				/* 24 output bytes: pixels 0..7 in memory order */
				((mlib_d64 *)dp)[0] = s0;
				((mlib_d64 *)dp)[1] = s1;
				((mlib_d64 *)dp)[2] = s2;
			}

			/* undo the 7*dx look-ahead: x is the next unwritten pixel */
			x -= dx7;

			/* scalar copy of the remaining (fewer than 8) pixels */
			while (dp < dend) {
				cx = x >> MLIB_SHIFT;
				tt = tsp + 2 * cx + cx;
				dp[0] = tt[0];
				dp[1] = tt[1];
				dp[2] = tt[2];
				x += dx;
				dp += 3;
			}
		}

		/* advance to the next destination row and its source row */
		y0 = y;
		y += dy;
		dl = (void *)((mlib_u8 *)dl + dst_stride);
		tsp = (void *)((mlib_u8 *)sp + (y >> MLIB_SHIFT) * src_stride);
	}

	return (MLIB_SUCCESS);
}

/* *********************************************************** */

/*
 * Nearest-neighbor zoom of a 3-channel 16-bit region.
 *
 * Geometry and pointers come from param via the VARIABLE() locals.
 * x and y are used as fixed-point coordinates; ">> MLIB_SHIFT" gives
 * the source column/row index.  Strides are applied as byte offsets
 * (the row pointers are cast to mlib_u8 * before adding them).
 *
 * Per destination row:
 *   - if it maps to the same source row as the previous destination
 *     row, the previous output row (6 * width bytes) is duplicated with
 *     mlib_ImageCopy_na();
 *   - otherwise pixels are copied one at a time until the destination
 *     pointer is 8-byte aligned, then 4 pixels (12 elements, 24 bytes)
 *     at a time via three 8-byte stores, then one at a time for the
 *     remainder.
 *
 * The 4-pixel loop builds its output words with vis_faligndata().  The
 * description of that intrinsic follows the VIS instruction set
 * definition (its implementation is not visible here): with the GSR
 * alignment set to 6, faligndata(a, b) yields the last two bytes of a
 * followed by the first six bytes of b, i.e. it pushes one 16-bit
 * element in at the front of the accumulator.  Elements are therefore
 * fed in reverse order, last channel of the last pixel first.
 *
 * Always returns MLIB_SUCCESS.
 */
mlib_status
mlib_ImageZoom_S16_3_Nearest(
    mlib_work_image *param)
{
	VARIABLE(mlib_s16);
	mlib_s16 *dl = dp, *tsp, *tt;
	/* y0: y of the previous row (-1 = none yet); dx3 = 3*dx; dx7 = 7*dx */
	mlib_s32 cx, y0 = -1, dx3 = 3 * dx, dx7 = 4 * dx + dx3;

	tsp = sp;
	/*
	 * NOTE(review): MLIB_MASK presumably keeps only the fractional bits
	 * so row offsets are relative to sp -- confirm in mlib_ImageZoom.h.
	 */
	y = GetElemSubStruct(current, srcY) & MLIB_MASK;

	for (j = 0; j < height; j++) {

		if ((y0 >> MLIB_SHIFT) == (y >> MLIB_SHIFT)) {
			/* same source row as before: reuse previous output */
			mlib_ImageCopy_na((void *)((mlib_u8 *)dl - dst_stride),
			    (void *)dl, 6 * width);
		} else {
			/* this dp is a row-local cursor shadowing VARIABLE's dp */
			mlib_s16 *dp = dl, *dend = dl + 3 * width;

			/* alignment offset 6: faligndata inserts one 16-bit element */
			vis_write_gsr(6);
			x = GetElemSubStruct(current, srcX) & MLIB_MASK;

			/* scalar copy until dp is 8-byte aligned (or row ends) */
			while (((mlib_addr)dp & 7) && (dp < dend)) {
				cx = x >> MLIB_SHIFT;
				/* tt = tsp + 3 * cx: first channel of pixel cx */
				tt = tsp + 2 * cx + cx;
				dp[0] = tt[0];
				dp[1] = tt[1];
				dp[2] = tt[2];
				x += dx;
				dp += 3;
			}

			/* point x at the 4th pixel of the group (walked backwards) */
			x += dx3;

#pragma pipeloop(0)
			for (; dp <= dend - 12; dp += 12) {
				/*
				 * s0/s1/s2 are intentionally not initialised:
				 * each receives exactly 4 element insertions,
				 * which replaces its whole content.
				 */
				mlib_d64 s0, s1, s2;

				/* pixel 3 -> s2 (byte offsets 4,2,0 = channels 2,1,0) */
				cx = x >> MLIB_SHIFT;
				tt = tsp + 2 * cx + cx;
				x -= dx;
				s2 = vis_faligndata(vis_ld_u16_i(tt, 4), s2);
				s2 = vis_faligndata(vis_ld_u16_i(tt, 2), s2);
				s2 = vis_faligndata(vis_ld_u16_i(tt, 0), s2);

				/* pixel 2: channel 2 completes s2; channels 1,0 -> s1 */
				cx = x >> MLIB_SHIFT;
				tt = tsp + 2 * cx + cx;
				x -= dx;
				s2 = vis_faligndata(vis_ld_u16_i(tt, 4), s2);
				s1 = vis_faligndata(vis_ld_u16_i(tt, 2), s1);
				s1 = vis_faligndata(vis_ld_u16_i(tt, 0), s1);

				/* pixel 1: channels 2,1 complete s1; channel 0 -> s0 */
				cx = x >> MLIB_SHIFT;
				tt = tsp + 2 * cx + cx;
				x -= dx;
				s1 = vis_faligndata(vis_ld_u16_i(tt, 4), s1);
				s1 = vis_faligndata(vis_ld_u16_i(tt, 2), s1);
				s0 = vis_faligndata(vis_ld_u16_i(tt, 0), s0);

				/*
				 * pixel 0 completes s0; then jump x forward by
				 * 7*dx, to the 4th pixel of the next group.
				 */
				cx = x >> MLIB_SHIFT;
				tt = tsp + 2 * cx + cx;
				x += dx7;
				s0 = vis_faligndata(vis_ld_u16_i(tt, 4), s0);
				s0 = vis_faligndata(vis_ld_u16_i(tt, 2), s0);
				s0 = vis_faligndata(vis_ld_u16_i(tt, 0), s0);

				/* 24 output bytes: pixels 0..3 in memory order */
				((mlib_d64 *)dp)[0] = s0;
				((mlib_d64 *)dp)[1] = s1;
				((mlib_d64 *)dp)[2] = s2;
			}

			/* undo the 3*dx look-ahead: x is the next unwritten pixel */
			x -= dx3;

			/* scalar copy of the remaining (fewer than 4) pixels */
			while (dp < dend) {
				cx = x >> MLIB_SHIFT;
				tt = tsp + 2 * cx + cx;
				dp[0] = tt[0];
				dp[1] = tt[1];
				dp[2] = tt[2];
				x += dx;
				dp += 3;
			}
		}

		/* advance to the next destination row and its source row */
		y0 = y;
		y += dy;
		dl = (void *)((mlib_u8 *)dl + dst_stride);
		tsp = (void *)((mlib_u8 *)sp + (y >> MLIB_SHIFT) * src_stride);
	}

	return (MLIB_SUCCESS);
}

/* *********************************************************** */

/*
 * Nearest-neighbor zoom of a 1-channel 16-bit region (elements are
 * moved as raw mlib_u16 values).
 *
 * Geometry and pointers come from param via the VARIABLE() locals.
 * x and y are used as fixed-point coordinates; ">> MLIB_SHIFT" gives
 * the source column/row index.  Strides are applied as byte offsets.
 *
 * A destination row that maps to a new source row is resampled:
 * element by element until the output pointer is 8-byte aligned, then
 * four elements per 8-byte store, then element by element for the
 * rest.  A destination row that maps to the same source row as the
 * previous one is produced by duplicating the previous output row.
 *
 * Always returns MLIB_SUCCESS.
 */
mlib_status
mlib_ImageZoom_S16_1_Nearest(
    mlib_work_image *param)
{
	VARIABLE(mlib_u16);
	mlib_u16 *dst_row = dp;		/* current destination row */
	mlib_u16 *src_row = sp;		/* current source row */
	mlib_s32 prev_y = -1;		/* y of previous row; -1 = none yet */
	mlib_s32 step3 = 3 * dx;
	mlib_s32 step7 = 4 * dx + step3;

	y = GetElemSubStruct(current, srcY) & MLIB_MASK;

	for (j = 0; j < height; j++) {

		if ((prev_y >> MLIB_SHIFT) != (y >> MLIB_SHIFT)) {
			mlib_u16 *out = dst_row;
			mlib_u16 *out_end = dst_row + width;

			/*
			 * Alignment offset 6: per the VIS definition each
			 * vis_faligndata() below then inserts one 16-bit
			 * element at the front of the accumulator.
			 */
			vis_write_gsr(6);
			x = GetElemSubStruct(current, srcX) & MLIB_MASK;

			/* element-wise until the output is 8-byte aligned */
			for (; ((mlib_addr)out & 7) && (out < out_end); out++) {
				*out = src_row[x >> MLIB_SHIFT];
				x += dx;
			}

			/* start each group at its 4th element, walking back */
			x += step3;

#pragma pipeloop(0)
			for (; out <= out_end - 4; out += 4) {
				/*
				 * Left uninitialised on purpose: four 16-bit
				 * insertions replace all eight bytes.
				 */
				mlib_d64 acc;

				/*
				 * (x >> (MLIB_SHIFT - 1)) & ~1 is the byte
				 * offset of element (x >> MLIB_SHIFT).
				 */
				acc = vis_faligndata(vis_ld_u16_i(src_row,
				    (x >> (MLIB_SHIFT - 1)) & ~1), acc);
				x -= dx;
				acc = vis_faligndata(vis_ld_u16_i(src_row,
				    (x >> (MLIB_SHIFT - 1)) & ~1), acc);
				x -= dx;
				acc = vis_faligndata(vis_ld_u16_i(src_row,
				    (x >> (MLIB_SHIFT - 1)) & ~1), acc);
				x -= dx;
				acc = vis_faligndata(vis_ld_u16_i(src_row,
				    (x >> (MLIB_SHIFT - 1)) & ~1), acc);
				/* on to the 4th element of the next group */
				x += step7;

				*(mlib_d64 *)out = acc;
			}

			/* drop the look-ahead: x is the next unwritten element */
			x -= step3;

			/* leftover elements, fewer than four */
			for (; out < out_end; out++) {
				*out = src_row[x >> MLIB_SHIFT];
				x += dx;
			}
		} else {
			/* same source row as before: repeat the previous output */
			mlib_ImageCopy_na(
			    (void *)((mlib_u8 *)dst_row - dst_stride),
			    (void *)dst_row, 2 * width);
		}

		prev_y = y;
		y += dy;
		dst_row = (void *)((mlib_u8 *)dst_row + dst_stride);
		src_row = (void *)((mlib_u8 *)sp +
		    (y >> MLIB_SHIFT) * src_stride);
	}

	return (MLIB_SUCCESS);
}

/* *********************************************************** */
