/* JDK14/Java14 source code - online reading copy */

/*
 * Copyright (c) 2003, 2008, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#if !defined(JAVA2D_NO_MLIB) || defined(MLIB_ADD_SUFF)

#include "vis_AlphaMacros.h"

/***************************************************************/

/*
 * Gray2Argb(x): build a 32-bit value with 0xff in bits 31..24 and x
 * replicated into bits 23..16, 15..8 and 7..0.
 *
 * The argument and the whole expansion are parenthesized so the macro
 * composes correctly with neighbouring operators (e.g. Gray2Argb(a | b) or
 * Gray2Argb(x) & mask).  x is evaluated three times, so it must not have
 * side effects.
 */
#define Gray2Argb(x)   \
    (0xff000000 | ((x) << 16) | ((x) << 8) | (x))

/***************************************************************/

/*
 * BMASK_FOR_ARGB: set-up statement placed once before the conversion loops.
 *
 * For VIS levels below 0x200 it expands to nothing; otherwise it expands to
 * a vis_write_bmask() call with the selector 0x03214765.
 * NOTE(review): the selector is presumably consumed by the ARGB2ABGR_*
 * macros from vis_AlphaMacros.h - confirm there.
 *
 * The non-empty expansion carries its own ';', so call sites use the macro
 * without a trailing semicolon.
 */
#if VIS < 0x200

#define BMASK_FOR_ARGB

#else

#define BMASK_FOR_ARGB         \
    vis_write_bmask(0x03214765, 0);

#endif

/***************************************************************/

/*
 * RGB2ABGR_DB(x): combine x with the variable 'amask' via vis_for() and then
 * apply ARGB2ABGR_DB to the result, in place.
 *
 * 'amask' is not a parameter: it must be visible at the expansion site (the
 * caller in this file initializes it with vis_to_double_dup(0xFF000000)).
 * The expansion is a sequence of statements and the call sites do not add a
 * trailing ';', so it must only be used where a statement list is valid,
 * not as the unbraced body of an if/for.
 */
#define RGB2ABGR_DB(x)         \
    x = vis_for(x, amask);     \
    ARGB2ABGR_DB(x)

/***************************************************************/

/*
 * INSERT_U8_34R: byte-interleave network built only from vis_fpmerge().
 *
 * Inputs : sd0, sd1, sd2 (three 64-bit words) and sFF.
 * Outputs: dd0, dd1, dd2, dd3 (four 64-bit words).
 * Scratch: sda .. sdm.
 *
 * All of these names are taken from the expanding scope (the user in this
 * file declares them as mlib_d64).  The user feeds it 24 source bytes
 * (8 three-byte pixels) with sFF = vis_fone() and stores the four results as
 * 32 destination bytes; the sFF bytes enter the network at the sdl/sdm
 * stage.  The expansion has no trailing ';' - the call site supplies it.
 */
#define INSERT_U8_34R                                          \
    sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1));     \
    sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2));     \
    sdc = vis_fpmerge(vis_read_hi(sd1), vis_read_lo(sd2));     \
    sdd = vis_fpmerge(vis_read_hi(sda), vis_read_lo(sdb));     \
    sde = vis_fpmerge(vis_read_lo(sda), vis_read_hi(sdc));     \
    sdf = vis_fpmerge(vis_read_hi(sdb), vis_read_lo(sdc));     \
    sdg = vis_fpmerge(vis_read_hi(sdd), vis_read_lo(sde));     \
    sdh = vis_fpmerge(vis_read_lo(sdd), vis_read_hi(sdf));     \
    sdi = vis_fpmerge(vis_read_hi(sde), vis_read_lo(sdf));     \
    sdj = vis_fpmerge(vis_read_hi(sdg), vis_read_hi(sdi));     \
    sdk = vis_fpmerge(vis_read_lo(sdg), vis_read_lo(sdi));     \
    sdl = vis_fpmerge(vis_read_hi(sFF), vis_read_hi(sdh));     \
    sdm = vis_fpmerge(vis_read_lo(sFF), vis_read_lo(sdh));     \
    dd0 = vis_fpmerge(vis_read_hi(sdl), vis_read_hi(sdj));     \
    dd1 = vis_fpmerge(vis_read_lo(sdl), vis_read_lo(sdj));     \
    dd2 = vis_fpmerge(vis_read_hi(sdm), vis_read_hi(sdk));     \
    dd3 = vis_fpmerge(vis_read_lo(sdm), vis_read_lo(sdk))

/***************************************************************/

/*
 * Convert one line of 32-bit pixels from srcBase to dstBase using the
 * ARGB2ABGR_FL / ARGB2ABGR_DB macros.
 *
 *   srcBase - first source pixel
 *   dstBase - first destination pixel
 *   width   - number of pixels to convert; nothing is stored for width <= 0
 *
 * When srcBase is not 8-byte aligned one leading pixel is converted on its
 * own so that the main loop can use 64-bit loads.  The main loop then stores
 * either one 64-bit word or two 32-bit halves depending on the destination
 * alignment, and a final odd pixel is handled separately.
 */
void IntArgbToIntAbgrConvert_line(mlib_s32 *srcBase,
                                  mlib_s32 *dstBase,
                                  mlib_s32 width)
{
    mlib_s32 *dst_end = dstBase + width;
    mlib_d64 dd;
    mlib_f32 ff;

    BMASK_FOR_ARGB

    /*
     * Peel one pixel to align the source.  The bounds test keeps an empty
     * line from storing a pixel; mlib_addr (rather than mlib_s32) avoids
     * truncating the pointer when it is wider than 32 bits.
     */
    if (((mlib_addr)srcBase & 7) && dstBase < dst_end) {
        ff = *(mlib_f32*)srcBase;
        ARGB2ABGR_FL(ff)
        *(mlib_f32*)dstBase = ff;
        srcBase++;
        dstBase++;
    }

    if ((mlib_addr)dstBase & 7) {
        /* destination not 8-byte aligned: store the two halves separately */
#pragma pipeloop(0)
        for (; dstBase <= (dst_end - 2); dstBase += 2) {
            dd = *(mlib_d64*)srcBase;
            ARGB2ABGR_DB(dd)
            ((mlib_f32*)dstBase)[0] = vis_read_hi(dd);
            ((mlib_f32*)dstBase)[1] = vis_read_lo(dd);
            srcBase += 2;
        }
    } else {
        /* both pointers 8-byte aligned: full 64-bit load and store */
#pragma pipeloop(0)
        for (; dstBase <= (dst_end - 2); dstBase += 2) {
            dd = *(mlib_d64*)srcBase;
            ARGB2ABGR_DB(dd)
            *(mlib_d64*)dstBase = dd;
            srcBase += 2;
        }
    }

    /* trailing odd pixel */
    if (dstBase < dst_end) {
        ff = *(mlib_f32*)srcBase;
        ARGB2ABGR_FL(ff)
        *(mlib_f32*)dstBase = ff;
    }
}

/***************************************************************/

void ADD_SUFF(FourByteAbgrToIntArgbConvert)(BLIT_PARAMS)
{
    mlib_u32 *argb = (mlib_u32 *)dstBase;
    mlib_u8  *pabgr = (mlib_u8 *)srcBase;
    mlib_s32 dstScan = (pDstInfo)->scanStride;
    mlib_s32 srcScan = (pSrcInfo)->scanStride;
    mlib_s32 i, j, count, left;
    mlib_d64 w_abgr;

    if (width < 16) {
        for (j = 0; j < height; j++) {
            mlib_u8  *src = srcBase;
            mlib_s32 *dst = dstBase;

            for (i = 0; i < width; i++) {
                *dst++ = (src[0] << 24) | (src[3] << 16) |
                         (src[2] << 8) | (src[1]);
                src += 4;
            }

            PTR_ADD(dstBase, dstScan);
            PTR_ADD(srcBase, srcScan);
        }
        return;
    }

    if (dstScan == 4*width && srcScan == dstScan) {
        width *= height;
        height = 1;
    }
    count = width >> 1;
    left = width & 1;

    BMASK_FOR_ARGB

    if ((((mlib_addr)pabgr & 3) == 0) && ((srcScan & 3) == 0)) {
        mlib_u32 *abgr = (mlib_u32 *)pabgr;

        dstScan >>= 2;
        srcScan >>= 2;

        for (i = 0; i < height; i++, argb += dstScan, abgr += srcScan) {
            if ((((mlib_addr) argb | (mlib_addr) abgr) & 7) == 0) {
                mlib_d64 *d_abgr = (mlib_d64 *) abgr;
                mlib_d64 *d_argb = (mlib_d64 *) argb;

#pragma pipeloop(0)
                for (j = 0; j < count; j++) {
                    w_abgr = d_abgr[j];
                    ARGB2ABGR_DB(w_abgr)
                    d_argb[j] = w_abgr;
                }

                if (left) {
                    w_abgr = d_abgr[count];
                    ARGB2ABGR_DB(w_abgr)
                    ((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);
                }
            } else {
                mlib_f32 v_abgr0, v_abgr1;

#pragma pipeloop(0)
                for (j = 0; j < count; j++) {
                    v_abgr0 = ((mlib_f32 *) abgr)[2 * j];
                    v_abgr1 = ((mlib_f32 *) abgr)[2 * j + 1];
                    w_abgr = vis_freg_pair(v_abgr0, v_abgr1);
                    ARGB2ABGR_DB(w_abgr)
                    ((mlib_f32 *) argb)[2 * j] = vis_read_hi(w_abgr);
                    ((mlib_f32 *) argb)[2 * j + 1] = vis_read_lo(w_abgr);
                }

                if (left) {
                    v_abgr0 = ((mlib_f32 *) abgr)[2 * count];
                    w_abgr = vis_freg_pair(v_abgr0, 0);
                    ARGB2ABGR_DB(w_abgr)
                    ((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);
                }
            }
        }
    } else {      /* abgr is not aligned */
        mlib_u8 *abgr = pabgr;
        mlib_d64 *d_abgr, db0, db1;

        dstScan >>= 2;

        for (i = 0; i < height; i++, argb += dstScan, abgr += srcScan) {
            d_abgr = vis_alignaddr(abgr, 0);
            db0 = *d_abgr++;

            if (((mlib_addr) argb & 7) == 0) {
                mlib_d64 *d_argb = (mlib_d64 *) argb;

#pragma pipeloop(0)
                for (j = 0; j < count; j++) {
                    db1 = d_abgr[j];
                    w_abgr = vis_faligndata(db0, db1);
                    db0 = db1;
                    ARGB2ABGR_DB(w_abgr)
                    d_argb[j] = w_abgr;
                }

                if (left) {
                    db1 = d_abgr[j];
                    w_abgr = vis_faligndata(db0, db1);
                    ARGB2ABGR_DB(w_abgr)
                    ((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);
                }
            } else {
                mlib_d64 w_abgr;

                db1 = *d_abgr++;
                w_abgr = vis_faligndata(db0, db1);
                db0 = db1;
#pragma pipeloop(0)
                for (j = 0; j < count; j++) {
                    ARGB2ABGR_DB(w_abgr)
                    ((mlib_f32 *) argb)[2 * j] = vis_read_hi(w_abgr);
                    ((mlib_f32 *) argb)[2 * j + 1] = vis_read_lo(w_abgr);
                    db1 = d_abgr[j];
                    w_abgr = vis_faligndata(db0, db1);
                    db0 = db1;
                }

                if (left) {
                    ARGB2ABGR_DB(w_abgr)
                    ((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);
                }
            }
        }
    }
}

/***************************************************************/

/*
 * Blit converting 32-bit source words into 4-byte destination pixels.
 * Per pixel (see the scalar loop) the destination bytes are, in order,
 *   x >> 24, x, x >> 8, x >> 16   (each truncated to 8 bits)
 * where x is the source word.
 *
 * The parameter list comes from BLIT_PARAMS (declared outside this file);
 * this function uses srcBase, dstBase, width, height, pSrcInfo and pDstInfo.
 *
 * Strategy:
 *   - width < 16: plain scalar loop.
 *   - otherwise each row is converted with ARGB2ABGR_DB, two pixels at a
 *     time, using 64-bit stores, 32-bit stores, or vis_faligndata plus
 *     edge-masked partial stores, depending on the row alignment.
 */
void ADD_SUFF(IntArgbToFourByteAbgrConvert)(BLIT_PARAMS)
{
    mlib_u32 *argb = (mlib_u32 *)srcBase;
    mlib_u8 *abgr = (mlib_u8 *)dstBase;
    mlib_s32 dstScan = (pDstInfo)->scanStride;
    mlib_s32 srcScan = (pSrcInfo)->scanStride;
    mlib_s32 i, j, count, left;
    mlib_d64 w_abgr;

    if (width < 16) {
        for (j = 0; j < height; j++) {
            mlib_s32 *src = srcBase;
            mlib_u8  *dst = dstBase;

            for (i = 0; i < width; i++) {
                mlib_u32 x = *src++;
                dst[0] = x >> 24;
                dst[1] = x;
                dst[2] = x >> 8;
                dst[3] = x >> 16;
                dst += 4;
            }

            PTR_ADD(dstBase, dstScan);
            PTR_ADD(srcBase, srcScan);
        }
        return;
    }

    /* rows are contiguous in both images: treat the area as one long row */
    if (dstScan == 4*width && srcScan == dstScan) {
        width *= height;
        height = 1;
    }
    count = width >> 1;     /* number of pixel pairs per row */
    left = width & 1;       /* 1 when a single pixel remains after the pairs */

    BMASK_FOR_ARGB

    /* the source stride is applied to a 32-bit pointer from here on */
    srcScan >>= 2;

    for (i = 0; i < height; i++, argb += srcScan, abgr += dstScan) {

        if ((((mlib_addr) abgr | (mlib_addr) argb) & 7) == 0) {
            /* both rows 8-byte aligned: 64-bit load/store */
            mlib_d64 *d_argb = (mlib_d64 *) argb;
            mlib_d64 *d_abgr = (mlib_d64 *) abgr;

#pragma pipeloop(0)
            for (j = 0; j < count; j++) {
                w_abgr = d_argb[j];
                ARGB2ABGR_DB(w_abgr)
                d_abgr[j] = w_abgr;
            }

            if (left) {
                /* loads a full 64-bit word but stores only its high half */
                w_abgr = d_argb[count];
                ARGB2ABGR_DB(w_abgr)
                ((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);
            }

        } else if (((mlib_addr) abgr & 3) == 0) {
            /* destination 4-byte aligned: move data as 32-bit halves */
            mlib_f32 v_argb0, v_argb1;

#pragma pipeloop(0)
            for (j = 0; j < count; j++) {
                v_argb0 = ((mlib_f32 *) argb)[2 * j];
                v_argb1 = ((mlib_f32 *) argb)[2 * j + 1];
                w_abgr = vis_freg_pair(v_argb0, v_argb1);

                ARGB2ABGR_DB(w_abgr)
                ((mlib_f32 *) abgr)[2 * j] = vis_read_hi(w_abgr);
                ((mlib_f32 *) abgr)[2 * j + 1] = vis_read_lo(w_abgr);
            }

            if (left) {
                v_argb0 = ((mlib_f32 *) argb)[2 * count];
                w_abgr = vis_freg_pair(v_argb0, vis_fzeros());

                ARGB2ABGR_DB(w_abgr)
                ((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);
            }

        } else {      /* abgr is not aligned */

            mlib_u8 *pend = abgr + (width << 2) - 1;   /* last byte of the row */
            mlib_d64 *d_abgr, db0, db1;
            mlib_s32 emask, off;
            mlib_f32 *f_argb = (mlib_f32 *) argb;

            /* write through the 8-byte aligned word that contains abgr */
            off = (mlib_addr)abgr & 7;
            vis_alignaddr((void *)(8 - off), 0);
            d_abgr = (mlib_d64 *) (abgr - off);

            /*
             * Read the first pixel pair with explicit indices.  Two
             * '*f_argb++' operands inside one argument list would modify
             * f_argb twice without sequencing, which is undefined behavior
             * and leaves the operand order unspecified.
             */
            db1 = vis_freg_pair(f_argb[0], f_argb[1]);
            f_argb += 2;
            ARGB2ABGR_DB(db1)
            w_abgr = vis_faligndata(db1, db1);
            /* first (partial) word: only bytes from abgr onward are stored */
            emask = vis_edge8(abgr, pend);
            vis_pst_8(w_abgr, d_abgr++, emask);
            db0 = db1;

            db1 = vis_freg_pair(f_argb[0], f_argb[1]);
#pragma pipeloop(0)
            for (; (mlib_addr)d_abgr < (mlib_addr)(pend - 6); ) {
                ARGB2ABGR_DB(db1)
                w_abgr = vis_faligndata(db0, db1);
                *d_abgr++ = w_abgr;
                db0 = db1;
                f_argb += 2;
                db1 = vis_freg_pair(f_argb[0], f_argb[1]);
            }

            /* last (partial) word, masked so nothing past pend is written */
            if ((mlib_addr)d_abgr <= (mlib_addr)pend) {
                ARGB2ABGR_DB(db1)
                w_abgr = vis_faligndata(db0, db1);
                emask = vis_edge8(d_abgr, pend);
                vis_pst_8(w_abgr, d_abgr, emask);
            }
        }
    }
}

/***************************************************************/

/*
 * Blit converting 32-bit source words into 4-byte destination pixels whose
 * first byte is always 0xFF.  Per pixel (see the scalar loop) the
 * destination bytes are, in order,
 *   0xFF, x, x >> 8, x >> 16   (each truncated to 8 bits)
 * where x is the source word; bits 31..24 of the source are not used.
 *
 * The parameter list comes from BLIT_PARAMS (declared outside this file);
 * this function uses srcBase, dstBase, width, height, pSrcInfo and pDstInfo.
 *
 * Strategy:
 *   - width < 16: plain scalar loop.
 *   - otherwise each row is converted with RGB2ABGR_DB (which uses the local
 *     'amask'), two pixels at a time, using 64-bit stores, 32-bit stores, or
 *     vis_faligndata plus edge-masked partial stores, depending on the row
 *     alignment.
 */
void ADD_SUFF(IntRgbToFourByteAbgrConvert)(BLIT_PARAMS)
{
    mlib_u32 *argb = (mlib_u32 *)srcBase;
    mlib_u8  *abgr = (mlib_u8 *)dstBase;
    mlib_s32 dstScan = (pDstInfo)->scanStride;
    mlib_s32 srcScan = (pSrcInfo)->scanStride;
    mlib_s32 i, j, count, left;
    mlib_d64 w_abgr;
    /* referenced by name inside RGB2ABGR_DB */
    mlib_d64 amask = vis_to_double_dup(0xFF000000);

    if (width < 16) {
        for (j = 0; j < height; j++) {
            mlib_s32 *src = srcBase;
            mlib_u8  *dst = dstBase;

            for (i = 0; i < width; i++) {
                mlib_u32 x = *src++;
                dst[0] = 0xFF;
                dst[1] = x;
                dst[2] = x >> 8;
                dst[3] = x >> 16;
                dst += 4;
            }

            PTR_ADD(dstBase, dstScan);
            PTR_ADD(srcBase, srcScan);
        }
        return;
    }

    /* rows are contiguous in both images: treat the area as one long row */
    if (dstScan == 4*width && srcScan == dstScan) {
        width *= height;
        height = 1;
    }
    count = width >> 1;     /* number of pixel pairs per row */
    left = width & 1;       /* 1 when a single pixel remains after the pairs */

    BMASK_FOR_ARGB

    /* the source stride is applied to a 32-bit pointer from here on */
    srcScan >>= 2;

    for (i = 0; i < height; i++, argb += srcScan, abgr += dstScan) {

        if ((((mlib_addr) abgr | (mlib_addr) argb) & 7) == 0) {
            /* both rows 8-byte aligned: 64-bit load/store */
            mlib_d64 *d_argb = (mlib_d64 *) argb;
            mlib_d64 *d_abgr = (mlib_d64 *) abgr;

#pragma pipeloop(0)
            for (j = 0; j < count; j++) {
                w_abgr = d_argb[j];
                RGB2ABGR_DB(w_abgr)
                d_abgr[j] = w_abgr;
            }

            if (left) {
                /* loads a full 64-bit word but stores only its high half */
                w_abgr = d_argb[count];
                RGB2ABGR_DB(w_abgr)
                ((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);
            }

        } else if (((mlib_addr) abgr & 3) == 0) {
            /* destination 4-byte aligned: move data as 32-bit halves */
            mlib_f32 v_argb0, v_argb1;

#pragma pipeloop(0)
            for (j = 0; j < count; j++) {
                v_argb0 = ((mlib_f32 *) argb)[2 * j];
                v_argb1 = ((mlib_f32 *) argb)[2 * j + 1];
                w_abgr = vis_freg_pair(v_argb0, v_argb1);

                RGB2ABGR_DB(w_abgr)
                ((mlib_f32 *) abgr)[2 * j] = vis_read_hi(w_abgr);
                ((mlib_f32 *) abgr)[2 * j + 1] = vis_read_lo(w_abgr);
            }

            if (left) {
                v_argb0 = ((mlib_f32 *) argb)[2 * count];
                w_abgr = vis_freg_pair(v_argb0, vis_fzeros());

                RGB2ABGR_DB(w_abgr)
                ((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);
            }

        } else {      /* abgr is not aligned */

            mlib_u8 *pend = abgr + (width << 2) - 1;   /* last byte of the row */
            mlib_d64 *d_abgr, db0, db1;
            mlib_s32 emask, off;
            mlib_f32 *f_argb = (mlib_f32 *) argb;

            /* write through the 8-byte aligned word that contains abgr */
            off = (mlib_addr)abgr & 7;
            vis_alignaddr((void *)(8 - off), 0);
            d_abgr = (mlib_d64 *) (abgr - off);

            /*
             * Read the first pixel pair with explicit indices.  Two
             * '*f_argb++' operands inside one argument list would modify
             * f_argb twice without sequencing, which is undefined behavior
             * and leaves the operand order unspecified.
             */
            db1 = vis_freg_pair(f_argb[0], f_argb[1]);
            f_argb += 2;
            RGB2ABGR_DB(db1)
            w_abgr = vis_faligndata(db1, db1);
            /* first (partial) word: only bytes from abgr onward are stored */
            emask = vis_edge8(abgr, pend);
            vis_pst_8(w_abgr, d_abgr++, emask);
            db0 = db1;

            db1 = vis_freg_pair(f_argb[0], f_argb[1]);
#pragma pipeloop(0)
            for (; (mlib_addr)d_abgr < (mlib_addr)(pend - 6); ) {
                RGB2ABGR_DB(db1)
                w_abgr = vis_faligndata(db0, db1);
                *d_abgr++ = w_abgr;
                db0 = db1;
                f_argb += 2;
                db1 = vis_freg_pair(f_argb[0], f_argb[1]);
            }

            /* last (partial) word, masked so nothing past pend is written */
            if ((mlib_addr)d_abgr <= (mlib_addr)pend) {
                RGB2ABGR_DB(db1)
                w_abgr = vis_faligndata(db0, db1);
                emask = vis_edge8(d_abgr, pend);
                vis_pst_8(w_abgr, d_abgr, emask);
            }
        }
    }
}

/***************************************************************/

void ADD_SUFF(ThreeByteBgrToFourByteAbgrConvert)(BLIT_PARAMS)
{
    mlib_s32 dstScan = pDstInfo->scanStride;
    mlib_s32 srcScan = pSrcInfo->scanStride;
    mlib_d64 sd0, sd1, sd2;
    mlib_d64 dd0, dd1, dd2, dd3;
    mlib_d64 sda, sdb, sdc, sdd;
    mlib_d64 sde, sdf, sdg, sdh;
    mlib_d64 sdi, sdj, sdk, sdl;
    mlib_d64 sdm;
    mlib_d64 sFF;
    mlib_s32 r, g, b;
    mlib_s32 i, j;

    if (width < 16) {
        for (j = 0; j < height; j++) {
            mlib_u8 *src = srcBase;
            mlib_u8 *dst = dstBase;

#pragma pipeloop(0)
            for (i = 0; i < width; i++) {
                dst[0] = 0xFF;
                dst[1] = src[0];
                dst[2] = src[1];
                dst[3] = src[2];
                src += 3;
                dst += 4;
            }

            PTR_ADD(dstBase, dstScan);
            PTR_ADD(srcBase, srcScan);
        }
        return;
    }

    if (dstScan == 4*width && srcScan == 3*width) {
        width *= height;
        height = 1;
    }

    sFF = vis_fone();

    for (j = 0; j < height; j++) {
        mlib_u8 *pSrc = srcBase;
        mlib_u8 *pDst = dstBase;

        if (!(((mlib_s32)pSrc | (mlib_s32)pDst) & 7)) {
#pragma pipeloop(0)
            for (i = 0; i <= ((mlib_s32)width - 8); i += 8) {
                sd0 = ((mlib_d64*)pSrc)[0];
                sd1 = ((mlib_d64*)pSrc)[1];
                sd2 = ((mlib_d64*)pSrc)[2];
                pSrc += 3*8;
                INSERT_U8_34R;
                ((mlib_d64*)pDst)[0] = dd0;
                ((mlib_d64*)pDst)[1] = dd1;
                ((mlib_d64*)pDst)[2] = dd2;
                ((mlib_d64*)pDst)[3] = dd3;
                pDst += 4*8;
            }

            for (; i < width; i++) {
                b = pSrc[0];
                g = pSrc[1];
                r = pSrc[2];
                ((mlib_u16*)pDst)[0] = 0xff00 | b;
                ((mlib_u16*)pDst)[1] = (g << 8) | r;
                pSrc += 3;
                pDst += 4;
            }
        } else if (!((mlib_s32)pDst & 1)) {
#pragma pipeloop(0)
            for (i = 0; i < width; i++) {
                b = pSrc[0];
                g = pSrc[1];
                r = pSrc[2];
                ((mlib_u16*)pDst)[0] = 0xff00 | b;
                ((mlib_u16*)pDst)[1] = (g << 8) | r;
                pSrc += 3;
                pDst += 4;
            }
        } else {
            *pDst++ = 0xff;
#pragma pipeloop(0)
            for (i = 0; i < (mlib_s32)width - 1; i++) {
                b = pSrc[0];
                g = pSrc[1];
                r = pSrc[2];
                ((mlib_u16*)pDst)[0] = (b << 8) | g;
                ((mlib_u16*)pDst)[1] = (r << 8) | 0xff;
                pSrc += 3;
                pDst += 4;
            }
            if (width) {
                pDst[0] = pSrc[0];
                pDst[1] = pSrc[1];
                pDst[2] = pSrc[2];
            }
        }

        PTR_ADD(dstBase, dstScan);
        PTR_ADD(srcBase, srcScan);
    }
}

/***************************************************************/

/*
 * LOAD_BGR(dd): fetch the source pixel selected by (tmpsxloc >> shift) into
 * dd and advance tmpsxloc by sxinc.  pSrc, tmpsxloc, shift and sxinc are
 * taken from the expanding scope.  Two variants are kept; '#if 1' selects
 * the first.
 */
#if 1

/*
 * Active variant: extracts 8 bytes starting one byte BEFORE the pixel
 * (sp = pSrc - 1 + 3*index) with vis_alignaddr/vis_faligndata from the two
 * 8-byte aligned words ap[0] and ap[1].  The pixel's three bytes therefore
 * follow one unrelated leading byte in dd.
 * NOTE(review): the aligned loads of ap[0]/ap[1] can touch bytes outside the
 * pixel itself (before pSrc for index 0, or after the last pixel) - confirm
 * the source buffer tolerates this.
 */
#define LOAD_BGR(dd) {                                 \
    mlib_u8  *sp = pSrc - 1 + 3*(tmpsxloc >> shift);   \
    mlib_d64 *ap = (void*)((mlib_addr)sp &~ 7);        \
    vis_alignaddr(sp, 0);                              \
    dd = vis_faligndata(ap[0], ap[1]);                 \
    tmpsxloc += sxinc;                                 \
}

#else

/*
 * Disabled variant: feeds the three pixel bytes (sp[2], sp[1], sp[0]) and
 * then amask into dd one vis_faligndata step at a time.
 * NOTE(review): presumably relies on the vis_alignaddr(NULL, 7) issued by
 * the caller before its loops - confirm before enabling.
 */
#define LOAD_BGR(dd) {                                 \
    mlib_u8 *sp = pSrc + 3*(tmpsxloc >> shift);        \
    dd = vis_faligndata(vis_ld_u8(sp + 2), dd);        \
    dd = vis_faligndata(vis_ld_u8(sp + 1), dd);        \
    dd = vis_faligndata(vis_ld_u8(sp    ), dd);        \
    dd = vis_faligndata(amask, dd);                    \
    tmpsxloc += sxinc;                                 \
}

#endif

/***************************************************************/

void ADD_SUFF(ThreeByteBgrToFourByteAbgrScaleConvert)(SCALE_PARAMS)
{
    mlib_s32 dstScan = pDstInfo->scanStride;
    mlib_s32 srcScan = pSrcInfo->scanStride;
    mlib_d64 d0;
    mlib_d64 amask;
    mlib_s32 r, g, b;
    mlib_s32 i, j;

    if (width < 16 /*|| (((mlib_s32)dstBase | dstScan) & 3)*/) {
        for (j = 0; j < height; j++) {
            mlib_u8  *pSrc = srcBase;
            mlib_u8  *pDst = dstBase;
            mlib_s32 tmpsxloc = sxloc;

            PTR_ADD(pSrc, (syloc >> shift) * srcScan);

#pragma pipeloop(0)
            for (i = 0; i < width; i++) {
                mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);
                pDst[0] = 0xff;
                pDst[1] = pp[0];
                pDst[2] = pp[1];
                pDst[3] = pp[2];
                tmpsxloc += sxinc;
                pDst += 4;
            }

            PTR_ADD(dstBase, dstScan);
            syloc += syinc;
        }
        return;
    }

    vis_alignaddr(NULL, 7);
    amask = vis_to_double_dup(0xFF000000);

    for (j = 0; j < height; j++) {
        mlib_u8 *pSrc = srcBase;
        mlib_u8 *pDst = dstBase;
        mlib_s32 tmpsxloc = sxloc;

        PTR_ADD(pSrc, (syloc >> shift) * srcScan);

        if (!((mlib_s32)pDst & 3)) {
#pragma pipeloop(0)
            for (i = 0; i < width; i++) {
                LOAD_BGR(d0);
                ((mlib_f32*)pDst)[0] = vis_fors(vis_read_hi(d0),
                                                vis_read_hi(amask));
                pDst += 4;
            }
        } else if (!((mlib_s32)pDst & 1)) {
#pragma pipeloop(0)
            for (i = 0; i < width; i++) {
                mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);
                tmpsxloc += sxinc;
                b = pp[0];
                g = pp[1];
                r = pp[2];
                ((mlib_u16*)pDst)[2*i    ] = 0xff00 | b;
                ((mlib_u16*)pDst)[2*i + 1] = (g << 8) | r;
            }
        } else {
            *pDst++ = 0xff;
#pragma pipeloop(0)
            for (i = 0; i < (mlib_s32)width - 1; i++) {
                mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);
                tmpsxloc += sxinc;
                b = pp[0];
                g = pp[1];
                r = pp[2];
                ((mlib_u16*)pDst)[2*i    ] = (b << 8) | g;
                ((mlib_u16*)pDst)[2*i + 1] = (r << 8) | 0xff;
            }
            if (width) {
                mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);
                tmpsxloc += sxinc;
                pDst[4*i  ] = pp[0];
                pDst[4*i+1] = pp[1];
                pDst[4*i+2] = pp[2];
            }
        }

        PTR_ADD(dstBase, dstScan);
        syloc += syinc;
    }
}

/***************************************************************/

void ADD_SUFF(ByteGrayToFourByteAbgrConvert)(BLIT_PARAMS)
{
    mlib_s32 dstScan = pDstInfo->scanStride;
    mlib_s32 srcScan = pSrcInfo->scanStride;
    mlib_d64 d0, d1, d2, d3;
    mlib_f32 ff, aa = vis_fones();
    mlib_s32 i, j, x;

    if (!(((mlib_s32)dstBase | dstScan) & 3)) {
        ADD_SUFF(ByteGrayToIntArgbConvert)(BLIT_CALL_PARAMS);
        return;
    }

    if (width < 16) {
        for (j = 0; j < height; j++) {
            mlib_u8 *src = srcBase;
            mlib_u8 *dst = dstBase;

            for (i = 0; i < width; i++) {
                x = *src++;
                dst[0] = 0xff;
                dst[1] = x;
                dst[2] = x;
                dst[3] = x;
                dst += 4;
            }

            PTR_ADD(dstBase, dstScan);
            PTR_ADD(srcBase, srcScan);
        }
        return;
    }

    if (srcScan == width && dstScan == 4*width) {
        width *= height;
        height = 1;
    }

    for (j = 0; j < height; j++) {
        mlib_u8 *src = srcBase;
        mlib_u8 *dst = dstBase;
        mlib_u8 *dst_end;

        dst_end = dst + 4*width;

        while (((mlib_s32)src & 3) && dst < dst_end) {
            x = *src++;
            dst[0] = 0xff;
            dst[1] = x;
            dst[2] = x;
            dst[3] = x;
            dst += 4;
        }

        if (!((mlib_s32)dst & 3)) {
#pragma pipeloop(0)
            for (; dst <= (dst_end - 4*4); dst += 4*4) {
                ff = *(mlib_f32*)src;
                d0 = vis_fpmerge(aa, ff);
                d1 = vis_fpmerge(ff, ff);
                d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));
                d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));
                ((mlib_f32*)dst)[0] = vis_read_hi(d2);
                ((mlib_f32*)dst)[1] = vis_read_lo(d2);
                ((mlib_f32*)dst)[2] = vis_read_hi(d3);
                ((mlib_f32*)dst)[3] = vis_read_lo(d3);
                src += 4;
            }
        } else {
            mlib_d64 *dp;

            dp = vis_alignaddr(dst, 0);
            d3 = vis_faligndata(dp[0], dp[0]);
            vis_alignaddrl(dst, 0);

#pragma pipeloop(0)
            for (; dst <= (dst_end - 4*4); dst += 4*4) {
                ff = *(mlib_f32*)src;
                d0 = vis_fpmerge(aa, ff);
                d1 = vis_fpmerge(ff, ff);
                d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));
                *dp++ = vis_faligndata(d3, d2);
                d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));
                *dp++ = vis_faligndata(d2, d3);
                src += 4;
            }

            vis_pst_8(vis_faligndata(d3, d3), dp, vis_edge8(dp, dst - 1));
        }

        while (dst < dst_end) {
            x = *src++;
            dst[0] = 0xff;
            dst[1] = x;
            dst[2] = x;
            dst[3] = x;
            dst += 4;
        }

        PTR_ADD(dstBase, dstScan);
        PTR_ADD(srcBase, srcScan);
    }
}

/***************************************************************/

void ADD_SUFF(IntArgbToFourByteAbgrXorBlit)(BLIT_PARAMS)
{
    mlib_s32 dstScan = pDstInfo->scanStride;
    mlib_s32 srcScan = pSrcInfo->scanStride;
    mlib_u32 xorpixel = pCompInfo->details.xorPixel;
    mlib_u32 alphamask = pCompInfo->alphaMask;
    mlib_d64 dd, d_xorpixel, d_alphamask, d_zero;
    mlib_s32 i, j, x, neg_mask;

    if (width < 16) {
        xorpixel = (xorpixel << 24) | (xorpixel >> 8);
        alphamask = (alphamask << 24) | (alphamask >> 8);

        for (j = 0; j < height; j++) {
            mlib_s32 *src = srcBase;
            mlib_u8  *dst = dstBase;

            for (i = 0; i < width; i++) {
                x = src[i];
                neg_mask = x >> 31;
                x = (x ^ xorpixel) & (neg_mask &~ alphamask);
                dst[0] ^= x >> 24;
                dst[1] ^= x;
                dst[2] ^= x >> 8;
                dst[3] ^= x >> 16;
                dst += 4;
            }

            PTR_ADD(dstBase, dstScan);
            PTR_ADD(srcBase, srcScan);
        }
        return;
    }

    if (srcScan == 4*width && dstScan == 4*width) {
        width *= height;
        height = 1;
    }

    d_zero = vis_fzero();
    d_xorpixel = vis_freg_pair(vis_ldfa_ASI_PL(&xorpixel),
                               vis_ldfa_ASI_PL(&xorpixel));
    d_alphamask = vis_freg_pair(vis_ldfa_ASI_PL(&alphamask),
                                vis_ldfa_ASI_PL(&alphamask));

    dd = vis_freg_pair(vis_read_hi(d_xorpixel), vis_read_hi(d_alphamask));
    ARGB2ABGR_DB(dd)
    xorpixel = ((mlib_s32*)&dd)[0];
    alphamask = ((mlib_s32*)&dd)[1];

    for (j = 0; j < height; j++) {
        mlib_s32 *src = srcBase;
        mlib_u8  *dst = dstBase;
        mlib_u8  *dst_end;

        dst_end = dst + 4*width;

        if (!((mlib_s32)dst & 7)) {
#pragma pipeloop(0)
            for (; dst <= (dst_end - 8); dst += 8) {
                dd = vis_freg_pair(((mlib_f32*)src)[0], ((mlib_f32*)src)[1]);
                src += 2;
                neg_mask = vis_fcmplt32(dd, d_zero);
                ARGB2ABGR_DB(dd)
                dd = vis_fxor(dd, d_xorpixel);
                dd = vis_fandnot(d_alphamask, dd);
                dd = vis_fxor(dd, *(mlib_d64*)dst);
                vis_pst_32(dd, dst, neg_mask);
            }
        }

        while (dst < dst_end) {
            x = *src++;
            neg_mask = x >> 31;
            x = (x ^ xorpixel) & (neg_mask &~ alphamask);
            dst[0] ^= x >> 24;
            dst[1] ^= x;
            dst[2] ^= x >> 8;
            dst[3] ^= x >> 16;
            dst += 4;
        }

        PTR_ADD(dstBase, dstScan);
        PTR_ADD(srcBase, srcScan);
    }
}

/***************************************************************/

void ADD_SUFF(ByteGrayToFourByteAbgrScaleConvert)(SCALE_PARAMS)
{
    mlib_s32 dstScan = pDstInfo->scanStride;
    mlib_s32 srcScan = pSrcInfo->scanStride;
    mlib_d64 d0, d1, d2, d3, dd;
    mlib_f32 ff, aa;
    mlib_s32 i, j, x;

/*  if (!(((mlib_s32)dstBase | dstScan) & 3)) {
    ADD_SUFF(ByteGrayToIntArgbScaleConvert)(SCALE_CALL_PARAMS);
    return;
    }*/

    if (width < 16) {
        for (j = 0; j < height; j++) {
            mlib_u8 *src = srcBase;
            mlib_u8 *dst = dstBase;
            mlib_s32 tmpsxloc = sxloc;

            PTR_ADD(src, (syloc >> shift) * srcScan);

            for (i = 0; i < width; i++) {
                x = src[tmpsxloc >> shift];
                tmpsxloc += sxinc;
                dst[4*i    ] = 0xff;
                dst[4*i + 1] = x;
                dst[4*i + 2] = x;
                dst[4*i + 3] = x;
            }

            PTR_ADD(dstBase, dstScan);
            syloc += syinc;
        }
        return;
    }

    aa = vis_fones();

    for (j = 0; j < height; j++) {
        mlib_u8 *src = srcBase;
        mlib_u8 *dst = dstBase;
        mlib_u8 *dst_end;
        mlib_s32 tmpsxloc = sxloc;

        PTR_ADD(src, (syloc >> shift) * srcScan);

        dst_end = dst + 4*width;

        if (!((mlib_s32)dst & 3)) {
            vis_alignaddr(NULL, 7);
#pragma pipeloop(0)
            for (; dst <= (dst_end - 4*4); dst += 4*4) {
                LOAD_NEXT_U8(dd, src + ((tmpsxloc + 3*sxinc) >> shift));
                LOAD_NEXT_U8(dd, src + ((tmpsxloc + 2*sxinc) >> shift));
                LOAD_NEXT_U8(dd, src + ((tmpsxloc +   sxinc) >> shift));
                LOAD_NEXT_U8(dd, src + ((tmpsxloc          ) >> shift));
                tmpsxloc += 4*sxinc;
                ff = vis_read_hi(dd);
                d0 = vis_fpmerge(aa, ff);
                d1 = vis_fpmerge(ff, ff);
                d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));
                d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));
                ((mlib_f32*)dst)[0] = vis_read_hi(d2);
                ((mlib_f32*)dst)[1] = vis_read_lo(d2);
                ((mlib_f32*)dst)[2] = vis_read_hi(d3);
                ((mlib_f32*)dst)[3] = vis_read_lo(d3);
            }
        } else {
            mlib_d64 *dp;

            dp = vis_alignaddr(dst, 0);
            d3 = vis_faligndata(dp[0], dp[0]);
            vis_alignaddrl(dst, 0);

#pragma pipeloop(0)
            for (; dst <= (dst_end - 4*4); dst += 4*4) {
                mlib_d64 s0, s1, s2, s3;
                s0 = vis_ld_u8(src + ((tmpsxloc          ) >> shift));
                s1 = vis_ld_u8(src + ((tmpsxloc +   sxinc) >> shift));
                s2 = vis_ld_u8(src + ((tmpsxloc + 2*sxinc) >> shift));
                s3 = vis_ld_u8(src + ((tmpsxloc + 3*sxinc) >> shift));
                tmpsxloc += 4*sxinc;
                s0 = vis_fpmerge(vis_read_lo(s0), vis_read_lo(s2));
                s1 = vis_fpmerge(vis_read_lo(s1), vis_read_lo(s3));
                dd = vis_fpmerge(vis_read_lo(s0), vis_read_lo(s1));
                ff = vis_read_lo(dd);
                d0 = vis_fpmerge(aa, ff);
                d1 = vis_fpmerge(ff, ff);
                d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));
                *dp++ = vis_faligndata(d3, d2);
                d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));
                *dp++ = vis_faligndata(d2, d3);
            }

            vis_pst_8(vis_faligndata(d3, d3), dp, vis_edge8(dp, dst - 1));
        }

        while (dst < dst_end) {
            x = src[tmpsxloc >> shift];
            tmpsxloc += sxinc;
            dst[0] = 0xff;
            dst[1] = x;
            dst[2] = x;
            dst[3] = x;
            dst += 4;
        }

        PTR_ADD(dstBase, dstScan);
        syloc += syinc;
    }
}

/***************************************************************/

/*
 * ByteIndexed -> FourByteAbgr blit.
 *
 * Every source byte indexes pixLut (pSrcInfo->lutBase).  The 32-bit lookup
 * result x is written to the destination as four bytes in the order
 * x >> 24, x, x >> 8, x >> 16 (A, B, G, R going by the function name).
 *
 * srcBase, dstBase, width, height, pSrcInfo and pDstInfo are not declared
 * locally; they come from the BLIT_PARAMS parameter list.
 */
void ADD_SUFF(ByteIndexedToFourByteAbgrConvert)(BLIT_PARAMS)
{
    jint *pixLut = pSrcInfo->lutBase;
    mlib_s32 dstScan = pDstInfo->scanStride;
    mlib_s32 srcScan = pSrcInfo->scanStride;
    mlib_d64 dd, d_old;
    mlib_s32 i, j, x;

/*  Disabled shortcut, kept for reference: delegate to the Int variant when
    the destination base and stride are both 4-byte aligned.

    if (!(((mlib_s32)dstBase | dstScan) & 3)) {
    ADD_SUFF(ByteIndexedToIntAbgrConvert)(BLIT_CALL_PARAMS);
    return;
    }*/

    /* Narrow images: plain scalar loop, no VIS setup. */
    if (width < 8) {
        for (j = 0; j < height; j++) {
            mlib_u8 *src = srcBase;
            mlib_u8 *dst = dstBase;

            for (i = 0; i < width; i++) {
                x = pixLut[src[i]];
                dst[4*i    ] = x >> 24;
                dst[4*i + 1] = x;
                dst[4*i + 2] = x >> 8;
                dst[4*i + 3] = x >> 16;
            }

            PTR_ADD(dstBase, dstScan);
            PTR_ADD(srcBase, srcScan);
        }
        return;
    }

    /*
     * Rows are back to back in both buffers (1 byte per source pixel,
     * 4 bytes per destination pixel): treat the image as one long row.
     */
    if (srcScan == width && dstScan == 4*width) {
        width *= height;
        height = 1;
    }

    /* Writes the byte-shuffle mask when VIS >= 0x200; expands to nothing
       otherwise. */
    BMASK_FOR_ARGB

    for (j = 0; j < height; j++) {
        mlib_u8 *src = srcBase;
        mlib_u8 *dst = dstBase;
        mlib_u8 *dst_end;

        dst_end = dst + 4*width;

        if (!((mlib_s32)dst & 7)) {
            /* dst is 8-byte aligned: convert two pixels per iteration and
               store them with one 64-bit write. */
#pragma pipeloop(0)
            for (; dst <= (dst_end - 2*4); dst += 2*4) {
                dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],
                                   ((mlib_f32*)pixLut)[src[1]]);
                ARGB2ABGR_DB(dd)
                *(mlib_d64*)dst = dd;
                src += 2;
            }
        } else {
            /*
             * Unaligned dst: write whole aligned 64-bit words through dp,
             * stitching consecutive pixel pairs together with
             * vis_faligndata.  Per VIS semantics vis_alignaddr returns dst
             * rounded down to 8 bytes and records the byte offset in the
             * GSR; the initial faligndata(dp[0], dp[0]) rotates the word
             * already in memory so that the bytes preceding dst are
             * written back unchanged by the first store.
             */
            mlib_d64 *dp;

            dp = vis_alignaddr(dst, 0);
            dd = vis_faligndata(dp[0], dp[0]);
            vis_alignaddrl(dst, 0);

#pragma pipeloop(0)
            for (; dst <= (dst_end - 2*4); dst += 2*4) {
                d_old = dd;
                dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],
                                   ((mlib_f32*)pixLut)[src[1]]);
                ARGB2ABGR_DB(dd)
                *dp++ = vis_faligndata(d_old, dd);
                src += 2;
            }

            /* Flush the tail of the last converted pair; the edge mask
               restricts the store to the bytes from dp up to dst - 1. */
            vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));
        }

        /* Scalar tail for whatever the pair loops left over. */
        while (dst < dst_end) {
            x = pixLut[*src++];
            dst[0] = x >> 24;
            dst[1] = x;
            dst[2] = x >> 8;
            dst[3] = x >> 16;
            dst += 4;
        }

        PTR_ADD(dstBase, dstScan);
        PTR_ADD(srcBase, srcScan);
    }
}

/***************************************************************/

/*
 * ByteIndexedBm -> FourByteAbgr blit that skips transparent pixels.
 *
 * Every source byte indexes pixLut (pSrcInfo->lutBase).  A lookup result x
 * is written only when x < 0, i.e. when bit 31 of the entry is set; other
 * destination pixels are left untouched.  Written pixels are stored as the
 * bytes x >> 24, x, x >> 8, x >> 16.
 *
 * srcBase, dstBase, width, height, pSrcInfo and pDstInfo come from the
 * BLIT_PARAMS parameter list.
 */
void ADD_SUFF(ByteIndexedBmToFourByteAbgrXparOver)(BLIT_PARAMS)
{
    jint *pixLut = pSrcInfo->lutBase;
    mlib_s32 dstScan = pDstInfo->scanStride;
    mlib_s32 srcScan = pSrcInfo->scanStride;
    mlib_d64 dd, dzero;
    mlib_s32 i, j, x, mask;

/*  Disabled shortcut, kept for reference.
    NOTE(review): the callee named here is a plain Convert loop, which
    looks like it would ignore transparency - confirm before re-enabling.

    if (!(((mlib_s32)dstBase | dstScan) & 3)) {
    ADD_SUFF(ByteIndexedToIntAbgrConvert)(BLIT_CALL_PARAMS);
    return;
    }*/

    /* Narrow images: plain scalar loop, no VIS setup. */
    if (width < 8) {
        for (j = 0; j < height; j++) {
            mlib_u8 *src = srcBase;
            mlib_u8 *dst = dstBase;

            for (i = 0; i < width; i++) {
                x = pixLut[src[i]];
                if (x < 0) {
                    dst[4*i    ] = x >> 24;
                    dst[4*i + 1] = x;
                    dst[4*i + 2] = x >> 8;
                    dst[4*i + 3] = x >> 16;
                }
            }

            PTR_ADD(dstBase, dstScan);
            PTR_ADD(srcBase, srcScan);
        }
        return;
    }

    /* Rows are back to back in both buffers: treat the image as one long
       row. */
    if (srcScan == width && dstScan == 4*width) {
        width *= height;
        height = 1;
    }

    /* Writes the byte-shuffle mask when VIS >= 0x200; expands to nothing
       otherwise. */
    BMASK_FOR_ARGB

    dzero = vis_fzero();

    for (j = 0; j < height; j++) {
        mlib_u8 *src = srcBase;
        mlib_u8 *dst = dstBase;
        mlib_u8 *dst_end;

        dst_end = dst + 4*width;

        /*
         * The two-pixel VIS loop runs only when dst is 8-byte aligned;
         * there is no unaligned VIS variant here, so otherwise the whole
         * row is handled by the scalar loop below.
         */
        if (!((mlib_s32)dst & 7)) {
#pragma pipeloop(0)
            for (; dst <= (dst_end - 2*4); dst += 2*4) {
                dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],
                                   ((mlib_f32*)pixLut)[src[1]]);
                /* One mask bit per 32-bit lane whose value is negative;
                   taken before the bytes are reordered. */
                mask = vis_fcmplt32(dd, dzero);
                ARGB2ABGR_DB(dd)
                /* Partial store: only the lanes selected by mask. */
                vis_pst_32(dd, dst, mask);
                src += 2;
            }
        }

        /* Scalar path: leftovers, or the whole row when dst is unaligned. */
        while (dst < dst_end) {
            x = pixLut[*src++];
            if (x < 0) {
                dst[0] = x >> 24;
                dst[1] = x;
                dst[2] = x >> 8;
                dst[3] = x >> 16;
            }
            dst += 4;
        }

        PTR_ADD(dstBase, dstScan);
        PTR_ADD(srcBase, srcScan);
    }
}

/***************************************************************/

/*
 * ByteIndexedBm -> FourByteAbgr blit that replaces transparent pixels with
 * a background pixel.
 *
 * Every source byte indexes pixLut (pSrcInfo->lutBase).  When the lookup
 * result x is negative (bit 31 set) the destination receives the bytes
 * x >> 24, x, x >> 8, x >> 16; otherwise it receives the four bytes of
 * bgpixel, least significant byte first.
 *
 * srcBase, dstBase, width, height, bgpixel, pSrcInfo and pDstInfo come from
 * the BCOPY_PARAMS parameter list.
 */
void ADD_SUFF(ByteIndexedBmToFourByteAbgrXparBgCopy)(BCOPY_PARAMS)
{
    jint *pixLut = pSrcInfo->lutBase;
    mlib_s32 dstScan = pDstInfo->scanStride;
    mlib_s32 srcScan = pSrcInfo->scanStride;
    mlib_d64 dd, dzero, d_bgpixel;
    mlib_s32 i, j, x, mask;
    /* Background pixel split into the four bytes written to dst[0..3]. */
    mlib_s32 bgpix0 = bgpixel;
    mlib_s32 bgpix1 = bgpixel >> 8;
    mlib_s32 bgpix2 = bgpixel >> 16;
    mlib_s32 bgpix3 = bgpixel >> 24;

    /* Narrow images: plain scalar loop, no VIS setup. */
    if (width < 8) {
        for (j = 0; j < height; j++) {
            mlib_u8 *src = srcBase;
            mlib_u8 *dst = dstBase;

            for (i = 0; i < width; i++) {
                x = pixLut[src[i]];
                if (x < 0) {
                    dst[4*i    ] = x >> 24;
                    dst[4*i + 1] = x;
                    dst[4*i + 2] = x >> 8;
                    dst[4*i + 3] = x >> 16;
                } else {
                    dst[4*i    ] = bgpix0;
                    dst[4*i + 1] = bgpix1;
                    dst[4*i + 2] = bgpix2;
                    dst[4*i + 3] = bgpix3;
                }
            }

            PTR_ADD(dstBase, dstScan);
            PTR_ADD(srcBase, srcScan);
        }
        return;
    }

    /* Rows are back to back in both buffers: treat the image as one long
       row. */
    if (srcScan == width && dstScan == 4*width) {
        width *= height;
        height = 1;
    }

    /* Writes the byte-shuffle mask when VIS >= 0x200; expands to nothing
       otherwise. */
    BMASK_FOR_ARGB

    dzero = vis_fzero();
    /*
     * Two copies of the background pixel for the 64-bit store below.
     * NOTE(review): vis_ldfa_ASI_PL is presumably a little-endian load, so
     * the in-register byte order matches the bgpix0..bgpix3 order used by
     * the scalar paths - confirm against the VIS headers.
     */
    d_bgpixel = vis_freg_pair(vis_ldfa_ASI_PL(&bgpixel),
                              vis_ldfa_ASI_PL(&bgpixel));

    for (j = 0; j < height; j++) {
        mlib_u8 *src = srcBase;
        mlib_u8 *dst = dstBase;
        mlib_u8 *dst_end;

        dst_end = dst + 4*width;

        /*
         * The two-pixel VIS loop runs only when dst is 8-byte aligned;
         * otherwise the whole row is handled by the scalar loop below.
         */
        if (!((mlib_s32)dst & 7)) {
#pragma pipeloop(0)
            for (; dst <= (dst_end - 2*4); dst += 2*4) {
                dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],
                                   ((mlib_f32*)pixLut)[src[1]]);
                /* One mask bit per 32-bit lane whose value is negative. */
                mask = vis_fcmplt32(dd, dzero);
                ARGB2ABGR_DB(dd)
                /* Fill both pixels with the background first, then
                   overwrite only the lanes selected by mask. */
                *(mlib_d64*)dst = d_bgpixel;
                vis_pst_32(dd, dst, mask);
                src += 2;
            }
        }

        /* Scalar path: leftovers, or the whole row when dst is unaligned. */
        while (dst < dst_end) {
            x = pixLut[*src++];
            if (x < 0) {
                dst[0] = x >> 24;
                dst[1] = x;
                dst[2] = x >> 8;
                dst[3] = x >> 16;
            } else {
                dst[0] = bgpix0;
                dst[1] = bgpix1;
                dst[2] = bgpix2;
                dst[3] = bgpix3;
            }
            dst += 4;
        }

        PTR_ADD(dstBase, dstScan);
        PTR_ADD(srcBase, srcScan);
    }
}

/***************************************************************/

/*
 * Scaled ByteIndexed -> FourByteAbgr blit.
 *
 * For each destination row the source row is srcBase advanced by
 * (syloc >> shift) * srcScan bytes; for each destination pixel the source
 * byte is src[tmpsxloc >> shift].  syloc and tmpsxloc advance by syinc and
 * sxinc respectively.  The source byte indexes pixLut and the lookup result
 * x is stored as the bytes x >> 24, x, x >> 8, x >> 16.
 *
 * srcBase, dstBase, width, height, sxloc, syloc, sxinc, syinc, shift,
 * pSrcInfo and pDstInfo come from the SCALE_PARAMS parameter list.
 */
void ADD_SUFF(ByteIndexedToFourByteAbgrScaleConvert)(SCALE_PARAMS)
{
    jint *pixLut = pSrcInfo->lutBase;
    mlib_s32 dstScan = pDstInfo->scanStride;
    mlib_s32 srcScan = pSrcInfo->scanStride;
    mlib_d64 dd, d_old;
    mlib_s32 i, j, x;

/*
    Disabled shortcut, kept for reference: delegate to the Int variant when
    the destination base and stride are both 4-byte aligned.

    if (!(((mlib_s32)dstBase | dstScan) & 3)) {
        ADD_SUFF(ByteIndexedToIntAbgrScaleConvert)(SCALE_CALL_PARAMS);
        return;
    }
*/

    /* Narrow images: plain scalar loop, no VIS setup. */
    if (width < 8) {
        for (j = 0; j < height; j++) {
            mlib_u8 *src = srcBase;
            mlib_u8 *dst = dstBase;
            mlib_s32 tmpsxloc = sxloc;

            /* Select the source row for this destination row. */
            PTR_ADD(src, (syloc >> shift) * srcScan);

            for (i = 0; i < width; i++) {
                x = pixLut[src[tmpsxloc >> shift]];
                tmpsxloc += sxinc;
                dst[4*i    ] = x >> 24;
                dst[4*i + 1] = x;
                dst[4*i + 2] = x >> 8;
                dst[4*i + 3] = x >> 16;
            }

            PTR_ADD(dstBase, dstScan);
            syloc += syinc;
        }
        return;
    }

    /* Writes the byte-shuffle mask when VIS >= 0x200; expands to nothing
       otherwise. */
    BMASK_FOR_ARGB

    for (j = 0; j < height; j++) {
        mlib_u8 *src = srcBase;
        mlib_u8 *dst = dstBase;
        mlib_u8 *dst_end;
        mlib_s32 tmpsxloc = sxloc;

        /* Select the source row for this destination row. */
        PTR_ADD(src, (syloc >> shift) * srcScan);

        dst_end = dst + 4*width;

        if (!((mlib_s32)dst & 7)) {
            /* dst is 8-byte aligned: two pixels per 64-bit store.
               LOAD_2F32 presumably fetches the two lookup entries as one
               register pair - see its definition. */
#pragma pipeloop(0)
            for (; dst <= (dst_end - 2*4); dst += 2*4) {
                dd = LOAD_2F32(pixLut, src[tmpsxloc >> shift],
                                       src[(tmpsxloc + sxinc) >> shift]);
                tmpsxloc += 2*sxinc;
                ARGB2ABGR_DB(dd)
                *(mlib_d64*)dst = dd;
            }
        } else {
            /*
             * Unaligned dst: write whole aligned 64-bit words through dp,
             * stitching consecutive pixel pairs together with
             * vis_faligndata.  Per VIS semantics vis_alignaddr returns dst
             * rounded down to 8 bytes and records the byte offset in the
             * GSR; the initial faligndata(dp[0], dp[0]) rotates the word
             * already in memory so that the bytes preceding dst are
             * written back unchanged by the first store.
             */
            mlib_d64 *dp;

            dp = vis_alignaddr(dst, 0);
            dd = vis_faligndata(dp[0], dp[0]);
            vis_alignaddrl(dst, 0);

#pragma pipeloop(0)
            for (; dst <= (dst_end - 2*4); dst += 2*4) {
                d_old = dd;
                dd = LOAD_2F32(pixLut, src[tmpsxloc >> shift],
                                       src[(tmpsxloc + sxinc) >> shift]);
                tmpsxloc += 2*sxinc;
                ARGB2ABGR_DB(dd)
                *dp++ = vis_faligndata(d_old, dd);
            }

            /* Flush the tail of the last converted pair; the edge mask
               restricts the store to the bytes from dp up to dst - 1. */
            vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));
        }

        /* Scalar tail for whatever the pair loops left over. */
        while (dst < dst_end) {
            x = pixLut[src[tmpsxloc >> shift]];
            tmpsxloc += sxinc;
            dst[0] = x >> 24;
            dst[1] = x;
            dst[2] = x >> 8;
            dst[3] = x >> 16;
            dst += 4;
        }

        PTR_ADD(dstBase, dstScan);
        syloc += syinc;
    }
}

/***************************************************************/

/*
 * Scaled ByteIndexedBm -> FourByteAbgr blit that skips transparent pixels.
 *
 * For each destination row the source row is srcBase advanced by
 * (syloc >> shift) * srcScan bytes; for each destination pixel the source
 * byte is src[tmpsxloc >> shift].  The byte indexes pixLut; the lookup
 * result x is written (as the bytes x >> 24, x, x >> 8, x >> 16) only when
 * x < 0, i.e. bit 31 is set.  Other destination pixels are left untouched.
 *
 * srcBase, dstBase, width, height, sxloc, syloc, sxinc, syinc, shift,
 * pSrcInfo and pDstInfo come from the SCALE_PARAMS parameter list.
 */
void ADD_SUFF(ByteIndexedBmToFourByteAbgrScaleXparOver)(SCALE_PARAMS)
{
    jint *pixLut = pSrcInfo->lutBase;
    mlib_s32 dstScan = pDstInfo->scanStride;
    mlib_s32 srcScan = pSrcInfo->scanStride;
    mlib_d64 dd, dzero;
    mlib_s32 i, j, x, mask;

/*
    Disabled shortcut, kept for reference.
    NOTE(review): the callee named here is a plain ScaleConvert loop, which
    looks like it would ignore transparency - confirm before re-enabling.

    if (!(((mlib_s32)dstBase | dstScan) & 3)) {
        ADD_SUFF(ByteIndexedToIntAbgrScaleConvert)(SCALE_CALL_PARAMS);
        return;
    }
*/

    /* Narrow images: plain scalar loop, no VIS setup. */
    if (width < 8) {
        for (j = 0; j < height; j++) {
            mlib_u8 *src = srcBase;
            mlib_u8 *dst = dstBase;
            mlib_s32 tmpsxloc = sxloc;

            /* Select the source row for this destination row. */
            PTR_ADD(src, (syloc >> shift) * srcScan);

            for (i = 0; i < width; i++) {
                x = pixLut[src[tmpsxloc >> shift]];
                tmpsxloc += sxinc;
                if (x < 0) {
                    dst[4*i    ] = x >> 24;
                    dst[4*i + 1] = x;
                    dst[4*i + 2] = x >> 8;
                    dst[4*i + 3] = x >> 16;
                }
            }

            PTR_ADD(dstBase, dstScan);
            syloc += syinc;
        }
        return;
    }

    /* Writes the byte-shuffle mask when VIS >= 0x200; expands to nothing
       otherwise. */
    BMASK_FOR_ARGB

    dzero = vis_fzero();

    for (j = 0; j < height; j++) {
        mlib_u8 *src = srcBase;
        mlib_u8 *dst = dstBase;
        mlib_u8 *dst_end;
        mlib_s32 tmpsxloc = sxloc;

        /* Select the source row for this destination row. */
        PTR_ADD(src, (syloc >> shift) * srcScan);

        dst_end = dst + 4*width;

        /*
         * The two-pixel VIS loop runs only when dst is 8-byte aligned;
         * otherwise the whole row is handled by the scalar loop below.
         */
        if (!((mlib_s32)dst & 7)) {
#pragma pipeloop(0)
            for (; dst <= (dst_end - 2*4); dst += 2*4) {
                dd = LOAD_2F32(pixLut, src[tmpsxloc >> shift],
                                       src[(tmpsxloc + sxinc) >> shift]);
                tmpsxloc += 2*sxinc;
                /* One mask bit per 32-bit lane whose value is negative;
                   taken before the bytes are reordered. */
                mask = vis_fcmplt32(dd, dzero);
                ARGB2ABGR_DB(dd)
                /* Partial store: only the lanes selected by mask. */
                vis_pst_32(dd, dst, mask);
            }
        }

        /* Scalar path: leftovers, or the whole row when dst is unaligned. */
        while (dst < dst_end) {
            x = pixLut[src[tmpsxloc >> shift]];
            tmpsxloc += sxinc;
            if (x < 0) {
                dst[0] = x >> 24;
                dst[1] = x;
                dst[2] = x >> 8;
                dst[3] = x >> 16;
            }
            dst += 4;
        }

        PTR_ADD(dstBase, dstScan);
        syloc += syinc;
    }
}

/***************************************************************/

/*
 * Scaled IntArgbBm -> FourByteAbgr blit that skips transparent pixels.
 *
 * For each destination row the source row is srcBase advanced by
 * (syloc >> shift) * srcScan bytes; for each destination pixel the 32-bit
 * source pixel is src[tmpsxloc >> shift].  A pixel x is written only when
 * (x >> 24) is non-zero, and then as the bytes 0xFF, x, x >> 8, x >> 16.
 * Other destination pixels are left untouched.
 *
 * srcBase, dstBase, width, height, sxloc, syloc, sxinc, syinc, shift,
 * pSrcInfo and pDstInfo come from the SCALE_PARAMS parameter list.
 */
void ADD_SUFF(IntArgbBmToFourByteAbgrScaleXparOver)(SCALE_PARAMS)
{
    mlib_s32 dstScan = pDstInfo->scanStride;
    mlib_s32 srcScan = pSrcInfo->scanStride;
    mlib_d64 dd, amask;
    mlib_s32 i, j, x, mask;

    /* Narrow images: plain scalar loop, no VIS setup. */
    if (width < 16) {
        for (j = 0; j < height; j++) {
            mlib_s32 *src = srcBase;
            mlib_u8  *dst = dstBase;
            mlib_s32 tmpsxloc = sxloc;

            /* Select the source row for this destination row. */
            PTR_ADD(src, (syloc >> shift) * srcScan);

            for (i = 0; i < width; i++) {
                x = src[tmpsxloc >> shift];
                tmpsxloc += sxinc;
                if (x >> 24) {
                    dst[4*i    ] = 0xFF;
                    dst[4*i + 1] = x;
                    dst[4*i + 2] = x >> 8;
                    dst[4*i + 3] = x >> 16;
                }
            }

            PTR_ADD(dstBase, dstScan);
            syloc += syinc;
        }
        return;
    }

    /* Writes the byte-shuffle mask when VIS >= 0x200; expands to nothing
       otherwise. */
    BMASK_FOR_ARGB

    /* 0xFF000000 in both 32-bit halves; OR-ed into each output pair to
       force the first byte of every stored pixel to 0xFF. */
    amask = vis_to_double_dup(0xFF000000);

    for (j = 0; j < height; j++) {
        mlib_s32 *src = srcBase;
        mlib_u8  *dst = dstBase;
        mlib_u8  *dst_end;
        mlib_s32 tmpsxloc = sxloc;

        /* Select the source row for this destination row. */
        PTR_ADD(src, (syloc >> shift) * srcScan);

        dst_end = dst + 4*width;

        /*
         * The two-pixel VIS loop runs only when dst is 8-byte aligned;
         * otherwise the whole row is handled by the scalar loop below.
         */
        if (!((mlib_s32)dst & 7)) {
#pragma pipeloop(0)
            for (; dst <= (dst_end - 2*4); dst += 2*4) {
                mlib_s32 *pp0 = src + (tmpsxloc >> shift);
                mlib_s32 *pp1 = src + ((tmpsxloc + sxinc) >> shift);
                dd = vis_freg_pair(*(mlib_f32*)pp0, *(mlib_f32*)pp1);
                tmpsxloc += 2*sxinc;
                ARGB2ABGR_DB(dd)
                dd = vis_for(dd, amask);
                /*
                 * Build the partial-store mask from the first byte in
                 * memory of each source pixel (the top byte on a
                 * big-endian target, matching the x >> 24 test in the
                 * scalar paths).  Negating a non-zero byte gives a
                 * negative int, so >> 31 yields all ones (assumes an
                 * arithmetic shift).  Bit value 2 selects the first
                 * pixel, bit value 1 the second.
                 */
                mask = (((-*(mlib_u8*)pp0) >> 31) & 2) |
                       (((-*(mlib_u8*)pp1) >> 31) & 1);
                vis_pst_32(dd, dst, mask);
            }
        }

        /* Scalar path: leftovers, or the whole row when dst is unaligned. */
        while (dst < dst_end) {
            x = src[tmpsxloc >> shift];
            tmpsxloc += sxinc;
            if (x >> 24) {
                dst[0] = 0xFF;
                dst[1] = x;
                dst[2] = x >> 8;
                dst[3] = x >> 16;
            }
            dst += 4;
        }

        PTR_ADD(dstBase, dstScan);
        syloc += syinc;
    }
}

/***************************************************************/

/*
 * Export IntArgbBmToFourByteAbgrPreScaleXparOver as a weak alias of
 * IntArgbBmToFourByteAbgrScaleXparOver (with the _F suffix on both names
 * when MLIB_ADD_SUFF is defined) instead of providing a separate body.
 * NOTE(review): presumably valid because that loop only ever stores pixels
 * whose first byte is 0xFF, so the "Pre" and non-"Pre" results coincide -
 * confirm.
 */
#ifdef MLIB_ADD_SUFF
#pragma weak IntArgbBmToFourByteAbgrPreScaleXparOver_F =       \
             IntArgbBmToFourByteAbgrScaleXparOver_F
#else
#pragma weak IntArgbBmToFourByteAbgrPreScaleXparOver =         \
             IntArgbBmToFourByteAbgrScaleXparOver
#endif

/***************************************************************/

void ADD_SUFF(FourByteAbgrToIntArgbScaleConvert)(SCALE_PARAMS)
{
    mlib_s32 dstScan = pDstInfo->scanStride;
    mlib_s32 srcScan = pSrcInfo->scanStride;
    mlib_s32 i, j;

    if (width < 16) {
        for (j = 0; j < height; j++) {
            mlib_u8  *src = srcBase;
            mlib_s32 *dst = dstBase;
            mlib_s32 tmpsxloc = sxloc;

            PTR_ADD(src, (syloc >> shift) * srcScan);

            for (i = 0; i < width; i++) {
                mlib_u8 *pp = src + 4*(tmpsxloc >> shift);
                *dst++ = (pp[0] << 24) | (pp[3] << 16) | (pp[2] << 8) | pp[1];
                tmpsxloc += sxinc;
            }

            PTR_ADD(dstBase, dstScan);
            syloc += syinc;
        }
        return;
    }

    BMASK_FOR_ARGB

    for (j = 0; j < height; j++) {
        mlib_u8  *src = srcBase;
        mlib_s32 *dst = dstBase;
        mlib_s32 *dst_end = dst + width;
        mlib_s32 tmpsxloc = sxloc;
        mlib_s32 off;
        mlib_d64 dd, dd0, dd1;
        mlib_f32 *pp0, *pp1;

        PTR_ADD(src, (syloc >> shift) * srcScan);

        if ((mlib_s32)dst & 7) {
            mlib_u8 *pp = src + 4*(tmpsxloc >> shift);
            *dst++ = (pp[0] << 24) | (pp[3] << 16) | (pp[2] << 8) | pp[1];
            tmpsxloc += sxinc;
        }

        off = (mlib_s32)src & 3;
        if (!off) {
#pragma pipeloop(0)
            for (; dst <= (dst_end - 2); dst += 2) {
                pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
                pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
                tmpsxloc += 2*sxinc;
                dd = vis_freg_pair(pp0[0], pp1[0]);
                ARGB2ABGR_DB(dd)
                *(mlib_d64*)dst = dd;
            }
        } else {
            vis_alignaddr(NULL, off);
#pragma pipeloop(0)
            for (; dst <= (dst_end - 2); dst += 2) {
                pp0 = (mlib_f32*)(src - off) + (tmpsxloc >> shift);
                pp1 = (mlib_f32*)(src - off) + ((tmpsxloc + sxinc) >> shift);
                tmpsxloc += 2*sxinc;
                dd0 = vis_freg_pair(pp0[0], pp0[1]);
                dd1 = vis_freg_pair(pp1[0], pp1[1]);
                dd0 = vis_faligndata(dd0, dd0);
                dd1 = vis_faligndata(dd1, dd1);
                ARGB2ABGR_FL2(dd, vis_read_hi(dd0), vis_read_hi(dd1))
                *(mlib_d64*)dst = dd;
            }
        }

        if (dst < dst_end) {
            mlib_u8 *pp = src + 4*(tmpsxloc >> shift);
            *dst++ = (pp[0] << 24) | (pp[3] << 16) | (pp[2] << 8) | pp[1];
            tmpsxloc += sxinc;
        }

        PTR_ADD(dstBase, dstScan);
        syloc += syinc;
    }
}

/***************************************************************/

/*
 * Scaled IntArgb -> FourByteAbgr blit.
 *
 * For each destination row the source row is srcBase advanced by
 * (syloc >> shift) * srcScan bytes; for each destination pixel the 32-bit
 * source pixel is src[tmpsxloc >> shift].  Each pixel x is stored as the
 * bytes x >> 24, x, x >> 8, x >> 16.
 *
 * srcBase, dstBase, width, height, sxloc, syloc, sxinc, syinc, shift,
 * pSrcInfo and pDstInfo come from the SCALE_PARAMS parameter list.
 */
void ADD_SUFF(IntArgbToFourByteAbgrScaleConvert)(SCALE_PARAMS)
{
    mlib_s32 dstScan = pDstInfo->scanStride;
    mlib_s32 srcScan = pSrcInfo->scanStride;
    mlib_s32 i, j;
    mlib_s32 x;

    /* Narrow images: plain scalar loop, no VIS setup. */
    if (width < 16) {
        for (j = 0; j < height; j++) {
            mlib_s32 *src = srcBase;
            mlib_u8  *dst = dstBase;
            mlib_s32 tmpsxloc = sxloc;

            /* Select the source row for this destination row. */
            PTR_ADD(src, (syloc >> shift) * srcScan);

            for (i = 0; i < width; i++) {
                x = src[tmpsxloc >> shift];
                tmpsxloc += sxinc;
                dst[4*i    ] = x >> 24;
                dst[4*i + 1] = x;
                dst[4*i + 2] = x >> 8;
                dst[4*i + 3] = x >> 16;
            }

            PTR_ADD(dstBase, dstScan);
            syloc += syinc;
        }
        return;
    }

    /* Writes the byte-shuffle mask when VIS >= 0x200; expands to nothing
       otherwise. */
    BMASK_FOR_ARGB

    for (j = 0; j < height; j++) {
        mlib_s32 *src = srcBase;
        mlib_u8  *dst = dstBase;
        mlib_u8  *dst_end = dst + 4*width;
        mlib_s32 tmpsxloc = sxloc;
        mlib_d64 dd, d_old;
        mlib_f32 *pp0, *pp1;

        /* Select the source row for this destination row. */
        PTR_ADD(src, (syloc >> shift) * srcScan);

        if (!((mlib_s32)dst & 3)) {
            /* dst is 4-byte aligned: convert one leading pixel if needed
               to reach an 8-byte boundary, then store pixel pairs
               directly. */
            if ((mlib_s32)dst & 7) {
                x = src[tmpsxloc >> shift];
                tmpsxloc += sxinc;
                dst[0] = x >> 24;
                dst[1] = x;
                dst[2] = x >> 8;
                dst[3] = x >> 16;
                dst += 4;
            }
#pragma pipeloop(0)
            for (; dst <= (dst_end - 2*4); dst += 2*4) {
                pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
                pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
                tmpsxloc += 2*sxinc;
                dd = vis_freg_pair(pp0[0], pp1[0]);
                ARGB2ABGR_DB(dd)
                *(mlib_d64*)dst = dd;
            }
        } else {
            /*
             * dst is not even 4-byte aligned: write whole aligned 64-bit
             * words through dp, stitching consecutive pixel pairs together
             * with vis_faligndata.  Per VIS semantics vis_alignaddr
             * returns dst rounded down to 8 bytes and records the byte
             * offset in the GSR; the initial faligndata(dp[0], dp[0])
             * rotates the word already in memory so that the bytes
             * preceding dst are written back unchanged by the first store.
             */
            mlib_d64 *dp;

            dp = vis_alignaddr(dst, 0);
            dd = vis_faligndata(dp[0], dp[0]);
            vis_alignaddrl(dst, 0);

#pragma pipeloop(0)
            for (; dst <= (dst_end - 2*4); dst += 2*4) {
                d_old = dd;
                pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
                pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
                tmpsxloc += 2*sxinc;
                dd = vis_freg_pair(pp0[0], pp1[0]);
                ARGB2ABGR_DB(dd)
                *dp++ = vis_faligndata(d_old, dd);
            }

            /* Flush the tail of the last converted pair; the edge mask
               restricts the store to the bytes from dp up to dst - 1. */
            vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));
        }

        /* At most one pixel can remain after the pair loops. */
        if (dst < dst_end) {
            x = src[tmpsxloc >> shift];
            tmpsxloc += sxinc;
            dst[0] = x >> 24;
            dst[1] = x;
            dst[2] = x >> 8;
            dst[3] = x >> 16;
            dst += 4;
        }

        PTR_ADD(dstBase, dstScan);
        syloc += syinc;
    }
}

/***************************************************************/

void ADD_SUFF(IntRgbToFourByteAbgrScaleConvert)(SCALE_PARAMS)
{
    mlib_s32 dstScan = pDstInfo->scanStride;
    mlib_s32 srcScan = pSrcInfo->scanStride;
    mlib_s32 i, j;
    mlib_s32 x;
    mlib_d64 amask = vis_to_double_dup(0xFF000000);

    if (width < 16) {
        for (j = 0; j < height; j++) {
            mlib_s32 *src = srcBase;
            mlib_u8  *dst = dstBase;
            mlib_s32 tmpsxloc = sxloc;

            PTR_ADD(src, (syloc >> shift) * srcScan);

            for (i = 0; i < width; i++) {
                x = src[tmpsxloc >> shift];
                tmpsxloc += sxinc;
                dst[4*i    ] = 0xFF;
                dst[4*i + 1] = x;
                dst[4*i + 2] = x >> 8;
                dst[4*i + 3] = x >> 16;
            }

            PTR_ADD(dstBase, dstScan);
            syloc += syinc;
        }
        return;
    }

    BMASK_FOR_ARGB

    for (j = 0; j < height; j++) {
        mlib_s32 *src = srcBase;
        mlib_u8  *dst = dstBase;
        mlib_u8  *dst_end = dst + 4*width;
        mlib_s32 tmpsxloc = sxloc;
        mlib_d64 dd, d_old;
        mlib_f32 *pp0, *pp1;

        PTR_ADD(src, (syloc >> shift) * srcScan);

        if (!((mlib_s32)dst & 3)) {
            if ((mlib_s32)dst & 7) {
                x = src[tmpsxloc >> shift];
                tmpsxloc += sxinc;
                dst[0] = 0xFF;
                dst[1] = x;
                dst[2] = x >> 8;
                dst[3] = x >> 16;
                dst += 4;
            }
#pragma pipeloop(0)
            for (; dst <= (dst_end - 2*4); dst += 2*4) {
                pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
                pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
                tmpsxloc += 2*sxinc;
                dd = vis_freg_pair(pp0[0], pp1[0]);
                RGB2ABGR_DB(dd)
                *(mlib_d64*)dst = dd;
            }
        } else {
            mlib_d64 *dp;

            dp = vis_alignaddr(dst, 0);
            dd = vis_faligndata(dp[0], dp[0]);
            vis_alignaddrl(dst, 0);

#pragma pipeloop(0)
            for (; dst <= (dst_end - 2*4); dst += 2*4) {
                d_old = dd;
                pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
                pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
                tmpsxloc += 2*sxinc;
                dd = vis_freg_pair(pp0[0], pp1[0]);
                RGB2ABGR_DB(dd)
                *dp++ = vis_faligndata(d_old, dd);
            }


/**代码未完, 请加载全部代码(NowJava.com).**/
展开阅读全文

关注时代Java

关注时代Java