/*
* Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include "vis_proto.h"
#include "mlib_image.h"
#include "mlib_v_ImageLookUpFunc.h"
/***************************************************************/
static void mlib_v_ImageLookUpSI_U8_S32_2_SrcOff0_D1(const mlib_u8 *src,
mlib_s32 *dst,
mlib_s32 xsize,
const mlib_d64 *table);
static void mlib_v_ImageLookUpSI_U8_S32_2_DstNonAl_D1(const mlib_u8 *src,
mlib_s32 *dst,
mlib_s32 xsize,
const mlib_d64 *table);
static void mlib_v_ImageLookUpSI_U8_S32_2_SMALL(const mlib_u8 *src,
mlib_s32 *dst,
mlib_s32 xsize,
const mlib_s32 **table);
static void mlib_v_ImageLookUpSI_U8_S32_3_SrcOff0_D1(const mlib_u8 *src,
mlib_s32 *dst,
mlib_s32 xsize,
const mlib_d64 *table);
static void mlib_v_ImageLookUpSI_U8_S32_3_DstNonAl_D1(const mlib_u8 *src,
mlib_s32 *dst,
mlib_s32 xsize,
const mlib_d64 *table);
static void mlib_v_ImageLookUpSI_U8_S32_3_SMALL(const mlib_u8 *src,
mlib_s32 *dst,
mlib_s32 xsize,
const mlib_s32 **table);
static void mlib_v_ImageLookUpSI_U8_S32_4_SrcOff0_D1(const mlib_u8 *src,
mlib_s32 *dst,
mlib_s32 xsize,
const mlib_d64 *table);
static void mlib_v_ImageLookUpSI_U8_S32_4_DstNonAl_D1(const mlib_u8 *src,
mlib_s32 *dst,
mlib_s32 xsize,
const mlib_d64 *table);
static void mlib_v_ImageLookUpSI_U8_S32_4_SMALL(const mlib_u8 *src,
mlib_s32 *dst,
mlib_s32 xsize,
const mlib_s32 **table);
/***************************************************************/
void mlib_v_ImageLookUpSI_U8_S32_2_SrcOff0_D1(const mlib_u8 *src,
mlib_s32 *dst,
mlib_s32 xsize,
const mlib_d64 *table)
{
mlib_u32 *sa; /* aligned pointer to source data */
mlib_u8 *sp; /* pointer to source data */
mlib_u32 s0; /* source data */
mlib_d64 *dp; /* aligned pointer to destination */
mlib_d64 acc0, acc1; /* destination data */
mlib_d64 acc2, acc3; /* destination data */
mlib_s32 i; /* loop variable */
mlib_u32 s00, s01, s02, s03;
sa = (mlib_u32*)src;
dp = (mlib_d64 *) dst;
i = 0;
if (xsize >= 4) {
s0 = *sa++;
s00 = (s0 >> 21) & 0x7F8;
s01 = (s0 >> 13) & 0x7F8;
#pragma pipeloop(0)
for(i = 0; i <= xsize - 8; i+=4, dp += 4) {
s02 = (s0 >> 5) & 0x7F8;
s03 = (s0 << 3) & 0x7F8;
acc0 = *(mlib_d64*)((mlib_u8*)table + s00);
acc1 = *(mlib_d64*)((mlib_u8*)table + s01);
acc2 = *(mlib_d64*)((mlib_u8*)table + s02);
acc3 = *(mlib_d64*)((mlib_u8*)table + s03);
s0 = *sa++;
s00 = (s0 >> 21) & 0x7F8;
s01 = (s0 >> 13) & 0x7F8;
dp[0] = acc0;
dp[1] = acc1;
dp[2] = acc2;
dp[3] = acc3;
}
s02 = (s0 >> 5) & 0x7F8;
s03 = (s0 << 3) & 0x7F8;
acc0 = *(mlib_d64*)((mlib_u8*)table + s00);
acc1 = *(mlib_d64*)((mlib_u8*)table + s01);
acc2 = *(mlib_d64*)((mlib_u8*)table + s02);
acc3 = *(mlib_d64*)((mlib_u8*)table + s03);
dp[0] = acc0;
dp[1] = acc1;
dp[2] = acc2;
dp[3] = acc3;
dp += 4;
i += 4;
}
sp = (mlib_u8*)sa;
if ( i <= xsize - 2) {
*dp++ = table[sp[0]];
*dp++ = table[sp[1]];
i+=2; sp += 2;
}
if ( i < xsize) *dp++ = table[sp[0]];
}
/***************************************************************/
void mlib_v_ImageLookUpSI_U8_S32_2_DstNonAl_D1(const mlib_u8 *src,
mlib_s32 *dst,
mlib_s32 xsize,
const mlib_d64 *table)
{
mlib_u32 *sa; /* aligned pointer to source data */
mlib_u8 *sp; /* pointer to source data */
mlib_u32 s0; /* source data */
mlib_s32 *dl; /* pointer to start of destination */
mlib_d64 *dp; /* aligned pointer to destination */
mlib_d64 acc0, acc1; /* destination data */
mlib_d64 acc2, acc3, acc4; /* destination data */
mlib_s32 i; /* loop variable */
mlib_u32 s00, s01, s02, s03;
sa = (mlib_u32*)src;
dl = dst;
dp = (mlib_d64 *) ((mlib_addr) dl & (~7)) + 1;
vis_alignaddr(dp, 4);
s0 = *sa++;
s00 = (s0 >> 21) & 0x7F8;
acc0 = *(mlib_d64*)((mlib_u8*)table + s00);
*(mlib_f32*)dl = vis_read_hi(acc0);
xsize--;
sp = (mlib_u8*)sa - 3;
if (xsize >= 3) {
s01 = (s0 >> 13) & 0x7F8;
s02 = (s0 >> 5) & 0x7F8;
s03 = (s0 << 3) & 0x7F8;
acc1 = *(mlib_d64*)((mlib_u8*)table + s01);
acc2 = *(mlib_d64*)((mlib_u8*)table + s02);
acc3 = *(mlib_d64*)((mlib_u8*)table + s03);
dp[0] = vis_faligndata(acc0, acc1);
dp[1] = vis_faligndata(acc1, acc2);
dp[2] = vis_faligndata(acc2, acc3);
acc0 = acc3; dp += 3; xsize -= 3;
sp = (mlib_u8*)sa;
}
i = 0;
if (xsize >= 4) {
s0 = *sa++;
s00 = (s0 >> 21) & 0x7F8;
s01 = (s0 >> 13) & 0x7F8;
#pragma pipeloop(0)
for(i = 0; i <= xsize - 8; i+=4, dp += 4) {
s02 = (s0 >> 5) & 0x7F8;
s03 = (s0 << 3) & 0x7F8;
acc1 = *(mlib_d64*)((mlib_u8*)table + s00);
acc2 = *(mlib_d64*)((mlib_u8*)table + s01);
acc3 = *(mlib_d64*)((mlib_u8*)table + s02);
acc4 = *(mlib_d64*)((mlib_u8*)table + s03);
s0 = *sa++;
s00 = (s0 >> 21) & 0x7F8;
s01 = (s0 >> 13) & 0x7F8;
dp[0] = vis_faligndata(acc0, acc1);
dp[1] = vis_faligndata(acc1, acc2);
dp[2] = vis_faligndata(acc2, acc3);
dp[3] = vis_faligndata(acc3, acc4);
acc0 = acc4;
}
s02 = (s0 >> 5) & 0x7F8;
s03 = (s0 << 3) & 0x7F8;
acc1 = *(mlib_d64*)((mlib_u8*)table + s00);
acc2 = *(mlib_d64*)((mlib_u8*)table + s01);
acc3 = *(mlib_d64*)((mlib_u8*)table + s02);
acc4 = *(mlib_d64*)((mlib_u8*)table + s03);
dp[0] = vis_faligndata(acc0, acc1);
dp[1] = vis_faligndata(acc1, acc2);
dp[2] = vis_faligndata(acc2, acc3);
dp[3] = vis_faligndata(acc3, acc4);
acc0 = acc4;
dp += 4;
i += 4;
sp = (mlib_u8*)sa;
}
if ( i <= xsize - 2) {
acc1 = table[sp[0]];
acc2 = table[sp[1]];
*dp++ = vis_faligndata(acc0, acc1);
*dp++ = vis_faligndata(acc1, acc2);
i+=2; sp += 2;
acc0 = acc2;
}
if ( i < xsize) {
acc1 = table[sp[0]];
*dp++ = vis_faligndata(acc0, acc1);
acc0 = acc1;
}
*(mlib_f32*) dp = vis_read_lo(acc0);
}
/***************************************************************/
void mlib_v_ImageLookUpSI_U8_S32_2_SMALL(const mlib_u8 *src,
mlib_s32 *dst,
mlib_s32 xsize,
const mlib_s32 **table)
{
mlib_u32 *sa; /* aligned pointer to source data */
mlib_u8 *sp; /* pointer to source data */
mlib_u32 s0; /* source data */
mlib_f32 *dp; /* aligned pointer to destination */
mlib_f32 acc0, acc1; /* destination data */
mlib_f32 acc2, acc3; /* destination data */
mlib_f32 acc4, acc5; /* destination data */
mlib_f32 acc6, acc7; /* destination data */
mlib_f32 *table0 = (mlib_f32*)table[0];
mlib_f32 *table1 = (mlib_f32*)table[1];
mlib_s32 i; /* loop variable */
mlib_u32 s00, s01, s02, s03;
sa = (mlib_u32*)src;
dp = (mlib_f32*)dst;
i = 0;
if (xsize >= 4) {
s0 = *sa++;
s00 = (s0 >> 22) & 0x3FC;
s01 = (s0 >> 14) & 0x3FC;
#pragma pipeloop(0)
for(i = 0; i <= xsize - 8; i+=4, dp += 8) {
s02 = (s0 >> 6) & 0x3FC;
s03 = (s0 << 2) & 0x3FC;
acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
acc2 = *(mlib_f32*)((mlib_u8*)table0 + s01);
acc3 = *(mlib_f32*)((mlib_u8*)table1 + s01);
acc4 = *(mlib_f32*)((mlib_u8*)table0 + s02);
acc5 = *(mlib_f32*)((mlib_u8*)table1 + s02);
acc6 = *(mlib_f32*)((mlib_u8*)table0 + s03);
acc7 = *(mlib_f32*)((mlib_u8*)table1 + s03);
s0 = *sa++;
s00 = (s0 >> 22) & 0x3FC;
s01 = (s0 >> 14) & 0x3FC;
dp[0] = acc0;
dp[1] = acc1;
dp[2] = acc2;
dp[3] = acc3;
dp[4] = acc4;
dp[5] = acc5;
dp[6] = acc6;
dp[7] = acc7;
}
s02 = (s0 >> 6) & 0x3FC;
s03 = (s0 << 2) & 0x3FC;
acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
acc2 = *(mlib_f32*)((mlib_u8*)table0 + s01);
acc3 = *(mlib_f32*)((mlib_u8*)table1 + s01);
acc4 = *(mlib_f32*)((mlib_u8*)table0 + s02);
acc5 = *(mlib_f32*)((mlib_u8*)table1 + s02);
acc6 = *(mlib_f32*)((mlib_u8*)table0 + s03);
acc7 = *(mlib_f32*)((mlib_u8*)table1 + s03);
dp[0] = acc0;
dp[1] = acc1;
dp[2] = acc2;
dp[3] = acc3;
dp[4] = acc4;
dp[5] = acc5;
dp[6] = acc6;
dp[7] = acc7;
dp += 8;
i += 4;
}
sp = (mlib_u8*)sa;
if ( i < xsize ) {
*dp++ = table0[sp[0]];
*dp++ = table1[sp[0]];
i++; sp++;
}
if ( i < xsize ) {
*dp++ = table0[sp[0]];
*dp++ = table1[sp[0]];
i++; sp++;
}
if ( i < xsize ) {
*dp++ = table0[sp[0]];
*dp++ = table1[sp[0]];
}
}
/***************************************************************/
void mlib_v_ImageLookUpSI_U8_S32_2(const mlib_u8 *src,
mlib_s32 slb,
mlib_s32 *dst,
mlib_s32 dlb,
mlib_s32 xsize,
mlib_s32 ysize,
const mlib_s32 **table)
{
if ((xsize * ysize) < 600) {
mlib_u8 *sl;
mlib_s32 *dl;
mlib_s32 j, i;
const mlib_s32 *tab0 = table[0];
const mlib_s32 *tab1 = table[1];
sl = (void *)src;
dl = dst;
/* row loop */
for (j = 0; j < ysize; j ++) {
mlib_u8 *sp = sl;
mlib_s32 *dp = dl;
mlib_s32 off, size = xsize;
off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
off = (off < size) ? off : size;
for (i = 0; i < off; i++) {
*dp++ = tab0[sp[0]];
*dp++ = tab1[sp[0]];
size--; sp++;
}
if (size > 0) {
mlib_v_ImageLookUpSI_U8_S32_2_SMALL(sp, (mlib_s32*)dp, size, table);
}
sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
}
} else {
mlib_u8 *sl;
mlib_s32 *dl;
mlib_d64 dtab[256];
mlib_u32 *tab;
mlib_u32 *tab0 = (mlib_u32*)table[0];
mlib_u32 *tab1 = (mlib_u32*)table[1];
mlib_s32 i, j;
mlib_u32 s0, s1;
tab = (mlib_u32*)dtab;
s0 = tab0[0];
s1 = tab1[0];
for (i = 0; i < 255; i++) {
tab[2*i] = s0;
tab[2*i+1] = s1;
s0 = tab0[i+1];
s1 = tab1[i+1];
}
tab[510] = s0;
tab[511] = s1;
sl = (void *)src;
dl = dst;
/* row loop */
for (j = 0; j < ysize; j ++) {
mlib_u8 *sp = sl;
mlib_u32 *dp = (mlib_u32*)dl;
mlib_s32 off, size = xsize;
off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
off = (off < size) ? off : size;
#pragma pipeloop(0)
for (i = 0; i < off; i++) {
dp[0] = tab0[sp[0]];
dp[1] = tab1[sp[0]];
dp += 2; sp++;
}
size -= off;
if (size > 0) {
if (((mlib_addr)dp & 7) == 0) {
mlib_v_ImageLookUpSI_U8_S32_2_SrcOff0_D1(sp, (mlib_s32*)dp, size, dtab);
} else {
mlib_v_ImageLookUpSI_U8_S32_2_DstNonAl_D1(sp, (mlib_s32*)dp, size, dtab);
}
}
sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
}
}
}
/***************************************************************/
void mlib_v_ImageLookUpSI_U8_S32_3_SrcOff0_D1(const mlib_u8 *src,
mlib_s32 *dst,
mlib_s32 xsize,
const mlib_d64 *table)
{
mlib_u8 *sp; /* pointer to source data */
mlib_u32 *sa; /* aligned pointer to source data */
mlib_u32 s0; /* source data */
mlib_s32 *dl; /* pointer to start of destination */
mlib_d64 *dp; /* aligned pointer to destination */
mlib_d64 t0, t1, t2, t3; /* destination data */
mlib_d64 t4, t5, t6, t7; /* destination data */
mlib_s32 i; /* loop variable */
mlib_s32 *ptr;
mlib_u32 s00, s01, s02, s03;
dl = dst;
sp = (void *)src;
dp = (mlib_d64 *) dl;
sa = (mlib_u32*)sp;
vis_alignaddr((void *) 0, 4);
i = 0;
if (xsize >= 4) {
s0 = *sa++;
s00 = (s0 >> 20) & 0xFF0;
s01 = (s0 >> 12) & 0xFF0;
#pragma pipeloop(0)
for(i = 0; i <= xsize - 8; i+=4, dp+=6) {
s02 = (s0 >> 4) & 0xFF0;
s03 = (s0 << 4) & 0xFF0;
t0 = *(mlib_d64*)((mlib_u8*)table + s00);
t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
t2 = *(mlib_d64*)((mlib_u8*)table + s01);
t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
t4 = *(mlib_d64*)((mlib_u8*)table + s02);
t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
t6 = *(mlib_d64*)((mlib_u8*)table + s03);
t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
t1 = vis_faligndata(t1, t1);
t1 = vis_faligndata(t1, t2);
t2 = vis_faligndata(t2, t3);
t5 = vis_faligndata(t5, t5);
t5 = vis_faligndata(t5, t6);
t6 = vis_faligndata(t6, t7);
s0 = *sa++;
s00 = (s0 >> 20) & 0xFF0;
s01 = (s0 >> 12) & 0xFF0;
dp[0] = t0;
dp[1] = t1;
dp[2] = t2;
dp[3] = t4;
dp[4] = t5;
dp[5] = t6;
}
s02 = (s0 >> 4) & 0xFF0;
s03 = (s0 << 4) & 0xFF0;
t0 = *(mlib_d64*)((mlib_u8*)table + s00);
t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
t2 = *(mlib_d64*)((mlib_u8*)table + s01);
t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
t4 = *(mlib_d64*)((mlib_u8*)table + s02);
t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
t6 = *(mlib_d64*)((mlib_u8*)table + s03);
t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
t1 = vis_faligndata(t1, t1);
t1 = vis_faligndata(t1, t2);
t2 = vis_faligndata(t2, t3);
t5 = vis_faligndata(t5, t5);
t5 = vis_faligndata(t5, t6);
t6 = vis_faligndata(t6, t7);
dp[0] = t0;
dp[1] = t1;
dp[2] = t2;
dp[3] = t4;
dp[4] = t5;
dp[5] = t6;
i += 4; dp += 6;
}
dl = (mlib_s32*)dp;
sp = (mlib_u8*)sa;
#pragma pipeloop(0)
for (; i < xsize; i++) {
ptr = (mlib_s32*)(table + (sp[0] << 1));
dl[0] = ptr[0];
dl[1] = ptr[1];
dl[2] = ptr[2];
dl += 3; sp ++;
}
}
/***************************************************************/
void mlib_v_ImageLookUpSI_U8_S32_3_DstNonAl_D1(const mlib_u8 *src,
mlib_s32 *dst,
mlib_s32 xsize,
const mlib_d64 *table)
{
mlib_u8 *sp; /* pointer to source data */
mlib_u32 *sa; /* aligned pointer to source data */
mlib_u32 s0; /* source data */
mlib_s32 *dl; /* pointer to start of destination */
mlib_d64 *dp; /* aligned pointer to destination */
mlib_d64 t0, t1, t2, t3; /* destination data */
mlib_d64 t4, t5, t6, t7; /* destination data */
mlib_s32 i; /* loop variable */
mlib_s32 *ptr;
mlib_u32 s00, s01, s02, s03;
dl = dst;
sp = (void *)src;
dp = (mlib_d64 *) ((mlib_addr) dl & (~7));
sa = (mlib_u32*)sp;
vis_alignaddr((void *) 0, 4);
i = 0;
if (xsize >= 4) {
s0 = *sa++;
s00 = (s0 >> 20) & 0xFF0;
s01 = (s0 >> 12) & 0xFF0;
#pragma pipeloop(0)
for(i = 0; i <= xsize - 8; i+=4, dp+=6) {
s02 = (s0 >> 4) & 0xFF0;
s03 = (s0 << 4) & 0xFF0;
t0 = *(mlib_d64*)((mlib_u8*)table + s00);
t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
t2 = *(mlib_d64*)((mlib_u8*)table + s01);
t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
t4 = *(mlib_d64*)((mlib_u8*)table + s02);
t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
t6 = *(mlib_d64*)((mlib_u8*)table + s03);
t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
t1 = vis_faligndata(t0, t1);
t3 = vis_faligndata(t3, t3);
t3 = vis_faligndata(t3, t4);
t4 = vis_faligndata(t4, t5);
s0 = *sa++;
s00 = (s0 >> 20) & 0xFF0;
s01 = (s0 >> 12) & 0xFF0;
*(mlib_f32*)((mlib_f32*)dp + 1) = vis_read_hi(t0);
dp[1] = t1;
dp[2] = t2;
dp[3] = t3;
dp[4] = t4;
dp[5] = t6;
*(mlib_f32*)((mlib_f32*)dp + 12) = vis_read_hi(t7);
}
s02 = (s0 >> 4) & 0xFF0;
s03 = (s0 << 4) & 0xFF0;
t0 = *(mlib_d64*)((mlib_u8*)table + s00);
t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
t2 = *(mlib_d64*)((mlib_u8*)table + s01);
t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
t4 = *(mlib_d64*)((mlib_u8*)table + s02);
t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
t6 = *(mlib_d64*)((mlib_u8*)table + s03);
t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
t1 = vis_faligndata(t0, t1);
t3 = vis_faligndata(t3, t3);
t3 = vis_faligndata(t3, t4);
t4 = vis_faligndata(t4, t5);
*(mlib_f32*)((mlib_f32*)dp + 1) = vis_read_hi(t0);
dp[1] = t1;
dp[2] = t2;
dp[3] = t3;
dp[4] = t4;
dp[5] = t6;
*(mlib_f32*)((mlib_f32*)dp + 12) = vis_read_hi(t7);
i += 4; dp += 6;
}
dl = (mlib_s32*)dp + 1;
sp = (mlib_u8*)sa;
#pragma pipeloop(0)
for (; i < xsize; i++) {
ptr = (mlib_s32*)(table + (sp[0] << 1));
dl[0] = ptr[0];
dl[1] = ptr[1];
dl[2] = ptr[2];
dl += 3; sp ++;
}
}
/***************************************************************/
void mlib_v_ImageLookUpSI_U8_S32_3_SMALL(const mlib_u8 *src,
mlib_s32 *dst,
mlib_s32 xsize,
const mlib_s32 **table)
{
mlib_u32 *sa; /* aligned pointer to source data */
mlib_u8 *sp; /* pointer to source data */
mlib_u32 s0; /* source data */
mlib_f32 *dp; /* aligned pointer to destination */
mlib_f32 acc0, acc1; /* destination data */
mlib_f32 acc2, acc3; /* destination data */
mlib_f32 acc4, acc5; /* destination data */
mlib_f32 acc6, acc7; /* destination data */
mlib_f32 acc8, acc9; /* destination data */
mlib_f32 acc10, acc11; /* destination data */
mlib_f32 *table0 = (mlib_f32*)table[0];
mlib_f32 *table1 = (mlib_f32*)table[1];
mlib_f32 *table2 = (mlib_f32*)table[2];
mlib_s32 i; /* loop variable */
mlib_u32 s00, s01, s02, s03;
sa = (mlib_u32*)src;
dp = (mlib_f32*)dst;
i = 0;
if (xsize >= 4) {
s0 = *sa++;
s00 = (s0 >> 22) & 0x3FC;
s01 = (s0 >> 14) & 0x3FC;
#pragma pipeloop(0)
for(i = 0; i <= xsize - 8; i+=4, dp += 12) {
s02 = (s0 >> 6) & 0x3FC;
s03 = (s0 << 2) & 0x3FC;
acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
acc2 = *(mlib_f32*)((mlib_u8*)table2 + s00);
acc3 = *(mlib_f32*)((mlib_u8*)table0 + s01);
acc4 = *(mlib_f32*)((mlib_u8*)table1 + s01);
acc5 = *(mlib_f32*)((mlib_u8*)table2 + s01);
acc6 = *(mlib_f32*)((mlib_u8*)table0 + s02);
acc7 = *(mlib_f32*)((mlib_u8*)table1 + s02);
acc8 = *(mlib_f32*)((mlib_u8*)table2 + s02);
acc9 = *(mlib_f32*)((mlib_u8*)table0 + s03);
acc10 = *(mlib_f32*)((mlib_u8*)table1 + s03);
acc11 = *(mlib_f32*)((mlib_u8*)table2 + s03);
s0 = *sa++;
s00 = (s0 >> 22) & 0x3FC;
s01 = (s0 >> 14) & 0x3FC;
dp[0] = acc0;
dp[1] = acc1;
dp[2] = acc2;
dp[3] = acc3;
dp[4] = acc4;
dp[5] = acc5;
dp[6] = acc6;
dp[7] = acc7;
dp[8] = acc8;
dp[9] = acc9;
dp[10] = acc10;
dp[11] = acc11;
}
s02 = (s0 >> 6) & 0x3FC;
s03 = (s0 << 2) & 0x3FC;
acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
acc2 = *(mlib_f32*)((mlib_u8*)table2 + s00);
acc3 = *(mlib_f32*)((mlib_u8*)table0 + s01);
acc4 = *(mlib_f32*)((mlib_u8*)table1 + s01);
acc5 = *(mlib_f32*)((mlib_u8*)table2 + s01);
acc6 = *(mlib_f32*)((mlib_u8*)table0 + s02);
acc7 = *(mlib_f32*)((mlib_u8*)table1 + s02);
acc8 = *(mlib_f32*)((mlib_u8*)table2 + s02);
acc9 = *(mlib_f32*)((mlib_u8*)table0 + s03);
acc10 = *(mlib_f32*)((mlib_u8*)table1 + s03);
acc11 = *(mlib_f32*)((mlib_u8*)table2 + s03);
dp[0] = acc0;
dp[1] = acc1;
dp[2] = acc2;
dp[3] = acc3;
dp[4] = acc4;
dp[5] = acc5;
dp[6] = acc6;
dp[7] = acc7;
dp[8] = acc8;
dp[9] = acc9;
dp[10] = acc10;
dp[11] = acc11;
dp += 12;
i += 4;
}
sp = (mlib_u8*)sa;
if ( i < xsize ) {
*dp++ = table0[sp[0]];
*dp++ = table1[sp[0]];
*dp++ = table2[sp[0]];
i++; sp++;
}
if ( i < xsize ) {
*dp++ = table0[sp[0]];
*dp++ = table1[sp[0]];
*dp++ = table2[sp[0]];
i++; sp++;
}
if ( i < xsize ) {
*dp++ = table0[sp[0]];
*dp++ = table1[sp[0]];
*dp++ = table2[sp[0]];
}
}
/***************************************************************/
void mlib_v_ImageLookUpSI_U8_S32_3(const mlib_u8 *src,
mlib_s32 slb,
mlib_s32 *dst,
mlib_s32 dlb,
mlib_s32 xsize,
mlib_s32 ysize,
const mlib_s32 **table)
{
if ((xsize * ysize) < 600) {
mlib_u8 *sl;
mlib_s32 *dl;
mlib_s32 j, i;
const mlib_s32 *tab0 = table[0];
const mlib_s32 *tab1 = table[1];
const mlib_s32 *tab2 = table[2];
sl = (void *)src;
dl = dst;
/* row loop */
for (j = 0; j < ysize; j ++) {
mlib_u8 *sp = sl;
mlib_s32 *dp = dl;
mlib_s32 off, size = xsize;
off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
off = (off < size) ? off : size;
for (i = 0; i < off; i++) {
*dp++ = tab0[sp[0]];
*dp++ = tab1[sp[0]];
*dp++ = tab2[sp[0]];
size--; sp++;
}
if (size > 0) {
mlib_v_ImageLookUpSI_U8_S32_3_SMALL(sp, (mlib_s32*)dp, size, table);
}
sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
}
} else {
mlib_u8 *sl;
mlib_s32 *dl;
mlib_d64 dtab[512];
mlib_u32 *tab;
mlib_u32 *tab0 = (mlib_u32*)table[0];
mlib_u32 *tab1 = (mlib_u32*)table[1];
mlib_u32 *tab2 = (mlib_u32*)table[2];
mlib_s32 i, j;
mlib_u32 s0, s1, s2;
tab = (mlib_u32*)dtab;
s0 = tab0[0];
s1 = tab1[0];
s2 = tab2[0];
for (i = 0; i < 255; i++) {
tab[4*i] = s0;
tab[4*i+1] = s1;
tab[4*i+2] = s2;
s0 = tab0[i+1];
s1 = tab1[i+1];
s2 = tab2[i+1];
}
tab[1020] = s0;
tab[1021] = s1;
tab[1022] = s2;
sl = (void *)src;
dl = dst;
/* row loop */
for (j = 0; j < ysize; j ++) {
mlib_u8 *sp = sl;
mlib_u32 *dp = (mlib_u32*)dl;
mlib_s32 off, size = xsize;
off = (mlib_s32)((4 - ((mlib_addr)sp & 3)) & 3);
off = (off < size) ? off : size;
#pragma pipeloop(0)
for (i = 0; i < off; i++) {
dp[0] = tab0[sp[0]];
dp[1] = tab1[sp[0]];
dp[2] = tab2[sp[0]];
dp += 3; sp++;
}
size -= off;
if (size > 0) {
if (((mlib_addr)dp & 7) == 0) {
mlib_v_ImageLookUpSI_U8_S32_3_SrcOff0_D1(sp, (mlib_s32*)dp, size, dtab);
} else {
mlib_v_ImageLookUpSI_U8_S32_3_DstNonAl_D1(sp, (mlib_s32*)dp, size, dtab);
}
}
sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
}
}
}
/***************************************************************/
void mlib_v_ImageLookUpSI_U8_S32_4_SrcOff0_D1(const mlib_u8 *src,
mlib_s32 *dst,
mlib_s32 xsize,
const mlib_d64 *table)
{
mlib_u32 *sa; /* aligned pointer to source data */
mlib_u8 *sp; /* pointer to source data */
mlib_u32 s0; /* source data */
mlib_d64 *dp; /* aligned pointer to destination */
mlib_d64 t0, t1, t2, t3; /* destination data */
mlib_d64 t4, t5, t6, t7; /* destination data */
mlib_s32 i; /* loop variable */
mlib_u32 s00, s01, s02, s03;
sa = (mlib_u32*)src;
dp = (mlib_d64 *) dst;
i = 0;
if (xsize >= 4) {
s0 = *sa++;
s00 = (s0 >> 20) & 0xFF0;
s01 = (s0 >> 12) & 0xFF0;
#pragma pipeloop(0)
for(i = 0; i <= xsize - 8; i+=4, dp+=8) {
s02 = (s0 >> 4) & 0xFF0;
s03 = (s0 << 4) & 0xFF0;
t0 = *(mlib_d64*)((mlib_u8*)table + s00);
t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
t2 = *(mlib_d64*)((mlib_u8*)table + s01);
t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
t4 = *(mlib_d64*)((mlib_u8*)table + s02);
t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
t6 = *(mlib_d64*)((mlib_u8*)table + s03);
t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
s0 = *sa++;
s00 = (s0 >> 20) & 0xFF0;
s01 = (s0 >> 12) & 0xFF0;
dp[0] = t0;
dp[1] = t1;
dp[2] = t2;
dp[3] = t3;
dp[4] = t4;
dp[5] = t5;
dp[6] = t6;
dp[7] = t7;
}
s02 = (s0 >> 4) & 0xFF0;
s03 = (s0 << 4) & 0xFF0;
t0 = *(mlib_d64*)((mlib_u8*)table + s00);
t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
t2 = *(mlib_d64*)((mlib_u8*)table + s01);
t3 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
t4 = *(mlib_d64*)((mlib_u8*)table + s02);
t5 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
t6 = *(mlib_d64*)((mlib_u8*)table + s03);
t7 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
dp[0] = t0;
dp[1] = t1;
dp[2] = t2;
dp[3] = t3;
dp[4] = t4;
dp[5] = t5;
dp[6] = t6;
dp[7] = t7;
dp += 8;
i += 4;
}
sp = (mlib_u8*)sa;
if ( i < xsize ) {
*dp++ = table[2*sp[0]];
*dp++ = table[2*sp[0] + 1];
i++; sp++;
}
if ( i < xsize ) {
*dp++ = table[2*sp[0]];
*dp++ = table[2*sp[0] + 1];
i++; sp++;
}
if ( i < xsize ) {
*dp++ = table[2*sp[0]];
*dp++ = table[2*sp[0] + 1];
}
}
/***************************************************************/
void mlib_v_ImageLookUpSI_U8_S32_4_DstNonAl_D1(const mlib_u8 *src,
mlib_s32 *dst,
mlib_s32 xsize,
const mlib_d64 *table)
{
mlib_u32 *sa; /* aligned pointer to source data */
mlib_u8 *sp; /* pointer to source data */
mlib_u32 s0; /* source data */
mlib_s32 *dl; /* pointer to start of destination */
mlib_d64 *dp; /* aligned pointer to destination */
mlib_d64 t0, t1, t2, t3; /* destination data */
mlib_d64 t4, t5, t6, t7, t8; /* destination data */
mlib_s32 i; /* loop variable */
mlib_u32 s00, s01, s02, s03;
sa = (mlib_u32*)src;
dl = dst;
dp = (mlib_d64 *) ((mlib_addr) dl & (~7)) + 1;
vis_alignaddr(dp, 4);
s0 = *sa++;
s00 = (s0 >> 20) & 0xFF0;
t0 = *(mlib_d64*)((mlib_u8*)table + s00);
t1 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
*(mlib_f32*)dl = vis_read_hi(t0);
dp[0] = vis_faligndata(t0, t1);
t0 = t1;
xsize--; dp++;
sp = (mlib_u8*)sa - 3;
if (xsize >= 3) {
s01 = (s0 >> 12) & 0xFF0;
s02 = (s0 >> 4) & 0xFF0;
s03 = (s0 << 4) & 0xFF0;
t1 = *(mlib_d64*)((mlib_u8*)table + s01);
t2 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
t3 = *(mlib_d64*)((mlib_u8*)table + s02);
t4 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
t5 = *(mlib_d64*)((mlib_u8*)table + s03);
t6 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
dp[0] = vis_faligndata(t0, t1);
dp[1] = vis_faligndata(t1, t2);
dp[2] = vis_faligndata(t2, t3);
dp[3] = vis_faligndata(t3, t4);
dp[4] = vis_faligndata(t4, t5);
dp[5] = vis_faligndata(t5, t6);
t0 = t6; dp += 6; xsize -= 3;
sp = (mlib_u8*)sa;
}
i = 0;
if (xsize >= 4) {
s0 = *sa++;
s00 = (s0 >> 20) & 0xFF0;
s01 = (s0 >> 12) & 0xFF0;
#pragma pipeloop(0)
for(i = 0; i <= xsize - 8; i+=4, dp += 8) {
s02 = (s0 >> 4) & 0xFF0;
s03 = (s0 << 4) & 0xFF0;
t1 = *(mlib_d64*)((mlib_u8*)table + s00);
t2 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
t3 = *(mlib_d64*)((mlib_u8*)table + s01);
t4 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
t5 = *(mlib_d64*)((mlib_u8*)table + s02);
t6 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
t7 = *(mlib_d64*)((mlib_u8*)table + s03);
t8 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
s0 = *sa++;
s00 = (s0 >> 20) & 0xFF0;
s01 = (s0 >> 12) & 0xFF0;
dp[0] = vis_faligndata(t0, t1);
dp[1] = vis_faligndata(t1, t2);
dp[2] = vis_faligndata(t2, t3);
dp[3] = vis_faligndata(t3, t4);
dp[4] = vis_faligndata(t4, t5);
dp[5] = vis_faligndata(t5, t6);
dp[6] = vis_faligndata(t6, t7);
dp[7] = vis_faligndata(t7, t8);
t0 = t8;
}
s02 = (s0 >> 4) & 0xFF0;
s03 = (s0 << 4) & 0xFF0;
t1 = *(mlib_d64*)((mlib_u8*)table + s00);
t2 = *(mlib_d64*)((mlib_u8*)table + s00 + 8);
t3 = *(mlib_d64*)((mlib_u8*)table + s01);
t4 = *(mlib_d64*)((mlib_u8*)table + s01 + 8);
t5 = *(mlib_d64*)((mlib_u8*)table + s02);
t6 = *(mlib_d64*)((mlib_u8*)table + s02 + 8);
t7 = *(mlib_d64*)((mlib_u8*)table + s03);
t8 = *(mlib_d64*)((mlib_u8*)table + s03 + 8);
dp[0] = vis_faligndata(t0, t1);
dp[1] = vis_faligndata(t1, t2);
dp[2] = vis_faligndata(t2, t3);
dp[3] = vis_faligndata(t3, t4);
dp[4] = vis_faligndata(t4, t5);
dp[5] = vis_faligndata(t5, t6);
dp[6] = vis_faligndata(t6, t7);
dp[7] = vis_faligndata(t7, t8);
t0 = t8;
dp += 8;
i += 4;
sp = (mlib_u8*)sa;
}
if ( i < xsize ) {
t1 = table[2*sp[0]];
t2 = table[2*sp[0] + 1];
*dp++ = vis_faligndata(t0, t1);
*dp++ = vis_faligndata(t1, t2);
i++; sp++;
t0 = t2;
}
if ( i < xsize ) {
t1 = table[2*sp[0]];
t2 = table[2*sp[0] + 1];
*dp++ = vis_faligndata(t0, t1);
*dp++ = vis_faligndata(t1, t2);
i++; sp++;
t0 = t2;
}
if ( i < xsize ) {
t1 = table[2*sp[0]];
t2 = table[2*sp[0] + 1];
*dp++ = vis_faligndata(t0, t1);
*dp++ = vis_faligndata(t1, t2);
t0 = t2;
}
*(mlib_f32*) dp = vis_read_lo(t0);
}
/***************************************************************/
void mlib_v_ImageLookUpSI_U8_S32_4_SMALL(const mlib_u8 *src,
mlib_s32 *dst,
mlib_s32 xsize,
const mlib_s32 **table)
{
mlib_u32 *sa; /* aligned pointer to source data */
mlib_u8 *sp; /* pointer to source data */
mlib_u32 s0; /* source data */
mlib_f32 *dp; /* aligned pointer to destination */
mlib_f32 acc0, acc1; /* destination data */
mlib_f32 acc2, acc3; /* destination data */
mlib_f32 acc4, acc5; /* destination data */
mlib_f32 acc6, acc7; /* destination data */
mlib_f32 acc8, acc9; /* destination data */
mlib_f32 acc10, acc11; /* destination data */
mlib_f32 acc12, acc13; /* destination data */
mlib_f32 acc14, acc15; /* destination data */
mlib_f32 *table0 = (mlib_f32*)table[0];
mlib_f32 *table1 = (mlib_f32*)table[1];
mlib_f32 *table2 = (mlib_f32*)table[2];
mlib_f32 *table3 = (mlib_f32*)table[3];
mlib_s32 i; /* loop variable */
mlib_u32 s00, s01, s02, s03;
sa = (mlib_u32*)src;
dp = (mlib_f32*)dst;
i = 0;
if (xsize >= 4) {
s0 = *sa++;
s00 = (s0 >> 22) & 0x3FC;
s01 = (s0 >> 14) & 0x3FC;
#pragma pipeloop(0)
for(i = 0; i <= xsize - 8; i+=4, dp += 16) {
s02 = (s0 >> 6) & 0x3FC;
s03 = (s0 << 2) & 0x3FC;
acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
acc2 = *(mlib_f32*)((mlib_u8*)table2 + s00);
acc3 = *(mlib_f32*)((mlib_u8*)table3 + s00);
acc4 = *(mlib_f32*)((mlib_u8*)table0 + s01);
acc5 = *(mlib_f32*)((mlib_u8*)table1 + s01);
acc6 = *(mlib_f32*)((mlib_u8*)table2 + s01);
acc7 = *(mlib_f32*)((mlib_u8*)table3 + s01);
acc8 = *(mlib_f32*)((mlib_u8*)table0 + s02);
acc9 = *(mlib_f32*)((mlib_u8*)table1 + s02);
acc10 = *(mlib_f32*)((mlib_u8*)table2 + s02);
acc11 = *(mlib_f32*)((mlib_u8*)table3 + s02);
acc12 = *(mlib_f32*)((mlib_u8*)table0 + s03);
acc13 = *(mlib_f32*)((mlib_u8*)table1 + s03);
acc14 = *(mlib_f32*)((mlib_u8*)table2 + s03);
acc15 = *(mlib_f32*)((mlib_u8*)table3 + s03);
s0 = *sa++;
s00 = (s0 >> 22) & 0x3FC;
s01 = (s0 >> 14) & 0x3FC;
dp[0] = acc0;
dp[1] = acc1;
dp[2] = acc2;
dp[3] = acc3;
dp[4] = acc4;
dp[5] = acc5;
dp[6] = acc6;
dp[7] = acc7;
dp[8] = acc8;
dp[9] = acc9;
dp[10] = acc10;
dp[11] = acc11;
dp[12] = acc12;
dp[13] = acc13;
dp[14] = acc14;
dp[15] = acc15;
}
s02 = (s0 >> 6) & 0x3FC;
s03 = (s0 << 2) & 0x3FC;
acc0 = *(mlib_f32*)((mlib_u8*)table0 + s00);
acc1 = *(mlib_f32*)((mlib_u8*)table1 + s00);
acc2 = *(mlib_f32*)((mlib_u8*)table2 + s00);
acc3 = *(mlib_f32*)((mlib_u8*)table3 + s00);
acc4 = *(mlib_f32*)((mlib_u8*)table0 + s01);
acc5 = *(mlib_f32*)((mlib_u8*)table1 + s01);
acc6 = *(mlib_f32*)((mlib_u8*)table2 + s01);
acc7 = *(mlib_f32*)((mlib_u8*)table3 + s01);
acc8 = *(mlib_f32*)((mlib_u8*)table0 + s02);
acc9 = *(mlib_f32*)((mlib_u8*)table1 + s02);
/**代码未完, 请加载全部代码(NowJava.com).**/