/*   SCE CONFIDENTIAL                                       */
/*   PlayStation(R)3 Programmer Tool Runtime Library 475.001 */
/*   Copyright (C) 2009 Sony Computer Entertainment Inc.    */
/*   All Rights Reserved.                                   */

#ifndef __CELL_GCM_HELPER_H__
#define __CELL_GCM_HELPER_H__


/* builtin VMX */
#ifdef __PPU__
#include <vec_types.h>
#include <altivec.h>
#elif defined(__SPU__)
#include <vec_types.h>
#include <vmx2spu.h>
#endif


#ifdef __SNC__
#pragma diag_suppress=bad_linkage_of_ref_within_inline_function
#endif

// Exchange the upper and lower 16-bit halves of a float's 32-bit pattern.
// The value is handled purely as bits; no numeric conversion takes place.
static inline float cellGcmSwap16Float32(const float f)
{
    union HalfSwapBits
    {
        float asFloat;
        uint32_t asBits;
    } pun;

    pun.asFloat = f;

    const uint32_t upperToLower = pun.asBits >> 16;	// old high half, now low
    const uint32_t lowerToUpper = pun.asBits << 16;	// old low half, now high

    pun.asBits = lowerToUpper | upperToLower;
    return pun.asFloat;
}

//	Convert a float to 32bit signed fixed-point	[1:11:20]
//	(sign : integer bits : fraction bits).
//	The input is scaled by 2^20 and truncated toward zero by the integer cast.
//	No range check is performed on the scaled value.
static inline int32_t cellGcmGetFixedSint32(const float f)
{
	const float oneInFixed = 1048576.0f;	// 2^20, i.e. 1.0 in [1:11:20]

	return (int32_t)(oneInFixed * f);
}

//	Convert a float to 16bit unsigned fixed-point	[0:12:4]
//	(no sign bit : integer bits : fraction bits).
//	The input is scaled by 2^4 and truncated by the integer cast.
//	No range check is performed on the scaled value.
static inline uint16_t cellGcmGetFixedUint16(const float f)
{
	const float oneInFixed = 16.0f;	// 2^4, i.e. 1.0 in [0:12:4]

	return (uint16_t)(oneInFixed * f);
}

//	uint32 type memory copy
//
//	Copies 32-bit words from src to dst, passing each word through
//	CELL_GCM_ENDIAN_SWAP on the way.
//
//	dst : destination buffer, accessed as uint32_t
//	src : source buffer, accessed as uint32_t
//	n   : size in bytes; only whole 32-bit words (n / 4) are copied, any
//	      trailing 1-3 bytes are left untouched
//
//	Returns dst.
static inline void *cellGcmMemoryCopyUint32(void *dst, const void *src, size_t n)
{
	uint32_t *d = (uint32_t *)dst;
	const uint32_t *s = (const uint32_t *)src;

	// Count whole words up front. Decrementing an unsigned byte counter by 4
	// would wrap around when n is not a multiple of 4 and run past both buffers.
	const size_t words = n / sizeof(uint32_t);

	for (size_t i = 0; i < words; i++) {
		// Keep the macro argument free of side effects: it may be evaluated
		// more than once by CELL_GCM_ENDIAN_SWAP.
		d[i] = CELL_GCM_ENDIAN_SWAP(s[i]);
	}

	return dst;
}

// Round value up to the next multiple of alignment.
// An alignment of 0 returns value unchanged, and a value of 0 returns 0.
// alignment does not need to be a power of two.
static inline uint32_t cellGcmAlign(uint32_t alignment, uint32_t value)
{
	if (alignment == 0) {
		return value;
	}
	if (value == 0) {
		return 0;
	}

	// Number of alignment-sized units needed to hold value (ceiling division).
	const uint32_t units = (uint32_t)((value - 1) / alignment) + 1;
	return units * alignment;
}

#ifdef __SPU__
// SPU variant: pack a 32-bit float into a 16-bit half layout
// (1 sign bit, 5 exponent bits, 10 mantissa bits) using vector operations.
//
// The exponent is re-biased from 127 to 15 and clamped to [0, 31]; the
// mantissa is truncated to its top 10 bits (no rounding).
//
// NOTE(review): when the re-biased exponent is negative it is clamped to 0
// but the sign and mantissa bits are still OR-ed into the result, whereas the
// non-SPU variant in this file returns 0 in that case -- confirm whether the
// difference is intended.
static inline uint16_t cellGcmGetFloatToHalf(const float val)
{
	// Broadcast val to all lanes, then keep a different field in each lane:
	// lane 0 = sign, lane 1 = exponent, lane 2 = mantissa.
	vec_uint4 vec = (vec_uint4)vec_splats(val);
	vec_uint4 mask = {0x80000000, 0x7f800000, 0x007fffff, 0};	// s1, e8, m23, unused
	vec = (vec_uint4)vec_and(vec, mask);

	// Replicate each isolated field across its own vector.
	vec_uint4 sign = (vec_uint4)vec_splat(vec, 0);
	vec_int4  exponent = (vec_int4)vec_splat(vec, 1);
	vec_uint4 mantissa = (vec_uint4)vec_splat(vec, 2);

	// Shift counts, one per lane.
	vec_uint4 shift16 = (vec_uint4)vec_splats(16);
	vec_uint4 shift23 = (vec_uint4)vec_splats(23);
	vec_uint4 shift13 = (vec_uint4)vec_splats(13);
	vec_uint4 shift10 = (vec_uint4)vec_splats(10);

	// Move each field down to its half-float position / plain integer value.
	sign = (vec_uint4)vec_sr(sign, shift16);			// s >> 16 (bit 31 -> bit 15)
	exponent = (vec_int4)vec_sr(exponent, shift23);	// e >> 23 (raw biased exponent)
	mantissa = (vec_uint4)vec_sr(mantissa, shift13);	// m >> 13 (keep top 10 bits)

	vec_int4 bias = (vec_int4)vec_splats(127-15);	// difference between float and half bias
	vec_int4 zero = (vec_int4)vec_splats(0);
	vec_int4 emax = (vec_int4)vec_splats(31);		// largest 5-bit exponent field

	exponent = (vec_int4)vec_sub(exponent, bias);	// e - bias
	exponent = (vec_int4)vec_max(exponent, zero);	// if(e <  0) e = 0;
	exponent = (vec_int4)vec_min(exponent, emax);	// if(e > 31) e = 31;

	// (s) | (e<<10) | (m)
	vec_uint4 result = (vec_uint4)vec_sl((vec_uint4)exponent, shift10);
	result = (vec_uint4)vec_or(result, mantissa);
	result = (vec_uint4)vec_or(result, sign);

	// All lanes hold the same value; return lane 0 narrowed to 16 bits.
	return (uint16_t)vec_extract(result, 0);
}

// SPU variant: expand a 16-bit half layout (1 sign bit, 5 exponent bits,
// 10 mantissa bits) into a 32-bit float using vector operations.
//
// An input of exactly 0 yields 0.0f. Every other input has its exponent
// re-biased from 15 to 127 and its mantissa shifted into the top of the
// 23-bit float mantissa; exponent fields 0 and 31 get no special treatment.
static inline float cellGcmGetHalfToFloat(const uint16_t val)
{
	vec_uint4 vec = (vec_uint4)vec_splats((uint32_t)val);
	vec_uint4 uzero = (vec_uint4)vec_splats(0);
	vec_uint4 zmask = (vec_uint4)vec_cmpeq(vec, uzero);	// select mask: set when val == 0

	// Keep a different field in each lane:
	// lane 0 = sign, lane 1 = exponent, lane 2 = mantissa.
	vec_uint4 mask = {0x8000, 0x7c00, 0x03ff, 0};	// s1, e5, m10, unused
	vec = (vec_uint4)vec_and(vec, mask);

	// Replicate each isolated field across its own vector.
	vec_uint4 sign = (vec_uint4)vec_splat(vec, 0);
	vec_int4  exponent = (vec_int4)vec_splat(vec, 1);
	vec_uint4 mantissa = (vec_uint4)vec_splat(vec, 2);

	// Shift counts, one per lane.
	vec_uint4 shift16 = (vec_uint4)vec_splats(16);
	vec_uint4 shift23 = (vec_uint4)vec_splats(23);
	vec_uint4 shift13 = (vec_uint4)vec_splats(13);
	vec_uint4 shift10 = (vec_uint4)vec_splats(10);

	sign = (vec_uint4)vec_sl(sign, shift16);			// s << 16 (bit 15 -> bit 31)
	exponent = (vec_int4)vec_sr(exponent, shift10);	// e >> 10 (raw biased exponent)
	mantissa = (vec_uint4)vec_sl(mantissa, shift13);	// m << 13 (into top of 23-bit mantissa)

	vec_int4 bias = (vec_int4)vec_splats(127-15);	// difference between float and half bias
	exponent = (vec_int4)vec_add(exponent, bias);		// e + bias

	// (s) | (e<<23) | (m)
	// No 0x7f800000 mask is applied here; e is at most 31 + 112 = 143, which
	// already fits in the 8-bit exponent field.
	vec_uint4 result = (vec_uint4)vec_sl((vec_uint4)exponent, shift23);
	result = (vec_uint4)vec_or(result, mantissa);
	result = (vec_uint4)vec_or(result, sign);

	// val == 0 ? return 0 : return result
	result = (vec_uint4)vec_sel(result, uzero, zmask);

	// All lanes hold the same bits; return lane 0 viewed as a float.
	return (float)vec_extract((vec_float4)result, 0);
}

#else
// Non-SPU variant: pack a 32-bit float into a 16-bit half layout
// (1 sign bit, 5 exponent bits, 10 mantissa bits).
//
// val     : value to convert
// returns : the 16-bit half pattern
//
// Behaviour:
//  - an all-zero bit pattern returns 0;
//  - the exponent is re-biased from 127 to 15; if the result is negative the
//    function returns 0 (sign is dropped), if it exceeds 31 it is clamped
//    to 31 while the mantissa bits are kept;
//  - the mantissa is truncated to its top 10 bits (no rounding).
//
// The float's bits are read through a union, so the result does not depend
// on host byte order or on CELL_GCM_LITTLE_ENDIAN being defined.
static inline uint16_t cellGcmGetFloatToHalf(const float val)
{
	union
	{
		float f;
		uint32_t ui;
	} pun;
	pun.f = val;
	const uint32_t bits = pun.ui;

	if (bits == 0) {
		return 0;
	}

	// Re-bias the 8-bit exponent field for the 5-bit half exponent.
	int32_t e = (int32_t)((bits & 0x7f800000u) >> 23) - 127 + 15;
	if (e < 0) {
		return 0;
	}
	else if (e > 31) {
		e = 31;
	}
	const uint32_t s = bits & 0x80000000u;
	const uint32_t m = bits & 0x007fffffu;

	// (s) | (e<<10) | (m)
	return (uint16_t)(((s >> 16) & 0x8000u)
	                | (((uint32_t)e << 10) & 0x7c00u)
	                | ((m >> 13) & 0x03ffu));
}

// Non-SPU variant: expand a 16-bit half layout (1 sign bit, 5 exponent bits,
// 10 mantissa bits) into a 32-bit float.
//
// val     : 16-bit half pattern
// returns : the corresponding float
//
// Behaviour:
//  - an input of exactly 0 returns 0.0f;
//  - every other input has its exponent re-biased from 15 to 127 and its
//    mantissa shifted into the top of the 23-bit float mantissa; exponent
//    fields 0 and 31 get no special treatment.
//
// The float's bits are written through a union, so the result does not depend
// on host byte order or on CELL_GCM_LITTLE_ENDIAN being defined.
static inline float cellGcmGetHalfToFloat(const uint16_t val)
{
	if (val == 0) {
		return 0.0f;
	}

	const uint32_t s = val & 0x8000u;
	const int32_t e = (int32_t)((val & 0x7c00u) >> 10) - 15 + 127;
	const uint32_t m = val & 0x03ffu;

	union
	{
		float f;
		uint32_t ui;
	} pun;

	// (s<<16) | ((e<<23) & 0x7f800000) | (m<<13)
	pun.ui = (s << 16) | (((uint32_t)e << 23) & 0x7f800000u) | (m << 13);
	return pun.f;
}
#endif

#endif	/* __CELL_GCM_HELPER_H__ */
