/* WARNING: This file is generated by combine.pl from combine.inc.
   Please edit one of those files rather than this one. */

#line 1 "pixman-combine.c.template"
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <math.h>
#include <string.h>

#include "pixman-private.h"

#include "pixman-combine64.h"

/* Channel extractors for a packed pixel: shift the channel down by its
 * *_SHIFT and keep the low MASK bits.  Blue is read straight from the low
 * bits, so it needs no shift. */
#define Red(x) (((x) >> R_SHIFT) & MASK)
#define Green(x) (((x) >> G_SHIFT) & MASK)
#define Blue(x) ((x) & MASK)

/*** per channel helper functions ***/

/*
 * Component-alpha mask helper that updates both operands in place.
 *
 *   mask == 0        : *src is cleared, *mask is left alone.
 *   mask all ones    : *src is left alone, *mask becomes the source alpha
 *                      replicated through the G_SHIFT / R_SHIFT copies.
 *   anything else    : *src is multiplied per component by the mask
 *                      (FbByteMulC) and *mask is multiplied by the original
 *                      source alpha (FbByteMul).
 */
static void
fbCombineMaskC (uint64_t *src, uint64_t *mask)
{
    uint64_t mask_px = *mask;
    uint64_t src_px;
    uint32_t src_alpha;

    if (mask_px == 0)
    {
        *src = 0;
        return;
    }

    src_px = *src;

    if (mask_px != ~0)
    {
        /* Grab the alpha before the source pixel is modified. */
        src_alpha = src_px >> A_SHIFT;

        FbByteMulC (src_px, mask_px);
        *src = src_px;

        FbByteMul (mask_px, src_alpha);
        *mask = mask_px;
        return;
    }

    /* Fully set mask: spread the source alpha over the whole mask pixel. */
    src_px >>= A_SHIFT;
    src_px |= src_px << G_SHIFT;
    src_px |= src_px << R_SHIFT;
    *mask = src_px;
}

/*
 * Component-alpha mask helper that only touches the source value:
 * a zero mask clears *src, an all-ones mask leaves it unchanged, and any
 * other mask multiplies *src per component through FbByteMulC.
 * *mask is never written.
 */
static void
fbCombineMaskValueC (uint64_t *src, const uint64_t *mask)
{
    uint64_t mask_px = *mask;
    uint64_t src_px;

    if (mask_px == 0)
    {
        *src = 0;
    }
    else if (mask_px != ~0)
    {
        src_px = *src;
        FbByteMulC (src_px, mask_px);
        *src = src_px;
    }
}

/*
 * Component-alpha mask helper that only touches the mask: scale it by the
 * alpha of the source pixel.
 *
 *   mask == 0, or source alpha == MASK : *mask is left untouched.
 *   mask all ones                      : *mask becomes the source alpha
 *                                        replicated through the G_SHIFT /
 *                                        R_SHIFT copies.
 *   anything else                      : *mask is multiplied by the source
 *                                        alpha (FbByteMul).
 *
 * *src is never written.
 */
static void
fbCombineMaskAlphaC (const uint64_t *src, uint64_t *mask)
{
    uint64_t a = *(mask);
    uint64_t	x;

    if (!a)
	return;

    x = *(src) >> A_SHIFT;
    if (x == MASK)
	return;
    if (a == ~0)
    {
	/* x already holds the source alpha in its low bits.  It must not be
	 * shifted by A_SHIFT a second time: doing so throws the alpha away
	 * and stores an empty mask. */
	x |= x << G_SHIFT;
	x |= x << R_SHIFT;
	*(mask) = x;
	return;
    }

    FbByteMul(a, x);
    *(mask) = a;
}



/*
 * There are two ways of handling alpha -- either as a single unified value or
 * a separate value for each component, hence each macro must have two
 * versions.  The unified alpha version has a 'U' at the end of the name,
 * the component version has a 'C'.  Similarly, functions which deal with
 * this difference will have two versions using the same convention.
 */

/*
 * All of the composing functions
 */

/*
 * Fetch source pixel i with the unified-alpha mask applied.
 *
 * With no mask the source pixel is returned as is.  Otherwise the mask
 * pixel's alpha (its bits above A_SHIFT) is used: a zero alpha yields 0
 * without reading the source, any other alpha scales the source pixel
 * through FbByteMul.
 */
static force_inline uint64_t
combineMask (const uint64_t *src, const uint64_t *mask, int i)
{
    uint64_t src_px, mask_alpha;

    if (!mask)
        return src[i];

    mask_alpha = mask[i] >> A_SHIFT;
    if (mask_alpha == 0)
        return 0;

    src_px = src[i];
    FbByteMul (src_px, mask_alpha);

    return src_px;
}

/* CLEAR, unified alpha: zero `width` destination pixels.  src, mask, imp
 * and op are not used. */
static void
fbCombineClear (pixman_implementation_t *imp, pixman_op_t op,
                uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    size_t n_bytes = width * sizeof (uint64_t);

    memset (dest, 0, n_bytes);
}

/* SRC, unified alpha: copy the (masked) source over the destination.
 * Without a mask this is a straight memcpy of `width` pixels. */
static void
fbCombineSrcU (pixman_implementation_t *imp, pixman_op_t op,
               uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    if (!mask)
    {
        memcpy (dest, src, width * sizeof (uint64_t));
        return;
    }

    for (i = 0; i < width; i++)
        dest[i] = combineMask (src, mask, i);
}

/*
 * OVER, unified alpha: each destination pixel is scaled by the inverted
 * source alpha and the masked source is added (FbByteMulAdd).
 * With an opaque source this is equivalent to fbCombineSrcU.
 */
static void
fbCombineOverU (pixman_implementation_t *imp, pixman_op_t op,
                uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; i++)
    {
        uint64_t src_px = combineMask (src, mask, i);
        uint64_t dst_px = dest[i];
        uint64_t inv_src_alpha = Alpha (~src_px);

        FbByteMulAdd (dst_px, inv_src_alpha, src_px);
        dest[i] = dst_px;
    }
}

/*
 * OVER_REVERSE, unified alpha: the masked source is scaled by the inverted
 * destination alpha and the destination is added (FbByteMulAdd).
 * With an opaque destination this is a no-op.
 */
static void
fbCombineOverReverseU (pixman_implementation_t *imp, pixman_op_t op,
                       uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; i++)
    {
        uint64_t src_px = combineMask (src, mask, i);
        uint64_t dst_px = dest[i];
        uint64_t inv_dst_alpha = Alpha (~dst_px);

        FbByteMulAdd (src_px, inv_dst_alpha, dst_px);
        dest[i] = src_px;
    }
}

/*
 * IN, unified alpha: the masked source scaled by the destination alpha.
 * With an opaque destination this is equivalent to fbCombineSrcU.
 */
static void
fbCombineInU (pixman_implementation_t *imp, pixman_op_t op,
              uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; i++)
    {
        uint64_t src_px = combineMask (src, mask, i);
        uint64_t dst_alpha = Alpha (dest[i]);

        FbByteMul (src_px, dst_alpha);
        dest[i] = src_px;
    }
}

/*
 * IN_REVERSE, unified alpha: the destination scaled by the alpha of the
 * masked source.  With an opaque source this is a no-op.
 */
static void
fbCombineInReverseU (pixman_implementation_t *imp, pixman_op_t op,
                     uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; i++)
    {
        uint64_t src_px = combineMask (src, mask, i);
        uint64_t dst_px = dest[i];
        uint64_t src_alpha = Alpha (src_px);

        FbByteMul (dst_px, src_alpha);
        dest[i] = dst_px;
    }
}

/*
 * OUT, unified alpha: the masked source scaled by the inverted destination
 * alpha.  With an opaque destination this is equivalent to fbCombineClear.
 */
static void
fbCombineOutU (pixman_implementation_t *imp, pixman_op_t op,
               uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; i++)
    {
        uint64_t src_px = combineMask (src, mask, i);
        uint64_t inv_dst_alpha = Alpha (~dest[i]);

        FbByteMul (src_px, inv_dst_alpha);
        dest[i] = src_px;
    }
}

/*
 * OUT_REVERSE, unified alpha: the destination scaled by the inverted alpha
 * of the masked source.  With an opaque source this is equivalent to
 * fbCombineClear.
 */
static void
fbCombineOutReverseU (pixman_implementation_t *imp, pixman_op_t op,
                      uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; i++)
    {
        uint64_t src_px = combineMask (src, mask, i);
        uint64_t dst_px = dest[i];
        uint64_t inv_src_alpha = Alpha (~src_px);

        FbByteMul (dst_px, inv_src_alpha);
        dest[i] = dst_px;
    }
}

/*
 * ATOP, unified alpha: source weighted by the destination alpha plus
 * destination weighted by the inverted source alpha (FbByteAddMul).
 *
 * Opaque source            -> same as fbCombineInU
 * Opaque destination       -> same as fbCombineOverU
 * Both opaque              -> same as fbCombineSrcU
 */
static void
fbCombineAtopU (pixman_implementation_t *imp, pixman_op_t op,
                uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; i++)
    {
        uint64_t src_px = combineMask (src, mask, i);
        uint64_t dst_px = dest[i];
        uint64_t dst_alpha = Alpha (dst_px);
        uint64_t inv_src_alpha = Alpha (~src_px);

        FbByteAddMul (src_px, dst_alpha, dst_px, inv_src_alpha);
        dest[i] = src_px;
    }
}

/*
 * ATOP_REVERSE, unified alpha: source weighted by the inverted destination
 * alpha plus destination weighted by the source alpha (FbByteAddMul).
 *
 * Opaque source            -> same as fbCombineOverReverseU
 * Opaque destination       -> same as fbCombineInReverseU
 * Both opaque              -> same as fbCombineDstU
 */
static void
fbCombineAtopReverseU (pixman_implementation_t *imp, pixman_op_t op,
                       uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; i++)
    {
        uint64_t src_px = combineMask (src, mask, i);
        uint64_t dst_px = dest[i];
        uint64_t src_alpha = Alpha (src_px);
        uint64_t inv_dst_alpha = Alpha (~dst_px);

        FbByteAddMul (src_px, inv_dst_alpha, dst_px, src_alpha);
        dest[i] = src_px;
    }
}

/*
 * XOR, unified alpha: source weighted by the inverted destination alpha
 * plus destination weighted by the inverted source alpha (FbByteAddMul).
 *
 * Opaque source            -> same as fbCombineOverU
 * Opaque destination       -> same as fbCombineOverReverseU
 * Both opaque              -> same as fbCombineClear
 */
static void
fbCombineXorU (pixman_implementation_t *imp, pixman_op_t op,
               uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; i++)
    {
        uint64_t src_px = combineMask (src, mask, i);
        uint64_t dst_px = dest[i];
        uint64_t inv_src_alpha = Alpha (~src_px);
        uint64_t inv_dst_alpha = Alpha (~dst_px);

        FbByteAddMul (src_px, inv_dst_alpha, dst_px, inv_src_alpha);
        dest[i] = src_px;
    }
}

/* ADD, unified alpha: the masked source is added to the destination with
 * FbByteAdd. */
static void
fbCombineAddU (pixman_implementation_t *imp, pixman_op_t op,
               uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; i++)
    {
        uint64_t src_px = combineMask (src, mask, i);
        uint64_t dst_px = dest[i];

        FbByteAdd (dst_px, src_px);
        dest[i] = dst_px;
    }
}

/*
 * SATURATE, unified alpha: add the masked source to the destination, but
 * when the source alpha exceeds the inverted destination alpha, first
 * scale the source by IntDiv (inverted destination alpha, source alpha).
 * If either operand is opaque this behaves like fbCombineAddU.
 */
static void
fbCombineSaturateU (pixman_implementation_t *imp, pixman_op_t op,
                    uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; i++)
    {
        uint64_t src_px = combineMask (src, mask, i);
        uint64_t dst_px = dest[i];
        uint32_t src_alpha = src_px >> A_SHIFT;
        uint32_t dst_room = ~dst_px >> A_SHIFT;

        if (src_alpha > dst_room)
        {
            /* src_alpha > dst_room >= 0, so the divisor is never zero. */
            src_alpha = IntDiv (dst_room, src_alpha);
            FbByteMul (src_px, src_alpha);
        }

        FbByteAdd (dst_px, src_px);
        dest[i] = dst_px;
    }
}

/* 
 * PDF blend modes:
 * The following blend modes have been taken from the PDF ISO 32000
 * specification, which at this point in time is available from
 * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf
 * The relevant chapters are 11.3.5 and 11.3.6.
 * The formula for computing the final pixel color given in 11.3.6 is:
 * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
 * with B() being the blend function.
 * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs
 *
 * These blend modes should match the SVG filter draft specification, as 
 * it has been designed to mirror ISO 32000. Note that at the current point
 * no released draft exists that shows this, as the formulas have not been
 * updated yet after the release of ISO 32000.
 *
 * The default implementation here uses the PdfSeparableBlendMode and 
 * PdfNonSeparableBlendMode macros, which take the blend function as an 
 * argument. Note that this implementation operates on premultiplied colors,
 * while the PDF specification does not. Therefore the code uses the formula
 * ar.Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
 */

/* 
 * Multiply
 * B(Dca, ad, Sca, as) = Dca.Sca
 */

/*
 * MULTIPLY, unified alpha.
 *
 * Result = (src weighted by inverted dest alpha + dest weighted by
 * inverted src alpha) + dest * src per component, i.e. the general PDF
 * formula with B(Dca, ad, Sca, as) = Dca.Sca.
 */
static void
fbCombineMultiplyU (pixman_implementation_t *imp, pixman_op_t op,
                    uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; i++)
    {
        uint64_t src_px = combineMask (src, mask, i);
        uint64_t dst_px = dest[i];
        uint64_t outside = src_px;
        uint64_t inv_src_alpha = Alpha (~src_px);
        uint64_t inv_dst_alpha = Alpha (~dst_px);

        /* The two non-overlapping contributions ... */
        FbByteAddMul (outside, inv_dst_alpha, dst_px, inv_src_alpha);
        /* ... and the blended overlap. */
        FbByteMulC (dst_px, src_px);
        FbByteAdd (dst_px, outside);

        dest[i] = dst_px;
    }
}

/*
 * MULTIPLY, component alpha.
 *
 * For each pixel the source is first multiplied by the mask
 * (fbCombineMaskValueC), then the result is
 *   dest weighted per component by the inverted mask
 *   + masked source weighted by the inverted destination alpha
 *   + dest * masked source per component.
 * mask is dereferenced unconditionally, so it must not be NULL.
 */
static void
fbCombineMultiplyC (pixman_implementation_t *imp, pixman_op_t op,
                    uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;
    for (i = 0; i < width; ++i) {
        uint64_t m = *(mask + i);
        uint64_t s = *(src + i);
	uint64_t d = *(dest + i);
	uint64_t r = d;
	uint64_t dest_ia = Alpha (~d);

	/* s becomes the mask-weighted source; m itself is not modified. */
	fbCombineMaskValueC (&s, &m);

	/* NOTE(review): r is handed to a macro defined in another header;
	 * confirm that macro declares no internal temporary named r. */
	FbByteAddMulC (r, ~m, s, dest_ia);
	FbByteMulC (d, s);
	FbByteAdd (r, d);

	*(dest + i) = r;
    }
}

/*
 * PdfSeparableBlendMode (name) expands to two combiners built around the
 * per-channel blend function Blend<name> (dca, da, sca, sa):
 *
 *   fbCombine<name>U - unified alpha; the source is first run through
 *                      combineMask ().
 *   fbCombine<name>C - component alpha; the source is first run through
 *                      fbCombineMaskValueC () and the matching mask
 *                      component is passed to Blend<name> in place of the
 *                      source alpha.  mask must not be NULL.
 *
 * Both start from the FbByteAddMul / FbByteAddMulC combination of
 * destination and source, then add the blended red, green and blue
 * channels plus an alpha term DivOne (source alpha * dest alpha).
 *
 * sa and da are uint16_t, so a bare `sa * da` would be evaluated in signed
 * int after integer promotion and overflow once both alphas are large.
 * The product is therefore computed in uint64_t.
 */
#define PdfSeparableBlendMode(name)		    \
static void					    \
fbCombine ## name ## U (pixman_implementation_t *imp, pixman_op_t op, \
                        uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width) \
{						    \
    int i;					    \
    for (i = 0; i < width; ++i) {		    \
        uint64_t s = combineMask (src, mask, i);     \
        uint64_t d = *(dest + i);		    \
        uint16_t sa = Alpha(s);			    \
        uint16_t isa = ~sa;			    \
        uint16_t da = Alpha(d);	  		    \
        uint16_t ida = ~da;			    \
	uint64_t	result;				    \
						    \
	result = d;				    \
        FbByteAddMul(result, isa, s, ida);	    \
						    \
	*(dest + i) = result +			    \
	    (DivOne ((uint64_t) sa * da) << A_SHIFT) +	    \
	    (Blend ## name (Red (d), da, Red (s), sa) << R_SHIFT) + \
	    (Blend ## name (Green (d), da, Green (s), sa) << G_SHIFT) + \
	    (Blend ## name (Blue (d), da, Blue (s), sa)); \
    }						    \
}						    \
						    \
static void				    \
fbCombine ## name ## C (pixman_implementation_t *imp, pixman_op_t op, \
			uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width) \
{						    \
    int i;					    \
    for (i = 0; i < width; ++i) {		    \
	uint64_t m = *(mask + i);		    \
	uint64_t s = *(src + i);                     \
	uint64_t d = *(dest + i);		    \
	uint16_t da = Alpha(d);  		    \
	uint16_t ida = ~da;			    \
	uint64_t result;				    \
						    \
	fbCombineMaskValueC (&s, &m);		    \
						    \
	result = d;				    \
	FbByteAddMulC (result, ~m, s, ida);	    \
						    \
	result +=				    \
	    (DivOne (Alpha (m) * da) << A_SHIFT) +				\
	    (Blend ## name (Red (d), da, Red (s), Red (m)) << R_SHIFT) +	\
	    (Blend ## name (Green (d), da, Green (s), Green (m)) << G_SHIFT) +	\
	    (Blend ## name (Blue (d), da, Blue (s), Blue (m)));			\
						    \
	*(dest + i) = result;			    \
    }						    \
}

/*
 * Screen
 * B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca
 *
 * The products are formed at full precision and normalised once by DivOne.
 */

static inline uint64_t
BlendScreen (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
{
    uint64_t weighted_sum = sca * da + dca * sa;
    uint64_t overlap = sca * dca;

    return DivOne (weighted_sum - overlap);
}

PdfSeparableBlendMode (Screen)

/*
 * Overlay
 * B(Dca, ab, Sca, as) =
 *   if 2.Dca < Da
 *     2.Sca.Dca
 *   otherwise
 *     Sa.Da - 2.(Da - Dca).(Sa - Sca)
 *
 * Same formulas as hard light, but the branch is chosen by the
 * destination channel instead of the source channel.
 */

static inline uint64_t
BlendOverlay (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
{
    uint64_t blended;

    if (2 * dca >= da)
        blended = sa * da - 2 * (da - dca) * (sa - sca);
    else
        blended = 2 * sca * dca;

    return DivOne (blended);
}

PdfSeparableBlendMode (Overlay)

/*
 * Darken
 * B(Dca, ab, Sca, as) = min (Sca.Da, Dca.Sa)
 */

static inline uint64_t
BlendDarken (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
{
    uint64_t src_term = sca * da;
    uint64_t dst_term = dca * sa;
    uint64_t lesser = src_term;

    if (src_term > dst_term)
        lesser = dst_term;

    return DivOne (lesser);
}

PdfSeparableBlendMode (Darken)

/*
 * Lighten
 * B(Dca, ab, Sca, as) = max (Sca.Da, Dca.Sa)
 */

static inline uint64_t
BlendLighten (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
{
    uint64_t src_term = sca * da;
    uint64_t dst_term = dca * sa;
    uint64_t greater = dst_term;

    if (src_term > dst_term)
        greater = src_term;

    return DivOne (greater);
}

PdfSeparableBlendMode (Lighten)

/*
 * Color dodge
 * B(Dca, ab, Sca, as) =
 *   if Sca == Sa
 *     (Dca != 0).Sa.Da
 *   otherwise
 *     Sa.Da. min (1, Dca / Da / (1 - Sca/Sa))
 *
 * dca, da: destination channel and alpha; sca, sa: source channel and
 * alpha.  Products are kept at full precision and normalised by DivOne.
 */

static inline uint64_t
BlendColorDodge (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
{
    if (sca >= sa) {
	/* Fully saturated source channel: the result is Sa.Da, except that
	 * an empty destination channel stays empty (the "(Dca != 0)" factor
	 * in the formula above). */
	return dca == 0 ? 0 : DivOne (sa * da);
    } else {
	/* sa > sca here, so the divisor is non-zero. */
	uint64_t rca = dca * sa * sa / (sa - sca);
	return DivOne (rca > sa * da ? sa * da : rca);
    }
}

PdfSeparableBlendMode (ColorDodge)

/*
 * Color burn
 * B(Dca, ab, Sca, as) =
 *   if Sca == 0
 *     (Da == Dca).Sa.Da
 *   otherwise
 *     Sa.Da.(1 - min (1, (1 - Dca/Da).Sa / Sca))
 *
 * dca, da: destination channel and alpha; sca, sa: source channel and
 * alpha.  Products are kept at full precision and normalised by DivOne.
 */

static inline uint64_t
BlendColorBurn (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
{
    if (sca == 0) {
	/* Empty source channel: the result is 0 unless the destination
	 * channel is fully saturated (the "(Da == Dca)" factor in the
	 * formula above), in which case it is Sa.Da. */
	return dca < da ? 0 : DivOne (sa * da);
    } else {
	uint64_t sada = sa * da;
	uint64_t rca = (da - dca) * sa * sa / sca;
	return DivOne (rca > sada ? 0 : sada - rca);
    }
}

PdfSeparableBlendMode (ColorBurn)

/*
 * Hard light
 * B(Dca, ab, Sca, as) =
 *   if 2.Sca < Sa
 *     2.Sca.Dca
 *   otherwise
 *     Sa.Da - 2.(Da - Dca).(Sa - Sca)
 *
 * Same formulas as overlay, but the branch is chosen by the source
 * channel instead of the destination channel.
 */
static inline uint64_t
BlendHardLight (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
{
    uint64_t blended;

    if (2 * sca >= sa)
        blended = sa * da - 2 * (da - dca) * (sa - sca);
    else
        blended = 2 * sca * dca;

    return DivOne (blended);
}

PdfSeparableBlendMode (HardLight)

/*
 * Soft light
 * B(Dca, ab, Sca, as) =
 *   if (2.Sca < Sa)
 *     Dca.(Sa - (1 - Dca/Da).(Sa - 2.Sca))
 *   otherwise if Dca.4 <= Da
 *     Dca.(Sa + (2.Sca - Sa).((16.Dca/Da - 12).Dca/Da + 3))
 *   otherwise
 *     (Dca.Sa + (SQRT (Dca/Da).Da - Dca).(2.Sca - Sa))
 *
 * Unlike the other separable modes this one is evaluated in floating
 * point: all four inputs are scaled by 1/MASK, the formula is applied, and
 * the result is scaled back by MASK.  Da == 0 is special-cased in both
 * branches to avoid dividing by zero.
 */

static inline uint64_t
BlendSoftLight (uint64_t dca_org, uint64_t da_org, uint64_t sca_org, uint64_t sa_org)
{
    double dca = dca_org * (1.0 / MASK);
    double da = da_org * (1.0 / MASK);
    double sca = sca_org * (1.0 / MASK);
    double sa = sa_org * (1.0 / MASK);
    double rca;

    if (2 * sca < sa) {
	if (da == 0)
	    rca = dca * sa;
	else
	    rca = dca * sa - dca * (da - dca) * (sa - 2 * sca) / da;
    } else if (da == 0) {
	rca = 0;
    } else if (4 * dca <= da) {
	rca = dca * sa + (2 * sca - sa) * dca * ((16 * dca / da - 12) * dca / da + 3);
    } else {
	/* sqrt (dca * da) is SQRT (Dca/Da).Da from the formula above. */
	rca = dca * sa + (sqrt (dca * da) - dca) * (2 * sca - sa);
    }
    /* Adding 0.5 before the truncating conversion rounds to nearest. */
    return rca * MASK + 0.5;
}

PdfSeparableBlendMode (SoftLight)

/*
 * Difference
 * B(Dca, ab, Sca, as) = abs (Dca.Sa - Sca.Da)
 *
 * The operands are unsigned, so the smaller product is always subtracted
 * from the larger one.
 */

static inline uint64_t
BlendDifference (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
{
    uint64_t dst_term = dca * sa;
    uint64_t src_term = sca * da;
    uint64_t gap;

    if (src_term >= dst_term)
        gap = src_term - dst_term;
    else
        gap = dst_term - src_term;

    return DivOne (gap);
}

PdfSeparableBlendMode (Difference)

/*
 * Exclusion
 * B(Dca, ab, Sca, as) = (Sca.Da + Dca.Sa - 2.Sca.Dca)
 */

/* Writing this combiner out by hand instead of going through
 * PdfSeparableBlendMode would be faster, but that is purely a performance
 * optimization. */

static inline uint64_t
BlendExclusion (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
{
    uint64_t weighted_sum = sca * da + dca * sa;
    uint64_t twice_overlap = 2 * dca * sca;

    return DivOne (weighted_sum - twice_overlap);
}

PdfSeparableBlendMode (Exclusion)

/* Exclusion is the last separable mode; retire the generator macro. */
#undef PdfSeparableBlendMode

/*
 * PDF non-separable blend modes are implemented using the following functions
 * to operate in HSL space, with Cmax, Cmid, Cmin referring to the max, mid 
 * and min value of the red, green and blue components.
 * 
 * Lum (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
 *
 * ClipColor (C):
 *   l = Lum (C)
 *   min = Cmin
 *   max = Cmax
 *   if min < 0.0
 *     C = l + ( ( ( C – l ) × l ) ⁄ ( l – min ) )
 *   if max > 1.0
 *     C = l + ( ( ( C – l ) × ( 1 – l ) ) ⁄ ( max – l ) )
 *   return C
 *
 * SetLum (C, l):
 *   d = l – Lum (C)
 *   C += d
 *   return ClipColor (C)
 *
 * Sat (C) = Max (C) - Min (C)
 *
 * SetSat (C, s):
 *  if Cmax > Cmin
 *    Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
 *    Cmax = s
 *  else
 *    Cmid = Cmax = 0.0
 *  Cmin = 0.0
 *  return C
 */

/* For premultiplied colors, we need to know what happens when C is
 * multiplied by a real number. Lum and Sat are linear:
 *
 *    Lum (r × C) = r × Lum (C)		Sat (r * C) = r * Sat (C)
 *
 * If we extend ClipColor with an extra argument a and change
 *
 *        if max > 1.0
 *
 * into
 *
 *        if max > a
 *
 * then ClipColor is also linear:
 *
 *    r * ClipColor (C, a) = ClipColor (rC, ra);
 *
 * for positive r.
 *
 * Similarly, we can extend SetLum with an extra argument that is just passed
 * on to ClipColor:
 *
 *   r * SetLum ( C, l, a)
 *
 *   = r × ClipColor ( C + l - Lum (C), a)
 *
 *   = ClipColor ( r * C + r × l - r * Lum (C), r * a)
 *
 *   = SetLum ( r * C, r * l, r * a)
 *
 * Finally, SetSat:
 *
 *    r * SetSat (C, s) = SetSat (x * C, r * s)
 *
 * The above holds for all non-zero x, because the x'es in the fraction for
 * C_mid cancel out. Specifically, it holds for x = r:
 *
 *    r * SetSat (C, s) = SetSat (rC, rs)
 *  
 */

/* So, for the non-separable PDF blend modes, we have (using s, d for non-premultiplied
 * colors, and S, D for premultiplied):
 *
 *   Color:
 *
 *     a_s * a_d * B(s, d)
 *   = a_s * a_d * SetLum (S/a_s, Lum (D/a_d), 1)
 *   = SetLum (S * a_d, a_s * Lum (D), a_s * a_d)
 *
 *
 *   Luminosity:
 *
 *     a_s * a_d * B(s, d)
 *   = a_s * a_d * SetLum (D/a_d, Lum(S/a_s), 1)
 *   = SetLum (a_s * D, a_d * Lum(S), a_s * a_d)
 *
 *
 *   Saturation:
 *
 *     a_s * a_d * B(s, d)
 *   = a_s * a_d * SetLum (SetSat (D/a_d, Sat (S/a_s)), Lum (D/a_d), 1)
 *   = SetLum (a_s * a_d * SetSat (D/a_d, Sat (S/a_s)), a_s * Lum (D), a_s * a_d)
 *   = SetLum (SetSat (a_s * D, a_d * Sat (S)), a_s * Lum (D), a_s * a_d)
 *
 *   Hue:
 *
 *     a_s * a_d * B(s, d)
 *   = a_s * a_d * SetLum (SetSat (S/a_s, Sat (D/a_d)), Lum (D/a_d), 1)
 *   = SetLum (SetSat (a_d * S, a_s * Sat (D)), a_s * Lum (D), a_s * a_d)
 *
 */
    
/*
 * Helpers over a three-element colour array c (anything indexable with
 * [0], [1], [2]):
 *
 *   Min (c), Max (c) - smallest / largest of the three elements
 *   Lum (c)          - weighted sum (30 * c[0] + 59 * c[1] + 11 * c[2]) / 100
 *   Sat (c)          - Max (c) - Min (c)
 *
 * The argument is parenthesised so that expressions such as `p + 1` index
 * correctly.  It is still evaluated several times, so it must be free of
 * side effects.
 */
#define Min(c) ((c)[0] < (c)[1] ? ((c)[0] < (c)[2] ? (c)[0] : (c)[2]) : ((c)[1] < (c)[2] ? (c)[1] : (c)[2]))
#define Max(c) ((c)[0] > (c)[1] ? ((c)[0] > (c)[2] ? (c)[0] : (c)[2]) : ((c)[1] > (c)[2] ? (c)[1] : (c)[2]))
#define Lum(c) (((c)[0] * 30 + (c)[1] * 59 + (c)[2] * 11) / 100)
#define Sat(c) (Max (c) - Min (c))

/*
 * PdfNonSeparableBlendMode (name) expands to fbCombine<name>U, the
 * unified-alpha combiner for a blend mode that works on all three colour
 * channels at once.
 *
 * For every pixel the masked source and the destination are split into
 * {red, green, blue} arrays and handed to
 * Blend<name> (c, dc, da, sc, sa), which fills c.  The stored pixel is the
 * FbByteAddMul combination of destination and source plus DivOne of each
 * element of c and an alpha term DivOne (source alpha * dest alpha).
 *
 * sa and da are uint16_t, so a bare `sa * da` would be evaluated in signed
 * int after integer promotion and overflow once both alphas are large.
 * The product is therefore computed in uint64_t.
 */
#define PdfNonSeparableBlendMode(name)					\
static void								\
fbCombine ## name ## U (pixman_implementation_t *imp, pixman_op_t op,	\
			uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width) \
{									\
    int i;								\
    for (i = 0; i < width; ++i) {					\
        uint64_t s = combineMask (src, mask, i);				\
        uint64_t d = *(dest + i);					\
        uint16_t sa = Alpha(s);						\
        uint16_t isa = ~sa;						\
        uint16_t da = Alpha(d);						\
        uint16_t ida = ~da;						\
	uint64_t	result;							\
	uint64_t sc[3], dc[3], c[3];					\
									\
	result = d;							\
        FbByteAddMul(result, isa, s, ida);				\
	dc[0] = Red (d);						\
	sc[0] = Red (s);						\
	dc[1] = Green (d);						\
	sc[1] = Green (s);						\
	dc[2] = Blue (d);						\
	sc[2] = Blue (s);						\
	Blend ## name (c, dc, da, sc, sa);				\
									\
	*(dest + i) = result +						\
	    (DivOne ((uint64_t) sa * da) << A_SHIFT) +			\
	    (DivOne (c[0]) << R_SHIFT) +				\
	    (DivOne (c[1]) << G_SHIFT) +				\
	    (DivOne (c[2]));						\
    }									\
}

/*
 * SetLum with the extended ClipColor described above.
 *
 * dest: receives the three resulting channels
 * src:  the three input channels (may be the same array as dest)
 * sa:   upper clipping bound, the "a" of ClipColor (C, a)
 * lum:  the luminosity to impose
 *
 * All inputs are scaled by 1/MASK and processed in floating point; the
 * results are scaled back by MASK and rounded to nearest.
 */
static void
SetLum (uint64_t dest[3], uint64_t src[3], uint64_t sa, uint64_t lum)
{
  double a, l, min, max;
  double tmp[3];
  
  a = sa * (1.0 / MASK);
  l = lum * (1.0 / MASK);
  tmp[0] = src[0] * (1.0 / MASK);
  tmp[1] = src[1] * (1.0 / MASK);
  tmp[2] = src[2] * (1.0 / MASK);
  /* Shift every channel by the difference between the wanted and the
   * current luminosity. */
  l = l - Lum (tmp);
  tmp[0] += l;
  tmp[1] += l;
  tmp[2] += l;

  /* ClipColor */
  l = Lum (tmp);
  min = Min (tmp);
  max = Max (tmp);

  /* Channels that fell below zero: pull everything towards l. */
  if (min < 0) {
    tmp[0] = l + (tmp[0] - l) * l / (l - min);
    tmp[1] = l + (tmp[1] - l) * l / (l - min);
    tmp[2] = l + (tmp[2] - l) * l / (l - min);
  }
  /* Channels above the bound a: pull everything towards l.  max was
   * sampled before the previous step and is not recomputed. */
  if (max > a) {
    tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l);
    tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l);
    tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l);
  }
  /* Back to integers; + 0.5 rounds to nearest. */
  dest[0] = tmp[0] * MASK + 0.5;
  dest[1] = tmp[1] * MASK + 0.5;
  dest[2] = tmp[2] * MASK + 0.5;
}

/*
 * SetSat: rescale the three channels in dest so that their saturation
 * (max - min) becomes sat.
 *
 * The ranking of the channels (largest / middle / smallest) is taken from
 * src, while the values that are read and rewritten are those in dest.
 * Every caller in this file passes the same array for both.
 *
 * If the largest channel is not greater than the smallest, all three
 * channels are set to 0.
 */
static void
SetSat (uint64_t dest[3], uint64_t src[3], uint64_t sat)
{
    int hi, mid, lo;
    uint64_t top, bottom;

    /* Rank the channel indices by their value in src. */
    if (src[0] > src[1])
    {
        if (src[0] <= src[2])
        {
            hi = 2; mid = 0; lo = 1;
        }
        else if (src[1] > src[2])
        {
            hi = 0; mid = 1; lo = 2;
        }
        else
        {
            hi = 0; mid = 2; lo = 1;
        }
    }
    else
    {
        if (src[0] > src[2])
        {
            hi = 1; mid = 0; lo = 2;
        }
        else if (src[1] > src[2])
        {
            hi = 1; mid = 2; lo = 0;
        }
        else
        {
            hi = 2; mid = 1; lo = 0;
        }
    }

    top = dest[hi];
    bottom = dest[lo];

    if (top <= bottom)
    {
        dest[0] = dest[1] = dest[2] = 0;
        return;
    }

    dest[mid] = (dest[mid] - bottom) * sat / (top - bottom);
    dest[hi] = sat;
    dest[lo] = 0;
}

/*
 * Hue:
 * B(Cb, Cs) = SetLum (SetSat (Cs, Sat (Cb)), Lum (Cb))
 *
 * c receives the source colour scaled by the destination alpha, given the
 * destination's saturation and luminosity (each scaled by the source
 * alpha).
 */
static inline void
BlendHSLHue (uint64_t c[3], uint64_t dc[3], uint64_t da, uint64_t sc[3], uint64_t sa)
{
    int k;

    for (k = 0; k < 3; k++)
        c[k] = sc[k] * da;

    SetSat (c, c, Sat (dc) * sa);
    SetLum (c, c, sa * da, Lum (dc) * sa);
}

PdfNonSeparableBlendMode (HSLHue)

/*
 * Saturation:
 * B(Cb, Cs) = SetLum (SetSat (Cb, Sat (Cs)), Lum (Cb))
 *
 * c receives the destination colour scaled by the source alpha, given the
 * source's saturation (scaled by the destination alpha) and the
 * destination's own luminosity (scaled by the source alpha).
 */
static inline void
BlendHSLSaturation (uint64_t c[3], uint64_t dc[3], uint64_t da, uint64_t sc[3], uint64_t sa)
{
    int k;

    for (k = 0; k < 3; k++)
        c[k] = dc[k] * sa;

    SetSat (c, c, Sat (sc) * da);
    SetLum (c, c, sa * da, Lum (dc) * sa);
}

PdfNonSeparableBlendMode (HSLSaturation)

/*
 * Color:
 * B(Cb, Cs) = SetLum (Cs, Lum (Cb))
 *
 * c receives the source colour scaled by the destination alpha, given the
 * destination's luminosity (scaled by the source alpha).
 */
static inline void
BlendHSLColor (uint64_t c[3], uint64_t dc[3], uint64_t da, uint64_t sc[3], uint64_t sa)
{
    int k;

    for (k = 0; k < 3; k++)
        c[k] = sc[k] * da;

    SetLum (c, c, sa * da, Lum (dc) * sa);
}

PdfNonSeparableBlendMode (HSLColor)

/*
 * Luminosity:
 * B(Cb, Cs) = SetLum (Cb, Lum (Cs))
 *
 * c receives the destination colour scaled by the source alpha, given the
 * source's luminosity (scaled by the destination alpha).
 */
static inline void
BlendHSLLuminosity (uint64_t c[3], uint64_t dc[3], uint64_t da, uint64_t sc[3], uint64_t sa)
{
    int k;

    for (k = 0; k < 3; k++)
        c[k] = dc[k] * sa;

    SetLum (c, c, sa * da, Lum (sc) * da);
}

PdfNonSeparableBlendMode (HSLLuminosity)

/* Last non-separable mode: retire the helper macros and the generator. */
#undef Sat
#undef Lum
#undef Max
#undef Min
#undef PdfNonSeparableBlendMode

/*
 * All of the disjoint and conjoint composing functions

 The four entries in the first column indicate what source contributions
 come from each of the four areas of the picture -- areas covered by neither
 A nor B, areas covered only by A, areas covered only by B and finally
 areas covered by both A and B.

 Disjoint			Conjoint
 Fa		Fb		Fa		Fb
 (0,0,0,0)	0		0		0		0
 (0,A,0,A)	1		0		1		0
 (0,0,B,B)	0		1		0		1
 (0,A,B,A)	1		min((1-a)/b,1)	1		max(1-a/b,0)
 (0,A,B,B)	min((1-b)/a,1)	1		max(1-b/a,0)	1
 (0,0,0,A)	max(1-(1-b)/a,0) 0		min(1,b/a)	0
 (0,0,0,B)	0		max(1-(1-a)/b,0) 0		min(a/b,1)
 (0,A,0,0)	min(1,(1-b)/a)	0		max(1-b/a,0)	0
 (0,0,B,0)	0		min(1,(1-a)/b)	0		max(1-a/b,0)
 (0,0,B,A)	max(1-(1-b)/a,0) min(1,(1-a)/b)	 min(1,b/a)	max(1-a/b,0)
 (0,A,0,B)	min(1,(1-b)/a)	max(1-(1-a)/b,0) max(1-b/a,0)	min(1,a/b)
 (0,A,B,0)	min(1,(1-b)/a)	min(1,(1-a)/b)	max(1-b/a,0)	max(1-a/b,0)

*/

/* Bit flags selecting which part of each operand contributes: the "Out"
 * and "In" parts of A (bits 1 and 2) and of B (bits 4 and 8).  In the
 * general combiners below A is the source and B the destination. */
#define CombineAOut 1
#define CombineAIn  2
#define CombineBOut 4
#define CombineBIn  8

/* Operators expressed as unions of the flags above.  The general combiners
 * switch on (combine & CombineA) and (combine & CombineB) separately. */
#define CombineClear	0
#define CombineA	(CombineAOut|CombineAIn)
#define CombineB	(CombineBOut|CombineBIn)
#define CombineAOver	(CombineAOut|CombineBOut|CombineAIn)
#define CombineBOver	(CombineAOut|CombineBOut|CombineBIn)
#define CombineAAtop	(CombineBOut|CombineAIn)
#define CombineBAtop	(CombineAOut|CombineBIn)
#define CombineXor	(CombineAOut|CombineBOut)

/*
 * Disjoint: portion covered by a but not b.
 *
 *   min (1, (1 - b) / a)
 *
 * Returns MASK for "1".  The division is only reached when (1 - b) < a,
 * so a is non-zero there.
 */
static uint16_t
fbCombineDisjointOutPart (uint16_t a, uint16_t b)
{
    uint16_t inv_b = ~b;                /* 1 - b */

    if (inv_b < a)
        return IntDiv (inv_b, a);       /* (1 - b) / a */

    return MASK;                        /* clamped to 1 */
}

/*
 * Disjoint: portion covered by both a and b.
 *
 *   max (1 - (1 - b) / a, 0)  =  1 - min (1, (1 - b) / a)
 *
 * The division is only reached when (1 - b) < a, so a is non-zero there.
 */
static uint16_t
fbCombineDisjointInPart (uint16_t a, uint16_t b)
{
    uint16_t inv_b = ~b;                /* 1 - b */

    if (inv_b < a)
        return ~IntDiv (inv_b, a);      /* 1 - (1 - b) / a */

    return 0;                           /* 1 - 1 */
}

/*
 * Conjoint: portion covered by a but not b.
 *
 *   max (1 - b / a, 0)  =  1 - min (b / a, 1)
 *
 * The division is only reached when b < a, so a is non-zero there.
 */
static uint16_t
fbCombineConjointOutPart (uint16_t a, uint16_t b)
{
    if (b < a)
        return ~IntDiv (b, a);          /* 1 - b / a */

    return 0x00;                        /* b / a >= 1 */
}

/*
 * Conjoint: portion covered by both a and b.
 *
 *   min (1, b / a)
 *
 * Returns MASK for "1".  The division is only reached when b < a, so a is
 * non-zero there.
 */
static uint16_t
fbCombineConjointInPart (uint16_t a, uint16_t b)
{
    if (b < a)
        return IntDiv (b, a);           /* b / a */

    return MASK;                        /* clamped to 1 */
}

/* GetComp (v, i): the 16-bit component of v that starts at bit i, widened
 * to uint32_t.  i is not parenthesised in the expansion, so pass a plain
 * constant or identifier. */
#define GetComp(v,i)   ((uint32_t) (uint16_t) ((v) >> i))

/* Add (x, y, i, t): sum of component i of x and y, shifted back to bit i.
 * t is a caller-supplied temporary that receives the raw sum; OR-ing it
 * with (0 - (t >> G_SHIFT)) turns every bit on when the sum carried out of
 * the component, so the truncated result saturates instead of wrapping. */
#define Add(x,y,i,t)   ((t) = GetComp(x,i) + GetComp(y,i),              \
                        (uint64_t) ((uint16_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i))

/* FbGen (x, y, i, ax, ay, t, u, v): component i of y times ay plus
 * component i of x times ax (each via IntMult), saturated like Add and
 * shifted back to bit i.  t, u and v are caller-supplied temporaries that
 * the expansion assigns to, so two FbGen uses sharing temporaries must not
 * appear in the same unsequenced expression. */
#define FbGen(x,y,i,ax,ay,t,u,v) ((t) = (IntMult(GetComp(y,i),ay,(u)) + \
					 IntMult(GetComp(x,i),ax,(v))), \
				  	 (uint64_t) ((uint16_t) ((t) |		\
					 (0 - ((t) >> G_SHIFT)))) << (i))

/*
 * Shared implementation of the disjoint operators (unified alpha).
 *
 * `combine` is a union of the Combine* flags.  Its A bits select the
 * factor applied to the masked source, its B bits the factor applied to
 * the destination; each factor is 0, MASK, or one of the disjoint
 * Out/In parts of the two alphas.  Every component of the result is
 * source * source factor + destination * destination factor, saturated
 * by FbGen.
 */
static void
fbCombineDisjointGeneralU (uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width, uint16_t combine)
{
    const uint16_t src_sel = combine & CombineA;
    const uint16_t dst_sel = combine & CombineB;
    int i;

    for (i = 0; i < width; i++)
    {
        uint64_t src_px = combineMask (src, mask, i);
        uint64_t dst_px = dest[i];
        uint16_t src_alpha = src_px >> A_SHIFT;
        uint16_t dst_alpha = dst_px >> A_SHIFT;
        uint64_t part0, part1, part2, part3;
        uint32_t src_factor, dst_factor;
        uint32_t sum, tmp_dst, tmp_src;

        if (src_sel == CombineAOut)
            src_factor = fbCombineDisjointOutPart (src_alpha, dst_alpha);
        else if (src_sel == CombineAIn)
            src_factor = fbCombineDisjointInPart (src_alpha, dst_alpha);
        else if (src_sel == CombineA)
            src_factor = MASK;
        else
            src_factor = 0;

        if (dst_sel == CombineBOut)
            dst_factor = fbCombineDisjointOutPart (dst_alpha, src_alpha);
        else if (dst_sel == CombineBIn)
            dst_factor = fbCombineDisjointInPart (dst_alpha, src_alpha);
        else if (dst_sel == CombineB)
            dst_factor = MASK;
        else
            dst_factor = 0;

        /* One statement per component: FbGen writes to the shared
         * temporaries, so the four uses must stay sequenced. */
        part0 = FbGen (src_px, dst_px, 0, src_factor, dst_factor, sum, tmp_dst, tmp_src);
        part1 = FbGen (src_px, dst_px, G_SHIFT, src_factor, dst_factor, sum, tmp_dst, tmp_src);
        part2 = FbGen (src_px, dst_px, R_SHIFT, src_factor, dst_factor, sum, tmp_dst, tmp_src);
        part3 = FbGen (src_px, dst_px, A_SHIFT, src_factor, dst_factor, sum, tmp_dst, tmp_src);

        dest[i] = part0 | part1 | part2 | part3;
    }
}

/*
 * Disjoint OVER, unified alpha.
 *
 *   source alpha == 0    : destination pixel is left untouched
 *   source alpha == MASK : destination pixel is replaced by the source
 *   otherwise            : destination is scaled by
 *                          fbCombineDisjointOutPart (dest alpha, src alpha)
 *                          and the source is added (FbByteMulAdd)
 */
static void
fbCombineDisjointOverU (pixman_implementation_t *imp, pixman_op_t op,
                        uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; i++)
    {
        uint64_t src_px = combineMask (src, mask, i);
        uint32_t src_alpha = src_px >> A_SHIFT;

        if (src_alpha == 0x00)
            continue;

        if (src_alpha != MASK)
        {
            uint64_t dst_px = dest[i];
            uint32_t dst_factor = fbCombineDisjointOutPart (dst_px >> A_SHIFT, src_alpha);

            FbByteMulAdd (dst_px, dst_factor, src_px);
            src_px = dst_px;
        }

        dest[i] = src_px;
    }
}

/* Disjoint IN, unified alpha: fbCombineDisjointGeneralU with CombineAIn.
 * imp and op are not used. */
static void
fbCombineDisjointInU (pixman_implementation_t *imp, pixman_op_t op,
		      uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    fbCombineDisjointGeneralU (dest, src, mask, width, CombineAIn);
}

/* Disjoint IN_REVERSE, unified alpha: fbCombineDisjointGeneralU with
 * CombineBIn.  imp and op are not used. */
static void
fbCombineDisjointInReverseU (pixman_implementation_t *imp, pixman_op_t op,
			     uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    fbCombineDisjointGeneralU (dest, src, mask, width, CombineBIn);
}

/* Disjoint OUT, unified alpha: fbCombineDisjointGeneralU with CombineAOut.
 * imp and op are not used. */
static void
fbCombineDisjointOutU (pixman_implementation_t *imp, pixman_op_t op,
		       uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    fbCombineDisjointGeneralU (dest, src, mask, width, CombineAOut);
}

/* Disjoint OUT_REVERSE, unified alpha: fbCombineDisjointGeneralU with
 * CombineBOut.  imp and op are not used. */
static void
fbCombineDisjointOutReverseU (pixman_implementation_t *imp, pixman_op_t op,
			      uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    fbCombineDisjointGeneralU (dest, src, mask, width, CombineBOut);
}

/* Disjoint ATOP, unified alpha: fbCombineDisjointGeneralU with
 * CombineAAtop.  imp and op are not used. */
static void
fbCombineDisjointAtopU (pixman_implementation_t *imp, pixman_op_t op,
			uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    fbCombineDisjointGeneralU (dest, src, mask, width, CombineAAtop);
}

/* Disjoint ATOP_REVERSE, unified alpha: fbCombineDisjointGeneralU with
 * CombineBAtop.  imp and op are not used. */
static void
fbCombineDisjointAtopReverseU (pixman_implementation_t *imp, pixman_op_t op,
			       uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    fbCombineDisjointGeneralU (dest, src, mask, width, CombineBAtop);
}

/* Disjoint XOR, unified alpha: fbCombineDisjointGeneralU with CombineXor.
 * imp and op are not used. */
static void
fbCombineDisjointXorU (pixman_implementation_t *imp, pixman_op_t op,
		       uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    fbCombineDisjointGeneralU (dest, src, mask, width, CombineXor);
}

/*
 * Shared implementation of the conjoint operators (unified alpha).
 *
 * `combine` is a union of the Combine* flags.  Its A bits select the
 * factor applied to the masked source, its B bits the factor applied to
 * the destination; each factor is 0, MASK, or one of the conjoint
 * Out/In parts of the two alphas.  Every component of the result is
 * source * source factor + destination * destination factor, saturated
 * by FbGen.
 */
static void
fbCombineConjointGeneralU (uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width, uint16_t combine)
{
    const uint16_t src_sel = combine & CombineA;
    const uint16_t dst_sel = combine & CombineB;
    int i;

    for (i = 0; i < width; i++)
    {
        uint64_t src_px = combineMask (src, mask, i);
        uint64_t dst_px = dest[i];
        uint16_t src_alpha = src_px >> A_SHIFT;
        uint16_t dst_alpha = dst_px >> A_SHIFT;
        uint64_t part0, part1, part2, part3;
        uint32_t src_factor, dst_factor;
        uint32_t sum, tmp_dst, tmp_src;

        if (src_sel == CombineAOut)
            src_factor = fbCombineConjointOutPart (src_alpha, dst_alpha);
        else if (src_sel == CombineAIn)
            src_factor = fbCombineConjointInPart (src_alpha, dst_alpha);
        else if (src_sel == CombineA)
            src_factor = MASK;
        else
            src_factor = 0;

        if (dst_sel == CombineBOut)
            dst_factor = fbCombineConjointOutPart (dst_alpha, src_alpha);
        else if (dst_sel == CombineBIn)
            dst_factor = fbCombineConjointInPart (dst_alpha, src_alpha);
        else if (dst_sel == CombineB)
            dst_factor = MASK;
        else
            dst_factor = 0;

        /* One statement per component: FbGen writes to the shared
         * temporaries, so the four uses must stay sequenced. */
        part0 = FbGen (src_px, dst_px, 0, src_factor, dst_factor, sum, tmp_dst, tmp_src);
        part1 = FbGen (src_px, dst_px, G_SHIFT, src_factor, dst_factor, sum, tmp_dst, tmp_src);
        part2 = FbGen (src_px, dst_px, R_SHIFT, src_factor, dst_factor, sum, tmp_dst, tmp_src);
        part3 = FbGen (src_px, dst_px, A_SHIFT, src_factor, dst_factor, sum, tmp_dst, tmp_src);

        dest[i] = part0 | part1 | part2 | part3;
    }
}

/* Conjoint OVER, unified alpha: fbCombineConjointGeneralU with
 * CombineAOver.  imp and op are not used. */
static void
fbCombineConjointOverU (pixman_implementation_t *imp, pixman_op_t op,
			uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    fbCombineConjointGeneralU (dest, src, mask, width, CombineAOver);
}


/* Conjoint OVER_REVERSE, unified alpha: fbCombineConjointGeneralU with
 * CombineBOver.  imp and op are not used. */
static void
fbCombineConjointOverReverseU (pixman_implementation_t *imp, pixman_op_t op,
			       uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    fbCombineConjointGeneralU (dest, src, mask, width, CombineBOver);
}


/* Conjoint IN, unified alpha: fbCombineConjointGeneralU with CombineAIn.
 * imp and op are not used. */
static void
fbCombineConjointInU (pixman_implementation_t *imp, pixman_op_t op,
		      uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    fbCombineConjointGeneralU (dest, src, mask, width, CombineAIn);
}


/*
 * Unified-alpha combiner installed for PIXMAN_OP_CONJOINT_IN_REVERSE.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineConjointGeneralU with the CombineBIn selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineConjointInReverseU (pixman_implementation_t *imp, pixman_op_t op,
                             uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineConjointGeneralU (dest, src, mask, width, CombineBIn);
}

/*
 * Unified-alpha combiner installed for PIXMAN_OP_CONJOINT_OUT.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineConjointGeneralU with the CombineAOut selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineConjointOutU (pixman_implementation_t *imp, pixman_op_t op,
                       uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineConjointGeneralU (dest, src, mask, width, CombineAOut);
}

/*
 * Unified-alpha combiner installed for PIXMAN_OP_CONJOINT_OUT_REVERSE.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineConjointGeneralU with the CombineBOut selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineConjointOutReverseU (pixman_implementation_t *imp, pixman_op_t op,
                              uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineConjointGeneralU (dest, src, mask, width, CombineBOut);
}

/*
 * Unified-alpha combiner installed for PIXMAN_OP_CONJOINT_ATOP.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineConjointGeneralU with the CombineAAtop selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineConjointAtopU (pixman_implementation_t *imp, pixman_op_t op,
                        uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineConjointGeneralU (dest, src, mask, width, CombineAAtop);
}

/*
 * Unified-alpha combiner installed for PIXMAN_OP_CONJOINT_ATOP_REVERSE.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineConjointGeneralU with the CombineBAtop selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineConjointAtopReverseU (pixman_implementation_t *imp, pixman_op_t op,
                               uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineConjointGeneralU (dest, src, mask, width, CombineBAtop);
}

/*
 * Unified-alpha combiner installed for PIXMAN_OP_CONJOINT_XOR.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineConjointGeneralU with the CombineXor selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineConjointXorU (pixman_implementation_t *imp, pixman_op_t op,
                       uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineConjointGeneralU (dest, src, mask, width, CombineXor);
}

/********************************************************************************/
/*************************** Per Channel functions ******************************/
/********************************************************************************/

/*
 * Component-alpha CLEAR: zero-fills `width` 64-bit pixels starting at dest.
 *
 * imp, op, src and mask are accepted for the shared combiner signature
 * but are never read.
 */
static void
fbCombineClearC (pixman_implementation_t *imp, pixman_op_t op,
                 uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    /* Size in bytes of the scanline being cleared. */
    size_t nbytes = width * sizeof (uint64_t);

    memset (dest, 0, nbytes);
}

/*
 * Component-alpha SRC.
 *
 * For each of the `width` pixels, the source pixel is passed through
 * fbCombineMaskValueC together with the matching mask pixel and the
 * result is stored in the matching destination pixel.
 *
 * imp and op are unused.  mask is dereferenced unconditionally, so it
 * must point to at least `width` pixels.
 */
static void
fbCombineSrcC (pixman_implementation_t *imp, pixman_op_t op,
               uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; ++i)
    {
        uint64_t s = *(src + i);
        uint64_t m = *(mask + i);

        fbCombineMaskValueC (&s, &m);

        /*
         * Store at index i.  Writing through the bare dest pointer would
         * overwrite pixel 0 on every iteration and leave the remaining
         * width - 1 destination pixels untouched.
         */
        *(dest + i) = s;
    }
}

/*
 * Component-alpha OVER.
 *
 * Per pixel, fbCombineMaskC rewrites the local (source, mask) pair in
 * place.  The bitwise inverse of the resulting mask then decides the
 * outcome:
 *   - all ones: the destination pixel is left untouched;
 *   - zero:     the masked source replaces the destination;
 *   - else:     the destination is combined with FbByteMulAddC
 *               (dest, inverted mask, masked source) and written back.
 *
 * imp and op are unused.  mask is dereferenced unconditionally.
 */
static void
fbCombineOverC (pixman_implementation_t *imp, pixman_op_t op,
                uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; ++i)
    {
        uint64_t srcPixel = src[i];
        uint64_t maskPixel = mask[i];
        uint64_t invMask;

        fbCombineMaskC (&srcPixel, &maskPixel);

        invMask = ~maskPixel;
        if (invMask == ~0)
            continue;   /* nothing to write for this pixel */

        if (invMask)
        {
            uint64_t dstPixel = dest[i];

            FbByteMulAddC(dstPixel, invMask, srcPixel);
            srcPixel = dstPixel;
        }
        dest[i] = srcPixel;
    }
}

/*
 * Component-alpha OVER_REVERSE.
 *
 * Per pixel, the inverted destination alpha (~dest >> A_SHIFT) is
 * computed first:
 *   - zero: the destination pixel is left untouched and src/mask are
 *           not read;
 *   - MASK: the masked source (fbCombineMaskValueC) replaces the
 *           destination;
 *   - else: FbByteMulAdd (masked source, inverted alpha, dest) is applied
 *           and the result is stored.
 *
 * imp and op are unused.
 */
static void
fbCombineOverReverseC (pixman_implementation_t *imp, pixman_op_t op,
                       uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; ++i)
    {
        uint64_t dstPixel = dest[i];
        uint64_t invDstAlpha = ~dstPixel >> A_SHIFT;
        uint64_t srcPixel;
        uint64_t maskPixel;

        if (!invDstAlpha)
            continue;

        srcPixel = src[i];
        maskPixel = mask[i];

        fbCombineMaskValueC (&srcPixel, &maskPixel);

        if (invDstAlpha != MASK)
        {
            FbByteMulAdd(srcPixel, invDstAlpha, dstPixel);
        }
        dest[i] = srcPixel;
    }
}

/*
 * Component-alpha IN.
 *
 * Per pixel, the destination alpha (dest >> A_SHIFT) selects the result:
 *   - zero: 0 is stored, src/mask are not read;
 *   - MASK: the masked source (fbCombineMaskValueC) is stored as is;
 *   - else: the masked source is additionally run through
 *           FbByteMul with the destination alpha.
 *
 * The destination pixel is always written.  imp and op are unused.
 */
static void
fbCombineInC (pixman_implementation_t *imp, pixman_op_t op,
              uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; ++i)
    {
        uint64_t dstPixel = dest[i];
        uint32_t dstAlpha = dstPixel >> A_SHIFT;
        uint64_t result = 0;

        if (dstAlpha)
        {
            uint64_t maskPixel = mask[i];

            result = src[i];
            fbCombineMaskValueC (&result, &maskPixel);

            if (dstAlpha != MASK)
            {
                FbByteMul(result, dstAlpha);
            }
        }

        dest[i] = result;
    }
}

/*
 * Component-alpha IN_REVERSE.
 *
 * Per pixel, fbCombineMaskAlphaC updates the local mask from the source,
 * and the resulting mask value selects the outcome:
 *   - all ones: the destination pixel is left untouched;
 *   - zero:     0 is stored;
 *   - else:     the destination is run through FbByteMulC with that mask
 *               and written back.
 *
 * imp and op are unused.
 */
static void
fbCombineInReverseC (pixman_implementation_t *imp, pixman_op_t op,
                     uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; ++i)
    {
        uint64_t srcPixel = src[i];
        uint64_t maskPixel = mask[i];
        uint64_t factor;
        uint64_t dstPixel;

        fbCombineMaskAlphaC (&srcPixel, &maskPixel);

        factor = maskPixel;
        if (factor == ~0)
            continue;

        dstPixel = 0;
        if (factor)
        {
            dstPixel = dest[i];
            FbByteMulC(dstPixel, factor);
        }
        dest[i] = dstPixel;
    }
}

/*
 * Component-alpha OUT.
 *
 * Per pixel, the inverted destination alpha (~dest >> A_SHIFT) selects
 * the result:
 *   - zero: 0 is stored, src/mask are not read;
 *   - MASK: the masked source (fbCombineMaskValueC) is stored as is;
 *   - else: the masked source is additionally run through FbByteMul with
 *           the inverted destination alpha.
 *
 * The destination pixel is always written.  imp and op are unused.
 */
static void
fbCombineOutC (pixman_implementation_t *imp, pixman_op_t op,
               uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; ++i)
    {
        uint64_t dstPixel = dest[i];
        uint32_t invDstAlpha = ~dstPixel >> A_SHIFT;
        uint64_t result = 0;

        if (invDstAlpha)
        {
            uint64_t maskPixel = mask[i];

            result = src[i];
            fbCombineMaskValueC (&result, &maskPixel);

            if (invDstAlpha != MASK)
            {
                FbByteMul(result, invDstAlpha);
            }
        }

        dest[i] = result;
    }
}

/*
 * Component-alpha OUT_REVERSE.
 *
 * Per pixel, fbCombineMaskAlphaC updates the local mask from the source;
 * the bitwise inverse of that mask then selects the outcome:
 *   - all ones: the destination pixel is left untouched;
 *   - zero:     0 is stored;
 *   - else:     the destination is run through FbByteMulC with the
 *               inverted mask and written back.
 *
 * imp and op are unused.
 */
static void
fbCombineOutReverseC (pixman_implementation_t *imp, pixman_op_t op,
                      uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; ++i)
    {
        uint64_t srcPixel = src[i];
        uint64_t maskPixel = mask[i];
        uint64_t invMask;
        uint64_t dstPixel;

        fbCombineMaskAlphaC (&srcPixel, &maskPixel);

        invMask = ~maskPixel;
        if (invMask == ~0)
            continue;

        dstPixel = 0;
        if (invMask)
        {
            dstPixel = dest[i];
            FbByteMulC(dstPixel, invMask);
        }
        dest[i] = dstPixel;
    }
}

/*
 * Component-alpha ATOP.
 *
 * Per pixel: the destination alpha (dest >> A_SHIFT) is captured, the
 * local (source, mask) pair is rewritten by fbCombineMaskC, and the
 * destination is then updated with
 *     FbByteAddMulC (dest, ~mask, source, destination alpha)
 * and stored back.
 *
 * imp and op are unused.
 */
static void
fbCombineAtopC (pixman_implementation_t *imp, pixman_op_t op,
                uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; ++i)
    {
        uint64_t dstPixel = dest[i];
        uint64_t srcPixel = src[i];
        uint64_t maskPixel = mask[i];
        uint32_t dstAlpha = dstPixel >> A_SHIFT;
        uint64_t invMask;

        fbCombineMaskC (&srcPixel, &maskPixel);
        invMask = ~maskPixel;

        FbByteAddMulC(dstPixel, invMask, srcPixel, dstAlpha);
        dest[i] = dstPixel;
    }
}

/*
 * Component-alpha ATOP_REVERSE.
 *
 * Per pixel: the inverted destination alpha (~dest >> A_SHIFT) is
 * captured, the local (source, mask) pair is rewritten by fbCombineMaskC,
 * and the destination is then updated with
 *     FbByteAddMulC (dest, mask, source, inverted destination alpha)
 * and stored back.
 *
 * imp and op are unused.
 */
static void
fbCombineAtopReverseC (pixman_implementation_t *imp, pixman_op_t op,
                       uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; ++i)
    {
        uint64_t dstPixel = dest[i];
        uint64_t srcPixel = src[i];
        uint64_t maskPixel = mask[i];
        uint32_t invDstAlpha = ~dstPixel >> A_SHIFT;
        uint64_t dstFactor;

        fbCombineMaskC (&srcPixel, &maskPixel);
        dstFactor = maskPixel;

        FbByteAddMulC(dstPixel, dstFactor, srcPixel, invDstAlpha);
        dest[i] = dstPixel;
    }
}

/*
 * Component-alpha XOR.
 *
 * Per pixel: the inverted destination alpha (~dest >> A_SHIFT) is
 * captured, the local (source, mask) pair is rewritten by fbCombineMaskC,
 * and the destination is then updated with
 *     FbByteAddMulC (dest, ~mask, source, inverted destination alpha)
 * and stored back.
 *
 * imp and op are unused.
 */
static void
fbCombineXorC (pixman_implementation_t *imp, pixman_op_t op,
               uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; ++i)
    {
        uint64_t dstPixel = dest[i];
        uint64_t srcPixel = src[i];
        uint64_t maskPixel = mask[i];
        uint32_t invDstAlpha = ~dstPixel >> A_SHIFT;
        uint64_t invMask;

        fbCombineMaskC (&srcPixel, &maskPixel);
        invMask = ~maskPixel;

        FbByteAddMulC(dstPixel, invMask, srcPixel, invDstAlpha);
        dest[i] = dstPixel;
    }
}

/*
 * Component-alpha ADD.
 *
 * Per pixel: the source is masked with fbCombineMaskValueC, then
 * FbByteAdd (dest, masked source) is applied to the destination pixel
 * and the result is stored back.
 *
 * imp and op are unused.
 */
static void
fbCombineAddC (pixman_implementation_t *imp, pixman_op_t op,
               uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; ++i)
    {
        uint64_t srcPixel = src[i];
        uint64_t maskPixel = mask[i];
        uint64_t dstPixel = dest[i];

        fbCombineMaskValueC (&srcPixel, &maskPixel);

        FbByteAdd(dstPixel, srcPixel);
        dest[i] = dstPixel;
    }
}

/*
 * Component-alpha SATURATE.
 *
 * Per pixel, fbCombineMaskC rewrites the local (source, mask) pair.  The
 * four channels of the resulting mask are then compared, one by one,
 * against the inverted destination alpha (~dest >> A_SHIFT):
 *   - channel value <= inverted alpha: the output channel is
 *     Add (source, dest, shift);
 *   - otherwise: the output channel is FbGen (source, dest, shift, Fa, MASK)
 *     with Fa = (inverted alpha << G_SHIFT) / channel value.  The divisor
 *     is non-zero on this path because it is strictly greater than the
 *     (unsigned) inverted alpha.
 * The four output channels are OR-ed together and stored in dest.
 *
 * imp and op are unused.
 */
static void
fbCombineSaturateC (pixman_implementation_t *imp, pixman_op_t op,
                    uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    int i;

    for (i = 0; i < width; ++i)
    {
        uint64_t dstPixel = dest[i];
        uint64_t srcPixel = src[i];
        uint64_t maskPixel = mask[i];
        uint64_t outB, outG, outR, outA;
        uint32_t maskA, maskR, maskG, maskB;
        uint32_t invDstAlpha;
        uint32_t tmp0, tmp1, tmp2;      /* scratch for the Add/FbGen macros */

        fbCombineMaskC (&srcPixel, &maskPixel);

        maskB =  maskPixel             & MASK;
        maskG = (maskPixel >> G_SHIFT) & MASK;
        maskR = (maskPixel >> R_SHIFT) & MASK;
        maskA = (maskPixel >> A_SHIFT);
        invDstAlpha = ~dstPixel >> A_SHIFT;

        if (maskB <= invDstAlpha)
            outB = Add(srcPixel, dstPixel, 0, tmp0);
        else
            outB = FbGen (srcPixel, dstPixel, 0,
                          (invDstAlpha << G_SHIFT) / maskB, MASK, tmp0, tmp1, tmp2);

        if (maskG <= invDstAlpha)
            outG = Add(srcPixel, dstPixel, G_SHIFT, tmp0);
        else
            outG = FbGen (srcPixel, dstPixel, G_SHIFT,
                          (invDstAlpha << G_SHIFT) / maskG, MASK, tmp0, tmp1, tmp2);

        if (maskR <= invDstAlpha)
            outR = Add(srcPixel, dstPixel, R_SHIFT, tmp0);
        else
            outR = FbGen (srcPixel, dstPixel, R_SHIFT,
                          (invDstAlpha << G_SHIFT) / maskR, MASK, tmp0, tmp1, tmp2);

        if (maskA <= invDstAlpha)
            outA = Add(srcPixel, dstPixel, A_SHIFT, tmp0);
        else
            outA = FbGen (srcPixel, dstPixel, A_SHIFT,
                          (invDstAlpha << G_SHIFT) / maskA, MASK, tmp0, tmp1, tmp2);

        dest[i] = outB | outG | outR | outA;
    }
}

/*
 * Shared worker for the component-alpha disjoint combiners.
 *
 * dest, src, mask: scanlines of `width` 64-bit pixels; all three are
 *                  dereferenced for every pixel.
 * combine:         selector; its CombineA bits choose how the source
 *                  factor Fa is built and its CombineB bits choose how
 *                  the destination factor Fb is built.
 *
 * Per pixel, fbCombineMaskC rewrites the local (source, mask) pair and
 * the resulting mask is used as a per-channel source alpha.  Fa and Fb
 * each hold one factor per channel, packed at shifts 0, G_SHIFT, R_SHIFT
 * and A_SHIFT:
 *   - Out selector: fbCombineDisjointOutPart per channel;
 *   - In selector:  fbCombineDisjointInPart per channel;
 *   - full CombineA / CombineB: all ones;
 *   - anything else: zero.
 * For Fa the arguments are (source-alpha channel, dest alpha); for Fb
 * they are swapped.  The output channels come from FbGen (source, dest,
 * shift, Fa channel, Fb channel) and are OR-ed together into dest.
 */
static void
fbCombineDisjointGeneralC (uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width, uint16_t combine)
{
    int i;

    for (i = 0; i < width; ++i)
    {
        uint64_t srcPixel = src[i];
        uint64_t maskPixel = mask[i];
        uint64_t dstPixel = dest[i];
        uint16_t dstAlpha = dstPixel >> A_SHIFT;
        uint64_t srcAlpha;
        uint64_t partB, partG, partR, partA;
        uint64_t Fa, Fb;
        uint32_t tmp0, tmp1, tmp2;      /* scratch for the FbGen macro */

        fbCombineMaskC (&srcPixel, &maskPixel);
        srcAlpha = maskPixel;

        /* Source factor. */
        switch (combine & CombineA)
        {
        case CombineAOut:
            partB = (uint64_t)fbCombineDisjointOutPart ((uint16_t) (srcAlpha >> 0), dstAlpha);
            partG = (uint64_t)fbCombineDisjointOutPart ((uint16_t) (srcAlpha >> G_SHIFT), dstAlpha) << G_SHIFT;
            partR = (uint64_t)fbCombineDisjointOutPart ((uint16_t) (srcAlpha >> R_SHIFT), dstAlpha) << R_SHIFT;
            partA = (uint64_t)fbCombineDisjointOutPart ((uint16_t) (srcAlpha >> A_SHIFT), dstAlpha) << A_SHIFT;
            Fa = partB | partG | partR | partA;
            break;
        case CombineAIn:
            partB = (uint64_t)fbCombineDisjointInPart ((uint16_t) (srcAlpha >> 0), dstAlpha);
            partG = (uint64_t)fbCombineDisjointInPart ((uint16_t) (srcAlpha >> G_SHIFT), dstAlpha) << G_SHIFT;
            partR = (uint64_t)fbCombineDisjointInPart ((uint16_t) (srcAlpha >> R_SHIFT), dstAlpha) << R_SHIFT;
            partA = (uint64_t)fbCombineDisjointInPart ((uint16_t) (srcAlpha >> A_SHIFT), dstAlpha) << A_SHIFT;
            Fa = partB | partG | partR | partA;
            break;
        case CombineA:
            Fa = ~0;
            break;
        default:
            Fa = 0;
            break;
        }

        /* Destination factor: same helpers with the arguments swapped. */
        switch (combine & CombineB)
        {
        case CombineBOut:
            partB = (uint64_t)fbCombineDisjointOutPart (dstAlpha, (uint16_t) (srcAlpha >> 0));
            partG = (uint64_t)fbCombineDisjointOutPart (dstAlpha, (uint16_t) (srcAlpha >> G_SHIFT)) << G_SHIFT;
            partR = (uint64_t)fbCombineDisjointOutPart (dstAlpha, (uint16_t) (srcAlpha >> R_SHIFT)) << R_SHIFT;
            partA = (uint64_t)fbCombineDisjointOutPart (dstAlpha, (uint16_t) (srcAlpha >> A_SHIFT)) << A_SHIFT;
            Fb = partB | partG | partR | partA;
            break;
        case CombineBIn:
            partB = (uint64_t)fbCombineDisjointInPart (dstAlpha, (uint16_t) (srcAlpha >> 0));
            partG = (uint64_t)fbCombineDisjointInPart (dstAlpha, (uint16_t) (srcAlpha >> G_SHIFT)) << G_SHIFT;
            partR = (uint64_t)fbCombineDisjointInPart (dstAlpha, (uint16_t) (srcAlpha >> R_SHIFT)) << R_SHIFT;
            partA = (uint64_t)fbCombineDisjointInPart (dstAlpha, (uint16_t) (srcAlpha >> A_SHIFT)) << A_SHIFT;
            Fb = partB | partG | partR | partA;
            break;
        case CombineB:
            Fb = ~0;
            break;
        default:
            Fb = 0;
            break;
        }

        /* Blend each channel with its own pair of factors. */
        partB = FbGen (srcPixel, dstPixel, 0, GetComp(Fa,0), GetComp(Fb,0), tmp0, tmp1, tmp2);
        partG = FbGen (srcPixel, dstPixel, G_SHIFT, GetComp(Fa,G_SHIFT), GetComp(Fb,G_SHIFT), tmp0, tmp1, tmp2);
        partR = FbGen (srcPixel, dstPixel, R_SHIFT, GetComp(Fa,R_SHIFT), GetComp(Fb,R_SHIFT), tmp0, tmp1, tmp2);
        partA = FbGen (srcPixel, dstPixel, A_SHIFT, GetComp(Fa,A_SHIFT), GetComp(Fb,A_SHIFT), tmp0, tmp1, tmp2);

        dest[i] = partB | partG | partR | partA;
    }
}

/*
 * Component-alpha combiner installed for PIXMAN_OP_DISJOINT_OVER.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineDisjointGeneralC with the CombineAOver selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineDisjointOverC (pixman_implementation_t *imp, pixman_op_t op,
                        uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOver);
}

/*
 * Component-alpha combiner installed for PIXMAN_OP_DISJOINT_IN.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineDisjointGeneralC with the CombineAIn selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineDisjointInC (pixman_implementation_t *imp, pixman_op_t op,
                      uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAIn);
}

/*
 * Component-alpha combiner installed for PIXMAN_OP_DISJOINT_IN_REVERSE.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineDisjointGeneralC with the CombineBIn selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineDisjointInReverseC (pixman_implementation_t *imp, pixman_op_t op,
                             uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBIn);
}

/*
 * Component-alpha combiner installed for PIXMAN_OP_DISJOINT_OUT.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineDisjointGeneralC with the CombineAOut selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineDisjointOutC (pixman_implementation_t *imp, pixman_op_t op,
                       uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOut);
}

/*
 * Component-alpha combiner installed for PIXMAN_OP_DISJOINT_OUT_REVERSE.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineDisjointGeneralC with the CombineBOut selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineDisjointOutReverseC (pixman_implementation_t *imp, pixman_op_t op,
                              uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBOut);
}

/*
 * Component-alpha combiner installed for PIXMAN_OP_DISJOINT_ATOP.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineDisjointGeneralC with the CombineAAtop selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineDisjointAtopC (pixman_implementation_t *imp, pixman_op_t op,
                        uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAAtop);
}

/*
 * Component-alpha combiner installed for PIXMAN_OP_DISJOINT_ATOP_REVERSE.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineDisjointGeneralC with the CombineBAtop selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineDisjointAtopReverseC (pixman_implementation_t *imp, pixman_op_t op,
                               uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBAtop);
}

/*
 * Component-alpha combiner installed for PIXMAN_OP_DISJOINT_XOR.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineDisjointGeneralC with the CombineXor selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineDisjointXorC (pixman_implementation_t *imp, pixman_op_t op,
                       uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineDisjointGeneralC (dest, src, mask, width, CombineXor);
}

/*
 * Shared worker for the component-alpha conjoint combiners.
 *
 * dest, src, mask: scanlines of `width` 64-bit pixels; all three are
 *                  dereferenced for every pixel.
 * combine:         selector; its CombineA bits choose how the source
 *                  factor Fa is built and its CombineB bits choose how
 *                  the destination factor Fb is built.
 *
 * Per pixel, fbCombineMaskC rewrites the local (source, mask) pair and
 * the resulting mask is used as a per-channel source alpha.  Fa and Fb
 * each hold one factor per channel, packed at shifts 0, G_SHIFT, R_SHIFT
 * and A_SHIFT:
 *   - Out selector: fbCombineConjointOutPart per channel;
 *   - In selector:  fbCombineConjointInPart per channel;
 *   - full CombineA / CombineB: all ones;
 *   - anything else: zero.
 * For Fa the arguments are (source-alpha channel, dest alpha); for Fb
 * they are swapped.  The output channels come from FbGen (source, dest,
 * shift, Fa channel, Fb channel) and are OR-ed together into dest.
 */
static void
fbCombineConjointGeneralC (uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width, uint16_t combine)
{
    int i;

    for (i = 0; i < width; ++i)
    {
        uint64_t srcPixel = src[i];
        uint64_t maskPixel = mask[i];
        uint64_t dstPixel = dest[i];
        uint16_t dstAlpha = dstPixel >> A_SHIFT;
        uint64_t srcAlpha;
        uint64_t partB, partG, partR, partA;
        uint64_t Fa, Fb;
        uint32_t tmp0, tmp1, tmp2;      /* scratch for the FbGen macro */

        fbCombineMaskC (&srcPixel, &maskPixel);
        srcAlpha = maskPixel;

        /* Source factor. */
        switch (combine & CombineA)
        {
        case CombineAOut:
            partB = (uint64_t)fbCombineConjointOutPart ((uint16_t) (srcAlpha >> 0), dstAlpha);
            partG = (uint64_t)fbCombineConjointOutPart ((uint16_t) (srcAlpha >> G_SHIFT), dstAlpha) << G_SHIFT;
            partR = (uint64_t)fbCombineConjointOutPart ((uint16_t) (srcAlpha >> R_SHIFT), dstAlpha) << R_SHIFT;
            partA = (uint64_t)fbCombineConjointOutPart ((uint16_t) (srcAlpha >> A_SHIFT), dstAlpha) << A_SHIFT;
            Fa = partB | partG | partR | partA;
            break;
        case CombineAIn:
            partB = (uint64_t)fbCombineConjointInPart ((uint16_t) (srcAlpha >> 0), dstAlpha);
            partG = (uint64_t)fbCombineConjointInPart ((uint16_t) (srcAlpha >> G_SHIFT), dstAlpha) << G_SHIFT;
            partR = (uint64_t)fbCombineConjointInPart ((uint16_t) (srcAlpha >> R_SHIFT), dstAlpha) << R_SHIFT;
            partA = (uint64_t)fbCombineConjointInPart ((uint16_t) (srcAlpha >> A_SHIFT), dstAlpha) << A_SHIFT;
            Fa = partB | partG | partR | partA;
            break;
        case CombineA:
            Fa = ~0;
            break;
        default:
            Fa = 0;
            break;
        }

        /* Destination factor: same helpers with the arguments swapped. */
        switch (combine & CombineB)
        {
        case CombineBOut:
            partB = (uint64_t)fbCombineConjointOutPart (dstAlpha, (uint16_t) (srcAlpha >> 0));
            partG = (uint64_t)fbCombineConjointOutPart (dstAlpha, (uint16_t) (srcAlpha >> G_SHIFT)) << G_SHIFT;
            partR = (uint64_t)fbCombineConjointOutPart (dstAlpha, (uint16_t) (srcAlpha >> R_SHIFT)) << R_SHIFT;
            partA = (uint64_t)fbCombineConjointOutPart (dstAlpha, (uint16_t) (srcAlpha >> A_SHIFT)) << A_SHIFT;
            Fb = partB | partG | partR | partA;
            break;
        case CombineBIn:
            partB = (uint64_t)fbCombineConjointInPart (dstAlpha, (uint16_t) (srcAlpha >> 0));
            partG = (uint64_t)fbCombineConjointInPart (dstAlpha, (uint16_t) (srcAlpha >> G_SHIFT)) << G_SHIFT;
            partR = (uint64_t)fbCombineConjointInPart (dstAlpha, (uint16_t) (srcAlpha >> R_SHIFT)) << R_SHIFT;
            partA = (uint64_t)fbCombineConjointInPart (dstAlpha, (uint16_t) (srcAlpha >> A_SHIFT)) << A_SHIFT;
            Fb = partB | partG | partR | partA;
            break;
        case CombineB:
            Fb = ~0;
            break;
        default:
            Fb = 0;
            break;
        }

        /* Blend each channel with its own pair of factors. */
        partB = FbGen (srcPixel, dstPixel, 0, GetComp(Fa,0), GetComp(Fb,0), tmp0, tmp1, tmp2);
        partG = FbGen (srcPixel, dstPixel, G_SHIFT, GetComp(Fa,G_SHIFT), GetComp(Fb,G_SHIFT), tmp0, tmp1, tmp2);
        partR = FbGen (srcPixel, dstPixel, R_SHIFT, GetComp(Fa,R_SHIFT), GetComp(Fb,R_SHIFT), tmp0, tmp1, tmp2);
        partA = FbGen (srcPixel, dstPixel, A_SHIFT, GetComp(Fa,A_SHIFT), GetComp(Fb,A_SHIFT), tmp0, tmp1, tmp2);

        dest[i] = partB | partG | partR | partA;
    }
}

/*
 * Component-alpha combiner installed for PIXMAN_OP_CONJOINT_OVER.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineConjointGeneralC with the CombineAOver selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineConjointOverC (pixman_implementation_t *imp, pixman_op_t op,
                        uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineConjointGeneralC (dest, src, mask, width, CombineAOver);
}

/*
 * Component-alpha combiner installed for PIXMAN_OP_CONJOINT_OVER_REVERSE.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineConjointGeneralC with the CombineBOver selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineConjointOverReverseC (pixman_implementation_t *imp, pixman_op_t op,
                               uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineConjointGeneralC (dest, src, mask, width, CombineBOver);
}

/*
 * Component-alpha combiner installed for PIXMAN_OP_CONJOINT_IN.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineConjointGeneralC with the CombineAIn selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineConjointInC (pixman_implementation_t *imp, pixman_op_t op,
                      uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineConjointGeneralC (dest, src, mask, width, CombineAIn);
}

/*
 * Component-alpha combiner installed for PIXMAN_OP_CONJOINT_IN_REVERSE.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineConjointGeneralC with the CombineBIn selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineConjointInReverseC (pixman_implementation_t *imp, pixman_op_t op,
                             uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineConjointGeneralC (dest, src, mask, width, CombineBIn);
}

/*
 * Component-alpha combiner installed for PIXMAN_OP_CONJOINT_OUT.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineConjointGeneralC with the CombineAOut selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineConjointOutC (pixman_implementation_t *imp, pixman_op_t op,
                       uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineConjointGeneralC (dest, src, mask, width, CombineAOut);
}

/*
 * Component-alpha combiner installed for PIXMAN_OP_CONJOINT_OUT_REVERSE.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineConjointGeneralC with the CombineBOut selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineConjointOutReverseC (pixman_implementation_t *imp, pixman_op_t op,
                              uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineConjointGeneralC (dest, src, mask, width, CombineBOut);
}

/*
 * Component-alpha combiner installed for PIXMAN_OP_CONJOINT_ATOP.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineConjointGeneralC with the CombineAAtop selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineConjointAtopC (pixman_implementation_t *imp, pixman_op_t op,
                        uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineConjointGeneralC (dest, src, mask, width, CombineAAtop);
}

/*
 * Component-alpha combiner installed for PIXMAN_OP_CONJOINT_ATOP_REVERSE.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineConjointGeneralC with the CombineBAtop selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineConjointAtopReverseC (pixman_implementation_t *imp, pixman_op_t op,
                               uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineConjointGeneralC (dest, src, mask, width, CombineBAtop);
}

/*
 * Component-alpha combiner installed for PIXMAN_OP_CONJOINT_XOR.
 *
 * Forwards the scanline (dest, src, mask, width) to
 * fbCombineConjointGeneralC with the CombineXor selector.  The imp and
 * op arguments belong to the shared combiner signature and are not used.
 */
static void
fbCombineConjointXorC (pixman_implementation_t *imp, pixman_op_t op,
                       uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width)
{
    (void) imp;
    (void) op;

    fbCombineConjointGeneralC (dest, src, mask, width, CombineXor);
}

/*
 * Installs the 64-bit combiner functions defined in this file into an
 * implementation's dispatch tables.
 *
 * imp: implementation whose combine_64[] (unified alpha) and
 *      combine_64_ca[] (component alpha) tables are filled in, indexed by
 *      PIXMAN_OP_* operator.  Must not be NULL.
 *
 * Notes on the mapping:
 *   - The CLEAR and SRC slots of the disjoint and conjoint groups reuse
 *     the plain CLEAR and SRC combiners.
 *   - DISJOINT_OVER_REVERSE reuses the SATURATE combiner in both tables.
 *   - The four HSL operators get no component-alpha combiner; their
 *     combine_64_ca[] slots are set to NULL.
 */
void
_pixman_setup_combiner_functions_64 (pixman_implementation_t *imp)
{
    /* Unified alpha */
    imp->combine_64[PIXMAN_OP_CLEAR] = fbCombineClear;
    imp->combine_64[PIXMAN_OP_SRC] = fbCombineSrcU;
    /* dest */
    imp->combine_64[PIXMAN_OP_OVER] = fbCombineOverU;
    imp->combine_64[PIXMAN_OP_OVER_REVERSE] = fbCombineOverReverseU;
    imp->combine_64[PIXMAN_OP_IN] = fbCombineInU;
    imp->combine_64[PIXMAN_OP_IN_REVERSE] = fbCombineInReverseU;
    imp->combine_64[PIXMAN_OP_OUT] = fbCombineOutU;
    imp->combine_64[PIXMAN_OP_OUT_REVERSE] = fbCombineOutReverseU;
    imp->combine_64[PIXMAN_OP_ATOP] = fbCombineAtopU;
    imp->combine_64[PIXMAN_OP_ATOP_REVERSE] = fbCombineAtopReverseU;
    imp->combine_64[PIXMAN_OP_XOR] = fbCombineXorU;
    imp->combine_64[PIXMAN_OP_ADD] = fbCombineAddU;
    imp->combine_64[PIXMAN_OP_SATURATE] = fbCombineSaturateU;

    /* Disjoint, unified */
    imp->combine_64[PIXMAN_OP_DISJOINT_CLEAR] = fbCombineClear;
    imp->combine_64[PIXMAN_OP_DISJOINT_SRC] = fbCombineSrcU;
    /* dest */
    imp->combine_64[PIXMAN_OP_DISJOINT_OVER] = fbCombineDisjointOverU;
    imp->combine_64[PIXMAN_OP_DISJOINT_OVER_REVERSE] = fbCombineSaturateU;
    imp->combine_64[PIXMAN_OP_DISJOINT_IN] = fbCombineDisjointInU;
    imp->combine_64[PIXMAN_OP_DISJOINT_IN_REVERSE] = fbCombineDisjointInReverseU;
    imp->combine_64[PIXMAN_OP_DISJOINT_OUT] = fbCombineDisjointOutU;
    imp->combine_64[PIXMAN_OP_DISJOINT_OUT_REVERSE] = fbCombineDisjointOutReverseU;
    imp->combine_64[PIXMAN_OP_DISJOINT_ATOP] = fbCombineDisjointAtopU;
    imp->combine_64[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = fbCombineDisjointAtopReverseU;
    imp->combine_64[PIXMAN_OP_DISJOINT_XOR] = fbCombineDisjointXorU;

    /* Conjoint, unified */
    imp->combine_64[PIXMAN_OP_CONJOINT_CLEAR] = fbCombineClear;
    imp->combine_64[PIXMAN_OP_CONJOINT_SRC] = fbCombineSrcU;
    /* dest */
    imp->combine_64[PIXMAN_OP_CONJOINT_OVER] = fbCombineConjointOverU;
    imp->combine_64[PIXMAN_OP_CONJOINT_OVER_REVERSE] = fbCombineConjointOverReverseU;
    imp->combine_64[PIXMAN_OP_CONJOINT_IN] = fbCombineConjointInU;
    imp->combine_64[PIXMAN_OP_CONJOINT_IN_REVERSE] = fbCombineConjointInReverseU;
    imp->combine_64[PIXMAN_OP_CONJOINT_OUT] = fbCombineConjointOutU;
    imp->combine_64[PIXMAN_OP_CONJOINT_OUT_REVERSE] = fbCombineConjointOutReverseU;
    imp->combine_64[PIXMAN_OP_CONJOINT_ATOP] = fbCombineConjointAtopU;
    imp->combine_64[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = fbCombineConjointAtopReverseU;
    imp->combine_64[PIXMAN_OP_CONJOINT_XOR] = fbCombineConjointXorU;

    /* Blend modes, unified */
    imp->combine_64[PIXMAN_OP_MULTIPLY] = fbCombineMultiplyU;
    imp->combine_64[PIXMAN_OP_SCREEN] = fbCombineScreenU;
    imp->combine_64[PIXMAN_OP_OVERLAY] = fbCombineOverlayU;
    imp->combine_64[PIXMAN_OP_DARKEN] = fbCombineDarkenU;
    imp->combine_64[PIXMAN_OP_LIGHTEN] = fbCombineLightenU;
    imp->combine_64[PIXMAN_OP_COLOR_DODGE] = fbCombineColorDodgeU;
    imp->combine_64[PIXMAN_OP_COLOR_BURN] = fbCombineColorBurnU;
    imp->combine_64[PIXMAN_OP_HARD_LIGHT] = fbCombineHardLightU;
    imp->combine_64[PIXMAN_OP_SOFT_LIGHT] = fbCombineSoftLightU;
    imp->combine_64[PIXMAN_OP_DIFFERENCE] = fbCombineDifferenceU;
    imp->combine_64[PIXMAN_OP_EXCLUSION] = fbCombineExclusionU;
    imp->combine_64[PIXMAN_OP_HSL_HUE] = fbCombineHSLHueU;
    imp->combine_64[PIXMAN_OP_HSL_SATURATION] = fbCombineHSLSaturationU;
    imp->combine_64[PIXMAN_OP_HSL_COLOR] = fbCombineHSLColorU;
    imp->combine_64[PIXMAN_OP_HSL_LUMINOSITY] = fbCombineHSLLuminosityU;

    /* Component alpha combiners */
    imp->combine_64_ca[PIXMAN_OP_CLEAR] = fbCombineClearC;
    imp->combine_64_ca[PIXMAN_OP_SRC] = fbCombineSrcC;
    /* dest */
    imp->combine_64_ca[PIXMAN_OP_OVER] = fbCombineOverC;
    imp->combine_64_ca[PIXMAN_OP_OVER_REVERSE] = fbCombineOverReverseC;
    imp->combine_64_ca[PIXMAN_OP_IN] = fbCombineInC;
    imp->combine_64_ca[PIXMAN_OP_IN_REVERSE] = fbCombineInReverseC;
    imp->combine_64_ca[PIXMAN_OP_OUT] = fbCombineOutC;
    imp->combine_64_ca[PIXMAN_OP_OUT_REVERSE] = fbCombineOutReverseC;
    imp->combine_64_ca[PIXMAN_OP_ATOP] = fbCombineAtopC;
    imp->combine_64_ca[PIXMAN_OP_ATOP_REVERSE] = fbCombineAtopReverseC;
    imp->combine_64_ca[PIXMAN_OP_XOR] = fbCombineXorC;
    imp->combine_64_ca[PIXMAN_OP_ADD] = fbCombineAddC;
    imp->combine_64_ca[PIXMAN_OP_SATURATE] = fbCombineSaturateC;

    /* Disjoint CA */
    imp->combine_64_ca[PIXMAN_OP_DISJOINT_CLEAR] = fbCombineClearC;
    imp->combine_64_ca[PIXMAN_OP_DISJOINT_SRC] = fbCombineSrcC;
    /* dest */
    imp->combine_64_ca[PIXMAN_OP_DISJOINT_OVER] = fbCombineDisjointOverC;
    /* Terminated with ';' (was ','), so this is its own statement. */
    imp->combine_64_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = fbCombineSaturateC;
    imp->combine_64_ca[PIXMAN_OP_DISJOINT_IN] = fbCombineDisjointInC;
    imp->combine_64_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = fbCombineDisjointInReverseC;
    imp->combine_64_ca[PIXMAN_OP_DISJOINT_OUT] = fbCombineDisjointOutC;
    imp->combine_64_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = fbCombineDisjointOutReverseC;
    imp->combine_64_ca[PIXMAN_OP_DISJOINT_ATOP] = fbCombineDisjointAtopC;
    imp->combine_64_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = fbCombineDisjointAtopReverseC;
    imp->combine_64_ca[PIXMAN_OP_DISJOINT_XOR] = fbCombineDisjointXorC;

    /* Conjoint CA */
    imp->combine_64_ca[PIXMAN_OP_CONJOINT_CLEAR] = fbCombineClearC;
    imp->combine_64_ca[PIXMAN_OP_CONJOINT_SRC] = fbCombineSrcC;
    /* dest */
    imp->combine_64_ca[PIXMAN_OP_CONJOINT_OVER] = fbCombineConjointOverC;
    imp->combine_64_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = fbCombineConjointOverReverseC;
    imp->combine_64_ca[PIXMAN_OP_CONJOINT_IN] = fbCombineConjointInC;
    imp->combine_64_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = fbCombineConjointInReverseC;
    imp->combine_64_ca[PIXMAN_OP_CONJOINT_OUT] = fbCombineConjointOutC;
    imp->combine_64_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = fbCombineConjointOutReverseC;
    imp->combine_64_ca[PIXMAN_OP_CONJOINT_ATOP] = fbCombineConjointAtopC;
    imp->combine_64_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = fbCombineConjointAtopReverseC;
    imp->combine_64_ca[PIXMAN_OP_CONJOINT_XOR] = fbCombineConjointXorC;

    /* Blend modes, CA */
    imp->combine_64_ca[PIXMAN_OP_MULTIPLY] = fbCombineMultiplyC;
    imp->combine_64_ca[PIXMAN_OP_SCREEN] = fbCombineScreenC;
    imp->combine_64_ca[PIXMAN_OP_OVERLAY] = fbCombineOverlayC;
    imp->combine_64_ca[PIXMAN_OP_DARKEN] = fbCombineDarkenC;
    imp->combine_64_ca[PIXMAN_OP_LIGHTEN] = fbCombineLightenC;
    imp->combine_64_ca[PIXMAN_OP_COLOR_DODGE] = fbCombineColorDodgeC;
    imp->combine_64_ca[PIXMAN_OP_COLOR_BURN] = fbCombineColorBurnC;
    imp->combine_64_ca[PIXMAN_OP_HARD_LIGHT] = fbCombineHardLightC;
    imp->combine_64_ca[PIXMAN_OP_SOFT_LIGHT] = fbCombineSoftLightC;
    imp->combine_64_ca[PIXMAN_OP_DIFFERENCE] = fbCombineDifferenceC;
    imp->combine_64_ca[PIXMAN_OP_EXCLUSION] = fbCombineExclusionC;
    /* It is not clear that these make sense, so leave them out for now */
    imp->combine_64_ca[PIXMAN_OP_HSL_HUE] = NULL;
    imp->combine_64_ca[PIXMAN_OP_HSL_SATURATION] = NULL;
    imp->combine_64_ca[PIXMAN_OP_HSL_COLOR] = NULL;
    imp->combine_64_ca[PIXMAN_OP_HSL_LUMINOSITY] = NULL;
}


