/***************************************************************************
                           mirror.cpp
                           ----------
    begin                : Sat Jul 31 2004
    copyright            : (C) 2004 by Dirk Ziegelmeier
    email                : dziegel@gmx.de
 ***************************************************************************/

/*
 * Mirror algorithm taken from DScaler.
 * Copyright (c) 2002 Rob Muller. All rights reserved.
 *
 * Ported by: Dirk Ziegelmeier
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 */

#include <stdlib.h>

#include <kdebug.h>
#include <klocale.h>
#include <kconfig.h>

#include "mirror.h"
#include "kdetvimagefilter.h"
#include "kdetvimagefiltercontext.h"
#include "kdetvcpudetection.h"
#include "x86-64_macros.inc"

// SWAPPIXELS: emits the MMX instruction sequence (as one concatenated asm
// string) that reverses the order of the four YUYV pixels held in pixelMM.
//
// note that the pixels are stored as         Y0 C0a Y1 C0b Y2 C1a Y3 C1b (4 pixels)
// a swapped block of pixels looks like this: Y3 C1a Y2 C1b Y1 C0a Y0 C0b
// i.e. the luma bytes are fully reversed, while the two chroma pairs only
// trade places as pairs (each pair keeps its internal a/b order).
//
// All arguments are string literals naming asm operands:
//   pixelMM     register holding the block; overwritten with the result
//   temp1MM     scratch register (clobbered)
//   temp2MM     scratch register (clobbered)
//   CMask       mask selecting the chroma bytes; the caller in this file
//               passes a register loaded with 0xFF00FF00FF00FF00
//   MaskLumi02  luma mask; the caller passes 0x00FF000000FF0000
//   MaskLumi13  luma mask; the caller passes 0x000000FF000000FF
//
// The whitespace between each string literal and the adjacent macro parameter
// is required: since C++11 a literal immediately followed by an identifier is
// lexed as a user-defined-literal suffix instead of literal concatenation.
#define SWAPPIXELS(pixelMM, temp1MM, temp2MM, CMask, MaskLumi02, MaskLumi13)                       \
    "movq    " pixelMM ",    " temp1MM "\n\t"    /* make a copy */                                 \
    "psllq   $32,          " pixelMM "\n\t"      /* shift pixel 2 and 3 two pixels to the left */  \
    "psrlq   $32,          " temp1MM "\n\t"      /* shift pixel 0 and 1 two pixels to the right */ \
    "por     " temp1MM ",    " pixelMM "\n\t"    /* combine: the 32-bit halves are swapped now */  \
    "movq    " pixelMM ",    " temp2MM "\n\t"    /* make a copy */                                 \
    "movq    " pixelMM ",    " temp1MM "\n\t"    /* make another copy */                           \
    "pand    " CMask ",      " temp2MM "\n\t"    /* the chroma part of the pixels is ready now */  \
    "psllq   $16,          " pixelMM "\n\t"      /* shift 1 pixel to the left */                   \
    "psrlq   $16,          " temp1MM "\n\t"      /* shift 1 pixel to the right */                  \
    "pand    " MaskLumi02 ", " pixelMM "\n\t"    /* remove one pixel and all chroma */             \
    "pand    " MaskLumi13 ", " temp1MM "\n\t"    /* remove one pixel and all chroma */             \
    "por     " temp2MM ",    " pixelMM "\n\t"    /* combine with the chroma */                     \
    "por     " temp1MM ",    " pixelMM "\n\t"    /* combine the pixels */

/**
 * Image filter registered with the base class under the i18n() name "Mirror".
 * It declares YUYV as its only input format; the per-frame work is done by
 * operator<<, which is defined out of line.
 */
class MirrorImageFilter : public KdetvImageFilter
{
public:
    /// Passes the translated filter name on to KdetvImageFilter.
    MirrorImageFilter() : KdetvImageFilter(i18n("Mirror")) {}

    /// Nothing to release; the filter owns no resources of its own.
    virtual ~MirrorImageFilter() {}

    /// Processes the frame referenced by @p ctx and returns @p ctx again.
    virtual KdetvImageFilterContext* operator<< (KdetvImageFilterContext* ctx);

    /// The only input format this filter accepts.
    virtual KdetvImage::ImageFormat inputFormats() { return KdetvImage::FORMAT_YUYV; }
};

/**
 * Mirrors every line of ctx->out horizontally, in place.
 *
 * Each line is processed in 8-byte blocks (4 YUYV pixels): a block from the
 * left half and its counterpart from the right half are loaded, the pixel
 * order inside each is reversed with SWAPPIXELS, and the two are written back
 * to each other's position.
 *
 * Only the first 16 * (bytesPerLine / 16) bytes of a line take part; any
 * remaining trailing bytes are left untouched. Lines shorter than 16 bytes
 * are not modified at all.
 *
 * @param ctx filter context; ctx->out is modified in place.
 * @return the same ctx pointer. The image is returned unmodified when the CPU
 *         lacks MMX or the line is too short to hold a pair of blocks.
 */
KdetvImageFilterContext* MirrorImageFilter::operator<< (KdetvImageFilterContext* ctx)
{
    // Return if we don't have MMX
    if(!(KdetvCpuDetection::capabilities() & KdetvCpuDetection::Cap_MMX)) {
        return ctx;
    }

    long           Cycles        = ctx->out->bytesPerLine() / 8 / 2; // half line in 8 byte chunks

    // The asm loop below decrements its counter *before* testing it, so a
    // count of zero would wrap around and run far beyond the buffer (and
    // Pixels2 would start 8 bytes in front of the line). Nothing to mirror.
    if (Cycles <= 0) {
        return ctx;
    }

    int64_t        qwMaskLumi02  = 0x00FF000000FF0000ull;
    int64_t        qwMaskLumi13  = 0x000000FF000000FFull;
    int64_t        qwCMask       = 0xFF00FF00FF00FF00ull;
    unsigned char* Pixels        = ctx->out->buffer();
    int            fieldHeight   = ctx->out->size().height();
    // distance in bytes from the start of one line to the start of the next
    unsigned long  outputpitch   = ctx->out->bytesPerLine() + ctx->out->stride();

    for (int y=0; y<fieldHeight; y++) {
        // Pixels2 points to the last 8-byte block of the mirrored span
        // (Cycles * 16 bytes starting at Pixels)
        unsigned char* Pixels2 = Pixels + ((Cycles<<4) - 8);

        // Whitespace between string literals and the MOVX/XAX/... macros is
        // required since C++11 (otherwise lexed as a user-defined literal).
        __asm__ __volatile__
            (
             MOVX "    %[Pixels],       %%" XAX "\n\t"
             MOVX "    %[Pixels2],      %%" XDX "\n\t"
             MOVX "    %[Cycles],       %%" XCX "\n\t"

             "movq    %[qwCMask],      %%mm5\n\t"
             "movq    %[qwMaskLumi02], %%mm6\n\t"
             "movq    %[qwMaskLumi13], %%mm7\n\t"

             "1:\n\t"
             "movq    (%%" XAX "),       %%mm0\n\t"     // get a pixelblock from the first half of the line
             "movq    (%%" XDX "),       %%mm3\n\t"     // get the corresponding block from the second half

             SWAPPIXELS("%%mm0", "%%mm1", "%%mm2", "%%mm5", "%%mm6", "%%mm7") // swap the pixels from the first block

             "movq    %%mm0,           (%%" XDX ")\n\t" // and swap the block of pixels

             SWAPPIXELS("%%mm3", "%%mm1", "%%mm2", "%%mm5", "%%mm6", "%%mm7") // swap the pixels from the second block

             "movq    %%mm3,           (%%" XAX ")\n\t" // and swap the block of pixels

             ADDX "    $8,              %%" XAX "\n\t"   // next pixel block
             SUBX "    $8,              %%" XDX "\n\t"   // previous pixel block
             DECX "    %%" XCX "\n\t"
             "jne     1b\n\t"

             : /* no outputs */

             : [qwCMask]      "m"(qwCMask),
               [qwMaskLumi02] "m"(qwMaskLumi02),
               [qwMaskLumi13] "m"(qwMaskLumi13),
               [Pixels]       "g"(Pixels),
               [Pixels2]      "g"(Pixels2),
               [Cycles]       "g"(Cycles)

             : XAX, XCX, XDX,

#ifdef ARCH_386
               "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
#endif
               "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
               "memory", "cc"
             );

        Pixels += outputpitch;
    }

    // Leave MMX state so that x87 floating point code can run again.
    // NOTE(review): emms is only issued when ARCH_386 is defined; confirm
    // that no x87 code (e.g. long double) can follow on other builds.
#ifdef ARCH_386
    __asm__ __volatile__ ("emms\n\t");
#endif

    return ctx;
}

// -----------------------------------------------------------------------

/**
 * Forwards all arguments to KdetvFilterPlugin and allocates the
 * MirrorImageFilter instance stored in _filter.
 */
MirrorPlugin::MirrorPlugin(Kdetv *ktv, const QString& cfgkey, QObject *parent, const char* name)
    : KdetvFilterPlugin(ktv, cfgkey, parent, name)
{
    MirrorImageFilter* mirrorFilter = new MirrorImageFilter();
    _filter = mirrorFilter;
}

/**
 * Deletes the object held in _filter (assigned in the constructor).
 */
MirrorPlugin::~MirrorPlugin()
{
    delete _filter;   // deleting a null pointer would be a no-op
}

extern "C" {
    /**
     * C-linkage factory: returns a newly allocated MirrorPlugin created with
     * config key "mirrorimage", no parent object and the object name
     * "Mirror image". The caller receives a raw pointer to the new object.
     */
    KDETV_EXPORT MirrorPlugin* create_mirror(Kdetv* ktv)
    {
        MirrorPlugin* plugin = new MirrorPlugin(ktv, "mirrorimage", 0, "Mirror image");
        return plugin;
    }
}

#include "mirror.moc"
