/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**************************** motion estimation shape padding ****************/

/*  repetitive_fill_Y                                                        */
/*                                                                           */
/*  Description:                                                             */
/*    Repetitive padding of one 16x16 luminance block: samples outside the   */
/*    shape are overwritten with values replicated from samples inside it,   */
/*    first along each line, then line by line for fully empty lines.        */
/*    A gap between two shape samples gets their rounded average.            */
/*    A block with no shape sample at all is left untouched.                 */
/*                                                                           */
/*  Arguments:                                                               */
/*    unsigned char *plane: top-left sample of the block (modified)          */
/*    unsigned char *shape: matching shape samples, non-zero = inside        */
/*    int rpitch: distance in bytes between two lines of plane               */
/*    int spitch: distance in bytes between two lines of shape               */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void repetitive_fill_Y(unsigned char *plane,
			      unsigned char *shape,
			      int rpitch,
			      int spitch)
{
  unsigned char *line, *mask, *ref;
  int filled[16]; /* per line: 0 when the line holds no shape sample */
  int x, y, n;
  int value, start, opaque;

  /* TODO: MMX version */
  /* TODO: int32 version */

  /* pass 1: pad every line horizontally */
  start = 0;
  line = plane;
  mask = shape;
  for(y = 0; y < 16; y++, line += rpitch, mask += spitch) {
    value = -1; /* no boundary sample met on this line yet */
    opaque = 1;
    filled[y] = 1;

    for(x = 0; x < 16; x++) {
      if(opaque == (mask[x] != 0))
        continue; /* still on the same side of the boundary */

      if(opaque) {
        /* leaving the shape: remember the last sample before the gap */
        if(x > 0) value = line[x-1];
        start = x;
        opaque = 0;
      } else {
        /* entering the shape: close the gap that began at 'start' */
        if(value < 0) value = line[x];
        else value = (value + line[x] + 1) >> 1;
        memset(line + start, value, x - start);
        opaque = 1;
      }
    }

    if(opaque)
      continue; /* line ends inside the shape, nothing left to fill */

    if(start == 0)
      filled[y] = 0; /* the gap covers the whole line */
    else
      memset(line + start, value, 16 - start);
  }

  /* pass 2: fill the empty lines from the padded ones */
  ref = NULL;
  start = 0;
  opaque = 1;
  for(y = 0; y < 16; y++) {
    if(opaque == (filled[y] != 0))
      continue; /* no transition between empty and padded lines */

    if(opaque) {
      /* first empty line of a run: the line above is the reference */
      if(y > 0) ref = plane + (y-1)*rpitch;
      start = y;
      opaque = 0;
    } else {
      unsigned char *cur = plane + y*rpitch;

      opaque = 1;
      if(ref == NULL) {
        /* the run began at line 0: only this line can be replicated */
        ref = cur;
      } else {
        /* build the rounded mean of both neighbours in the first */
        /* empty line, then replicate that line                   */
        unsigned char *avg = plane + start*rpitch;

        for(x = 0; x < 16; x++)
          avg[x] = (unsigned char) ((ref[x] + cur[x] + 1) >> 1);
        ref = avg;
        start++;
      }
      for(n = start; n < y; n++)
        memcpy(plane + n*rpitch, ref, 16);
    }
  }

  /* trailing run of empty lines; start == 0 means the block is empty */
  if(!opaque && start != 0) {
    for(n = start; n < 16; n++)
      memcpy(plane + n*rpitch, ref, 16);
  }
}

/*  repetitive_fill_C                                                        */
/*                                                                           */
/*  Description:                                                             */
/*    Repetitive padding of one 8x8 chrominance block. A chroma sample is    */
/*    inside the shape when any of its 2x2 luma shape samples is non-zero.   */
/*    Lines are padded first, then fully empty lines are filled vertically;  */
/*    a gap between two shape samples gets their rounded average.            */
/*                                                                           */
/*  Arguments:                                                               */
/*    unsigned char *plane: top-left sample of the block (modified)          */
/*    unsigned char *shape: luma-resolution shape of the macroblock          */
/*    int rpitch: twice the distance in bytes between two lines of plane     */
/*    int spitch: distance in bytes between two lines of shape               */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void repetitive_fill_C(unsigned char *plane,
			      unsigned char *shape,
			      int rpitch,
			      int spitch)
{
  unsigned char *line, *mask, *ref;
  int filled[8]; /* per line: 0 when the line holds no shape sample */
  int cpitch;
  int x, y, n;
  int value, start, opaque, covered;

  /* TODO: MMX version */
  /* TODO: int32 version */

  cpitch = rpitch >> 1; /* line pitch actually used for this plane */

  /* pass 1: pad every line horizontally */
  start = 0;
  line = plane;
  mask = shape;
  for(y = 0; y < 8; y++, line += cpitch, mask += spitch << 1) {
    value = -1; /* no boundary sample met on this line yet */
    opaque = 1;
    filled[y] = 1;

    for(x = 0; x < 8; x++) {
      unsigned char *q = mask + (x << 1);

      /* OR of the 2x2 shape samples matching this chroma sample */
      covered = (q[0] | q[1] | q[spitch] | q[spitch+1]) != 0;
      if(opaque == covered)
        continue; /* still on the same side of the boundary */

      if(opaque) {
        /* leaving the shape: remember the last sample before the gap */
        if(x > 0) value = line[x-1];
        start = x;
        opaque = 0;
      } else {
        /* entering the shape: close the gap that began at 'start' */
        if(value < 0) value = line[x];
        else value = (value + line[x] + 1) >> 1;
        memset(line + start, value, x - start);
        opaque = 1;
      }
    }

    if(opaque)
      continue; /* line ends inside the shape, nothing left to fill */

    if(start == 0)
      filled[y] = 0; /* the gap covers the whole line */
    else
      memset(line + start, value, 8 - start);
  }

  /* pass 2: fill the empty lines from the padded ones */
  ref = NULL;
  start = 0;
  opaque = 1;
  for(y = 0; y < 8; y++) {
    if(opaque == (filled[y] != 0))
      continue; /* no transition between empty and padded lines */

    if(opaque) {
      /* first empty line of a run: the line above is the reference */
      if(y > 0) ref = plane + (y-1)*cpitch;
      start = y;
      opaque = 0;
    } else {
      unsigned char *cur = plane + y*cpitch;

      opaque = 1;
      if(ref == NULL) {
        /* the run began at line 0: only this line can be replicated */
        ref = cur;
      } else {
        /* build the rounded mean of both neighbours in the first */
        /* empty line, then replicate that line                   */
        unsigned char *avg = plane + start*cpitch;

        for(x = 0; x < 8; x++)
          avg[x] = (unsigned char) ((ref[x] + cur[x] + 1) >> 1);
        ref = avg;
        start++;
      }
      for(n = start; n < y; n++)
        memcpy(plane + n*cpitch, ref, 8);
    }
  }

  /* trailing run of empty lines; start == 0 means the block is empty */
  if(!opaque && start != 0) {
    for(n = start; n < 8; n++)
      memcpy(plane + n*cpitch, ref, 8);
  }
}

/*  extended_pad_withmask                                                    */
/*                                                                           */
/*  Description:                                                             */
/*    Perform extended padding of arbitrary shape for motion estimation.     */
/*    Works on 16x16 luma / 8x8 chroma blocks plus a one-block border.       */
/*    When i != 0, blocks next to a not-coded block first get their last     */
/*    column / line copied back from frame[i&2] / frame[i&1].                */
/*    Each not-coded block is then filled from its left, upper, right or     */
/*    lower neighbour (first one with type >= bab_all_coded), or with        */
/*    grey (128) when no such neighbour exists.                              */
/*                                                                           */
/*  Arguments:                                                               */
/*    int i: reference number, index of the plane to pad in frame[]          */
/*    int width: width of the frame                                          */
/*    int height: height of the frame                                        */
/*    fame_yuv_t **frame: planes; frame[i] is padded, sources are            */
/*      frame[i&2] (horizontal) and frame[i&1] (vertical)                    */
/*    unsigned char *shape: shape of the frame (not used)                    */
/*    unsigned char *bab_map: binary alpha block type map                    */
/*    fame_box_t *box: bounding box (not used)                               */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void inline extended_pad_withmask(int i,
					 int width,
					 int height,
					 fame_yuv_t **frame,
					 unsigned char *shape,
					 unsigned char *bab_map,
					 fame_box_t *box)
{
  int x, y, k;
  int pitch;
  unsigned char *Y, *U, *V;    /* current block in the plane being padded */
  unsigned char *Yh, *Uh, *Vh; /* same block in frame[i&2] */
  unsigned char *Yv, *Uv, *Vv; /* same block in frame[i&1] */
  unsigned char *d, *s, *b;

  width += 15;  /* roundup */
  height += 15; /* roundup */
  width >>= 4;  /* convert to macroblock unit */
  height >>= 4; /* convert to macroblock unit */
  width += 2;   /* add 1 for border on both sides */
  height += 2;  /* add 1 for border on both sides */
  /* pitch is counted in blocks: a luma line is pitch<<4 bytes, a chroma */
  /* line pitch<<3 bytes, a row of blocks pitch<<8 resp. pitch<<6 bytes  */
  pitch = width;
 
  /* pass 1, skipped for reference 0: fix the edge samples of blocks     */
  /* that have a not-coded block to their right (i&1) or below (i&2)     */
  /* NOTE(review): per the comments below, references 1..3 are presumably */
  /* the half-sample interpolated planes - confirm against the caller    */
  if(i) {
    Y = frame[i]->y;
    U = frame[i]->u;
    V = frame[i]->v;
    Yh = frame[i&2]->y;
    Uh = frame[i&2]->u;
    Vh = frame[i&2]->v;
    Yv = frame[i&1]->y;
    Uv = frame[i&1]->u;
    Vv = frame[i&1]->v;
    b = bab_map + 1 + pitch; /* first block inside the bounding box */
    for(y = 1; y < height-1; y ++) {
      for(x = 1; x < width-1; x ++) {
	if((i&1) && b[0] >= bab_border_16x16 && b[1] == bab_not_coded) {
	  /* fix rightmost half sample (= int sample) :  */
	  /* ... x X x X x 0                             */
	  /*             ^--- = X+0/2 = X/2, should be X */
	  /* copy the last column of the block, one sample per line */
	  d = Y + 15; s = Yh + 15;
	  for(k = 0; k < 16; k++, d += (pitch<<4), s += (pitch<<4)) *d = *s;
	  d = U + 7; s = Uh + 7;
	  for(k = 0; k < 8; k++, d += (pitch<<3), s += (pitch<<3)) *d = *s;
	  d = V + 7; s = Vh + 7;
	  for(k = 0; k < 8; k++, d += (pitch<<3), s += (pitch<<3)) *d = *s;
	}
	if((i&2) && b[0] >= bab_border_16x16 && b[pitch] == bab_not_coded) {
	  /* fix bottommost half sample (= int sample) : */
	  /*          ...                                */
	  /*           X                                 */
	  /*           x<-- = X+0/2 = X/2, should be X   */
	  /*           0                                 */
	  /* copy the last line of the block (line 15, resp. 7 for chroma) */
	  d = Y + (pitch<<8) - (pitch<<4); s = Yv + (pitch<<8) - (pitch<<4);
	  memcpy(d, s, 16);
	  d = U + (pitch<<6) - (pitch<<3); s = Uv + (pitch<<6) - (pitch<<3);
	  memcpy(d, s, 8);
	  d = V + (pitch<<6) - (pitch<<3); s = Vv + (pitch<<6) - (pitch<<3);
	  memcpy(d, s, 8);
	}
	b++;
	Y += 16;
	U += 8;
	V += 8;
	Yh += 16;
	Uh += 8;
	Vh += 8;
	Yv += 16;
	Uv += 8;
	Vv += 8;
      }
      b += 2; /* borders */
      /* move to the first inner block of the next row of blocks */
      Y += (pitch << 8) - ((width-2) << 4);
      U += (pitch << 6) - ((width-2) << 3);
      V += (pitch << 6) - ((width-2) << 3);
      Yh += (pitch << 8) - ((width-2) << 4);
      Uh += (pitch << 6) - ((width-2) << 3);
      Vh += (pitch << 6) - ((width-2) << 3);
      Yv += (pitch << 8) - ((width-2) << 4);
      Uv += (pitch << 6) - ((width-2) << 3);
      Vv += (pitch << 6) - ((width-2) << 3);
    }
  }

  /* pass 2: start at the top-left border block, i.e. one row of blocks  */
  /* and one block before the plane pointers; the planes are accessed    */
  /* there, so that border must exist in the buffers                     */
  Y = frame[i]->y - (pitch << 8) - 16;
  U = frame[i]->u - (pitch << 6) - 8;
  V = frame[i]->v - (pitch << 6) - 8;
  Yh = frame[i&2]->y - (pitch << 8) - 16;
  Uh = frame[i&2]->u - (pitch << 6) - 8;
  Vh = frame[i&2]->v - (pitch << 6) - 8;
  Yv = frame[i&1]->y - (pitch << 8) - 16;
  Uv = frame[i&1]->u - (pitch << 6) - 8;
  Vv = frame[i&1]->v - (pitch << 6) - 8;

  /* bab_map is walked linearly: one entry per block, borders included */
  for(y = 0; y < height; y ++) {
    for(x = 0; x < width; x ++) {
      if(*bab_map == bab_not_coded) {
	if(x > 0 && bab_map[-1] >= bab_all_coded) {
	  /* pad from left */
	  /* each line is filled with the sample just left of the block */
	  d = Y; s = Yh-1;
	  for(k = 0; k < 16; k++, d += (pitch<<4), s += (pitch<<4))
	    memset(d, *s, 16);
	  d = U; s = Uh-1;
	  for(k = 0; k < 8; k++, d += (pitch<<3), s += (pitch<<3))
	    memset(d, *s, 8);
	  d = V; s = Vh-1;
	  for(k = 0; k < 8; k++, d += (pitch<<3), s += (pitch<<3))
	    memset(d, *s, 8);
	}
	else if(y > 0 && bab_map[-pitch] >= bab_all_coded) {
	  /* pad from above */
	  /* the line just above the block is copied to every line */
	  d = Y; s = Yv - (pitch << 4); /* Y */
	  for(k = 0; k < 16; k++, d += (pitch<<4)) memcpy(d, s, 16);
	  d = U; s = Uv - (pitch << 3); /* U */
	  for(k = 0; k < 8; k++, d += (pitch<<3)) memcpy(d, s, 8);
	  d = V; s = Vv - (pitch << 3); /* V */
	  for(k = 0; k < 8; k++, d += (pitch<<3)) memcpy(d, s, 8);
	} 
	else if(x < width-1 && bab_map[1] >= bab_all_coded) {
	  /* pad from right */
	  /* each line is filled with the sample just right of the block */
	  d = Y; s = Yh+16;
	  for(k = 0; k < 16; k++, d += (pitch<<4), s += (pitch<<4))
	    memset(d, *s, 16);
	  d = U; s = Uh+8;
	  for(k = 0; k < 8; k++, d += (pitch<<3), s += (pitch<<3))
	    memset(d, *s, 8);
	  d = V; s = Vh+8;
	  for(k = 0; k < 8; k++, d += (pitch<<3), s += (pitch<<3))
	    memset(d, *s, 8);
	}
	else if(y < height-1 && bab_map[pitch] >= bab_all_coded) {
	  /* pad from below */
	  /* the line just below the block is copied to every line */
	  d = Y; s = Yv + (pitch << 8); /* Y */
	  for(k = 0; k < 16; k++, d += (pitch<<4)) memcpy(d, s, 16);
	  d = U; s = Uv + (pitch << 6); /* U */
	  for(k = 0; k < 8; k++, d += (pitch<<3)) memcpy(d, s, 8);
	  d = V; s = Vv + (pitch << 6); /* V */
	  for(k = 0; k < 8; k++, d += (pitch<<3)) memcpy(d, s, 8);
	} 
	else
	{
	  /* pad with grey */
	  d = Y; for(k = 0; k < 16; k++, d += (pitch<<4)) memset(d, 128, 16);
	  d = U; for(k = 0; k < 8; k++, d += (pitch<<3)) memset(d, 128, 8);
	  d = V; for(k = 0; k < 8; k++, d += (pitch<<3)) memset(d, 128, 8);
	}
      }
      bab_map++;
      Y += 16;
      U += 8;
      V += 8;
      Yh += 16;
      Uh += 8;
      Vh += 8;
      Yv += 16;
      Uv += 8;
      Vv += 8;
    }
    /* move to the first block of the next row of blocks */
    Y += (pitch << 8) - (width << 4);
    U += (pitch << 6) - (width << 3);
    V += (pitch << 6) - (width << 3);
    Yh += (pitch << 8) - (width << 4);
    Uh += (pitch << 6) - (width << 3);
    Vh += (pitch << 6) - (width << 3);
    Yv += (pitch << 8) - (width << 4);
    Uv += (pitch << 6) - (width << 3);
    Vv += (pitch << 6) - (width << 3);
  }
}


/*  extended_pad_withoutmask                                                 */
/*                                                                           */
/*  Description:                                                             */
/*    Perform extended padding of rectangular video for motion estimation.   */
/*    Each plane of frame[i] gets a border (16 luma / 8 chroma samples)      */
/*    replicated from the edge samples, and is extended up to the next       */
/*    multiple of 16 (8 for chroma) in both directions.                      */
/*                                                                           */
/*  Arguments:                                                               */
/*    int i: reference number, index of the plane to pad in frame[]          */
/*    int width: not used, frame[i]->w is read instead                       */
/*    int height: not used, frame[i]->h is read instead                      */
/*    fame_yuv_t **frame: planes; frame[i] is padded, sources are            */
/*      frame[i&2] (horizontal) and frame[i&1] (vertical)                    */
/*    unsigned char *shape: not used                                         */
/*    unsigned char *bab_map: not used                                       */
/*    fame_box_t *box: not used                                              */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void inline extended_pad_withoutmask(int i,
				   int width,
				   int height,
				   fame_yuv_t **frame,
				   unsigned char *shape,   /* not used */
				   unsigned char *bab_map, /* not used */
				   fame_box_t *box)
{
  int y, w, h, wr, hr, p, e;
  unsigned char *s, *d;

/* Pads one component (y, u or v) using the locals w, h, wr, hr, p, e, y, */
/* s and d of the enclosing function.                                     */
/* NOTE(review): the macro is not #undef'd in the visible source, so it   */
/* stays defined after this function.                                     */
#define extended_pad_component(comp)		\
{						\
  /* pad horizontally: on each of the h lines, replicate the first */ \
  /* sample e times to the left and the last one up to wr + e      */ \
  d = frame[i]->comp;				\
  s = frame[i&2]->comp;				\
  for(y = 0; y < h; y++) {			\
    memset(d-e, s[0], e);			\
    memset(d+w, s[w-1], wr - w + e);		\
    d += p;					\
    s += p;					\
  }						\
  /* pad vertically: copy the first line of frame[i&1], side borders */ \
  /* included, into the e lines above the plane                      */ \
  s = frame[i&1]->comp;				\
  d = frame[i]->comp - p;			\
  for(y = 0;y < e; y++) {			\
    memcpy(d - e, s - e, wr + 2*e);		\
    d -= p;					\
  }						\
  /* then copy line h-1 into lines h .. hr+e-1 below the plane */ \
  s = frame[i&1]->comp + (h-1)*p;		\
  d = frame[i]->comp + h*p;			\
  for(y = h; y < hr+e; y++) {			\
    memcpy(d - e, s - e, wr + 2*e);		\
    d += p;					\
  }						\
}

  e = 16;            /* border size, in samples */
  w = frame[i]->w;
  h = frame[i]->h;
  wr = (w+15)&(~15); /* round to the next 16-pixel boundary */
  hr = (h+15)&(~15); /* round to the next 16-pixel boundary */
  p = frame[i]->p;   /* distance between two lines */
  
  extended_pad_component(y);

  /* the u and v components use half of every dimension */
  p >>= 1;
  h >>= 1;
  w >>= 1;
  hr >>= 1;
  wr >>= 1;
  e >>= 1;

  extended_pad_component(u);
  extended_pad_component(v);  
}
