diff -Nur xephyr.orig/hw/kdrive/ephyr/ephyr.c xephyr/hw/kdrive/ephyr/ephyr.c
--- xephyr.orig/hw/kdrive/ephyr/ephyr.c	2006-08-18 17:28:08.000000000 +0200
+++ xephyr/hw/kdrive/ephyr/ephyr.c	2006-08-18 17:28:00.000000000 +0200
@@ -93,6 +93,17 @@
 	ErrorF("\nXephyr: requested screen depth not supported, setting to match hosts.\n");
     }
   
+  /* 
+   * The color swizzling mode was designed to simulate the "One Laptop
+   * Per Child" display, for which 16bit colors are best.
+   */
+  
+  if (hostx_want_swizzle () && hostx_get_server_depth() != 16)
+    {
+    fprintf (stderr, "Switching to depth 16 since you asked for color swizzling\n");
+    hostx_set_server_depth (16);
+    }
+
   screen->fb[0].depth = hostx_get_server_depth();
   screen->rate = 72;
   
diff -Nur xephyr.orig/hw/kdrive/ephyr/ephyrinit.c xephyr/hw/kdrive/ephyr/ephyrinit.c
--- xephyr.orig/hw/kdrive/ephyr/ephyrinit.c	2006-08-18 17:29:30.000000000 +0200
+++ xephyr/hw/kdrive/ephyr/ephyrinit.c	2006-08-18 17:29:18.000000000 +0200
@@ -62,6 +62,7 @@
   ErrorF("-parent XID   Use existing window as Xephyr root win\n");
   ErrorF("-host-cursor  Re-use exisiting X host server cursor\n");
   ErrorF("-fullscreen   Attempt to run Xephyr fullscreen\n");
+  ErrorF("-swizzle      Run in color swizzling mode\n");
   ErrorF("\n");
 
   exit(1);
@@ -93,6 +94,11 @@
       hostx_use_fullscreen();
       return 1;
     }
+  else if (!strcmp (argv[i], "-swizzle"))
+    {
+      hostx_use_swizzle();
+      return 1;
+    }
   else if (argv[i][0] == ':')
     {
       hostx_set_display_name(argv[i]);
diff -Nur xephyr.orig/hw/kdrive/ephyr/hostx.c xephyr/hw/kdrive/ephyr/hostx.c
--- xephyr.orig/hw/kdrive/ephyr/hostx.c	2006-08-18 17:41:46.000000000 +0200
+++ xephyr/hw/kdrive/ephyr/hostx.c	2006-08-18 17:41:38.000000000 +0200
@@ -41,6 +41,8 @@
 #include <X11/keysym.h>
 #include <X11/extensions/XShm.h>
 
+#include "swizzle.h"
+
 /*  
  * All xlib calls go here, which gets built as its own .a .
  * Mixing kdrive and xlib headers causes all sorts of types
@@ -62,11 +64,13 @@
   int             win_width, win_height;
   Bool            use_host_cursor;
   Bool            use_fullscreen;
+  Bool            use_swizzle;
   Bool            have_shm;
 
   long            damage_debug_msec;
 
   unsigned char  *fb_data;   	/* only used when host bpp != server bpp */
+  unsigned char  *original_fb; /* Only used in color swizzling & AA mode */
   unsigned long   cmap[256];
 
   XShmSegmentInfo shminfo;
@@ -164,6 +168,18 @@
   HostX.use_host_cursor = True;
 }
 
+void
+hostx_use_swizzle(void)
+{
+  HostX.use_swizzle = True;  /* request colour swizzling; read back via hostx_want_swizzle() */
+}
+
+int
+hostx_want_swizzle(void)
+{
+  return HostX.use_swizzle;  /* non-zero once hostx_use_swizzle() has been called */
+}
+
 int
 hostx_want_preexisting_window(void)
 {
@@ -553,6 +569,14 @@
     }
 }
 
+static void
+copy_16bits_pixel_into_host_fb (unsigned char *host, unsigned short server_pixel) {
+  /* Expand an RGB565 pixel to 8-bit channels stored as host[0]=B, host[1]=G, host[2]=R. */
+  host[0] = (unsigned char) ((server_pixel & 0x001f) << 3);
+  host[1] = (unsigned char) ((server_pixel & 0x07e0) >> 3);
+  host[2] = (unsigned char) ((server_pixel & 0xf800) >> 8);
+}
+
 void
 hostx_paint_rect(int sx,    int sy,
 		 int dx,    int dy, 
@@ -563,6 +587,17 @@
    *  on to the window
    */
 
+  int x_min = 0, x_max = 0, y_min = 0, y_max = 0;
+  
+  if (HostX.use_swizzle) {
+    static Bool swizzle_initialized = 0;
+    if (!swizzle_initialized) {  /* zero-filled: the filter reads border pixels not yet painted */
+      HostX.original_fb = (unsigned char *) calloc (HostX.ximg->height, HostX.ximg->bytes_per_line);
+      HostX.use_swizzle = (HostX.original_fb != NULL);  /* out of memory: paint unswizzled */
+      swizzle_initialized = 1;
+    }
+  }
+  
   if (HostXWantDamageDebug)
     {
       hostx_paint_debug_rect(dx, dy, width, height);
@@ -582,9 +617,50 @@
 
   if (!host_depth_matches_server())
     {
-      int            x,y,idx, bytes_per_pixel = (HostX.server_depth>>3);
+      int            x,y,idx, bytes_per_pixel = (HostX.server_depth>>3), host_bpp = HostX.ximg->bits_per_pixel / 8;
       unsigned char  r,g,b;
       unsigned long  host_pixel;
+      unsigned char *current_host_pixel;
+
+      if (HostX.use_swizzle) {
+	int border;
+	unsigned short current_server_pixel;
+	
+	border = 1;
+	x_min = (sx - border < 0)? 0 : sx - border;
+	y_min = (sy - border < 0)? 0 : sy - border;
+	x_max = (sx + width  + border > HostX.ximg->width)?  HostX.ximg->width  : sx + width  + border;
+	y_max = (sy + height + border > HostX.ximg->height)? HostX.ximg->height : sy + height + border;
+	
+	/* First, update the zone we're redrawing in the local,
+	 * unaltered framebuffer.
+	 */
+	
+	for (y = sy; y < sy + height; y++) {
+	  for (x = sx; x < sx + width; x++) {
+	    idx = (HostX.win_width * y + x) * bytes_per_pixel;
+	    current_server_pixel = *(unsigned short*)(HostX.fb_data+idx);
+	    current_host_pixel = HostX.original_fb + (x + y * HostX.ximg->width) * host_bpp;
+	    copy_16bits_pixel_into_host_fb (current_host_pixel, current_server_pixel);
+	  }
+	}
+	
+	/* 
+	 * Then, perform the swizzling + antialiasing. See swizzle.c for more
+	 * details.
+	 */
+	
+	gr_swizzle_xrgb32 (HostX.original_fb,
+			   HostX.ximg->width * host_bpp,
+			   HostX.ximg->data,
+			   HostX.ximg->width * host_bpp,
+			   HostX.ximg->width,
+			   HostX.ximg->height,
+			   x_min,
+			   y_min,
+			   x_max - x_min,
+			   y_max - y_min);
+      } else {
 
       for (y=sy; y<sy+height; y++)
 	for (x=sx; x<sx+width; x++)
@@ -617,16 +693,25 @@
 	      }
 	  }
     }
+    }
 
   if (HostX.have_shm)
     {
-      XShmPutImage(HostX.dpy, HostX.win, HostX.gc, HostX.ximg, 
-		       sx, sy, dx, dy, width, height, False);
+      if (HostX.use_swizzle)
+	XShmPutImage(HostX.dpy, HostX.win, HostX.gc, HostX.ximg, 
+		     x_min, y_min, x_min, y_min, x_max - x_min, y_max - y_min, False);
+      else
+	XShmPutImage(HostX.dpy, HostX.win, HostX.gc, HostX.ximg, 
+		     sx, sy, dx, dy, width, height, False);
     }
   else
     {
-      XPutImage(HostX.dpy, HostX.win, HostX.gc, HostX.ximg, 
-		sx, sy, dx, dy, width, height);
+      if (HostX.use_swizzle)
+	XPutImage(HostX.dpy, HostX.win, HostX.gc, HostX.ximg, 
+		  x_min, y_min, x_min, y_min, x_max - x_min, y_max - y_min);
+      else
+	XPutImage(HostX.dpy, HostX.win, HostX.gc, HostX.ximg, 
+		  sx, sy, dx, dy, width, height);
     }
 
   XSync(HostX.dpy, False);
diff -Nur xephyr.orig/hw/kdrive/ephyr/hostx.h xephyr/hw/kdrive/ephyr/hostx.h
--- xephyr.orig/hw/kdrive/ephyr/hostx.h	2006-08-18 17:42:31.000000000 +0200
+++ xephyr/hw/kdrive/ephyr/hostx.h	2006-08-18 17:42:24.000000000 +0200
@@ -90,6 +90,12 @@
 hostx_use_host_cursor(void);
 
 void
+hostx_use_swizzle(void);
+
+int
+hostx_want_swizzle(void);
+
+void
 hostx_use_fullscreen(void);
 
 int
diff -Nur xephyr.orig/hw/kdrive/ephyr/Makefile.am xephyr/hw/kdrive/ephyr/Makefile.am
--- xephyr.orig/hw/kdrive/ephyr/Makefile.am	2006-08-18 17:43:01.000000000 +0200
+++ xephyr/hw/kdrive/ephyr/Makefile.am	2006-08-18 17:42:53.000000000 +0200
@@ -19,7 +19,9 @@
 
 libxephyr_hostx_a_SOURCES =                             \
 	hostx.c                                         \
-	hostx.h    
+	hostx.h                                         \
+	swizzle.h                                       \
+	swizzle.c
 
 libxephyr_hostx_a_INCLUDES = @XEPHYR_INCS@ 
 
diff -Nur xephyr.orig/hw/kdrive/ephyr/swizzle.c xephyr/hw/kdrive/ephyr/swizzle.c
--- xephyr.orig/hw/kdrive/ephyr/swizzle.c	1970-01-01 01:00:00.000000000 +0100
+++ xephyr/hw/kdrive/ephyr/swizzle.c	2006-08-18 17:47:35.000000000 +0200
@@ -0,0 +1,739 @@
+/*  Authors:  David Turner  <david@freetype.org>
+ *            Manu Cornet   <manu@manucornet.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ */
+
+#include <stdlib.h>
+#include <memory.h>
+
+#include "swizzle.h"
+
+/* Technical note:
+ *
+ *   The following code is used to simulate the color display of an
+ *   OLPC screen on a traditional LCD or CRT screen. First, here's more or
+ *   less how the laptop's hardware works:
+ *
+ *   When in color mode, the screen uses the following colored pixels
+ *   arrangement, where each pixel is square and can have its luminosity
+ *   set between 0 and 255:
+ *
+ *     R G B R G B ....
+ *     G B R G B R
+ *     B R G B R G
+ *     R G B R G B
+ *     G B R G B R
+ *     B R G B R G
+ *     :
+ *     :
+ *
+ *   In its normal mode of operation, the laptop's graphics chip gets its
+ *   data from a normal frame buffer (where each pixel has three
+ *   components: R, G and B) but only selects the red signal for the first
+ *   pixel, the green signal for the second pixel, etc...
+ *
+ *   In other words, it ignores 2/3rd of the frame buffer data !
+ *
+ *   There is also another interesting mode of operation, so-called
+ *   "anti-aliasing mode" where the value of displayed pixel is obtained
+ *   by performing a simple 3x3 linear filter on the frame-buffer data.
+ *   the filter's matrix being:
+ *
+ *       0   1/8   0
+ *
+ *      1/8  1/2  1/8
+ *
+ *       0   1/8   0
+ *
+ *   Note that this filtering is per-color, so the *displayed* intensity of
+ *   a given red pixel corresponds to an average of the pixel's red value and
+ *   its four neighboring pixels' red values.
+ *
+ *   Note that the code below uses a 3-line work buffer, where each line
+ *   in the buffer holds a copy of one line of the source frame buffer.
+ *
+ *   More exactly, each line has 'width+2' pixels: the first and last hold
+ *   the neighbouring source pixels, or 0 at the frame buffer's edges. This
+ *   allows us to ignore edge cases in the filtering code.
+ *
+ *   Similarly, we artificially extend the source buffer with zero-ed lines
+ *   above and below.
+ */
+
+
+/* define ANTIALIAS to perform anti-alias filtering before swizzling */
+#define ANTIALIAS
+
+/* define POSTPROCESS to enhance the output for traditional displays,
+ * otherwise, you'll get those ugly diagonals everywhere
+ */
+#define POSTPROCESS
+
+/************************************************************************/
+/************************************************************************/
+/*****                                                              *****/
+/*****               G E N E R I C   F I L T E R I N G              *****/
+/*****                                                              *****/
+/************************************************************************/
+/************************************************************************/
+
+/* the type of a line filtering function, see below for usage */
+typedef void
+(*filter_func_t)( unsigned char**  lines,
+                  unsigned char*   write,
+                  int              width,
+                  int              offset );
+
+/* Copy one source row into a work line, plus its left/right neighbour when inside the buffer. */
+static void __inline__
+copy_line_generic( unsigned char*    from,
+                   unsigned char*    to,
+                   int               x,
+                   int               width,
+                   int               buff_width,
+                   int               pix_bytes )
+{
+  int  count = width;   /* pixels to copy, neighbours included */
+
+  /* test the right edge against the ORIGINAL width, before 'count' grows */
+  if (x + width < buff_width)
+    count += 1;
+  if (x > 0) {          /* left neighbour exists: start one pixel earlier */
+    count += 1;
+    from  -= pix_bytes;
+  } else                /* at the left edge: skip slot 0 of the work line */
+    to += pix_bytes;
+  memcpy( to, from, count*pix_bytes );
+}
+
+
+
+/* a generic function to perform 3x3 filtering of a given rectangle,
+ * from a source bitmap into a destination one, the source *can* be
+ * equal to the destination.
+ *
+ * IMPORTANT: this will read the rectangle (x-1,y-1,width+2,height+2)
+ * from the source (clipping and edge cases are handled).
+ *
+ *  read_buff    :: first byte of source buffer
+ *  read_pitch   :: source buffer bytes per row
+ *  write_buff   :: first byte of target buffer
+ *  write_pitch  :: target buffer bytes per row
+ *  buff_width   :: width in pixels of both buffers
+ *  buff_height  :: height in pixels of both buffers
+ *  x            :: rectangle's left-most horizontal coordinate
+ *  y            :: rectangle's top-most vertical coordinate
+ *  width        :: rectangle width in pixels
+ *  height       :: rectangle height in pixels
+ *  pix_bytes    :: number of bytes per pixels in both buffer
+ *  filter_func  :: line filtering function
+ *  temp_lines   :: a work buffer of at least '3*(width+2)*pix_bytes' bytes
+ */
+static void
+filter_rect_generic( unsigned char*   read_buff,
+                     int              read_pitch,
+                     unsigned char*   write_buff,
+                     int              write_pitch,
+                     int              buff_width,
+                     int              buff_height,
+                     int              x,
+                     int              y,
+                     int              width,
+                     int              height,
+                     int              pix_bytes,
+                     filter_func_t    filter_func,
+                     unsigned char*   temp_lines )
+{
+  unsigned char*  lines[3];
+  int             offset   = 0;
+  int             delta;
+
+  /* clip rectangle, just to be sure */
+  if (x < 0) {
+    width += x;
+    x      = 0;
+  }
+  delta = x+width - buff_width;
+  if (delta > 0)
+    width -= delta;
+
+  if (y < 0) {
+    height += y;
+    y       = 0;
+  }
+  delta = y+height - buff_height;
+  if (delta > 0)
+    height -= delta;
+
+  if (width <= 0 || height <= 0)  /* nothing to do */
+    return;
+  /* colour phase of the top-left pixel; it advances by one per column and per row */
+  offset = (x+y) % 3;
+
+  /* now setup the three work lines */
+  read_buff  += y*read_pitch  + pix_bytes*x;
+  write_buff += y*write_pitch + pix_bytes*x;
+
+  memset( temp_lines, 0, 3*pix_bytes*(width+2) );
+
+  lines[0] = (unsigned char*) temp_lines;
+  lines[1] = lines[0] + pix_bytes*(width+2);
+  lines[2] = lines[1] + pix_bytes*(width+2);
+
+  /* lines[0] corresponds to the pixels of the line above; it stays
+   * zeroed when the rectangle starts on the first row of the buffer */
+   if (y > 0)
+     copy_line_generic( read_buff - read_pitch, lines[0],
+                        x, width, buff_width, pix_bytes );
+
+  /* lines[1] corresponds to the pixels of the current line
+   */
+   copy_line_generic( read_buff, lines[1],
+                      x, width, buff_width, pix_bytes );
+
+  /* process all lines, except the last one */
+  for ( ; height > 1; height-- )
+  {
+    unsigned char*   tmp;
+
+    /* lines[2] corresponds to the pixels of the line below */
+    copy_line_generic( read_buff + read_pitch, lines[2],
+                       x, width, buff_width, pix_bytes );
+
+    filter_func( lines, write_buff, width, offset );
+
+    offset = (offset + 1) % 3;
+
+    /* scroll the work lines */
+    tmp      = lines[0];
+    lines[0] = lines[1];
+    lines[1] = lines[2];
+    lines[2] = tmp;
+
+    read_buff  += read_pitch;
+    write_buff += write_pitch;
+    y          += 1;   /* keep y on the row being filtered for the bottom-edge test below */
+  }
+
+  /* last line: y is now its row, so only read a line below it if one exists */
+  if (y+1 >= buff_height)
+    memset( lines[2], 0, (width+2)*pix_bytes );
+  else
+    copy_line_generic( read_buff + read_pitch, lines[2],
+                       x, width, buff_width, pix_bytes );
+
+  filter_func ( lines, write_buff, width, offset );
+}
+
+
+/************************************************************************/
+/************************************************************************/
+/*****                                                              *****/
+/*****               R G B 2 4   S U P P O R T                      *****/
+/*****                                                              *****/
+/************************************************************************/
+/************************************************************************/
+
+
+/* this function performs AA+swizzling of a given line from/to RGB24 buffers
+ */
+static void
+swizzle_line_rgb24( unsigned char**  lines,
+                    unsigned char*   write,
+                    int              width,
+                    int              offset )
+{
+  /* '+ 3' skips the extra left-neighbour pixel at the start of each work line */
+  const unsigned char*  up     = lines[0] + 3;
+  const unsigned char*  mid    = lines[1] + 3;
+  const unsigned char*  down   = lines[2] + 3;
+  const int             nbytes = width * 3;
+  int                   pos    = 0;
+
+  while ( pos < nbytes )
+  {
+    /* byte index of the single component (selected by 'offset') this pixel keeps */
+    const int     comp = pos + offset;
+    unsigned int  value;
+
+#ifdef ANTIALIAS
+    /* 4/8 of the pixel itself plus 1/8 of each of its four neighbours */
+    value  = (unsigned int)( mid[comp] << 2 );
+    value += mid[comp-3] + mid[comp+3]
+           + up[comp]    + down[comp];
+    value  = (value >> 3) << (offset*8);
+#else /* !ANTIALIAS */
+    value = mid[comp] << (offset*8);
+#endif
+
+    /* the shift above placed the result in byte 'offset'; the other two bytes are 0 */
+    write[pos]   = (unsigned char) value;
+    write[pos+1] = (unsigned char)(value >> 8);
+    write[pos+2] = (unsigned char)(value >> 16);
+
+    /* the next pixel keeps the next component: 0 -> 1 -> 2 -> 0 ... */
+    offset = (offset == 2) ? 0 : offset + 1;
+    pos   += 3;
+  }
+}
+
+
+/* the following function is used to post-process the result of the
+ * swizzling algorithm to provide a more pleasant output on normal
+ * (LCD and CRT) display screens.
+ *
+ * that's because the normal processing creates images that are not
+ * relevant to the display's true nature. For example, consider a 3x3
+ * white square on the original frame buffer, after simple swizzling, this
+ * will generate the following picture (represented by RGB triplets):
+ *
+ *  (255,0,0)(0,255,0)(0,0,255)
+ *  (0,255,0)(0,0,255)(255,0,0)
+ *  (0,0,255)(255,0,0)(0,255,0)
+ *
+ * the laptop's DCON chip ignores all the 0s above, and will essentially
+ * display a *bright* white square
+ *
+ * a traditional display will not, and this will result in an image that
+ * will be much darker (due to all the zeroes).
+ *
+ * moreover, on a typical LCD screen, this creates very visible
+ * black diagonals. On a CRT, some thinner diagonals are also visible, but
+ * this is mostly due to the fact that the human eye is much more sensitive
+ * to green than red and blue
+ */
+
+
+/* in this algorithm we steal the green and blue components from each pixel's
+ * neighbours. For example, for a red pixel, we compute the average of the
+ * green pixels on its right and below it, and the average of the blue pixels
+ * on its left and above it.
+ */
+static void
+postprocess_line_rgb24( unsigned char** lines,
+                        unsigned char*  write,
+                        int             width,
+                        int             offset )
+{
+  const unsigned char*  up   = lines[0] + 3;
+  const unsigned char*  mid  = lines[1] + 3;
+  const unsigned char*  down = lines[2] + 3;
+  const int             end  = width * 3;
+  int                   i;
+
+  for ( i = 0; i < end; i += 3 )
+  {
+    unsigned char*  out = write + i;
+    switch (offset) {
+    case 0:   /* red pixel: green from right/below, blue from left/above */
+      out[0] = mid[i];
+      out[1] = (unsigned char)((mid[i+4] + down[i+1]) >> 1);
+      out[2] = (unsigned char)((mid[i-1] + up[i+2])   >> 1);
+      offset = 1;
+      break;
+    case 1:   /* green pixel: red from left/above, blue from right/below */
+      out[0] = (unsigned char)((mid[i-3] + up[i])     >> 1);
+      out[1] = mid[i+1];
+      out[2] = (unsigned char)((mid[i+5] + down[i+2]) >> 1);
+      offset = 2;
+      break;
+    default:  /* blue pixel: red from right/below, green from left/above */
+      out[0] = (unsigned char)((mid[i+3] + down[i])   >> 1);
+      out[1] = (unsigned char)((mid[i-2] + up[i+1])   >> 1);
+      out[2] = mid[i+2];
+      offset = 0;
+      break;
+    }
+  }
+}
+
+
+
+void
+gr_swizzle_rgb24( unsigned char*    read_buff,
+                  int               read_pitch,
+                  unsigned char*    write_buff,
+                  int               write_pitch,
+                  int               buff_width,
+                  int               buff_height,
+                  int               x,
+                  int               y,
+                  int               width,
+                  int               height )
+{
+  unsigned char*  temp_lines;
+  unsigned char   temp_local[ 2048 ];
+  int             temp_size;
+
+  /* AA+swizzle the rectangle (x, y, width, height) of an RGB24 buffer (3 bytes per pixel) */
+  if ( height <= 0 || width <= 0 )
+    return;
+
+  /* a negative pitch means bottom-up rows: row 0 lies (buff_height-1) rows past the first byte */
+  if ( read_pitch < 0 )
+    read_buff -= (buff_height-1)*read_pitch;
+  if ( write_pitch < 0 )
+    write_buff -= (buff_height-1)*write_pitch;
+
+ /* work buffer: three 'lines' of width+2 pixels each (one extra pixel on
+  * either side for the horizontal neighbours)
+  */
+  temp_size  = (width+2)*3*3;
+  if (temp_size <= sizeof(temp_local) )
+  {
+    /* try to use stack allocation, which is a lot faster than malloc */
+    temp_lines = temp_local;
+  }
+  else
+  {
+    temp_lines = (unsigned char*)malloc( temp_size );
+    if ( temp_lines == NULL )
+      return;
+  }
+
+  /* do the AA+swizzling here */
+  filter_rect_generic( read_buff, read_pitch, write_buff, write_pitch,
+                       buff_width, buff_height, x, y, width, height, 3,
+                       swizzle_line_rgb24, temp_lines );
+
+#ifdef POSTPROCESS
+  /* perform darkness 'correction' :-) */
+  filter_rect_generic( write_buff, write_pitch, write_buff, write_pitch,
+                       buff_width, buff_height, x, y, width, height, 3,
+                       postprocess_line_rgb24, temp_lines );
+#endif
+
+  /* free work buffer if needed */
+  if (temp_lines != temp_local)
+    free( temp_lines );
+}
+
+
+
+
+/************************************************************************/
+/************************************************************************/
+/*****                                                              *****/
+/*****               R G B 5 6 5  S U P P O R T                     *****/
+/*****                                                              *****/
+/************************************************************************/
+/************************************************************************/
+
+
+
+/* this function performs AA+swizzling of a given line from/to RGB565 buffers
+ */
+static void
+swizzle_line_rgb565( unsigned char** lines,
+                     unsigned char*  _write,
+                     int             width,
+                     int             offset )
+{
+  /* RGB565 channel masks, indexed by colour phase: red, green, blue */
+  static const unsigned int  channel[3] = { 0xf800, 0x07e0, 0x001f };
+
+  /* '+ 1' skips the extra left-neighbour pixel at the start of each work line */
+  const unsigned short*  up   = (const unsigned short*) lines[0] + 1;
+  const unsigned short*  mid  = (const unsigned short*) lines[1] + 1;
+  const unsigned short*  down = (const unsigned short*) lines[2] + 1;
+  unsigned short*        out  = (unsigned short*) _write;
+  int                    i    = 0;
+
+  while ( i < width )
+  {
+    const unsigned int  keep = channel[offset];
+#ifdef ANTIALIAS
+    /* 4/8 of the pixel itself plus 1/8 of each of its four neighbours */
+    unsigned int  acc = ((unsigned int)mid[i] & keep) << 2;
+    acc += ((unsigned int)mid[i-1] & keep);
+    acc += ((unsigned int)mid[i+1] & keep);
+    acc += ((unsigned int)up[i]    & keep);
+    acc += ((unsigned int)down[i]  & keep);
+
+    out[i] = (unsigned short)( (acc >> 3) & keep );
+#else
+    out[i] = (unsigned short)( mid[i] & keep );
+#endif
+
+    offset = (offset == 2) ? 0 : offset + 1;
+    i++;
+  }
+}
+
+
+static void
+postprocess_line_rgb565( unsigned char** lines,
+                         unsigned char*  _write,
+                         int             width,
+                         int             offset )
+{
+  const unsigned short*  up   = (const unsigned short*)lines[0] + 1;
+  const unsigned short*  mid  = (const unsigned short*)lines[1] + 1;
+  const unsigned short*  down = (const unsigned short*)lines[2] + 1;
+  unsigned short*        out  = (unsigned short*)_write;
+  int                    i;
+
+  for ( i = 0; i < width; i++ )
+  {
+    int  red, green, blue;
+
+    /* keep the pixel's own channel; average the other two from two neighbours each */
+    switch (offset) {
+    case 0:   /* red pixel */
+      red    = mid[i] & 0xf800;
+      green  = ((mid[i+1] & 0x07e0) + (down[i] & 0x07e0)) >> 1;
+      blue   = ((mid[i-1] & 0x001f) + (up[i]   & 0x001f)) >> 1;
+      offset = 1;
+      break;
+    case 1:   /* green pixel */
+      red    = ((mid[i-1] & 0xf800) + (up[i]   & 0xf800)) >> 1;
+      green  = mid[i] & 0x07e0;
+      blue   = ((mid[i+1] & 0x001f) + (down[i] & 0x001f)) >> 1;
+      offset = 2;
+      break;
+    default:  /* blue pixel */
+      red    = ((mid[i+1] & 0xf800) + (down[i] & 0xf800)) >> 1;
+      green  = ((mid[i-1] & 0x07e0) + (up[i]   & 0x07e0)) >> 1;
+      blue   = mid[i] & 0x001f;
+      offset = 0;
+      break;
+    }
+
+    out[i] = (unsigned short)( (red & 0xf800) | (green & 0x07e0) | (blue & 0x001f) );
+  }
+}
+
+
+
+void
+gr_swizzle_rgb565( unsigned char*    read_buff,
+                   int               read_pitch,
+                   unsigned char*    write_buff,
+                   int               write_pitch,
+                   int               buff_width,
+                   int               buff_height,
+                   int               x,
+                   int               y,
+                   int               width,
+                   int               height )
+{
+  unsigned char*  temp_lines;
+  unsigned char   temp_local[ 2048 ];
+  int             temp_size;
+
+  /* AA+swizzle the rectangle (x, y, width, height) of an RGB565 buffer (2 bytes per pixel) */
+  if ( height <= 0 || width <= 0 )
+    return;
+
+  /* a negative pitch means bottom-up rows: row 0 lies (buff_height-1) rows past the first byte */
+  if ( read_pitch < 0 )
+    read_buff -= (buff_height-1)*read_pitch;
+  if ( write_pitch < 0 )
+    write_buff -= (buff_height-1)*write_pitch;
+
+ /* work buffer: three 'lines' of width+2 pixels each (one extra pixel on
+  * either side for the horizontal neighbours)
+  */
+  temp_size  = (width+2)*3*2;
+  if (temp_size <= sizeof(temp_local) )
+  {
+    /* try to use stack allocation, which is a lot faster than malloc */
+    temp_lines = temp_local;
+  }
+  else
+  {
+    temp_lines = (unsigned char*)malloc( temp_size );
+    if ( temp_lines == NULL )
+      return;
+  }
+
+  /* do the AA+swizzling here */
+  filter_rect_generic( read_buff, read_pitch, write_buff, write_pitch,
+                       buff_width, buff_height, x, y, width, height, 2,
+                       swizzle_line_rgb565, temp_lines );
+
+#ifdef POSTPROCESS
+  /* perform darkness 'correction' :-) */
+  filter_rect_generic( write_buff, write_pitch, write_buff, write_pitch,
+                       buff_width, buff_height, x, y, width, height, 2,
+                       postprocess_line_rgb565, temp_lines );
+#endif
+
+  /* free work buffer if needed */
+  if (temp_lines != temp_local)
+    free( temp_lines );
+}
+
+
+
+/************************************************************************/
+/************************************************************************/
+/*****                                                              *****/
+/*****               X R G B 3 2   S U P P O R T                    *****/
+/*****                                                              *****/
+/************************************************************************/
+/************************************************************************/
+
+/* this function performs AA+swizzling of a given line from/to 32-bit ARGB or RGB
+ * buffers
+ */
+static void
+swizzle_line_xrgb32( unsigned char**  lines,
+                     unsigned char*   _write,
+                     int              width,
+                     int              offset )
+{
+  /* '+ 1' skips the extra left-neighbour pixel at the start of each work line */
+  const unsigned int*  up   = (const unsigned int*) lines[0] + 1;
+  const unsigned int*  mid  = (const unsigned int*) lines[1] + 1;
+  const unsigned int*  down = (const unsigned int*) lines[2] + 1;
+  unsigned int*        out  = (unsigned int*) _write;
+  /* channel kept by the first pixel: offset 0 -> 0xff0000, 1 -> 0x00ff00, 2 -> 0x0000ff */
+  unsigned int         keep = (0xff0000) >> (offset*8);
+  int                  i    = 0;
+
+  while ( i < width )
+  {
+#ifdef ANTIALIAS
+    /* 4/8 of the pixel itself plus 1/8 of each of its four neighbours */
+    unsigned int  acc = (mid[i] & keep) << 2;
+    acc += (mid[i-1] & keep);
+    acc += (mid[i+1] & keep);
+    acc += (up[i]    & keep);
+    acc += (down[i]  & keep);
+
+    out[i] = (acc >> 3) & keep;   /* bits 24-31 always come out as 0 */
+#else
+    out[i] = mid[i] & keep;
+#endif
+
+    /* rotate to the next channel, wrapping from the lowest byte back to 0xff0000 */
+    keep = (keep >> 8) ? (keep >> 8) : 0x00ff0000;
+    i++;
+  }
+}
+
+
+
+static void
+postprocess_line_xrgb32( unsigned char** lines,
+                         unsigned char*  _write,
+                         int             width,
+                         int             offset )
+{
+  const unsigned int*  up   = (const unsigned int*)lines[0] + 1;
+  const unsigned int*  mid  = (const unsigned int*)lines[1] + 1;
+  const unsigned int*  down = (const unsigned int*)lines[2] + 1;
+  unsigned int*        out  = (unsigned int*)_write;
+  int                  i;
+
+  for ( i = 0; i < width; i++ )
+  {
+    unsigned int  red, green, blue;
+
+    /* keep the pixel's own channel; average the other two from two neighbours each */
+    switch (offset) {
+    case 0:   /* red pixel */
+      red    = mid[i] & 0xff0000;
+      green  = ((mid[i+1] & 0x00ff00) + (down[i] & 0x00ff00)) >> 1;
+      blue   = ((mid[i-1] & 0x0000ff) + (up[i]   & 0x0000ff)) >> 1;
+      offset = 1;
+      break;
+    case 1:   /* green pixel */
+      red    = ((mid[i-1] & 0xff0000) + (up[i]   & 0xff0000)) >> 1;
+      green  = mid[i] & 0x00ff00;
+      blue   = ((mid[i+1] & 0x0000ff) + (down[i] & 0x0000ff)) >> 1;
+      offset = 2;
+      break;
+    default:  /* blue pixel */
+      red    = ((mid[i+1] & 0xff0000) + (down[i] & 0xff0000)) >> 1;
+      green  = ((mid[i-1] & 0x00ff00) + (up[i]   & 0x00ff00)) >> 1;
+      blue   = mid[i] & 0x0000ff;
+      offset = 0;
+      break;
+    }
+
+    /* the top byte (alpha / unused) is written as 0 */
+    out[i] = (red & 0xff0000) | (green & 0x00ff00) | (blue & 0x0000ff);
+  }
+}
+
+
+
+void
+gr_swizzle_xrgb32( unsigned char*    read_buff,
+                   int               read_pitch,
+                   unsigned char*    write_buff,
+                   int               write_pitch,
+                   int               buff_width,
+                   int               buff_height,
+                   int               x,
+                   int               y,
+                   int               width,
+                   int               height )
+{
+  unsigned char*  temp_lines;
+  unsigned char   temp_local[ 2048 ];
+  int             temp_size;
+
+  /* AA+swizzle the rectangle (x, y, width, height) of a 32-bit xRGB buffer (4 bytes per pixel) */
+  if ( height <= 0 || width <= 0 )
+    return;
+
+  /* a negative pitch means bottom-up rows: row 0 lies (buff_height-1) rows past the first byte */
+  if ( read_pitch < 0 )
+    read_buff -= (buff_height-1)*read_pitch;
+  if ( write_pitch < 0 )
+    write_buff -= (buff_height-1)*write_pitch;
+
+ /* work buffer: three 'lines' of width+2 pixels each (one extra pixel on
+  * either side for the horizontal neighbours)
+  */
+  temp_size  = (width+2)*3*4;
+  if (temp_size <= sizeof(temp_local) )
+  {
+    /* try to use stack allocation, which is a lot faster than malloc */
+    temp_lines = temp_local;
+  }
+  else
+  {
+    temp_lines = (unsigned char*)malloc( temp_size );
+    if ( temp_lines == NULL )
+      return;
+  }
+
+  /* do the AA+swizzling here */
+  filter_rect_generic( read_buff, read_pitch, write_buff, write_pitch,
+                       buff_width, buff_height, x, y, width, height, 4,
+                       swizzle_line_xrgb32, temp_lines );
+
+#ifdef POSTPROCESS
+  /* perform darkness 'correction' :-) */
+  filter_rect_generic( write_buff, write_pitch, write_buff, write_pitch,
+                       buff_width, buff_height, x, y, width, height, 4,
+                       postprocess_line_xrgb32, temp_lines );
+#endif
+
+  /* free work buffer if needed */
+  if (temp_lines != temp_local)
+    free( temp_lines );
+}
diff -Nur xephyr.orig/hw/kdrive/ephyr/swizzle.h xephyr/hw/kdrive/ephyr/swizzle.h
--- xephyr.orig/hw/kdrive/ephyr/swizzle.h	1970-01-01 01:00:00.000000000 +0100
+++ xephyr/hw/kdrive/ephyr/swizzle.h	2006-08-18 17:47:42.000000000 +0200
@@ -0,0 +1,62 @@
+/*  Authors:  David Turner  <david@freetype.org>
+ *            Manu Cornet   <manu@manucornet.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ */
+
+#include <stdlib.h>
+#include <memory.h>
+
+void
+gr_swizzle_rgb24 (unsigned char*    read_buff,
+                  int               read_pitch,
+                  unsigned char*    write_buff,
+                  int               write_pitch,
+                  int               buff_width,
+                  int               buff_height,
+                  int               x,
+                  int               y,
+                  int               width,
+                  int               height);
+
+void
+gr_swizzle_rgb565 (unsigned char*    read_buff,
+		   int               read_pitch,
+		   unsigned char*    write_buff,
+		   int               write_pitch,
+                   int               buff_width,
+                   int               buff_height,
+                   int               x,
+                   int               y,
+		   int               width,
+		   int               height);
+
+void
+gr_swizzle_xrgb32 (unsigned char*    read_buff,
+		   int               read_pitch,
+		   unsigned char*    write_buff,
+		   int               write_pitch,
+		   int               buff_width,
+		   int               buff_height,
+		   int               x,
+		   int               y,
+		   int               width,
+		   int               height);
