/* strcat(dest, src) -- Append SRC on the end of DEST.
   For Intel 80x86, x>=3.
   Copyright (C) 1994, 1995 Free Software Foundation, Inc.
   Contributed by Ulrich Drepper <drepper@ipd.info.uni-karlsruhe.de>.

The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.

The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Library General Public License for more details.

You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB.  If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA.  */

#include <ansidecl.h>
#include <string.h>

#include "asm-ops.h"

/* Append the string SRC, including its terminating NUL byte, to the end
   of the string DEST and return DEST.

   All of the work is done in inline assembly.  One of two
   implementations is selected at compile time: a `repne scasb' based
   one when neither __i486__ nor __i586__ is defined, and a version that
   also searches for the end of DEST a word at a time otherwise.

   The LL, LF, LB and ALIGN macros come from "asm-ops.h", which is not
   visible here.  Judging from how they are used, LL(n) defines the
   numeric local label n and LF(n)/LB(n) name the next/previous
   definition of it -- confirm against that header.

   Every `addl $0xfefefeff' / `xorl' / `notl' / `andl $0x01010100' group
   below examines a whole 32-bit word at once; the `jnc' and `jnz' that
   belong to the group are taken when a zero byte was detected in that
   word.  */
char *
strcat(char * dest, const char *src)
{
register char * __res;

/*
 * We have to use two different versions, one for the i386 and one for
 * the rest, because the difference is so big.  Be aware when the name of
 * the i586 (aka Pentium) in the gcc changes (I assume __i586__) or the
 * next generation (i686) is available.
 */
#if !defined(__i486__) && !defined(__i586__)

/* Operands: %0 = DEST in %edi, %1 = 0 in %eax, %2 = 0xffffffff in %ecx,
   %3 = SRC in %edx.  */
__asm__(
	/* Find the end of DEST: with %al == 0 and %ecx == 0xffffffff,
	   `repne scasb' moves %edi one byte past the first NUL.  */
        "cld\n\t"
	"repne\n\t"
	"scasb\n\t"
	"decl %0\n\t"		/* step back onto the NUL itself */

	/* Turn %3 into (SRC - write position), so that (%3,%0) is the
	   source byte that belongs at (%0).  */
	"subl %0,%3\n\t"
	
	/* Copy single bytes, three at most, until the write pointer %0 is
	   4-byte aligned.  Copying the NUL finishes the job (label 8).  */
	"testl $0x3,%0\n\t"
	"jz " LF(1) "\n\t"
	"movb (%3,%0),%%al\n\t"
	"movb %%al,(%0)\n\t"
	"orb %%al,%%al\n\t"
	"jz " LF(8) "\n\t"
	"incl %0\n\t"
	
	"testl $0x3,%0\n\t"
	"jz " LF(1) "\n\t"
	"movb (%3,%0),%%al\n\t"
	"movb %%al,(%0)\n\t"
	"orb %%al,%%al\n\t"
	"jz " LF(8) "\n\t"
	"incl %0\n\t"
	
	"testl $0x3,%0\n\t"
	"jz " LF(1) "\n\t"
	"movb (%3,%0),%%al\n\t"
	"movb %%al,(%0)\n\t"
	"orb %%al,%%al\n\t"
	"jz " LF(8) "\n\t"
	"incl %0\n"

	/* Main loop, unrolled four times: fetch a source word into %eax,
	   test a copy of it in %ecx for a zero byte, and store the word
	   only if none was found.  */
LL(1)	"\tmovl (%3,%0),%%eax\n\t"
	"movl %%eax,%%ecx\n\t"
	"addl $0xfefefeff,%%ecx\n\t"
	"jnc " LF(9) "\n\t"
	"xorl %%eax,%%ecx\n\t"
	"notl %%ecx\n\t"
	"andl $0x01010100,%%ecx\n\t"
	"jnz " LF(9) "\n\t"
	"movl %%eax,(%0)\n\t"

	"movl 4(%3,%0),%%eax\n\t"
	"movl %%eax,%%ecx\n\t"
	"addl $0xfefefeff,%%ecx\n\t"
	"jnc " LF(91) "\n\t"
	"xorl %%eax,%%ecx\n\t"
	"notl %%ecx\n\t"
	"andl $0x01010100,%%ecx\n\t"
	"jnz " LF(91) "\n\t"
	"movl %%eax,4(%0)\n\t"

	"movl 8(%3,%0),%%eax\n\t"
	"movl %%eax,%%ecx\n\t"
	"addl $0xfefefeff,%%ecx\n\t"
	"jnc " LF(92) "\n\t"
	"xorl %%eax,%%ecx\n\t"
	"notl %%ecx\n\t"
	"andl $0x01010100,%%ecx\n\t"
	"jnz " LF(92) "\n\t"
	"movl %%eax,8(%0)\n\t"

	"movl 12(%3,%0),%%eax\n\t"
	"movl %%eax,%%ecx\n\t"
	"addl $0xfefefeff,%%ecx\n\t"
	"jnc " LF(93) "\n\t"
	"xorl %%eax,%%ecx\n\t"
	"notl %%ecx\n\t"
	"andl $0x01010100,%%ecx\n\t"
	"jnz " LF(93) "\n\t"
	"movl %%eax,12(%0)\n\t"
	"addl $16,%0\n\t"
	"jmp " LB(1) "\n"

	/* Entered at 93, 92 or 91 when the zero byte is in the fourth,
	   third or second word of a round; falling through the adds
	   advances %0 by 12, 8 or 4 so that it addresses that word.  */
LL(93)	"\taddl $4,%0\n"
LL(92)	"\taddl $4,%0\n"
LL(91)	"\taddl $4,%0\n"

	/* %eax holds the word in which the zero byte was detected.  Store
	   it one byte at a time and stop once the NUL has been written;
	   the fourth byte is stored without a further test.  */
LL(9)	"\tmovb %%al,(%0)\n\t"
	"orb %%al,%%al\n\t"
	"jz " LF(8) "\n\t"
	"movb %%ah,1(%0)\n\t"
	"orb %%ah,%%ah\n\t"
	"jz " LF(8) "\n\t"
	"shrl $16,%%eax\n\t"
	"movb %%al,2(%0)\n\t"
	"orb %%al,%%al\n\t"
	"jz " LF(8) "\n\t"
	"movb %%ah,3(%0)\n"
LL(8)
				
	: :"D" (dest),"a" (0),"c" (0xffffffff), "d" (src):"cx","dx","di");
#else
/* First statement: locate the word of DEST that contains its NUL.
   Operands: %0 = DEST in %edx, %1 = SRC in %ecx.  */
__asm__(
	/* Nothing to append if SRC is empty.  Label 8 is defined at the
	   very end of the second asm statement below.  */
	"testb $0xff,(%1)\n\t"
	"jz " LF(8) "\n\t"
	
        /* test the first bytes separately until aligned */
	"testb $3,%0\n\t"
	"jz " LF(1) "\n\t"
	"testb $0xff,(%0)\n\t"	/* was last character ? */
	"jz " LF(2) "\n\t"	/* yes -> branch (label 2 is in the second asm) */
	"incl %0\n\t"
	
	"testb $3,%0\n\t"
	"jz " LF(1) "\n\t"
	"testb $0xff,(%0)\n\t"
	"jz " LF(2) "\n\t"
	"incl %0\n\t"
	
	"testb $3,%0\n\t"
	"jz " LF(1) "\n\t"
	/* Byte-sized test on purpose, like the two steps above: %0 is not
	   word aligned here, so a 32-bit access would be a misaligned
	   read reaching three bytes past the one being examined.  */
	"testb $0xff,(%0)\n\t"
	"jz " LF(2) "\n\t"
	"incl %0\n"

	/* If you cannot guess what this is for look through the resulting
	 * code.  The dumb version has an .align at the end of the conditional
	 * region.  This is quite long.  If we could make the jump to the
	 * label '1' behind the NOPs we could save the time in 75% of the
	 * cases without any costs.  Exactly this is done here. If anything
	 * in the prepending code changes the number of NOPs may have to
	 * change, too.  */

	/* Label 1 subtracts 16 up front because the loop head at label 4
	   always adds 16; on first entry the two cancel out.  */
#if defined(I_DONT_KNOW_WHAT_THIS_MEANS)
LL(1)	"\tsubl $16,%0\n\t"

	ALIGN "\n"
#else
	"\tnop; nop; nop; nop; nop\n\t"
	"nop; nop; nop; nop; nop; nop\n"
LL(1)   "\tsubl $16,%0\n"
#endif

	/* Scan DEST four words per round.  As soon as a zero byte is
	   detected, leave the loop with that word still in %eax.  */
LL(4)	"\taddl $16,%0\n\t"

	"movl (%0),%%eax\n\t"
	"movl %%eax,%%ecx\n\t"
	"addl $0xfefefeff,%%ecx\n\t"
	"jnc " LF(3) "\n\t"
	"xorl %%eax,%%ecx\n\t"
	"notl %%ecx\n\t"
	"andl $0x01010100,%%ecx\n\t"
	"jnz " LF(3) "\n\t"
	
	"movl 4(%0),%%eax\n\t"
	"movl %%eax,%%ecx\n\t"
	"addl $0xfefefeff,%%ecx\n\t"
	"jnc " LF(5) "\n\t"
	"xorl %%eax,%%ecx\n\t"
	"notl %%ecx\n\t"
	"andl $0x01010100,%%ecx\n\t"
	"jnz " LF(5) "\n\t"
	
	"movl 8(%0),%%eax\n\t"
	"movl %%eax,%%ecx\n\t"
	"addl $0xfefefeff,%%ecx\n\t"
	"jnc " LF(6) "\n\t"
	"xorl %%eax,%%ecx\n\t"
	"notl %%ecx\n\t"
	"andl $0x01010100,%%ecx\n\t"
	"jnz " LF(6) "\n\t"
	
	"movl 12(%0),%%eax\n\t"
	"movl %%eax,%%ecx\n\t"
	"addl $0xfefefeff,%%ecx\n\t"
	"jnc " LF(7) "\n\t"
	"xorl %%eax,%%ecx\n\t"
	"notl %%ecx\n\t"
	"andl $0x01010100,%%ecx\n\t"
	"jz " LB(4) "\n"	/* no zero byte in this round: next 16 bytes */

	/* Reached at 7, 6 or 5 when the zero byte is in the fourth, third
	   or second word; the adds advance %0 by 12, 8 or 4 so that it
	   addresses that word when label 3 is reached.  */
LL(7)	"\taddl $4,%0\n"
LL(6)	"\taddl $4,%0\n"
LL(5)	"\taddl $4,%0\n"

LL(3)
	: :"d" (dest),"c" (src):"ax","dx","cx");

/* Second statement: do the copy.  It continues straight on from the
   statement above and relies on the registers that one left behind:
   %eax is the word of DEST containing the NUL and %edx its address (or,
   when entered at label 2, %edx already points at the NUL).
   Operand: %0 = SRC in %ecx.  */
__asm__(
	/* Move %edx onto the NUL byte inside the word.  After the shift
	   %ah is the word's third byte; if none of the first three bytes
	   is zero, %edx ends up on the fourth.  */
	"testb $0xff,%%al\n\t"
	"jz " LF(2) "\n\t"
	"incl %%edx\n\t"
	"testb $0xff,%%ah\n\t"
	"jz " LF(2) "\n\t"
	"incl %%edx\n\t"
        "shrl $8,%%eax\n\t"
	"testb $0xff,%%ah\n\t"
	"jz " LF(2) "\n\t"
	"incl %%edx\n"

	/* Turn %edx into (end of DEST - read position), so that
	   (%edx,%0) is where the source byte at (%0) has to go.  */
LL(2)	"\tsubl %0,%%edx\n\t"

	/* Copy single bytes, three at most, until the read pointer %0 is
	   4-byte aligned.  Copying the NUL finishes the job (label 8).  */
	"testb $3,%0\n\t"
	"jz " LF(29) "\n\t"
	"movb (%0),%%al\n\t"
	"movb %%al,(%%edx,%0)\n\t"
	"testb $0xff,%%al\n\t"
	"jz " LF(8) "\n\t"
	"incl %0\n\t"

	"testb $3,%0\n\t"
	"jz " LF(29) "\n\t"
	"movb (%0),%%al\n\t"
	"movb %%al,(%%edx,%0)\n\t"
	"testb $0xff,%%al\n\t"
	"jz " LF(8) "\n\t"
	"incl %0\n\t"

	"testb $3,%0\n\t"
	"jz " LF(29) "\n\t"
	"movb (%0),%%al\n\t"
	"movb %%al,(%%edx,%0)\n\t"
	"testb $0xff,%%al\n\t"
	"jz " LF(8) "\n\t"
	"incl %0\n\t"

	/* Main loop, unrolled four times: fetch a source word into %eax,
	   test a copy of it in %edi for a zero byte, and store the word
	   only if none was found.  */
LL(29)	"\tmovl (%0),%%eax\n\t"
	"movl %%eax,%%edi\n\t"
	"addl $0xfefefeff,%%edi\n\t"
	"jnc " LF(9) "\n\t"
	"xorl %%eax,%%edi\n\t"
	"notl %%edi\n\t"
	"andl $0x01010100,%%edi\n\t"
	"jnz " LF(9) "\n\t"
	"movl %%eax,(%%edx,%0)\n\t"

	"movl 4(%0),%%eax\n\t"
	"movl %%eax,%%edi\n\t"
	"addl $0xfefefeff,%%edi\n\t"
	"jnc " LF(91) "\n\t"
	"xorl %%eax,%%edi\n\t"
	"notl %%edi\n\t"
	"andl $0x01010100,%%edi\n\t"
	"jnz " LF(91) "\n\t"
	"movl %%eax,4(%%edx,%0)\n\t"

	"movl 8(%0),%%eax\n\t"
	"movl %%eax,%%edi\n\t"
	"addl $0xfefefeff,%%edi\n\t"
	"jnc " LF(92) "\n\t"
	"xorl %%eax,%%edi\n\t"
	"notl %%edi\n\t"
	"andl $0x01010100,%%edi\n\t"
	"jnz " LF(92) "\n\t"
	"movl %%eax,8(%%edx,%0)\n\t"

	"movl 12(%0),%%eax\n\t"
	"movl %%eax,%%edi\n\t"
	"addl $0xfefefeff,%%edi\n\t"
	"jnc " LF(93) "\n\t"
 	"xorl %%eax,%%edi\n\t"
	"notl %%edi\n\t"
	"andl $0x01010100,%%edi\n\t"
	"jnz " LF(93) "\n\t"
	"movl %%eax,12(%%edx,%0)\n\t"
	"addl $16,%0\n\t"
	"jmp " LB(29) "\n"

	/* Entered at 93, 92 or 91 when the zero byte is in the fourth,
	   third or second word of a round; falling through the adds
	   advances %0 by 12, 8 or 4 so that it addresses that word.  */
LL(93)	"\taddl $4,%0\n"
LL(92)	"\taddl $4,%0\n"
LL(91)	"\taddl $4,%0\n"

	/* %eax holds the word in which the zero byte was detected.  Store
	   it one byte at a time and stop once the NUL has been written;
	   the fourth byte is stored without a further test.  */
LL(9)	"\tmovb %%al,(%%edx,%0)\n\t"
	"orb %%al,%%al\n\t"
	"jz " LF(8) "\n\t"
	"movb %%ah,1(%%edx,%0)\n\t"
	"orb %%ah,%%ah\n\t"
	"jz " LF(8) "\n\t"
	"shrl $16,%%eax\n\t"
	"movb %%al,2(%%edx,%0)\n\t"
	"orb %%al,%%al\n\t"
	"jz " LF(8) "\n\t"
	"movb %%ah,3(%%edx,%0)\n"
	
LL(8)
        : :"c" (src):"dx","ax","di");
#endif

	/*
	 * This is just a trick to convince GCC that DEST has not
	 * changed and so no saving is needed.  The empty asm ties its
	 * output (__res, in %eax) to its input (dest), so the value
	 * returned is DEST.
	 */
__asm__("":"=a" (__res) :"0" (dest));

return __res;
}
