/*
 * 5799-WZQ (C) COPYRIGHT IBM CORPORATION 1986,1988
 * LICENSED MATERIALS - PROPERTY OF IBM
 * REFER TO COPYRIGHT INSTRUCTIONS FORM NUMBER G120-2083
 */
/* $Header:strcat.s 12.0$ */
/* $ACIS:strcat.s 12.0$ */
/* $Source: /ibm/acis/usr/src/lib/libc/ca/gen/RCS/strcat.s,v $ */

	.data			# the rcsid string below goes in the data section
rcsid:	.asciz	"$Header:strcat.s 12.0$"
	.text			# switch back to the text section for the code

#include "LINKG.h"

 #
 # version of strcat written in assembler for better performance
 #	strcat(s1,s2) char *s1, *s2;
 #
ENTRY(strcat)
 #
 # Appends the string s2 to the end of s1 and returns s1 in r2.
 #
 # Register roles, as used by the code below:
 #	r0	copy of the original s1; moved back into r2 before each return
 #	r2	s1 on entry; scan pointer while looking for the end of s1,
 #		then the store pointer while s2 is appended
 #	r3	s2 on entry; load pointer into s2
 #	r4	switch target address, then the s1/s2 word being examined
 #	r5	scratch: switch offset, extracted bytes, zero for terminator
 #	r6	second scratch in the Leven and Lodd loops; saved at -4(sp)
 #		on entry and reloaded right after each brx r15
 #
 # The instruction written after each brx/bx/bnex is relied on as part of
 # the branch (the existing comments join the two with "and").
 # NOTE(review): presumably the branch-with-execute subject instruction;
 # confirm against the ROMP instruction reference.
 # NOTE(review): sri16 r,n is read here as a right shift by 16+n bits and
 # sri r,n as a right shift by n bits; confirm against the same reference.
 #
	put	r6,-4(sp)	# save r6, going to need extra register.
	mr	r0,r2		# r0 = original s1, kept for the return value
	nilz	r5,r2,0x03	# r5 = s1 & 3, byte offset of s1 within its word
	s	r2,r5		# r2 = s1 rounded down to a word boundary
	sli	r5,3		# r5 *= 8, each case of the switch is 8 bytes
	get	r4,$0f		# get address of beginning of switch statement
	a	r4,r5		# add displacement of the case
	brx	r4		# Branch to calculated case and
	ls	r4,0(r2)	# get first word (possibly rounded down).
 # First part: find the null byte in s1. At this point:
 # r0 contains the base of s1.
 # r2 points to s1 or to the nearest word boundary before s1.
 # r4 contains the 4 bytes at r2, one of which is the first byte of s1.
 # The following is a switch statement. Initially, control jumps to one of
 # the labels 0, 1, 2 or 3 depending on where the first byte of s1 sits
 # in r4, so bytes of the word that come before s1 are never tested.
 #
9:	inc	r2,4		# point to next word
 # code at 0f, 1f, 2f must be exactly 8 bytes long to match switch above
 # (two instructions per case; cases 0-2 use beq where case 3 uses jeq.
 # NOTE(review): presumably beq is the longer encoding that pads the case
 # to 8 bytes - do not shorten it without checking the sizes.)
 # The switch:
0:
	niuz	r5,r4,0xff00	# get byte 0 from r4
	beq	0f		# branch if byte 0 is null

1:
	niuz	r5,r4,0x00ff	# get byte 1 from r4
	beq	1f		# branch if byte 1 is null

2:
	nilz	r5,r4,0xff00	# get byte 2 from r4
	beq	2f		# branch if byte 2 is null

3:
	nilz	r5,r4,0x00ff	# get byte 3 from r4
	jeq	3f		# branch if byte 3 is null
	ls	r4,4(r2)	# pick up next word
	b	9b		# null not found in byte 3, try next word
 # Come to one of labels 0, 1, 2 or 3 when a null byte was found and
 # adjust r2 to point to the zero byte. The labels fall through, so
 # entering at N adds N to r2.
3:	inc	r2,1
2:	inc	r2,1
1:	inc	r2,1
0:
 # Second part: do strcpy of s2 onto the end of s1.
 # At this point: r2 is pointing at the null byte of s1.
 # Start by copying a byte at a time until we are sure that r3 (s2) is
 # word aligned so we can later do word loads.
 #
	nilz	r5,r3,0x03	# r5 = s2 & 3, byte offset of s2 within its word
	s	r3,r5		# r3 = s2 rounded down to a word boundary
	sli	r5,4		# r5 *= 16, each case of the switch is 16 bytes
	get	r4,$0f		# get address of beginning of switch statement
	a	r4,r5		# add displacement of the case
	brx	r4		# branch to calculated case and
	ls	r4,0(r3)	# get first word (possibly rounded down).
 # At this point:
 # r0 is the base of s1, to be returned in r2.
 # r2 points at the next byte of s1 to be written.
 # r3 points to s2 or to the nearest word boundary before s2
 # if s2 is not on a word boundary.
 # r4 contains 4 bytes as pointed to by r3.
 # The following is a switch statement. Initially, control jumps to one of
 # the labels 0, 1, 2 or 3 depending on where the first byte of s2 sits
 # in r4.
 #
 # NOTE(review): no 9b reference follows this label in the visible source
 # and the computed branch above enters at 0: or later, so the inc below
 # looks unreachable. Left untouched.
9:	inc	r3,4		# point to next word
 # code at 0f, 1f, 2f must be exactly 16 bytes long to match switch above
 # (instruction sizes in bytes are given in parentheses for case 0).
 # The switch:
0:
	niuz	r5,r4,0xff00	# get byte 0 from r4		(4)
	beq	Ldone		# branch if byte 0 is null	(4)
	sri16	r5,8		# shift byte 0 to the low byte	(2)
	stc	r5,0(r2)	# append it to s1		(4)
	inc	r2,1		# advance the s1 pointer	(2)

1:
	niuz	r5,r4,0x00ff	# get byte 1 from r4
	beq	Ldone		# branch if byte 1 is null
	sri16	r5,0		# shift byte 1 to the low byte
	stc	r5,0(r2)	# append it to s1
	inc	r2,1		# advance the s1 pointer

2:
	nilz	r5,r4,0xff00	# get byte 2 from r4
	beq	Ldone		# branch if byte 2 is null
	sri	r5,8		# shift byte 2 to the low byte
	stc	r5,0(r2)	# append it to s1
	inc	r2,1		# advance the s1 pointer

3:
	nilz	r5,r4,0x00ff	# get byte 3 from r4
	jeq	Ldone		# branch if byte 3 is null
	stcs	r5,0(r2)	# byte 3 is already in the low byte, append it
	inc	r2,1		# advance the s1 pointer

 # At this point the rest of s2 starts on the word boundary after r3.
 # Determine alignment of r2: branch to one of two algorithms
 # depending on whether r2 is even (at least 1/2 word aligned) or odd.
 # If even, do 1/2 word puts into s1.
 # If odd, do 1 byte-1/2 word-1 byte puts.
	inc	r3,4		# step r3 to the next word of s2
	nilz	r5,r2,0x01	# low bit on?
	bnex	Lodd		# Branch to odd/even and
	ls	r4,0(r3)	# get next word via r3.
Leven:
 # r2 is even here: each s2 word is stored as two 1/2 words with putha.
	niuz	r5,r4,0xff00	# get byte 0 from r4
	jeq	0f		# byte 0 is null: just write the terminator
	niuz	r6,r4,0x00ff	# get byte 1 from r4
	jeq	1f		# byte 1 is null: store byte 0, then terminate
	o	r5,r6		# r5 = bytes 0 and 1, still in the upper half
	sri16	r5,0		# bring them down to the lower half
	putha	r5,0(r2)	# store bytes 0 and 1 at r2

	nilz	r5,r4,0xff00	# get byte 2 from r4
	jeq	2f		# branch if byte 2 is null
	nilz	r6,r4,0x00ff	# get byte 3 from r4
	jeq	3f		# byte 3 is null: store byte 2, then terminate
	o	r5,r6		# r5 = bytes 2 and 3 in the lower half
	putha	r5,2(r2)	# store bytes 2 and 3 at r2+2
	inc	r2,4		# s1 += 4
	inc	r3,4		# s2 += 4
	bx	Leven		# null not found in byte 3, try next word and
	 ls	r4,0(r3)	# get next s2 word.

 # Leven exit: byte 1 was null. r5 still holds byte 0 in its top byte.
1:
	sri16	r5,8		# shift byte 0 to the low byte
	stcs	r5,0(r2)	# store byte 0
	get	r5,$0		# r5 = 0
	stcs	r5,1(r2)	# null terminate
	mr	r2,r0		# return base of s1
	brx	r15		# return and
	get	r6,-4(sp)	# restore r6
	
 # Leven exit: byte 3 was null. r5 holds byte 2; bytes 0 and 1 are stored.
3:	sri	r5,8		# r5(XXXc) <- r5(XXcX)
	stcs	r5,2(r2)	# store byte 2
	get	r5,$0		# r5 = 0
	stcs	r5,3(r2)	# null terminate
	mr	r2,r0		# return base of s1.
	brx	r15		# return and
	get	r6,-4(sp)	# restore r6

 # Leven exits: byte 2 was null (bytes 0 and 1 already stored, so skip
 # past them) or byte 0 was null (nothing stored from this word).
2:	inc	r2,2
0:	get	r5,$0		# r5 = 0
	stcs	r5,0(r2)	# null terminate
	mr	r2,r0		# Return base of s1.
	brx	r15		# return and
	get	r6,-4(sp)	# restore r6

Lodd:
 # r2 is odd here: per s2 word store 1 byte, then a 1/2 word, then 1 byte,
 # so that the putha in the middle is issued with r2 on an even address.
 # Each null test below is separated from the instruction that sets the
 # condition by stores, mc32 or inc.
 # NOTE(review): this relies on stcs, putha, mc32 and inc leaving the
 # condition status unchanged; confirm before reordering anything here.
	niuz	r5,r4,0xff00	# get byte 0 from r4
	sri16	r5,8		# shift it to the low byte
	stcs	r5,0(r2)	# store byte 0, even when it is the null
	jeq	Lreturn		# null byte in place, return
	
	niuz	r5,r4,0x00ff	# get byte 1 from r4
	jeq	1f		# byte 1 is null: terminate after byte 0
	sri	r5,8		# move byte 1 down one byte position
	nilz	r6,r4,0xff00	# get byte 2 from r4 (sets the null test)
	mc32	r5,r6		# NOTE(review): presumably r5 = byte 1, byte 2
	inc	r2,1		# cannot putha at 1(rN), so bump r2 first
	putha	r5,0(r2)	# store bytes 1 and 2, even when 2 is the null
	jeq	Lreturn		# byte 2 was null and is in place, return

	nilz	r5,r4,0x00ff	# get byte 3 from r4
	stcs	r5,2(r2)	# store byte 3, even when it is the null
	jeq	Lreturn		# byte 3 was null and is in place, return
	inc	r2,3		# s1 += 3 (already up 1 from prev inc)
	inc	r3,4		# s2 += 4
	bx	Lodd		# no null in this word, try next word and
	get	r4,0(r3)	# get next s2 word.

 # Lodd exit: byte 1 was null. Byte 0 is stored at 0(r2), so step past it.
1:	inc	r2,1
Ldone:				# add null terminator and return
	get	r5,$0		# r5 = 0
	stcs	r5,0(r2)	# null terminate at r2
Lreturn:			# null terminator in place, return
	mr	r2,r0		# Return base of s1.
	brx	r15		# return and
	get	r6,-4(sp)	# Restore r6


 # NOTE(review): TTNOFRM is not defined in this file; presumably a macro
 # supplied by LINKG.h.
	TTNOFRM
