/*
 * 5799-WZQ (C) COPYRIGHT IBM CORPORATION 1986
 * LICENSED MATERIALS - PROPERTY OF IBM
 * REFER TO COPYRIGHT INSTRUCTIONS FORM NUMBER G120-2083
 */
/* $Header:umf2x.s 12.0$ */
/* $ACIS:umf2x.s 12.0$ */
/* $Source: /ibm/acis/usr/src/lib/libc/ca/gen/RCS/umf2x.s,v $ */

#include "LINKG.h"

	.data				#revision-control identification string,
					#kept in the data section
rcsid:	.asciz	"$Header:umf2x.s 12.0$"
	.text				#the routines below are assembled as code


 #       Multiply of two unsigned 64-bit fractions
 #       int _umf2x2(opa,opb)
 #			opa = pointer to multiplicand (and product)
 #			opb = pointer to multiplier
 #		     result = amount of postnormalizing shift (1 or 0 only)
 #
 #		NOTE(review): r0 starts at 0 and is decremented once when
 #		the one-bit shift is applied, so the value actually handed
 #		back looks like 0 or -1 (the negative-count convention that
 #		FPdMULT documents) -- confirm against the callers.
 #
 #		On entry r2 = opa and r3 = opb.  Each operand is two words:
 #		the more significant word at offset 0 (a1, b1) and the less
 #		significant word at offset 4 (a2, b2).  The rounded two-word
 #		product is stored back over opa.
 #
 #		Working registers:  r11,r12,r13 = product accumulator
 #				    r10 = a2, r14 = b1
 #				    r5,r6 = operands handed to mu
 #				    r4,mq = result from mu, r15 = link/scratch
 #
 #			We form the sum of these products:
 #
 #					       |----|  a2 x b2
 #					|----| |----|  a1 x b2
 #				 |----| |----|         a1 x b1
 #					|----| |----|  a2 x b1
 #				---------------------
 #		and return only  |----| |----|
 #
 
 
.set mq      ,       r10                #multiplier quotient system control reg
 
ENTRY(_umf2x2)
	stm	r5,REG_OFFSET+20(sp)	#save registers (presumably r5 through r15);
					#the lm before the return restores them,
					#r15 included, since balix reuses it below

	ls	r14,0(r3)		#r14 = b1
	ls	r6,4(r3)		#multiplier (b1,b2) loaded
	ls	r10,4(r2)		#multiplicand a2 loaded

	mr	r5,r10			#mu takes its multiplicand in r5 and in mq
	balix	r15,mu			#multiply a2 x b2
	mts	%mq,r10			#mq = a2; runs before mu gets control
	mr	r13,r4			#high word of a2 x b2 -- ignore low

	ls	r5,0(r2)		#get a1
	balix	r15,mu			#multiply a1 x b2
	mts	%mq,r5			#mq = a1; runs before mu gets control
	mfs	%mq,r15			#r15 = low word of a1 x b2
	s	r12,r12			#add product to r12,r13
	a	r13,r15			#low word into r13, carry out feeds the ae
	ae	r12,r4			#no carry possible to r11

	mr	r6,r14			#multiplier becomes b1; r5 is still a1
	balix	r15,mu			#multiply a1 x b1
	mts	%mq,r5
	mfs	%mq,r15			#r15 = low word of a1 x b1
 	a	r12,r15
	aei	r11,r4,0		#combine products

	mr	r5,r10			#multiplicand back to a2; r6 is still b1
	balix	r15,mu			#multiply a2 x b1
	mts	%mq,r5
	mfs	%mq,r15			#r15 = low word of a2 x b1
	a	r13,r15
	ae	r12,r4			#combine with previous,
	aei	r11,r11,0		#propagating carries.

 #	Reentry from _umf2x1 to postnormalize, round, and return.
 #			r11,r12,r13 = product so far
 #			condition code reflects state of r11
 #
 #	r0 is the value returned.  The lis that clears it sits between
 #	the add that set the condition code and the jm that tests it, so
 #	this sequence relies on lis leaving the condition code alone.

lpn:	lis	r0,0			#now handle postnormalizing --
	jm	l1			#already normalized
	dec	r0,1			#top bit of r11 clear: count one shift
	a	r13,r13			#and shift r11,r12,r13 left by one bit
	ae	r12,r12
	ae	r11,r11
l1:	a	r13,r13			#round up to 2-word result
	aei	r12,r12,0		#(top bit of r13 leaves as the carry and
	aei	r11,r11,0		#is folded into r12, then into r11)
	jnc0	l2			#it just might overflow --
	lis	r0,0			#it did.  Back off the normalizing,
	lis	r12,0			#recreate the product.
	cau	r11,0-0x8000(r0)	#presumably r11 = 0x80000000: cau supplies
					#the upper halfword, r0 contributes zero
l2:	sts	r11,0(r2)		#store back over first operand
	sts	r12,4(r2)
	lm	r5,REG_OFFSET+20(sp)	#and return.
	brx	r15
	mr	r2,r0			#result = r0; executes along with the brx


 #       Multiply of unsigned 64-bit fraction by unsigned 32-bit fraction
 #
 #       int _umf2x1(opa,opb)
 #			opa = pointer to multiplicand (and product)
 #			opb = pointer to 1-word multiplier
 #		     result = amount of postnormalizing shift (1 or 0 only)
 #
 #		NOTE(review): the shared tail at lpn decrements r0 from 0
 #		when it shifts, so the value handed back looks like 0 or
 #		-1 (the negative-count convention that FPdMULT documents)
 #		-- confirm against the callers.
 #
 #		On entry r2 = opa and r3 = opb.  The multiplicand is two
 #		words, a1 at offset 0 and a2 at offset 4; the multiplier b
 #		is the single word at offset 0 of opb.
 #
 #			We form the sum of these products:
 #
 #					|----| |----|  a2 x b
 #				 |----| |----|         a1 x b
 #				---------------------
 #		and return only  |----| |----|
 #
 #		Max error is 0.5 ulp.

ENTRY(_umf2x1)
	stm	r5,REG_OFFSET+20(sp)	#save registers; the lm in the shared tail
					#at lpn restores them

	ls	r6,0(r3)		#multiplier b loaded
	ls	r5,4(r2)		#multiplicand a2 loaded

	balix	r15,mu			#multiply a2 x b
	mts	%mq,r5			#mq = a2; runs before mu gets control
	mfs	%mq,r13			#product to r12, r13
	mr	r12,r4

	ls	r5,0(r2)		#multiplicand a1 loaded
	balix	r15,mu			#multiply a1 x b
	mts	%mq,r5			#mq = a1; runs before mu gets control
	mfs	%mq,r15			#r15 = low word of a1 x b
 	a	r12,r15			#low word into r12, carry out feeds the aei
	bx	lpn
	aei	r11,r4,0		#combine products
					#and join common postnormalization.
					#NOTE later use of condition code.

 #       Multiply of unsigned 32-bit fraction by unsigned 32-bit fraction
 #
 #       int _umf1x1(opa,opb)
 #			opa = pointer to multiplicand (and product)
 #			opb = pointer to 1-word multiplier
 #		     result = amount of postnormalizing shift (1 or 0 only)
 #
 #		NOTE(review): r0 starts at 0 and is decremented once when
 #		the one-bit shift is applied, so the value actually handed
 #		back looks like 0 or -1 (the negative-count convention that
 #		FPdMULT documents) -- confirm against the callers.
 #
 #		On entry r2 = opa and r3 = opb; both operands are the
 #		single word at offset 0.  The rounded one-word product is
 #		stored back over opa.
 #
 #			We form the simple product
 #
 #				 |----| |----|         a x b
 #				---------------------
 #		and return only  |----| 
 #
 #		Max error is 0.5 ulp.

ENTRY(_umf1x1)
	stm	r5,REG_OFFSET+20(sp)	#save registers; restored by the lm below

	ls	r6,0(r3)		#multiplier b loaded
	ls	r5,0(r2)		#multiplicand a loaded

	balix	r15,mu			#multiply a x b
	mts	%mq,r5			#mq = a; runs before mu gets control
	mfs	%mq,r13			#product to r12, r13
	ai	r12,r4,0		#r12 = high word; presumably ai instead of
					#mr so the condition code reflects r12

	lis	r0,0			#r0 = value to return
	jm	s1			#normalize if 'positive',
	dec	r0,1			#meaning sign bit unoccupied.
	a	r13,r13			#shift r12,r13 left by one bit
	ae	r12,r12
s1:	a	r13,r13			#round up to 1-word result
	aei	r12,r12,0		#it just might overflow --
	jnc0	s2
	lis	r0,0			#it did.  Back off the normalizing,
	cau	r12,0-0x8000(r0)	#recreate the product.
s2:	sts	r12,0(r2)		#store back over first operand
	lm	r5,REG_OFFSET+20(sp)
	brx	r15			#and return.
	mr	r2,r0			#result = r0; executes along with the brx

mu:					#unsigned-multiply subroutine
 #		In:   r5  = multiplicand; the caller also loads it into
 #			    mq with the mts that follows its balix
 #		      r6  = multiplier
 #		      r15 = return address
 #		Out:  r4  = high word of the product
 #		      mq  = low word of the product
 #		r5 and r6 are not written here.
 #
 #		The run of m steps is followed by two corrections to the
 #		high word, one for each operand that is >= 2^31 --
 #		presumably because the m steps by themselves treat the
 #		operands as signed.
	s	r4,r4			#of r5 by r6 producing r4,mq.
	m	r4,r6			#on entry, mq already = r5.
	m	r4,r6			#(sixteen m steps in all)
	m	r4,r6
	m	r4,r6
	m	r4,r6
	m	r4,r6
	m	r4,r6
	m	r4,r6
	m	r4,r6
	m	r4,r6
	m	r4,r6
	m	r4,r6
	m	r4,r6
	m	r4,r6
	m	r4,r6			
	m	r4,r6
	jc0	mu2			#skip the first correction when not needed
	a	r4,r6			#add r6 in again if r5 >= 2^31	
mu2:	cis	r6,0			#test the top bit of r6
	bnmr	r15			#r6 < 2^31: return with no second correction
	brx	r15			#likewise add in r5 if r6 >= 2^31
	a	r4,r5			#executes along with the returning brx
 

 #       Multiply and normalize for ieee double fractions
 #
 #       int FPdMULT(val1, val1a, val2, val2a, aprod)
 #			val1  =  multiplicand first word
 #			val1a =  multiplicand second word
 #			val2  =  multiplier first word
 #			val2a =  multiplier second word
 #			aprod =  pointer to 4-elt product array
 #		     result = -shiftcount to normalize a 1 into aprod bit 0,
 #			or -10000 if all 0 (should not occur)
 #
 #		The first four arguments arrive in r2..r5; aprod is fetched
 #		from the stack at ARG5_OFFSET(sp) just before the stores.
 #
 #			We form the sum of these products:
 #
 #					       |----| |----|  a2 x b2
 #					|----| |----|  a1 x b2
 #				 |----| |----|         a1 x b1
 #					|----| |----|  a2 x b1
 #				---------------------
 #		      and return |----| |----| |----| |----| normalized.
 #
 #				  r11	 r12	r13    r14
 
ENTRY(FPdMULT)
	stm	r5,REG_OFFSET+20(sp)	#save registers; restored by the lm below

 #		juggle registers:  val1=r2  val1a=r3  val2=r4  val2a=r5
 #		to		   a1 = r2  a2 = r10  b1 = r3  b2 = r6
 #		This must be done before the first call of mu, which
 #		overwrites r4 and takes its operands in r5 and r6.


	mr	r10,r3			#multiplicand (a1,a2) loaded
	mr	r3,r4			#b1 moved out of r4
	mr	r6,r5			#multiplier (b1,b2) loaded

	mr	r5,r10			#r5 = a2, now that b2 is safe in r6
	balix	r15,mu			#multiply a2 x b2
	mts	%mq,r10			#mq = a2; runs before mu gets control
	mfs	%mq,r14			#low word of a2 x b2
	mr	r13,r4			#high word of a2 x b2

	mr	r5,r2			#get a1
	balix	r15,mu			#multiply a1 x b2
	mts	%mq,r5
	mfs	%mq,r15			#r15 = low word of a1 x b2
	a	r13,r15			#add product to r12,r13
	aei	r12,r4,0		#no carry possible to r11 

	mr	r6,r3			#multiplier becomes b1; r5 is still a1
	balix	r15,mu			#multiply a1 x b1
	mts	%mq,r5
	mfs	%mq,r15			#r15 = low word of a1 x b1
 	a	r12,r15
	aei	r11,r4,0		#combine products

	mr	r5,r10			#multiplicand back to a2; r6 is still b1
	balix	r15,mu			#multiply a2 x b1
	mts	%mq,r5
	mfs	%mq,r15			#r15 = low word of a2 x b1
	a	r13,r15
	ae	r12,r4			#combine with previous,
	aei	r11,r11,0		#propagating carries.

 #	Postnormalize
 #
 #	r0 accumulates the (negative) shift count that is returned.
 #	First a single 4-bit left shift of r11,r12,r13,r14 when the top
 #	four bits of r11 are zero, then a one-bit-at-a-time loop.
 #
 #	NOTE(review): the srpi16 uses below only make sense when the
 #	shifted value lands in the partner register of the pair (r11 to
 #	r10, r13 to r12, r14 to r15) and the source register is left
 #	intact; the 28-16 operand then means a shift of 28.  Confirm
 #	against the ROMP instruction reference.

	lis	r0,0
	srpi16	r11,28-16		#Special-case top 4 bits zero --
	jnz	1f			#some of the top 4 bits set: no 4-bit shift
	sli	r11,4			#it's true in all known cases.
	mr	r15,r12			#keep a copy of r12 for its low 28 bits
	sri16	r12,28-16		#r12 = its own top 4 bits
	o	r11,r12			#which move into the bottom of r11
	sli	r15,4			#r15 = old r12 shifted left 4
	srpi16	r13,28-16		#top 4 bits of r13 (expected in r12)
	o	r12,r15			#r12 = old r12 shifted, plus bits from r13
	sli	r13,4
	srpi16	r14,28-16		#top 4 bits of r14 (expected in r15)
	o	r13,r15			#r13 = old r13 shifted, plus bits from r14
	sli	r14,4
	sis	r0,4			#count the four-bit shift
1:	ais	r11,0			#already normalized ?
	jm	3f			#yes, skip 1-bit loop	

2:	ci	r0,-10000		#dFPMULT logic says this can't
	jnh	3f			#happen, so let it happen slooow.
	a	r14,r14			#shift all four words left by one bit
	ae	r13,r13
	ae	r12,r12
	ae	r11,r11
	bnmx	2b			#top bit of r11 still clear: go round again
	dec	r0,1			#count this shift; follows the bnmx so it
					#runs whether or not the branch is taken
3:
	l	r2,ARG5_OFFSET(sp)	#address of result array
	sts	r11,0(r2)		
	sts	r12,4(r2)		#store all four words,
	sts	r13,8(r2)
	sts	r14,12(r2)
	lm	r5,REG_OFFSET+20(sp)	#and return.
	brx	r15
	cas	r2,r0,r0		# move results to r2

 #	NOTE(review): TTNOFRM is not defined in this file (presumably it
 #	comes from LINKG.h); what it expands to cannot be told from here.
	TTNOFRM
