;
; See the comments on header files in ssdaxpy.asm.
;
include model.h
INCLUDE PROLOGUE.H
	public dnrm2
; 8087 Euclidean (L2) norm of a vector of doubles
; dnrm2 ()
;
.8087
.286C
dnrm2	 proc	 near
	push	bp
	mov	bp,sp
; Current syntax dnrm2(&x,n,&nrm,stride) -- > l2 norm of x stored in h
; &x in s , n in cx , &nrm in di
;
;	This is equivalent to the following C function.
;	dnrm2(xptr,n,normptr,stride)
;	double *xptr,*normptr;
;	int n,stride;
;	{
;		int i;
;		*xptr=0.0;
;		for(i=0;i<n;i++)
;			  *normptr+=(*xptr)*(*xptr);
;			  xptr+=stride;
;		}
;	}
;
;	The stride is the distance between consecutive elements of x in bytes.
;
;	For most computers you must be carefull to avoid overflow and underflow
;	in this calculation. As the 8087 registers have extra width in the
;	exponent field you can relax if you do it this way.
;
	push di
	push si
	push cx
	push ax
	mov si,4[bp]   ;vector
	mov cx,6[bp]   ;vector length
	mov ax,10[bp]  ;stride
	finit
	fldz	       ;begin with zero
done:
	fld qword ptr [si]    ;put two copies of an element of x on the stack
	fld st(0)
	fmul		      ;multiply, don't pop the stack
	add si,ax	      ;increment the pointer to the vector
	fadd		      ;accumulate the norm
	loop done
	fsqrt
	fwait		      ;wait for the square root to finish
	mov di,8[bp]	      ;get the address of the norm
	fstp qword ptr [di]   ;and store
	pop ax
	pop cx
	pop si
	pop di
	pop bp
	ret
dnrm2	 endp
INCLUDE EPILOGUE.H
end
