1636
ASM / Arbitrary Precision Multiplication (z80)
« on: September 17, 2012, 09:22:38 am »
In case anybody else wanted some arbitrary precision multiplication, here is a routine 
)

Code: [Select]
;===============================================================
AP_Mul_AP:
;===============================================================
;Inputs:
; HL points to the first number (little-endian, size prefix 2 bytes)
; DE points to the second number (little-endian, size prefix 2 bytes)
; BC points to the output location
;Output:
; The RAM at HL contains the product (little-endian with
; size prefix). Size is adjusted so there won't be zeroes
; at the end.
;Notes:
; This will erase bytes of RAM at (HL) equal to the
; size of the first number plus the size of the second.
; So if you use saveSScreen:
; First number uses 2+M bytes
; Second number uses 2+N bytes
; Output number uses 2+M+N bytes
; At most, 2+N+2+M+2+M+N=768.
; 2+N+2+M+2+M+N = 768
; 6+2M+2N = 768
; 2(M+N) = 762
; M+N = 381
; In otherwords, you can multiply two, 190 byte numbers. In
; Decimal, this comes out to roughly two 457 digit numbers
; to get up to 915 digit number. It requires 14952 bytes of
; RAM to use two numbers >9000 digits
;===============================================================
di
;First, we get the size
ld (output),bc ;4 bytes, save the output location
ld c,(hl)
inc hl
ld b,(hl)
inc hl
ld (Num1Loc),hl ;3 bytes, save the location of the first number
ld (Num1Size),bc ;4 bytes, save the size
ex de,hl
ld e,(hl)
inc hl
ld d,(hl)
add hl,de
ld (Num2Loc),hl ;3 bytes, save the location
ld (Num2Size),de ;4 bytes, save the size
ex de,hl
add hl,bc ;size of the output RAM
;Now we clear out the bytes for the output
ld b,h
ld c,l
ld hl,(output)
ld (hl),c
inc hl
ld (hl),b
inc hl
ld (output),hl
ld (Num3Size),bc
xor a
ld (hl),a
cpi
jp pe,$-3
;Now we start multiplying stuff
;We need to do:
; hl=(Num2Loc)
; de=(Num1Loc)
; ix=(output)
; bc=(Num2Size)
;(hl)_Times_(de)_To_(ix)
exx
ld bc,(Num3Size)
exx
ld bc,(Num2Size)
; ld hl,(Num2Loc)
ex de,hl
Loop1:
push bc
ld b,8
Loop2:
;===============================================================
; rl (output)
;===============================================================
exx
ld hl,(output)
ld d,b
ld e,c
dec de
inc d
inc e
or a
rl (hl)
inc hl
dec e
jr nz,$-4
dec d
jr nz,$-7
exx
;===============================================================
rlc (hl)
jr nc,NoAdd
;add (output),(Num1Loc)
exx
ld de,(Num1Loc)
push bc
ld bc,(Num1Size)
dec bc
inc c
inc b
ld hl,(output)
or a
AddLoop:
ld a,(de)
adc a,(hl)
ld (hl),a
inc hl
inc de
dec c
jr nz,AddLoop
dec b
jr nz,AddLoop
jr nc,$+3
inc (hl)
pop bc
exx
NoAdd:
djnz Loop2
pop bc
cpd
jp pe,Loop1
ld hl,(output)
ld bc,(Num3Size)
add hl,bc
dec hl
inc bc
xor a
cpd
jp po,$+5
jr z,$-5
ld hl,(output)
dec hl
ld (hl),b
dec hl
ld (hl),c
ret
I used this in a test app and wrote a routine to multiply two base 10 integers in Str1 and Str2 respectively (I had to convert the base 10 string to hex data, then multiply, then convert back to base 10). I then used this routine in BatLib to make the routine for base conversion that handles large numbers (hundreds of digits). Feel free to supply other routines or optimisations, too! (I know some of you will find some 