Omnimaga: The Coders Of Tomorrow
Welcome, Guest. Please login or register.
 
Omnimaga: The Coders Of Tomorrow
23 May, 2013, 23:32:00 *
Welcome, Guest. Please login or register.

Login with username, password and session length
 
   home   news downloads projects tutorials misc forums rules new posts irc about Login Register  
+-OmnomIRC

You must Register, be logged in and have at least 40 posts to use this shout-box! If it still doesn't show up afterward, it might be that OmnomIRC is disabled for your group or under maintenance.

Note: You can also use an IRC client like mIRC, X-Chat or Mibbit to connect to an EFnet server and #omnimaga.

Pages: 1 2 [3]   Go Down
  Print  
Author Topic: 24 bit multiplication - multiply two 24 bit numbers to get 48 bit result  (Read 1783 times) Bookmark and Share
0 Members and 1 Guest are viewing this topic.
FloppusMaximus
LV5 Advanced (Next: 300)
*****
Offline Offline

Last Login: 09 May, 2013, 05:05:29
Date Registered: 03 October, 2010, 00:02:51
Posts: 286

Total Post Ratings: +52

View Profile
« Reply #30 on: 11 December, 2011, 23:41:31 »
+1

My first multiplication routine takes 2746 - 4570 cycles, the second takes 1680 - 2880 cycles.
Oh boy, optimization time Cheesy

The best I have so far is somewhere around 1800 cycles average (I'm too lazy to work out the exact probabilities at the moment, and not counting memory delays) using a squaring table and undocumented IX instructions.  Input is BDE and CHL, output is BCDEAL.  This routine works by expanding the formula 2xy = x²+y²-|x-y|², summed over each of the 9 pairs of bytes in the input.

(I'm not saying this is practical - unless you really have thousands of 24-bit multiplications to perform, you don't need this kind of speed.  This is just for fun.)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
SUBFIRST .macro src1, src2, hdest, ldest
exx
ld a, src1
sub src2
jr nc, $ + 4
neg
exx
ld l, a
ld a, ldest
sub (hl)
ld ldest, a
inc h
ld a, hdest
sbc a, (hl)
ld hdest, a
  .endm

SUBNEXT .macro src1, src2, hdest, ldest
dec h
ex af, af'
exx
ld a, src1
sub src2
jr nc, $ + 4
neg
exx
ld l, a
ex af, af'
ld a, ldest
sbc a, (hl)
ld ldest, a
inc h
ld a, hdest
sbc a, (hl)
ld hdest, a
  .endm

BDE_times_CHL_sqrdiff_v3:
ld a, d
exx
ld h, high(sqrtab)
ld l, a
ld e, (hl)
inc h
ld d, (hl) ; DE = d²
exx
ld a, b
exx
ld l, a
ld b, (hl)
dec h
ld c, (hl) ; BC = b²
exx
ld a, e
exx
ld l, a
ld a, (hl)
inc h
ld h, (hl)
ld l, a ; HL = e²
call BC_DE_HL_times_10101
push bc
push hl
  push de
   exx
   ld a, h
   exx
   ld h, high(sqrtab)
   ld l, a
   ld e, (hl)
   inc h
   ld d, (hl) ; DE = h²
   exx
   ld a, c
   exx
   ld l, a
   ld b, (hl)
   dec h
   ld c, (hl) ; BC = c²
   exx
   ld a, l
   exx
   ld l, a
   ld a, (hl)
   inc h
   ld h, (hl)
   ld l, a ; HL = l²
   call BC_DE_HL_times_10101
   pop ix
  add ix, de
  pop de
adc hl, de
ex de, hl
pop hl
adc hl, bc
ld b, h
ld c, l ; BCDEIX = total
push af

ld h, high(sqrtab)
SUBFIRST e, l, ixh, ixl
SUBNEXT  d, h, d, e
SUBNEXT  b, c, b, c
jp nc, BDE_times_CHL_sqrdiff_v3_nc1
pop af
ccf
push af
BDE_times_CHL_sqrdiff_v3_nc1:

inc b

dec h
SUBFIRST e, h, e, ixh
SUBNEXT  d, c, c, d
jr nc, BDE_times_CHL_sqrdiff_v3_nc2
dec b
jp nz, BDE_times_CHL_sqrdiff_v3_nc2
pop af
ccf
push af
BDE_times_CHL_sqrdiff_v3_nc2:

dec h
SUBFIRST d, l, e, ixh
SUBNEXT  b, h, c, d
jr nc, BDE_times_CHL_sqrdiff_v3_nc3
dec b
jp nz, BDE_times_CHL_sqrdiff_v3_nc3
pop af
ccf
push af
BDE_times_CHL_sqrdiff_v3_nc3:

inc c

dec h
SUBFIRST b, l, d, e
jr nc, BDE_times_CHL_sqrdiff_v3_nc4
dec c
jp nz, BDE_times_CHL_sqrdiff_v3_nc4
dec b
jp nz, BDE_times_CHL_sqrdiff_v3_nc4
pop af
ccf
push af
BDE_times_CHL_sqrdiff_v3_nc4:

dec h
SUBFIRST e, c, d, e
pop hl
jr nc, BDE_times_CHL_sqrdiff_v3_nc5
dec c
jp nz, BDE_times_CHL_sqrdiff_v3_nc5
dec b
jp nz, BDE_times_CHL_sqrdiff_v3_nc5
inc l
BDE_times_CHL_sqrdiff_v3_nc5:

dec b
dec c

rr l
rr b
rr c
rr d
rr e
ld a, ixl
ld l, a
ld a, ixh
rra
rr l
ret


BC_DE_HL_times_10101:
push bc
ld a, h
ex af, af'
sub a
ld c, a
ld b, l
add hl, bc
adc a, a
ld b, e
add hl, bc
adc a, c ; AHL = [ L+H+E L ]
pop bc
push hl
push bc
  ld c, a
  ld b, 0
  ex af, af'
  ld h, a
  add hl, bc ; no way this can carry (initial HL is a square)
  ld c, a
  ld b, e
  sub a
  add hl, bc
  adc a, a ; AHL(SP+2) = [ H+E L+H L+H+E L ]
  add hl, de
  adc a, 0 ; AHL(SP+2) = [ H+E+D L+H+E L+H+E L ]
  pop bc
add hl, bc
adc a, 0 ; AHL(SP) = [ H+E+D+B L+H+E+C L+H+E L ]
ld e, d
ld d, c
add hl, de
adc a, b
jr nc, BC_DE_HL_times_10101_nc1
inc b ; BAHL(SP) = [ B B H+E+D+C+B L+H+E+D+C L+H+E L ]
BC_DE_HL_times_10101_nc1:
add a, e
jr nc, BC_DE_HL_times_10101_nc2
inc b ; BAHL(SP) = [ B D+B H+E+D+C+B L+H+E+D+C L+H+E L ]
BC_DE_HL_times_10101_nc2:
pop de
add a, c
ld c, a
ret nc
inc b ; BCHLDE = [ B D+C+B H+E+D+C+B L+H+E+D+C L+H+E L ]
ret

To get back to the topic somewhat, ACagliano, it sounds like you're more interested in squaring than in general multiplication.  Squaring can be considerably faster, especially if you use a lookup table (e.g., my best 16-bit squaring routine is around 170 cycles, versus around 800 for general multiplication.)
Logged
Pages: 1 2 [3]   Go Up
  Print  
 
Jump to:  

Powered by EzPortal
Powered by MySQL Powered by SMF 1.1.18 | SMF © 2013, Simple Machines Powered by PHP
Page created in 0.622 seconds with 31 queries.
Skin by DJ Omnimaga edited from SMF default theme with the help of tr1p1ea.
All programs, games and songs avaliable on this website are property of their respective owners.
Best viewed in Opera, Firefox, Chrome and Safari with a resolution of 1024x768 or above.