1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
__asm {
;-----------------------------------------------------------------------
; _FullComplexAuto
;
; Inputs (per the original register notes):
;   r0      : in  -- working copy kept in r6; a second stream pointer
;                    r7 = r6 + 4 is derived from it
;   r1      : sc  -- working copy kept in r5
;   (sp-28) : idx -- read into r3 after the r6/r7 and d6/d7 pushes
;
; Per outer iteration (160 passes, AUTO_LEN/2 per the original note):
;   - d12..d15 are cleared, then 10 blocks of 128 inner passes run.
;   - Each inner pass accumulates two taps of in * conj(sc)
;       re += in_r*sc_r + in_i*sc_i
;       im += in_i*sc_r - in_r*sc_i
;     for two input streams: d8/d9 (stream read through r6) and
;     d10/d11 (stream read through r7).
;   - After each block the partial sums are shifted right by 14 and
;     added into d12..d15.
;   - |re| + |im| is formed for both streams, the pair d12:d13 is stored
;     through r3 (post-incremented), and r0 is advanced by 8.
;
; Preserved: r6, r7, d6, d7 (pushed on entry, popped before rts).
; Status:    bit mask #$4 of sr.l (saturation, per the original notes)
;            is cleared on entry and set unconditionally on exit.
;
; NOTE(review): the in_r(n)/in_i(n) indices in the per-line comments are
; carried over from the original listing; confirm them against the data
; layout before relying on them.
;-----------------------------------------------------------------------
    section .text local
    global  _FullComplexAuto
    align   16
_FullComplexAuto type func
    [ push    r6                        ; preserve r6/r7 (restored before rts)
      push    r7
    ]
    [ push    d6                        ; preserve d6/d7 (restored before rts)
      push    d7
    ]
    [ bmclr   #$4,sr.l                  ; saturation off
      tfra    r0,r6                     ; r6 -> in
    ]
    [ move.l  (sp-28),r3                ; r3 -> idx
      move.l  #4,n0                     ; n0 = 4, offset register for the (rN)+n0 loads
    ]
      adda    #4,r6,r7                  ; r7 = r6 + 4 (second input stream)
    [ dosetup1 inner1
      doen1   #160                      ; AUTO_LEN/2
    ]
      loopstart1
inner1
    [ clr     d12                       ; block-summed accumulators for this output pair
      clr     d13
      clr     d14
      clr     d15
      tfra    r1,r5                     ; r5 -> sc
      move.2f (r6)+n0,d4:d5             ; load in_r(1),in_i(1)
    ]
    [ move.2f (r5)+,d0:d1               ; load sc_r(1),sc_i(1)
      move.2f (r7)+n0,d6:d7             ; load in_r(4),in_i(4)
    ]                                   ; software pipelining
    [ dosetup2 inner2
      doen2   #10
    ]
      loopstart2
inner2
    [ clr     d8                        ; per-block accumulators
      clr     d9
      clr     d10
      clr     d11
    ]
    [ dosetup3 inner3
      doen3   #128
    ]
      loopstart3
inner3
    [ mac     d4,d0,d8                  ; A_r(1) += in_r(1) * sc_r(1)
      mac     d5,d0,d9                  ; A_i(1) += in_i(1) * sc_r(1)
      mac     d6,d0,d10                 ; A_r(4) += in_r(4) * sc_r(1)
      mac     d7,d0,d11                 ; A_i(4) += in_i(4) * sc_r(1)
      move.2f (r6)+n0,d2:d3             ; load in_r(4),in_i(4)
    ]
    [ mac     d5,d1,d8                  ; A_r(1) += in_i(1) * sc_i(1)
      mac     -d4,d1,d9                 ; A_i(1) += -in_r(1) * sc_i(1)
      mac     d7,d1,d10                 ; A_r(4) += in_i(4) * sc_i(1)
      mac     -d6,d1,d11                ; A_i(4) += -in_r(4) * sc_i(1)
      move.2f (r5)+,d0:d1               ; load sc_r(2),sc_i(2)
      move.2f (r7)+n0,d6:d7             ; load in_r(8),in_i(8)
    ]
    [ mac     d2,d0,d8                  ; A_r(1) += in_r(4) * sc_r(2)
      mac     d3,d0,d9                  ; A_i(1) += in_i(4) * sc_r(2)
      mac     d6,d0,d10                 ; A_r(4) += in_r(8) * sc_r(2)
      mac     d7,d0,d11                 ; A_i(4) += in_i(8) * sc_r(2)
      move.2f (r6)+n0,d4:d5             ; load in_r(8),in_i(8)
    ]
    [ mac     d3,d1,d8                  ; A_r(1) += in_i(4) * sc_i(2)
      mac     -d2,d1,d9                 ; A_i(1) += -in_r(4) * sc_i(2)
      mac     d7,d1,d10                 ; A_r(4) += in_i(8) * sc_i(2)
      mac     -d6,d1,d11                ; A_i(4) += -in_r(8) * sc_i(2)
      move.2f (r5)+,d0:d1               ; load sc_r(3),sc_i(3)
      move.2f (r7)+n0,d6:d7             ; load in_r(12),in_i(12)
    ]
      loopend3
    [ asrr    #14,d8                    ; scaling
      asrr    #14,d9
      asrr    #14,d10
      asrr    #14,d11
    ]
    [ add     d8,d12,d12                ; A_r(1)
      add     d9,d13,d13                ; A_i(1)
      add     d10,d14,d14               ; A_r(4)
      add     d11,d15,d15               ; A_i(4)
    ]
      loopend2
    [ abs     d12
      abs     d13
      abs     d14
      abs     d15
      adda    #8,r0,r0                  ; advance the input base for the next outer pass
    ]
    [ add     d12,d13,d12               ; |A_r(1)| + |A_i(1)|
      add     d14,d15,d13               ; |A_r(4)| + |A_i(4)|
      tfra    r0,r6                     ; r6 -> in
      tfra    r1,r5                     ; r5 -> sc
    ]
    [ move.2l d12:d13,(r3)+             ; store both results through r3, post-increment
      adda    #4,r6,r7                  ; r7 = r6 + 4 (second input stream)
    ]
      loopend1
      bmset   #$4,sr.l                  ; saturation mode on
      nop                               ; NOTE(review): presumably padding after the
      nop                               ; sr update; confirm before removing
    [ pop     d6
      pop     d7
    ]
    [ pop     r6
      pop     r7
    ]
      rts
    endsec
};