Yes. An easy way to show this is to compile the following two functions that do the same thing and then look at the disassembly.
#include <stdint.h>
#include <math.h>
uint32_t foo1(uint32_t shftAmt) {
return pow(2, shftAmt);
}
/* Computes 2^shftAmt with a single integer shift.
 *
 * The shifted value is an unsigned 32-bit one: a plain `1` has type int, and
 * shifting it into the sign bit (shftAmt == 31) is undefined behaviour in C.
 * shftAmt must be less than 32; larger shift counts are undefined for a
 * 32-bit operand. */
uint32_t foo2(uint32_t shftAmt) {
    return ((uint32_t)1 << shftAmt);
}
cc -arch armv7 -O3 -S -o - shift.c
(I happen to find ARM asm easier to read, but if you want x86 just remove the -arch armv7 flag.)
@ Compiler output for foo1: 2^shftAmt computed through the floating-point library.
@ The argument arrives in r0 and the result is handed back in r0.
_foo1:
@ BB#0:
@ Save r7 and the return address (lr) on the stack.
push {r7, lr}
@ Copy the integer argument from r0 into FP register s0.
vmov s0, r0
@ r7 = sp (frame pointer for this call frame, presumably).
mov r7, sp
@ Convert the unsigned 32-bit integer in s0 to a double in d16.
vcvt.f64.u32 d16, s0
@ Move the double into the r0:r1 register pair for the call below.
vmov r0, r1, d16
@ The source calls pow(2, x); the compiler emitted a call to exp2 instead.
blx _exp2
@ Move the double returned in r0:r1 back into FP register d16.
vmov d16, r0, r1
@ Convert the double back to an unsigned 32-bit integer.
vcvt.u32.f64 s0, d16
@ Move the integer result from the FP register into r0.
vmov r0, s0
@ Restore r7 and return by popping the saved lr straight into pc.
pop {r7, pc}
@ Compiler output for foo2: 2^shftAmt computed with one integer shift.
@ The shift amount arrives in r0 and the result is handed back in r0.
_foo2:
@ BB#0:
@ r1 = 1, the value to be shifted.
movs r1, #1
@ r0 = r1 << r0, i.e. 1 shifted left by the shift amount.
lsl.w r0, r1, r0
@ Return to the caller.
bx lr
You can see that foo2 takes only 2 instructions (plus the return), whereas foo1 takes several.
foo1 has to move the data to the FP HW registers (vmov), convert the integer to a double (vcvt.f64.u32), call the exp2 function, convert the answer back to a uint (vcvt.u32.f64), and then move it from the FP HW back to the GP registers.
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…