|
// Original program: if (x < y) x = y; else y = x;
// Neither delay slot is used yet - both branches are followed by a nop.
sethi %hi(x), %l0
ld [%l0 + %lo(x)], %l0 // l0 = x
sethi %hi(y), %l1
ld [%l1 + %lo(y)], %l1 // l1 = y
cmp %l0, %l1 // compare x with y; sets the flags tested by "bl"
bl L1 // if (x < y) branch to L1
nop // delay slot of "bl" (no useful work)
sethi %hi(y), %l2
st %l0, [%l2 + %lo(y)] // y = l0 (x)
ba L2 // unconditional branch: skip over the code at L1
nop // delay slot of "ba" (no useful work)
L1: sethi %hi(x), %l2
st %l1, [%l2 + %lo(x)] // x = l1 (y)
L2:
|
Improved program:
// The store "y = x" now occupies the delay slot of "ba L2" instead of a nop.
// The delay slot of "bl L1" still holds a nop.
sethi %hi(x), %l0
ld [%l0 + %lo(x)], %l0 // l0 = x
sethi %hi(y), %l1
ld [%l1 + %lo(y)], %l1 // l1 = y
cmp %l0, %l1 // compare x with y; sets the flags tested by "bl"
bl L1 // if (x < y) branch to L1
nop // delay slot of "bl" (no useful work)
sethi %hi(y), %l2
ba L2 // skip over the code at L1
st %l0, [%l2 + %lo(y)] // y = l0 (x); delay slot: will be executed before branch takes place
L1: sethi %hi(x), %l2
st %l1, [%l2 + %lo(x)] // x = l1 (y)
L2:
// Program with the delay slot of "ba L2" filled by the store "y = x";
// the delay slot of "bl L1" is still a nop.
sethi %hi(x), %l0
ld [%l0 + %lo(x)], %l0 // l0 = x
sethi %hi(y), %l1
ld [%l1 + %lo(y)], %l1 // l1 = y
cmp %l0, %l1 // compare x with y; sets the flags tested by "bl"
bl L1 // if (x < y) branch to L1
nop // delay slot of "bl" (no useful work)
sethi %hi(y), %l2
ba L2 // Skip over else part !
st %l0, [%l2 + %lo(y)] // y = l0 (x); executed in the delay slot of "ba"
L1: sethi %hi(x), %l2
st %l1, [%l2 + %lo(x)] // x = l1 (y)
L2:
Filling the delay slot of "bl L1" with the instruction that precedes it (cmp %l0, %l1) does NOT work, because this instruction is a compare instruction that sets the flags for the conditional branch instruction.
If we move the compare instruction into the delay slot of the branch instruction, like this:
// INCORRECT variant: "cmp" has been moved into the delay slot of "bl L1".
sethi %hi(x), %l0
ld [%l0 + %lo(x)], %l0 // l0 = x
sethi %hi(y), %l1
ld [%l1 + %lo(y)], %l1 // l1 = y
bl L1 // tests the flags BEFORE the "cmp" below has set them
cmp %l0, %l1 // delay slot: too late to set the flags for "bl L1"
sethi %hi(y), %l2
ba L2 // Skip over else part !
st %l0, [%l2 + %lo(y)] // y = l0 (x)
L1: sethi %hi(x), %l2
st %l1, [%l2 + %lo(x)] // x = l1 (y)
L2:
|
Then the flags will NOT be set correctly when the bl L1 instruction is executed.
// THEN part: runs when "bl L1" does not branch (x >= y) and performs y = x.
// ELSE part: runs when "bl L1" branches (x < y) and performs x = y.
sethi %hi(x), %l0
ld [%l0 + %lo(x)], %l0 // l0 = x
sethi %hi(y), %l1
ld [%l1 + %lo(y)], %l1 // l1 = y
cmp %l0, %l1 // compare x with y; sets the flags tested by "bl"
bl L1 // if (x < y) branch to the ELSE part
nop // delay slot of "bl" - the slot we would like to fill
sethi %hi(y), %l2 // Instructions in THEN part
ba L2 // skip over the ELSE part
st %l0, [%l2 + %lo(y)] // THEN part: y = l0 (x), in the delay slot of "ba"
L1: sethi %hi(x), %l2
st %l1, [%l2 + %lo(x)] // Instructions in ELSE part
L2:
|
We cannot shuffle instructions arbitrarily - the most important goal is to keep the program correct.
I have highlighted the instructions in the THEN part in blue and the instructions in the ELSE part in magenta
OK, consider the program when we have moved the instruction "sethi %hi(x), %l2" (at label L1) into the delay slot:
// The first ELSE instruction (formerly at label L1) has been moved into the
// delay slot of the ordinary (non-annulling) branch "bl L1".
sethi %hi(x), %l0
ld [%l0 + %lo(x)], %l0 // l0 = x
sethi %hi(y), %l1
ld [%l1 + %lo(y)], %l1 // l1 = y
cmp %l0, %l1 // compare x with y; sets the flags tested by "bl"
bl L1 // if (x < y) branch to L1
// The marked delay-slot instruction below executes whether or not "bl" branches.
sethi %hi(x), %l2 <------ !!!
sethi %hi(y), %l2 // overwrites %l2, replacing the value set in the delay slot
ba L2 // skip over the code at L1
st %l0, [%l2 + %lo(y)] // y = l0 (x); delay slot of "ba"
L1:
st %l1, [%l2 + %lo(x)] // x = l1 (y); %l2 was set in the delay slot of "bl"
L2:
|
// Case x < y: the delay-slot instruction (1) executes, then control reaches (2) at L1.
bl L1 // if (x < y), "bl" will branch to L1....
sethi %hi(x), %l2 ......(1)
L1:
st %l1, [%l2 + %lo(x)] ......(2)
|
// Case x >= y: the branch is not taken, yet the delay-slot instruction
// (which belongs to the ELSE part) still executes before the THEN part.
bl L1 // if (x >= y), "bl" will NOT branch to L1....
sethi %hi(x), %l2 **** Also executed !!!
sethi %hi(y), %l2
ba L2
st %l0, [%l2 + %lo(y)]
L2:
|
We see that in this case, ONE instruction from the ELSE part is executed.
(Note that in THIS example, the instruction from the ELSE part will not cause an error in the execution, but that is NOT true in general.)
The annulling branch instruction is denoted by adding ,a to the branch instruction
The above program can be written using an annulling branch instruction as follows:
// Version using the annulling branch "bl,a": its delay-slot instruction is
// voided when the branch is not taken, so the ELSE instruction in the slot
// no longer runs on the THEN path.
sethi %hi(x), %l0
ld [%l0 + %lo(x)], %l0 // l0 = x
sethi %hi(y), %l1
ld [%l1 + %lo(y)], %l1 // l1 = y
cmp %l0, %l1 // compare x with y; sets the flags tested by "bl,a"
bl,a L1 // Will void next instruct if no branch
sethi %hi(x), %l2 // delay slot: first ELSE instruction; voided when x >= y
sethi %hi(y), %l2
ba L2 // skip over the code at L1
st %l0, [%l2 + %lo(y)] // y = l0 (x); delay slot of "ba"
L1:
st %l1, [%l2 + %lo(x)] // x = l1 (y)
L2:
|
// In this case the instruction in the delay slot of "bl,a" is voided (not executed).
bl,a L1 // if (x >= y), "bl" will NOT branch to L1....
|
Now the program will execute the same instructions, in the same sequence, as the original program in BOTH cases (in both the true and the false case).
Hence, the transformed program is equivalent to the original program and it is correct.