[M3devel] optimizer doesn't do much?
Jay K
jay.krell at cornell.edu
Tue Jun 1 19:58:50 CEST 2010
Here's a version from cm3cg with lots of optimizations turned on, a few turned off, *and* volatile generally removed:
xorl %eax, %eax
testq %rdi, %rdi
movq %rsi, %rdx
je L3
cmpq $-1, (%rdi)
movq %rdi, %rax
jne L3
movq 16(%rdi), %rax
movq %rax, -16(%rsp)
movq 8(%rdi), %rax
L3:
movq -16(%rsp), %r10
movq %rdx, %rdi
movq %rax, %r11
jmp *%r11
Much better!
- Jay
----------------------------------------
> From: jay.krell at cornell.edu
> To: m3devel at elegosoft.com
> Subject: optimizer doesn't do much?
> Date: Sun, 30 May 2010 18:11:26 +0000
>
>
> This little function:
>
>
> PROCEDURE CallProcx (p: FinallyProc; VAR a: RT0.RaiseActivation) RAISES ANY =
> BEGIN
> p (a);
> END CallProcx;
>
>
> generates all of this at -O2 and above:
>
>
> _RTExFrame__CallProcx:
> pushq %rbp
> movq %rsp, %rbp
> subq $32, %rsp
> movq %rdi, -24(%rbp)
> movq %rsi, -32(%rbp)
> movq -24(%rbp), %rax ; why not just use %rdi?
> movq %rax, -8(%rbp)
> movq -8(%rbp), %rax ; why? It just stored it!
> testq %rax, %rax
> je L2 ; huh? Compare to NIL, then just call it anyway?
> movq -8(%rbp), %rax ; why? Again, it is already there.
> cmpq $-1, (%rax)
> jne L2
> movq -8(%rbp), %rax ; why? Again, it is already there.
> movq 16(%rax), %rax
> movq %rax, -16(%rbp)
> movq -8(%rbp), %rax ; again! yeah %rax got clobbered,
> ; but surely it could be using
> ; a different register?
> movq 8(%rax), %rax
> movq %rax, -8(%rbp)
> L2:
> movq -8(%rbp), %rax ; and again
> movq -32(%rbp), %rdi ; %rsi still has it..
> movq -16(%rbp), %r10
> call *%rax
> leave
> ret
>
>
> It is even worse if you omit RAISES ANY;
> RAISES ANY saves it from calling pushframe.
> And shouldn't it be calling fault_proc for NIL?
>
>
> Here is what similar C code gets me:
> Is this a fair comparison?
>
>
> typedef void (*F1)(void* chain, void* activation);
>
> typedef struct {
> long marker;
> F1 f1;
> void* chain;
> } Closure_t;
>
>
> void call_F1(Closure_t* cl, void* activation)
> {
> if (cl->marker == -1)
> cl->f1(cl->chain, activation);
> else
> ((F1)cl)(0, activation);
> }
>
>
> _call_F1:
> pushl %ebp
> movl %esp, %ebp
> movl 8(%ebp), %ecx
> movl 12(%ebp), %eax
> cmpl $-1, (%ecx)
> je L7
> movl %eax, 12(%ebp)
> movl $0, 8(%ebp)
> leave
> jmp *%ecx
> .align 4,0x90
> L7:
> movl 8(%ecx), %eax
> movl %eax, 8(%ebp)
> movl 4(%ecx), %ecx
> leave
> jmp *%ecx
>
>
> ...err, oops, different processor (that was 32-bit x86); here is the x86-64 version:
>
>
> _call_F1:
> pushq %rbp
> cmpq $-1, (%rdi)
> movq %rdi, %rax
> movq %rsp, %rbp
> je L7
> xorl %edi, %edi
> movq %rax, %r11
> leave
> jmp *%r11
> L7:
> movq 16(%rdi), %rdi
> movq 8(%rax), %r11
> leave
> jmp *%r11
>
>
> I don't always care -- at least it works -- but it does seem surprisingly bad.
>
>
> - Jay
>
>
More information about the M3devel
mailing list