[M3devel] optimizer doesn't do much?

Jay K jay.krell at cornell.edu
Tue Jun 1 19:58:50 CEST 2010


Here's a version from cm3cg with lots of optimizations turned on, a few turned off, *and* volatile generally removed:

    xorl    %eax, %eax
    testq    %rdi, %rdi
    movq    %rsi, %rdx
    je    L3
    cmpq    $-1, (%rdi)
    movq    %rdi, %rax
    jne    L3
    movq    16(%rdi), %rax
    movq    %rax, -16(%rsp)
    movq    8(%rdi), %rax
L3:
    movq    -16(%rsp), %r10
    movq    %rdx, %rdi
    movq    %rax, %r11
    jmp    *%r11

Much better!

 - Jay


----------------------------------------
> From: jay.krell at cornell.edu
> To: m3devel at elegosoft.com
> Subject: optimizer doesn't do much?
> Date: Sun, 30 May 2010 18:11:26 +0000
>
>
> This little function:
>
>
> PROCEDURE CallProcx (p: FinallyProc;  VAR a: RT0.RaiseActivation) RAISES ANY =
>   BEGIN
>     p (a);
>   END CallProcx;
>
>
> generates all of this at -O2 and above:
>
>
> _RTExFrame__CallProcx:
>     pushq    %rbp
>     movq    %rsp, %rbp
>     subq    $32, %rsp
>      movq    %rdi, -24(%rbp)
>     movq    %rsi, -32(%rbp)
>     movq    -24(%rbp), %rax ; why not just use %rdi?
>     movq    %rax, -8(%rbp)
>     movq    -8(%rbp), %rax ; why? It just stored it!
>     testq    %rax, %rax
>     je    L2 ; huh? Compare to NIL, then just call it anyway?
>     movq    -8(%rbp), %rax ; why? Again, it is already there.
>     cmpq    $-1, (%rax)
>     jne    L2
>     movq    -8(%rbp), %rax ; why? Again, it is already there.
>     movq    16(%rax), %rax
>     movq    %rax, -16(%rbp)
>     movq    -8(%rbp), %rax ; again! yeah %rax got clobbered,
>                            ; but surely it could be using
>                            ; a different register?
>     movq    8(%rax), %rax
>     movq    %rax, -8(%rbp)
> L2:
>     movq    -8(%rbp), %rax  ; and again
>     movq    -32(%rbp), %rdi ; %rsi still has it..
>     movq    -16(%rbp), %r10
>     call    *%rax
>     leave
>     ret
>
>
> It is even worse if you omit RAISES ANY.
> RAISES ANY saves it from calling pushframe.
> And shouldn't it be calling fault_proc for NIL?
>
>
> Here is what similar C code gets me:
> Is this a fair comparison?
>
>
> typedef void (*F1)(void* chain, void* activation);
>
> typedef struct {
>     long marker;
>     F1 f1;
>     void* chain;
> } Closure_t;
>
>
> void call_F1(Closure_t* cl, void* activation)
> {
>     if (cl->marker == -1)
>         cl->f1(cl->chain, activation);
>     else
>         ((F1)cl)(0, activation);
> }
>
>
> _call_F1:
>     pushl    %ebp
>     movl    %esp, %ebp
>     movl    8(%ebp), %ecx
>     movl    12(%ebp), %eax
>     cmpl    $-1, (%ecx)
>     je    L7
>     movl    %eax, 12(%ebp)
>     movl    $0, 8(%ebp)
>     leave
>     jmp    *%ecx
>     .align 4,0x90
> L7:
>     movl    8(%ecx), %eax
>     movl    %eax, 8(%ebp)
>     movl    4(%ecx), %ecx
>     leave
>     jmp    *%ecx
>
>
> ...err, oops, different processor (the listing above is 32-bit x86); here is the x86-64 version:
>
>
> _call_F1:
>     pushq    %rbp
>     cmpq    $-1, (%rdi)
>     movq    %rdi, %rax
>     movq    %rsp, %rbp
>     je    L7
>     xorl    %edi, %edi
>     movq    %rax, %r11
>     leave
>     jmp    *%r11
> L7:
>     movq    16(%rdi), %rdi
>     movq    8(%rax), %r11
>     leave
>     jmp    *%r11
>
>
> I don't always care, at least it works, but it does seem surprisingly bad.
>
>
>  - Jay
>
>
 		 	   		  


More information about the M3devel mailing list