[M3devel] optimizer doesn't do much?

Jay K jay.krell at cornell.edu
Sun May 30 20:11:26 CEST 2010


This little function:


(* Calls the procedure value p, passing the activation record a (a VAR
   parameter, i.e. passed by reference).  The body is nothing but that
   single indirect call. *)
PROCEDURE CallProcx (p: FinallyProc;  VAR a: RT0.RaiseActivation) RAISES ANY =
  BEGIN
    p (a);
  END CallProcx;


generates all of this at -O2 and above:


; Compiler output for CallProcx (x86-64, AT&T operand order: source, destination).
; Stack slots used below:
;   -24(%rbp)  copy of incoming %rdi (first argument, p)
;   -32(%rbp)  copy of incoming %rsi (second argument, address of a)
;    -8(%rbp)  the address that is finally called
;   -16(%rbp)  value loaded into %r10 just before the call
_RTExFrame__CallProcx:
    pushq    %rbp
    movq    %rsp, %rbp
    subq    $32, %rsp       ; reserve 32 bytes for the four slots above
     movq    %rdi, -24(%rbp)
    movq    %rsi, -32(%rbp)
    movq    -24(%rbp), %rax ; why not just use %rdi?
    movq    %rax, -8(%rbp)  ; call target starts out as p itself
    movq    -8(%rbp), %rax ; why? It just stored it!
    testq    %rax, %rax
    je    L2 ; huh? Compare to NIL, then just call it anyway?
    movq    -8(%rbp), %rax ; why? Again, it is already there.
    cmpq    $-1, (%rax)     ; is the first word at p equal to -1?
    jne    L2               ; no: call p directly
    movq    -8(%rbp), %rax ; why? Again, it is already there.
    movq    16(%rax), %rax  ; yes: fetch the word at offset 16 of p ...
    movq    %rax, -16(%rbp) ; ... and keep it for %r10
    movq    -8(%rbp), %rax ; again! yeah %rax got clobbered,
                           ; but surely it could be using
                           ; a different register?
    movq    8(%rax), %rax   ; fetch the word at offset 8 of p ...
    movq    %rax, -8(%rbp)  ; ... and make it the call target
L2:
    movq    -8(%rbp), %rax  ; and again
    movq    -32(%rbp), %rdi ; %rsi still has it..
    movq    -16(%rbp), %r10 ; NOTE(review): on both branches to L2 above,
                            ; -16(%rbp) has not been written by this function
    call    *%rax
    leave
    ret


It is even worse if you omit RAISES ANY;
RAISES ANY at least saves it from calling pushframe.
And shouldn't it be calling fault_proc for NIL?


Here is what similar C code gets me:
Is this a fair comparison?


/* Pointer to a function taking a chain pointer and an activation pointer,
   returning nothing. */
typedef void (*F1)(void* chain, void* activation);

/* Closure record as consumed by call_F1. */
typedef struct {
    long marker;   /* call_F1 compares this against -1 */
    F1 f1;         /* function called when marker == -1 */
    void* chain;   /* passed as f1's first argument */
} Closure_t;


/*
 * Dispatch through cl, which is either a Closure_t or a plain function.
 *
 * If the first word at cl is -1, cl is treated as a closure record and
 * cl->f1 is called with (cl->chain, activation).  Otherwise cl itself is
 * cast to a function pointer and called with (0, activation).
 *
 * cl is dereferenced unconditionally; there is no NULL check.
 */
void call_F1(Closure_t* cl, void* activation)
{
    if (cl->marker == -1)
        cl->f1(cl->chain, activation);
    else
        ((F1)cl)(0, activation);   /* cl is the code address itself */
}


; Compiler output for call_F1, 32-bit x86 (AT&T operand order).
; Arguments are on the stack: 8(%ebp) = cl, 12(%ebp) = activation.
; Both paths rewrite the incoming argument slots in place and leave via jmp
; instead of call/ret.
_call_F1:
    pushl    %ebp
    movl    %esp, %ebp
    movl    8(%ebp), %ecx       ; ecx = cl
    movl    12(%ebp), %eax      ; eax = activation
    cmpl    $-1, (%ecx)         ; cl->marker == -1 ?
    je    L7
    movl    %eax, 12(%ebp)      ; second argument slot = activation (same value it held)
    movl    $0, 8(%ebp)         ; first argument slot = 0
    leave
    jmp    *%ecx                ; jump to cl itself
    .align 4,0x90
L7:
    movl    8(%ecx), %eax       ; eax = word at offset 8 of cl (cl->chain)
    movl    %eax, 8(%ebp)       ; first argument slot = cl->chain
    movl    4(%ecx), %ecx       ; ecx = word at offset 4 of cl (cl->f1)
    leave
    jmp    *%ecx                ; jump to cl->f1


Err... oops, that was for a different processor (32-bit x86). Here is the x86-64 output:


; Compiler output for call_F1, x86-64 (AT&T operand order).
; On entry %rdi = cl and %rsi = activation; %rsi is never modified, so it
; is still the second argument at either jmp.
_call_F1:
    pushq    %rbp
    cmpq    $-1, (%rdi)         ; cl->marker == -1 ?  (flags consumed by je below)
    movq    %rdi, %rax          ; rax = cl
    movq    %rsp, %rbp
    je    L7
    xorl    %edi, %edi          ; first argument = 0
    movq    %rax, %r11
    leave
    jmp    *%r11                ; jump to cl itself
L7:
    movq    16(%rdi), %rdi      ; first argument = word at offset 16 of cl (cl->chain)
    movq    8(%rax), %r11       ; r11 = word at offset 8 of cl (cl->f1)
    leave
    jmp    *%r11                ; jump to cl->f1


I don't always care (at least it works), but it does seem surprisingly bad.


 - Jay

 		 	   		  


More information about the M3devel mailing list