nmh-workers
[Top] [All Lists]

[Nmh-workers] strcasecmp.c and gcc -O2

2007-09-06 06:54:15

strcasecomp(a,b) segfaults if a=NULL and compiled with -O2 and gcc >= 4.1.1.

The code in question is:


int
mh_strcasecmp (const char *s1, const char *s2) 
{
    const unsigned char *us1, *us2;

    us1 = (const unsigned char *) s1,
    us2 = (const unsigned char *) s2;

    if (!us1)
        us1 = "";
    if (!us2)
        us2 = "";
 
    while (tolower(*us1) == tolower(*us2++)) 
        if (*us1++ == '\0')
            return (0);
    return (tolower(*us1) - tolower(*--us2));
}


It seems the compiler (using -O2) totally optimizes away the two if clauses. 
Looking at the assembler:

gcc-4.2.0 -S -DHAVE_CONFIG_H -I.. -I. -I.. -march=i386  -pipe -O2 strcasecmp.c

strcasecmp:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        subl    $12, %esp
        movl    8(%ebp), %esi
        movl    12(%ebp), %ebx
        call    __ctype_tolower_loc   <------- the gcc internal tolower called 
immediately
        movl    (%eax), %ecx
        jmp     .L15
        .p2align 2,,3
.L17:
        incl    %esi
.L15:
        movb    (%esi), %dl
        movzbl  %dl, %eax
        movl    (%ecx,%eax,4), %edi
        movzbl  (%ebx), %eax
        incl    %ebx
        cmpl    (%ecx,%eax,4), %edi
        jne     .L22
        testb   %dl, %dl
        jne     .L17
        xorl    %eax, %eax
        addl    $12, %esp
        popl    %ebx
        popl    %esi
        popl    %edi
        leave
        ret
.L22:
        movzbl  -1(%ebx), %eax
        subl    (%ecx,%eax,4), %edi
        movl    %edi, %eax
        addl    $12, %esp
        popl    %ebx
        popl    %esi
        popl    %edi
        leave
        ret


gcc-4.2.0 -S -DHAVE_CONFIG_H -I.. -I. -I.. -march=i386  -pipe -O1 strcasecmp.c
strcasecmp:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        subl    $12, %esp   
        movl    8(%ebp), %eax 
        movl    12(%ebp), %edx
        testl   %eax, %eax            <-- Test for null
        je      .L12                  <-- jump to assignment
        movl    %eax, %esi
        jmp     .L14
.L12:
        movl    $.LC0, %esi
.L14:
        testl   %edx, %edx            <-- Test for null
        jne     .L15
        movl    $.LC0, %edi           <-- do the assignment
        jmp     .L17
.L18:
        testb   %dl, %dl
        jne     .L19
        movl    $0, %eax
        jmp     .L21
.L19:
        incl    %esi
        jmp     .L22
.L15:
        movl    %edx, %edi
.L17:
        call    __ctype_tolower_loc  <-- And _here_ we do the tolower stuff
        movl    (%eax), %ecx
.L22:
        movb    (%esi), %dl
        movzbl  %dl, %eax
        movl    (%ecx,%eax,4), %ebx
        movzbl  (%edi), %eax
        incl    %edi
        cmpl    (%ecx,%eax,4), %ebx
        je      .L18
        movzbl  -1(%edi), %eax
        subl    (%ecx,%eax,4), %ebx
        movl    %ebx, %eax
.L21:
        addl    $12, %esp
        popl    %ebx
        popl    %esi
        popl    %edi
        leave
        ret


Now, not being a C expert, is there anything wrong with the c code? If not, 
is there some mismagics with the tolower use? (now using the gcc internal,
rather than the external one).

Is the compiler busted on the optimization and it should be reported to 
the gcc crew?

/Anders


_______________________________________________
Nmh-workers mailing list
Nmh-workers(_at_)nongnu(_dot_)org
http://lists.nongnu.org/mailman/listinfo/nmh-workers

<Prev in Thread] Current Thread [Next in Thread>