[AArch64] Eliminate `cmp` by reassocating `add` and `sub` #119606

Kmeakin · 2024-12-11T19:26:10Z

This Rust function:
https://godbolt.org/z/W9WM6YPYf

/// Returns a reference to the last byte of `xs`, or `None` when `xs` is empty.
///
/// `#[no_mangle]` keeps the symbol name as `last` in the emitted assembly.
#[no_mangle]
pub fn last(xs: &[u8]) -> Option<&u8> {
    xs.last()
}

produces this assembly:

// Current compiler output: computes x0 + x1 - 1 and separately compares x1
// against 0 to decide whether to return 0 instead.
last:
        add     x8, x1, x0          // x8 = x1 + x0
        cmp     x1, #0              // set flags from x1 - 0 (the extra instruction)
        sub     x8, x8, #1          // x8 = (x1 + x0) - 1
        csel    x0, xzr, x8, eq     // x0 = (x1 == 0) ? 0 : x8
        ret

The equivalent C also produces the same assembly:
https://godbolt.org/z/c5dMbv63K

#include <stddef.h>
#include <stdint.h>

/*
 * Returns x0 + x1 - 1 (the address of the last of x1 bytes starting at x0),
 * or NULL when x1 is 0. The parameter names mirror the registers that appear
 * in the assembly listing.
 */
uint8_t* last(uint8_t* x0, size_t x1) {
    if (x1 == 0) {
        return NULL;
    } else {
        /* Parsed left to right, i.e. (x0 + x1) - 1. */
        return x0 + x1 - 1;
    }
}

By reassociating sub and add (from (x0 + x1) - 1 to x0 + (x1 - 1)), we can save an instruction by reusing the carry flag set by the subtraction, which records the unsigned underflow of x1 - 1:

// Desired output: the subtraction itself sets the flags, so no separate cmp
// is needed.
tgt:                                    // @tgt
        subs    x8, x1, #1          // x8 = x1 - 1; flags record the unsigned borrow
        add     x8, x0, x8          // x8 = x0 + (x1 - 1); add does not touch the flags
        csel    x0, xzr, x8, lo     // x0 = (x1 < 1, unsigned) ? 0 : x8
        ret

I could not convince clang to produce this assembly, even using __builtin_sub_overflow:
https://godbolt.org/z/6vPWjx1zv

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/*
 * Returns x0 + (x1 - 1), or NULL when x1 - 1 wraps around (x1 == 0).
 * The subtraction is spelled with __builtin_sub_overflow so that its
 * overflow result is the only test of x1.
 */
uint8_t* tgt(uint8_t* x0, size_t x1) {
    /* Receives x1 - 1; despite the name it holds a difference. */
    size_t sum;
    /* True exactly when x1 - 1 does not fit in size_t. */
    bool overflow = __builtin_sub_overflow(x1, 1, &sum);

    if (overflow) {
        return NULL;
    } else {
        return x0 + sum;
    }
}

I could only produce this assembly by writing the LLVM IR manually:
https://godbolt.org/z/hvjEEsG9r

; Hand-written IR: %0 is the base address, %1 the length, both as i64.
; Returns 0 when %1 - 1 underflows, otherwise %0 + (%1 - 1).
define noundef i64 @tgt(i64 noundef %0, i64 noundef %1) {
  ; Unsigned %1 - 1 together with its overflow bit.
  %4 = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %1, i64 1)
  ; Field 1 of the result pair is the overflow bit, field 0 the difference.
  %overflow = extractvalue { i64, i1 } %4, 1
  %diff = extractvalue {i64, i1} %4, 0
  ; The add consumes the already-decremented length.
  %sum = add i64 %0, %diff
  %ret = select i1 %overflow, i64 0, i64 %sum 
  ret i64 %ret
}

The text was updated successfully, but these errors were encountered:

llvmbot · 2024-12-11T19:26:28Z

@llvm/issue-subscribers-backend-aarch64

Author: Karl Meakin (Kmeakin)

This Rust function:

https://godbolt.org/z/5EKvWeEYb

/// Returns a reference to the last byte of `xs`, or `None` when `xs` is empty.
///
/// `#[no_mangle]` keeps the symbol name as `last` in the emitted assembly.
#[no_mangle]
pub fn last(xs: &[u8]) -> Option<&u8> {
    xs.last()
}

produces this assembly:

last:
        add     x8, x1, x0
        cmp     x1, #0
        sub     x8, x8, #1
        csel    x0, xzr, x8, eq
        ret

The equivalent C also produces the same assembly:
https://godbolt.org/z/c5dMbv63K

#include <stddef.h>
#include <stdint.h>

/*
 * Returns x0 + x1 - 1 (the address of the last of x1 bytes starting at x0),
 * or NULL when x1 is 0. The parameter names mirror the registers that appear
 * in the assembly listing.
 */
uint8_t* last(uint8_t* x0, size_t x1) {
    if (x1 == 0) {
        return NULL;
    } else {
        /* Parsed left to right, i.e. (x0 + x1) - 1. */
        return x0 + x1 - 1;
    }
}

By reassociating sub and add, we can save an instruction by reusing the carry flag set by the subtraction, which records the unsigned underflow of x1 - 1:

tgt:                                    // @tgt
        subs    x8, x1, #1
        add     x8, x0, x8
        csel    x0, xzr, x8, lo
        ret

I could not convince clang to produce this assembly, even using __builtin_sub_overflow:
https://godbolt.org/z/6vPWjx1zv

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/*
 * Returns x0 + (x1 - 1), or NULL when x1 - 1 wraps around (x1 == 0).
 * The subtraction is spelled with __builtin_sub_overflow so that its
 * overflow result is the only test of x1.
 */
uint8_t* tgt(uint8_t* x0, size_t x1) {
    /* Receives x1 - 1; despite the name it holds a difference. */
    size_t sum;
    /* True exactly when x1 - 1 does not fit in size_t. */
    bool overflow = __builtin_sub_overflow(x1, 1, &sum);

    if (overflow) {
        return NULL;
    } else {
        return x0 + sum;
    }
}

I could only produce this assembly by writing the LLVM IR manually:
https://godbolt.org/z/hvjEEsG9r

define noundef i64 @tgt(i64 noundef %0, i64 noundef %1) {
  %4 = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %1, i64 1)
  %overflow = extractvalue { i64, i1 } %4, 1
  %diff = extractvalue {i64, i1} %4, 0
  %sum = add i64 %0, %diff
  %ret = select i1 %overflow, i64 0, i64 %sum 
  ret i64 %ret
}

Kmeakin added backend:AArch64 missed-optimization labels Dec 11, 2024

mskamp linked a pull request Dec 30, 2024 that will close this issue

[AArch64] Eliminate Common Subexpression of CSEL by Reassociation #121350

Open

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AArch64] Eliminate `cmp` by reassocating `add` and `sub` #119606

[AArch64] Eliminate `cmp` by reassocating `add` and `sub` #119606

Kmeakin commented Dec 11, 2024 •

edited

Loading

llvmbot commented Dec 11, 2024

[AArch64] Eliminate cmp by reassocating add and sub #119606

[AArch64] Eliminate cmp by reassocating add and sub #119606

Comments

Kmeakin commented Dec 11, 2024 • edited Loading

llvmbot commented Dec 11, 2024

[AArch64] Eliminate `cmp` by reassocating `add` and `sub` #119606

[AArch64] Eliminate `cmp` by reassocating `add` and `sub` #119606

Kmeakin commented Dec 11, 2024 •

edited

Loading