仅需要8个字节时将堆栈减24?

我有C代码:

/*
 * Naive doubly recursive Fibonacci-style function.
 *
 * Returns 1 for every n < 2 (so fib(0) == fib(1) == 1), otherwise
 * fib(n-1) + fib(n-2).
 */
long fib(long n) {
  /* Base case: also taken for 0 and for negative n. */
  if (n < 2) return 1;
  /* Neither call is a tail call: the result of the first must be kept
     while the second runs, and the two results are then added. */
  return fib(n-1) + fib(n-2);
}

/* Entry point: ignores argc/argv, never calls fib, and returns 0. */
int main(int argc, char** argv) {
    return 0;
}

I compiled it by running gcc -O0 -fno-optimize-sibling-calls -S file.c, which yields the following unoptimized assembly code:

    .file   "long.c"
    .text
    # long fib(long n) -- unoptimized compiler output, AT&T syntax.
    # In:  %rdi = n
    # Out: %rax = 1 if n < 2, else fib(n-1) + fib(n-2)
    #
    # Frame layout once the prologue has run (offsets from %rbp):
    #     0(%rbp)   caller's %rbp
    #    -8(%rbp)   saved %rbx
    #   -16(%rbp)   not referenced by this function
    #   -24(%rbp)   n
    #   -32(%rbp)   where %rsp points
    .globl  fib
    .type   fib, @function
fib:
.LFB5:
    .cfi_startproc
    # Prologue: save the caller's frame pointer and set up our own.
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    # %rbx is callee-saved and is used below to hold fib(n-1), so it is
    # saved first; being pushed right after the frame setup it lands at
    # -8(%rbp).
    pushq   %rbx
    # Reserve 24 bytes below the saved %rbx. Assuming the usual System V
    # entry state (%rsp is 8 mod 16 because of the return address), the two
    # pushes plus these 24 bytes leave %rsp 16-byte aligned at both calls
    # below. NOTE(review): an adjustment of 8 would also satisfy that
    # alignment; only the slot at -24(%rbp) is actually used.
    subq    $24, %rsp
    .cfi_offset 3, -24
    # Spill n to its stack slot, then test it from memory.
    movq    %rdi, -24(%rbp)
    # Signed compare: n > 1 goes to the recursive case.
    cmpq    $1, -24(%rbp)
    jg  .L2
    # Base case (n < 2): return 1. The 32-bit write zero-extends into %rax.
    movl    $1, %eax
    jmp .L3
.L2:
    # First recursive call: fib(n - 1).
    movq    -24(%rbp), %rax
    subq    $1, %rax
    movq    %rax, %rdi
    call    fib
    # Keep fib(n-1) in %rbx so it survives the second call.
    movq    %rax, %rbx
    # Second recursive call: fib(n - 2), with n reloaded from the stack.
    movq    -24(%rbp), %rax
    subq    $2, %rax
    movq    %rax, %rdi
    call    fib
    # %rax = fib(n-2) + fib(n-1)
    addq    %rbx, %rax
.L3:
    # Epilogue: release the 24 bytes, then restore %rbx and %rbp in the
    # reverse order of the pushes.
    addq    $24, %rsp
    popq    %rbx
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE5:
    .size   fib, .-fib
    # int main(int argc, char **argv) -- unoptimized compiler output.
    # In:  %edi = argc, %rsi = argv
    # Out: %eax = 0
    .globl  main
    .type   main, @function
main:
.LFB6:
    .cfi_startproc
    # Prologue: save the caller's frame pointer and set up our own.
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    # Spill argc and argv below %rbp. %rsp is never lowered (there is no
    # subq) and main makes no calls, so these stores land below %rsp, in
    # the System V red zone.
    movl    %edi, -4(%rbp)
    movq    %rsi, -16(%rbp)
    # return 0
    movl    $0, %eax
    # Epilogue: only %rbp needs restoring.
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE6:
    .size   main, .-main
    .ident  "GCC: (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0"
    .section    .note.GNU-stack,"",@progbits

我的问题是:

Why do we decrement the stack pointer by 24 (subq $24, %rsp)? As I see it, we store only one element on the stack after the initial two pushes: the first argument n, which arrives in %rdi. So why don't we just decrement the stack pointer by 8 and then move n to -8(%rbp)? That is:

# Proposed alternative to the compiler's "subq $24, %rsp" / "-24(%rbp)" pair.
# NOTE(review): in the listing above, pushq %rbx runs right after
# movq %rsp, %rbp, so -8(%rbp) already holds the saved %rbx; the 8 bytes
# reserved by this subq would be at -16(%rbp).
subq    $8, %rsp
movq    %rdi, -8(%rbp)