gcc为函数生成的指令中为什么会有多处ret

发布时间 2023-04-13 21:45:44作者: tsecer

函数返回

如果一个函数的返回点比较多,而且函数比较长,想通过调试器知道函数从哪个位置退出就会比较麻烦。有些资料说,一般编译器会让所有return最终经过同一条ret(机器指令)返回,所以只要找到该指令的位置打断点即可。这种说法对于没有开启优化时生成的指令可能是正确的,但在开启优化生成的二进制中,经常可以看到一个函数内有多处ret,所以这种说法并不准确。

return语义

在对return语句的处理过程中,所有的return生成的指令的确是跳转到相同位置(return_label)。

/* Output a return with no value.

   Calls clear_pending_stack_adjust and do_pending_stack_adjust
   (presumably to settle any deferred stack-pointer adjustment --
   confirm against their definitions), then emits a jump to
   return_label.  No return instruction is emitted here.  */

static void
expand_null_return_1 (void)
{
  clear_pending_stack_adjust ();
  do_pending_stack_adjust ();
  emit_jump (return_label);
}

jump redirect

try_optimize_cfg函数中,如果满足


	      /* Try to change a branch to a return to just that return.  */
	      rtx_insn *ret, *use;
	      if (single_succ_p (b)
		  && onlyjump_p (BB_END (b))
		  && bb_is_just_return (single_succ (b), &ret, &use))

则尝试把该跳转的目的地改为PATTERN (ret),也就是后继基本块中那条return指令的模式;这里也就是最终的返回点。

	  if (redirect_jump (as_a <rtx_jump_insn *> (BB_END (b)),
			     PATTERN (ret), 0))

也就是

/* Create some permanent unique rtl objects shared between all functions.  */
void
init_emit_once (void)
{
///...
/* Build the SIMPLE_RETURN rtx (VOIDmode) once and keep it in
   simple_return_rtx.  */
simple_return_rtx = gen_rtx_fmt_ (SIMPLE_RETURN, VOIDmode);
///...
}

中创建的SIMPLE_RETURN特殊类型表达式。如果跳转的目标是这个表达式(而不是一个普通的label),生成的汇编代码(对386系统来说)就是一条ret机器指令。

/* Return true if BB contains just a return and possibly a USE of the
   return value.  Fill in *RET and *USE with the return and use insns
   if any found, otherwise NULL.

   Only insns satisfying NONDEBUG_INSN_P are examined.  The exit block
   never qualifies.  Note that *RET and/or *USE may already have been
   filled in when false is returned.  */

static bool
bb_is_just_return (basic_block bb, rtx_insn **ret, rtx_insn **use)
{
  *ret = *use = NULL;
  rtx_insn *insn;

  /* The function's exit block is not itself a "just return" block.  */
  if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
    return false;

  FOR_BB_INSNS (bb, insn)
    if (NONDEBUG_INSN_P (insn))
      {
	/* Accept the first return insn encountered.  */
	if (!*ret && ANY_RETURN_P (PATTERN (insn)))
	  *ret = insn;
	/* Accept at most one USE of a function-value register, and only
	   ahead of the return.  */
	else if (!*ret && !*use && GET_CODE (PATTERN (insn)) == USE
	    && REG_P (XEXP (PATTERN (insn), 0))
	    && REG_FUNCTION_VALUE_P (XEXP (PATTERN (insn), 0)))
	  *use = insn;
	/* Anything else, including a second return or a second USE,
	   means BB does more than just return.  */
	else
	  return false;
      }

  /* BB qualifies only if a return insn was actually found.  */
  return !!*ret;
}

/* Do simple CFG optimizations - basic block merging, simplifying of jump
   instructions etc.  Return nonzero if changes were made.

   Only the "branch to a return" rewrite is shown in this excerpt; the
   elided parts are marked with ///... lines.  */

static bool
try_optimize_cfg (int mode)
{
///...
	  /* Walk every block after the entry block up to, but not
	     including, the exit block.  The loop has no increment
	     expression; B is advanced in the elided part.  */
	  for (b = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; b
	       != EXIT_BLOCK_PTR_FOR_FN (cfun);)
	    {
///...
	      /* Try to change a branch to a return to just that return.  */
	      rtx_insn *ret, *use;
	      if (single_succ_p (b)
		  && onlyjump_p (BB_END (b))
		  && bb_is_just_return (single_succ (b), &ret, &use))
		{
		  /* Retarget B's final jump at the return pattern found in
		     its successor, instead of at the successor's label.  */
		  if (redirect_jump (as_a <rtx_jump_insn *> (BB_END (b)),
				     PATTERN (ret), 0))
		    {
		      /* The successor also had a USE of the return value:
			 emit a copy of it ahead of B's last insn.  */
		      if (use)
			emit_insn_before (copy_insn (PATTERN (use)),
					  BB_END (b));
		      if (dump_file)
			fprintf (dump_file, "Changed jump %d->%d to return.\n",
				 b->index, single_succ (b)->index);
		      /* B's single outgoing edge now leads to the exit
			 block, and EDGE_CROSSING is cleared on it.  */
		      redirect_edge_succ (single_succ_edge (b),
					  EXIT_BLOCK_PTR_FOR_FN (cfun));
		      single_succ_edge (b)->flags &= ~EDGE_CROSSING;
		      changed_here = true;
		    }
		}
///...
	}
///...
}

epilogue

在函数逻辑生成之后,在函数的结尾生成函数的epilogue,这个逻辑可以由不同的后端自己定制。


/* Return a sequence to be used as the epilogue for the current function,
   or NULL.

   NULL is returned when targetm.have_epilogue () is false.  Otherwise
   the result starts with a NOTE_INSN_EPILOGUE_BEG note, followed by
   whatever targetm.gen_epilogue () produced (if anything).  */

static rtx_insn *
make_epilogue_seq (void)
{
  if (!targetm.have_epilogue ())
    return NULL;

  /* Build the epilogue in its own insn sequence.  */
  start_sequence ();
  emit_note (NOTE_INSN_EPILOGUE_BEG);
  rtx_insn *seq = targetm.gen_epilogue ();
  /* The target may return nothing; only emit when there is something.  */
  if (seq)
    emit_jump_insn (seq);

  /* Retain a map of the epilogue insns.  */
  record_insns (seq, NULL, &epilogue_insn_hash);
  set_insn_locations (seq, epilogue_location);

  /* Capture the whole sequence (note included) and its last insn
     before the sequence is closed.  */
  seq = get_insns ();
  rtx_insn *returnjump = get_last_insn ();
  end_sequence ();

  /* When the sequence ends in a jump insn, hand it to
     set_return_jump_label.  */
  if (JUMP_P (returnjump))
    set_return_jump_label (returnjump);

  return seq;
}

复杂epilogue

如果一些epilogue比较复杂,可能涉及到栈帧的调整,这个时候基本块重排(bb-reorder)流程就有可能把这段代码再复制一份。在386系统下,如果code_may_grow为真且optimize_bb_for_speed_p (bb)成立,只要bb的指令总长度不超过16(即下面代码中的max_size),就有可能被拷贝一份。

///@file:bb-reorder.c
/* Return true when BB can and should be copied. CODE_MAY_GROW is true
   when code size is allowed to grow by duplication.

   BB's size is the sum of get_attr_min_length over its INSN_P insns.
   The limit is uncond_jump_length, multiplied by
   PARAM_MAX_GROW_COPY_BB_INSNS when CODE_MAY_GROW is set and
   optimize_bb_for_speed_p (BB) holds.  A block whose size is less than
   or equal to the limit is accepted.  */

static bool
copy_bb_p (const_basic_block bb, int code_may_grow)
{
  int size = 0;
  int max_size = uncond_jump_length;
  rtx_insn *insn;

  /* Reject blocks with zero frequency, blocks with fewer than two
     predecessors, and blocks that cannot be duplicated at all.  */
  if (!bb->frequency)
    return false;
  if (EDGE_COUNT (bb->preds) < 2)
    return false;
  if (!can_duplicate_block_p (bb))
    return false;

  /* Avoid duplicating blocks which have many successors (PR/13430).  */
  if (EDGE_COUNT (bb->succs) > 8)
    return false;

  /* Raise the size limit when growth is permitted for this block.  */
  if (code_may_grow && optimize_bb_for_speed_p (bb))
    max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);

  /* Add up the minimum length of every real insn in BB.  */
  FOR_BB_INSNS (bb, insn)
    {
      if (INSN_P (insn))
	size += get_attr_min_length (insn);
    }

  if (size <= max_size)
    return true;

  /* Too large: report the rejection in the dump file, if any.  */
  if (dump_file)
    {
      fprintf (dump_file,
	       "Block %d can't be copied because its size = %d.\n",
	       bb->index, size);
    }

  return false;
}

机器描述

在i386.md中描述了对应的机器指令。

;; Used by x86_machine_dependent_reorg to avoid penalty on single byte RET
;; instruction Athlon and K8 have.
;; The pattern is a simple_return paired with an UNSPEC_REP unspec and is
;; only valid once reload_completed is set.  Output is delegated to
;; ix86_output_function_return (true); the declared length is 2 with
;; prefix_rep set to 1.

(define_insn "simple_return_internal_long"
  [(simple_return)
   (unspec [(const_int 0)] UNSPEC_REP)]
  "reload_completed"
  "* return ix86_output_function_return (true);"
  [(set_attr "length" "2")
   (set_attr "atom_unit" "jeu")
   (set_attr "length_immediate" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "modrm" "0")])

;; A simple_return that also uses a constant SImode operand, printed as
;; "ret <operand 0>".  Only valid once reload_completed is set.  The
;; insn is split through ix86_split_simple_return_pop_internal when
;; cfun->machine->function_return_type is not indirect_branch_keep.
(define_insn_and_split "simple_return_pop_internal"
  [(simple_return)
   (use (match_operand:SI 0 "const_int_operand"))]
  "reload_completed"
  "%!ret\t%0"
  "&& cfun->machine->function_return_type != indirect_branch_keep"
  [(const_int 0)]
  "ix86_split_simple_return_pop_internal (operands[0]); DONE;"
  [(set_attr "length" "3")
   (set_attr "atom_unit" "jeu")
   (set_attr "length_immediate" "2")
   (set_attr "modrm" "0")
   (set_attr "maybe_prefix_bnd" "1")])

栗子

在开启优化的版本中有多处ret指令。注意示例中的foo在x不是1~4时没有return语句,属于未定义行为,所以下面的汇编里gcc没有为这种情况单独生成代码(x<2按case 1处理,x>3按case 4处理)。

tsecer@harry: cat multi.return.cpp 
int foo(int x, int y)
{
        switch (x)
        {
                case 1: return y * 1;
                case 2: return y * 2;
                case 3: return y * 3;
                case 4: return y * 4;
        }
}

tsecer@harry: gcc -O3 -S multi.return.cpp
tsecer@harry: cat multi.return.s 
        .file   "multi.return.cpp"
        .text
        .p2align 4,,15
        .globl  _Z3fooii
        .type   _Z3fooii, @function
_Z3fooii:
.LFB0:
        .cfi_startproc
        cmpl    $2, %edi
        je      .L3
        jle     .L12
        cmpl    $3, %edi
        je      .L6
        leal    0(,%rsi,4), %eax
        ret
        .p2align 4,,10
        .p2align 3
.L6:
        leal    (%rsi,%rsi,2), %eax
        ret
        .p2align 4,,10
        .p2align 3
.L12:
        movl    %esi, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L3:
        leal    (%rsi,%rsi), %eax
        ret
        .cfi_endproc
.LFE0:
        .size   _Z3fooii, .-_Z3fooii
        .section        .note.GNU-stack,"",@progbits
tsecer@harry: 

bb拷贝栗子


tsecer@harry: cat bb.copy.cpp 
int foo(int x, int y)
{
    int a[20];
    extern int bar(int *);
    bar(a);
switch (x)
{
case 1: return y * 1;
case 2: return y * 2;
case 3 : return y *3;    
case 4: return y *4;
}
return x * 5;
}

tsecer@harry: gcc -O3 -S bb.copy.cpp
tsecer@harry: cat bb.copy.s 
        .file   "bb.copy.cpp"
        .text
        .p2align 4,,15
        .globl  _Z3fooii
        .type   _Z3fooii, @function
_Z3fooii:
.LFB0:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        pushq   %rbx
        .cfi_def_cfa_offset 24
        .cfi_offset 3, -24
        movl    %edi, %ebp
        movl    %esi, %ebx
        subq    $88, %rsp
        .cfi_def_cfa_offset 112
        movq    %rsp, %rdi
        call    _Z3barPi
        cmpl    $2, %ebp
        je      .L3
        jle     .L13
        cmpl    $3, %ebp
        je      .L6
        cmpl    $4, %ebp
        leal    0(,%rbx,4), %eax
        jne     .L2
.L1:
        addq    $88, %rsp
        .cfi_remember_state
        .cfi_def_cfa_offset 24
        popq    %rbx
        .cfi_def_cfa_offset 16
        popq    %rbp
        .cfi_def_cfa_offset 8
        ret
        .p2align 4,,10
        .p2align 3
.L6:
        .cfi_restore_state
        addq    $88, %rsp
        .cfi_remember_state
        .cfi_def_cfa_offset 24
        leal    (%rbx,%rbx,2), %eax
        popq    %rbx
        .cfi_def_cfa_offset 16
        popq    %rbp
        .cfi_def_cfa_offset 8
        ret
        .p2align 4,,10
        .p2align 3
.L13:
        .cfi_restore_state
        cmpl    $1, %ebp
        movl    %ebx, %eax
        je      .L1
.L2:
        addq    $88, %rsp
        .cfi_remember_state
        .cfi_def_cfa_offset 24
        leal    0(%rbp,%rbp,4), %eax
        popq    %rbx
        .cfi_def_cfa_offset 16
        popq    %rbp
        .cfi_def_cfa_offset 8
        ret
        .p2align 4,,10
        .p2align 3
.L3:
        .cfi_restore_state
        addq    $88, %rsp
        .cfi_def_cfa_offset 24
        leal    (%rbx,%rbx), %eax
        popq    %rbx
        .cfi_def_cfa_offset 16
        popq    %rbp
        .cfi_def_cfa_offset 8
        ret
        .cfi_endproc
.LFE0:
        .size   _Z3fooii, .-_Z3fooii
        .section        .note.GNU-stack,"",@progbits
tsecer@harry: