分享
 
 
 

Before main() 分析

王朝other·作者佚名  2006-11-24
窄屏简体版  字體: |||超大  

原创:alert7(alert7)

来源:http://www.xfocus.org/

Before main() 分析

作者:alert7

<alert7@xfocus.org>

主页: http://www.xfocus.org

时间: 2001-9-25

★ 前言

本文分析了在main()之前的ELF程序流程,试图让您更清楚的把握程序的流程的脉络走向。

从而更深入的了解ELF。不正确之处,还请斧正。

★ 综述

ELF的可执行文件与共享库在结构上非常类似,它们具有一张程序段表,用来描述这些段如何映射到进程空间.

对于可执行文件来说,段的加载位置是固定的,程序段表中如实反映了段的加载地址.对于共享库来说,段的加

载位置是浮动的,位置无关的,程序段表反映的是以0作为基准地址的相对加载地址.尽管共享库的连接是不

充分的,为了便于测试动态链接器,Linux允许直接加载共享库运行.如果应用程序具有动态链接器的描述段,

内核在完成程序段加载后,紧接着加载动态链接器,并且启动动态链接器的入口.如果没有动态链接器的描述段,

就直接交给用户程序入口。

上述这部分请参考:linuxforum论坛上opera写的《分析ELF的加载过程》

在控制权交给动态链接器的入口后,首先调用_dl_start函数获得真实的程序入口(注:该入口地址

不是main的地址,也就是说一般程序的入口不是main),然后循环调用每个共享object的初始化函数,

接着跳转到真实的程序入口,一般为_start(程序中的_start)的一个例程,该例程压入一些参数到堆栈,

就直接调用__libc_start_main函数。在__libc_start_main函数中替动态连接器和自己程序安排

destructor,并运行程序的初始化函数。然后才把控制权交给main()函数。

★ main()之前流程

下面就是动态链接器的入口。

/* Initial entry point code for the dynamic linker (i386, AT&T syntax).

   The C function `_dl_start' is the real entry point;
   its return value is the user program's entry point.

   RTLD_START expands to one top-level asm statement defining `_start'
   and `_dl_start_user'.  The assembly text is a single string literal,
   so every source line inside it must end with a newline escape followed
   by a backslash line continuation, and notes inside it must be
   assembler `#' comments: a C comment placed inside the literal would
   become part of the assembly text.

   Flow of the code:
     1. call _dl_start with the initial %esp as its argument and keep the
        returned user entry point in %edi;
     2. load the GOT address into %ebx and record %esp in
        __libc_stack_end;
     3. drop _dl_skip_args words from the argument vector and fix argc;
     4. call _dl_init_next repeatedly, calling each address it returns,
        until it returns zero;
     5. clear _dl_starting_up, load _dl_fini into %edx and jump to the
        user entry point.  */
#define RTLD_START asm ("\
.text\n\
.globl _start\n\
.globl _dl_start_user\n\
_start:\n\
pushl %esp\n\
# When _dl_start returns, %eax holds the user entry point address.\n\
call _dl_start\n\
# Pop the %esp value pushed above; it lands in %ebx.\n\
popl %ebx\n\
_dl_start_user:\n\
# Save the user entry point address in %edi.\n\
movl %eax, %edi\n\
# Point %ebx at the GOT.\n\
call 0f\n\
0: popl %ebx\n\
addl $_GLOBAL_OFFSET_TABLE_+[.-0b], %ebx\n\
# Store the highest stack address\n\
movl __libc_stack_end@GOT(%ebx), %eax\n\
# %eax now addresses __libc_stack_end (found through the GOT);\n\
# write the current %esp into it.\n\
movl %esp, (%eax)\n\
# See if we were run as a command with the executable file\n\
# name as an extra leading argument.\n\
movl _dl_skip_args@GOT(%ebx), %eax\n\
movl (%eax), %eax\n\
# Pop the original argument count.\n\
popl %ecx\n\
# Subtract _dl_skip_args from it.\n\
subl %eax, %ecx\n\
# Adjust the stack pointer to skip _dl_skip_args words.\n\
leal (%esp,%eax,4), %esp\n\
# Push back the modified argument count.\n\
pushl %ecx\n\
# Push the searchlist of the main object as argument in\n\
# _dl_init_next call below.\n\
movl _dl_main_searchlist@GOT(%ebx), %eax\n\
movl (%eax), %esi\n\
0: movl %esi,%eax\n\
# Call _dl_init_next to return the address of an initializer\n\
# function to run.  The address comes back in %eax.\n\
call _dl_init_next@PLT\n\
# Check for zero return, when out of initializers.\n\
testl %eax, %eax\n\
jz 1f\n\
# Call the shared object initializer function.\n\
# NOTE: We depend only on the registers (%ebx, %esi and %edi)\n\
# and the return address pushed by this call;\n\
# the initializer is called with the stack just\n\
# as it appears on entry, and it is free to move\n\
# the stack around, as long as it winds up jumping to\n\
# the return address on the top of the stack.\n\
call *%eax\n\
# Loop to call _dl_init_next for the next initializer.\n\
jmp 0b\n\
1: # Clear the startup flag.\n\
movl _dl_starting_up@GOT(%ebx), %eax\n\
movl $0, (%eax)\n\
# Pass our finalizer function to the user in %edx, as per ELF ABI.\n\
movl _dl_fini@GOT(%ebx), %edx\n\
# Jump to the user's entry point.\n\
jmp *%edi\n\
.previous\n\
");

sysdeps\i386\start.s中

user's entry也就是下面的_start例程

/* This is the canonical entry point, usually the first thing in the text

segment. The SVR4/i386 ABI (pages 3-31, 3-32) says that when the entry

point runs, most registers' values are unspecified, except for:

%edx Contains a function pointer to be registered with `atexit'.

This is how the dynamic linker arranges to have DT_FINI

functions called for shared libraries that have been loaded

before this code runs.

%esp The stack contains the arguments and environment:

0(%esp) argc

4(%esp) argv[0]

...

(4*argc)(%esp) NULL

(4*(argc+1))(%esp) envp[0]

...

NULL

*/

/* Program entry stub.  It reads argc and the argv pointer from the
   initial stack, aligns the stack, pushes the argument words for
   __libc_start_main and calls it.  In push order the words are: the
   stack pointer, %edx, $_fini, $_init, argv (%ecx), argc (%esi) and
   $main.  */

.text

.globl _start

_start:

/* Clear the frame pointer. The ABI suggests this be done, to mark
   the outermost frame obviously. */

xorl %ebp, %ebp

/* Extract the arguments as encoded on the stack and set up
   the arguments for `main': argc, argv. envp will be determined
   later in __libc_start_main. */

popl %esi /* Pop the argument count. */

movl %esp, %ecx /* argv starts just at the current stack top. */

/* Before pushing the arguments align the stack to a double word
   boundary to avoid penalties from misaligned accesses. Thanks
   to Edward Seidl for pointing this out.  The mask clears the low
   three bits of %esp, i.e. rounds it down to a multiple of 8. */

andl $0xfffffff8, %esp

pushl %eax /* Push garbage because we allocate 28 more bytes: the
              seven 4-byte pushes below.  With this filler word the
              total is 32 bytes, a multiple of 8. */

/* Provide the highest stack address to the user code (for stacks
   which grow downwards). */

pushl %esp

pushl %edx /* Push address of the shared library
              termination function. */

/* Push address of our own entry points to .fini and .init. */

pushl $_fini

pushl $_init

pushl %ecx /* Push second argument: argv. */

pushl %esi /* Push first argument: argc. */

pushl $main /* Address of the user's main, pushed last. */

/* Call the user's main function, and exit with its value.
   But let the libc call main. */

call __libc_start_main

hlt /* Crash if somehow `exit' does return. */

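__libc_start_main在sysdeps\generic\libc_start.c中(注:下面所列的代码带有struct startup_info和“PPC SVR4 ABI”注释,看起来是PowerPC版本的实现;它的第一个参数是argc,而上面i386的_start例程最后压栈的是main,两者的参数顺序并不对应,阅读时请注意区分。)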

假设定义的是PIC的代码。

/* Start-up descriptor received by __libc_start_main through its
   `stinfo' parameter.  It bundles the program's entry, initializer and
   finalizer pointers.  */

struct startup_info

{

/* Not referenced by the start-up code shown in this file.
   NOTE(review): presumably the small-data-area base address; confirm
   against the code that fills in this structure.  */

void *sda_base;

/* The program's main.  __libc_start_main calls it with
   (argc, argv, __environ, auxvec) and passes its result to exit.  */

int (*main) (int, char **, char **, void *);

/* Program initializer.  When non-null, __libc_start_main calls it with
   the same four arguments before calling main.  */

int (*init) (int, char **, char **, void *);

/* Program finalizer.  When non-null, __libc_start_main registers it
   with atexit.  */

void (*fini) (void);

};

/* C-level process start-up: runs after the `_start' stub and before the
   program's main.

   ARGC, ARGV, ENVP  program arguments and environment; recomputed from
                     STACK_ON_ENTRY when the word it points at is not
                     NULL (see below).
   AUXVEC            auxiliary vector, passed on to init and main.
   RTLD_FINI         finalizer of the dynamic linker, or NULL.
   STINFO            the program's main/init/fini pointers.
   STACK_ON_ENTRY    stack pointer as it was on process entry.

   The function does not return: the value returned by main is handed
   to exit.  */
int
__libc_start_main (int argc, char **argv, char **envp,
                   void *auxvec, void (*rtld_fini) (void),
                   struct startup_info *stinfo,
                   char **stack_on_entry)
{
  /* the PPC SVR4 ABI says that the top thing on the stack will
     be a NULL pointer, so if not we assume that we're being called
     as a statically-linked program by Linux... */
  if (*stack_on_entry != NULL)
    {
      char **cursor;

      /* ...in which case, we have argc as the top thing on the
         stack, followed by argv (NULL-terminated), envp (likewise),
         and the auxilary vector. */
      argc = *(int *) stack_on_entry;
      argv = stack_on_entry + 1;
      envp = argv + argc + 1;

      /* Walk envp one pointer at a time up to its NULL terminator; the
         auxiliary vector starts in the slot right after it.  AUXVEC is a
         `void *', so incrementing it directly would (as a GNU extension)
         advance a single byte per step instead of one pointer.  */
      cursor = envp;
      while (*cursor != NULL)
        ++cursor;
      auxvec = cursor + 1;

      /* No dynamic linker took part, so it has no finalizer.  */
      rtld_fini = NULL;
    }

  /* Store something that has some relationship to the end of the
     stack, for backtraces. This variable should be thread-specific. */
  __libc_stack_end = stack_on_entry + 4;

  /* Set the global _environ variable correctly. */
  __environ = envp;

  /* Register the destructor of the dynamic linker if there is any. */
  if (rtld_fini != NULL)
    atexit (rtld_fini);

  /* Call the initializer of the libc (an empty function in the version
     shown in this file). */
  __libc_init_first (argc, argv, envp);

  /* Register the destructor of the program, if any. */
  if (stinfo->fini)
    atexit (stinfo->fini);

  /* Call the initializer of the program, if any. */
  if (stinfo->init)
    stinfo->init (argc, argv, __environ, auxvec);

  /* Only now does control reach what is usually thought of as the
     program's entry point: run main and exit with its return value. */
  exit (stinfo->main (argc, argv, __environ, auxvec));
}

/* Placeholder libc initializer.  It accepts ARGC plus any number of
   further arguments, ignores all of them and returns immediately.  */
void
__libc_init_first (int argc __attribute__ ((unused)), ...)
{
  /* Intentionally empty.  */
}

/* Register FUNC as an exit handler.

   A fresh slot is requested from __new_exitfn; on success it is tagged
   with the `ef_at' flavor and FUNC is stored in it.

   Returns 0 on success, or -1 when __new_exitfn yields no slot.  */
int
atexit (void (*func) (void))
{
  struct exit_function *slot = __new_exitfn ();

  if (slot != NULL)
    {
      slot->flavor = ef_at;
      slot->func.at = func;
      return 0;
    }

  return -1;
}

/* Hand out, one per call, the initializers for the objects on
   SEARCHLIST, in inverse dependency order (that is, leaf nodes first).

   Returns the address of the next initializer to run, or 0 when no
   object on the list has one left.  An object whose initializer address
   was returned is flagged as "running"; the following call turns that
   flag into "called", so the caller is expected to run the returned
   function and then call this routine again.  */
ElfW(Addr)
internal_function
_dl_init_next (struct r_scope_elem *searchlist)
{
  unsigned int remaining;

  /* The search list for symbol lookup is a flat list in top-down
     dependency order, so walking it from the last entry to the first
     gives breadth-first leaf-to-root order.  */
  for (remaining = searchlist->r_nlist; remaining > 0; --remaining)
    {
      struct link_map *map = searchlist->r_list[remaining - 1];

      /* Already dealt with on an earlier call.  */
      if (map->l_init_called)
        continue;

      if (map->l_init_running)
        {
          /* Its initializer was handed out by the previous call;
             record it as done so it is skipped from now on.  */
          map->l_init_running = 0;
          map->l_init_called = 1;
          continue;
        }

      /* Nothing to run for an object without a DT_INIT entry, nor for
         an executable-type object with an empty name.  Mark it done.  */
      if (map->l_info[DT_INIT] == NULL
          || (map->l_name[0] == '\0' && map->l_type == lt_executable))
        {
          map->l_init_called = 1;
          continue;
        }

      /* This object's initializer is the next one to run.  */
      map->l_init_running = 1;

      /* Print a debug message if wanted.  */
      if (_dl_debug_impcalls)
        _dl_debug_message (1, "\ncalling init: ",
                           map->l_name[0] ? map->l_name : _dl_argv[0],
                           "\n\n", NULL);

      /* Load address of the object plus the DT_INIT value.  */
      return map->l_addr + map->l_info[DT_INIT]->d_un.d_ptr;
    }

  /* Notify the debugger all new objects are now ready to go.  */
  _r_debug.r_state = RT_CONSISTENT;
  _dl_debug_state ();

  return 0;
}

在main()之前的程序流程看似有点简单,但真正运行的时候还是比较复杂的

(自己用GDB跟踪一下就知道了),因为一般的程序都需要涉及到PLT、GOT标号的

重定位。弄清楚这个对ELF尤为重要,以后有机会再补上一篇吧。

★ 手动确定程序和动态连接器的入口

[alert7@redhat62 alert7]$ cat helo.c

#include <stdio.h>

int main(int argc,char **argv)

{

printf("

[1] [2] 下一页

 
 
 
免责声明:本文为网络用户发布,其观点仅代表作者个人观点,与本站无关,本站仅提供信息存储服务。文中陈述内容未经本站证实,其真实性、完整性、及时性本站不作任何保证或承诺,请读者仅作参考,并请自行核实相关内容。
2023年上半年GDP全球前十五强
 百态   2023-10-24
美众议院议长启动对拜登的弹劾调查
 百态   2023-09-13
上海、济南、武汉等多地出现不明坠落物
 探索   2023-09-06
印度或要将国名改为“巴拉特”
 百态   2023-09-06
男子为女友送行,买票不登机被捕
 百态   2023-08-20
手机地震预警功能怎么开?
 干货   2023-08-06
女子4年卖2套房花700多万做美容:不但没变美脸,面部还出现变形
 百态   2023-08-04
住户一楼被水淹 还冲来8头猪
 百态   2023-07-31
女子体内爬出大量瓜子状活虫
 百态   2023-07-25
地球连续35年收到神秘规律性信号,网友:不要回答!
 探索   2023-07-21
全球镓价格本周大涨27%
 探索   2023-07-09
钱都流向了那些不缺钱的人,苦都留给了能吃苦的人
 探索   2023-07-02
倩女手游刀客魅者强控制(强混乱强眩晕强睡眠)和对应控制抗性的关系
 百态   2020-08-20
美国5月9日最新疫情:美国确诊人数突破131万
 百态   2020-05-09
荷兰政府宣布将集体辞职
 干货   2020-04-30
倩女幽魂手游师徒任务情义春秋猜成语答案逍遥观:鹏程万里
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案神机营:射石饮羽
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案昆仑山:拔刀相助
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案天工阁:鬼斧神工
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案丝路古道:单枪匹马
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案镇郊荒野:与虎谋皮
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案镇郊荒野:李代桃僵
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案镇郊荒野:指鹿为马
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案金陵:小鸟依人
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案金陵:千金买邻
 干货   2019-11-12
 
推荐阅读
 
 
 
>>返回首頁<<
 
靜靜地坐在廢墟上,四周的荒凉一望無際,忽然覺得,淒涼也很美
© 2005- 王朝網路 版權所有