1.新建socket
函数原型:
static int inet_create(struct socket *sock, int protocol)
在net/ipv4/af_inet.c中
详细解释
static int inet_create(struct socket *sock, int protocol)
{
struct sock *sk;
struct proto *prot;
sock-state = SS_UNCONNECTED;
/* 设置状态为未连接 */
sk = sk_alloc(PF_INET, GFP_KERNEL, 1); /* 申请sock所需的内存 */
/* net/core/sock.c */
if (sk == NULL)
goto do_oom;
switch (sock-type) {
case SOCK_STREAM:
/* TCP协议 */
if (protocol && protocol != IPPROTO_TCP)
goto free_and_noproto;
protocol = IPPROTO_TCP;
prot = &tcp_prot;
/* tcp_prot定义在net/ipv4/tcp_ipv4.c */
sock-ops = &inet_stream_ops; /* 针对STREAM的socket操作 */
break;
case SOCK_SEQPACKET:
/* 不支持 */
goto free_and_badtype;
case SOCK_DGRAM:
/* UDP协议 */
if (protocol && protocol != IPPROTO_UDP)
goto free_and_noproto;
protocol = IPPROTO_UDP;
sk-no_check = UDP_CSUM_DEFAULT;
prot=&udp_prot;
/* udp_prot定义在net/ipv4/udp.c */
sock-ops = &inet_dgram_ops; /* 针对DGRAM的socket操作 */
break;
case SOCK_RAW:
/* RAW */
if (!capable(CAP_NET_RAW)) /* 判断是否有权利建立SOCK_RAW */
goto free_and_badperm;
if (!protocol)
/* protocol不能为0 */
goto free_and_noproto;
prot = &raw_prot;
/* raw_prot定义在net/ipv4/raw.c */
sk-reuse = 1;
/* 允许地址重用 */
sk-num = protocol;
sock-ops = &inet_dgram_ops; /* RAW的一些特性和DGRAM相同 */
if (protocol == IPPROTO_RAW)
sk-protinfo.af_inet.hdrincl = 1;
/* 允许自己定制ip头 */
break;
default:
goto free_and_badtype;
}
if (ipv4_config.no_pmtu_disc)
sk-protinfo.af_inet.pmtudisc = IP_PMTUDISC_DONT;
else
sk-protinfo.af_inet.pmtudisc = IP_PMTUDISC_WANT;
sk-protinfo.af_inet.id = 0;
sock_init_data(sock,sk);
/* 初始化一些数据 */
/* net/core/sock.c */
sk-destruct = inet_sock_destruct; /* 当销毁socket时调用inet_sock_destruct */
sk-zapped = 0;
sk-family = PF_INET;
sk-protocol = protocol;
sk-prot = prot;
sk-backlog_rcv = prot-backlog_rcv; /* prot-backlog_rcv()见各个类型的定义 */
sk-protinfo.af_inet.ttl = sysctl_ip_default_ttl; /* 设置默认ttl */
/* 修改/proc/sys/net/ipv4/ip_default_ttl */
sk-protinfo.af_inet.mc_loop = 1;
sk-protinfo.af_inet.mc_ttl = 1;
sk-protinfo.af_inet.mc_index = 0;
sk-protinfo.af_inet.mc_list = NULL;
#ifdef INET_REFCNT_DEBUG
atomic_inc(&inet_sock_nr);
#endif
if (sk-num) {
/* It assumes that any protocol which allows
* the user to assign a number at socket
* creation time automatically
* shares.
*/
sk-sport = htons(sk-num); /* 设置本地端口 */
/* Add to protocol hash chains. */
sk-prot-hash(sk);
}
if (sk-prot-init) {
int err = sk-prot-init(sk); /* 协议对socket的初始化 */
if (err != 0) {
inet_sock_release(sk);
return(err);
}
}
return(0);
free_and_badtype:
sk_free(sk);
/* 释放内存 */
return -ESOCKTNOSUPPORT;
free_and_badperm:
sk_free(sk);
return -EPERM;
free_and_noproto:
sk_free(sk);
return -EPROTONOSUPPORT;
do_oom:
return -ENOBUFS;
}
在net/core/sock.c
void sock_init_data(struct socket *sock, struct sock *sk)
{
skb_queue_head_init(&sk-receive_queue); /* 初始化3条队列 接受,发送,错误*/
skb_queue_head_init(&sk-write_queue);
skb_queue_head_init(&sk-error_queue);
init_timer(&sk-timer);
/* 初始化timer */
sk-allocation = GFP_KERNEL;
sk-rcvbuf = sysctl_rmem_default;
sk-sndbuf = sysctl_wmem_default;
sk-state = TCP_CLOSE;
sk-zapped = 1;
sk-socket = sock;
if(sock)
{
sk-type = sock-type;
sk-sleep = &sock-wait;
sock-sk = sk;
} else
sk-sleep = NULL;
sk-dst_lock
= RW_LOCK_UNLOCKED;
sk-callback_lock = RW_LOCK_UNLOCKED;
/* sock_def_wakeup(),sock_def_readable(),
sock_def_write_space(),sock_def_error_report(),
sock_def_destruct() 在net/core/sock.c */
sk-state_change = sock_def_wakeup;
sk-data_ready
= sock_def_readable;
sk-write_space
= sock_def_write_space;
sk-error_report = sock_def_error_report;
sk-destruct
=
sock_def_destruct;
sk-peercred.pid = 0;
sk-peercred.uid = -1;
sk-peercred.gid = -1;
sk-rcvlowat
= 1;
sk-rcvtimeo
= MAX_SCHEDULE_TIMEOUT; /* 设置接受,发送超时 */
sk-sndtimeo
= MAX_SCHEDULE_TIMEOUT;
atomic_set(&sk-refcnt, 1);
}
1.1 SOCK_STREAM的初始化
在net/ipv4/tcp_ipv4.c
static int tcp_v4_init_sock(struct sock *sk)
{
struct tcp_opt *tp = &(sk-tp_pinfo.af_tcp);
skb_queue_head_init(&tp-out_of_order_queue);
tcp_init_xmit_timers(sk);
tcp_prequeue_init(tp);
tp-rto
= TCP_TIMEOUT_INIT;
tp-mdev = TCP_TIMEOUT_INIT;
/* So many TCP implementations out there (incorrectly) count the
* initial SYN frame in their delayed-ACK and congestion control
* algorithms that we must have the following bandaid to talk
* efficiently to them.
-DaveM
*/
tp-snd_cwnd = 2;
/* See draft-stevens-tcpca-spec-01 for discussion of the
* initialization of these values.
*/
tp-snd_ssthresh = 0x7fffffff; /* Infinity */
tp-snd_cwnd_clamp = ~0;
tp-mss_cache = 536;
tp-reordering = sysctl_tcp_reordering;
sk-state = TCP_CLOSE;
sk-write_space = tcp_write_space; /* tcp_write_space() 在net/ipv4/tcp.c */
sk-use_write_queue = 1;
sk-tp_pinfo.af_tcp.af_specific = &ipv4_specific;
/* ipv4_specific 在net/ipv4/tcp_ipv4.c */
sk-sndbuf = sysctl_tcp_wmem[1]; /* 设置发送和接收缓冲区大小 */
sk-rcvbuf = sysctl_tcp_rmem[1]; /* sysctl_tcp_* 在net/ipv4/tcp.c */
atomic_inc(&tcp_sockets_allocated); /* tcp_sockets_allocated是当前TCP socket的数量 */
return 0;
}
SOCK_DGRAM 无需协议级初始化:udp_prot 没有提供 init 回调,inet_create 在调用该回调前会先判断它是否为空。
1.2 SOCK_RAW初始化
在net/ipv4/raw.c
static int raw_init(struct sock *sk)
{
struct raw_opt *tp = &(sk-tp_pinfo.tp_raw4);
if (sk-num == IPPROTO_ICMP)
memset(&tp-filter, 0, sizeof(tp-filter));
return 0;
}
2.Server
2.1 bind
static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in *addr=(struct sockaddr_in *)uaddr;
struct sock *sk=sock-sk;
unsigned short snum;
int chk_addr_ret;
int err;
/* If the socket has its own bind function then use it. (RAW) */
if(sk-prot-bind)
return sk-prot-bind(sk, uaddr, addr_len);
/* 只有SOCK_RAW定义了自己的bind函数 */
if (addr_len
return -EINVAL;
chk_addr_ret = inet_addr_type(addr-sin_addr.s_addr);
/* inet_addr_type返回地址的类型 */
/* 在net/ipv4/fib_frontend.c */
/* Not specified by any standard per-se, however it breaks too
* many applications when removed.
It is unfortunate since
* allowing applications to make a non-local bind solv