菜鸟笔记
提升您的技术认知

TCP连接建立系列 — TCP选项解析

本文主要分析:在收到客户端的syn包时,服务器端是如何解析它所携带的tcp选项,并结合本端情况决定是否予以支持。

内核版本:3.6 

概述

 

收到客户端的SYN包时,需要全面解析它携带的TCP选项,这样就能知道客户端支持哪些选项;如果本端也支持,那么该连接就启用这些TCP选项。在连接建立的过程中,这些信息保存在连接请求块(request_sock、inet_request_sock、tcp_request_sock)中。

 

函数调用路径:

tcp_v4_conn_request

        |--> tcp_parse_options

 

3.6版本linux内核支持的tcp选项包括:

nop,用于填充。

max segment size,最大分段大小。

window scaling,窗口扩大因子。

sack permit,是否允许使用sack。

sack,携带sack块。

timestamp,时间戳选项。

md5sig,md5签名。

cookie,cookie extension,2013年3月已从内核移除。

exp,experimental,处于实验阶段的选项,新版本用于fast open。

 

实现

 

全面解析数据包的tcp选项,并保存到指定的tcp_options_received实例中。

/* look for tcp options. normally only called on syn and synack packets.
 * but, this can also be called on packets in the established flow when the fast version below fails.
 */
void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, const u8 **hvpp,
                       int estab, struct tcp_fastopen_cookie *foc)
{
    const unsigned char *ptr;
    const struct tcphdr *th = tcp_hdr(skb);
    int length = (th->doff * 4) - sizeof(struct tcphdr); /* 选项的总长度 */
    ptr = (const unsigned char *) (th   1); /* 选项的起始地址 */
    opt_rx->saw_tstamp = 0; /* saw timestamp on last packet */
    while(length > 0) {
        int opcode = *ptr  ; /* 选项kind */
        int opsize; /* 选项length */
        switch(opcode) {
        case tcpopt_eol: /* end of options */
            return; 
        case tcpopt_nop: /* padding,填充选项 */
            length--; /* 此选项只占一个字节 */
            continue;
        default:
            opsize = *ptr  ; /* 选项长度 */
            if (opsize < 2) /* silly options */
                return; /* 选项长度过小 */
            if (opsize > length)
                return; /* don't parse partial options */
            switch(opcode) {
            case tcpopt_mss: /* mss选项 */
                if (opsize == tcpolen_mss && th->syn && ! estab) {
                    u16 in_mss = get_unaligned_be16(ptr);
                    if (in_mss) {
                        /* 如果用户指定了mss,且比对端通告的小 */
                        if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
                            in_mss = opt_rx->user_mss;
                        opt_rx->mss_clamp = in_mss; /* maximal mss */
                    }
                }
                break;
            case tcpopt_window: /* window scaling */
                if (opsize == tcpolen_window && th->syn && ! estab &&
                    sysctl_tcp_window_scaling) {
                    __u8 snd_wscale = *(__u8 *)ptr; /* 对端的接收窗口扩大因子 */
                    opt_rx->wscale_ok = 1; /* 连接使用窗口扩大选项 */
                    if (snd_wscale > 14) {
                        net_info_ratelimited("%s: illegal window scaling value %d >14 received\n",
                               __func__, snd_wscale);
                        snd_wscale = 14;
                    }
                    opt_rx->snd_wscale = snd_wscale; /* 保存对端的接收窗口扩大因子 */
                }
                break;
            case tcpopt_timestamp: /* better rtt estimations/paws */
                if ((opsize == tcpolen_timestamp) && ((estab && opt_rx->tstamp_ok) ||
                     (!estab && sysctl_tcp_timestamps))) {
                    opt_rx->saw_tstamp = 1; /* 连接支持timestamp选项 */
                    opt_rx->rcv_tsval = get_unaligned_be32(ptr); /* time stamp value */
                    opt_rx->rcv_tsecr = get_unaligned_be32(ptr   4); /* time stamp echo reply */
                }
                break;
            case tcpopt_sack_perm: /* sack permitted */
                if (opsize == tcpolen_sack_perm && th->syn && !estab && sysctl_tcp_sack) {
                    opt_rx->sack_ok = tcp_sack_seen; /* 连接支持sack选项 */
                    tcp_sack_reset(opt_rx); /* 清空dsack和num_sacks变量 */
                }
                break;
            case tcpopt_sack: /* sack block */
                if ((opsize >= (tcpolen_sack_base   tcpolen_sack_perblock)) &&
                    !((opsize - tcpolen_sack_base) % tcpolen_sack_perblock) && opt_rx->sack_ok) {
                    /* 保存sack选项的起始偏移地址 */
                    tcp_skb_cb(skb)->sacked = (ptr - 2) - (unsigned char *) th; 
                }
                break;
#ifdef config_tcp_md5sig
            case tcpopt_md5sig:
                /* the md5 hash has already been checked
                 * (see tcp_v{4, 6}_do_rcv()).
                 */
                break;
#endif
            /* 注意,在新版内核中此选项已被删除。cookie extension (experimental) */
            case tcpopt_cookie: 
                /* this option is variable length. */
                switch(opsize) {
                case tcpolen_cookie_base:
                    /* not yet implemented */
                    break;
                case tcpolen_cookie_pair:
                    /* not yet implemented */
                    break;
                case tcpolen_cookie_min   0:
                case tcpolen_cookie_min   2:
                case tcpolen_cookie_min   4:
                case tcpolen_cookie_min   6:
                case tcpolen_cookie_max:
                    /* 16-bit multple */
                    opt_rx->cookie_plus = opsize;
                    *hvpp = ptr;
                    break;
                default:
                    /* ignore option */
                    break;
                }
                break;
 
            case tcpopt_exp: /* experimental */
                /* fast open option shares code 254 using a 16 bits magic number.
                 * it's valid only in syn or syn-ack with an even size.
                 */
                if (opsize < tcpolen_exp_fastopen_base || get_unaligned_be16(ptr) != tcpopt_fastopen_magic
                    || foc == null || ! th->syn || (opsize & 1))
                    break;
                foc->len = opsize - tcpolen_exp_fastopen_base;
                if (foc->len >= tcp_fastopen_cookie_min && foc->len <= tcp_fastopen_cookie_max)
                    memcpy(foc->val, ptr   2, foc->len);
                else if (foc->len != 0)
                    foc->len = -1;
                break;
            }
            ptr  = opsize - 2;
            length -= opsize;
        }
    }
}
/* TCP option kinds */
#define tcpopt_nop 1 /* padding */
#define tcpopt_eol 0 /* end of options */
#define tcpopt_mss 2 /* segment size negotiating */
#define tcpopt_window 3 /* window scaling */
#define tcpopt_timestamp 8 /* better rtt estimations/paws */
#define tcpopt_sack_perm 4 /* sack permitted */
#define tcpopt_sack 5 /* sack block */
#define tcpopt_md5sig 19 /* md5 signature (rfc2385) */
#define tcpopt_cookie 253 /* cookie extension (experimental) */
#define tcpopt_exp 254 /* experimental */
/* Magic number that follows the option length, used to share the TCP
 * experimental option kind between users.
 * see draft-ietf-tcpm-experimental-options-00.txt
 */
#define tcpopt_fastopen_magic 0xf989
/* TCP option lengths (kind and length bytes included) */
#define tcpolen_mss 4
#define tcpolen_window 3
#define tcpolen_timestamp 10
#define tcpolen_sack_perm 2
#define tcpolen_sack_base 2 /* sack option header: kind + length */
#define tcpolen_sack_perblock 8 /* one sack block: two 32-bit sequence numbers */
#define tcpolen_cookie_base 2 /* cookie-less header extension */
#define tcpolen_cookie_pair 3 /* cookie pair header extension */
#define tcpolen_cookie_min (tcpolen_cookie_base + tcp_cookie_min)
#define tcpolen_cookie_max (tcpolen_cookie_base + tcp_cookie_max)
#define tcpolen_exp_fastopen_base 4
/* these are used to set the sack_ok field in struct tcp_options_received */
#define tcp_sack_seen (1 << 0) /* peer is sack capable */
#define tcp_fack_enabled (1 << 1) /* fack is enabled locally */
#define tcp_dsack_seen (1 << 2) /* dsack was received from peer */
/* for tcp_cookie_transactions (tcpct) socket option */
#define tcp_cookie_min 8 /* 64-bits */
#define tcp_cookie_max 16 /* 128-bits */
/* tcp fast open */
#define tcp_fastopen_cookie_min 4 /* min fast open cookie size in bytes */
#define tcp_fastopen_cookie_max 16 /* max fast open cookie size in bytes */
/* TCP Fast Open cookie as stored in memory. */
struct tcp_fastopen_cookie {
    s8 len; /* cookie length in bytes; tcp_parse_options() stores -1 for an unsupported size */
    u8 val[tcp_fastopen_cookie_max]; /* cookie bytes; only the first len bytes are filled by the parser */
};
/* Forget any recorded SACK state: zero both the SACK block count and the
 * D-SACK flag of @rx_opt.
 */
static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
{
    rx_opt->num_sacks = 0;
    rx_opt->dsack = 0;
}

 

用于判断连接是否使用ecn。要注意的是在建立连接时,要求ip报的ecn域不能被设置,否则就禁用ecn。

/*
 * RFC 3168, section 6.1.1: SYN packets must not have ECT/ECN bits set.
 *
 * A SYN that arrives with those IP bits set means some network element is
 * tampering with the TOS field. To avoid possible false congestion
 * notifications, ECN is not negotiated for such a request: ecn_ok is set
 * only when ECN is enabled locally, the SYN carries both ECE and CWR, and
 * the IP DS field is not ECT.
 */
static inline void tcp_ecn_create_request(struct request_sock *req, const struct sk_buff *skb)
{
    const struct tcphdr *hdr = tcp_hdr(skb);

    if (!sysctl_tcp_ecn)
        return;
    if (!hdr->ece || !hdr->cwr)
        return;
    if (!inet_ecn_is_not_ect(tcp_skb_cb(skb)->ip_dsfield))
        return;

    inet_rsk(req)->ecn_ok = 1; /* the connection may use ECN */
}

 

清零tcp选项。

/* Reset the negotiated-option state in @rx_opt: timestamp, SACK, window
 * scaling (flag and shift count) and the cookie-extension size.
 */
static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
{
    rx_opt->sack_ok = 0;
    rx_opt->tstamp_ok = 0;
    rx_opt->snd_wscale = 0;
    rx_opt->wscale_ok = 0;
    rx_opt->cookie_plus = 0;
}

 

网站地图