现在的位置: 首页 > 综合 > 正文

linux网络协议栈分析笔记8-arp邻居子系统1

2013年10月04日 ⁄ 综合 ⁄ 共 11238字 ⁄ 字号 评论关闭
下面进入arp协议,看看linux中被称为“邻居子系统”的部分是什么样的
/*
 * Packet-type descriptor for ARP: frames whose protocol field equals
 * ETH_P_ARP (converted to big-endian) are handed to arp_rcv().
 */
static struct packet_type arp_packet_type __read_mostly = {
     .type =     cpu_to_be16(ETH_P_ARP),   
     .func =     arp_rcv,
};
还记得那个netif_receive_skb吧,选择三层协议处理的地方,arp通过dev_add_pack注册了上述的packet_type结构
#define ETH_P_ARP     0x0806          /* Protocol ID of Address Resolution (ARP) packets */

/*
 * arp_init - initialise the ARP part of the neighbour subsystem.
 *
 * Sets up the ARP neighbour table, registers the ARP packet handler,
 * creates the /proc entries and subscribes to netdevice notifications.
 */
void __init arp_init(void)
{
     /* Initialise the ARP neighbour table. */
     neigh_table_init(&arp_tbl);

     /* Register ARP as a layer-3 protocol type (see arp_packet_type). */
     dev_add_pack(&arp_packet_type);
     /* Create the ARP-related part of the /proc filesystem. */
     arp_proc_init();
#ifdef CONFIG_SYSCTL
     neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4,
                     NET_IPV4_NEIGH, "ipv4", NULL, NULL);
#endif
     /* Hook into the netdevice notifier chain. */
     register_netdevice_notifier(&arp_netdev_notifier);
}

neigh_table_init(&arp_tbl);        其实就是把arp_tbl挂在全局的neigh_tables变量上
          for (tmp = neigh_tables; tmp; tmp = tmp->next) {
          if (tmp->family == tbl->family)        用协议族在区分  arp类型为AF_INET   IPV6的ND协议为AF_INET6
               break;
     }
     tbl->next     = neigh_tables; 
     neigh_tables     = tbl; 

重要数据结构:
struct neigh_table
{
     struct neigh_table     *next;
     int               family;                     协议族          
     int               entry_size;             邻居项的大小    为sizeof(struct neighbour) + 4  考虑到有零长数组
     int               key_len;                   hash关键字的长度 为4
     __u32               (*hash)(const void *pkey, const struct net_device *);         hash函数
     int               (*constructor)(struct neighbour *);
     int               (*pconstructor)(struct pneigh_entry *);
     void               (*pdestructor)(struct pneigh_entry *);
     void               (*proxy_redo)(struct sk_buff *skb);
     char               *id;
     struct neigh_parms     parms;
     /* HACK. gc_* shoul follow parms without a gap! */
     int               gc_interval;               常规的垃圾回收间隔时间  默认30秒
     int               gc_thresh1;               门限
     int               gc_thresh2;
     int               gc_thresh3;                    
     unsigned long          last_flush;
     struct delayed_work     gc_work;            常规的垃圾回收定时器
     struct timer_list      proxy_timer;
     struct sk_buff_head     proxy_queue;
     atomic_t          entries;      整个表中邻居项的个数  当entries大于hash_mask+1,哈希桶增长为原来的两倍
     rwlock_t          lock;
     unsigned long          last_rand;
     struct kmem_cache          *kmem_cachep;
     struct neigh_statistics     *stats;
     struct neighbour     **hash_buckets;               存放邻居项的哈希桶
     unsigned int          hash_mask;                        hash桶大小的掩码
     __u32               hash_rnd;
     struct pneigh_entry     **phash_buckets;          用于代理arp的邻居哈希表
};
struct neigh_table arp_tbl = {             arp的邻居表项
     .family =     AF_INET,
     .entry_size =     sizeof(struct neighbour) + 4,
     .key_len =     4,
     .hash =          arp_hash,
     .constructor =     arp_constructor,

     .proxy_redo =     parp_redo,
     .id =          "arp_cache",
     .parms = {
          .tbl =               &arp_tbl,
          .base_reachable_time =     30 * HZ,
          .retrans_time =     1 * HZ,
          .gc_staletime =     60 * HZ,
          .reachable_time =          30 * HZ,
          .delay_probe_time =     5 * HZ,
          .queue_len =          3,
          .ucast_probes =     3,
          .mcast_probes =     3,
          .anycast_delay =     1 * HZ,
          .proxy_delay =          (8 * HZ) / 10,
          .proxy_qlen =          64,
          .locktime =          1 * HZ,
     },
     .gc_interval =     30 * HZ,
     .gc_thresh1 =     128,
     .gc_thresh2 =     512,
     .gc_thresh3 =     1024,
};

struct neighbour
{
     struct neighbour     *next;
     struct neigh_table     *tbl;
     struct neigh_parms     *parms;
     struct net_device          *dev;
     unsigned long          used;
     unsigned long          confirmed;
     unsigned long          updated;
     __u8               flags;
     __u8               nud_state;                          邻居项的对象
     __u8               type;
     __u8               dead;                  存活标志,如果dead为1,那么垃圾回收函数会将此项删除
     atomic_t          probes;              重发arp请求的次数
     rwlock_t          lock;
     unsigned char          ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
     struct hh_cache          *hh;
     atomic_t          refcnt;
     int               (*output)(struct sk_buff *skb);
     struct sk_buff_head     arp_queue;
     struct timer_list     timer;                       定时器
     const struct neigh_ops     *ops;      邻居项操作函数
     u8     primary_key[0];  哈希关键字 这是个零长数组空间大小在分配时+4,就是那个4 其实就是ip地址长度
.entry_size =     sizeof(struct neighbour) + 4,  见struct neigh_table arp_tbl ={} 
后面在函数的分析过程中会讲到这些数据结构是如何组织的,以及其功能

分析入口:查找到路由后,会调用arp_bind_neighbour绑定一个邻居项
int arp_bind_neighbour(struct dst_entry *dst)      参数是个dst路由表项,路由的东西我们暂时放一边,后续我们会详细分析 姑且认为就是个表项
{
     struct net_device *dev = dst->dev;
     struct neighbour *n = dst->neighbour;       取得路由项banding的邻居项
     if (dev == NULL)
          return -EINVAL;
     if (n == NULL) {
          __be32 nexthop = ((struct rtable *)dst)->rt_gateway;  取下一跳  路由就是找下一跳
          if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT))
               nexthop = 0;
          n = __neigh_lookup_errno( &arp_tbl, &nexthop, dev);    找下一跳对应的邻居项
          if (IS_ERR(n))
               return PTR_ERR(n);            
          dst->neighbour = n;                找到了就赋值
     }
     return 0;
}

/*
 * __neigh_lookup_errno - look up a neighbour entry, creating it if absent.
 * @tbl:  neighbour table to search
 * @pkey: lookup key; when called from arp_bind_neighbour() this is the
 *        next-hop IP address, or 0 for loopback and point-to-point devices
 * @dev:  device the neighbour is reached through
 *
 * Returns the entry found by neigh_lookup(), otherwise whatever
 * neigh_create() returns (a new entry or an ERR_PTR value).
 */
static inline struct neighbour *__neigh_lookup_errno(struct neigh_table *tbl, const void *pkey,
  struct net_device *dev)
{
     /* Look for the neighbour entry that matches the IP address. */
     struct neighbour *n = neigh_lookup(tbl, pkey, dev);
     if (n)
          return n;
     /* Not found: create a new neighbour entry. */
     return neigh_create(tbl, pkey, dev);
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
                      struct net_device *dev)
{
     struct neighbour *n;
     int key_len = tbl->key_len;        为4,ip地址长度
     u32 hash_val;
     NEIGH_CACHE_STAT_INC(tbl, lookups);
     read_lock_bh(&tbl->lock);
     hash_val = tbl->hash(pkey, dev);          利用注册的arphash函数进行key的计算         arp_hash()
     for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {          在hash桶里匹配
          if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
               neigh_hold(n);
               NEIGH_CACHE_STAT_INC(tbl, hits);
               break;
          }
     }
     read_unlock_bh(&tbl->lock);
     return n;
}

struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
                      struct net_device *dev)
{
     u32 hash_val;
     int key_len = tbl->key_len;
     int error;
     struct neighbour *n1, *rc, *n = neigh_alloc(tbl);            申请一个邻居项结构
     if (!n) {
          rc = ERR_PTR(-ENOBUFS);
          goto out;
     }
     memcpy(n->primary_key, pkey, key_len);        哈希键值就是目的ip地址
     n->dev = dev;
     dev_hold(dev);
     /* Protocol specific setup. */
     if (tbl->constructor &&     (error = tbl->constructor(n)) < 0) {    有构造函数就调用构造函数 arp_constructor()
          rc = ERR_PTR(error);
          goto out_neigh_release;
     }
     /* Device specific setup. */
     if (n->parms->neigh_setup &&
         (error = n->parms->neigh_setup(n)) < 0) {         有setup就调用setup 
          rc = ERR_PTR(error);
          goto out_neigh_release;
     }
     n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
     write_lock_bh(&tbl->lock);
     if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1))      如果总项数超过hash_mask 就增加hash表
          neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);

     hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
     if (n->parms->dead) {
          rc = ERR_PTR(-EINVAL);
          goto out_tbl_unlock;
     }
     for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {          插入hash表,有可能已经存在
          if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
               neigh_hold(n1);
               rc = n1;
               goto out_tbl_unlock;
          }
     }
     n->next = tbl->hash_buckets[hash_val];
     tbl->hash_buckets[hash_val] = n;
     n->dead = 0;
     neigh_hold(n);
     write_unlock_bh(&tbl->lock);
     NEIGH_PRINTK2("neigh %p is created.\n", n);
     rc = n;
out:
     return rc;
out_tbl_unlock:
     write_unlock_bh(&tbl->lock);
out_neigh_release:
     neigh_release(n);
     goto out;
}

static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
     struct neighbour *n = NULL;
     unsigned long now = jiffies;
     int entries;
     entries = atomic_inc_return(&tbl->entries) - 1;        将邻居项数+1
     if (entries >= tbl->gc_thresh3 ||
         (entries >= tbl->gc_thresh2 &&
          time_after(now, tbl->last_flush + 5 * HZ))) {
          if (!neigh_forced_gc(tbl) &&
              entries >= tbl->gc_thresh3)                    超过一定的门限值
               goto out_entries;
     }
     n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);   从高速缓存中申请一个邻居项
     if (!n)
          goto out_entries;
     skb_queue_head_init(&n->arp_queue);
     rwlock_init(&n->lock);
     n->updated       = n->used = now;
     n->nud_state       = NUD_NONE;               初始化状态
     n->output       = neigh_blackhole;
     n->parms       = neigh_parms_clone(&tbl->parms);
     setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);  定时器
     NEIGH_CACHE_STAT_INC(tbl, allocs);
     n->tbl            = tbl;
     atomic_set(&n->refcnt, 1);
     n->dead            = 1;
out:
     return n;
out_entries:
     atomic_dec(&tbl->entries);
     goto out;
}

static int arp_constructor(struct neighbour *neigh)
{
     __be32 addr = *(__be32*)neigh->primary_key;
     struct net_device *dev = neigh->dev;
     struct in_device *in_dev;
     struct neigh_parms *parms;

     rcu_read_lock();
     in_dev = __in_dev_get_rcu(dev);
     if (in_dev == NULL) {
          rcu_read_unlock();
          return -EINVAL;
     }

     neigh->type = inet_addr_type(dev_net(dev), addr);

     parms = in_dev->arp_parms;
     __neigh_parms_put(neigh->parms);
     neigh->parms = neigh_parms_clone(parms);
     rcu_read_unlock();

     if (!dev->header_ops) {              无二层头操作的, 给予一套arp_direct_ops操作集
          neigh->nud_state = NUD_NOARP;
          neigh->ops = &arp_direct_ops;
          neigh->output = neigh->ops->queue_xmit;
     } else {

#if 1
          /* So... these "amateur" devices are hopeless.
             The only thing, that I can say now:
             It is very sad that we need to keep ugly obsolete
             code to make them happy.

             They should be moved to more reasonable state, now
             they use rebuild_header INSTEAD OF hard_start_xmit!!!
             Besides that, they are sort of out of date
             (a lot of redundant clones/copies, useless in 2.1),
             I wonder why people believe that they work.
          */
          switch (dev->type) {
          default:
               break;
          case ARPHRD_ROSE:
#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
          case ARPHRD_AX25:
#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
          case ARPHRD_NETROM:
#endif
               neigh->ops = &arp_broken_ops;
               neigh->output = neigh->ops->output;
               return 0;
#endif
          ;}
#endif
          if (neigh->type == RTN_MULTICAST) {        根据不同二层协议类型,给予不同的操作集
               neigh->nud_state = NUD_NOARP;
               arp_mc_map(addr, neigh->ha, dev, 1);
          } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
               neigh->nud_state = NUD_NOARP;
               memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
          } else if (neigh->type == RTN_BROADCAST || dev->flags&IFF_POINTOPOINT) {
               neigh->nud_state = NUD_NOARP;
               memcpy(neigh->ha, dev->broadcast, dev->addr_len);
          }
          if (dev->header_ops->cache)               //ether_setup函数中dev->header_ops = &eth_header_ops;  
               neigh->ops = &arp_hh_ops;          .cache          = eth_header_cache,  所以走这里
          else
               neigh->ops = &arp_generic_ops;

          if (neigh->nud_state&NUD_VALID)
               neigh->output = neigh->ops->connected_output;
          else
               neigh->output = neigh->ops->output;                 我们的初始状态是NUD_NONE 所以走这里
     }
     return 0;
}


/*
 * Neighbour operations chosen by arp_constructor() for devices whose
 * header_ops provide a ->cache callback.  Both output hooks point at
 * neigh_resolve_output(); hh_output and queue_xmit go straight to
 * dev_queue_xmit().
 */
static const struct neigh_ops arp_hh_ops = {
     .family =          AF_INET,
     .solicit =          arp_solicit,
     .error_report =          arp_error_report,
     .output =          neigh_resolve_output,
     .connected_output =     neigh_resolve_output,
     .hh_output =          dev_queue_xmit,
     .queue_xmit =          dev_queue_xmit,
};

最后用一张图对这段分析做个总结:    没有ULNI中画的好,只是为了说明结构组织   
每个邻居协议都有个table结构(struct neigh_table),IPV4的arp_tbl     IPV6的nd_tbl            
对于每个发送报文查到路由后都会对目的ip进行邻居查找,因此对每个目的ip都有个邻居项来保存信息
邻居项以hash表的形式保存在邻居表中
上面的分析仅仅对邻居项的创建进行说明,而整个子系统的东西还有很多很多...

抱歉!评论已关闭.