{
struct dst_entry *dst = skb_dst(skb);
struct neighbour *neigh;
int rc = 0;
if (!dst || !(neigh = dst->neighbour)) 异常退出
goto discard;
__skb_pull(skb, skb_network_offset(skb));
if (!neigh_event_send(neigh, skb)) { 判断邻居项是否有可用状态,如果可用,则把数据包发送出去
int err;
struct net_device *dev = neigh->dev;
if (dev->header_ops->cache && !dst->hh) {
write_lock_bh(&neigh->lock);
if (!dst->hh)
neigh_hh_init(neigh, dst, dst->ops->protocol);
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
neigh->ha, NULL, skb->len);
write_unlock_bh(&neigh->lock);
} else {
read_lock_bh(&neigh->lock);
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
neigh->ha, NULL, skb->len);
read_unlock_bh(&neigh->lock);
}
if (err >= 0)
rc = neigh->ops->queue_xmit(skb);
else
goto out_kfree_skb;
}
out:
return rc;
discard:
NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
dst, dst ? dst->neighbour : NULL);
out_kfree_skb:
rc = -EINVAL;
kfree_skb(skb);
goto out;
}
这里叫邻居事件发送,自己瞎叫的
{
neigh->used = jiffies;
if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) 假设带着NUD_NONE进来的
return __neigh_event_send(neigh, skb);
return 0;
}
这里开始涉及状态的东西了 先对NUD状态描述下
#define NUD_REACHABLE 0x02 邻居的地址被放入缓存,并且知道该邻居是可到达的
#define NUD_DELAY 0x08
#define NUD_PROBE 0x10 这三个用于状态转换阶段,当本地主机确定邻居是否可到达时,状态会发生改变
/* Dummy states */
#define NUD_NOARP 0x40 用于标记不要任何协议进行L3到L2的地址映射的邻居
#define NUD_PERMANENT 0x80 邻居的L2地址是静态配置,因此不需要邻居协议进行地址解析
#define NUD_NONE 0x00 邻居项刚被创建,还没有状态可用
#define NUD_VALID (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE|NUD_PROBE|NUD_STALE|NUD_DELAY)
{
int rc;
unsigned long now;
write_lock_bh(&neigh->lock);
rc = 0;
if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
goto out_unlock_bh;
now = jiffies;
if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
if (neigh->parms->mcast_probes + neigh->parms->app_probes) { .mcast_probes = 3,
atomic_set(&neigh->probes, neigh->parms->ucast_probes); 重发arp请求的次数 .ucast_probes = 3,
neigh->nud_state = NUD_INCOMPLETE; 设状态为NUD_INCOMPLETE,
neigh->updated = jiffies;
neigh_add_timer(neigh, now + 1); 且设置定时器
} else {
neigh->nud_state = NUD_FAILED;
neigh->updated = jiffies;
write_unlock_bh(&neigh->lock);
kfree_skb(skb);
return 1;
}
} else if (neigh->nud_state & NUD_STALE) {
NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
neigh->nud_state = NUD_DELAY;
neigh->updated = jiffies;
neigh_add_timer(neigh,
jiffies + neigh->parms->delay_probe_time);
}
if (neigh->nud_state == NUD_INCOMPLETE) { 如果是NUD_INCOMPLETE状态
if (skb) {
if (skb_queue_len(&neigh->arp_queue) >=检查队列长度,应该是每个邻居项都有一个可供缓存待解析的skb
neigh->parms->queue_len) { .queue_len = 3,
struct sk_buff *buff;
buff = __skb_dequeue(&neigh->arp_queue);
kfree_skb(buff); 取出一个 释放掉
NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
}
__skb_queue_tail(&neigh->arp_queue, skb); 把这个新的放进去
}
rc = 1;
}
out_unlock_bh:
write_unlock_bh(&neigh->lock);
return rc;
}
setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
struct sk_buff *skb = skb_peek(&neigh->arp_queue);
/* keep skb alive even if arp_queue overflows */
if (skb)
skb = skb_copy(skb, GFP_ATOMIC);
write_unlock(&neigh->lock);
neigh->ops->solicit(neigh, skb); 在arp_constructor()中被置上
atomic_inc(&neigh->probes);
kfree_skb(skb);
.family = AF_INET,
.solicit = arp_solicit,
.error_report = arp_error_report,
.output = neigh_resolve_output,
.connected_output = neigh_resolve_output,
.hh_output = dev_queue_xmit,
.queue_xmit = dev_queue_xmit,
};
->arp_solicit()
struct packet_type *pt, struct net_device *orig_dev)
{
struct arphdr *arp;
/* ARP header, plus 2 device addresses, plus 2 IP addresses. */
if (!pskb_may_pull(skb, arp_hdr_len(dev)))
goto freeskb;
arp = arp_hdr(skb);
if (arp->ar_hln != dev->addr_len ||
dev->flags & IFF_NOARP ||
skb->pkt_type == PACKET_OTHERHOST ||
skb->pkt_type == PACKET_LOOPBACK ||
arp->ar_pln != 4)
goto freeskb;
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
goto out_of_mem; 一系列的合法性检查
memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); 这里把skb的cb区域用作neighbour_cb结构空间
return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
freeskb:
kfree_skb(skb);
out_of_mem:
return 0;
}
{
__be16 ar_hrd; 硬件地址格式
__be16 ar_pro; 协议地址格式
unsigned char ar_hln; 硬件地址长度
unsigned char ar_pln; 协议地址长度
__be16 ar_op; 命令代码
#if 0
/*
* Ethernet looks like this : This bit is variable sized however...
*/
unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */
unsigned char ar_sip[4]; /* sender IP address */
unsigned char ar_tha[ETH_ALEN]; /* target hardware address */
unsigned char ar_tip[4]; /* target IP address */
#endif
};
进入arp_process()
goto out; 不是reply 也不是 request 那是啥呢?
sha = arp_ptr; 发送端以太网首址
arp_ptr += dev->addr_len;
memcpy(&sip, arp_ptr, 4); 发送端ip地址
arp_ptr += 4;
arp_ptr += dev->addr_len;
memcpy(&tip, arp_ptr, 4); 目的IP地址
goto out;
if (arp->ar_op == htons(ARPOP_REQUEST) && 收到一个请求包
ip_route_input(skb, tip, sip, 0, dev) == 0) {
rt = skb_rtable(skb);
addr_type = rt->rt_type;
if (addr_type == RTN_LOCAL) { 若是送往本机的arp请求包
int dont_send = 0;
if (!dont_send)
dont_send |= arp_ignore(in_dev,sip,tip);
if (!dont_send && IN_DEV_ARPFILTER(in_dev))
dont_send |= arp_filter(sip,tip,dev);
if (!dont_send) {
n = neigh_event_ns(&arp_tbl, sha, &sip, dev); 更新arp邻居表
if (n) {
arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); 发送arp应答
neigh_release(n);
}
}
goto out;
} else if (IN_DEV_FORWARD(in_dev)) { 代理arp
if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev &&
(arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) {
n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
if (n)
neigh_release(n);
if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED ||
skb->pkt_type == PACKET_HOST ||
in_dev->arp_parms->proxy_delay == 0) {
arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
} else {
pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb);
in_dev_put(in_dev);
return 0;
}
goto out;
}
}
if (IPV4_DEVCONF_ALL(dev_net(dev), ARP_ACCEPT)) {
/* Unsolicited ARP is not accepted by default.
It is possible, that this option should be enabled for some
devices (strip is candidate)
*/
if (n == NULL &&
arp->ar_op == htons(ARPOP_REPLY) &&
inet_addr_type(net, sip) == RTN_UNICAST)
n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
}
if (n) {
int state = NUD_REACHABLE; 如果存在
int override;
/* If several different ARP replies follows back-to-back,
use the FIRST one. It is possible, if several proxy
agents are active. Taking the first reply prevents
arp trashing and chooses the fastest router.
*/
override = time_after(jiffies, n->updated + n->parms->locktime);
/* Broadcast replies and request packets
do not assert neighbour reachability.
*/
if (arp->ar_op != htons(ARPOP_REPLY) ||
skb->pkt_type != PACKET_HOST)
state = NUD_STALE;
neigh_update(n, sha, state, override ? NEIGH_UPDATE_F_OVERRIDE : 0); 更新邻居项状态
neigh_release(n);
}
层的处理增加负担,也即不想改变IP 层的原始语义。
Linux维护一个stale状态其实就是为了保留一个neighbour结构体,在其状态改变时只是个别字段得到修改或者填充。如果按照简单的实现,只保存一个reachable状态即可,其到期则删除arp缓存表项。Linux的做法只是做了很多的优化,但是如果你为这些优化而绞尽脑汁,那就悲剧了...