现在的位置: 首页 > 综合 > 正文

Linux kernel version 1.0 TCP/IP 协议栈源代码分析2, TCP/IP协议栈启动分析 (按源代码中运行的顺序分析,Kernel startup)

2014年02月23日 ⁄ 综合 ⁄ 共 16829字 ⁄ 字号 评论关闭

 

Linux_1.0_TCP-IP协议栈分析

Author: Wenxy

Version: 1.0

Begin date: 2009-2-10

End date:

1.    TCP/IP协议栈启动分析 (按源代码中运行的顺序分析,Kernel startup)

1.1 ./net/inet/sock.c

/* Called by ddi.c on kernel startup.  */

void inet_proto_init(struct ddi_proto *pro)

{

  struct
inet_protocol
[Wenxy1]  *p;

  int
i;

 

 
printk("Swansea University Computer Society Net2Debugged
[1.30]/n");

  /*
Set up our UNIX VFS major device. */

  if
(register_chrdev(AF_INET_MAJOR, "af_inet", &inet_fops) < 0) {

       printk("%s:
cannot register major device %d!/n",

                                   pro->name,
AF_INET_MAJOR);

       return;

  } 

 

  /*
Tell SOCKET that we are alive... */

 
(void) sock_register(inet_proto_ops.family, &inet_proto_ops);

 

 
seq_offset = CURRENT_TIME*250;

 

  /* Add all the protocols. */

 
for(i = 0; i < SOCK_ARRAY_SIZE; i++) {

       tcp_prot.sock_array[i] =
NULL;                                                                                     

       udp_prot.sock_array[i]
= NULL;

       raw_prot.sock_array[i]
= NULL;

  }

 
printk("IP Protocols: ");

  for(p = inet_protocol_base; p != NULL;) [Wenxy2] {

       struct
inet_protocol *tmp;

 

       tmp
= (struct inet_protocol *) p->next;

       inet_add_protocol(p);
   /* init TCP/IP
stack */

       printk("%s%s",p->name,tmp?",
":"/n");

       p
= tmp;

  }

 

  /* Initialize the DEV module. */

 
dev_init();

 

  /*
Initialize the "Buffer Head" pointers. */

 
bh_base[INET_BH].routine = inet_bh;

}

 

1.2 ./net/inet/protocol.c

/*
 * Add a protocol handler to the inet_protos hash table.
 *
 * prot is pushed onto the front of the chain selected by
 * prot->protocol & (MAX_INET_PROTOS - 1).  prot->copy ends up 1 when another
 * handler with the same protocol number is already on that chain, and 0
 * otherwise.
 *
 * prot: handler to register; its next and copy fields are overwritten.
 */
void
inet_add_protocol(struct inet_protocol *prot)
{
	unsigned char hash;
	struct inet_protocol *p2;

	hash = prot->protocol & (MAX_INET_PROTOS - 1);
	prot->next = inet_protos[hash];
	inet_protos[hash] = prot;
	prot->copy = 0;

	/* Set the copy bit if we need to. */
	p2 = (struct inet_protocol *) prot->next;
	while (p2 != NULL) {
		if (p2->protocol == prot->protocol) {
			prot->copy = 1;
			break;
		}
		/*
		 * Step along the chain from p2 itself.  Re-reading prot->next
		 * here would revisit the same entry forever whenever the first
		 * chained handler has a different protocol number.
		 */
		p2 = (struct inet_protocol *) p2->next;
	}
}

1.3 ./net/inet/dev.c

/*
 * Initialize the DEV module: run the init hook of every device on the
 * dev_base chain, unlink the devices whose hook reports failure, then set
 * the IP broadcast address.
 */
void
dev_init(void)
{
	struct device *cur, *kept;

	/*
	 * Walk the device chain.  A device whose init hook returns non-zero
	 * is taken off the chain, which disconnects it until the next reboot.
	 * A device without an init hook, or whose hook returns zero, stays.
	 */
	kept = NULL;		/* most recent device left on the chain */
	cur = dev_base;
	while (cur != NULL) {
		int failed = cur->init && cur->init(cur);	/* [Wenxy3] */

		if (!failed) {
			kept = cur;
		} else if (kept == NULL) {
			/* Nothing kept yet: the chain head moves past cur. */
			dev_base = cur->next;
		} else {
			kept->next = cur->next;
		}
		cur = cur->next;
	}

	/* Set up some IP addresses. */
	ip_bcast = in_aton("255.255.255.255");
}

 

1.4 ./drivers/net/eexpress.c (assuming an Intel EtherExpress NIC is installed)

/* Check for a network
adaptor of this type, and return '0' iff one exists.

   If dev->base_addr == 0, probe all likely
locations.

   If dev->base_addr == 1, always return
failure.

   If dev->base_addr == 2, (detachable
devices only) alloate space for the

   device and return success.

   */

int

express_probe(struct device *dev)

{

       /*
Don't probe all settable addresses, 0x[23][0-7]0, just common ones. */

       int
*port, ports[] = {0x300, 0x270, 0x320, 0x340, 0};

       int
base_addr = dev->base_addr;

 

       if
(base_addr > 0x1ff)    /* Check a
single specified location. */

              return
eexp_probe1(dev, base_addr);

       else
if (base_addr > 0)

              return
ENXIO;              /* Don't probe at all.
*/

 

       for
(port = &ports[0]; *port; port++) {

              short
id_addr = *port + ID_PORT;

              unsigned
short sum = 0;

              int
i;

#ifdef notdef

              for
(i = 16; i > 0; i--)

                     sum
+= inb(id_addr);

              printk("EtherExpress
ID checksum is %04x./n", sum);

#else

              for
(i = 4; i > 0; i--) {

                     short
id_val = inb(id_addr);

                     sum
|= (id_val >> 4) << ((id_val & 3) << 2);

              }

#endif

              if
(sum == 0xbaba

                     &&
eexp_probe1(dev, *port) == 0)

                     return
0;

       }

 

       return
ENODEV;                  /* ENODEV would
be more accurate. */

}

 

 

int eexp_probe1(struct
device *dev, short ioaddr)

{

       unsigned
short station_addr[3];

       int
i;

 

       printk("%s:
EtherExpress at %#x,", dev->name, ioaddr);

 

       /*
The station address is stored !backwards! in the EEPROM, reverse

          after reading.  (Hmmm, a little brain-damage there at Intel,
eh?) */

       station_addr[0]
= read_eeprom(ioaddr, 2);

       station_addr[1]
= read_eeprom(ioaddr, 3);

       station_addr[2]
= read_eeprom(ioaddr, 4);

 

       /*
Check the first three octets of the S.A. for the manufactor's code. */

       if
(station_addr[2] != 0x00aa || (station_addr[1] & 0xff00) != 0x0000) {

              printk("
rejected (invalid address %04x%04x%04x)./n",

                        station_addr[2], station_addr[1],
station_addr[0]);

              return
ENODEV;

       }

 

       /*
We've committed to using the board, and can start filling in *dev. */

       snarf_region(ioaddr,
16);

       dev->base_addr
= ioaddr;

 

       for
(i = 0; i < 6; i++) {

              dev->dev_addr[i]
= ((unsigned char*)station_addr)[5-i];

              printk("
%02x", dev->dev_addr[i]);

       }

 

       /*
There is no reason for the driver to care, but I print out the

          interface to minimize bogus bug reports. */

       {

              char
irqmap[] = {0, 9, 3, 4, 5, 10, 11, 0};

              char
*ifmap[] = {"AUI", "BNC", "10baseT"};

              enum
iftype {AUI=0, BNC=1, TP=2};

              unsigned
short setupval = read_eeprom(ioaddr, 0);

 

              dev->irq
= irqmap[setupval >> 13];

              dev->if_port
= (setupval & 0x1000) == 0 ? AUI :

                     read_eeprom(ioaddr,
5) & 0x1 ? TP : BNC;

              printk(",
IRQ %d, Interface %s./n", dev->irq, ifmap[dev->if_port]);

              /*
Release the IRQ line so that it can be shared if we don't use the

                 ethercard. */

              outb(0x00,
ioaddr + SET_IRQ);

       }

 

       /*
It's now OK to leave the board in reset, pending the open(). */

       outb(ASIC_RESET,
ioaddr + EEPROM_Ctrl);

 

       if
((dev->mem_start & 0xf) > 0)

              net_debug
= dev->mem_start & 7;

 

       if
(net_debug)

              printk(version);

 

       /*
Initialize the device structure. */

       dev->priv
= kmalloc(sizeof(struct net_local), GFP_KERNEL);

       memset(dev->priv,
0, sizeof(struct net_local));

 

       dev->open             = eexp_open;

       dev->stop              = eexp_close;

       dev->hard_start_xmit =
eexp_send_packet;

       dev->get_stats       = eexp_get_stats;

#ifdef
HAVE_MULTICAST

       dev->set_multicast_list =
&set_multicast_list;

#endif

[Wenxy4] 

       /*
Fill in the fields of the device structure with ethernet-generic values.

          This should be in a common file instead of
per-driver.  */

       for
(i = 0; i < DEV_NUMBUFFS; i++)

              dev->buffs[i]
= NULL;

 

       dev->hard_header   = eth_header;

       dev->add_arp  = eth_add_arp;

       dev->queue_xmit
= dev_queue_xmit;

       dev->rebuild_header
= eth_rebuild_header;

       dev->type_trans
= eth_type_trans;

 

       dev->type              = ARPHRD_ETHER;

       dev->hard_header_len
= ETH_HLEN;

       dev->mtu              = 1500; /* eth_mtu */

       dev->addr_len = ETH_ALEN;

       for
(i = 0; i < ETH_ALEN; i++) {

              dev->broadcast[i]=0xff;

       }

 

       /*
New-style flags. */

       dev->flags             = IFF_BROADCAST;

       dev->family           = AF_INET;

       dev->pa_addr  = 0;

       dev->pa_brdaddr
= 0;

       dev->pa_mask       = 0;

       dev->pa_alen  = sizeof(unsigned long);

 

       return
0;

}

 

 

Note: at this point the TCP/IP stack and the NIC device have both been initialized, so network communication is ready.

 

1.5 驱动接收到网络数据包后,让TCP/IP协议栈处理的流程,通常kernel的设计是:驱动程序处理上半部分的工作,内核的中断处理程序来处理下半部分的工作。

由于在网卡驱动中接收到了一个网络数据包,上半部分的工作是把该数据包加入到 sk_buff 链表的尾部(tail)。以8390网卡驱动为例,代码如下:

/* We have a good packet(s), get it/them
out of the buffers. */

 

static void ei_receive(struct device *dev)

{

    int e8390_base = dev->base_addr;

    struct ei_device
*ei_local = (struct ei_device *) dev->priv;

   
int rxing_page, this_frame, next_frame, current_offset;

   
int rx_pkt_count = 0;

   
struct e8390_pkt_hdr rx_frame;

   
int num_rx_pages = ei_local->stop_page-ei_local->rx_start_page;

   

   
while (++rx_pkt_count < 10) {

              int
pkt_len;

             

              /*
Get the rx page (incoming packet pointer).
*/

              outb_p(E8390_NODMA+E8390_PAGE1,
e8390_base + E8390_CMD);

              rxing_page = inb_p(e8390_base +
EN1_CURPAG);

              outb_p(E8390_NODMA+E8390_PAGE0,
e8390_base + E8390_CMD);

             

              /* Remove
one frame from the ring.  Boundary is
alway a page behind. */

              this_frame
= inb_p(e8390_base + EN0_BOUNDARY) + 1;

              if
(this_frame >= ei_local->stop_page)

                     this_frame
= ei_local->rx_start_page;

             

              /*
Someday we'll omit the previous, iff we never get this message.

                 (There is at least one clone claimed to have
a problem.)  */

              if
(ei_debug > 0  &&  this_frame != ei_local->current_page)

                     printk("%s:
mismatched read page pointers %2x vs %2x./n",

                               dev->name, this_frame,
ei_local->current_page);

             

              if
(this_frame == rxing_page)       /* Read
all the frames? */

                     break;                          /* Done for now */

             

              current_offset
= this_frame << 8;

              ei_block_input(dev,
sizeof(rx_frame), (char *)&rx_frame,

                                      current_offset);

             

              pkt_len
= rx_frame.count - sizeof(rx_frame);

             

              next_frame
= this_frame + 1 + ((pkt_len+4)>>8);

             

              /*
Check for bogosity warned by 3c503
book: the status byte is never

                 written. 
This happened a lot during testing! This code should be

                 cleaned up someday. */

              if
(rx_frame.next != next_frame

                     &&
rx_frame.next != next_frame + 1

                     &&
rx_frame.next != next_frame - num_rx_pages

                     &&
rx_frame.next != next_frame + 1 - num_rx_pages) {

                     ei_local->current_page
= rxing_page;

                     outb(ei_local->current_page-1,
e8390_base+EN0_BOUNDARY);

                     ei_local->stat.rx_errors++;

                     continue;

              }

 

              if
(pkt_len < 60  ||  pkt_len > 1518) {

                     if
(ei_debug)

                            printk("%s:
bogus packet size: %d, status=%#2x nxpg=%#2x./n",

                                      dev->name, rx_frame.count,
rx_frame.status,

                                      rx_frame.next);

                     ei_local->stat.rx_errors++;

              }
else if ((rx_frame.status & 0x0F)
== ENRSR_RXOK) {

                     int
sksize = sizeof(struct sk_buff) + pkt_len;

                     struct
sk_buff *skb;

                    

                     skb
= alloc_skb(sksize, GFP_ATOMIC);
[Wenxy5] 

                     if
(skb == NULL) {

                            if
(ei_debug)

                                   printk("%s:
Couldn't allocate a sk_buff of size %d./n",

                                             dev->name, sksize);

                            ei_local->stat.rx_dropped++;

                            break;

                     }
else {

                            skb->mem_len
= sksize;

                            skb->mem_addr
= skb;

                            skb->len
= pkt_len;

                            skb->dev
= dev;

                           

                            ei_block_input(dev,
pkt_len, (char *) skb->data,

                                                    current_offset + sizeof(rx_frame));

                            netif_rx(skb);[Wenxy6] 

                            ei_local->stat.rx_packets++;

                     }

              }
else {

                     int
errs = rx_frame.status;

                     if
(ei_debug)

                            printk("%s:
bogus packet: status=%#2x nxpg=%#2x size=%d/n",

                                      dev->name, rx_frame.status,
rx_frame.next,

                                      rx_frame.count);

                     if
(errs & ENRSR_FO)

                            ei_local->stat.rx_fifo_errors++;

              }

              next_frame
= rx_frame.next;

             

              /*
This _should_ never happen: it's here for avoiding bad clones. */

              if
(next_frame >= ei_local->stop_page) {

                     printk("%s:
next frame inconsistency, %#2x..", dev->name,

                               next_frame);

                     next_frame
= ei_local->rx_start_page;

              }

              ei_local->current_page
= next_frame;

              outb(next_frame-1,
e8390_base+EN0_BOUNDARY);

    }

   
/* If any worth-while packets have been received, dev_rint()

      
has done a mark_bh(INET_BH) for us and will work on them

      
when we get to the bottom-half routine. */

 

       /*
Record the maximum Rx packet queue. */

       if
(rx_pkt_count > high_water_mark)

              high_water_mark
= rx_pkt_count;

 

   
/* Bug alert!  Reset ENISR_OVER to
avoid spurious overruns!
*/

    outb_p(ENISR_RX+ENISR_RX_ERR+ENISR_OVER,
e8390_base+EN0_ISR);

    return;

}

 

./net/inet/skbuff.c

/*
 *	Insert an sk_buff at the end of a list.
 *
 *	*list is the head of a circular doubly-linked list, or NULL when the
 *	list is empty.  The tail is reached through the head's prev pointer.
 *	The links are updated with interrupts disabled; the previous
 *	interrupt flags are restored before returning.
 *
 *	list:  address of the list head pointer.
 *	newsk: buffer to append.  A warning is printed if its list field is
 *	       already set, but the buffer is queued anyway.
 */

void skb_queue_tail(struct sk_buff *volatile* list, struct sk_buff *newsk)
{
	unsigned long flags;

	if(newsk->list)
		printk("Suspicious queue tail: sk_buff on list!\n");

	IS_SKB(newsk);
	save_flags(flags);
	cli();

	newsk->list=list;
	if(*list)
	{
		/* Splice newsk between the current tail and the head. */
		(*list)->prev->next=newsk;
		newsk->prev=(*list)->prev;
		newsk->next=*list;
		(*list)->prev=newsk;
	}
	else
	{
		/* Empty list: newsk becomes the head and links to itself. */
		newsk->next=newsk;
		newsk->prev=newsk;
		*list=newsk;
	}
	IS_SKB(newsk->prev);
	IS_SKB(newsk->next);
	restore_flags(flags);

}

 

 

./net/inet/dev.c

/*
 * Entry point used by device drivers to hand a received packet to the
 * protocol layers.  The buffer is appended to the "backlog" queue and the
 * INET bottom half is marked so the queue gets processed later.
 * It always succeeds.
 */
void
netif_rx(struct sk_buff *skb)
{
	/* Flag setup: the buffer may be freed and belongs to no socket. */
	skb->free = 1;
	skb->sk = NULL;

	/* Queue it on the backlog. */
	IS_SKB(skb);
	skb_queue_tail(&backlog, skb);

	/* Schedule the bottom half whenever the backlog is non-empty. */
	if (backlog != NULL) {
		mark_bh(INET_BH);	/* [Wenxy7] */
	}
}

 

1.6 进入协议栈的流程: ./kernel/irq.c

/*

 *
do_bottom_half() runs at normal kernel priority: all interrupts

 *
enabled.  do_bottom_half() is atomic with
respect to itself: a

 *
bottom_half handler need not be re-entrant.

 */

asmlinkage void do_bottom_half[Wenxy8] (void)

{

       unsigned
long active;

       unsigned
long mask, left;

       struct bh_struct *bh;

 

       bh = bh_base;

       active
= bh_active & bh_mask;

       for
(mask = 1, left = ~0 ; left & active ; bh++,mask += mask,left += left) {

              if
(mask & active) {

                     void
(*fn)(void *);

                     bh_active
&= ~mask;

                     fn =
bh->routine;
[Wenxy9] 

                     if
(!fn)

                            goto
bad_bh;

                     fn(bh->data);

              }

       }

       return;

bad_bh:

       printk
("irq.c:bad bottom half entry/n");

}

 

1.7 接下来执行:/net/inet/dev.c

 

/*

 *
This function gets called periodically, to see if we can

 *
process any data that came in from some interface.

 *

 */

void

inet_bh(void
*tmp)

[Wenxy10] {

 
struct sk_buff *skb;

 
struct packet_type *ptype;

 
unsigned short type;

 
unsigned char flag = 0;

  int
nitcount;

 

  /*
Atomically check and mark our BUSY state. */

  if
(set_bit(1, (void*)&in_bh))

     
return;

 

  /*
Can we send anything now? */

 
dev_transmit();
[Wenxy11] 

 

  /*
Any data left to process? */

  while((skb=skb_dequeue(&backlog))!=NULL)[Wenxy12] 

  {

      nitcount=dev_nit;

       flag=0;

       sti();

      
/*

       *
Bump the pointer to the next structure.

       *
This assumes that the basic 'skb' pointer points to

       *
the MAC header, if any (as indicated by its "length"

       *
field).  Take care now!

       */

      
skb->h.raw = skb->data + skb->dev->hard_header_len;

      
skb->len -= skb->dev->hard_header_len;

 

      
/*

       *
Fetch the packet protocol ID.  This is
also quite ugly, as

       *
it depends on the protocol driver (the interface itself) to

       *
know what the type is, or where to get it from. 
The Ethernet

       *
interfaces fetch the ID from the two bytes in the Ethernet MAC

       *
header (the h_proto field in struct ethhdr), but drivers like

       *
SLIP and PLIP have no alternative but to force the type to be

       *
IP or something like that.  Sigh- FvK

       */

      
type = skb->dev->type_trans(skb, skb->dev);

 

       /*

        * We got a packet ID.  Now loop over the "known protocols"

        * table (which is actually a linked list, but
this will

        * change soon if I get my way- FvK), and
forward the packet

        * to anyone who wants it.

        */

       for
(ptype = ptype_base; ptype != NULL; ptype = ptype->next) {

              if
(ptype->type == type || ptype->type == NET16(ETH_P_ALL)) {

                     struct
sk_buff *skb2;

 

                     if
(ptype->type==NET16(ETH_P_ALL))

                            nitcount--;

                     if
(ptype->copy || nitcount) { /* copy if
we need to     */

                            skb2 = alloc_skb(skb->mem_len,
GFP_ATOMIC);

                            if (skb2 == NULL)

                                   continue;

                            memcpy(skb2,
(const void *) skb, skb->mem_len);

                            skb2->mem_addr
= skb2;

                            skb2->h.raw
= (unsigned char *)(

                                (unsigned long) skb2 +

                                (unsigned long) skb->h.raw -

                                (unsigned long) skb

                            );

                            skb2->free
= 1;

                     }
else {

                            skb2
= skb;

                     }

 

                     /*
This used to be in the 'else' part, but then

                      * we don't have this flag set when we get a

                      * protocol that *does* require copying... -FvK

                      */

                     flag
= 1;

 

                     /*
Kick the protocol handler. */

                     ptype->func(skb2,
skb->dev, ptype);

              }

       }

 

       /*

        * That's odd. 
We got an unknown packet.  Who's
using

        * stuff like Novell or Amoeba on this
network??

        */

       if
(!flag) {

              DPRINTF((DBG_DEV,

                     "INET:
unknown packet type 0x%04X (ignored)/n", type));

              skb->sk
= NULL;

              kfree_skb(skb,
FREE_WRITE);

       }

 

       /*
Again, see if we can transmit anything now. */

       dev_transmit();                                                                                

       cli();

  }

 
in_bh = 0;

 

抱歉!评论已关闭.