现在的位置: 首页 > 综合 > 正文

Solaris Source Insight: PCI bus driver modules – pcieb driver module

2013年10月21日 ⁄ 综合 ⁄ 共 15138字 ⁄ 字号 评论关闭

我们接下来看
pcieb

 

[root@blu-nhm-ep:~]modinfo
| grep PCI

15
fffffffffba46ce0 bfb0 - 1 pci_autoconfig (PCI BIOS interface)

37
fffffffffbacd3f0 ce28 183 1 npe (Host to PCIe nexus driver)

38
fffffffffbad95c8 5f50 - 1 pcihp (PCI nexus hotplug support)

40
fffffffffbae14f0 bb00 - 1 pcie (PCIE: PCI framework)

89
fffffffff7bff000 4c68 184 1 pcieb (PCIe to PCI nexus driver)



90
fffffffff7999000 1d68 84 1 pci_pci (PCI to PCI bridge nexus
driver)

 

pcieb

PCI-E
to PCI bus bridge nexus driver

。通过下面的方法可以在
onnv-gate
中找到实现的文件。

 

[allen@blu-xvm-osol:uts]find
. -name Makefile\* | xargs grep pcieb

./common/Makefile.files:PCIEB_OBJS
+= pcieb.o


./sparc/pcieb/Makefile:#
uts/sparc/pcieb/Makefile

./sparc/pcieb/Makefile:# This
makefile drives the production of the pcieb driver kernel module

./sparc/pcieb/Makefile:MODULE =
pcieb

./sparc/pcieb_bcm/Makefile:#
uts/sparc/pcieb_bcm/Makefile

./sparc/pcieb_bcm/Makefile:# This
makefile drives the production of the pcieb_bcm driver kernel module

./sparc/pcieb_bcm/Makefile:MODULE =
pcieb_bcm

./sparc/pcieb_bcm/Makefile:CPPFLAGS
+= -DPCIEB_BCM -DPX_MOD_NAME=pcieb_bcm

./sparc/Makefile.sparc.shared:DRV_KMODS +=
pci_pci pcieb pcieb_bcm pcie

./sparc/Makefile.files:PCIEB_OBJS
+= pcieb_sparc.o

./intel/Makefile.intel.shared:DRV_KMODS +=
pcieb

./intel/Makefile.files:PCIEB_OBJS
+= pcieb_x86.o


./intel/pcieb/Makefile:#
uts/intel/pcieb/Makefile

./intel/pcieb/Makefile:# This
makefile drives the production of the pcieb driver kernel

./intel/pcieb/Makefile:MODULE =
pcieb

 

[allen@blu-xvm-osol:uts]find
. -name pcieb.c

./common/io/pciex/pcieb.c

[allen@blu-xvm-osol:uts]find
. -name pcieb_x86.c

./intel/io/pciex/pcieb_x86.c

 

注:
以下对源代码的引用,除非特殊说明,都引自

common/io/pciex/pcieb.c

这是一个
nexus
driver

, 它的
modlinkage

定义是:

 

185
/*

186
* Module linkage information for the kernel.

187
*/

188

189
static struct modldrv modldrv = {

190
|_______&mod_driverops, /* Type of module */

191
|_______"PCIe bridge/switch driver",

192
|_______&pcieb_ops,|____/* driver ops */

193
};

194

195
static struct modlinkage modlinkage = {

196
|_______MODREV_1,

197
|_______(void *)&modldrv,

198
|_______NULL

199
};

 

其中,
pcieb_ops
定义为:

 

170
static struct dev_ops pcieb_ops = {

171
|_______DEVO_REV,|______|_______/* devo_rev */

172
|_______0,|_____|_______|_______/* refcnt */

173
|_______pcieb_info,|____|_______/* info */

174
|_______nulldev,|_______|_______/* identify */

175
|_______pcieb_probe,|___|_______/* probe */

176
|_______pcieb_attach,|__|_______/* attach */

177
|_______pcieb_detach,|__|_______/* detach */

178
|_______nulldev,|_______|_______/* reset */

179
|_______&pcieb_cb_ops,|_|_______/* driver operations */

180
|_______&pcieb_bus_ops,||_______/* bus operations */

181
|_______pcie_power,|____|_______/* power */

182
|_______ddi_quiesce_not_needed,||_______/* quiesce */

183
};

 

Let's check the _init() implementation.

 

215
int

216
_init(void)

217
{

218
|_______int e;

219

220
|_______if ((e = ddi_soft_state_init(&pcieb_state, sizeof
(pcieb_devstate_t),

221
|_______ 1)) == 0 && (e = mod_install(&modlinkage)) !=
0)

222
|_______|_______ddi_soft_state_fini(&pcieb_state);

223
|_______return (e);

224
}

 

It initializes the soft state structure used for
allocation and then installs the module; if mod_install() fails, the
soft state is torn down again. Next is the attach function,
pcieb_attach(). If it's called with the DDI_RESUME command,
pcie_pwr_resume() in the pcie module is called. Otherwise, it's a
normal attach command (DDI_ATTACH). A question came to me when I read
the first lines of the attach function: the bus private data for this
devinfo node is used directly in this function, but where is it
allocated and initialized? The common interface to allocate and
initialize a pcie_bus_t data structure is provided by the pcie module.

 

[common/io/pciex/pcie.c]

739
/*

740
* Initialize PCIe Bus Private Data

741
*

742
* PCIe Bus Private Data contains commonly used PCI/PCIe information
and offsets

743
* to key registers.

744
*/

745
pcie_bus_t *

746
pcie_init_bus(dev_info_t *cdip)

 

The following functions call this
function, according to cscope back-tracing.

 

Cscope
tag: pcie_init_bus

#
line filename / context / line

1
451 common/sys/pcie_impl.h <<GLOBAL>>

extern
pcie_bus_t *pcie_init_bus(dev_info_t *cdip);

2
746 common/io/pciex/pcie.c <<pcie_init_bus>>

pcie_init_bus(dev_info_t
*cdip)

3
777 common/io/pciex/pcieb.c <<pcieb_initchild>>

if
(!pcie_init_bus(child) || pcie_initchild(child) != DDI_SUCCESS) {

4
942 i86pc/io/pciex/npe.c <<npe_initchild>>

bus_p
= pcie_init_bus(child);

5
677 intel/io/pci/pci_pci.c <<ppb_initchild>>

if
(pcie_init_bus(child) == NULL)

6
528 sun4/io/px/px_util.c <<px_init_child>>

if
(pcie_init_bus(child))

7
966 sun4u/io/pci/pci_pci.c <<ppb_initchild>>

if
(pcie_init_bus(child) == NULL) {

Type
number and <Enter> (empty cancels):

 

We can see that the _initchild() function
of each nexus bus driver calls this interface to create pcie
private data for its children. So the question becomes “in
which path is the initchild function called?” The answer is
ndi_devi_bind_driver(), which is called to bind a devinfo node to a
specific driver module.

 

ndi_devi_bind_driver

-->i_ndi_config_node(DS_BOUND)

  
-->init_node

     
-->pdev->devi_ops->devo_bus_ops->bus_ctl(DDI_CTLOPS_INITCHILD)

       
-->npe_ctlops
/ pepb_ctlops / ppb_ctlops / pci_ctlops /

       
isa_ctlops
/ rootnex_ctlops / ata_disk_bus_ctl /

       
cpunex_bus_ctl
/ ...

          
-->npe_initchild
/ ppb_initchild / pcieb_initchild / …

 

When
the devinfo node state is being changed from DS_BOUND to
DS_INITIALIZED,

 

[common/os/devcfg.c]

1537
|_______|_______case DS_BOUND:

1538
|_______|_______|_______/*

1539
|_______|_______|_______ * The following transitions synchronizes on
the

1540
|_______|_______|_______ * per-driver busy changing flag, since we
already

1541
|_______|_______|_______ * have a driver.

1542
|_______|_______|_______ */

1543
|_______|_______|_______if ((rv = init_node(dip)) == DDI_SUCCESS)

1544
|_______|_______|_______|_______i_ddi_set_node_state(dip,
DS_INITIALIZED);

1545
|_______|_______|_______break;

 

in
init_node(), initchild function of the parent bus is called to
initialize the children nodes.

 

[common/os/devcfg.c]

871
|_______/*

872
|_______ * Invoke the parent's bus_ctl operation with the
DDI_CTLOPS_INITCHILD

873
|_______ * command to transform the child to canonical form 1. If
there

874
|_______ * is an error, ddi_remove_child should be called, to clean
up.

875
|_______ */

876
|_______error = (*f)(pdip, pdip, DDI_CTLOPS_INITCHILD, dip, NULL);

877
|_______if (error != DDI_SUCCESS) {

878
|_______|_______NDI_CONFIG_DEBUG((CE_CONT, "init_node: %s 0x%p
failed\n",

879
|_______|_______ path, (void *)dip));

880
|_______|_______remove_global_props(dip);

881
|_______|_______/* in case nexus driver didn't clear this field */

882
|_______|_______ddi_set_name_addr(dip, NULL);

883
|_______|_______error = DDI_FAILURE;

884
|_______|_______goto out;

885
|_______}

 

Next, allocate and get the soft state
structure. The state structure is defined as below.

 

[common/io/pciex/pcieb.h]

84
typedef struct {

85
|_______dev_info_t|_____|_______*pcieb_dip;

86

87
|_______/* Interrupt support */

88
|_______ddi_intr_handle_t|______*pcieb_htable;|_|_______/* Intr
Handlers */

89
|_______int|____|_______|_______pcieb_htable_size;|_____/* htable
size */

90
|_______int|____|_______|_______pcieb_intr_count;|______/* Num of
Intr */

91
|_______uint_t|_|_______|_______pcieb_intr_priority;|___/* Intr
Priority */

92
|_______int|____|_______|_______pcieb_intr_type;|_______/* (MSI |
FIXED) */

93
|_______int|____|_______|_______pcieb_isr_tab[4];|______/* MSI source
offset */

94

95
|_______int|____|_______|_______pcieb_init_flags;

96
|_______kmutex_t|_______|_______pcieb_mutex;|___|_______/* Soft state
mutex */

97
|_______kmutex_t|_______|_______pcieb_intr_mutex;|______/* Intr
handler mutex */

98
|_______kmutex_t|_______|_______pcieb_err_mutex;|_______/* Error
mutex */

99
|_______kmutex_t|_______|_______pcieb_peek_poke_mutex; /* Peekpoke
mutex */

100

101
|_______/* FMA */

102
|_______boolean_t|______|_______pcieb_no_aer_msi;

103
|_______ddi_iblock_cookie_t|____pcieb_fm_ibc;

104
} pcieb_devstate_t;

 

    Next >>>

  1. Fault management initialization.
    Initialize the mutex locks. And then create special properties for
    device identification. “first-in-chassis” property: set if
    “First In Chassis bit” of “Expansion Slot Register”, the
    first byte of “PCI Slot Id Capabilities” is set. "serialid#"
    property: set according to PCI-Express Device Serial Number
    Capability register.

  2. Next comes “Power management
    setup”. This also makes sure that the switch/bridge is at D0 during
    attach. The common pcie power management interfaces are implemented
    in the pcie module.

  3. Make sure the devinfo node has
    “device_type” and “range” property. Set if not. “pciex”
    or “pci” according to the type of bridge. For PCI and PCI-X
    devices including PCIe2PCI bridge, initialize cache-line-size and
    latency timer configuration registers.

  4. Initialize the bridge itself by
    calling pcie_init(). pcie_init() is a common interface for PCIe
    devices provided by the pcie module.

  5. Initialize interrupt handlers.
    Before interrupts are initialized, _OSC initialization needs to be
    done. The _OSC object is a control method that is used by OSPM to
    communicate to the platform the feature support or capabilities
    provided by a device’s driver. This object is a child object of a
    device and may also exist in the /_SB scope, where it can be used to
    convey platform-wide OSPM capabilities. The driver needs to evaluate
    _OSC to notify the platform that it can handle advanced errors. When
    initializing interrupt handlers, if both MSI and FIXED are
    supported, try to attach MSI first. If MSI fails for any reason,
    then try FIXED, but only allow one type to be attached. For a
    bridge, interrupts are allocated and initialized for hotplug, PME and
    errors. PME is a power management event. Components may wake up the
    system using a wakeup mechanism followed by a power management event
    (PME) Message.

  6. Do any platform specific
    workarounds needed. x86 specific workarounds needed at the end of
    pcieb attach. Must apply workaround only after all initialization is
    done.

  7. If this is a root port, determine
    and set the max payload size. Since this will involve scanning the
    fabric, all error enabling and sw workarounds should be in place
    before doing this.

 

pcieb_detach() does the opposite
operations.

 

Driver Operations:

 

For nexus bus driver, driver operations
are mainly designed for bus/device control. pcieb_open() is called
when the device special file is being opened by an application. It
uses a mutex lock to keep exclusive open and calls pcie_open().
pcie_open() is a common interface provided by misc/pcie module.
pcieb_close() does the opposite operations. pcieb_ioctl() relies on
pcie_ioctl() to handle devctl and hotplug related ioctls.

 

Bus Operations:

 

Bus operations are the core of a nexus
driver. The pcieb module defines the bus operations as below.

 

92
static struct bus_ops pcieb_bus_ops = {

93
|_______BUSO_REV,

94
|_______pcieb_bus_map,

95
|_______0,

96
|_______0,

97
|_______0,

98
|_______i_ddi_map_fault,

99
|_______ddi_dma_map,

100
|_______pcieb_dma_allochdl,

101
|_______ddi_dma_freehdl,

102
|_______ddi_dma_bindhdl,

103
|_______ddi_dma_unbindhdl,

104
|_______ddi_dma_flush,

105
|_______ddi_dma_win,

106
|_______pcieb_dma_mctl,

107
|_______pcieb_ctlops,

108
|_______ddi_bus_prop_op,

109
|_______ndi_busop_get_eventcookie,|_____/*
(*bus_get_eventcookie)();|___*/

110
|_______ndi_busop_add_eventcall,|_______/*
(*bus_add_eventcall)();|_____*/

111
|_______ndi_busop_remove_eventcall,|____/*
(*bus_remove_eventcall)();|__*/

112
|_______ndi_post_event,||_______|_______/*
(*bus_post_event)();||_______*/

113
|_______NULL,|__|_______|_______|_______/*
(*bus_intr_ctl)();|__|_______*/

114
|_______NULL,|__|_______|_______|_______/* (*bus_config)();
|___|_______*/

115
|_______NULL,|__|_______|_______|_______/* (*bus_unconfig)();
|_|_______*/

116
|_______pcieb_fm_init_child,|___|_______/* (*bus_fm_init)();
|__|_______*/

117
|_______NULL,|__|_______|_______|_______/* (*bus_fm_fini)();
|__|_______*/

118
|_______i_ndi_busop_access_enter,|______/* (*bus_fm_access_enter)();
|__*/

119
|_______i_ndi_busop_access_exit,|_______/* (*bus_fm_access_exit)();
|___*/

120
|_______pcie_bus_power,||_______|_______/* (*bus_power)(); |____*/

121
|_______pcieb_intr_ops,||_______|_______/* (*bus_intr_op)();
|__|_______*/

122
|_______pcie_hp_common_ops|_____|_______/* (*bus_hp_op)();
|____|_______*/

123
};

 

As you can see, it implements
bus_map(), dma_allochdl(), dma_mctl(), ctlops(), fm_initchild(),
bus_power(), intr_ops() and hp_common_ops(). Let's check them out one
by one.

 

  • pcieb_bus_map()

Call parent's
bus_map() function.

  • pcieb_dma_allochdl()

A software
workaround for PCI-X to PCI-E bridges.

 

1323
/*

1324
* Some PCI-X to PCI-E bridges do not support full 64-bit addressing
on the

1325
* PCI-X side of the bridge. We build a special version of this
driver for

1326
* those bridges, which uses PCIEB_ADDR_LIMIT_LO and/or
PCIEB_ADDR_LIMIT_HI

1327
* to define the range of values which the chip can handle. The code
below

1328
* then clamps the DMA address range supplied by the driver,
preventing the

1329
* PCI-E nexus driver from allocating any memory the bridge can't
deal

1330
* with.

1331
*/

 

  • pcieb_dma_mctl()

FDVMA feature is
not supported for any child device of Broadcom 5714/5715 PCIe-PCI
bridge due to prefetch bug. Return failure immediately, so that these
drivers will switch to regular DVMA path.

  • pcie_hp_common_ops()

  • pcieb_fm_init_child()

/* PASSTHROUGH */

  • pcieb_intr_ops()

No significant
platform level operation, calling i_ddi_intr_ops() at last.

  • pcieb_ctlops()

How should we understand
the following comment?

 

[common/io/pciex/pcieb.c]

704
|_______/*

705
|_______ * Pseudo nodes indicate a prototype node with per-instance

706
|_______ * properties to be merged into the real h/w device node.

707
|_______ * The interpretation of the unit-address is DD[,F]

708
|_______ * where DD is the device id and F is the function.

709
|_______ */

 

Answer:

This is a trick for
setting per-instance device properties by using the Solaris driver.conf
file.
You can create pseudo device nodes by adding rules to
driver.conf in the following format.

 

name="node
name" class="class name" [property-name=value ...];

 

When loading
driver.conf, which happens before hardware device enumeration, Solaris
will create those pseudo device nodes with the given "node name"
and properties.
Later, when enumerating hardware device nodes, it
will try to merge each real hardware device node with the pseudo device node
that has the same "node name".
In that way, you can assign per-instance
properties by using the driver.conf file.

 

For code path,
when a devinfo node is being initialized after binding to a driver,
init_node() is called.

 

[common/os/devcfg.c]

1537
|_______|_______case DS_BOUND:

1538
|_______|_______|_______/*

1539
|_______|_______|_______ * The following transitions synchronizes on
the

1540
|_______|_______|_______ * per-driver busy changing flag, since we
already

1541
|_______|_______|_______ * have a driver.

1542
|_______|_______|_______ */

1543
|_______|_______|_______if ((rv = init_node(dip)) == DDI_SUCCESS)

1544
|_______|_______|_______|_______i_ddi_set_node_state(dip,
DS_INITIALIZED);

1545
|_______|_______|_______break;

 

If the dip is a
persistent node, then i_ndi_make_spec_children() is called to create
and attach a dev_info node from a .conf file spec.

 

[common/os/devcfg.c]

983
|_______|_______/*

984
|_______|_______ * If the node is not a driver.conf node then merge

985
|_______|_______ * driver.conf properties from new path-bound
driver.conf.

986
|_______|_______ */

987
|_______|_______if (ndi_dev_is_persistent_node(dip))

988
|_______|_______|_______(void) i_ndi_make_spec_children(pdip, 0);

 

In this function,
get the specs from the .conf file and call init_spec_child() for each
spec. A devi_flags bit, DEVI_MADE_CHILDREN, is reserved to mark
whether spec children have been created.

 

[common/os/devcfg.c]

4501
/*

4502
* Lookup hwc specs from hash tables and make children from the spec

4503
* Because some .conf children are "merge" nodes, we also
initialize

4504
* .conf children to merge properties onto hardware nodes.

4505
*

4506
* The pdip must be held busy.

4507
*/

4508
int

4509
i_ndi_make_spec_children(dev_info_t *pdip, uint_t flags)

4510
{

4511
|_______extern struct hwc_spec *hwc_get_child_spec(dev_info_t *,
major_t);

4512
|_______int|____|_______|_______circ;

4513
|_______struct hwc_spec||_______*list, *spec;

4514

4515
|_______ndi_devi_enter(pdip, &circ);

4516
|_______if (DEVI(pdip)->devi_flags & DEVI_MADE_CHILDREN) {

4517
|_______|_______ndi_devi_exit(pdip, circ);

4518
|_______|_______return (DDI_SUCCESS);

4519
|_______}

4520

4521
|_______list = hwc_get_child_spec(pdip, DDI_MAJOR_T_NONE);

4522
|_______for (spec = list; spec != NULL; spec = spec->hwc_next) {

4523
|_______|_______init_spec_child(pdip, spec, flags);

4524
|_______}

4525
|_______hwc_free_spec_list(list);

4526

4527
|_______mutex_enter(&DEVI(pdip)->devi_lock);

4528
|_______DEVI(pdip)->devi_flags |= DEVI_MADE_CHILDREN;

4529
|_______mutex_exit(&DEVI(pdip)->devi_lock);

4530
|_______ndi_devi_exit(pdip, circ);

4531
|_______return (DDI_SUCCESS);

4532
}

 

[common/os/devcfg.c]

4470
/*

4471
* create and attach a dev_info node from a .conf file spec

4472
*/

抱歉!评论已关闭.