首先我想引用一下官方的源代码中的文档,对设备驱动的工作做一个概括性的叙述:

1. 首先调用 pci_register_driver() 函数,对设备驱动进行注册。
2.Once the driver knows about a PCI device and takes ownership, the
driver generally needs to perform the following initialization:

	Enable the device
	Request MMIO/IOP resources
	Set the DMA mask size (for both coherent and streaming DMA)
	Allocate and initialize shared control data (pci_allocate_coherent())
	Access device configuration space (if needed)
	Register IRQ handler (request_irq())
	Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip)
	Enable DMA/processing engines

When done using the device, and perhaps the module needs to be unloaded,
the driver needs to take the follow steps:
	Disable the device from generating IRQs
	Release the IRQ (free_irq())
	Stop all DMA activity
	Release DMA buffers (both streaming and coherent)
	Unregister from other subsystems (e.g. scsi or netdev)
	Release MMIO/IOP resources
	Disable the device

前面讲了很多关于PCI的架构之类的基础知识,现在我们结合代码看一下怎么实现一个PCI设备驱动。软件代码依然是NXP开源的linux代码。我们以bt878的驱动代码为例。
Software: linux version 4.14.98

1. pci_register_driver()

首先我们需要注册PCI设备驱动,废话少说,直接看代码

/*******************************/
/* Module management functions */
/*******************************/

/*
 * Module entry point.
 *
 * Resets the global device counter, logs the driver version (three 8-bit
 * fields packed into BT878_VERSION_CODE) and registers bt878_pci_driver.
 *
 * Returns the result of pci_register_driver().
 */
static int __init bt878_init_module(void)
{
        const int ver_major = (BT878_VERSION_CODE >> 16) & 0xff;
        const int ver_minor = (BT878_VERSION_CODE >> 8) & 0xff;
        const int ver_patch = BT878_VERSION_CODE & 0xff;

        /* no device has been probed yet */
        bt878_num = 0;

        printk(KERN_INFO "bt878: AUDIO driver version %d.%d.%d loaded\n",
               ver_major, ver_minor, ver_patch);

        return pci_register_driver(&bt878_pci_driver);
}

/*
 * Module exit point: unregisters bt878_pci_driver, undoing the
 * pci_register_driver() call made in bt878_init_module().
 */
static void __exit bt878_cleanup_module(void)
{
        pci_unregister_driver(&bt878_pci_driver);
}

/* Declare the functions above as this module's init and exit handlers. */
module_init(bt878_init_module);
module_exit(bt878_cleanup_module);

下面我们看pci_register_driver(&bt878_pci_driver);

/*
 * pci_register_driver must be a macro so that KBUILD_MODNAME can be expanded
 *
 * It forwards @driver to __pci_register_driver(), supplying THIS_MODULE as
 * the owner and KBUILD_MODNAME as the module name, so both are taken from
 * the file that uses the macro.
 */
#define pci_register_driver(driver)             \
        __pci_register_driver(driver, THIS_MODULE, KBUILD_MODNAME)

__pci_register_driver()函数如下

/**
 * __pci_register_driver - register a new pci driver
 * @drv: the driver structure to register
 * @owner: owner module of drv
 * @mod_name: module name string
 *
 * Adds the driver structure to the list of registered drivers.
 * Returns a negative value on error, otherwise 0.
 * If no error occurred, the driver remains registered even if
 * no device was claimed during registration.
 */
int __pci_register_driver(struct pci_driver *drv, struct module *owner,
                          const char *mod_name)
{
        /* initialize common driver fields */
        drv->driver.name = drv->name;
        drv->driver.bus = &pci_bus_type;
        drv->driver.owner = owner;
        drv->driver.mod_name = mod_name;
        drv->driver.groups = drv->groups;

        spin_lock_init(&drv->dynids.lock);
        INIT_LIST_HEAD(&drv->dynids.list);

        /* register with core */
        return driver_register(&drv->driver);
}
EXPORT_SYMBOL(__pci_register_driver);

driver_register()函数如下

/**
 * driver_register - register driver with bus
 * @drv: driver to register
 *
 * Most of the work is delegated to bus_add_driver(), because it concerns
 * the bus structures. This function only adds the sanity checks around
 * it, creates the driver's attribute groups and emits the KOBJ_ADD uevent.
 *
 * Returns 0 on success, -EBUSY if a driver with the same name is already
 * registered on the bus, or the error from bus_add_driver() /
 * driver_add_groups().
 */
int driver_register(struct device_driver *drv)
{
        struct device_driver *existing;
        int err;

        BUG_ON(!drv->bus->p);

        /* warn when both the bus and the driver supply the same method */
        if ((drv->bus->probe && drv->probe) ||
            (drv->bus->remove && drv->remove) ||
            (drv->bus->shutdown && drv->shutdown)) {
                printk(KERN_WARNING "Driver '%s' needs updating - please use "
                        "bus_type methods\n", drv->name);
        }

        /* driver names must be unique per bus */
        existing = driver_find(drv->name, drv->bus);
        if (existing) {
                printk(KERN_ERR "Error: Driver '%s' is already registered, "
                        "aborting...\n", drv->name);
                return -EBUSY;
        }

        err = bus_add_driver(drv);
        if (err)
                return err;

        err = driver_add_groups(drv, drv->groups);
        if (err) {
                /* undo bus_add_driver() before reporting the failure */
                bus_remove_driver(drv);
                return err;
        }

        kobject_uevent(&drv->p->kobj, KOBJ_ADD);
        return 0;
}
EXPORT_SYMBOL_GPL(driver_register);

2.设备初始化

2.1 probe()函数

我们对driver进行register之后,我们接下来看probe函数。
首先看bt878_pci_driver的定义:

/*
 * PCI driver descriptor handed to pci_register_driver() and
 * pci_unregister_driver() by the module init/exit functions.
 */
static struct pci_driver bt878_pci_driver = {
      .name     = "bt878",              /* driver name */
      .id_table = bt878_pci_tbl,        /* PCI IDs this driver handles */
      .probe    = bt878_probe,          /* device setup callback */
      .remove   = bt878_remove,         /* device teardown callback */
};

再看bt878_probe函数的定义:

/***********************/
/* PCI device handling */
/***********************/

/*
 * bt878_probe - set up one Bt878 audio function.
 * @dev:    PCI device being bound
 * @pci_id: matching id-table entry (used for the card name in the log)
 *
 * Enables the device, claims BAR 0, maps the first 0x1000 bytes of its
 * registers (on sparc the bus address is used directly), masks the chip's
 * interrupts, installs bt878_irq as a shared handler, enables bus
 * mastering and allocates the DMA buffers.
 *
 * Returns 0 on success or a negative errno. On failure everything acquired
 * so far is released in reverse order through the failN labels.
 */
static int bt878_probe(struct pci_dev *dev, const struct pci_device_id *pci_id)
{
        int result = 0;
        unsigned char lat;
        struct bt878 *bt;
        unsigned int cardid;

        printk(KERN_INFO "bt878: Bt878 AUDIO function found (%d).\n",
               bt878_num);
        if (bt878_num >= BT878_MAX) {
                printk(KERN_ERR "bt878: Too many devices inserted\n");
                return -ENOMEM;
        }
        if (pci_enable_device(dev))
                return -EIO;

        cardid = dev->subsystem_device << 16;
        cardid |= dev->subsystem_vendor;

        printk(KERN_INFO "%s: card id=[0x%x],[ %s ] has DVB functions.\n",
                                __func__, cardid, card_name(pci_id));

        /* take the next free slot of the static device table */
        bt = &bt878[bt878_num];
        bt->dev = dev;
        bt->nr = bt878_num;
        bt->shutdown = 0;

        bt->id = dev->device;
        bt->irq = dev->irq;
        bt->bt878_adr = pci_resource_start(dev, 0);
        if (!request_mem_region(pci_resource_start(dev, 0),
                                pci_resource_len(dev, 0), "bt878")) {
                result = -EBUSY;
                goto fail0;
        }

        bt->revision = dev->revision;
        pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);


        printk(KERN_INFO "bt878(%d): Bt%x (rev %d) at %02x:%02x.%x, ",
               bt878_num, bt->id, bt->revision, dev->bus->number,
               PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
        printk("irq: %d, latency: %d, memory: 0x%lx\n",
               bt->irq, lat, bt->bt878_adr);

#ifdef __sparc__
        bt->bt878_mem = (unsigned char *) bt->bt878_adr;
#else
        bt->bt878_mem = ioremap(bt->bt878_adr, 0x1000);
        if (!bt->bt878_mem) {
                /* btwrite() below dereferences the mapping: bail out now */
                printk(KERN_ERR "bt878(%d): failed to map registers\n",
                       bt878_num);
                result = -ENOMEM;
                goto fail1;
        }
#endif

        /* clear interrupt mask */
        btwrite(0, BT848_INT_MASK);

        result = request_irq(bt->irq, bt878_irq,
                             IRQF_SHARED, "bt878", (void *) bt);
        if (result == -EINVAL) {
                printk(KERN_ERR "bt878(%d): Bad irq number or handler\n",
                       bt878_num);
                goto fail2;
        }
        if (result == -EBUSY) {
                printk(KERN_ERR
                       "bt878(%d): IRQ %d busy, change your PnP config in BIOS\n",
                       bt878_num, bt->irq);
                goto fail2;
        }
        if (result < 0)
                goto fail2;

        pci_set_master(dev);
        pci_set_drvdata(dev, bt);

        if ((result = bt878_mem_alloc(bt))) {
                printk(KERN_ERR "bt878: failed to allocate memory!\n");
                goto fail3;
        }

        bt878_make_risc(bt);
        btwrite(0, BT878_AINT_MASK);
        /* the slot is only counted once setup has fully succeeded */
        bt878_num++;

        return 0;

      fail3:
        free_irq(bt->irq, bt);
      fail2:
#ifndef __sparc__
        /* only the non-sparc path created a mapping */
        iounmap(bt->bt878_mem);
#endif
        bt->bt878_mem = NULL;
      fail1:
        release_mem_region(pci_resource_start(bt->dev, 0),
                           pci_resource_len(bt->dev, 0));
      fail0:
        pci_disable_device(dev);
        return result;
}

在这里我们重点讲解probe()中的几个重要函数。

2.2 使能PCI设备

我们重点查看函数pci_enable_device()
看probe中的代码片段:

        /* Enable the device first; any failure is reported to the caller as -EIO. */
        if (pci_enable_device(dev))
                return -EIO;

那么这个函数主要是做什么呢,在这里摘录一段源代码中自带的文档中的说明:

Before touching any device registers, the driver needs to enable
the PCI device by calling pci_enable_device(). This will:
	o wake up the device if it was in suspended state,
	o allocate I/O and memory regions of the device (if BIOS did not),
	o allocate an IRQ (if BIOS did not).

翻译过来其实就是,唤醒设备,分配I/O和memory region,分配IRQ,好了我们接下来继续跟踪这个函数

/**
 * pci_enable_device - Initialize device before it's used by a driver.
 * @dev: PCI device to be initialized
 *
 *  Initialize device before it's used by a driver. Ask low-level code
 *  to enable I/O and memory. Wake up the device if it was suspended.
 *  Beware, this function can fail.
 *
 *  Note we don't actually enable the device many times if we call
 *  this function repeatedly (we just increment the count).
 *
 *  This is a thin wrapper: it calls pci_enable_device_flags() asking for
 *  both memory and I/O resources and returns that function's result.
 */
int pci_enable_device(struct pci_dev *dev)
{
        return pci_enable_device_flags(dev, IORESOURCE_MEM | IORESOURCE_IO);
}
EXPORT_SYMBOL(pci_enable_device);

继续看pci_enable_device_flags()函数

/*
 * pci_enable_device_flags - enable @dev for the resource types in @flags.
 * @dev:   PCI device to enable
 * @flags: IORESOURCE_* type bits; a resource is selected when its flags
 *         share at least one bit with this mask
 *
 * Refreshes the cached power state, takes an enable reference, enables the
 * upstream bridge if there is one, builds a bitmask of the selected
 * resource indices and passes it to do_pci_enable_device().
 *
 * Returns 0 if the device was already enabled, otherwise the result of
 * do_pci_enable_device(); the enable reference is dropped again when that
 * result is negative.
 */
static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags)
{
        struct pci_dev *upstream;
        int bar_mask = 0;
        int idx;
        int rc;

        /*
         * Power state could be unknown at this point, either due to a fresh
         * boot or a device removal call.  So get the current power state
         * so that things like MSI message writing will behave as expected
         * (e.g. if the device really is in D0 at enable time).
         */
        if (dev->pm_cap) {
                u16 pmcsr;

                pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
                dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
        }

        /* only the first enabler does the real work; others just count */
        if (atomic_inc_return(&dev->enable_cnt) > 1)
                return 0;

        upstream = pci_upstream_bridge(dev);
        if (upstream)
                pci_enable_bridge(upstream);

        /*
         * Indices between PCI_ROM_RESOURCE and PCI_BRIDGE_RESOURCES are
         * left out (per the original comment: only the SR-IOV related ones).
         */
        for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) {
                if (dev->resource[idx].flags & flags)
                        bar_mask |= (1 << idx);
        }
        for (idx = PCI_BRIDGE_RESOURCES; idx < DEVICE_COUNT_RESOURCE; idx++) {
                if (dev->resource[idx].flags & flags)
                        bar_mask |= (1 << idx);
        }

        rc = do_pci_enable_device(dev, bar_mask);
        if (rc < 0)
                atomic_dec(&dev->enable_cnt);

        return rc;
}

重点看函数do_pci_enable_device()


/*
 * do_pci_enable_device - perform the actual enable sequence for @dev.
 * @dev:  PCI device to enable
 * @bars: bitmask of resource indices to enable
 *
 * Puts the device into D0, configures ASPM on the upstream bridge link if
 * there is a bridge, calls pcibios_enable_device() for @bars, runs the
 * enable-time fixups and, unless MSI or MSI-X is in use, makes sure legacy
 * INTx is not disabled in PCI_COMMAND for a device that has an interrupt
 * pin.
 *
 * Returns 0 on success or a negative errno. An -EIO result from
 * pci_set_power_state() is deliberately not treated as fatal.
 */
static int do_pci_enable_device(struct pci_dev *dev, int bars)
{
        struct pci_dev *upstream;
        int rc;
        u16 command;
        u8 intx_pin;

        rc = pci_set_power_state(dev, PCI_D0);
        if (rc < 0 && rc != -EIO)
                return rc;

        upstream = pci_upstream_bridge(dev);
        if (upstream)
                pcie_aspm_powersave_config_link(upstream);

        rc = pcibios_enable_device(dev, bars);
        if (rc < 0)
                return rc;
        pci_fixup_device(pci_fixup_enable, dev);

        /* with MSI/MSI-X active the INTx setting is left untouched */
        if (dev->msi_enabled || dev->msix_enabled)
                return 0;

        pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &intx_pin);
        if (!intx_pin)
                return 0;

        /* re-enable INTx only when it is currently disabled */
        pci_read_config_word(dev, PCI_COMMAND, &command);
        if (command & PCI_COMMAND_INTX_DISABLE) {
                pci_write_config_word(dev, PCI_COMMAND,
                                      command & ~PCI_COMMAND_INTX_DISABLE);
        }

        return 0;
}

看函数pci_set_power_state(dev, PCI_D0);

/**
 * pci_set_power_state - Set the power state of a PCI device
 * @dev: PCI device that we're dealing with
 * @state: PCI power state to put the device into; values outside the
 *         PCI_D0..PCI_D3cold range are clamped to the nearest bound
 *
 * Transitions @dev to @state. A D3cold request is carried out by first
 * putting the device into D3hot and then letting
 * __pci_complete_power_transition() finish the job.
 *
 * RETURN VALUE:
 * 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
 * 0 if device already is in the requested state.
 * 0 if the transition is to D3 but D3 is not supported.
 * 0 if device's power state has been successfully changed.
 * Otherwise the error returned by pci_raw_set_power_state(), unless
 * __pci_complete_power_transition() returns 0, in which case 0.
 */
int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
{
        int error;

        /* bound the state we're entering */
        if (state > PCI_D3cold)
                state = PCI_D3cold;
        else if (state < PCI_D0)
                state = PCI_D0;
        else if ((state == PCI_D1 || state == PCI_D2) && pci_no_d1d2(dev))
                /*
                 * If the device or the parent bridge do not support PCI PM,
                 * ignore the request if we're doing anything other than putting
                 * it into D0 (which would only happen on boot).
                 */
                return 0;

        /* Check if we're already there */
        if (dev->current_state == state)
                return 0;

        __pci_start_power_transition(dev, state);

        /* This device is quirked not to be put into D3, so
           don't put it in D3 */
        if (state >= PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3))
                return 0;

        /*
         * To put device in D3cold, we put device into D3hot in native
         * way, then put device into D3cold with platform ops
         */
        error = pci_raw_set_power_state(dev, state > PCI_D3hot ?
                                        PCI_D3hot : state);

        /* a successful completion step overrides an earlier raw error */
        if (!__pci_complete_power_transition(dev, state))
                error = 0;

        return error;
}
EXPORT_SYMBOL(pci_set_power_state);

在这里就是设置PCI 设备的power 状态。
再回到do_pci_enable_device()函数

/* Call pcibios_enable_device() with the BAR mask; propagate a negative result. */
err = pcibios_enable_device(dev, bars);
if (err < 0)
        return err;

在pcibios_enable_device(dev, bars)函数中,设置I/O和memory region
接下来设置中断:

        /*
         * If the device reports an interrupt pin and INTx is currently
         * disabled in PCI_COMMAND, clear the disable bit.
         */
        pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
        if (pin) {
                pci_read_config_word(dev, PCI_COMMAND, &cmd);
                if (cmd & PCI_COMMAND_INTX_DISABLE)
                        pci_write_config_word(dev, PCI_COMMAND,
                                              cmd & ~PCI_COMMAND_INTX_DISABLE);
        }

接下来我们讲另外一个函数

2.3 申请 MMIO/IOP 资源

我们看request_mem_region(pci_resource_start(dev, 0), pci_resource_len(dev, 0), "bt878")函数

/* these helpers provide future and backwards compatibility
 * for accessing popular PCI BAR info
 *
 * pci_resource_start/end/flags read the matching field of
 * (dev)->resource[(bar)].
 *
 * pci_resource_len yields 0 when both start and end are 0 (start is 0 and
 * end equals start); otherwise it yields end - start + 1, the end address
 * being inclusive.
 */
#define pci_resource_start(dev, bar)    ((dev)->resource[(bar)].start)
#define pci_resource_end(dev, bar)      ((dev)->resource[(bar)].end)
#define pci_resource_flags(dev, bar)    ((dev)->resource[(bar)].flags)
#define pci_resource_len(dev,bar) \
        ((pci_resource_start((dev), (bar)) == 0 &&      \
          pci_resource_end((dev), (bar)) ==             \
          pci_resource_start((dev), (bar))) ? 0 :       \
                                                        \
         (pci_resource_end((dev), (bar)) -              \
          pci_resource_start((dev), (bar)) + 1))

其中 request_mem_region()定义如下

/*
 * Request the n-byte range beginning at start under iomem_resource, labelled
 * with name and with no extra flags; evaluates to __request_region()'s result.
 */
#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0)

接下来我们接着追函数

/**
 * __request_region - create a new busy resource region
 * @parent: parent resource descriptor
 * @start: resource start address
 * @n: resource region size
 * @name: reserving caller's ID string
 * @flags: IO resource flags
 *
 * Allocates a resource covering [@start, @start + @n - 1], marks it
 * IORESOURCE_BUSY and tries to insert it below @parent.
 *
 * Returns the new resource on success, or NULL if the allocation failed
 * or the range conflicts with a resource that cannot be shared.
 */
struct resource * __request_region(struct resource *parent,
                                   resource_size_t start, resource_size_t n,
                                   const char *name, int flags)
{
        DECLARE_WAITQUEUE(wait, current);
        struct resource *res = alloc_resource(GFP_KERNEL);

        if (!res)
                return NULL;

        res->name = name;
        res->start = start;
        /* the end address is inclusive */
        res->end = start + n - 1;

        write_lock(&resource_lock);

        for (;;) {
                struct resource *conflict;

                /* inherit type and descriptor from the (current) parent */
                res->flags = resource_type(parent) | resource_ext_type(parent);
                res->flags |= IORESOURCE_BUSY | flags;
                res->desc = parent->desc;

                conflict = __request_resource(parent, res);
                if (!conflict)
                        break;
                /* a non-busy conflicting resource becomes the new parent: retry inside it */
                if (conflict != parent) {
                        if (!(conflict->flags & IORESOURCE_BUSY)) {
                                parent = conflict;
                                continue;
                        }
                }
                /*
                 * Both the holder and this request are IORESOURCE_MUXED:
                 * sleep on muxed_resource_wait with the lock dropped, then
                 * retry after being woken up.
                 */
                if (conflict->flags & flags & IORESOURCE_MUXED) {
                        add_wait_queue(&muxed_resource_wait, &wait);
                        write_unlock(&resource_lock);
                        set_current_state(TASK_UNINTERRUPTIBLE);
                        schedule();
                        remove_wait_queue(&muxed_resource_wait, &wait);
                        write_lock(&resource_lock);
                        continue;
                }
                /* Uhhuh, that didn't work out.. */
                free_resource(res);
                res = NULL;
                break;
        }
        write_unlock(&resource_lock);
        return res;
}
EXPORT_SYMBOL(__request_region);

2.4 申请注册中断函数

看函数

/*
 * Install bt878_irq as a shared (IRQF_SHARED) handler on bt->irq; bt is
 * passed as dev_id and handed back to the handler on every call.
 */
result = request_irq(bt->irq, bt878_irq,
                     IRQF_SHARED, "bt878", (void *) bt);

bt878_irq函数定义如下:

/*****************************/
/* Interrupt service routine */
/*****************************/

/*
 * bt878_irq - interrupt handler for the Bt878 audio function.
 * @irq:    interrupt number (unused)
 * @dev_id: the struct bt878 that was passed to request_irq()
 *
 * Each pass reads the audio interrupt status, keeps only the bits enabled
 * in the interrupt mask, acknowledges them and, when bt878_verbose is set,
 * logs error conditions. A RISC interrupt records the finished block and
 * schedules the tasklet. After more than 20 non-RISC passes the interrupt
 * mask is cleared to break a lockup.
 *
 * Returns IRQ_NONE only when the very first status read shows nothing
 * pending for this device. Once any pass has acknowledged an interrupt the
 * handler returns IRQ_HANDLED, so handled interrupts on the shared line are
 * never reported as unhandled.
 */
static irqreturn_t bt878_irq(int irq, void *dev_id)
{
        u32 stat, astat, mask;
        int count;
        struct bt878 *bt;

        bt = (struct bt878 *) dev_id;

        count = 0;
        while (1) {
                stat = btread(BT878_AINT_STAT);
                mask = btread(BT878_AINT_MASK);
                astat = stat & mask;
                if (!astat) {
                        if (!count)
                                return IRQ_NONE;        /* this interrupt is not for me */
                        /* earlier passes already serviced the device */
                        break;
                }
/*              dprintk("bt878(%d) debug: irq count %d, stat 0x%8.8x, mask 0x%8.8x\n",bt->nr,count,stat,mask); */
                btwrite(astat, BT878_AINT_STAT);        /* try to clear interrupt condition */


                if (astat & (BT878_ASCERR | BT878_AOCERR)) {
                        if (bt878_verbose) {
                                printk(KERN_INFO
                                       "bt878(%d): irq%s%s risc_pc=%08x\n",
                                       bt->nr,
                                       (astat & BT878_ASCERR) ? " SCERR" :
                                       "",
                                       (astat & BT878_AOCERR) ? " OCERR" :
                                       "", btread(BT878_ARISC_PC));
                        }
                }
                if (astat & (BT878_APABORT | BT878_ARIPERR | BT878_APPERR)) {
                        if (bt878_verbose) {
                                printk(KERN_INFO
                                     "bt878(%d): irq%s%s%s risc_pc=%08x\n",
                                     bt->nr,
                                     (astat & BT878_APABORT) ? " PABORT" :
                                     "",
                                     (astat & BT878_ARIPERR) ? " RIPERR" :
                                     "",
                                     (astat & BT878_APPERR) ? " PPERR" :
                                     "", btread(BT878_ARISC_PC));
                        }
                }
                if (astat & (BT878_AFDSR | BT878_AFTRGT | BT878_AFBUS)) {
                        if (bt878_verbose) {
                                printk(KERN_INFO
                                     "bt878(%d): irq%s%s%s risc_pc=%08x\n",
                                     bt->nr,
                                     (astat & BT878_AFDSR) ? " FDSR" : "",
                                     (astat & BT878_AFTRGT) ? " FTRGT" :
                                     "",
                                     (astat & BT878_AFBUS) ? " FBUS" : "",
                                     btread(BT878_ARISC_PC));
                        }
                }
                if (astat & BT878_ARISCI) {
                        /* bits 31:28 of the raw status carry the finished block number */
                        bt->finished_block = (stat & BT878_ARISCS) >> 28;
                        tasklet_schedule(&bt->tasklet);
                        break;
                }
                count++;
                if (count > 20) {
                        /* the source keeps firing: mask everything to stop the storm */
                        btwrite(0, BT878_AINT_MASK);
                        printk(KERN_ERR
                               "bt878(%d): IRQ lockup, cleared int mask\n",
                               bt->nr);
                        break;
                }
        }
        return IRQ_HANDLED;
}

2.5 使能DMA

看函数pci_set_master(dev),官方说明如下:

pci_set_master() will enable DMA by setting the bus master bit
in the PCI_COMMAND register. It also fixes the latency timer value if
it's set to something bogus by the BIOS.  pci_clear_master() will
disable DMA by clearing the bus master bit.

跟踪代码如下:

/**
 * pci_set_master - enables bus-mastering for device dev
 * @dev: the PCI device to enable
 *
 * Enables bus-mastering on the device and calls pcibios_set_master()
 * to do the needed arch specific settings.
 */
void pci_set_master(struct pci_dev *dev)
{
        /* set the bus master bit in PCI_COMMAND */
        __pci_set_master(dev, true);
        /* let the arch code (or the weak default) adjust the latency timer */
        pcibios_set_master(dev);
}
EXPORT_SYMBOL(pci_set_master);

我们看函数__pci_set_master(dev, true);


/*
 * __pci_set_master - set or clear the bus master bit of @dev.
 * @dev:    PCI device to update
 * @enable: true to set PCI_COMMAND_MASTER, false to clear it
 *
 * The command register is written back only when the bit actually changes.
 * dev->is_busmaster is updated to @enable in every case.
 */
static void __pci_set_master(struct pci_dev *dev, bool enable)
{
        u16 before, after;

        pci_read_config_word(dev, PCI_COMMAND, &before);

        after = enable ? (before | PCI_COMMAND_MASTER)
                       : (before & ~PCI_COMMAND_MASTER);

        if (after != before) {
                dev_dbg(&dev->dev, "%s bus mastering\n",
                        enable ? "enabling" : "disabling");
                pci_write_config_word(dev, PCI_COMMAND, after);
        }

        dev->is_busmaster = enable;
}

这样就能使能master了。
再回到之前的pci_set_master()函数,查看函数 pcibios_set_master(dev);

/**
 * pcibios_set_master - enable PCI bus-mastering for device dev
 * @dev: the PCI device to enable
 *
 * Default (weak) implementation; an architecture may provide its own.
 * For conventional PCI devices it sanitizes the latency timer: a value
 * below 16 becomes 64 (or pcibios_max_latency if that is smaller), a value
 * above pcibios_max_latency is lowered to that limit, and anything in
 * between is left untouched.
 */
void __weak pcibios_set_master(struct pci_dev *dev)
{
        u8 lat;

        /* The latency timer doesn't apply to PCIe (either Type 0 or Type 1) */
        if (pci_is_pcie(dev))
                return;

        pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);

        if (lat < 16) {
                /* too small to be sane: use 64, capped by the maximum */
                if (64 <= pcibios_max_latency)
                        lat = 64;
                else
                        lat = pcibios_max_latency;
        } else if (lat <= pcibios_max_latency) {
                /* already within range, nothing to write */
                return;
        } else {
                lat = pcibios_max_latency;
        }

        pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
}

这里有对latency timer value的处理。

2.6 分配 “consistent” 内存

看函数

        /* Allocate the DMA buffers; a non-zero result is logged and sends probe to its fail2 cleanup label. */
        if ((result = bt878_mem_alloc(bt))) {
                printk(KERN_ERR "bt878: failed to allocate memory!\n");
                goto fail2;
        }

我们看一下bt878_mem_alloc(struct bt878 *bt)函数


static int bt878_mem_alloc(struct bt878 *bt)
{
        if (!bt->buf_cpu) {
                bt->buf_size = 128 * 1024;

                bt->buf_cpu = pci_zalloc_consistent(bt->dev, bt->buf_size,
                                                    &bt->buf_dma);
                if (!bt->buf_cpu)
                        return -ENOMEM;
        }

        if (!bt->risc_cpu) {
                bt->risc_size = PAGE_SIZE;
                bt->risc_cpu = pci_zalloc_consistent(bt->dev, bt->risc_size,
                                                     &bt->risc_dma);
                if (!bt->risc_cpu) {
                        bt878_mem_free(bt);
                        return -ENOMEM;
                }
        }

        return 0;
}

继续看函数pci_zalloc_consistent()

/*
 * PCI wrapper around dma_zalloc_coherent(): passes the struct device
 * embedded in @hwdev (or NULL when @hwdev is NULL) and always allocates
 * with GFP_ATOMIC. The DMA address is stored through @dma_handle.
 */
static inline void *
pci_zalloc_consistent(struct pci_dev *hwdev, size_t size,
                      dma_addr_t *dma_handle)
{
        struct device *owner = hwdev ? &hwdev->dev : NULL;

        return dma_zalloc_coherent(owner, size, dma_handle, GFP_ATOMIC);
}

我们继续看dma_zalloc_coherent()函数

/*
 * Same as dma_alloc_coherent(), with __GFP_ZERO added to the caller's
 * allocation flags. Returns whatever dma_alloc_coherent() returns.
 */
static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
                                        dma_addr_t *dma_handle, gfp_t flag)
{
        return dma_alloc_coherent(dev, size, dma_handle, flag | __GFP_ZERO);
}

这样就分配好了

3 释放资源

我们看bt878_remove函数

/*
 * bt878_remove - tear down one Bt878 audio function.
 * @pci_dev: PCI device being unbound
 *
 * Stops the chip (capture/DMA bits, interrupt mask, pending status),
 * disables bus mastering, releases the IRQ, unmaps the registers, releases
 * the BAR 0 region, frees the DMA buffers and disables the PCI device.
 */
static void bt878_remove(struct pci_dev *pci_dev)
{
        struct bt878 *bt = pci_get_drvdata(pci_dev);

        if (bt878_verbose)
                printk(KERN_INFO "bt878(%d): unloading\n", bt->nr);

        /* turn off all capturing, DMA and IRQs */
        btand(~0x13, BT878_AGPIO_DMA_CTL);

        /* first disable interrupts before unmapping the memory! */
        btwrite(0, BT878_AINT_MASK);
        btwrite(~0U, BT878_AINT_STAT);

        /*
         * Disable PCI bus-mastering through the PCI core helper instead of
         * hand-rolled byte accesses to the 16-bit PCI_COMMAND register.
         */
        pci_clear_master(bt->dev);

        free_irq(bt->irq, bt);
        printk(KERN_DEBUG "bt878_mem: 0x%p.\n", bt->bt878_mem);
        if (bt->bt878_mem)
                iounmap(bt->bt878_mem);

        release_mem_region(pci_resource_start(bt->dev, 0),
                           pci_resource_len(bt->dev, 0));

        /*
         * Flag the device as shut down. Nothing is woken up here; the flag
         * is presumably checked by users of this device elsewhere.
         */
        bt->shutdown = 1;
        bt878_mem_free(bt);

        pci_disable_device(pci_dev);
}

在这里对资源进行释放。
当模块卸载的时候会调用函数pci_unregister_driver(&bt878_pci_driver),注销这个driver。

更多推荐

PCI设备驱动解析