From de7997518a7659d301012edad249ee210aa16a4c Mon Sep 17 00:00:00 2001
From: David Brownell
Date: Mon, 18 Jun 2007 09:34:31 +0200
Subject: [PATCH] First pass at a "DMA Engine" framework driver for the AP7000 DMAC.

No peripheral DMA support yet ... that'd need new APIs.

PRELIMINARY, untuned.  "ttcp" triggers this code if the network layer is
told to try using this memcpy offload engine.  But using it is a net
performance loss:  15% more wall clock time, 14% less throughput.

One potential source of performance issues: this is configured to use byte
transfers.  Synopsys docs (AP7000 docs are a subset) suggest, but don't
seem to actually say one way or another, that the controller is smart about
burst access to memory ... constructing bursts etc.  If it isn't, that
could very easily explain why this seems slow.  However, the first attempt
to set up e.g. 32-bit wide reads/writes (fully aligned!) didn't work;
addresses were not incrementing properly.

Another potential slowdown: cache operations might take too long.
---
Not part of the patch:  sketches of the client-side usage path and of the
32-bit wide transfer setup that was attempted are appended after the patch.

 drivers/dma/Kconfig   |    8 +
 drivers/dma/Makefile  |    1 +
 drivers/dma/dw_dmac.c |  628 +++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/dma/dw_dmac.h |  219 +++++++++++++++++
 4 files changed, 856 insertions(+), 0 deletions(-)
 create mode 100644 drivers/dma/dw_dmac.c
 create mode 100644 drivers/dma/dw_dmac.h

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 72be6c6..010e92e 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -32,4 +32,12 @@ config INTEL_IOATDMA
 	---help---
 	  Enable support for the Intel(R) I/OAT DMA engine.
 
+config DW_DMAC
+	tristate "Synopsys DesignWare AHB DMA support"
+	depends on DMA_ENGINE
+	default y if CPU_AT32AP7000
+	---help---
+	  Support the Synopsys DesignWare AHB DMA controller.  This
+	  can be integrated in chips such as the Atmel AT32ap7000.
+
 endmenu
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index bdcfdbd..496cb50 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -1,3 +1,4 @@
 obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
 obj-$(CONFIG_NET_DMA) += iovlock.o
 obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
+obj-$(CONFIG_DW_DMAC) += dw_dmac.o
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
new file mode 100644
index 0000000..469d275
--- /dev/null
+++ b/drivers/dma/dw_dmac.c
@@ -0,0 +1,628 @@
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+
+/*
+ * This supports the Synopsys "DesignWare AHB Central DMA Controller"
+ * (DW_ahb_dmac), which is used with various AMBA 2.0 systems (not all
+ * of which use ARM any more).  See the "Databook" from Synopsys for
+ * information beyond what licensees probably provide.
+ *
+ * This "DMA Engine" framework is currently only a memcpy accelerator,
+ * so the **PRIMARY FUNCTIONALITY** of this controller is not available:
+ * hardware-synchronized DMA to/from external hardware or integrated
+ * peripherals (such as an MMC/SD controller or audio interface).
+ *
+ * The driver has currently been tested only with the Atmel AT32AP7000,
+ * which appears to be configured without writeback ... contrary to docs,
+ * unless there's a bug in dma-coherent memory allocation.
+ */
+
+#include "dw_dmac.h"
+
+/*----------------------------------------------------------------------*/
+
+/* Because we're not relying on writeback from the controller (it may not
+ * even be configured into the core!) we don't need to use dma_pool.  These
+ * descriptors -- and associated data -- are cacheable.
We do need to make + * sure their dcache entries are written back before handing them off to + * the controller, though. + */ + +#define FREECNT 10 /* for fastpath allocations */ + +static struct dw_lli * +dwc_lli_alloc(struct dw_dma_chan *dwc, gfp_t flags) +{ + struct dw_lli *lli; + dma_addr_t phys; + + lli = kmem_cache_alloc(dwc->lli, flags); + if (unlikely(!lli)) + return NULL; + phys = dma_map_single(dwc->dev, lli, sizeof *lli, DMA_TO_DEVICE); + + lli->ctllo = lli->ctlhi = 0; + lli->dma = phys; + + return lli; +} + +static inline void +dwc_lli_free(struct dw_dma_chan *dwc, struct dw_lli *lli) +{ + dma_unmap_single(dwc->dev, lli->dma, sizeof *lli, DMA_TO_DEVICE); + kmem_cache_free(dwc->lli, lli); +} + +static inline struct dw_lli * +dwc_lli_get(struct dw_dma_chan *dwc, gfp_t flags) +{ + struct dw_lli *ret = dwc->free; + + if (ret && FREECNT) { + dwc->free = ret->next; + dwc->freecnt--; + return ret; + } + + return dwc_lli_alloc(dwc, flags); +} + +static inline void +dwc_lli_put(struct dw_dma_chan *dwc, struct dw_lli *lli) +{ + if (dwc->freecnt < FREECNT) { + lli->ctllo = lli->ctlhi = 0; + lli->next = dwc->free; + dwc->free = lli; + dwc->freecnt++; + } else + dwc_lli_free(dwc, lli); +} + +static inline void +dwc_lli_sync(struct dw_dma_chan *dwc, struct dw_lli *lli) +{ + dma_sync_single_for_device(dwc->dev, + lli->dma, sizeof *lli, DMA_TO_DEVICE); +} + +/*----------------------------------------------------------------------*/ + +static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_lli *first) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + + /* ASSERT: channel is idle */ + + channel_writel(dwc, LLP, first->dma); + channel_writel(dwc, CTL_LO, + DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); + channel_writel(dwc, CTL_HI, 0); + channel_set_bit(dw, CH_EN, dwc->mask); + dwc->end = NULL; +} + +static void dwc_memcpy_scan(struct dw_dma_chan *dwc) +{ + struct dw_lli *lli, *next; + dma_cookie_t cookie = 0; + int saw_end; + u32 next_lli; + + /* Cores configured without writeback never set DWC_CTLH_DONE, + * so don't scan for that ... scan up to the chip's active LLP. + */ + next_lli = channel_readl(dwc, LLP) & ~0x3; + //for (lli = dwc->first, next_lli = lli->dma, saw_end = 0; + for (lli = dwc->first, saw_end = 0; + lli != dwc->last && !saw_end; + lli = next) { + +// TWEAK: shrink race-while-scan + + /* Race: queue can advance while we scan */ +// if (lli->dma == next_lli) +// next_lli = channel_readl(dwc, LLP); + + /* past the head of the queue? */ + if (lli->dma == next_lli) + break; + + /* "Case 5" termination means our scan finished, and we + * found end-of-list. Its transfer isn't done until the + * channel disables itself. + */ + if (!(lli->ctllo & DWC_CTLL_LLP_S_EN)) { + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + + if (dma_readl(dw, CH_EN) & dwc->mask) + break; + saw_end = 1; + } + + /* This LLI is completely done ... clean up. + * + * NOTE: we "know" that for now, dma_unmap calls are safe + * to omit on all relevant platforms. Otherwise we'd need + * to track stuff and unmap here in various ways ... + */ + cookie = lli->cookie; + next = lli->next; + + dwc_lli_put(dwc, lli); + } + if (cookie) { + dwc->first = lli; + dwc->completed = cookie; + } + + /* Wait till the channel finished the transfer (e.g. flushed FIFO) + * before reporting anything, or starting a new transfer. 
+ */ + if (saw_end && dwc->first != dwc->last) + dwc_dostart(dwc, dwc->first); +} + +static dma_cookie_t +do_dwc_memcpy(struct dw_dma_chan *dwc, + dma_addr_t dest, dma_addr_t src, + size_t len) +{ + u32 ctllo; + unsigned long flags; + dma_cookie_t cookie; + struct dw_lli *first; + + /* It's not clear what rules to follow for mem-to-mem transfers, + * but it's surely safe to specify source and destination widths + * as one byte and, with fifo usage and bursting enabled, allow + * the controller to turn that into multiword bursts using memory + * bandwidth efficiently. + * + * REVISIT is the controller smart enough to do that as well as + * cpu-based memcpy does with cacheline fill/write? + */ + ctllo = DWC_CTLL_DST_WIDTH(0) + | DWC_CTLL_SRC_WIDTH(0) + | DWC_CTLL_DST_INC + | DWC_CTLL_SRC_INC + | DWC_CTLL_DST_MSIZE(4) + | DWC_CTLL_SRC_MSIZE(4) + | DWC_CTLL_FC_M2M + /* NOTE: DMS+SMS could be system-specific... */ + | DWC_CTLL_DMS(0) + | DWC_CTLL_SMS(0) + | DWC_CTLL_LLP_D_EN + | DWC_CTLL_LLP_S_EN; + + spin_lock_irqsave(&dwc->lock, flags); + + cookie = dwc->chan.cookie; + + if (!len) + goto done; + + /* Use block chaining, and "transfer type 10" with source and + * destination addresses updated through LLP. Terminate using + * "transfer type 5" (LLP valid, LLP_*_EN clear). + * + * IMPORTANT: here we assume the core is configured with each + * channel supporting dma descriptor lists! + */ + first = dwc->last; + while (len) { + struct dw_lli *lli = dwc->last; + struct dw_lli *next = dwc_lli_get(dwc, GFP_ATOMIC); + size_t delta; + + /* FIXME try something more clever than BUG_ON() */ + BUG_ON(!next); + + /* chain all except the last LLI */ + if (len > DWC_CTLH_BLOCK_TS_MASK) { + delta = DWC_CTLH_BLOCK_TS_MASK; + len -= delta; + } else { + delta = len; + len = 0; + + /* This controller doesn't handle live queue updates + * politely; it must stop, then restart. Let's make + * restarts cover the whole pending transfer queue. + */ + if (first != dwc->first) { + struct dw_lli *tmp = dwc->end; + + if (tmp) { +// FIXME if we can sometimes make dest write faster, +// we must mask the old ctllo value not overwrite it... + tmp->ctllo = ctllo; + dwc_lli_sync(dwc, tmp); + } + } + dwc->end = lli; + + /* make this be the only type 5 terminator */ + ctllo &= ~(DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); + } + + if (cookie++ < 0) + cookie = 1; + + lli->sar = src; + lli->dar = dest; + lli->llp = next->dma; + lli->ctllo = ctllo; + lli->ctlhi = delta; + lli->next = next; + lli->cookie = cookie; + dwc_lli_sync(dwc, lli); + + dwc->last = next; + + dest += delta; + src += delta; + } + dwc->chan.cookie = cookie; + + /* Start/restart the queue ASAP */ + if (first == dwc->first) + dwc_dostart(dwc, first); + else + dwc_memcpy_scan(dwc); +done: + spin_unlock_irqrestore(&dwc->lock, flags); + return cookie; +} + +/*----------------------------------------------------------------------*/ + +static int dwc_alloc_memcpy_resources(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + + /* ASSERT: channel is idle */ + if (dma_readl(dw, CH_EN) & dwc->mask) { + pr_debug("%s: DMA channel not idle?\n", + chan->class_dev.class_id); + return -EIO; + } + + dwc->completed = chan->cookie = 1; + + /* "no" handshaking, and no fancy games */ + channel_writel(dwc, CFG_LO, 0); + channel_writel(dwc, CFG_HI, DWC_CFGH_FIFO_MODE); + + /* NOTE: got access faults trying to clear SGR and DSR; + * also later when trying to read SSTATAR and DSTATAR... 
+ */ + + return 1; +} + +static void dwc_free_resources(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + struct dw_lli *lli = dwc->free, *tmp; + + /* ASSERT: channel is idle */ + BUG_ON(dma_readl(dw, CH_EN) & dwc->mask); + + for (lli = dwc->free; lli && FREECNT; lli = tmp) { + tmp = lli->next; + dwc_lli_free(dwc, lli); + } + dwc->freecnt = 0; +} + +static dma_cookie_t +dwc_memcpy_buf_to_buf(struct dma_chan *chan, + void *dest, + void *src, + size_t len) +{ + dma_addr_t dest_addr; + dma_addr_t src_addr; + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + + dest_addr = dma_map_single(dwc->dev, dest, len, DMA_FROM_DEVICE); + src_addr = dma_map_single(dwc->dev, src, len, DMA_TO_DEVICE); + + return do_dwc_memcpy(dwc, dest_addr, src_addr, len); +} + +static dma_cookie_t +dwc_memcpy_buf_to_pg(struct dma_chan *chan, + struct page *page, unsigned int offset, + void *src, + size_t len) +{ + dma_addr_t dest_addr; + dma_addr_t src_addr; + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + + dest_addr = dma_map_page(dwc->dev, page, offset, + len, DMA_FROM_DEVICE); + src_addr = dma_map_single(dwc->dev, src, + len, DMA_TO_DEVICE); + + return do_dwc_memcpy(dwc, dest_addr, src_addr, len); +} + +static dma_cookie_t +dwc_memcpy_pg_to_pg(struct dma_chan *chan, + struct page *dest_pg, unsigned int dest_off, + struct page *src_pg, unsigned int src_off, + size_t len) +{ + dma_addr_t dest_addr; + dma_addr_t src_addr; + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + + dest_addr = dma_map_page(dwc->dev, dest_pg, dest_off, + len, DMA_FROM_DEVICE); + src_addr = dma_map_page(dwc->dev, src_pg, src_off, + len, DMA_TO_DEVICE); + + return do_dwc_memcpy(dwc, dest_addr, src_addr, len); +} + +static enum dma_status +dwc_dma_is_complete(struct dma_chan *chan, + dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + unsigned long flags; + dma_cookie_t last_used; + dma_cookie_t last_complete; + + spin_lock_irqsave(&dwc->lock, flags); + + if (dwc->first != dwc->last) + dwc_memcpy_scan(dwc); + + last_complete = dwc->completed; + if (done) + *done= last_complete; + last_used = chan->cookie; + if (used) + *used = last_used; + + spin_unlock_irqrestore(&dwc->lock, flags); + + return dma_async_is_complete(cookie, last_complete, last_used); +} + +static void dwc_memcpy_issue_pending(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); + if (dwc->first != dwc->last) + dwc_memcpy_scan(dwc); + spin_unlock_irqrestore(&dwc->lock, flags); +} + +/*----------------------------------------------------------------------*/ + +static void dw_dma_off(struct dw_dma *dw) +{ + dma_writel(dw, CFG, 0); + while (dma_readl(dw, CFG) & DW_CFG_DMA_EN) + cpu_relax(); +} + +static int __init dw_probe(struct platform_device *pdev) +{ + struct resource *io; + struct dw_dma *dw; + struct kmem_cache *lli; + int err; + int i; + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!io) + return -EINVAL; + + /* FIXME platform_data holds NDMA. Use that to adjust the size + * of this allocation to match the silicon, and channel init. 
+ */ + + dw = kzalloc(sizeof *dw, GFP_KERNEL); + if (!dw) + return -ENOMEM; + + if (request_mem_region(io->start, DW_REGLEN, + pdev->dev.driver->name) == 0) { + err = -EBUSY; + goto err_kfree; + } + + memset(dw, 0, sizeof *dw); + + dw->regs = ioremap(io->start, DW_REGLEN); + if (!dw->regs) { + err = -ENOMEM; + goto err_release_r; + } + + dw->clk = clk_get(&pdev->dev, "hclk"); + if (IS_ERR(dw->clk)) { + err = PTR_ERR(dw->clk); + goto err_clk; + } + clk_enable(dw->clk); + + /* force dma off, just in case */ + dw_dma_off(dw); + + lli = kmem_cache_create(pdev->dev.bus_id, + sizeof(struct dw_lli), 4, 0, + NULL, NULL); + if (!lli) { + err = -ENOMEM; + goto err_dma_pool; + } + + dw->lli = lli; + platform_set_drvdata(pdev, dw); + + /* NOTE: not using IRQs for now. Since the interface is purely + * async "collect status when done", we can eliminate that cost. + */ + + INIT_LIST_HEAD(&dw->dma.channels); + for (i = 0; i < NDMA; i++, dw->dma.chancnt++) { + struct dw_dma_chan *dwc = &dw->chan[i]; + + dwc->chan.device = &dw->dma; + dwc->chan.cookie = dwc->completed = 1; + dwc->chan.chan_id = i; + list_add_tail(&dwc->chan.device_node, &dw->dma.channels); + + dwc->ch_regs = dw->regs + DW_DMAC_CHAN_BASE(i); + dwc->lli = lli; + spin_lock_init(&dwc->lock); + dwc->mask = 1 << i; + + /* FIXME dmaengine API bug: the dma_device isn't coupled + * to the underlying hardware; so neither is the dma_chan. + * + * Workaround: dwc->dev instead of dwc->chan.cdev.dev + * (or eventually dwc->chan.dev.parent). + */ + dwc->dev = &pdev->dev; + + /* Invariant: dwc->last is always present and deactivated */ + dwc->first = dwc->last = dwc_lli_alloc(dwc, GFP_KERNEL); + if (!dwc->first) + break; + + channel_clear_bit(dw, CH_EN, dwc->mask); + } + + dw->dma.device_alloc_chan_resources = dwc_alloc_memcpy_resources; + dw->dma.device_free_chan_resources = dwc_free_resources; + + dw->dma.device_memcpy_buf_to_buf = dwc_memcpy_buf_to_buf; + dw->dma.device_memcpy_buf_to_pg = dwc_memcpy_buf_to_pg; + dw->dma.device_memcpy_pg_to_pg = dwc_memcpy_pg_to_pg; + + dw->dma.device_memcpy_complete = dwc_dma_is_complete; + dw->dma.device_memcpy_issue_pending = dwc_memcpy_issue_pending; + + dma_writel(dw, CFG, DW_CFG_DMA_EN); + + printk(KERN_INFO "%s: DesignWare DMA Controller, %d channels\n", + pdev->dev.bus_id, dw->dma.chancnt); + + dma_async_device_register(&dw->dma); + + return 0; + +err_dma_pool: + clk_disable(dw->clk); + clk_put(dw->clk); +err_clk: + iounmap(dw->regs); + dw->regs = NULL; +err_release_r: + release_resource(io); +err_kfree: + kfree(dw); + return err; +} + +static int __exit dw_remove(struct platform_device *pdev) +{ + struct dw_dma *dw = platform_get_drvdata(pdev); + struct dw_dma_chan *dwc; + struct resource *io; + + dw_dma_off(dw); + dma_async_device_unregister(&dw->dma); + + list_for_each_entry(dwc, &dw->dma.channels, chan.device_node) { + channel_clear_bit(dw, CH_EN, dwc->mask); + dwc_lli_free(dwc, dwc->last); + } + + kmem_cache_destroy(dw->lli); + + clk_disable(dw->clk); + clk_put(dw->clk); + + iounmap(dw->regs); + dw->regs = NULL; + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(io->start, DW_REGLEN); + + kfree(dw); + return 0; +} + +static void dw_shutdown(struct platform_device *pdev) +{ + struct dw_dma *dw = platform_get_drvdata(pdev); + + dw_dma_off(platform_get_drvdata(pdev)); + clk_disable(dw->clk); +} + +static int dw_suspend_late(struct platform_device *pdev, pm_message_t mesg) +{ + struct dw_dma *dw = platform_get_drvdata(pdev); + + dw_dma_off(platform_get_drvdata(pdev)); + 
clk_disable(dw->clk); + return 0; +} + +static int dw_resume_early(struct platform_device *pdev) +{ + struct dw_dma *dw = platform_get_drvdata(pdev); + + clk_enable(dw->clk); + dma_writel(dw, CFG, DW_CFG_DMA_EN); + return 0; + +} + +static struct platform_driver dw_driver = { + .remove = __exit_p(dw_remove), + .shutdown = dw_shutdown, + .suspend_late = dw_suspend_late, + .resume_early = dw_resume_early, + .driver = { + .name = "dw_dmac", + }, +}; + +static int __init dw_init(void) +{ + BUILD_BUG_ON(NDMA > 8); + return platform_driver_probe(&dw_driver, dw_probe); +} +device_initcall(dw_init); + +static void __exit dw_exit(void) +{ + platform_driver_unregister(&dw_driver); +} +module_exit(dw_exit); + +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/dw_dmac.h b/drivers/dma/dw_dmac.h new file mode 100644 index 0000000..39d7e24 --- /dev/null +++ b/drivers/dma/dw_dmac.h @@ -0,0 +1,219 @@ +/* + * Driver for the Synopsys DesignWare AHB DMA Controller + * + * Copyright (C) 2005-2007 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* REVISIT Synopsys provides a C header; use symbols from there instead? */ + +/* per-channel registers */ +#define DW_DMAC_CHAN_SAR 0x000 +#define DW_DMAC_CHAN_DAR 0x008 +#define DW_DMAC_CHAN_LLP 0x010 +#define DW_DMAC_CHAN_CTL_LO 0x018 +# define DWC_CTLL_INT_EN (1 << 0) /* irqs enabled? */ +# define DWC_CTLL_DST_WIDTH(n) ((n)<<1) /* bytes per element */ +# define DWC_CTLL_SRC_WIDTH(n) ((n)<<5) +# define DWC_CTLL_DST_INC (0<<7) /* DAR update/not */ +# define DWC_CTLL_DST_DEC (1<<7) +# define DWC_CTLL_DST_FIX (2<<7) +# define DWC_CTLL_SRC_INC (0<<7) /* SAR update/not */ +# define DWC_CTLL_SRC_DEC (1<<9) +# define DWC_CTLL_SRC_FIX (2<<9) +# define DWC_CTLL_DST_MSIZE(n) ((n)<<11) /* burst, #elements */ +# define DWC_CTLL_SRC_MSIZE(n) ((n)<<14) +# define DWC_CTLL_S_GATH_EN (1 << 17) /* src gather, !FIX */ +# define DWC_CTLL_D_SCAT_EN (1 << 18) /* dst scatter, !FIX */ +# define DWC_CTLL_FC_M2M (0 << 20) /* mem-to-mem */ + /* plus 7 flow control/synch models for peripheral I/O ... 
*/ +# define DWC_CTLL_DMS(n) ((n)<<23) +# define DWC_CTLL_SMS(n) ((n)<<25) +# define DWC_CTLL_LLP_D_EN (1 << 27) /* dest block chain */ +# define DWC_CTLL_LLP_S_EN (1 << 28) /* src block chain */ +#define DW_DMAC_CHAN_CTL_HI 0x01c +# define DWC_CTLH_DONE 0x00001000 +# define DWC_CTLH_BLOCK_TS_MASK 0x000007ff +//#define DW_DMAC_CHAN_SSTAT 0x020 +//#define DW_DMAC_CHAN_DSTAT 0x028 +#define DW_DMAC_CHAN_SSTATAR 0x030 +#define DW_DMAC_CHAN_DSTATAR 0x038 +#define DW_DMAC_CHAN_CFG_LO 0x040 +# define DWC_CFGL_PRIO(x) ((x) << 5) /* priority */ +# define DWC_CFGL_CH_SUSP (1 << 8) /* pause xfer */ +# define DWC_CFGL_FIFO_EMPTY (1 << 9) /* pause xfer */ +# define DWC_CFGL_HS_DST (1 << 10) /* handshake w/dst */ +# define DWC_CFGL_HS_SRC (1 << 11) /* handshake w/src */ +# define DWC_CFGL_LOCK_CH_XFER (0 << 12) /* scope of LOCK_CH */ +# define DWC_CFGL_LOCK_CH_BLOCK (1 << 12) +# define DWC_CFGL_LOCK_CH_XACT (2 << 12) +# define DWC_CFGL_LOCK_BUS_XFER (0 << 14) /* scope of LOCK_BUS */ +# define DWC_CFGL_LOCK_BUS_BLOCK (1 << 14) +# define DWC_CFGL_LOCK_BUS_XACT (2 << 14) +# define DWC_CFGL_LOCK_CH (1 << 15) /* channel lockout */ +# define DWC_CFGL_LOCK_BUS (1 << 16) /* busmaster lockout */ +# define DWC_CFGL_HS_DST_POL (1 << 18) +# define DWC_CFGL_HS_SRC_POL (1 << 19) +# define DWC_CFGL_MAX_BURST(x) ((x) << 20) +# define DWC_CFGL_RELOAD_SAR (1 << 30) +# define DWC_CFGL_RELOAD_DAR (1 << 31) +#define DW_DMAC_CHAN_CFG_HI 0x044 +# define DWC_CFGH_FCMODE (1 << 0) +# define DWC_CFGH_FIFO_MODE (1 << 1) +# define DWC_CFGH_PROTCTL(x) ((x) << 2) +# define DWC_CFGH_DS_UPD_EN (1 << 5) +# define DWC_CFGH_SS_UPD_EN (1 << 6) +# define DWC_CFGH_SRC_PER(x) ((x) << 7) +# define DWC_CFGH_DST_PER(x) ((x) << 11) +#define DW_DMAC_CHAN_SGR 0x048 +# define DWC_SGR_SGI(x) ((x) << 0) +# define DWC_SGR_SGC(x) ((x) << 20) +#define DW_DMAC_CHAN_DSR 0x050 +# define DWC_DSR_DSI(x) ((x) << 0) +# define DWC_DSR_DSC(x) ((x) << 20) + +#define DW_DMAC_CHAN_BASE(n) ((n)*0x58) + +/* irq handling */ +#define DW_DMAC_RAW_XFER 0x2c0 /* r */ +#define DW_DMAC_RAW_BLOCK 0x2c8 +#define DW_DMAC_RAW_SRC_TRAN 0x2d0 +#define DW_DMAC_RAW_DST_TRAN 0x2d8 +#define DW_DMAC_RAW_ERROR 0x2e0 + +#define DW_DMAC_STATUS_XFER 0x2e8 /* r (raw & mask) */ +#define DW_DMAC_STATUS_BLOCK 0x2f0 +#define DW_DMAC_STATUS_SRC_TRAN 0x2f8 +#define DW_DMAC_STATUS_DST_TRAN 0x300 +#define DW_DMAC_STATUS_ERROR 0x308 + +#define DW_DMAC_MASK_XFER 0x310 /* rw (set = irq enabled) */ +#define DW_DMAC_MASK_BLOCK 0x318 +#define DW_DMAC_MASK_SRC_TRAN 0x320 +#define DW_DMAC_MASK_DST_TRAN 0x328 +#define DW_DMAC_MASK_ERROR 0x330 + +#define DW_DMAC_CLEAR_XFER 0x338 /* w (ack, affects "raw") */ +#define DW_DMAC_CLEAR_BLOCK 0x340 +#define DW_DMAC_CLEAR_SRC_TRAN 0x348 +#define DW_DMAC_CLEAR_DST_TRAN 0x350 +#define DW_DMAC_CLEAR_ERROR 0x358 + +#define DW_DMAC_STATUS_INT 0x360 /* r */ + +/* software handshaking */ +#define DW_DMAC_REQ_SRC 0x368 /* rw */ +#define DW_DMAC_REQ_DST 0x370 +#define DW_DMAC_SGL_REQ_SRC 0x378 +#define DW_DMAC_SGL_REQ_DST 0x380 +#define DW_DMAC_LAST_SRC 0x388 +#define DW_DMAC_LAST_DST 0x390 + +/* miscellaneous */ +#define DW_DMAC_CFG 0x398 /* rw */ +# define DW_CFG_DMA_EN (1 << 0) +#define DW_DMAC_CH_EN 0x3a0 + +#define DW_DMAC_ID 0x3a8 /* r */ +#define DW_DMAC_TEST 0x3b0 /* rw */ + +/* optional encoded params, 0x3c8..0x3 */ + +#define DW_REGLEN 0x400 + + +/* How many channels ... 
potentially, up to 8 */ +#ifdef CONFIG_CPU_AT32AP7000 +#define NDMA 3 +#endif + +#ifndef NDMA +/* REVISIT want a better (static) solution than this */ +#warning system unrecognized, assuming max NDMA=8 +#define NDMA 8 +#endif + +struct dw_dma_chan { + struct dma_chan chan; + void __iomem *ch_regs; + struct kmem_cache *lli; + struct device *dev; + + u8 mask; + + spinlock_t lock; + + /* these other elements are all protected by lock */ + dma_cookie_t completed; + struct dw_lli *first; /* head of hw queue */ + struct dw_lli *last; /* tail of hw queue */ + struct dw_lli *end; /* next "type 5" lli */ + struct dw_lli *free; /* freelist */ + unsigned freecnt; +}; + +/* REVISIT these register access macros cause inefficient code: the st.w + * and ld.w displacements are all zero, never DW_DMAC_ constants embedded + * in the instructions. GCC 4.0.2-atmel.0.99.2 issue? Struct access is + * as efficient as one would expect... + */ + +#define channel_readl(dwc, name) \ + __raw_readl((dwc)->ch_regs + DW_DMAC_CHAN_##name) +#define channel_writel(dwc, name, val) \ + __raw_writel((val), (dwc)->ch_regs + DW_DMAC_CHAN_##name) + +static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct dw_dma_chan, chan); +} + + +struct dw_dma { + struct dma_device dma; + void __iomem *regs; + struct kmem_cache *lli; + struct clk *clk; + struct dw_dma_chan chan[NDMA]; +}; + +#define dma_readl(dw, name) \ + __raw_readl((dw)->regs + DW_DMAC_##name) +#define dma_writel(dw, name, val) \ + __raw_writel((val), (dw)->regs + DW_DMAC_##name) + +#define channel_set_bit(dw, reg, mask) \ + dma_writel(dw, reg, ((mask) << 8) | (mask)) +#define channel_clear_bit(dw, reg, mask) \ + dma_writel(dw, reg, ((mask) << 8) | 0) + +static inline struct dw_dma *to_dw_dma(struct dma_device *ddev) +{ + return container_of(ddev, struct dw_dma, dma); +} + + +/* LLI == Linked List Item; a.k.a. DMA block descriptor */ +struct dw_lli { + /* FIRST values the hardware uses */ + dma_addr_t sar; + dma_addr_t dar; + dma_addr_t llp; /* chain to next lli */ + u32 ctllo; + /* values that may get written back: */ + u32 ctlhi; + /* sstat and dstat can snapshot peripheral register state. + * silicon config may discard either or both... + */ + u32 sstat; + u32 dstat; + + /* THEN values for driver housekeeping */ + struct dw_lli *next; /* mirrors llp */ + dma_addr_t dma; /* dma addr of this lli */ + dma_cookie_t cookie; +}; + -- 1.5.2.2
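Not part of the patch:  a sketch of the client-side path that "ttcp" ends up
exercising, using the 2.6.2x-era dmaengine memcpy wrappers (the same calls
the NET_DMA code uses).  The channel pointer is assumed to come from the
dmaengine client framework, and copy_with_offload() is just an illustrative
name, not something in the tree.

	#include <linux/dmaengine.h>

	/* Sketch only:  drive the memcpy offload the way a dmaengine client
	 * would.  dma_async_memcpy_buf_to_buf() lands in this driver's
	 * dwc_memcpy_buf_to_buf(), issue_pending triggers dwc_memcpy_scan(),
	 * and completion is polled through dwc_dma_is_complete().
	 */
	static int copy_with_offload(struct dma_chan *chan,
			void *dest, void *src, size_t len)
	{
		dma_cookie_t cookie, done, used;

		cookie = dma_async_memcpy_buf_to_buf(chan, dest, src, len);
		dma_async_memcpy_issue_pending(chan);

		/* busy-wait for the copy; real users overlap other work */
		while (dma_async_memcpy_complete(chan, cookie, &done, &used)
				== DMA_IN_PROGRESS)
			cpu_relax();

		return 0;
	}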
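Also not part of the patch:  roughly what the 32-bit wide attempt needs,
written with the macros from dw_dmac.h.  Per the Synopsys databook the CTLx
width fields are encoded (0 = 8 bits, 1 = 16, 2 = 32), and BLOCK_TS in
CTL_HI counts transfers of the source width rather than bytes; so besides
flipping the width bits, do_dwc_memcpy() would have to scale both ctlhi and
its per-LLI chunking, and all addresses and lengths must stay 4-byte
aligned.  Missing that scaling might explain the addresses that "were not
incrementing properly".  dwc_fill_lli_word() below is only an illustration.

	/* Sketch:  fill one LLI for a 32-bit wide mem-to-mem block.
	 * Assumes src, dest and len are all multiples of 4; the largest
	 * chunk per LLI becomes DWC_CTLH_BLOCK_TS_MASK * 4 bytes.
	 */
	static void dwc_fill_lli_word(struct dw_lli *lli,
			dma_addr_t dest, dma_addr_t src, size_t len)
	{
		lli->sar = src;
		lli->dar = dest;
		lli->ctllo = DWC_CTLL_DST_WIDTH(2)	/* 2 == 32-bit elements */
			| DWC_CTLL_SRC_WIDTH(2)
			| DWC_CTLL_DST_INC
			| DWC_CTLL_SRC_INC
			| DWC_CTLL_DST_MSIZE(4)
			| DWC_CTLL_SRC_MSIZE(4)
			| DWC_CTLL_FC_M2M
			| DWC_CTLL_DMS(0)
			| DWC_CTLL_SMS(0)
			| DWC_CTLL_LLP_D_EN
			| DWC_CTLL_LLP_S_EN;
		/* BLOCK_TS is a transfer count:  words here, not bytes */
		lli->ctlhi = len >> 2;
	}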