openwrtv3/target/linux/brcm2708/patches-4.1/0106-vchiq_arm-Two-cacheing-fixes.patch
Felix Fietkau 0b5d87fd30 brcm2708: update 4.1 patches
As usual, this patches were taken (and rebased) from
https://github.com/raspberrypi/linux/commits/rpi-4.1.y

Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>

SVN-Revision: 46853
2015-09-11 16:32:00 +00:00

268 lines
8.9 KiB
Diff

From fa6257d47be3fff4cf93de15a0bb59223d6a81b5 Mon Sep 17 00:00:00 2001
From: Phil Elwell <phil@raspberrypi.org>
Date: Wed, 8 Jul 2015 14:48:57 +0100
Subject: [PATCH 106/171] vchiq_arm: Two cacheing fixes
1) Make fragment size vary with cache line size
Without this patch, non-cache-line-aligned transfers may corrupt
(or be corrupted by) adjacent data structures.
Both ARM and VC need to be updated to enable this feature. This is
ensured by having the loader apply a new DT parameter -
cache-line-size. The existence of this parameter guarantees that the
kernel is capable, and the parameter will only be modified from the
safe default if the loader is capable.
2) Flush/invalidate vmalloc'd memory, and invalidate after reads
---
arch/arm/boot/dts/bcm2708_common.dtsi | 5 +
.../interface/vchiq_arm/vchiq_2835_arm.c | 112 +++++++++++++--------
2 files changed, 77 insertions(+), 40 deletions(-)
--- a/arch/arm/boot/dts/bcm2708_common.dtsi
+++ b/arch/arm/boot/dts/bcm2708_common.dtsi
@@ -218,6 +218,7 @@
compatible = "brcm,bcm2835-vchiq";
reg = <0x7e00b840 0xf>;
interrupts = <0 2>;
+ cache-line-size = <32>;
};
thermal: thermal {
@@ -270,4 +271,8 @@
clock-frequency = <126000000>;
};
};
+
+ __overrides__ {
+ cache_line_size = <&vchiq>, "cache-line-size:0";
+ };
};
--- a/drivers/misc/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
+++ b/drivers/misc/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
@@ -42,6 +42,7 @@
#include <linux/platform_data/mailbox-bcm2708.h>
#include <linux/platform_device.h>
#include <linux/uaccess.h>
+#include <linux/of.h>
#include <asm/pgtable.h>
#define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)
@@ -64,8 +65,10 @@ typedef struct vchiq_2835_state_struct {
} VCHIQ_2835_ARM_STATE_T;
static void __iomem *g_regs;
-static FRAGMENTS_T *g_fragments_base;
-static FRAGMENTS_T *g_free_fragments;
+static unsigned int g_cache_line_size = sizeof(CACHE_LINE_SIZE);
+static unsigned int g_fragments_size;
+static char *g_fragments_base;
+static char *g_free_fragments;
static struct semaphore g_free_fragments_sema;
static unsigned long g_virt_to_bus_offset;
@@ -95,9 +98,13 @@ int vchiq_platform_init(struct platform_
g_virt_to_bus_offset = virt_to_dma(dev, (void *)0);
+ (void)of_property_read_u32(dev->of_node, "cache-line-size",
+ &g_cache_line_size);
+ g_fragments_size = 2 * g_cache_line_size;
+
/* Allocate space for the channels in coherent memory */
slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE);
- frag_mem_size = PAGE_ALIGN(sizeof(FRAGMENTS_T) * MAX_FRAGMENTS);
+ frag_mem_size = PAGE_ALIGN(g_fragments_size * MAX_FRAGMENTS);
slot_mem = dmam_alloc_coherent(dev, slot_mem_size + frag_mem_size,
&slot_phys, GFP_KERNEL);
@@ -117,15 +124,15 @@ int vchiq_platform_init(struct platform_
vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] =
MAX_FRAGMENTS;
- g_fragments_base = (FRAGMENTS_T *)(slot_mem + slot_mem_size);
+ g_fragments_base = (char *)slot_mem + slot_mem_size;
slot_mem_size += frag_mem_size;
g_free_fragments = g_fragments_base;
for (i = 0; i < (MAX_FRAGMENTS - 1); i++) {
- *(FRAGMENTS_T **)&g_fragments_base[i] =
- &g_fragments_base[i + 1];
+ *(char **)&g_fragments_base[i*g_fragments_size] =
+ &g_fragments_base[(i + 1)*g_fragments_size];
}
- *(FRAGMENTS_T **)&g_fragments_base[i] = NULL;
+ *(char **)&g_fragments_base[i * g_fragments_size] = NULL;
sema_init(&g_free_fragments_sema, MAX_FRAGMENTS);
if (vchiq_init_state(state, vchiq_slot_zero, 0) != VCHIQ_SUCCESS)
@@ -344,7 +351,7 @@ vchiq_doorbell_irq(int irq, void *dev_id
** cached area.
** N.B. This implementation plays slightly fast and loose with the Linux
-** driver programming rules, e.g. its use of __virt_to_bus instead of
+** driver programming rules, e.g. its use of dmac_map_area instead of
** dma_map_single, but it isn't a multi-platform driver and it benefits
** from increased speed as a result.
*/
@@ -355,7 +362,6 @@ create_pagelist(char __user *buf, size_t
{
PAGELIST_T *pagelist;
struct page **pages;
- struct page *page;
unsigned long *addrs;
unsigned int num_pages, offset, i;
char *addr, *base_addr, *next_addr;
@@ -386,10 +392,25 @@ create_pagelist(char __user *buf, size_t
pages = (struct page **)(addrs + num_pages + 1);
if (is_vmalloc_addr(buf)) {
- for (actual_pages = 0; actual_pages < num_pages; actual_pages++) {
- pages[actual_pages] = vmalloc_to_page(buf + (actual_pages * PAGE_SIZE));
+ int dir = (type == PAGELIST_WRITE) ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE;
+ unsigned long length = pagelist->length;
+ unsigned int offset = pagelist->offset;
+
+ for (actual_pages = 0; actual_pages < num_pages;
+ actual_pages++) {
+ struct page *pg = vmalloc_to_page(buf + (actual_pages *
+ PAGE_SIZE));
+ size_t bytes = PAGE_SIZE - offset;
+
+ if (bytes > length)
+ bytes = length;
+ pages[actual_pages] = pg;
+ dmac_map_area(page_address(pg) + offset, bytes, dir);
+ length -= bytes;
+ offset = 0;
}
- *need_release = 0; /* do not try and release vmalloc pages */
+ *need_release = 0; /* do not try and release vmalloc pages */
} else {
down_read(&task->mm->mmap_sem);
actual_pages = get_user_pages(task, task->mm,
@@ -418,7 +439,7 @@ create_pagelist(char __user *buf, size_t
actual_pages = -ENOMEM;
return actual_pages;
}
- *need_release = 1; /* release user pages */
+ *need_release = 1; /* release user pages */
}
pagelist->length = count;
@@ -451,10 +472,10 @@ create_pagelist(char __user *buf, size_t
/* Partial cache lines (fragments) require special measures */
if ((type == PAGELIST_READ) &&
- ((pagelist->offset & (CACHE_LINE_SIZE - 1)) ||
+ ((pagelist->offset & (g_cache_line_size - 1)) ||
((pagelist->offset + pagelist->length) &
- (CACHE_LINE_SIZE - 1)))) {
- FRAGMENTS_T *fragments;
+ (g_cache_line_size - 1)))) {
+ char *fragments;
if (down_interruptible(&g_free_fragments_sema) != 0) {
kfree(pagelist);
@@ -464,19 +485,15 @@ create_pagelist(char __user *buf, size_t
WARN_ON(g_free_fragments == NULL);
down(&g_free_fragments_mutex);
- fragments = (FRAGMENTS_T *) g_free_fragments;
+ fragments = g_free_fragments;
WARN_ON(fragments == NULL);
- g_free_fragments = *(FRAGMENTS_T **) g_free_fragments;
+ g_free_fragments = *(char **) g_free_fragments;
up(&g_free_fragments_mutex);
- pagelist->type =
- PAGELIST_READ_WITH_FRAGMENTS + (fragments -
- g_fragments_base);
+ pagelist->type = PAGELIST_READ_WITH_FRAGMENTS +
+ (fragments - g_fragments_base) / g_fragments_size;
}
- for (page = virt_to_page(pagelist);
- page <= virt_to_page(addrs + num_pages - 1); page++) {
- flush_dcache_page(page);
- }
+ dmac_flush_range(pagelist, addrs + num_pages);
*ppagelist = pagelist;
@@ -502,13 +519,14 @@ free_pagelist(PAGELIST_T *pagelist, int
/* Deal with any partial cache lines (fragments) */
if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) {
- FRAGMENTS_T *fragments = g_fragments_base +
- (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS);
+ char *fragments = g_fragments_base +
+ (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS) *
+ g_fragments_size;
int head_bytes, tail_bytes;
- head_bytes = (CACHE_LINE_SIZE - pagelist->offset) &
- (CACHE_LINE_SIZE - 1);
+ head_bytes = (g_cache_line_size - pagelist->offset) &
+ (g_cache_line_size - 1);
tail_bytes = (pagelist->offset + actual) &
- (CACHE_LINE_SIZE - 1);
+ (g_cache_line_size - 1);
if ((actual >= 0) && (head_bytes != 0)) {
if (head_bytes > actual)
@@ -516,32 +534,46 @@ free_pagelist(PAGELIST_T *pagelist, int
memcpy((char *)page_address(pages[0]) +
pagelist->offset,
- fragments->headbuf,
+ fragments,
head_bytes);
}
if ((actual >= 0) && (head_bytes < actual) &&
(tail_bytes != 0)) {
memcpy((char *)page_address(pages[num_pages - 1]) +
((pagelist->offset + actual) &
- (PAGE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1)),
- fragments->tailbuf, tail_bytes);
+ (PAGE_SIZE - 1) & ~(g_cache_line_size - 1)),
+ fragments + g_cache_line_size,
+ tail_bytes);
}
down(&g_free_fragments_mutex);
- *(FRAGMENTS_T **) fragments = g_free_fragments;
+ *(char **)fragments = g_free_fragments;
g_free_fragments = fragments;
up(&g_free_fragments_mutex);
up(&g_free_fragments_sema);
}
- if (*need_release) {
+ if (*need_release) {
+ unsigned int length = pagelist->length;
+ unsigned int offset = pagelist->offset;
+
for (i = 0; i < num_pages; i++) {
- if (pagelist->type != PAGELIST_WRITE)
- set_page_dirty(pages[i]);
+ struct page *pg = pages[i];
- page_cache_release(pages[i]);
+ if (pagelist->type != PAGELIST_WRITE) {
+ unsigned int bytes = PAGE_SIZE - offset;
+
+ if (bytes > length)
+ bytes = length;
+ dmac_unmap_area(page_address(pg) + offset,
+ bytes, DMA_FROM_DEVICE);
+ length -= bytes;
+ offset = 0;
+ set_page_dirty(pg);
+ }
+ page_cache_release(pg);
}
- }
+ }
kfree(pagelist);
}