kernel: unroll MIPS r4k cache blast function
Optimize the compiler output for larger cache blast cases that are common for DMA-based networking. On ar71xx, I measured a routing throughput increase of ~8% Signed-off-by: Ben Menchaca <ben.menchaca@qca.qualcomm.com> Signed-off-by: Rosen Penev <rosenp@gmail.com> Signed-off-by: Felix Fietkau <nbd@nbd.name>
This commit is contained in:
parent
916277a033
commit
4e8f1e9f4c
3 changed files with 180 additions and 7 deletions
|
@ -204,7 +204,7 @@
|
|||
|
||||
#define __BUILD_BLAST_USER_CACHE(pfx, desc, indexop, hitop, lsize) \
|
||||
static inline void blast_##pfx##cache##lsize##_user_page(unsigned long page) \
|
||||
@@ -660,17 +744,19 @@ __BUILD_BLAST_USER_CACHE(d, dcache, Inde
|
||||
@@ -660,53 +744,23 @@ __BUILD_BLAST_USER_CACHE(d, dcache, Inde
|
||||
__BUILD_BLAST_USER_CACHE(i, icache, Index_Invalidate_I, Hit_Invalidate_I, 64)
|
||||
|
||||
/* build blast_xxx_range, protected_blast_xxx_range */
|
||||
|
@ -214,18 +214,59 @@
|
|||
unsigned long end) \
|
||||
{ \
|
||||
unsigned long lsize = cpu_##desc##_line_size(); \
|
||||
- unsigned long lsize_2 = lsize * 2; \
|
||||
- unsigned long lsize_3 = lsize * 3; \
|
||||
- unsigned long lsize_4 = lsize * 4; \
|
||||
- unsigned long lsize_5 = lsize * 5; \
|
||||
- unsigned long lsize_6 = lsize * 6; \
|
||||
- unsigned long lsize_7 = lsize * 7; \
|
||||
- unsigned long lsize_8 = lsize * 8; \
|
||||
unsigned long addr = start & ~(lsize - 1); \
|
||||
unsigned long aend = (end - 1) & ~(lsize - 1); \
|
||||
- unsigned long aend = (end + lsize - 1) & ~(lsize - 1); \
|
||||
- int lines = (aend - addr) / lsize; \
|
||||
+ unsigned long aend = (end - 1) & ~(lsize - 1); \
|
||||
+ war \
|
||||
\
|
||||
__##pfx##flush_prologue \
|
||||
\
|
||||
while (1) { \
|
||||
- while (lines >= 8) { \
|
||||
- prot##cache_op(hitop, addr); \
|
||||
- prot##cache_op(hitop, addr + lsize); \
|
||||
- prot##cache_op(hitop, addr + lsize_2); \
|
||||
- prot##cache_op(hitop, addr + lsize_3); \
|
||||
- prot##cache_op(hitop, addr + lsize_4); \
|
||||
- prot##cache_op(hitop, addr + lsize_5); \
|
||||
- prot##cache_op(hitop, addr + lsize_6); \
|
||||
- prot##cache_op(hitop, addr + lsize_7); \
|
||||
- addr += lsize_8; \
|
||||
- lines -= 8; \
|
||||
- } \
|
||||
- \
|
||||
- if (lines & 0x4) { \
|
||||
- prot##cache_op(hitop, addr); \
|
||||
- prot##cache_op(hitop, addr + lsize); \
|
||||
- prot##cache_op(hitop, addr + lsize_2); \
|
||||
- prot##cache_op(hitop, addr + lsize_3); \
|
||||
- addr += lsize_4; \
|
||||
- } \
|
||||
- \
|
||||
- if (lines & 0x2) { \
|
||||
- prot##cache_op(hitop, addr); \
|
||||
- prot##cache_op(hitop, addr + lsize); \
|
||||
- addr += lsize_2; \
|
||||
- } \
|
||||
- \
|
||||
- if (lines & 0x1) { \
|
||||
+ while (1) { \
|
||||
+ war2 \
|
||||
prot##cache_op(hitop, addr); \
|
||||
if (addr == aend) \
|
||||
break; \
|
||||
@@ -682,8 +768,8 @@ static inline void prot##extra##blast_##
|
||||
+ if (addr == aend) \
|
||||
+ break; \
|
||||
+ addr += lsize; \
|
||||
} \
|
||||
\
|
||||
__##pfx##flush_epilogue \
|
||||
@@ -714,8 +768,8 @@ static inline void prot##extra##blast_##
|
||||
|
||||
#ifndef CONFIG_EVA
|
||||
|
||||
|
@ -236,7 +277,7 @@
|
|||
|
||||
#else
|
||||
|
||||
@@ -720,14 +806,14 @@ __BUILD_PROT_BLAST_CACHE_RANGE(d, dcache
|
||||
@@ -752,14 +806,14 @@ __BUILD_PROT_BLAST_CACHE_RANGE(d, dcache
|
||||
__BUILD_PROT_BLAST_CACHE_RANGE(i, icache, Hit_Invalidate_I)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
From: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
|
||||
Date: Fri, 7 Jun 2013 18:35:22 -0500
|
||||
Subject: MIPS: r4k_cache: use more efficient cache blast
|
||||
|
||||
Optimize the compiler output for larger cache blast cases that are
|
||||
common for DMA-based networking.
|
||||
|
||||
Signed-off-by: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
--- a/arch/mips/include/asm/r4kcache.h
|
||||
+++ b/arch/mips/include/asm/r4kcache.h
|
||||
@@ -682,16 +682,48 @@ static inline void prot##extra##blast_##
|
||||
unsigned long end) \
|
||||
{ \
|
||||
unsigned long lsize = cpu_##desc##_line_size(); \
|
||||
+ unsigned long lsize_2 = lsize * 2; \
|
||||
+ unsigned long lsize_3 = lsize * 3; \
|
||||
+ unsigned long lsize_4 = lsize * 4; \
|
||||
+ unsigned long lsize_5 = lsize * 5; \
|
||||
+ unsigned long lsize_6 = lsize * 6; \
|
||||
+ unsigned long lsize_7 = lsize * 7; \
|
||||
+ unsigned long lsize_8 = lsize * 8; \
|
||||
unsigned long addr = start & ~(lsize - 1); \
|
||||
- unsigned long aend = (end - 1) & ~(lsize - 1); \
|
||||
+ unsigned long aend = (end + lsize - 1) & ~(lsize - 1); \
|
||||
+ int lines = (aend - addr) / lsize; \
|
||||
\
|
||||
__##pfx##flush_prologue \
|
||||
\
|
||||
- while (1) { \
|
||||
+ while (lines >= 8) { \
|
||||
+ prot##cache_op(hitop, addr); \
|
||||
+ prot##cache_op(hitop, addr + lsize); \
|
||||
+ prot##cache_op(hitop, addr + lsize_2); \
|
||||
+ prot##cache_op(hitop, addr + lsize_3); \
|
||||
+ prot##cache_op(hitop, addr + lsize_4); \
|
||||
+ prot##cache_op(hitop, addr + lsize_5); \
|
||||
+ prot##cache_op(hitop, addr + lsize_6); \
|
||||
+ prot##cache_op(hitop, addr + lsize_7); \
|
||||
+ addr += lsize_8; \
|
||||
+ lines -= 8; \
|
||||
+ } \
|
||||
+ \
|
||||
+ if (lines & 0x4) { \
|
||||
+ prot##cache_op(hitop, addr); \
|
||||
+ prot##cache_op(hitop, addr + lsize); \
|
||||
+ prot##cache_op(hitop, addr + lsize_2); \
|
||||
+ prot##cache_op(hitop, addr + lsize_3); \
|
||||
+ addr += lsize_4; \
|
||||
+ } \
|
||||
+ \
|
||||
+ if (lines & 0x2) { \
|
||||
+ prot##cache_op(hitop, addr); \
|
||||
+ prot##cache_op(hitop, addr + lsize); \
|
||||
+ addr += lsize_2; \
|
||||
+ } \
|
||||
+ \
|
||||
+ if (lines & 0x1) { \
|
||||
prot##cache_op(hitop, addr); \
|
||||
- if (addr == aend) \
|
||||
- break; \
|
||||
- addr += lsize; \
|
||||
} \
|
||||
\
|
||||
__##pfx##flush_epilogue \
|
|
@ -0,0 +1,66 @@
|
|||
From: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
|
||||
Date: Fri, 7 Jun 2013 18:35:22 -0500
|
||||
Subject: MIPS: r4k_cache: use more efficient cache blast
|
||||
|
||||
Optimize the compiler output for larger cache blast cases that are
|
||||
common for DMA-based networking.
|
||||
|
||||
Signed-off-by: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
--- a/arch/mips/include/asm/r4kcache.h
|
||||
+++ b/arch/mips/include/asm/r4kcache.h
|
||||
@@ -665,16 +665,48 @@ static inline void prot##extra##blast_##pfx##cache##_range(unsigned long start,
|
||||
unsigned long end) \
|
||||
{ \
|
||||
unsigned long lsize = cpu_##desc##_line_size(); \
|
||||
+ unsigned long lsize_2 = lsize * 2; \
|
||||
+ unsigned long lsize_3 = lsize * 3; \
|
||||
+ unsigned long lsize_4 = lsize * 4; \
|
||||
+ unsigned long lsize_5 = lsize * 5; \
|
||||
+ unsigned long lsize_6 = lsize * 6; \
|
||||
+ unsigned long lsize_7 = lsize * 7; \
|
||||
+ unsigned long lsize_8 = lsize * 8; \
|
||||
unsigned long addr = start & ~(lsize - 1); \
|
||||
- unsigned long aend = (end - 1) & ~(lsize - 1); \
|
||||
+ unsigned long aend = (end + lsize - 1) & ~(lsize - 1); \
|
||||
+ int lines = (aend - addr) / lsize; \
|
||||
\
|
||||
__##pfx##flush_prologue \
|
||||
\
|
||||
- while (1) { \
|
||||
+ while (lines >= 8) { \
|
||||
+ prot##cache_op(hitop, addr); \
|
||||
+ prot##cache_op(hitop, addr + lsize); \
|
||||
+ prot##cache_op(hitop, addr + lsize_2); \
|
||||
+ prot##cache_op(hitop, addr + lsize_3); \
|
||||
+ prot##cache_op(hitop, addr + lsize_4); \
|
||||
+ prot##cache_op(hitop, addr + lsize_5); \
|
||||
+ prot##cache_op(hitop, addr + lsize_6); \
|
||||
+ prot##cache_op(hitop, addr + lsize_7); \
|
||||
+ addr += lsize_8; \
|
||||
+ lines -= 8; \
|
||||
+ } \
|
||||
+ \
|
||||
+ if (lines & 0x4) { \
|
||||
+ prot##cache_op(hitop, addr); \
|
||||
+ prot##cache_op(hitop, addr + lsize); \
|
||||
+ prot##cache_op(hitop, addr + lsize_2); \
|
||||
+ prot##cache_op(hitop, addr + lsize_3); \
|
||||
+ addr += lsize_4; \
|
||||
+ } \
|
||||
+ \
|
||||
+ if (lines & 0x2) { \
|
||||
+ prot##cache_op(hitop, addr); \
|
||||
+ prot##cache_op(hitop, addr + lsize); \
|
||||
+ addr += lsize_2; \
|
||||
+ } \
|
||||
+ \
|
||||
+ if (lines & 0x1) { \
|
||||
prot##cache_op(hitop, addr); \
|
||||
- if (addr == aend) \
|
||||
- break; \
|
||||
- addr += lsize; \
|
||||
} \
|
||||
\
|
||||
__##pfx##flush_epilogue \
|
Loading…
Reference in a new issue