bcm53xx: update copy of ASM entry flushing whole D-cache

The previous version was based on code from kernel 2.6.22 with two trivial Broadcom modifications. This updates the copy to the version from the current kernel and refreshes the patch. This was tested for regressions on Netgear R6250 (BCM4708A0), D-Link DIR-885L (BCM4709C0) and Tenda AC9 (BCM47189B0). Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
2016-08-19 12:44:44 +02:00 · 2016-08-19 12:44:44 +02:00 · 28d641be43
commit 28d641be43
parent 1bef5050ef
2 changed files with 45 additions and 30 deletions
--- a/target/linux/bcm53xx/files/arch/arm/boot/compressed/cache-v7-min.S
+++ b/target/linux/bcm53xx/files/arch/arm/boot/compressed/cache-v7-min.S
@ -20,48 +20,62 @@
 *
 *	Flush the whole D-cache.
 *
- *	Corrupted registers: r0-r5, r7, r9-r11
+ *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
 *
 *	- mm    - mm_struct describing address space
 */
 ENTRY(v7_flush_dcache_all)
+	dmb					@ ensure ordering with previous memory accesses
 	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
-	ands	r3, r0, #0x7000000		@ extract loc from clidr
-	mov	r3, r3, lsr #23			@ left align loc bit field
+	mov	r3, r0, lsr #23			@ move LoC into position
+	ands	r3, r3, #7 << 1			@ extract LoC*2 from clidr
 	beq	finished			@ if loc is 0, then no need to clean
+start_flush_levels:
 	mov	r10, #0				@ start clean at cache level 0
-loop1:
+flush_levels:
 	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
 	mov	r1, r0, lsr r2			@ extract cache type bits from clidr
 	and	r1, r1, #7			@ mask of the bits for current cache only
 	cmp	r1, #2				@ see what cache we have at this level
 	blt	skip				@ skip if no cache, or just i-cache
+#ifdef CONFIG_PREEMPT
+	save_and_disable_irqs_notrace r9	@ make cssr&csidr read atomic
+#endif
 	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
 	isb					@ isb to sych the new cssr&csidr
 	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
+#ifdef CONFIG_PREEMPT
+	restore_irqs_notrace r9
+#endif
 	and	r2, r1, #7			@ extract the length of the cache lines
 	add	r2, r2, #4			@ add 4 (line length offset)
-	ldr	r4, =0x3ff
+	movw	r4, #0x3ff
 	ands	r4, r4, r1, lsr #3		@ find maximum number on the way size
 	clz	r5, r4				@ find bit position of way size increment
-	ldr	r7, =0x7fff
+	movw	r7, #0x7fff
 	ands	r7, r7, r1, lsr #13		@ extract max number of the index size
+loop1:
+	mov	r9, r7				@ create working copy of max index
 loop2:
-	mov	r9, r4				@ create working copy of max way size
-loop3:
-	orr	r11, r10, r9, lsl r5		@ factor way and cache number into r11
-	orr	r11, r11, r7, lsl r2		@ factor index number into r11
+ ARM(	orr	r11, r10, r4, lsl r5	)	@ factor way and cache number into r11
+ THUMB(	lsl	r6, r4, r5		)
+ THUMB(	orr	r11, r10, r6		)	@ factor way and cache number into r11
+ ARM(	orr	r11, r11, r9, lsl r2	)	@ factor index number into r11
+ THUMB(	lsl	r6, r9, r2		)
+ THUMB(	orr	r11, r11, r6		)	@ factor index number into r11
 	mcr	p15, 0, r11, c7, c14, 2		@ clean & invalidate by set/way
-	subs	r9, r9, #1			@ decrement the way
-	bge	loop3
-	subs	r7, r7, #1			@ decrement the index
+	subs	r9, r9, #1			@ decrement the index
 	bge	loop2
+	subs	r4, r4, #1			@ decrement the way
+	bge	loop1
 skip:
 	add	r10, r10, #2			@ increment cache number
 	cmp	r3, r10
-	bgt	loop1
+	bgt	flush_levels
 finished:
 	mov	r10, #0				@ swith back to cache level 0
 	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
+	dsb	st
 	isb
-	mov	pc, lr
+	ret	lr
+ENDPROC(v7_flush_dcache_all)
--- a/target/linux/bcm53xx/patches-4.4/300-ARM-BCM5301X-Disable-MMU-and-Dcache-during-decompres.patch
+++ b/target/linux/bcm53xx/patches-4.4/300-ARM-BCM5301X-Disable-MMU-and-Dcache-during-decompres.patch
@ -82,19 +82,20 @@ Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
 +	mov	r8, r12
 --- a/arch/arm/boot/compressed/cache-v7-min.S
 +++ b/arch/arm/boot/compressed/cache-v7-min.S
-@@ -51,7 +51,7 @@ loop2:
- loop3:
- 	orr	r11, r10, r9, lsl r5		@ factor way and cache number into r11
- 	orr	r11, r11, r7, lsl r2		@ factor index number into r11
+@@ -12,6 +12,7 @@
+ 
+ #include <linux/linkage.h>
+ #include <linux/init.h>
+#include <asm/assembler.h>
+ 
+ 	__INIT
+ 
+@@ -63,7 +64,7 @@ loop2:
+  ARM(	orr	r11, r11, r9, lsl r2	)	@ factor index number into r11
+  THUMB(	lsl	r6, r9, r2		)
+  THUMB(	orr	r11, r11, r6		)	@ factor index number into r11
 -	mcr	p15, 0, r11, c7, c14, 2		@ clean & invalidate by set/way
-+	mcr     p15, 0, r11, c7, c6, 2		@ Invalidate line
- 	subs	r9, r9, #1			@ decrement the way
- 	bge	loop3
- 	subs	r7, r7, #1			@ decrement the index
-@@ -63,5 +63,6 @@ skip:
- finished:
- 	mov	r10, #0				@ swith back to cache level 0
- 	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
-+	dsb
- 	isb
- 	mov	pc, lr
+	mcr     p15, 0, r11, c7, c6, 2		@ invalidate by set/way
+ 	subs	r9, r9, #1			@ decrement the index
+ 	bge	loop2
+ 	subs	r4, r4, #1			@ decrement the way