m68k: Unroll raw_outsb() loop

Unroll the raw_outsb() loop using the optimized assembler code from raw_outsw(). That code is copied and pasted, with movew changed to moveb. This improves the performance of sequential write transfers using mac_esp in PIO mode by 5% or 10%. (The DMA controller on the 840av/660av models is still unsupported so PIO transfers are used.) Tested-by: Stan Johnson <userm57@yahoo.com> Signed-off-by: Finn Thain <fthain@telegraphics.com.au> Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
2018-10-13 11:47:55 +11:00 · 2018-10-13 11:47:55 +11:00 · b6cf523c16
commit b6cf523c16
parent 651022382c
1 changed files with 35 additions and 4 deletions
--- a/arch/m68k/include/asm/raw_io.h
+++ b/arch/m68k/include/asm/raw_io.h
@ -107,12 +107,43 @@ static inline void raw_insb(volatile u8 __iomem *port, u8 *buf, unsigned int len
 }

 static inline void raw_outsb(volatile u8 __iomem *port, const u8 *buf,
-			     unsigned int len)
+			     unsigned int nr)
 {
-	unsigned int i;
+	unsigned int tmp;

-        for (i = 0; i < len; i++)
-		out_8(port, *buf++);
+	if (nr & 15) {
+		tmp = (nr & 15) - 1;
+		asm volatile (
+			"1: moveb %0@+,%2@; dbra %1,1b"
+			: "=a" (buf), "=d" (tmp)
+			: "a" (port), "0" (buf),
+			  "1" (tmp));
+	}
+	if (nr >> 4) {
+		tmp = (nr >> 4) - 1;
+		asm volatile (
+			"1: "
+			"moveb %0@+,%2@; "
+			"moveb %0@+,%2@; "
+			"moveb %0@+,%2@; "
+			"moveb %0@+,%2@; "
+			"moveb %0@+,%2@; "
+			"moveb %0@+,%2@; "
+			"moveb %0@+,%2@; "
+			"moveb %0@+,%2@; "
+			"moveb %0@+,%2@; "
+			"moveb %0@+,%2@; "
+			"moveb %0@+,%2@; "
+			"moveb %0@+,%2@; "
+			"moveb %0@+,%2@; "
+			"moveb %0@+,%2@; "
+			"moveb %0@+,%2@; "
+			"moveb %0@+,%2@; "
+			"dbra %1,1b"
+			: "=a" (buf), "=d" (tmp)
+			: "a" (port), "0" (buf),
+			  "1" (tmp));
+	}
 }

 static inline void raw_insw(volatile u16 __iomem *port, u16 *buf, unsigned int nr)