diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 53faa9063a035be790c3aef7c5f32c4f48eb8025..864a002137fed0bbc42efe7b4f2dc787a06ba48a 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -71,6 +71,9 @@ targets       := vmlinux vmlinux.lds \
 		 piggy.$(suffix_y) piggy.$(suffix_y).o \
 		 font.o font.c head.o misc.o $(OBJS)
 
+# Make sure files are removed during clean
+extra-y       += piggy.gzip piggy.lzo piggy.lzma lib1funcs.S
+
 ifeq ($(CONFIG_FUNCTION_TRACER),y)
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 6f80665f477e70b0acad4d6ead228295446863c3..9eaf65f43642e6886b3ab4bae43e7821aad285b6 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -1028,13 +1028,12 @@ static int sa1111_remove(struct platform_device *pdev)
 	struct sa1111 *sachip = platform_get_drvdata(pdev);
 
 	if (sachip) {
-		__sa1111_remove(sachip);
-		platform_set_drvdata(pdev, NULL);
-
 #ifdef CONFIG_PM
 		kfree(sachip->saved_state);
 		sachip->saved_state = NULL;
 #endif
+		__sa1111_remove(sachip);
+		platform_set_drvdata(pdev, NULL);
 	}
 
 	return 0;
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index c980156f32638f8f031b93e730d0c2e8396aec40..1261b1f928d95a9ac1cf374148208415c4dc248d 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -26,6 +26,7 @@
 #include <linux/types.h>
 #include <asm/byteorder.h>
 #include <asm/memory.h>
+#include <asm/system.h>
 
 /*
  * ISA I/O bus memory addresses are 1:1 with the physical address.
@@ -179,25 +180,38 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
  * IO port primitives for more information.
  */
 #ifdef __mem_pci
-#define readb(c) ({ __u8  __v = __raw_readb(__mem_pci(c)); __v; })
-#define readw(c) ({ __u16 __v = le16_to_cpu((__force __le16) \
+#define readb_relaxed(c) ({ u8  __v = __raw_readb(__mem_pci(c)); __v; })
+#define readw_relaxed(c) ({ u16 __v = le16_to_cpu((__force __le16) \
 					__raw_readw(__mem_pci(c))); __v; })
-#define readl(c) ({ __u32 __v = le32_to_cpu((__force __le32) \
+#define readl_relaxed(c) ({ u32 __v = le32_to_cpu((__force __le32) \
 					__raw_readl(__mem_pci(c))); __v; })
-#define readb_relaxed(addr) readb(addr)
-#define readw_relaxed(addr) readw(addr)
-#define readl_relaxed(addr) readl(addr)
+
+#define writeb_relaxed(v,c)	((void)__raw_writeb(v,__mem_pci(c)))
+#define writew_relaxed(v,c)	((void)__raw_writew((__force u16) \
+					cpu_to_le16(v),__mem_pci(c)))
+#define writel_relaxed(v,c)	((void)__raw_writel((__force u32) \
+					cpu_to_le32(v),__mem_pci(c)))
+
+#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
+#define __iormb()		rmb()
+#define __iowmb()		wmb()
+#else
+#define __iormb()		do { } while (0)
+#define __iowmb()		do { } while (0)
+#endif
+
+#define readb(c)		({ u8  __v = readb_relaxed(c); __iormb(); __v; })
+#define readw(c)		({ u16 __v = readw_relaxed(c); __iormb(); __v; })
+#define readl(c)		({ u32 __v = readl_relaxed(c); __iormb(); __v; })
+
+#define writeb(v,c)		({ __iowmb(); writeb_relaxed(v,c); })
+#define writew(v,c)		({ __iowmb(); writew_relaxed(v,c); })
+#define writel(v,c)		({ __iowmb(); writel_relaxed(v,c); })
 
 #define readsb(p,d,l)		__raw_readsb(__mem_pci(p),d,l)
 #define readsw(p,d,l)		__raw_readsw(__mem_pci(p),d,l)
 #define readsl(p,d,l)		__raw_readsl(__mem_pci(p),d,l)
 
-#define writeb(v,c)		__raw_writeb(v,__mem_pci(c))
-#define writew(v,c)		__raw_writew((__force __u16) \
-					cpu_to_le16(v),__mem_pci(c))
-#define writel(v,c)		__raw_writel((__force __u32) \
-					cpu_to_le32(v),__mem_pci(c))
-
 #define writesb(p,d,l)		__raw_writesb(__mem_pci(p),d,l)
 #define writesw(p,d,l)		__raw_writesw(__mem_pci(p),d,l)
 #define writesl(p,d,l)		__raw_writesl(__mem_pci(p),d,l)
@@ -244,13 +258,13 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
  * io{read,write}{8,16,32} macros
  */
 #ifndef ioread8
-#define ioread8(p)	({ unsigned int __v = __raw_readb(p); __v; })
-#define ioread16(p)	({ unsigned int __v = le16_to_cpu((__force __le16)__raw_readw(p)); __v; })
-#define ioread32(p)	({ unsigned int __v = le32_to_cpu((__force __le32)__raw_readl(p)); __v; })
+#define ioread8(p)	({ unsigned int __v = __raw_readb(p); __iormb(); __v; })
+#define ioread16(p)	({ unsigned int __v = le16_to_cpu((__force __le16)__raw_readw(p)); __iormb(); __v; })
+#define ioread32(p)	({ unsigned int __v = le32_to_cpu((__force __le32)__raw_readl(p)); __iormb(); __v; })
 
-#define iowrite8(v,p)	__raw_writeb(v, p)
-#define iowrite16(v,p)	__raw_writew((__force __u16)cpu_to_le16(v), p)
-#define iowrite32(v,p)	__raw_writel((__force __u32)cpu_to_le32(v), p)
+#define iowrite8(v,p)	({ __iowmb(); (void)__raw_writeb(v, p); })
+#define iowrite16(v,p)	({ __iowmb(); (void)__raw_writew((__force __u16)cpu_to_le16(v), p); })
+#define iowrite32(v,p)	({ __iowmb(); (void)__raw_writel((__force __u32)cpu_to_le32(v), p); })
 
 #define ioread8_rep(p,d,c)	__raw_readsb(p,d,c)
 #define ioread16_rep(p,d,c)	__raw_readsw(p,d,c)
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
index 59ff6fdc1e634835575846693fb0828a05c66a55..7d08b43d2c0e496fc704378301b82d3b34d93f3a 100644
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -71,7 +71,7 @@
 		.pushsection .fixup,"ax"
 		.align	4
 9001:		mov	r4, #-EFAULT
-		ldr	r5, [fp, #4]		@ *err_ptr
+		ldr	r5, [sp, #8*4]		@ *err_ptr
 		str	r4, [r5]
 		ldmia	sp, {r1, r2}		@ retrieve dst, len
 		add	r2, r2, r1
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 595be19f8ad5027e29daed72bbe0fb2c5d2d1a04..02e9fdeb8faf26891bdf43cb5846ae717fb720ea 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -237,7 +237,7 @@ static unsigned int realview_mmc_status(struct device *dev)
 	else
 		mask = 2;
 
-	return !(readl(REALVIEW_SYSMCI) & mask);
+	return readl(REALVIEW_SYSMCI) & mask;
 }
 
 struct mmci_platform_data realview_mmc0_plat_data = {
diff --git a/arch/arm/mach-ux500/include/mach/uncompress.h b/arch/arm/mach-ux500/include/mach/uncompress.h
index 8552eb188b50274c7ed4b18ccf48e743192652de..0271ca0a83df86c4fb43eb37b3c93754414d706d 100644
--- a/arch/arm/mach-ux500/include/mach/uncompress.h
+++ b/arch/arm/mach-ux500/include/mach/uncompress.h
@@ -30,22 +30,22 @@
 static void putc(const char c)
 {
 	/* Do nothing if the UART is not enabled. */
-	if (!(readb(U8500_UART_CR) & 0x1))
+	if (!(__raw_readb(U8500_UART_CR) & 0x1))
 		return;
 
 	if (c == '\n')
 		putc('\r');
 
-	while (readb(U8500_UART_FR) & (1 << 5))
+	while (__raw_readb(U8500_UART_FR) & (1 << 5))
 		barrier();
-	writeb(c, U8500_UART_DR);
+	__raw_writeb(c, U8500_UART_DR);
 }
 
 static void flush(void)
 {
-	if (!(readb(U8500_UART_CR) & 0x1))
+	if (!(__raw_readb(U8500_UART_CR) & 0x1))
 		return;
-	while (readb(U8500_UART_FR) & (1 << 3))
+	while (__raw_readb(U8500_UART_FR) & (1 << 3))
 		barrier();
 }
 
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index d250711b8c7a4e3c17ee2ddcc53ad5b2e24ee420..c84239761cb4a2881d226f6f82b3acb694d30dae 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -241,7 +241,7 @@ static struct platform_device v2m_flash_device = {
 
 static unsigned int v2m_mmci_status(struct device *dev)
 {
-	return !(readl(MMIO_P2V(V2M_SYS_MCI)) & (1 << 0));
+	return readl(MMIO_P2V(V2M_SYS_MCI)) & (1 << 0);
 }
 
 static struct mmci_platform_data v2m_mmci_data = {
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index df4955885b21d412ede58df0a8c26a4fcb8ca0fb..9982eb385c0f9844d70f53d45c7377fcf5fa2887 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -32,14 +32,14 @@ static uint32_t l2x0_way_mask;	/* Bitmask of active ways */
 static inline void cache_wait(void __iomem *reg, unsigned long mask)
 {
 	/* wait for the operation to complete */
-	while (readl(reg) & mask)
+	while (readl_relaxed(reg) & mask)
 		;
 }
 
 static inline void cache_sync(void)
 {
 	void __iomem *base = l2x0_base;
-	writel(0, base + L2X0_CACHE_SYNC);
+	writel_relaxed(0, base + L2X0_CACHE_SYNC);
 	cache_wait(base + L2X0_CACHE_SYNC, 1);
 }
 
@@ -47,14 +47,14 @@ static inline void l2x0_clean_line(unsigned long addr)
 {
 	void __iomem *base = l2x0_base;
 	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
-	writel(addr, base + L2X0_CLEAN_LINE_PA);
+	writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA);
 }
 
 static inline void l2x0_inv_line(unsigned long addr)
 {
 	void __iomem *base = l2x0_base;
 	cache_wait(base + L2X0_INV_LINE_PA, 1);
-	writel(addr, base + L2X0_INV_LINE_PA);
+	writel_relaxed(addr, base + L2X0_INV_LINE_PA);
 }
 
 #ifdef CONFIG_PL310_ERRATA_588369
@@ -75,9 +75,9 @@ static inline void l2x0_flush_line(unsigned long addr)
 
 	/* Clean by PA followed by Invalidate by PA */
 	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
-	writel(addr, base + L2X0_CLEAN_LINE_PA);
+	writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA);
 	cache_wait(base + L2X0_INV_LINE_PA, 1);
-	writel(addr, base + L2X0_INV_LINE_PA);
+	writel_relaxed(addr, base + L2X0_INV_LINE_PA);
 }
 #else
 
@@ -90,7 +90,7 @@ static inline void l2x0_flush_line(unsigned long addr)
 {
 	void __iomem *base = l2x0_base;
 	cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1);
-	writel(addr, base + L2X0_CLEAN_INV_LINE_PA);
+	writel_relaxed(addr, base + L2X0_CLEAN_INV_LINE_PA);
 }
 #endif
 
@@ -109,7 +109,7 @@ static inline void l2x0_inv_all(void)
 
 	/* invalidate all ways */
 	spin_lock_irqsave(&l2x0_lock, flags);
-	writel(l2x0_way_mask, l2x0_base + L2X0_INV_WAY);
+	writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY);
 	cache_wait(l2x0_base + L2X0_INV_WAY, l2x0_way_mask);
 	cache_sync();
 	spin_unlock_irqrestore(&l2x0_lock, flags);
@@ -215,8 +215,8 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
 
 	l2x0_base = base;
 
-	cache_id = readl(l2x0_base + L2X0_CACHE_ID);
-	aux = readl(l2x0_base + L2X0_AUX_CTRL);
+	cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
+	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
 
 	aux &= aux_mask;
 	aux |= aux_val;
@@ -248,15 +248,15 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
 	 * If you are booting from non-secure mode
 	 * accessing the below registers will fault.
 	 */
-	if (!(readl(l2x0_base + L2X0_CTRL) & 1)) {
+	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
 
 		/* l2x0 controller is disabled */
-		writel(aux, l2x0_base + L2X0_AUX_CTRL);
+		writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL);
 
 		l2x0_inv_all();
 
 		/* enable L2X0 */
-		writel(1, l2x0_base + L2X0_CTRL);
+		writel_relaxed(1, l2x0_base + L2X0_CTRL);
 	}
 
 	outer_cache.inv_range = l2x0_inv_range;
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
index 086816b205b8cf25c9d178c9d56c44a654e2ff9f..6ab244062b4ab2f720e84d870011faac64878c81 100644
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -163,19 +163,22 @@ static DEFINE_PER_CPU(int, kmap_high_l1_vipt_depth);
 
 void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte)
 {
-	unsigned int idx, cpu = smp_processor_id();
-	int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu);
+	unsigned int idx, cpu;
+	int *depth;
 	unsigned long vaddr, flags;
 	pte_t pte, *ptep;
 
+	if (!in_interrupt())
+		preempt_disable();
+
+	cpu = smp_processor_id();
+	depth = &per_cpu(kmap_high_l1_vipt_depth, cpu);
+
 	idx = KM_L1_CACHE + KM_TYPE_NR * cpu;
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	ptep = TOP_PTE(vaddr);
 	pte = mk_pte(page, kmap_prot);
 
-	if (!in_interrupt())
-		preempt_disable();
-
 	raw_local_irq_save(flags);
 	(*depth)++;
 	if (pte_val(*ptep) == pte_val(pte)) {
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 4917af96bae1d0be7472e32bf13784ec5ffb4412..2ed435bd4b6c18c426640140cf39908b1d065574 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -539,9 +539,13 @@ static int mmci_get_cd(struct mmc_host *mmc)
 	if (host->gpio_cd == -ENOSYS)
 		status = host->plat->status(mmc_dev(host->mmc));
 	else
-		status = gpio_get_value(host->gpio_cd);
+		status = !gpio_get_value(host->gpio_cd);
 
-	return !status;
+	/*
+	 * Use positive logic throughout - status is zero for no card,
+	 * non-zero for card inserted.
+	 */
+	return status;
 }
 
 static const struct mmc_host_ops mmci_ops = {
diff --git a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c
index 3a561df2e8a29d7b020913beef4a86fb705836a3..0c1afd13ddd3055be4c61abd56253edf38f7ac36 100644
--- a/drivers/video/cyber2000fb.c
+++ b/drivers/video/cyber2000fb.c
@@ -388,6 +388,7 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 		pseudo_val |= convert_bitfield(red, &var->red);
 		pseudo_val |= convert_bitfield(green, &var->green);
 		pseudo_val |= convert_bitfield(blue, &var->blue);
+		ret = 0;
 		break;
 	}
 
@@ -436,6 +437,8 @@ static void cyber2000fb_write_ramdac_ctrl(struct cfb_info *cfb)
 	cyber2000fb_writeb(i | 4, 0x3cf, cfb);
 	cyber2000fb_writeb(val, 0x3c6, cfb);
 	cyber2000fb_writeb(i, 0x3cf, cfb);
+	/* prevent card lock-up observed on x86 with CyberPro 2000 */
+	cyber2000fb_readb(0x3cf, cfb);
 }
 
 static void cyber2000fb_set_timing(struct cfb_info *cfb, struct par_info *hw)