diff --git a/arch/alpha/include/asm/hardirq.h b/arch/alpha/include/asm/hardirq.h
index d953e234daa8ff694ced89b3af4f24f155b3b058..88971460fa6ce33b7bdda339951f0ae83e5846a2 100644
--- a/arch/alpha/include/asm/hardirq.h
+++ b/arch/alpha/include/asm/hardirq.h
@@ -14,17 +14,4 @@ typedef struct {
 
 void ack_bad_irq(unsigned int irq);
 
-#define HARDIRQ_BITS	12
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially nestable IRQ sources in the system
- * to nest on a single CPU. On Alpha, interrupts are masked at the CPU
- * by IPL as well as at the system level. We only have 8 IPLs (UNIX PALcode)
- * so we really only have 8 nestable IRQs, but allow some overhead
- */
-#if (1 << HARDIRQ_BITS) < 16
-#error HARDIRQ_BITS is too low!
-#endif
-
 #endif /* _ALPHA_HARDIRQ_H */
diff --git a/arch/avr32/include/asm/hardirq.h b/arch/avr32/include/asm/hardirq.h
index 267354356f604ccedd153f3ddea44b339d72e7c3..015bc75ea79852cd6bd41d923ac26375ba0e7ba4 100644
--- a/arch/avr32/include/asm/hardirq.h
+++ b/arch/avr32/include/asm/hardirq.h
@@ -20,15 +20,4 @@ void ack_bad_irq(unsigned int irq);
 
 #endif /* __ASSEMBLY__ */
 
-#define HARDIRQ_BITS	12
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
 #endif /* __ASM_AVR32_HARDIRQ_H */
diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h
index 140e495b8e0e841b615a1a1b1c971438638c720b..d514cd9edb49f45c9ac5871d407ec7ff65b0927e 100644
--- a/arch/ia64/include/asm/hardirq.h
+++ b/arch/ia64/include/asm/hardirq.h
@@ -20,16 +20,6 @@
 
 #define local_softirq_pending()		(local_cpu_data->softirq_pending)
 
-#define HARDIRQ_BITS	14
-
-/*
- * The hardirq mask has to be large enough to have space for potentially all IRQ sources
- * in the system nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
 extern void __iomem *ipi_base_addr;
 
 void ack_bad_irq(unsigned int irq);
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index f3cf86e1465bd64b64e5ce898524064a05bff529..9841221f53f2e73b055edad8a1bf59b3b455b9c7 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -15,61 +15,61 @@
  * - bits 0-7 are the preemption count (max preemption depth: 256)
  * - bits 8-15 are the softirq count (max # of softirqs: 256)
  *
- * The hardirq count can be overridden per architecture, the default is:
+ * The hardirq count can in theory reach NR_IRQS. In reality, the
+ * number of nested IRQs is also limited by the stack size. For
+ * archs with over 1000 IRQs it is not practical to expect that
+ * they will all nest. We allow a maximum of 10 bits for hardirq
+ * nesting. An arch may choose to use fewer than 10 bits.
+ * m68k expects it to be 8. With the full 10 bits the layout is:
  *
- * - bits 16-27 are the hardirq count (max # of hardirqs: 4096)
- * - ( bit 28 is the PREEMPT_ACTIVE flag. )
+ * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
+ * - bit 26 is the NMI bit (NMI_MASK)
+ * - bit 28 is the PREEMPT_ACTIVE flag
  *
  * PREEMPT_MASK: 0x000000ff
  * SOFTIRQ_MASK: 0x0000ff00
- * HARDIRQ_MASK: 0x0fff0000
+ * HARDIRQ_MASK: 0x03ff0000
+ *     NMI_MASK: 0x04000000
  */
 #define PREEMPT_BITS	8
 #define SOFTIRQ_BITS	8
+#define NMI_BITS	1
 
-#ifndef HARDIRQ_BITS
-#define HARDIRQ_BITS	12
+#define MAX_HARDIRQ_BITS 10
 
-#ifndef MAX_HARDIRQS_PER_CPU
-#define MAX_HARDIRQS_PER_CPU NR_IRQS
+#ifndef HARDIRQ_BITS
+# define HARDIRQ_BITS	MAX_HARDIRQ_BITS
 #endif
 
-/*
- * The hardirq mask has to be large enough to have space for potentially
- * all IRQ sources in the system nesting on a single CPU.
- */
-#if (1 << HARDIRQ_BITS) < MAX_HARDIRQS_PER_CPU
-# error HARDIRQ_BITS is too low!
-#endif
+#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
+#error HARDIRQ_BITS too high!
 #endif
 
 #define PREEMPT_SHIFT	0
 #define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
 #define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
+#define NMI_SHIFT	(HARDIRQ_SHIFT + HARDIRQ_BITS)
 
 #define __IRQ_MASK(x)	((1UL << (x))-1)
 
 #define PREEMPT_MASK	(__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
 #define SOFTIRQ_MASK	(__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
 #define HARDIRQ_MASK	(__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
+#define NMI_MASK	(__IRQ_MASK(NMI_BITS)     << NMI_SHIFT)
 
 #define PREEMPT_OFFSET	(1UL << PREEMPT_SHIFT)
 #define SOFTIRQ_OFFSET	(1UL << SOFTIRQ_SHIFT)
 #define HARDIRQ_OFFSET	(1UL << HARDIRQ_SHIFT)
+#define NMI_OFFSET	(1UL << NMI_SHIFT)
 
-#if PREEMPT_ACTIVE < (1 << (HARDIRQ_SHIFT + HARDIRQ_BITS))
+#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
 #error PREEMPT_ACTIVE is too low!
 #endif
 
-#define NMI_OFFSET	(PREEMPT_ACTIVE << 1)
-
-#if NMI_OFFSET >= 0x80000000
-#error PREEMPT_ACTIVE too high!
-#endif
-
 #define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
 #define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
-#define irq_count()	(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK))
+#define irq_count()	(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
+				 | NMI_MASK))
 
 /*
  * Are we doing bottom half or hardware interrupt processing?
@@ -82,7 +82,7 @@
 /*
  * Are we in NMI context?
  */
-#define in_nmi()	(preempt_count() & NMI_OFFSET)
+#define in_nmi()	(preempt_count() & NMI_MASK)
 
 #if defined(CONFIG_PREEMPT)
 # define PREEMPT_INATOMIC_BASE kernel_locked()