diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 1d4248fa55e995fdc7cba95f9209372ed6533b52..7068f341133d7eb038bb94a9953a3b9946d9bf51 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -40,6 +40,10 @@ config NDS32
 	select NO_IOPORT_MAP
 	select RTC_LIB
 	select THREAD_INFO_IN_TASK
+	select HAVE_FUNCTION_TRACER
+	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_DYNAMIC_FTRACE
 	help
 	  Andes(nds32) Linux support.
 
diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile
index 63f4f173e5f4b97afa7078a3d8600fbfb50c13a6..3509fac104919ff8d9727d4cdfe2185d10473746 100644
--- a/arch/nds32/Makefile
+++ b/arch/nds32/Makefile
@@ -5,6 +5,10 @@ KBUILD_DEFCONFIG := defconfig
 
 comma = ,
 
+ifdef CONFIG_FUNCTION_TRACER
+arch-y += -malways-save-lp -mno-relax
+endif
+
 KBUILD_CFLAGS	+= $(call cc-option, -mno-sched-prolog-epilog)
 KBUILD_CFLAGS	+= -mcmodel=large
 
diff --git a/arch/nds32/include/asm/elf.h b/arch/nds32/include/asm/elf.h
index 56c47905880256455a17943427651b0bb72e03ed..f5f9cf7e054401431f89b72af891c5a44b20baba 100644
--- a/arch/nds32/include/asm/elf.h
+++ b/arch/nds32/include/asm/elf.h
@@ -121,9 +121,9 @@ struct elf32_hdr;
  */
 #define ELF_CLASS	ELFCLASS32
 #ifdef __NDS32_EB__
-#define ELF_DATA	ELFDATA2MSB;
+#define ELF_DATA	ELFDATA2MSB
 #else
-#define ELF_DATA	ELFDATA2LSB;
+#define ELF_DATA	ELFDATA2LSB
 #endif
 #define ELF_ARCH	EM_NDS32
 #define USE_ELF_CORE_DUMP
diff --git a/arch/nds32/include/asm/ftrace.h b/arch/nds32/include/asm/ftrace.h
new file mode 100644
index 0000000000000000000000000000000000000000..2f96cc96aa35c0dd0795674068bf414e7a08ab2e
--- /dev/null
+++ b/arch/nds32/include/asm/ftrace.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_NDS32_FTRACE_H
+#define __ASM_NDS32_FTRACE_H
+
+#ifdef CONFIG_FUNCTION_TRACER
+
+#define HAVE_FUNCTION_GRAPH_FP_TEST
+
+#define MCOUNT_ADDR ((unsigned long)(_mcount))
+/* mcount call is composed of three instructions:
+ * sethi + ori + jral
+ */
+#define MCOUNT_INSN_SIZE 12
+
+extern void _mcount(unsigned long parent_ip);
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+#define FTRACE_ADDR ((unsigned long)_ftrace_caller)
+
+#ifdef __NDS32_EL__
+#define INSN_NOP		0x09000040
+#define INSN_SIZE(insn)		(((insn & 0x00000080) == 0) ? 4 : 2)
+#define IS_SETHI(insn)		((insn & 0x000000fe) == 0x00000046)
+#define ENDIAN_CONVERT(insn)	be32_to_cpu(insn)
+#else /* __NDS32_EB__ */
+#define INSN_NOP		0x40000009
+#define INSN_SIZE(insn)		(((insn & 0x80000000) == 0) ? 4 : 2)
+#define IS_SETHI(insn)		((insn & 0xfe000000) == 0x46000000)
+#define ENDIAN_CONVERT(insn)	(insn)
+#endif
+
+extern void _ftrace_caller(unsigned long parent_ip);
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	return addr;
+}
+struct dyn_arch_ftrace {
+};
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#endif /* __ASM_NDS32_FTRACE_H */
diff --git a/arch/nds32/include/asm/nds32.h b/arch/nds32/include/asm/nds32.h
index 19b19394a936cfdecaa51dba9159ee03473b76e7..68c38151c3e41c24c24081f92449a097bf077f02 100644
--- a/arch/nds32/include/asm/nds32.h
+++ b/arch/nds32/include/asm/nds32.h
@@ -17,6 +17,7 @@
 #else
 #define FP_OFFSET (-2)
 #endif
+#define LP_OFFSET (-1)
 
 extern void __init early_trap_init(void);
 static inline void GIE_ENABLE(void)
diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h
index 18a009f3804d5e94ac23bd67b6eb4144ffa2c614..362a32d9bd16871e1db6c45d544eb2c4450bfeb5 100644
--- a/arch/nds32/include/asm/uaccess.h
+++ b/arch/nds32/include/asm/uaccess.h
@@ -38,7 +38,7 @@ struct exception_table_entry {
 extern int fixup_exception(struct pt_regs *regs);
 
 #define KERNEL_DS 	((mm_segment_t) { ~0UL })
-#define USER_DS     	((mm_segment_t) {TASK_SIZE - 1})
+#define USER_DS		((mm_segment_t) {TASK_SIZE - 1})
 
 #define get_ds()	(KERNEL_DS)
 #define get_fs()	(current_thread_info()->addr_limit)
@@ -49,11 +49,11 @@ static inline void set_fs(mm_segment_t fs)
 	current_thread_info()->addr_limit = fs;
 }
 
-#define segment_eq(a, b)    ((a) == (b))
+#define segment_eq(a, b)	((a) == (b))
 
 #define __range_ok(addr, size) (size <= get_fs() && addr <= (get_fs() -size))
 
-#define access_ok(type, addr, size)                 \
+#define access_ok(type, addr, size)	\
 	__range_ok((unsigned long)addr, (unsigned long)size)
 /*
  * Single-value transfer routines.  They automatically use the right
@@ -75,70 +75,73 @@ static inline void set_fs(mm_segment_t fs)
  * versions are void (ie, don't return a value as such).
  */
 
-#define get_user(x,p)							\
-({									\
-	long __e = -EFAULT;						\
-	if(likely(access_ok(VERIFY_READ,  p, sizeof(*p)))) {		\
-		__e = __get_user(x,p);					\
-	} else								\
-		x = 0;							\
-	__e;								\
-})
-#define __get_user(x,ptr)						\
+#define get_user	__get_user					\
+
+#define __get_user(x, ptr)						\
 ({									\
 	long __gu_err = 0;						\
-	__get_user_err((x),(ptr),__gu_err);				\
+	__get_user_check((x), (ptr), __gu_err);				\
 	__gu_err;							\
 })
 
-#define __get_user_error(x,ptr,err)					\
+#define __get_user_error(x, ptr, err)					\
 ({									\
-	__get_user_err((x),(ptr),err);					\
-	(void) 0;							\
+	__get_user_check((x), (ptr), (err));				\
+	(void)0;							\
 })
 
-#define __get_user_err(x,ptr,err)					\
+#define __get_user_check(x, ptr, err)					\
+({									\
+	const __typeof__(*(ptr)) __user *__p = (ptr);			\
+	might_fault();							\
+	if (access_ok(VERIFY_READ, __p, sizeof(*__p))) {		\
+		__get_user_err((x), __p, (err));			\
+	} else {							\
+		(x) = 0; (err) = -EFAULT;				\
+	}								\
+})
+
+#define __get_user_err(x, ptr, err)					\
 do {									\
-	unsigned long __gu_addr = (unsigned long)(ptr);			\
 	unsigned long __gu_val;						\
 	__chk_user_ptr(ptr);						\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
-		__get_user_asm("lbi",__gu_val,__gu_addr,err);		\
+		__get_user_asm("lbi", __gu_val, (ptr), (err));		\
 		break;							\
 	case 2:								\
-		__get_user_asm("lhi",__gu_val,__gu_addr,err);		\
+		__get_user_asm("lhi", __gu_val, (ptr), (err));		\
 		break;							\
 	case 4:								\
-		__get_user_asm("lwi",__gu_val,__gu_addr,err);		\
+		__get_user_asm("lwi", __gu_val, (ptr), (err));		\
 		break;							\
 	case 8:								\
-		__get_user_asm_dword(__gu_val,__gu_addr,err);		\
+		__get_user_asm_dword(__gu_val, (ptr), (err));		\
 		break;							\
 	default:							\
 		BUILD_BUG(); 						\
 		break;							\
 	}								\
-	(x) = (__typeof__(*(ptr)))__gu_val;				\
+	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
 } while (0)
 
-#define __get_user_asm(inst,x,addr,err)					\
-	asm volatile(							\
-	"1:	"inst"	%1,[%2]\n"					\
-	"2:\n"								\
-	"	.section .fixup,\"ax\"\n"				\
-	"	.align	2\n"						\
-	"3:	move %0, %3\n"						\
-	"	move %1, #0\n"						\
-	"	b	2b\n"						\
-	"	.previous\n"						\
-	"	.section __ex_table,\"a\"\n"				\
-	"	.align	3\n"						\
-	"	.long	1b, 3b\n"					\
-	"	.previous"						\
-	: "+r" (err), "=&r" (x)						\
-	: "r" (addr), "i" (-EFAULT)					\
-	: "cc")
+#define __get_user_asm(inst, x, addr, err)				\
+	__asm__ __volatile__ (						\
+		"1:	"inst"	%1,[%2]\n"				\
+		"2:\n"							\
+		"	.section .fixup,\"ax\"\n"			\
+		"	.align	2\n"					\
+		"3:	move %0, %3\n"					\
+		"	move %1, #0\n"					\
+		"	b	2b\n"					\
+		"	.previous\n"					\
+		"	.section __ex_table,\"a\"\n"			\
+		"	.align	3\n"					\
+		"	.long	1b, 3b\n"				\
+		"	.previous"					\
+		: "+r" (err), "=&r" (x)					\
+		: "r" (addr), "i" (-EFAULT)				\
+		: "cc")
 
 #ifdef __NDS32_EB__
 #define __gu_reg_oper0 "%H1"
@@ -149,61 +152,66 @@ do {									\
 #endif
 
 #define __get_user_asm_dword(x, addr, err) 				\
-	asm volatile(			    				\
-	"\n1:\tlwi " __gu_reg_oper0 ",[%2]\n"				\
-	"\n2:\tlwi " __gu_reg_oper1 ",[%2+4]\n"				\
-	"3:\n"								\
-	"	.section .fixup,\"ax\"\n"				\
-	"	.align	2\n"						\
-	"4:	move	%0, %3\n"					\
-	"	b	3b\n"						\
-	"	.previous\n"						\
-	"	.section __ex_table,\"a\"\n"				\
-	"	.align	3\n"						\
-	"	.long	1b, 4b\n"					\
-	"	.long	2b, 4b\n"					\
-	"	.previous"						\
-	: "+r"(err), "=&r"(x)                				\
-	: "r"(addr), "i"(-EFAULT) 					\
-	: "cc")
-#define put_user(x,p)							\
-({									\
-	long __e = -EFAULT;						\
-	if(likely(access_ok(VERIFY_WRITE,  p, sizeof(*p)))) {		\
-		__e = __put_user(x,p);					\
-	}								\
-	__e;								\
-})
-#define __put_user(x,ptr)						\
+	__asm__ __volatile__ (						\
+		"\n1:\tlwi " __gu_reg_oper0 ",[%2]\n"			\
+		"\n2:\tlwi " __gu_reg_oper1 ",[%2+4]\n"			\
+		"3:\n"							\
+		"	.section .fixup,\"ax\"\n"			\
+		"	.align	2\n"					\
+		"4:	move	%0, %3\n"				\
+		"	b	3b\n"					\
+		"	.previous\n"					\
+		"	.section __ex_table,\"a\"\n"			\
+		"	.align	3\n"					\
+		"	.long	1b, 4b\n"				\
+		"	.long	2b, 4b\n"				\
+		"	.previous"					\
+		: "+r"(err), "=&r"(x)					\
+		: "r"(addr), "i"(-EFAULT)				\
+		: "cc")
+
+#define put_user	__put_user					\
+
+#define __put_user(x, ptr)						\
 ({									\
 	long __pu_err = 0;						\
-	__put_user_err((x),(ptr),__pu_err);				\
+	__put_user_err((x), (ptr), __pu_err);				\
 	__pu_err;							\
 })
 
-#define __put_user_error(x,ptr,err)					\
+#define __put_user_error(x, ptr, err)					\
+({									\
+	__put_user_err((x), (ptr), (err));				\
+	(void)0;							\
+})
+
+#define __put_user_check(x, ptr, err)					\
 ({									\
-	__put_user_err((x),(ptr),err);					\
-	(void) 0;							\
+	__typeof__(*(ptr)) __user *__p = (ptr);				\
+	might_fault();							\
+	if (access_ok(VERIFY_WRITE, __p, sizeof(*__p))) {		\
+		__put_user_err((x), __p, (err));			\
+	} else	{							\
+		(err) = -EFAULT;					\
+	}								\
 })
 
-#define __put_user_err(x,ptr,err)					\
+#define __put_user_err(x, ptr, err)					\
 do {									\
-	unsigned long __pu_addr = (unsigned long)(ptr);			\
 	__typeof__(*(ptr)) __pu_val = (x);				\
 	__chk_user_ptr(ptr);						\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
-		__put_user_asm("sbi",__pu_val,__pu_addr,err);		\
+		__put_user_asm("sbi", __pu_val, (ptr), (err));		\
 		break;							\
 	case 2: 							\
-		__put_user_asm("shi",__pu_val,__pu_addr,err);		\
+		__put_user_asm("shi", __pu_val, (ptr), (err));		\
 		break;							\
 	case 4: 							\
-		__put_user_asm("swi",__pu_val,__pu_addr,err);		\
+		__put_user_asm("swi", __pu_val, (ptr), (err));		\
 		break;							\
 	case 8:								\
-		__put_user_asm_dword(__pu_val,__pu_addr,err);		\
+		__put_user_asm_dword(__pu_val, (ptr), (err));		\
 		break;							\
 	default:							\
 		BUILD_BUG(); 						\
@@ -211,22 +219,22 @@ do {									\
 	}								\
 } while (0)
 
-#define __put_user_asm(inst,x,addr,err)					\
-	asm volatile(							\
-	"1:	"inst"	%1,[%2]\n"					\
-	"2:\n"								\
-	"	.section .fixup,\"ax\"\n"				\
-	"	.align	2\n"						\
-	"3:	move	%0, %3\n"					\
-	"	b	2b\n"						\
-	"	.previous\n"						\
-	"	.section __ex_table,\"a\"\n"				\
-	"	.align	3\n"						\
-	"	.long	1b, 3b\n"					\
-	"	.previous"						\
-	: "+r" (err)							\
-	: "r" (x), "r" (addr), "i" (-EFAULT)				\
-	: "cc")
+#define __put_user_asm(inst, x, addr, err)				\
+	__asm__ __volatile__ (						\
+		"1:	"inst"	%1,[%2]\n"				\
+		"2:\n"							\
+		"	.section .fixup,\"ax\"\n"			\
+		"	.align	2\n"					\
+		"3:	move	%0, %3\n"				\
+		"	b	2b\n"					\
+		"	.previous\n"					\
+		"	.section __ex_table,\"a\"\n"			\
+		"	.align	3\n"					\
+		"	.long	1b, 3b\n"				\
+		"	.previous"					\
+		: "+r" (err)						\
+		: "r" (x), "r" (addr), "i" (-EFAULT)			\
+		: "cc")
 
 #ifdef __NDS32_EB__
 #define __pu_reg_oper0 "%H2"
@@ -237,23 +245,24 @@ do {									\
 #endif
 
 #define __put_user_asm_dword(x, addr, err) 				\
-	asm volatile(			    				\
-	"\n1:\tswi " __pu_reg_oper0 ",[%1]\n"				\
-	"\n2:\tswi " __pu_reg_oper1 ",[%1+4]\n"				\
-	"3:\n"								\
-	"	.section .fixup,\"ax\"\n"				\
-	"	.align	2\n"						\
-	"4:	move	%0, %3\n"					\
-	"	b	3b\n"						\
-	"	.previous\n"						\
-	"	.section __ex_table,\"a\"\n"				\
-	"	.align	3\n"						\
-	"	.long	1b, 4b\n"					\
-	"	.long	2b, 4b\n"					\
-	"	.previous"						\
-	: "+r"(err)                					\
-	: "r"(addr), "r"(x), "i"(-EFAULT) 				\
-	: "cc")
+	__asm__ __volatile__ (						\
+		"\n1:\tswi " __pu_reg_oper0 ",[%1]\n"			\
+		"\n2:\tswi " __pu_reg_oper1 ",[%1+4]\n"			\
+		"3:\n"							\
+		"	.section .fixup,\"ax\"\n"			\
+		"	.align	2\n"					\
+		"4:	move	%0, %3\n"				\
+		"	b	3b\n"					\
+		"	.previous\n"					\
+		"	.section __ex_table,\"a\"\n"			\
+		"	.align	3\n"					\
+		"	.long	1b, 4b\n"				\
+		"	.long	2b, 4b\n"				\
+		"	.previous"					\
+		: "+r"(err)						\
+		: "r"(addr), "r"(x), "i"(-EFAULT)			\
+		: "cc")
+
 extern unsigned long __arch_clear_user(void __user * addr, unsigned long n);
 extern long strncpy_from_user(char *dest, const char __user * src, long count);
 extern __must_check long strlen_user(const char __user * str);
diff --git a/arch/nds32/kernel/Makefile b/arch/nds32/kernel/Makefile
index 42792743e8b953290b7cec0b4722684c72744df3..27cded39fa66266171a427afc988ad9db35fe75e 100644
--- a/arch/nds32/kernel/Makefile
+++ b/arch/nds32/kernel/Makefile
@@ -21,3 +21,9 @@ extra-y := head.o vmlinux.lds
 
 
 obj-y				+= vdso/
+
+obj-$(CONFIG_FUNCTION_TRACER)   += ftrace.o
+
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+endif
diff --git a/arch/nds32/kernel/atl2c.c b/arch/nds32/kernel/atl2c.c
index 0c6d031a1c4a9df00dc991c0f52dbdd7bddab51c..0c5386e72098e45f48b925efee3431bb275a1398 100644
--- a/arch/nds32/kernel/atl2c.c
+++ b/arch/nds32/kernel/atl2c.c
@@ -9,7 +9,8 @@
 
 void __iomem *atl2c_base;
 static const struct of_device_id atl2c_ids[] __initconst = {
-	{.compatible = "andestech,atl2c",}
+	{.compatible = "andestech,atl2c",},
+	{}
 };
 
 static int __init atl2c_of_init(void)
diff --git a/arch/nds32/kernel/ex-entry.S b/arch/nds32/kernel/ex-entry.S
index b8ae4e9a6b93db793d0b6b7d434adcdb47827743..21a144071566989af1daa55400a0c9fab98b34ee 100644
--- a/arch/nds32/kernel/ex-entry.S
+++ b/arch/nds32/kernel/ex-entry.S
@@ -118,7 +118,7 @@ common_exception_handler:
 	/* interrupt */
 2:
 #ifdef CONFIG_TRACE_IRQFLAGS
-	jal     trace_hardirqs_off
+	jal     __trace_hardirqs_off
 #endif
 	move	$r0, $sp
 	sethi	$lp, hi20(ret_from_intr)
diff --git a/arch/nds32/kernel/ex-exit.S b/arch/nds32/kernel/ex-exit.S
index 03e4f7788a1882a15cf89e68aa325978cbc3b0ce..f00af92f7e22fde904fc6e54999a9ebcdde7e950 100644
--- a/arch/nds32/kernel/ex-exit.S
+++ b/arch/nds32/kernel/ex-exit.S
@@ -138,8 +138,8 @@ no_work_pending:
 #ifdef CONFIG_TRACE_IRQFLAGS
 	lwi	$p0, [$sp+(#IPSW_OFFSET)]
 	andi	$p0, $p0, #0x1
-	la	$r10, trace_hardirqs_off
-	la	$r9, trace_hardirqs_on
+	la	$r10, __trace_hardirqs_off
+	la	$r9, __trace_hardirqs_on
 	cmovz	$r9, $p0, $r10
 	jral	$r9
 #endif
diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
new file mode 100644
index 0000000000000000000000000000000000000000..a0a9679ad5dee8a9d08810556cc204dedd127c08
--- /dev/null
+++ b/arch/nds32/kernel/ftrace.c
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ftrace.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+
+#ifndef CONFIG_DYNAMIC_FTRACE
+extern void (*ftrace_trace_function)(unsigned long, unsigned long,
+				     struct ftrace_ops*, struct pt_regs*);
+extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace);
+extern void ftrace_graph_caller(void);
+
+noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip,
+				  struct ftrace_ops *op, struct pt_regs *regs)
+{
+	__asm__ ("");  /* avoid to optimize as pure function */
+}
+
+noinline void _mcount(unsigned long parent_ip)
+{
+	/* save all state by the compiler prologue */
+
+	unsigned long ip = (unsigned long)__builtin_return_address(0);
+
+	if (ftrace_trace_function != ftrace_stub)
+		ftrace_trace_function(ip - MCOUNT_INSN_SIZE, parent_ip,
+				      NULL, NULL);
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	if (ftrace_graph_return != (trace_func_graph_ret_t)ftrace_stub
+	    || ftrace_graph_entry != ftrace_graph_entry_stub)
+		ftrace_graph_caller();
+#endif
+
+	/* restore all state by the compiler epilogue */
+}
+EXPORT_SYMBOL(_mcount);
+
+#else /* CONFIG_DYNAMIC_FTRACE */
+
+noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip,
+				  struct ftrace_ops *op, struct pt_regs *regs)
+{
+	__asm__ ("");  /* avoid to optimize as pure function */
+}
+
+noinline void __naked _mcount(unsigned long parent_ip)
+{
+	__asm__ ("");  /* avoid to optimize as pure function */
+}
+EXPORT_SYMBOL(_mcount);
+
+#define XSTR(s) STR(s)
+#define STR(s) #s
+void _ftrace_caller(unsigned long parent_ip)
+{
+	/* save all state needed by the compiler prologue */
+
+	/*
+	 * prepare arguments for real tracing function
+	 * first  arg : __builtin_return_address(0) - MCOUNT_INSN_SIZE
+	 * second arg : parent_ip
+	 */
+	__asm__ __volatile__ (
+		"move $r1, %0				   \n\t"
+		"addi $r0, %1, #-" XSTR(MCOUNT_INSN_SIZE) "\n\t"
+		:
+		: "r" (parent_ip), "r" (__builtin_return_address(0)));
+
+	/* a placeholder for the call to a real tracing function */
+	__asm__ __volatile__ (
+		"ftrace_call:		\n\t"
+		"nop			\n\t"
+		"nop			\n\t"
+		"nop			\n\t");
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	/* a placeholder for the call to ftrace_graph_caller */
+	__asm__ __volatile__ (
+		"ftrace_graph_call:	\n\t"
+		"nop			\n\t"
+		"nop			\n\t"
+		"nop			\n\t");
+#endif
+	/* restore all state needed by the compiler epilogue */
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+	return 0;
+}
+
+int ftrace_arch_code_modify_prepare(void)
+{
+	set_all_modules_text_rw();
+	return 0;
+}
+
+int ftrace_arch_code_modify_post_process(void)
+{
+	set_all_modules_text_ro();
+	return 0;
+}
+
+static unsigned long gen_sethi_insn(unsigned long addr)
+{
+	unsigned long opcode = 0x46000000;
+	unsigned long imm = addr >> 12;
+	unsigned long rt_num = 0xf << 20;
+
+	return ENDIAN_CONVERT(opcode | rt_num | imm);
+}
+
+static unsigned long gen_ori_insn(unsigned long addr)
+{
+	unsigned long opcode = 0x58000000;
+	unsigned long imm = addr & 0x0000fff;
+	unsigned long rt_num = 0xf << 20;
+	unsigned long ra_num = 0xf << 15;
+
+	return ENDIAN_CONVERT(opcode | rt_num | ra_num | imm);
+}
+
+static unsigned long gen_jral_insn(unsigned long addr)
+{
+	unsigned long opcode = 0x4a000001;
+	unsigned long rt_num = 0x1e << 20;
+	unsigned long rb_num = 0xf << 10;
+
+	return ENDIAN_CONVERT(opcode | rt_num | rb_num);
+}
+
+static void ftrace_gen_call_insn(unsigned long *call_insns,
+				 unsigned long addr)
+{
+	call_insns[0] = gen_sethi_insn(addr); /* sethi $r15, imm20u       */
+	call_insns[1] = gen_ori_insn(addr);   /* ori   $r15, $r15, imm15u */
+	call_insns[2] = gen_jral_insn(addr);  /* jral  $lp,  $r15         */
+}
+
+static int __ftrace_modify_code(unsigned long pc, unsigned long *old_insn,
+				unsigned long *new_insn, bool validate)
+{
+	unsigned long orig_insn[3];
+
+	if (validate) {
+		if (probe_kernel_read(orig_insn, (void *)pc, MCOUNT_INSN_SIZE))
+			return -EFAULT;
+		if (memcmp(orig_insn, old_insn, MCOUNT_INSN_SIZE))
+			return -EINVAL;
+	}
+
+	if (probe_kernel_write((void *)pc, new_insn, MCOUNT_INSN_SIZE))
+		return -EPERM;
+
+	return 0;
+}
+
+static int ftrace_modify_code(unsigned long pc, unsigned long *old_insn,
+			      unsigned long *new_insn, bool validate)
+{
+	int ret;
+
+	ret = __ftrace_modify_code(pc, old_insn, new_insn, validate);
+	if (ret)
+		return ret;
+
+	flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
+
+	return ret;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long pc = (unsigned long)&ftrace_call;
+	unsigned long old_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
+	unsigned long new_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
+
+	if (func != ftrace_stub)
+		ftrace_gen_call_insn(new_insn, (unsigned long)func);
+
+	return ftrace_modify_code(pc, old_insn, new_insn, false);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long pc = rec->ip;
+	unsigned long nop_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
+	unsigned long call_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
+
+	ftrace_gen_call_insn(call_insn, addr);
+
+	return ftrace_modify_code(pc, nop_insn, call_insn, true);
+}
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+		    unsigned long addr)
+{
+	unsigned long pc = rec->ip;
+	unsigned long nop_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
+	unsigned long call_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
+
+	ftrace_gen_call_insn(call_insn, addr);
+
+	return ftrace_modify_code(pc, call_insn, nop_insn, true);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+			   unsigned long frame_pointer)
+{
+	unsigned long return_hooker = (unsigned long)&return_to_handler;
+	struct ftrace_graph_ent trace;
+	unsigned long old;
+	int err;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+	old = *parent;
+
+	trace.func = self_addr;
+	trace.depth = current->curr_ret_stack + 1;
+
+	/* Only trace if the calling function expects to */
+	if (!ftrace_graph_entry(&trace))
+		return;
+
+	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+				       frame_pointer, NULL);
+
+	if (err == -EBUSY)
+		return;
+
+	*parent = return_hooker;
+}
+
+noinline void ftrace_graph_caller(void)
+{
+	unsigned long *parent_ip =
+		(unsigned long *)(__builtin_frame_address(2) - 4);
+
+	unsigned long selfpc =
+		(unsigned long)(__builtin_return_address(1) - MCOUNT_INSN_SIZE);
+
+	unsigned long frame_pointer =
+		(unsigned long)__builtin_frame_address(3);
+
+	prepare_ftrace_return(parent_ip, selfpc, frame_pointer);
+}
+
+extern unsigned long ftrace_return_to_handler(unsigned long frame_pointer);
+void __naked return_to_handler(void)
+{
+	__asm__ __volatile__ (
+		/* save state needed by the ABI     */
+		"smw.adm $r0,[$sp],$r1,#0x0  \n\t"
+
+		/* get original return address      */
+		"move $r0, $fp               \n\t"
+		"bal ftrace_return_to_handler\n\t"
+		"move $lp, $r0               \n\t"
+
+		/* restore state nedded by the ABI  */
+		"lmw.bim $r0,[$sp],$r1,#0x0  \n\t");
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern unsigned long ftrace_graph_call;
+
+static int ftrace_modify_graph_caller(bool enable)
+{
+	unsigned long pc = (unsigned long)&ftrace_graph_call;
+	unsigned long nop_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
+	unsigned long call_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
+
+	ftrace_gen_call_insn(call_insn, (unsigned long)ftrace_graph_caller);
+
+	if (enable)
+		return ftrace_modify_code(pc, nop_insn, call_insn, true);
+	else
+		return ftrace_modify_code(pc, call_insn, nop_insn, true);
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_graph_caller(false);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+noinline void __trace_hardirqs_off(void)
+{
+	trace_hardirqs_off();
+}
+noinline void __trace_hardirqs_on(void)
+{
+	trace_hardirqs_on();
+}
+#endif /* CONFIG_TRACE_IRQFLAGS */
diff --git a/arch/nds32/kernel/module.c b/arch/nds32/kernel/module.c
index 4167283d8293f16c65e5f1b100a40201265a1c26..1e31829cbc2a71ec4dd3c6be62874245cda010c2 100644
--- a/arch/nds32/kernel/module.c
+++ b/arch/nds32/kernel/module.c
@@ -40,7 +40,7 @@ void do_reloc16(unsigned int val, unsigned int *loc, unsigned int val_mask,
 
 	tmp2 = tmp & loc_mask;
 	if (partial_in_place) {
-		tmp &= (!loc_mask);
+		tmp &= (~loc_mask);
 		tmp =
 		    tmp2 | ((tmp + ((val & val_mask) >> val_shift)) & val_mask);
 	} else {
@@ -70,7 +70,7 @@ void do_reloc32(unsigned int val, unsigned int *loc, unsigned int val_mask,
 
 	tmp2 = tmp & loc_mask;
 	if (partial_in_place) {
-		tmp &= (!loc_mask);
+		tmp &= (~loc_mask);
 		tmp =
 		    tmp2 | ((tmp + ((val & val_mask) >> val_shift)) & val_mask);
 	} else {
diff --git a/arch/nds32/kernel/stacktrace.c b/arch/nds32/kernel/stacktrace.c
index 8b231e910ea68980dbd517be895200ad19e49f55..d974c0c1c65f34123af8c7d2a63e2fb3e390e9f9 100644
--- a/arch/nds32/kernel/stacktrace.c
+++ b/arch/nds32/kernel/stacktrace.c
@@ -4,6 +4,7 @@
 #include <linux/sched/debug.h>
 #include <linux/sched/task_stack.h>
 #include <linux/stacktrace.h>
+#include <linux/ftrace.h>
 
 void save_stack_trace(struct stack_trace *trace)
 {
@@ -16,6 +17,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 	unsigned long *fpn;
 	int skip = trace->skip;
 	int savesched;
+	int graph_idx = 0;
 
 	if (tsk == current) {
 		__asm__ __volatile__("\tori\t%0, $fp, #0\n":"=r"(fpn));
@@ -29,10 +31,12 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 	       && (fpn >= (unsigned long *)TASK_SIZE)) {
 		unsigned long lpp, fpp;
 
-		lpp = fpn[-1];
+		lpp = fpn[LP_OFFSET];
 		fpp = fpn[FP_OFFSET];
 		if (!__kernel_text_address(lpp))
 			break;
+		else
+			lpp = ftrace_graph_ret_addr(tsk, &graph_idx, lpp, NULL);
 
 		if (savesched || !in_sched_functions(lpp)) {
 			if (skip) {
diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c
index a6205fd4db521eaf83c606d399975a4fe275c250..1496aab48998817c00cb175dbd8b09b3453d73dd 100644
--- a/arch/nds32/kernel/traps.c
+++ b/arch/nds32/kernel/traps.c
@@ -8,6 +8,7 @@
 #include <linux/kdebug.h>
 #include <linux/sched/task_stack.h>
 #include <linux/uaccess.h>
+#include <linux/ftrace.h>
 
 #include <asm/proc-fns.h>
 #include <asm/unistd.h>
@@ -94,28 +95,6 @@ static void dump_instr(struct pt_regs *regs)
 	set_fs(fs);
 }
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-#include <linux/ftrace.h>
-static void
-get_real_ret_addr(unsigned long *addr, struct task_struct *tsk, int *graph)
-{
-	if (*addr == (unsigned long)return_to_handler) {
-		int index = tsk->curr_ret_stack;
-
-		if (tsk->ret_stack && index >= *graph) {
-			index -= *graph;
-			*addr = tsk->ret_stack[index].ret;
-			(*graph)++;
-		}
-	}
-}
-#else
-static inline void
-get_real_ret_addr(unsigned long *addr, struct task_struct *tsk, int *graph)
-{
-}
-#endif
-
 #define LOOP_TIMES (100)
 static void __dump(struct task_struct *tsk, unsigned long *base_reg)
 {
@@ -126,7 +105,8 @@ static void __dump(struct task_struct *tsk, unsigned long *base_reg)
 		while (!kstack_end(base_reg)) {
 			ret_addr = *base_reg++;
 			if (__kernel_text_address(ret_addr)) {
-				get_real_ret_addr(&ret_addr, tsk, &graph);
+				ret_addr = ftrace_graph_ret_addr(
+						tsk, &graph, ret_addr, NULL);
 				print_ip_sym(ret_addr);
 			}
 			if (--cnt < 0)
@@ -137,15 +117,12 @@ static void __dump(struct task_struct *tsk, unsigned long *base_reg)
 		       !((unsigned long)base_reg & 0x3) &&
 		       ((unsigned long)base_reg >= TASK_SIZE)) {
 			unsigned long next_fp;
-#if !defined(NDS32_ABI_2)
-			ret_addr = base_reg[0];
-			next_fp = base_reg[1];
-#else
-			ret_addr = base_reg[-1];
+			ret_addr = base_reg[LP_OFFSET];
 			next_fp = base_reg[FP_OFFSET];
-#endif
 			if (__kernel_text_address(ret_addr)) {
-				get_real_ret_addr(&ret_addr, tsk, &graph);
+
+				ret_addr = ftrace_graph_ret_addr(
+						tsk, &graph, ret_addr, NULL);
 				print_ip_sym(ret_addr);
 			}
 			if (--cnt < 0)
@@ -196,11 +173,10 @@ void die(const char *str, struct pt_regs *regs, int err)
 	pr_emerg("CPU: %i\n", smp_processor_id());
 	show_regs(regs);
 	pr_emerg("Process %s (pid: %d, stack limit = 0x%p)\n",
-		 tsk->comm, tsk->pid, task_thread_info(tsk) + 1);
+		 tsk->comm, tsk->pid, end_of_stack(tsk));
 
 	if (!user_mode(regs) || in_interrupt()) {
-		dump_mem("Stack: ", regs->sp,
-			 THREAD_SIZE + (unsigned long)task_thread_info(tsk));
+		dump_mem("Stack: ", regs->sp, (regs->sp + PAGE_SIZE) & PAGE_MASK);
 		dump_instr(regs);
 		dump_stack();
 	}
diff --git a/arch/nds32/kernel/vmlinux.lds.S b/arch/nds32/kernel/vmlinux.lds.S
index 288313b886efa269fb0e13eb9231d50879e72947..9e90f30a181d7d9c9b06dc04d230c02bfde67b78 100644
--- a/arch/nds32/kernel/vmlinux.lds.S
+++ b/arch/nds32/kernel/vmlinux.lds.S
@@ -13,14 +13,26 @@ OUTPUT_ARCH(nds32)
 ENTRY(_stext_lma)
 jiffies = jiffies_64;
 
+#if defined(CONFIG_GCOV_KERNEL)
+#define NDS32_EXIT_KEEP(x)	x
+#else
+#define NDS32_EXIT_KEEP(x)
+#endif
+
 SECTIONS
 {
 	_stext_lma = TEXTADDR - LOAD_OFFSET;
 	. = TEXTADDR;
 	__init_begin = .;
 	HEAD_TEXT_SECTION
+	.exit.text : {
+		NDS32_EXIT_KEEP(EXIT_TEXT)
+	}
 	INIT_TEXT_SECTION(PAGE_SIZE)
 	INIT_DATA_SECTION(16)
+	.exit.data : {
+		NDS32_EXIT_KEEP(EXIT_DATA)
+	}
 	PERCPU_SECTION(L1_CACHE_BYTES)
 	__init_end = .;
 
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index fe06e77c15eb7072d1cb9c09d9481048984e03d7..f599031260d5178d10dd83554ac20d957c8b82af 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -389,6 +389,9 @@ if ($arch eq "x86_64") {
     $mcount_regex = "^\\s*([0-9a-fA-F]+):\\sR_RISCV_CALL\\s_mcount\$";
     $type = ".quad";
     $alignment = 2;
+} elsif ($arch eq "nds32") {
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_NDS32_HI20_RELA\\s+_mcount\$";
+    $alignment = 2;
 } else {
     die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
 }